aboutsummaryrefslogtreecommitdiffstats
path: root/system/doc/efficiency_guide
diff options
context:
space:
mode:
Diffstat (limited to 'system/doc/efficiency_guide')
-rw-r--r--system/doc/efficiency_guide/Makefile117
-rw-r--r--system/doc/efficiency_guide/README122
-rw-r--r--system/doc/efficiency_guide/advanced.xml204
-rw-r--r--system/doc/efficiency_guide/all.erl106
-rw-r--r--system/doc/efficiency_guide/appendix.xml72
-rw-r--r--system/doc/efficiency_guide/bench.erl488
-rw-r--r--system/doc/efficiency_guide/bench.hrl22
-rw-r--r--system/doc/efficiency_guide/binaryhandling.xml528
-rw-r--r--system/doc/efficiency_guide/book.xml42
-rw-r--r--system/doc/efficiency_guide/call_bm.erl75
-rw-r--r--system/doc/efficiency_guide/call_result.html26
-rw-r--r--system/doc/efficiency_guide/commoncaveats.xml239
-rw-r--r--system/doc/efficiency_guide/digger.ps197
-rw-r--r--system/doc/efficiency_guide/drivers.xml148
-rw-r--r--system/doc/efficiency_guide/efficiency_guide.erl214
-rw-r--r--system/doc/efficiency_guide/functions.xml234
-rw-r--r--system/doc/efficiency_guide/introduction.xml69
-rw-r--r--system/doc/efficiency_guide/listhandling.xml241
-rw-r--r--system/doc/efficiency_guide/make.dep16
-rw-r--r--system/doc/efficiency_guide/myths.xml203
-rw-r--r--system/doc/efficiency_guide/part.xml42
-rw-r--r--system/doc/efficiency_guide/processes.xml264
-rw-r--r--system/doc/efficiency_guide/profiling.xml258
-rw-r--r--system/doc/efficiency_guide/tablesDatabases.xml379
-rw-r--r--system/doc/efficiency_guide/xmlfiles.mk32
25 files changed, 4338 insertions, 0 deletions
diff --git a/system/doc/efficiency_guide/Makefile b/system/doc/efficiency_guide/Makefile
new file mode 100644
index 0000000000..f51313de84
--- /dev/null
+++ b/system/doc/efficiency_guide/Makefile
@@ -0,0 +1,117 @@
+#
+# %CopyrightBegin%
+#
+# Copyright Ericsson AB 2001-2009. All Rights Reserved.
+#
+# The contents of this file are subject to the Erlang Public License,
+# Version 1.1, (the "License"); you may not use this file except in
+# compliance with the License. You should have received a copy of the
+# Erlang Public License along with this software. If not, it can be
+# retrieved online at http://www.erlang.org/.
+#
+# Software distributed under the License is distributed on an "AS IS"
+# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+# the License for the specific language governing rights and limitations
+# under the License.
+#
+# %CopyrightEnd%
+#
+#
+include $(ERL_TOP)/make/target.mk
+include $(ERL_TOP)/make/$(TARGET)/otp.mk
+
+# ----------------------------------------------------
+# Application version
+# ----------------------------------------------------
+include $(ERL_TOP)/erts/vsn.mk
+#VSN=$(SYSTEM_VSN)
+
+APPLICATION=otp-system-documentation
+# ----------------------------------------------------
+# Release directory specification
+# ----------------------------------------------------
+RELSYSDIR = $(RELEASE_PATH)/doc/efficiency_guide
+
+# ----------------------------------------------------
+# Target Specs
+# ----------------------------------------------------
+XML_PART_FILES = part.xml
+
+include xmlfiles.mk
+
+XML_CHAPTER_FILES = $(EFF_GUIDE_CHAPTER_FILES)
+
+TOPDOCDIR=..
+
+BOOK_FILES = book.xml
+
+PS_FILES = digger.ps
+
+XML_FILES = \
+ $(BOOK_FILES) $(XML_CHAPTER_FILES) \
+ $(XML_PART_FILES)
+
+# ----------------------------------------------------
+
+C_FILES = \
+
+ERL_FILES = all.erl\
+ bench.erl\
+ call_bm.erl
+
+HRL_FILES = bench.hrl
+
+
+MISC_FILES = call_result.html\
+ README
+
+HTML_FILES = \
+ $(XML_PART_FILES:%.xml=%.html)
+
+HTMLDIR = ../html/efficiency_guide
+
+EXTRA_FILES = $(ERL_FILES) \
+ $(HRL_FILES) \
+ $(MISC_FILES)
+
+HTML_UG_FILE = $(HTMLDIR)/users_guide.html
+
+# ----------------------------------------------------
+# FLAGS
+# ----------------------------------------------------
+XML_FLAGS +=
+DVIPS_FLAGS +=
+
+# ----------------------------------------------------
+# Targets
+# ----------------------------------------------------
+docs: html
+
+local_docs: PDFDIR=../../pdf
+
+html: $(GIF_FILES) $(HTML_UG_FILE)
+
+debug opt:
+
+clean clean_docs:
+ rm -rf $(HTMLDIR)
+ rm -f $(TOP_PDF_FILE) $(TOP_PDF_FILE:%.pdf=%.fo)
+ rm -f errs core *~
+
+# ----------------------------------------------------
+# Release Target
+# ----------------------------------------------------
+include $(ERL_TOP)/make/otp_release_targets.mk
+
+release_docs_spec: docs
+# $(INSTALL_DIR) $(RELEASE_PATH)/pdf
+# $(INSTALL_DATA) $(TOP_PDF_FILE) $(RELEASE_PATH)/pdf
+ $(INSTALL_DIR) $(RELSYSDIR)
+ $(INSTALL_DATA) $(GIF_FILES) $(EXTRA_FILES) $(HTMLDIR)/*.html \
+ $(RELSYSDIR)
+
+
+release_spec:
+
+
+
diff --git a/system/doc/efficiency_guide/README b/system/doc/efficiency_guide/README
new file mode 100644
index 0000000000..6830a46d9e
--- /dev/null
+++ b/system/doc/efficiency_guide/README
@@ -0,0 +1,122 @@
+Benchmark framework
+-------------------
+
+This benchmark framework consists of the files:
+bench.erl - see bench module below
+bench.hrl - Defines some useful macros
+all.erl - see all module below
+
+bench module
+-----------
+
+The module bench is a generic module that measures execution time
+of functions in callback modules and writes an html-report on the outcome.
+
+When you execute the function bench:run/0 it will compile and run all
+benchmark modules in the current directory.
+
+all module
+-----------
+
+In the all module there is a function called releases/0 that you can
+edit to contain all your erlang installations and then you can
+run your benchmarks on several erlang versions using only one command i.e.
+all:run().
+
+Requirements on callback modules
+---------------------------------
+
+* A callback module must be named <callbackModuleName>_bm.erl
+
+* The module must export the function benchmarks/0 that must return:
+ {Iterations, [Name1,Name2...]} where Iterations is the number of
+ times each benchmark should be run. Name1, Name2 and so one are the
+ name of exported functions in the module.
+
+* The exported functions Name1 etc. must take one argument i.e. the number
+ of iterations and should return the atom ok.
+
+* The functions in a benchmark module should represent different
+ ways/different sequential algorithms for doing something. And the
+ result will be how fast they are compared to each other.
+
+Files created
+--------------
+
+Files that are created in the current directory are *.bmres and
+index.html. The file(s) with the extension "bmres" are an intermediate
+representation of the benchmark results and is only meant to be read
+by the reporting mechanism defined in bench.erl. The index.html file
+is the report telling you how good the benchmarks are in comparison to
+each other. If you run your test on several erlang releases the
+html-file will include the result for all versions.
+
+
+Pitfalls
+---------
+To get meaningful measurements, you should make sure that:
+
+* The total execution time is at least several seconds.
+
+* That any time spent in setup before entering the measurement loop is very
+ small compared to the total time.
+
+* That time spent by the loop itself is small compared to the total execution
+ time
+
+Consider the following example of a benchmark function that does
+a local function call.
+
+local_call(0) -> ok;
+local_call(Iter) ->
+ foo(), % Local function call
+ local_call(Iter-1).
+
+The problem is that both "foo()" and "local_call(Iter-1)" takes about
+the same amount of time. To get meaningful figures you'll need to make
+sure that the loop overhead will not be visible. In this case we can
+take help of a macro in bench.hrl to repeat the local function call
+many times, making sure that time spent calling the local function is
+relatively much longer than the time spent iterating. Of course, all
+benchmarks in the same module must be repeated the same number of
+times; thus external_call will look like
+
+external_call(0) -> ok;
+external_call(Iter) ->
+ ?rep20(?MODULE:foo()),
+ external_call(Iter-1).
+
+This technique is only necessary if the operation we are testing executes
+really fast.
+
+If you for instance want to test a sort routine we can keep it simple:
+
+sorted(Iter) ->
+ do_sort(Iter, lists:seq(0, 63)).
+
+do_sort(0, List) -> ok;
+do_sort(Iter, List) ->
+ lists:sort(List),
+ do_sort(Iter-1, List).
+
+The call to lists:seq/2 is only done once. The loop overhead in the
+do_sort/2 function is small compared to the execution time of lists:sort/1.
+
+Error handling
+---------------
+
+Any error enforced by a callback module will result in exit of the benchmark
+program and an errormessage that should give a good idea of what is wrong.
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/system/doc/efficiency_guide/advanced.xml b/system/doc/efficiency_guide/advanced.xml
new file mode 100644
index 0000000000..0ec3afbd59
--- /dev/null
+++ b/system/doc/efficiency_guide/advanced.xml
@@ -0,0 +1,204 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE chapter SYSTEM "chapter.dtd">
+
+<chapter>
+ <header>
+ <copyright>
+ <year>2001</year><year>2009</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ </legalnotice>
+
+ <title>Advanced</title>
+ <prepared>Kenneth Lundin</prepared>
+ <docno></docno>
+ <date>2001-08-21</date>
+ <rev></rev>
+ <file>advanced.xml</file>
+ </header>
+
+ <section>
+ <title>Memory</title>
+ <p>A good start when programming efficiently is to have knowledge about
+ how much memory different data types and operations require. It is
+ implementation-dependent how much memory the Erlang data types and
+ other items consume, but here are some figures for
+ erts-5.2 system (OTP release R9B). (There have been no significant
+ changes in R13.)</p>
+
+ <p>The unit of measurement is memory words. There exists both a 32-bit
+ and a 64-bit implementation, and a word is therefore, 4 bytes or
+ 8 bytes, respectively.</p>
+ <table>
+ <row>
+ <cell align="center" valign="middle">Data type</cell>
+ <cell align="center" valign="middle">Memory size</cell>
+ </row>
+ <row>
+ <cell align="left" valign="middle">Integer (-16#7FFFFFF &lt; i &lt;16#7FFFFFF)</cell>
+ <cell align="left" valign="middle">1 word</cell>
+ </row>
+ <row>
+ <cell align="left" valign="middle">Integer (big numbers)</cell>
+ <cell align="left" valign="middle">3..N words</cell>
+ </row>
+ <row>
+ <cell align="left" valign="middle">Atom</cell>
+ <cell align="left" valign="middle">1 word. Note: an atom refers into
+ an atom table which also consumes memory.
+ The atom text is stored once for each unique atom in this table.
+ The atom table is <em>not</em> garbage-collected.</cell>
+ </row>
+ <row>
+ <cell align="left" valign="middle">Float</cell>
+ <cell align="left" valign="middle">On 32-bit architectures: 4 words <br></br>
+On 64-bit architectures: 3 words</cell>
+ </row>
+ <row>
+ <cell align="left" valign="middle">Binary</cell>
+ <cell align="left" valign="middle">3..6 + data (can be shared)</cell>
+ </row>
+ <row>
+ <cell align="left" valign="middle">List</cell>
+ <cell align="left" valign="middle">1 word per element + the size of each element</cell>
+ </row>
+ <row>
+ <cell align="left" valign="middle">String (is the same as a list of integers)</cell>
+ <cell align="left" valign="middle">2 words per character</cell>
+ </row>
+ <row>
+ <cell align="left" valign="middle">Tuple</cell>
+ <cell align="left" valign="middle">2 words + the size of each element</cell>
+ </row>
+ <row>
+ <cell align="left" valign="middle">Pid</cell>
+ <cell align="left" valign="middle">1 word for a process identifier from the current local node, and 5 words for a process identifier from another node. Note: a process identifier refers into a process table and a node table which also consumes memory.</cell>
+ </row>
+ <row>
+ <cell align="left" valign="middle">Port</cell>
+ <cell align="left" valign="middle">1 word for a port identifier from the current local node, and 5 words for a port identifier from another node. Note: a port identifier refers into a port table and a node table which also consumes memory.</cell>
+ </row>
+ <row>
+ <cell align="left" valign="middle">Reference</cell>
+ <cell align="left" valign="middle">On 32-bit architectures: 5 words for a reference from the current local node, and 7 words for a reference from another node. <br></br>
+On 64-bit architectures: 4 words for a reference from the current local node, and 6 words for a reference from another node. Note: a reference refers into a node table which also consumes memory.</cell>
+ </row>
+ <row>
+ <cell align="left" valign="middle">Fun</cell>
+ <cell align="left" valign="middle">9..13 words + size of environment. Note: a fun refers into a fun table which also consumes memory.</cell>
+ </row>
+ <row>
+ <cell align="left" valign="middle">Ets table</cell>
+ <cell align="left" valign="middle">Initially 768 words + the size of each element (6 words + size of Erlang data). The table will grow when necessary.</cell>
+ </row>
+ <row>
+ <cell align="left" valign="middle">Erlang process</cell>
+ <cell align="left" valign="middle">327 words when spawned including a heap of 233 words.</cell>
+ </row>
+ <tcaption>Memory size of different data types</tcaption>
+ </table>
+ </section>
+
+ <section>
+ <title>System limits</title>
+ <p>The Erlang language specification puts no limits on number of processes,
+ length of atoms etc., but for performance and memory saving reasons,
+ there will always be limits in a practical implementation of the Erlang
+ language and execution environment.</p>
+ <taglist>
+ <tag><em>Processes</em></tag>
+ <item>
+ <p>The maximum number of simultaneously alive Erlang processes is
+ by default 32768. This limit can be raised up to at most 268435456
+ processes at startup (see documentation of the system flag
+ <seealso marker="erts:erl#max_processes">+P</seealso> in the
+ <seealso marker="erts:erl">erl(1)</seealso> documentation).
+ The maximum limit of 268435456 processes will at least on a 32-bit
+ architecture be impossible to reach due to memory shortage.</p>
+ </item>
+ <tag><em>Distributed nodes</em></tag>
+ <item>
+ <taglist>
+ <tag>Known nodes</tag>
+ <item>
+ <p>A remote node Y has to be known to node X if there exist
+ any pids, ports, references, or funs (Erlang data types) from Y
+ on X, or if X and Y are connected. The maximum number of remote
+ nodes simultaneously/ever known to a node is limited by the
+ <seealso marker="#atoms">maximum number of atoms</seealso>
+ available for node names. All data concerning remote nodes,
+ except for the node name atom, are garbage-collected.</p>
+ </item>
+ <tag>Connected nodes</tag>
+ <item>The maximum number of simultaneously connected nodes is limited by
+ either the maximum number of simultaneously known remote nodes,
+ <seealso marker="#ports">the maximum number of (Erlang) ports</seealso>
+ available, or
+ <seealso marker="#files_sockets">the maximum number of sockets</seealso>
+ available.</item>
+ </taglist>
+ </item>
+ <tag><em>Characters in an atom</em></tag>
+ <item>255</item>
+ <tag><em>Atoms </em></tag>
+ <item> <marker id="atoms"></marker>
+The maximum number of atoms is 1048576. </item>
+ <tag><em>Ets-tables</em></tag>
+ <item>The default is 1400, can be changed with the environment variable <c>ERL_MAX_ETS_TABLES</c>.</item>
+ <tag><em>Elements in a tuple</em></tag>
+ <item>The maximum number of elements in a tuple is 67108863 (26 bit unsigned integer). Other factors
+ such as the available memory can of course make it hard to create a tuple of that size. </item>
+ <tag><em>Size of binary</em></tag>
+ <item>In the 32-bit implementation of Erlang, 536870911 bytes is the
+ largest binary that can be constructed or matched using the bit syntax.
+ (In the 64-bit implementation, the maximum size is 2305843009213693951 bytes.)
+ If the limit is exceeded, bit syntax construction will fail with a
+ <c>system_limit</c> exception, while any attempt to match a binary that is
+ too large will fail.
+ This limit is enforced starting with the R11B-4 release; in earlier releases,
+ operations on too large binaries would in general either fail or give incorrect
+ results.
+ In future releases of Erlang/OTP, other operations that create binaries (such as
+ <c>list_to_binary/1</c>) will probably also enforce the same limit.</item>
+ <tag><em>Total amount of data allocated by an Erlang node</em></tag>
+ <item>The Erlang runtime system can use the complete 32 (or 64) bit address space,
+ but the operating system often limits a single process to use less than that.</item>
+ <tag><em>length of a node name</em></tag>
+ <item>An Erlang node name has the form host@shortname or host@longname. The node name is
+ used as an atom within the system so the maximum size of 255 holds for the node name too.</item>
+ <tag><em>Open ports</em></tag>
+ <item>
+ <marker id="ports"></marker>
+ <p>The maximum number of simultaneously open Erlang ports is
+ by default 1024. This limit can be raised up to at most 268435456
+ at startup (see environment variable
+ <seealso marker="erts:erlang#ERL_MAX_PORTS">ERL_MAX_PORTS</seealso>
+ in <seealso marker="erts:erlang">erlang(3)</seealso>)
+ The maximum limit of 268435456 open ports will at least on a 32-bit
+ architecture be impossible to reach due to memory shortage.</p>
+ </item>
+ <tag><em>Open files, and sockets</em></tag>
+ <item> <marker id="files_sockets"></marker>
+
+ The maximum number of simultaneously open files and sockets
+ depend on
+ <seealso marker="#ports">the maximum number of Erlang ports</seealso>
+ available, and operating system specific settings and limits.</item>
+ <tag><em>Number of arguments to a function or fun</em></tag>
+ <item>256</item>
+ </taglist>
+ </section>
+</chapter>
+
diff --git a/system/doc/efficiency_guide/all.erl b/system/doc/efficiency_guide/all.erl
new file mode 100644
index 0000000000..a0f7809422
--- /dev/null
+++ b/system/doc/efficiency_guide/all.erl
@@ -0,0 +1,106 @@
+%% ``The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved via the world wide web at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Initial Developer of the Original Code is Ericsson Utvecklings AB.
+%% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings
+%% AB. All Rights Reserved.''
+%%
+%% $Id$
+%%
+-module(all).
+
+%% User interface
+-export([run/0]).
+
+%% Interna constants
+-define(NORMAL, 0).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%% Interface
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%---------------------------------------------------------------------------
+%% run() -> _
+%%
+%% Runs all benchmark modules in the current directory on all erlang
+%% installations specified by releases/0
+%%---------------------------------------------------------------------------
+run() ->
+ %% Delete previous intermediate test result files.
+ lists:foreach(fun(F) -> file:delete(F) end, filelib:wildcard("*.bmres")),
+ lists:foreach(fun run/1, releases()).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%% Internal functions
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%---------------------------------------------------------------------------
+%% run(Release) -> _
+%% Release = string() - Erlang release
+%% Help functions to run/0
+%%---------------------------------------------------------------------------
+run(Release) ->
+ command(Release ++ " -noshell -compile bench -s erlang halt"),
+ command(Release ++ " -noshell -s bench run -s erlang halt").
+%%---------------------------------------------------------------------------
+%% command(Command) -> _
+%% Command = string() - is the name and arguments of the external
+%% program which will be run
+%%---------------------------------------------------------------------------
+command(Command) ->
+ io:format("~s\n", [Command]), % Progress info to user
+ Port = open_port({spawn,Command}, [exit_status, in]),
+ print_output(Port).
+%%---------------------------------------------------------------------------
+%% print_output(Port) -> _
+%% Port = port()
+%% Print data from the port i.e. output from external program,
+%% on standard out.
+%%---------------------------------------------------------------------------
+print_output(Port) ->
+ receive
+ {Port, {data,Bytes}} ->
+ io:put_chars(Bytes),
+ print_output(Port);
+ {Port, {exit_status, ?NORMAL}} ->
+ ok
+ end.
+%%---------------------------------------------------------------------------
+%% run() -> Releases
+%% Releases = [Release |_]
+%% Release = string() - Erlang release
+%% Defines which erlang releases to run on
+%% --- Change this function to reflect your own erlang installations ---
+%%---------------------------------------------------------------------------
+releases() ->
+ ["/usr/local/otp/releases/otp_beam_sunos5_r7b01_patched/bin/erl",
+ "/usr/local/otp/releases/otp_beam_sunos5_r8b_patched/bin/erl"].
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/system/doc/efficiency_guide/appendix.xml b/system/doc/efficiency_guide/appendix.xml
new file mode 100644
index 0000000000..631ef9bee7
--- /dev/null
+++ b/system/doc/efficiency_guide/appendix.xml
@@ -0,0 +1,72 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE chapter SYSTEM "chapter.dtd">
+
+<chapter>
+ <header>
+ <copyright>
+ <year>2002</year>
+ <year>2007</year>
+ <holder>Ericsson AB, All Rights Reserved</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ The Initial Developer of the Original Code is Ericsson AB.
+ </legalnotice>
+
+ <title>Appendix - Programs</title>
+ <prepared>Ingela Anderton</prepared>
+ <docno></docno>
+ <date>2002-09-23</date>
+ <rev></rev>
+ <file>appendix.sgml</file>
+ </header>
+ <p>This appendix contains the programs referred to in the previous
+ chapters within the Efficiency Guide.</p>
+
+ <section>
+ <marker id="bench"></marker>
+ <title>bench.erl</title>
+ <codeinclude file="bench.erl" tag="" type="none"></codeinclude>
+ </section>
+
+ <section>
+ <marker id="bench_hrl"></marker>
+ <title>bench.hrl</title>
+ <codeinclude file="bench.hrl" tag="" type="none"></codeinclude>
+ </section>
+
+ <section>
+ <marker id="all"></marker>
+ <title>all.erl</title>
+ <codeinclude file="all.erl" tag="" type="none"></codeinclude>
+ </section>
+
+ <section>
+ <marker id="readme"></marker>
+ <title>README</title>
+ <codeinclude file="README" tag="" type="none"></codeinclude>
+ </section>
+
+ <section>
+ <marker id="call_bm"></marker>
+ <title>call_bm.erl</title>
+ <codeinclude file="call_bm.erl" tag="" type="none"></codeinclude>
+ </section>
+
+ <section>
+ <marker id="call_result"></marker>
+ <title>index.html</title>
+ <codeinclude file="call_result.html" tag="" type="none"></codeinclude>
+ </section>
+</chapter>
+
diff --git a/system/doc/efficiency_guide/bench.erl b/system/doc/efficiency_guide/bench.erl
new file mode 100644
index 0000000000..8f26b8d0af
--- /dev/null
+++ b/system/doc/efficiency_guide/bench.erl
@@ -0,0 +1,488 @@
+%% ``The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved via the world wide web at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Initial Developer of the Original Code is Ericsson Utvecklings AB.
+%% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings
+%% AB. All Rights Reserved.''
+%%
+%% $Id$
+%%
+
+-module(bench).
+
+%% User interface
+-export([run/0]).
+
+%% Exported to be used in spawn
+-export([measure/4]).
+
+%% Internal constants
+-define(MAX, 999999999999999).
+-define(RANGE_MAX, 16#7ffffff).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%% Interface
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%---------------------------------------------------------------------------
+%% run() -> _
+%%
+%% Compiles and runs all benchmarks in the current directory,
+%% and creates a report
+%%---------------------------------------------------------------------------
+run() ->
+ run(compiler_options()).
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%% Generic Benchmark functions
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%---------------------------------------------------------------------------
+%% compiler_options() -> OptionsList
+%% OptionsList = list() - See Erlang/OTP module compile
+%%---------------------------------------------------------------------------
+compiler_options() ->
+ [report_errors, report_warnings].
+
+%%---------------------------------------------------------------------------
+%% run(OptionsList) ->
+%% OptionsList = list() - See Erlang/OTP module compile
+%%
+%% Help function to run/0.
+%%---------------------------------------------------------------------------
+run(OptionsList) ->
+ Bms = compile_benchmarks(OptionsList),
+ run_benchmarks(Bms),
+ report().
+
+%%---------------------------------------------------------------------------
+%% compile_benchmarks(OptionsList) -> [BmInfo| _]
+%% OptionsList = list() - See Erlang/OTP module compile
+%% BmInfo = {Module, Iterations, [BmFunctionName| _]}
+%% Module = atom()
+%% Iterations = integer()
+%% BmFunctionName = atom()
+%%
+%% Compiles all benchmark modules in the current directory and
+%% returns info about the benchmarks.
+%%---------------------------------------------------------------------------
+compile_benchmarks(OptionsList) ->
+ {ok, FilesInCurrentDir} = file:list_dir("."),
+ ErlFiles = [ErlFile || ErlFile <- lists:sort(FilesInCurrentDir),
+ lists:suffix(".erl", ErlFile)],
+ lists:foldr(fun(File, BmInfoAcc) ->
+ case lists:suffix("_bm.erl", File) of
+ true ->
+ BmInfo = bm_compile(File, OptionsList),
+ [BmInfo | BmInfoAcc];
+ false ->
+ just_compile(File, OptionsList),
+ BmInfoAcc
+ end
+ end, [], ErlFiles).
+
+%%---------------------------------------------------------------------------
+%% just_compile(FileName, OptionsList) -> ok
+%% FileName = string()
+%% OptionsList = list() - See Erlang/OTP module compile
+%%
+%% Compiles a support module.
+%%---------------------------------------------------------------------------
+just_compile(FileName, OptionsList) ->
+ io:format("Compiling ~s...\n", [FileName]), % Progress info to user
+ case c:c(FileName, OptionsList) of
+ {ok, _Mod} ->
+ ok;
+ %% If compilation fails there is no point in trying to continue
+ error ->
+ Reason =
+ lists:flatten(
+ io_lib:format("Could not compile file ~s", [FileName])),
+ exit(self(), Reason)
+ end.
+%%---------------------------------------------------------------------------
+%% bm_compile(FileName, OptionsList) -> BmInfo
+%% FileName = string()
+%% OptionsList = list() - See Erlang/OTP module compile
+%% BmInfo = {Module, Iterations, [BmFunctionName| _]}
+%% Iterations = integer()
+%% Module = atom()
+%% BmFunctionName = atom()
+%%
+%% Compiles the benchmark module implemented in <FileName> and returns
+%% information about the benchmark tests.
+%%---------------------------------------------------------------------------
+bm_compile(FileName, OptionsList) ->
+ io:format("Compiling ~s...\n", [FileName]), % Progress info to user
+ case c:c(FileName, OptionsList) of
+ {ok, Mod} ->
+ bm_cases(Mod);
+ %% If compilation fails there is no point in trying to continue
+ error ->
+ Reason =
+ lists:flatten(
+ io_lib:format("Could not compile file ~s", [FileName])),
+ exit(self(), Reason)
+ end.
+%%---------------------------------------------------------------------------
+%% bm_cases(Module) -> {Module, Iter, [BmFunctionName |_]}
+%% Module = atom()
+%% Iter = integer()
+%% BmFunctionName = atom()
+%%
+%% Fetches the number of iterations and the names of the benchmark
+%% functions for the module <Module>.
+%%---------------------------------------------------------------------------
+bm_cases(Module) ->
+ case catch Module:benchmarks() of
+ {Iter, BmList} when integer(Iter), list(BmList) ->
+ {Module, Iter, BmList};
+ %% The benchmark is incorrect implemented there is no point in
+ %% trying to continue
+ Other ->
+ Reason =
+ lists:flatten(
+ io_lib:format("Incorrect return value: ~p "
+ "from ~p:benchmarks()",
+ [Other, Module])),
+ exit(self(), Reason)
+ end.
+%%---------------------------------------------------------------------------
+%% run_benchmarks(Bms) ->
+%% Bms = [{Module, Iter, [BmFunctionName |_]} | _]
+%% Module = atom()
+%% Iter = integer()
+%% BmFunctionName = atom()
+%%
+%% Runs all the benchmark tests described in <Bms>.
+%%---------------------------------------------------------------------------
+run_benchmarks(Bms) ->
+ Ver = erlang:system_info(version),
+ Machine = erlang:system_info(machine),
+ SysInfo = {Ver,Machine},
+
+ Res = [bms_run(Mod, Tests, Iter, SysInfo) || {Mod,Iter,Tests} <- Bms],
+
+ %% Create an intermediate file that is later used to generate a bench
+ %% mark report.
+ Name = Ver ++ [$.|Machine] ++ ".bmres",
+ {ok, IntermediatFile} = file:open(Name, [write]),
+
+ %% Create mark that identifies version of the benchmark modules
+ io:format(IntermediatFile, "~p.\n", [erlang:phash(Bms, ?RANGE_MAX)]),
+
+ io:format(IntermediatFile, "~p.\n", [Res]),
+ file:close(IntermediatFile).
+
+%%---------------------------------------------------------------------------
+%% bms_run(Module, BmTests, Iter, Info) ->
+%% Module = atom(),
+%% BmTests = [BmFunctionName|_],
+%% BmFunctionName = atom()
+%% Iter = integer(),
+%% SysInfo = {Ver, Machine}
+%% Ver = string()
+%% Machine = string()
+%%
+%% Runs all benchmark tests in module <Module>.
+%%---------------------------------------------------------------------------
+bms_run(Module, BmTests, Iter, SysInfo) ->
+ io:format("Running ~s:", [Module]), % Progress info to user
+ Res =
+ {Module,{SysInfo,[{Bm, bm_run(Module, Bm, Iter)} || Bm <- BmTests]}},
+ io:nl(),
+ Res.
+%%---------------------------------------------------------------------------
+%% bm_run(Module, BmTest, Iter) -> Elapsed
+%% Module = atom(),
+%% BmTest = atom(),
+%% Iter = integer()
+%% Elapsed = integer() - elapsed time in milliseconds.
+%%
+%% Runs the benchmark Module:BmTest(Iter)
+%%---------------------------------------------------------------------------
+bm_run(Module, BmTest, Iter) ->
+ io:format(" ~s", [BmTest]), % Progress info to user
+ spawn_link(?MODULE, measure, [self(), Module, BmTest, Iter]),
+ receive
+ {Elapsed, ok} ->
+ Elapsed;
+ {_Elapsed, Fault} ->
+ io:nl(),
+ Reason =
+ lists:flatten(
+ io_lib:format("~w", [Fault])),
+ exit(self(), Reason)
+ end.
+%%---------------------------------------------------------------------------
+%% measure(Parent, Module, BmTest, Iter) -> _
+%% Parent = pid(),
+%% Module = atom(),
+%% BmTest = atom(),
+%% Iter = integer()
+%%
+%% Measures the time it take to execute Module:Bm(Iter)
+%% and send the result to <Parent>.
+%%---------------------------------------------------------------------------
+measure(Parent, Module, BmTest, Iter) ->
+ statistics(runtime),
+ Res = (catch apply(Module, BmTest, [Iter])),
+ {_TotalRunTime, TimeSinceLastCall} = statistics(runtime),
+ Parent ! {TimeSinceLastCall, Res}.
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%% Report functions
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%---------------------------------------------------------------------------
+%% report() -> _
+%%
+%% Creates a report of the bench marking test that appeals to a human.
+%% Currently this means creating a html-file. (Other formats could be added)
+%%---------------------------------------------------------------------------
+report() ->
+ {ok, AllFiles} = file:list_dir("."),
+ BmResultFiles = [File || File <- AllFiles, lists:suffix(".bmres", File)],
+
+ Results = fetch_bmres_data(BmResultFiles),
+ create_report(Results).
+
+%%---------------------------------------------------------------------------
+%% fetch_bmres_data(BmResultFiles) -> Results
+%% BmResultFiles = [FileName | _]
+%% FileName = string()
+%% Results = [[{Bm, Res} | _]]
+%% Bm = atom() - Name of benchmark module
+%% Res = [{VersionInfo, [{Test, Time} | _]}]
+%% VersionInfo = {Ver, Machine}
+%% Ver = string()
+%% Machine = string()
+%% Test = atom()
+%% Time = integer()
+%%
+%% Reads result data from intermediate files
+%%---------------------------------------------------------------------------
+fetch_bmres_data(BmResultFiles) ->
+ fetch_bmres_data(BmResultFiles, [], undefined).
+
+%%---------------------------------------------------------------------------
+%% fetch_bmres_data(BmResultFiles, AccResData, Check) -> Results
+%% BmResultFiles = [FileName | _]
+%% FileName = string()
+%% AccResData = see Results fetch_bmres_data/1
+%% Check = integer() | undefined (first time)
+%%
+%% Help function to fetch_bmres_data/1
+%%---------------------------------------------------------------------------
+fetch_bmres_data([], AccResData, _Check) ->
+ AccResData;
+
+fetch_bmres_data([Name | BmResultFiles], AccResData, Check) ->
+ {DataList, NewCheck} = read_bmres_file(Name, Check),
+ fetch_bmres_data(BmResultFiles, [DataList| AccResData], NewCheck).
+
+%%---------------------------------------------------------------------------
+%% read_bmres_file(Name, Check) ->
+%% Name = string()
+%% Check = integer() | undefined
+%%
+%% Reads the data from the result files. Checks that all result
+%% files where created with the same set of tests.
+%%---------------------------------------------------------------------------
+read_bmres_file(Name, Check) ->
+ case file:consult(Name) of
+ {ok, [Check1, List]} when Check =:= undefined, integer(Check1) ->
+ {List, Check1};
+ {ok, [Check, List]} when integer(Check) ->
+ {List, Check};
+ {ok, [Check1, _List]} when integer(Check1) ->
+ Reason =
+ lists:flatten(
+ io_lib:format("Different test setup, remove old setup "
+ "result by removing *.bmres files and "
+ "try again", [])),
+ exit(self(), Reason);
+ {error, Reason} when atom(Reason) ->
+ exit(self(), Reason);
+ {error, Reason} ->
+ exit(self(), file:format(Reason))
+ end.
+
+%%---------------------------------------------------------------------------
+%% create_report(Results) ->
+%% Results = see Results fetch_bmres_data/1
+%%
+%% Organizes <Result> so it will be right for create_html_report/1
+%% i.e. group results for the same benchmark test, run on different versions
+%% of erlang.
+%%---------------------------------------------------------------------------
+create_report(Results) ->
+ Dictionary =
+ lists:foldl(fun(BmResultList, Dict0) ->
+ lists:foldl(fun({Bm, VerResult}, Dict1) ->
+ dict:append(Bm, VerResult,
+ Dict1)
+ end,Dict0, BmResultList)
+ end,
+ dict:new(), Results),
+
+ create_html_report(dict:to_list(Dictionary)).
+%%---------------------------------------------------------------------------
+%% create_html_report(ResultList) -> _
+%% ResultList = [{Bm, Res} | _]
+%% Bm = atom() - Name of benchmark module
+%% Res = [{VersionInfo, [{Test, Time} | _]} | _]
+%% VersionInfo = {Ver, Machine}
+%% Ver = string()
+%% Machine = string()
+%% Test = atom()
+%% Time = integer()
+%%
+%% Writes the result to an html-file
+%%---------------------------------------------------------------------------
+create_html_report(ResultList) ->
+
+ {ok, OutputFile} = file:open("index.html", [write]),
+
+ %% Create the begining of the result html-file.
+ Head = Title = "Benchmark Results",
+ io:put_chars(OutputFile, "<html>\n"),
+ io:put_chars(OutputFile, "<head>\n"),
+ io:format(OutputFile, "<title>~s</title>\n", [Title]),
+ io:put_chars(OutputFile, "</head>\n"),
+ io:put_chars(OutputFile, "<body bgcolor=\"#FFFFFF\" text=\"#000000\"" ++
+ " link=\"#0000FF\" vlink=\"#800080\" alink=\"#FF0000\">\n"),
+ io:format(OutputFile, "<h1>~s</h1>\n", [Head]),
+
+ %% Add the result tables
+ lists:foreach(fun(Element) ->
+ create_html_table(OutputFile, Element) end,
+ ResultList),
+
+ %% Put in the end-html tags
+ io:put_chars(OutputFile, "</body>\n"),
+ io:put_chars(OutputFile, "</html>\n"),
+
+ file:close(OutputFile).
+
+%%---------------------------------------------------------------------------
+%% create_html_table(File, {Bm, Res}) -> _
+%% File = file() - html file to write data to.
+%% Bm = atom() - Name of benchmark module
+%% Res = [{VersionInfo, [{Test, Time} | _]}]
+%% VersionInfo = {Ver, Machine}
+%% Ver = string()
+%% Machine = string()
+%% Test = atom()
+%% Time = integer()
+%%
+%% Creates a html table that displays the result of the benchmark <Bm>.
+%%---------------------------------------------------------------------------
+create_html_table(File, {Bm, Res}) ->
+
+ {MinTime, Order} = min_time_and_sort(Res),
+
+ io:format(File, "<h2>~s</h2>\n" , [Bm]),
+
+ %% Fun that calculates relative measure values and puts them in
+ %% a dictionary
+ RelativeMesureFun = fun({TestName, Time}, Dict1) ->
+ dict:append(TestName, Time/MinTime, Dict1)
+ end,
+
+ %% For all erlang versions that the benchmark tests has been run,
+ %% calculate the relative measure values and put them in a dictionary.
+ ResultDict =
+ lists:foldl(fun({_VerInfo, Bms}, Dict0) ->
+ lists:foldl(RelativeMesureFun, Dict0, Bms) end,
+ dict:new(), Res),
+
+ %% Create the table and its headings
+ io:put_chars(File, "<table border=0 cellpadding=1><tr>"
+ "<td bgcolor=\"#000000\">\n"),
+ io:put_chars(File, "<table cellpadding=3 border=0 cellspacing=1>\n"),
+ io:put_chars(File, "<tr bgcolor=white>"),
+ io:put_chars(File, "<td>Test</td>"),
+ Heads = table_headers(Res),
+ lists:foreach(fun({Ver,Machine}) -> io:format(File, "<td>~s<br>~s</td>",
+ [Ver,Machine]) end, Heads),
+ io:put_chars(File, "</tr>\n"),
+
+ %% Create table rows
+ lists:foreach(fun(Name) ->
+ create_html_row(File, Name, ResultDict)
+ end, Order),
+
+ %% Tabel end-tags
+ io:put_chars(File, "</table></td></tr></table>\n"),
+
+ %% Create link to benchmark source code
+ io:format(File, "<p><a href=\"~s.erl\">Source for ~s.erl</a>\n",
+ [Bm,Bm]).
+
+%%---------------------------------------------------------------------------
+%% create_html_row(File, Name, Dict) -> _
+%% File = file() - html file to write data to.
+%% Name = atom() - Name of benchmark test
+%% Dict = dict() - Dictonary where the relative time measures for
+%% the test can be found.
+%%
+%% Creates an actual html table-row.
+%%---------------------------------------------------------------------------
+create_html_row(File, Name, Dict) ->
+ ReletiveTimes = dict:fetch(Name, Dict),
+ io:put_chars(File, "<tr bgcolor=white>\n"),
+ io:format(File, "<td>~s</td>", [Name]),
+ lists:foreach(fun(Time) ->
+ io:format(File, "<td>~-8.2f</td>", [Time]) end,
+ ReletiveTimes),
+ io:put_chars(File, "</tr>\n").
+
+%%---------------------------------------------------------------------------
+%% min_time_and_sort(ResultList) -> {MinTime, Order}
+%% ResultList = [{VersionInfo, [{Test, Time} | _]}]
+%% MinTime = integer() - The execution time of the fastes test
+%% Order = [BmFunctionName|_] - the order of the testcases in
+%% increasing execution time.
+%% BmFunctionName = atom()
+%%---------------------------------------------------------------------------
+min_time_and_sort(ResultList) ->
+
+ %% Use the results from the run on the highest version
+ %% of Erlang as norm.
+ {_, TestRes} =
+ lists:foldl(fun ({{Ver, _}, ResList},
+ {CurrentVer, _}) when Ver > CurrentVer ->
+ {Ver, ResList};
+ (_, VerAndRes) ->
+ VerAndRes
+ end, {"0", []}, ResultList),
+
+ {lists:foldl(fun ({_, Time0}, Min1) when Time0 < Min1 ->
+ Time0;
+ (_, Min1) ->
+ Min1
+ end, ?MAX, TestRes),
+ [Name || {Name, _} <- lists:keysort(2, TestRes)]}.
+
+%%---------------------------------------------------------------------------
+%% table_headers(VerResultList) -> SysInfo
+%% VerResultList = [{{Ver, Machine},[{BmFunctionName, Time}]} | _]
+%% Ver = string()
+%% Machine = string()
+%% BmFunctionName = atom()
+%% Time = integer()
+%% SysInfo = {Ver, Machine}
+%%---------------------------------------------------------------------------
+table_headers(VerResultList) ->
+ [SysInfo || {SysInfo, _} <- VerResultList].
diff --git a/system/doc/efficiency_guide/bench.hrl b/system/doc/efficiency_guide/bench.hrl
new file mode 100644
index 0000000000..a98ca1c89e
--- /dev/null
+++ b/system/doc/efficiency_guide/bench.hrl
@@ -0,0 +1,22 @@
+%% ``The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved via the world wide web at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Initial Developer of the Original Code is Ericsson Utvecklings AB.
+%% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings
+%% AB. All Rights Reserved.''
+%%
+%% $Id$
+%%
+-define(rep5(X), X, X, X, X, X).
+-define(rep10(X), ?rep5(X), ?rep5(X)).
+-define(rep20(X), ?rep10(X), ?rep10(X)).
+-define(rep40(X), ?rep20(X), ?rep20(X)).
+-define(rep80(X), ?rep40(X), ?rep40(X)).
diff --git a/system/doc/efficiency_guide/binaryhandling.xml b/system/doc/efficiency_guide/binaryhandling.xml
new file mode 100644
index 0000000000..8746de4b60
--- /dev/null
+++ b/system/doc/efficiency_guide/binaryhandling.xml
@@ -0,0 +1,528 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE chapter SYSTEM "chapter.dtd">
+
+<chapter>
+ <header>
+ <copyright>
+ <year>2007</year>
+ <year>2007</year>
+ <holder>Ericsson AB, All Rights Reserved</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ The Initial Developer of the Original Code is Ericsson AB.
+ </legalnotice>
+
+ <title>Constructing and matching binaries</title>
+ <prepared>Bjorn Gustavsson</prepared>
+ <docno></docno>
+ <date>2007-10-12</date>
+ <rev></rev>
+ <file>binaryhandling.xml</file>
+ </header>
+
+ <p>In R12B, the most natural way to write binary construction and matching is now
+ significantly faster than in earlier releases.</p>
+
+ <p>To construct at binary, you can simply write</p>
+
+ <p><em>DO</em> (in R12B) / <em>REALLY DO NOT</em> (in earlier releases)</p>
+ <code type="erl"><![CDATA[
+my_list_to_binary(List) ->
+ my_list_to_binary(List, <<>>).
+
+my_list_to_binary([H|T], Acc) ->
+ my_list_to_binary(T, <<Acc/binary,H>>);
+my_list_to_binary([], Acc) ->
+ Acc.]]></code>
+
+ <p>In releases before R12B, <c>Acc</c> would be copied in every iteration.
+ In R12B, <c>Acc</c> will be copied only in the first iteration and extra
+ space will be allocated at the end of the copied binary. In the next iteration,
+ <c>H</c> will be written in to the extra space. When the extra space runs out,
+ the binary will be reallocated with more extra space.</p>
+
+ <p>The extra space allocated (or reallocated) will be twice the size of the
+ existing binary data, or 256, whichever is larger.</p>
+
+ <p>The most natural way to match binaries is now the fastest:</p>
+
+ <p><em>DO</em> (in R12B)</p>
+ <code type="erl"><![CDATA[
+my_binary_to_list(<<H,T/binary>>) ->
+ [H|my_binary_to_list(T)];
+my_binary_to_list(<<>>) -> [].]]></code>
+
+ <section>
+ <title>How binaries are implemented</title>
+
+ <p>Internally, binaries and bitstrings are implemented in the same way.
+ In this section, we will call them <em>binaries</em> since that is what
+ they are called in the emulator source code.</p>
+
+ <p>There are four types of binary objects internally. Two of them are
+ containers for binary data and two of them are merely references to
+ a part of a binary.</p>
+
+ <p>The binary containers are called <em>refc binaries</em>
+ (short for <em>reference-counted binaries</em>) and <em>heap binaries</em>.</p>
+
+ <p><marker id="refc_binary"></marker><em>Refc binaries</em>
+ consist of two parts: an object stored on
+ the process heap, called a <em>ProcBin</em>, and the binary object itself
+ stored outside all process heaps.</p>
+
+ <p>The binary object can be referenced by any number of ProcBins from any
+ number of processes; the object contains a reference counter to keep track
+ of the number of references, so that it can be removed when the last
+ reference disappears.</p>
+
+ <p>All ProcBin objects in a process are part of a linked list, so that
+ the garbage collector can keep track of them and decrement the reference
+ counters in the binary when a ProcBin disappears.</p>
+
+ <p><marker id="heap_binary"></marker><em>Heap binaries</em> are small binaries,
+ up to 64 bytes, that are stored directly on the process heap.
+ They will be copied when the process
+ is garbage collected and when they are sent as a message. They don't
+ require any special handling by the garbage collector.</p>
+
+ <p>There are two types of reference objects that can reference part of
+ a refc binary or heap binary. They are called <em>sub binaries</em> and
+ <em>match contexts</em>.</p>
+
+ <p><marker id="sub_binary"></marker>A <em>sub binary</em>
+ is created by <c>split_binary/2</c> and when
+ a binary is matched out in a binary pattern. A sub binary is a reference
+ into a part of another binary (refc or heap binary, never into a another
+ sub binary). Therefore, matching out a binary is relatively cheap because
+ the actual binary data is never copied.</p>
+
+ <p><marker id="match_context"></marker>A <em>match context</em> is
+ similar to a sub binary, but is optimized
+ for binary matching; for instance, it contains a direct pointer to the binary
+ data. For each field that is matched out of a binary, the position in the
+ match context will be incremented.</p>
+
+ <p>In R11B, a match context was only using during a binary matching
+ operation.</p>
+
+ <p>In R12B, the compiler tries to avoid generating code that
+ creates a sub binary, only to shortly afterwards create a new match
+ context and discard the sub binary. Instead of creating a sub binary,
+ the match context is kept.</p>
+
+ <p>The compiler can only do this optimization if it can know for sure
+ that the match context will not be shared. If it would be shared, the
+ functional properties (also called referential transparency) of Erlang
+ would break.</p>
+ </section>
+
+ <section>
+ <title>Constructing binaries</title>
+
+ <p>In R12B, appending to a binary or bitstring</p>
+
+ <code type="erl"><![CDATA[
+<<Binary/binary, ...>>
+<<Binary/bitstring, ...>>]]></code>
+
+ <p>is specially optimized by the <em>run-time system</em>.
+ Because the run-time system handles the optimization (instead of
+ the compiler), there are very few circumstances in which the optimization
+ will not work.</p>
+
+ <p>To explain how it works, we will go through this code</p>
+
+ <code type="erl"><![CDATA[
+Bin0 = <<0>>, %% 1
+Bin1 = <<Bin0/binary,1,2,3>>, %% 2
+Bin2 = <<Bin1/binary,4,5,6>>, %% 3
+Bin3 = <<Bin2/binary,7,8,9>>, %% 4
+Bin4 = <<Bin1/binary,17>>, %% 5 !!!
+{Bin4,Bin3} %% 6]]></code>
+
+ <p>line by line.</p>
+
+ <p>The first line (marked with the <c>%% 1</c> comment), assigns
+ a <seealso marker="#heap_binary">heap binary</seealso> to
+ the variable <c>Bin0</c>.</p>
+
+ <p>The second line is an append operation. Since <c>Bin0</c>
+ has not been involved in an append operation,
+ a new <seealso marker="#refc_binary">refc binary</seealso>
+ will be created and the contents of <c>Bin0</c> will be copied
+ into it. The <em>ProcBin</em> part of the refc binary will have
+ its size set to the size of the data stored in the binary, while
+ the binary object will have extra space allocated.
+ The size of the binary object will be either twice the
+ size of <c>Bin0</c> or 256, whichever is larger. In this case
+ it will be 256.</p>
+
+ <p>It gets more interesting in the third line.
+ <c>Bin1</c> <em>has</em> been used in an append operation,
+ and it has 255 bytes of unused storage at the end, so the three new bytes
+ will be stored there.</p>
+
+ <p>Same thing in the fourth line. There are 252 bytes left,
+ so there is no problem storing another three bytes.</p>
+
+ <p>But in the fifth line something <em>interesting</em> happens.
+ Note that we don't append to the previous result in <c>Bin3</c>,
+ but to <c>Bin1</c>. We expect that <c>Bin4</c> will be assigned
+ the value <c>&lt;&lt;0,1,2,3,17&gt;&gt;</c>. We also expect that
+ <c>Bin3</c> will retain its value
+ (<c>&lt;&lt;0,1,2,3,4,5,6,7,8,9&gt;&gt;</c>).
+ Clearly, the run-time system cannot write the byte <c>17</c> into the binary,
+ because that would change the value of <c>Bin3</c> to
+ <c>&lt;&lt;0,1,2,3,4,17,6,7,8,9&gt;&gt;</c>.</p>
+
+ <p>What will happen?</p>
+
+ <p>The run-time system will see that <c>Bin1</c> is the result
+ from a previous append operation (not from the latest append operation),
+ so it will <em>copy</em> the contents of <c>Bin1</c> to a new binary
+ and reserve extra storage and so on. (We will not explain here how the
+ run-time system can know that it is not allowed to write into <c>Bin1</c>;
+ it is left as an exercise to the curious reader to figure out how it is
+ done by reading the emulator sources, primarily <c>erl_bits.c</c>.)</p>
+
+ <section>
+ <title>Circumstances that force copying</title>
+
+ <p>The optimization of the binary append operation requires that
+ there is a <em>single</em> ProcBin and a <em>single reference</em> to the
+ ProcBin for the binary. The reason is that the binary object can be
+ moved (reallocated) during an append operation, and when that happens
+ the pointer in the ProcBin must be updated. If there would be more than
+ on ProcBin pointing to the binary object, it would not be possible to
+ find and update all of them.</p>
+
+ <p>Therefore, certain operations on a binary will mark it so that
+ any future append operation will be forced to copy the binary.
+ In most cases, the binary object will be shrunk at the same time
+ to reclaim the extra space allocated for growing.</p>
+
+ <p>When appending to a binary</p>
+
+ <code type="erl"><![CDATA[
+Bin = <<Bin0,...>>]]></code>
+
+ <p>only the binary returned from the latest append operation will
+ support further cheap append operations. In the code fragment above,
+ appending to <c>Bin</c> will be cheap, while appending to <c>Bin0</c>
+ will force the creation of a new binary and copying of the contents
+ of <c>Bin0</c>.</p>
+
+ <p>If a binary is sent as a message to a process or port, the binary
+ will be shrunk and any further append operation will copy the binary
+ data into a new binary. For instance, in the following code fragment</p>
+
+ <code type="erl"><![CDATA[
+Bin1 = <<Bin0,...>>,
+PortOrPid ! Bin1,
+Bin = <<Bin1,...>> %% Bin1 will be COPIED
+]]></code>
+
+ <p><c>Bin1</c> will be copied in the third line.</p>
+
+ <p>The same thing happens if you insert a binary into an <em>ets</em>
+ table or send it to a port using <c>erlang:port_command/2</c>.</p>
+
+ <p>Matching a binary will also cause it to shrink and the next append
+ operation will copy the binary data:</p>
+
+ <code type="erl"><![CDATA[
+Bin1 = <<Bin0,...>>,
+<<X,Y,Z,T/binary>> = Bin1,
+Bin = <<Bin1,...>> %% Bin1 will be COPIED
+]]></code>
+
+ <p>The reason is that a <seealso marker="#match_context">match context</seealso>
+ contains a direct pointer to the binary data.</p>
+
+ <p>If a process simply keeps binaries (either in "loop data" or in the process
+ dictionary), the garbage collector may eventually shrink the binaries.
+ If only one such binary is kept, it will not be shrunk. If the process later
+ appends to a binary that has been shrunk, the binary object will be reallocated
+ to make place for the data to be appended.</p>
+ </section>
+
+ </section>
+
+ <section>
+ <title>Matching binaries</title>
+
+ <p>We will revisit the example shown earlier</p>
+
+ <p><em>DO</em> (in R12B)</p>
+ <code type="erl"><![CDATA[
+my_binary_to_list(<<H,T/binary>>) ->
+ [H|my_binary_to_list(T)];
+my_binary_to_list(<<>>) -> [].]]></code>
+
+ <p>too see what is happening under the hood.</p>
+
+ <p>The very first time <c>my_binary_to_list/1</c> is called,
+ a <seealso marker="#match_context">match context</seealso>
+ will be created. The match context will point to the first
+ byte of the binary. One byte will be matched out and the match context
+ will be updated to point to the second byte in the binary.</p>
+
+ <p>In R11B, at this point a <seealso marker="#sub_binary">sub binary</seealso>
+ would be created. In R12B,
+ the compiler sees that there is no point in creating a sub binary,
+ because there will soon be a call to a function (in this case,
+ to <c>my_binary_to_list/1</c> itself) that will immediately
+ create a new match context and discard the sub binary.</p>
+
+ <p>Therefore, in R12B, <c>my_binary_to_list/1</c> will call itself
+ with the match context instead of with a sub binary. The instruction
+ that initializes the matching operation will basically do nothing
+ when it sees that it was passed a match context instead of a binary.</p>
+
+ <p>When the end of the binary is reached and second clause matches,
+ the match context will simply be discarded (removed in the next
+ garbage collection, since there is no longer any reference to it).</p>
+
+ <p>To summarize, <c>my_binary_to_list/1</c> in R12B only needs to create
+ <em>one</em> match context and no sub binaries. In R11B, if the binary
+ contains <em>N</em> bytes, <em>N+1</em> match contexts and <em>N</em>
+ sub binaries will be created.</p>
+
+ <p>In R11B, the fastest way to match binaries is:</p>
+
+ <p><em>DO NOT</em> (in R12B)</p>
+ <code type="erl"><![CDATA[
+my_complicated_binary_to_list(Bin) ->
+ my_complicated_binary_to_list(Bin, 0).
+
+my_complicated_binary_to_list(Bin, Skip) ->
+ case Bin of
+ <<_:Skip/binary,Byte,_/binary>> ->
+ [Byte|my_complicated_binary_to_list(Bin, Skip+1)];
+ <<_:Skip/binary>> ->
+ []
+ end.]]></code>
+
+ <p>This function cleverly avoids building sub binaries, but it cannot
+ avoid building a match context in each recursion step. Therefore, in both R11B and R12B,
+ <c>my_complicated_binary_to_list/1</c> builds <em>N+1</em> match
+ contexts. (In a future release, the compiler might be able to generate code
+ that reuses the match context, but don't hold your breath.)</p>
+
+ <p>Returning to <c>my_binary_to_list/1</c>, note that the match context was
+ discarded when the entire binary had been traversed. What happens if
+ the iteration stops before it has reached the end of the binary? Will
+ the optimization still work?</p>
+
+ <code type="erl"><![CDATA[
+after_zero(<<0,T/binary>>) ->
+ T;
+after_zero(<<_,T/binary>>) ->
+ after_zero(T);
+after_zero(<<>>) ->
+ <<>>.
+ ]]></code>
+
+ <p>Yes, it will. The compiler will remove the building of the sub binary in the
+ second clause</p>
+
+ <code type="erl"><![CDATA[
+.
+.
+.
+after_zero(<<_,T/binary>>) ->
+ after_zero(T);
+.
+.
+.]]></code>
+
+ <p>but will generate code that builds a sub binary in the first clause</p>
+
+ <code type="erl"><![CDATA[
+after_zero(<<0,T/binary>>) ->
+ T;
+.
+.
+.]]></code>
+
+ <p>Therefore, <c>after_zero/1</c> will build one match context and one sub binary
+ (assuming it is passed a binary that contains a zero byte).</p>
+
+ <p>Code like the following will also be optimized:</p>
+
+ <code type="erl"><![CDATA[
+all_but_zeroes_to_list(Buffer, Acc, 0) ->
+ {lists:reverse(Acc),Buffer};
+all_but_zeroes_to_list(<<0,T/binary>>, Acc, Remaining) ->
+ all_but_zeroes_to_list(T, Acc, Remaining-1);
+all_but_zeroes_to_list(<<Byte,T/binary>>, Acc, Remaining) ->
+ all_but_zeroes_to_list(T, [Byte|Acc], Remaining-1).]]></code>
+
+ <p>The compiler will remove building of sub binaries in the second and third clauses,
+ and it will add an instruction to the first clause that will convert <c>Buffer</c>
+ from a match context to a sub binary (or do nothing if <c>Buffer</c> already is a binary).</p>
+
+ <p>Before you begin to think that the compiler can optimize any binary patterns,
+ here is a function that the compiler (currently, at least) is not able to optimize:</p>
+
+ <code type="erl"><![CDATA[
+non_opt_eq([H|T1], <<H,T2/binary>>) ->
+ non_opt_eq(T1, T2);
+non_opt_eq([_|_], <<_,_/binary>>) ->
+ false;
+non_opt_eq([], <<>>) ->
+ true.]]></code>
+
+ <p>It was briefly mentioned earlier that the compiler can only delay creation of
+ sub binaries if it can be sure that the binary will not be shared. In this case,
+ the compiler cannot be sure.</p>
+
+ <p>We will soon show how to rewrite <c>non_opt_eq/2</c> so that the delayed sub binary
+ optimization can be applied, and more importantly, we will show how you can find out
+ whether your code can be optimized.</p>
+
+ <section>
+ <title>The bin_opt_info option</title>
+
+ <p>Use the <c>bin_opt_info</c> option to have the compiler print a lot of
+ information about binary optimizations. It can be given either to the compiler or
+ <c>erlc</c></p>
+
+ <code type="erl"><![CDATA[
+erlc +bin_opt_info Mod.erl]]></code>
+
+ <p>or passed via an environment variable</p>
+
+ <code type="erl"><![CDATA[
+export ERL_COMPILER_OPTIONS=bin_opt_info]]></code>
+
+ <p>Note that the <c>bin_opt_info</c> is not meant to be a permanent option added
+ to your <c>Makefile</c>s, because it is not possible to eliminate all messages that
+ it generates. Therefore, passing the option through the environment is in most cases
+ the most practical approach.</p>
+
+ <p>The warnings will look like this:</p>
+
+ <code type="erl"><![CDATA[
+./efficiency_guide.erl:60: Warning: NOT OPTIMIZED: sub binary is used or returned
+./efficiency_guide.erl:62: Warning: OPTIMIZED: creation of sub binary delayed]]></code>
+
+ <p>To make it clearer exactly what code the warnings refer to,
+ in the examples that follow, the warnings are inserted as comments
+ after the clause they refer to:</p>
+
+ <code type="erl"><![CDATA[
+after_zero(<<0,T/binary>>) ->
+ %% NOT OPTIMIZED: sub binary is used or returned
+ T;
+after_zero(<<_,T/binary>>) ->
+ %% OPTIMIZED: creation of sub binary delayed
+ after_zero(T);
+after_zero(<<>>) ->
+ <<>>.]]></code>
+
+ <p>The warning for the first clause tells us that it is not possible to
+ delay the creation of a sub binary, because it will be returned.
+ The warning for the second clause tells us that a sub binary will not be
+ created (yet).</p>
+
+ <p>It is time to revisit the earlier example of the code that could not
+ be optimized and find out why:</p>
+
+ <code type="erl"><![CDATA[
+non_opt_eq([H|T1], <<H,T2/binary>>) ->
+ %% INFO: matching anything else but a plain variable to
+ %% the left of binary pattern will prevent delayed
+ %% sub binary optimization;
+ %% SUGGEST changing argument order
+ %% NOT OPTIMIZED: called function non_opt_eq/2 does not
+ %% begin with a suitable binary matching instruction
+ non_opt_eq(T1, T2);
+non_opt_eq([_|_], <<_,_/binary>>) ->
+ false;
+non_opt_eq([], <<>>) ->
+ true.]]></code>
+
+ <p>The compiler emitted two warnings. The <c>INFO</c> warning refers to the function
+ <c>non_opt_eq/2</c> as a callee, indicating that any functions that call <c>non_opt_eq/2</c>
+ will not be able to make delayed sub binary optimization.
+ There is also a suggestion to change argument order.
+ The second warning (that happens to refer to the same line) refers to the construction of
+ the sub binary itself.</p>
+
+ <p>We will soon show another example that should make the distinction between <c>INFO</c>
+ and <c>NOT OPTIMIZED</c> warnings somewhat clearer, but first we will heed the suggestion
+ to change argument order:</p>
+
+ <code type="erl"><![CDATA[
+opt_eq(<<H,T1/binary>>, [H|T2]) ->
+ %% OPTIMIZED: creation of sub binary delayed
+ opt_eq(T1, T2);
+opt_eq(<<_,_/binary>>, [_|_]) ->
+ false;
+opt_eq(<<>>, []) ->
+ true.]]></code>
+
+ <p>The compiler gives a warning for the following code fragment:</p>
+
+ <code type="erl"><![CDATA[
+match_body([0|_], <<H,_/binary>>) ->
+ %% INFO: matching anything else but a plain variable to
+ %% the left of binary pattern will prevent delayed
+ %% sub binary optimization;
+ %% SUGGEST changing argument order
+ done;
+.
+.
+.]]></code>
+
+ <p>The warning means that <em>if</em> there is a call to <c>match_body/2</c>
+ (from another clause in <c>match_body/2</c> or another function), the
+ delayed sub binary optimization will not be possible. There will be additional
+ warnings for any place where a sub binary is matched out at the end of and
+ passed as the second argument to <c>match_body/2</c>. For instance:</p>
+
+ <code type="erl"><![CDATA[
+match_head(List, <<_:10,Data/binary>>) ->
+ %% NOT OPTIMIZED: called function match_body/2 does not
+ %% begin with a suitable binary matching instruction
+ match_body(List, Data).]]></code>
+
+ </section>
+
+ <section>
+ <title>Unused variables</title>
+
+ <p>The compiler itself figures out if a variable is unused. The same
+ code is generated for each of the following functions</p>
+
+ <code type="erl"><![CDATA[
+count1(<<_,T/binary>>, Count) -> count1(T, Count+1);
+count1(<<>>, Count) -> Count.
+
+count2(<<H,T/binary>>, Count) -> count2(T, Count+1);
+count2(<<>>, Count) -> Count.
+
+count3(<<_H,T/binary>>, Count) -> count3(T, Count+1);
+count3(<<>>, Count) -> Count.]]></code>
+
+ <p>In each iteration, the first 8 bits in the binary will be skipped, not matched out.</p>
+
+ </section>
+
+ </section>
+
+</chapter>
+
diff --git a/system/doc/efficiency_guide/book.xml b/system/doc/efficiency_guide/book.xml
new file mode 100644
index 0000000000..5df40695bb
--- /dev/null
+++ b/system/doc/efficiency_guide/book.xml
@@ -0,0 +1,42 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE book SYSTEM "book.dtd">
+
+<book xmlns:xi="http://www.w3.org/2001/XInclude">
+ <header titlestyle="normal">
+ <copyright>
+ <year>2001</year><year>2009</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ </legalnotice>
+
+ <title>Efficiency Guide</title>
+ <prepared>OTP Team</prepared>
+ <docno></docno>
+ <date>2001-08-07</date>
+ <rev>R8A</rev>
+ </header>
+ <insidecover>
+ </insidecover>
+ <pagetext>Efficiency Guide</pagetext>
+ <preamble>
+ <contents level="2"></contents>
+ </preamble>
+ <parts lift="no">
+ <xi:include href="part.xml"/>
+ </parts>
+ <listofterms></listofterms>
+ <index></index>
+</book>
+
diff --git a/system/doc/efficiency_guide/call_bm.erl b/system/doc/efficiency_guide/call_bm.erl
new file mode 100644
index 0000000000..389814f11f
--- /dev/null
+++ b/system/doc/efficiency_guide/call_bm.erl
@@ -0,0 +1,75 @@
+%% ``The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved via the world wide web at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Initial Developer of the Original Code is Ericsson Utvecklings AB.
+%% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings
+%% AB. All Rights Reserved.''
+%%
+%% $Id$
+%%
+
+-module(call_bm).
+
+-include("bench.hrl").
+
+-export([benchmarks/0]).
+-export([local_call/1,external_call/1,fun_call/1,apply_fun/1,
+ apply_mfa_implicit/1, apply_mfa_explicit/1]).
+-export([foo/0]).
+
+benchmarks() ->
+ {400000,[local_call,external_call,fun_call,apply_fun,
+ apply_mfa_implicit, apply_mfa_explicit]}.
+
+local_call(0) ->
+ ok;
+local_call(Iter) ->
+ ?rep40(foo()),
+ local_call(Iter-1).
+
+external_call(0) ->
+ ok;
+external_call(Iter) ->
+ ?rep40(?MODULE:foo()),
+ external_call(Iter-1).
+
+fun_call(Iter) ->
+ fun_call(Iter, fun() -> ok end).
+fun_call(0, _) ->
+ ok;
+fun_call(Iter, Fun) ->
+ ?rep40(Fun()),
+ fun_call(Iter-1, Fun).
+
+apply_fun(Iter) ->
+ apply_fun(Iter, fun() -> ok end).
+apply_fun(0, _) ->
+ ok;
+apply_fun(Iter, Fun) ->
+ ?rep40(apply(Fun, [])),
+ apply_fun(Iter-1, Fun).
+
+apply_mfa_explicit(0) ->
+ ok;
+apply_mfa_explicit(Iter) ->
+ ?rep40(apply(?MODULE, foo, [])),
+ apply_mfa_explicit(Iter-1).
+
+apply_mfa_implicit(Iter) ->
+ apply_mfa_implicit(?MODULE, foo, Iter).
+
+apply_mfa_implicit(_, _, 0) ->
+ ok;
+apply_mfa_implicit(Module, Function, Iter) ->
+ ?rep40(Module:Function()),
+ apply_mfa_implicit(Module, Function, Iter-1).
+
+foo() -> ok.
diff --git a/system/doc/efficiency_guide/call_result.html b/system/doc/efficiency_guide/call_result.html
new file mode 100644
index 0000000000..37b8931cdf
--- /dev/null
+++ b/system/doc/efficiency_guide/call_result.html
@@ -0,0 +1,26 @@
+<html>
+<head>
+<title>Benchmark Results</title>
+</head>
+<body bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000">
+<h1>Benchmark Results</h1>
+<h2>call_bm</h2>
+<table border=0 cellpadding=1><tr><td bgcolor="#000000">
+<table cellpadding=3 border=0 cellspacing=1>
+<tr bgcolor=white><td>Test</td><td>5.4<br>BEAM</td></tr>
+<tr bgcolor=white>
+<td>local_call</td><td>1.00 </td></tr>
+<tr bgcolor=white>
+<td>external_call</td><td>1.08 </td></tr>
+<tr bgcolor=white>
+<td>fun_call</td><td>2.79 </td></tr>
+<tr bgcolor=white>
+<td>apply_fun</td><td>3.54 </td></tr>
+<tr bgcolor=white>
+<td>apply_mfa_implicit</td><td>7.76 </td></tr>
+<tr bgcolor=white>
+<td>apply_mfa_explicit</td><td>8.21 </td></tr>
+</table></td></tr></table>
+<p><a href="call_bm.erl">Source for call_bm.erl</a>
+</body>
+</html>
diff --git a/system/doc/efficiency_guide/commoncaveats.xml b/system/doc/efficiency_guide/commoncaveats.xml
new file mode 100644
index 0000000000..e18e5aa510
--- /dev/null
+++ b/system/doc/efficiency_guide/commoncaveats.xml
@@ -0,0 +1,239 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE chapter SYSTEM "chapter.dtd">
+
+<chapter>
+ <header>
+ <copyright>
+ <year>2001</year><year>2009</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ </legalnotice>
+
+ <title>Common Caveats</title>
+ <prepared>Bjorn Gustavsson</prepared>
+ <docno></docno>
+ <date>2001-08-08</date>
+ <rev></rev>
+ <file>commoncaveats.xml</file>
+ </header>
+
+ <p>Here we list a few modules and BIFs to watch out for, and not only
+ from a performance point of view.</p>
+
+ <section>
+ <title>The regexp module</title>
+
+ <p>The regular expression functions in the
+ <seealso marker="stdlib:regexp">regexp</seealso>
+ module are written in Erlang, not in C, and were
+ meant for occasional use on small amounts of data,
+ for instance for validation of configuration files
+ when starting an application.</p>
+
+ <p>Use the <seealso marker="stdlib:re">re</seealso> module
+ (introduced in R13A) instead, especially in time-critical code.</p>
+ </section>
+
+ <section>
+ <title>The timer module</title>
+
+ <p>Creating timers using <seealso
+ marker="erts:erlang#erlang:send_after/3">erlang:send_after/3</seealso>
+ and <seealso marker="erts:erlang#erlang:start_timer/3">erlang:start_timer/3</seealso>
+ is much more efficient than using the timers provided by the
+ <seealso marker="stdlib:timer">timer</seealso> module. The
+ <c>timer</c> module uses a separate process to manage the timers,
+ and that process can easily become overloaded if many processes
+ create and cancel timers frequently (especially when using the
+ SMP emulator).</p>
+
+ <p>The functions in the <c>timer</c> module that do not manage timers (such as
+ <c>timer:tc/3</c> or <c>timer:sleep/1</c>), do not call the timer-server process
+ and are therefore harmless.</p>
+ </section>
+
+ <section>
+ <title>list_to_atom/1</title>
+
+ <p>Atoms are not garbage-collected. Once an atom is created, it will never
+ be removed. The emulator will terminate if the limit for the number
+ of atoms (1048576) is reached.</p>
+
+ <p>Therefore, converting arbitrary input strings to atoms could be
+ dangerous in a system that will run continuously.
+ If only certain well-defined atoms are allowed as input, you can use
+ <seealso marker="erts:erlang#list_to_existing_atom/1">list_to_existing_atom/1</seealso>
+ to guard against a denial-of-service attack. (All atoms that are allowed
+ must have been created earlier, for instance by simply using all of them
+ in a module and loading that module.)</p>
+
+ <p>Using <c>list_to_atom/1</c> to construct an atom that is passed to
+ <c>apply/3</c> like this</p>
+
+ <code type="erl">
+apply(list_to_atom("some_prefix"++Var), foo, Args)</code>
+
+ <p>is quite expensive and is not recommended in time-critical code.</p>
+ </section>
+
+ <section>
+ <title>length/1</title>
+
+ <p>The time for calculating the length of a list is proportional to the
+ length of the list, as opposed to <c>tuple_size/1</c>, <c>byte_size/1</c>,
+ and <c>bit_size/1</c>, which all execute in constant time.</p>
+
+ <p>Normally you don't have to worry about the speed of <c>length/1</c>,
+ because it is efficiently implemented in C. In time critical-code, though,
+ you might want to avoid it if the input list could potentially be very long.</p>
+
+ <p>Some uses of <c>length/1</c> can be replaced by matching.
+ For instance, this code</p>
+
+ <code type="erl">
+foo(L) when length(L) >= 3 ->
+ ...</code>
+
+ <p>can be rewritten to</p>
+ <code type="erl">
+foo([_,_,_|_]=L) ->
+ ...</code>
+
+ <p>(One slight difference is that <c>length(L)</c> will fail if the <c>L</c>
+ is an improper list, while the pattern in the second code fragment will
+ accept an improper list.)</p>
+ </section>
+
+ <section>
+ <title>setelement/3</title>
+
+ <p><seealso marker="erts:erlang#setelement/3">setelement/3</seealso>
+ copies the tuple it modifies. Therefore, updating a tuple in a loop
+ using <c>setelement/3</c> will create a new copy of the tuple every time.</p>
+
+ <p>There is one exception to the rule that the tuple is copied.
+ If the compiler clearly can see that destructively updating the tuple would
+ give exactly the same result as if the tuple was copied, the call to
+ <c>setelement/3</c> will be replaced with a special destructive setelement
+ instruction. In the following code sequence</p>
+
+ <code type="erl">
+multiple_setelement(T0) ->
+ T1 = setelement(9, T0, bar),
+ T2 = setelement(7, T1, foobar),
+ setelement(5, T2, new_value).</code>
+
+ <p>the first <c>setelement/3</c> call will copy the tuple and modify the
+ ninth element. The two following <c>setelement/3</c> calls will modify
+ the tuple in place.</p>
+
+ <p>For the optimization to be applied, <em>all</em> of the followings conditions
+ must be true:</p>
+
+ <list type="bulleted">
+ <item>The indices must be integer literals, not variables or expressions.</item>
+ <item>The indices must be given in descending order.</item>
+ <item>There must be no calls to other function in between the calls to
+ <c>setelement/3</c>.</item>
+ <item>The tuple returned from one <c>setelement/3</c> call must only be used
+ in the subsequent call to <c>setelement/3</c>.</item>
+ </list>
+
+ <p>If it is not possible to structure the code as in the <c>multiple_setelement/1</c>
+ example, the best way to modify multiple elements in a large tuple is to
+ convert the tuple to a list, modify the list, and convert the list back to
+ a tuple.</p>
+ </section>
+
+ <section>
+ <title>size/1</title>
+
+ <p><c>size/1</c> returns the size for both tuples and binary.</p>
+
+ <p>Using the new BIFs <c>tuple_size/1</c> and <c>byte_size/1</c> introduced
+ in R12B gives the compiler and run-time system more opportunities for
+ optimization. A further advantage is that the new BIFs could help Dialyzer
+ find more bugs in your program.</p>
+ </section>
+
+ <section>
+ <title>split_binary/2</title>
+ <p>It is usually more efficient to split a binary using matching
+ instead of calling the <c>split_binary/2</c> function.
+ Furthermore, mixing bit syntax matching and <c>split_binary/2</c>
+ may prevent some optimizations of bit syntax matching.</p>
+
+ <p><em>DO</em></p>
+ <code type="none"><![CDATA[
+ <<Bin1:Num/binary,Bin2/binary>> = Bin,]]></code>
+ <p><em>DO NOT</em></p>
+ <code type="none">
+ {Bin1,Bin2} = split_binary(Bin, Num)
+ </code>
+ </section>
+
+ <section>
+ <title>The '--' operator</title>
+ <p>Note that the '<c>--</c>' operator has a complexity
+ proportional to the product of the length of its operands,
+ meaning that it will be very slow if both of its operands
+ are long lists:</p>
+
+ <p><em>DO NOT</em></p>
+ <code type="none"><![CDATA[
+ HugeList1 -- HugeList2]]></code>
+
+ <p>Instead use the <seealso marker="stdlib:ordsets">ordsets</seealso>
+ module:</p>
+
+ <p><em>DO</em></p>
+ <code type="none">
+ HugeSet1 = ordsets:from_list(HugeList1),
+ HugeSet2 = ordsets:from_list(HugeList2),
+ ordsets:subtract(HugeSet1, HugeSet2)
+ </code>
+
+ <p>Obviously, that code will not work if the original order
+ of the list is important. If the order of the list must be
+ preserved, do like this:</p>
+
+ <p><em>DO</em></p>
+ <code type="none"><![CDATA[
+ Set = gb_sets:from_list(HugeList2),
+ [E || E <- HugeList1, not gb_sets:is_element(E, Set)]]]></code>
+
+ <p>Subtle note 1: This code behaves differently from '<c>--</c>'
+ if the lists contain duplicate elements. (One occurrence
+ of an element in HugeList2 will remove <em>all</em>
+ occurrences in HugeList1.)</p>
+
+ <p>Subtle note 2: This code compares lists elements using the
+ '<c>==</c>' operator, while '<c>--</c>' uses the '<c>=:=</c>'. If
+ that difference is important, <c>sets</c> can be used instead of
+ <c>gb_sets</c>, but note that <c>sets:from_list/1</c> is much
+ slower than <c>gb_sets:from_list/1</c> for long lists.</p>
+
+ <p>Using the '<c>--</c>' operator to delete an element
+ from a list is not a performance problem:</p>
+
+ <p><em>OK</em></p>
+ <code type="none">
+ HugeList1 -- [Element]
+ </code>
+
+ </section>
+
+</chapter>
+
diff --git a/system/doc/efficiency_guide/digger.ps b/system/doc/efficiency_guide/digger.ps
new file mode 100644
index 0000000000..07ac8e2fa9
--- /dev/null
+++ b/system/doc/efficiency_guide/digger.ps
@@ -0,0 +1,197 @@
+%!PS-Adobe-2.0 EPSF-2.0
+%%Title: /clearcase/otp/system/doc/efficiency_guide/digger.ps
+%%Creator: XV Version 3.10a Rev: 12/29/94 - by John Bradley
+%%BoundingBox: 290 380 322 412
+%%Pages: 1
+%%DocumentFonts:
+%%EndComments
+%%EndProlog
+
+%%Page: 1 1
+
+% remember original state
+/origstate save def
+
+% build a temporary dictionary
+20 dict begin
+
+% define string to hold a scanline's worth of data
+/pix 96 string def
+
+% define space for color conversions
+/grays 32 string def % space for gray scale line
+/npixls 0 def
+/rgbindx 0 def
+
+% lower left corner
+290 380 translate
+
+% size of image (on paper, in 1/72inch coords)
+31.96800 31.96800 scale
+
+% define 'colorimage' if it isn't defined
+% ('colortogray' and 'mergeprocs' come from xwd2ps
+% via xgrab)
+/colorimage where % do we know about 'colorimage'?
+ { pop } % yes: pop off the 'dict' returned
+ { % no: define one
+ /colortogray { % define an RGB->I function
+ /rgbdata exch store % call input 'rgbdata'
+ rgbdata length 3 idiv
+ /npixls exch store
+ /rgbindx 0 store
+ 0 1 npixls 1 sub {
+ grays exch
+ rgbdata rgbindx get 20 mul % Red
+ rgbdata rgbindx 1 add get 32 mul % Green
+ rgbdata rgbindx 2 add get 12 mul % Blue
+ add add 64 idiv % I = .5G + .31R + .18B
+ put
+ /rgbindx rgbindx 3 add store
+ } for
+ grays 0 npixls getinterval
+ } bind def
+
+ % Utility procedure for colorimage operator.
+ % This procedure takes two procedures off the
+ % stack and merges them into a single procedure.
+
+ /mergeprocs { % def
+ dup length
+ 3 -1 roll
+ dup
+ length
+ dup
+ 5 1 roll
+ 3 -1 roll
+ add
+ array cvx
+ dup
+ 3 -1 roll
+ 0 exch
+ putinterval
+ dup
+ 4 2 roll
+ putinterval
+ } bind def
+
+ /colorimage { % def
+ pop pop % remove 'false 3' operands
+ {colortogray} mergeprocs
+ image
+ } bind def
+ } ifelse % end of 'false' case
+
+
+
+32 32 8 % dimensions of data
+[32 0 0 -32 0 32] % mapping matrix
+{currentfile pix readhexstring pop}
+false 3 colorimage
+
+000000000000000000000000000000000000000000000000000000000000000000000000
+000000000000000000000000000000000000000000000000000000000000000000000000
+000000000000000000000000000000000000000000000000
+000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00
+ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00
+ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000
+000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00
+ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00
+ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000
+000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00
+ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00
+ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000
+000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00
+ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00
+ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000
+000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00
+ffff00ffff00ffff00000000000000000000000000ffff00ffff00ffff00ffff00ffff00
+ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000
+000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00
+ffff00ffff00000000000000000000000000000000000000ffff00ffff00ffff00ffff00
+ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000
+000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00
+ffff00ffff00000000000000000000000000000000000000ffff00ffff00ffff00ffff00
+ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000
+000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00
+ffff00ffff00000000000000000000000000000000000000ffff00ffff00ffff00ffff00
+ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000
+000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00
+ffff00ffff00000000000000000000000000000000000000ffff00ffff00ffff00ffff00
+ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000
+000000ffff00ffff00ffff00ffff00ffff00000000000000000000000000000000000000
+000000000000ffff00000000000000000000000000ffff00ffff00ffff00ffff00ffff00
+ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000
+000000ffff00ffff00ffff00ffff00ffff00000000ffff00ffff00ffff00ffff00000000
+000000000000000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00
+ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000
+000000ffff00ffff00ffff00ffff00ffff00000000ffff00ffff00ffff00000000000000
+000000000000000000000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00
+ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000
+000000ffff00ffff00ffff00ffff00ffff00000000ffff00000000000000000000000000
+000000000000ffff00000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00
+ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000
+000000ffff00ffff00ffff00ffff00ffff00000000ffff00000000000000000000000000
+000000ffff00ffff00000000ffff00ffff00ffff00ffff00ffff00000000000000000000
+000000ffff00ffff00ffff00ffff00ffff00ffff00000000
+000000ffff00ffff00ffff00ffff00ffff00000000000000000000000000000000000000
+ffff00ffff00ffff00000000ffff00ffff00ffff00ffff00ffff00000000000000000000
+000000000000ffff00ffff00ffff00ffff00ffff00000000
+000000ffff00ffff00ffff00ffff00ffff00000000000000000000000000000000000000
+000000000000000000000000000000000000000000000000000000000000000000000000
+000000000000000000ffff00ffff00ffff00ffff00000000
+000000ffff00ffff00ffff00000000000000ffff00000000000000000000000000000000
+ffff00ffff00ffff00000000ffff00ffff00ffff00ffff00ffff00000000000000000000
+000000000000ffff00ffff00ffff00ffff00ffff00000000
+000000ffff00ffff00ffff00ffff00ffff00ffff00000000000000000000000000000000
+000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000000000000000
+000000ffff00ffff00ffff00ffff00ffff00ffff00000000
+000000ffff00ffff00ffff00ffff00ffff00ffff00000000000000000000000000000000
+000000000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00
+ffff00ffff00000000000000ffff00ffff00ffff00000000
+000000ffff00ffff00ffff00ffff00ffff00ffff00000000000000000000ffff00000000
+000000000000000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00
+ffff00ffff00000000000000ffff00ffff00ffff00000000
+000000ffff00ffff00ffff00ffff00ffff00ffff00000000000000000000ffff00ffff00
+000000000000000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00
+000000ffff00ffff00ffff00ffff00ffff00ffff00000000
+000000ffff00ffff00ffff00ffff00ffff00ffff00000000000000000000ffff00ffff00
+000000000000000000ffff00ffff00ffff00ffff00ffff00000000000000ffff00ffff00
+000000ffff00000000ffff00ffff00ffff00ffff00000000
+000000ffff00ffff00ffff00ffff00ffff00ffff00000000000000000000ffff00ffff00
+000000000000000000ffff00ffff00ffff00ffff00000000000000000000000000ffff00
+ffff00ffff00000000ffff00ffff00ffff00ffff00000000
+000000ffff00ffff00ffff00ffff00ffff00ffff00000000000000ffff00ffff00ffff00
+000000000000ffff00ffff00ffff00ffff00000000000000000000000000000000000000
+ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000
+000000ffff00ffff00ffff00ffff00ffff00ffff00000000000000000000ffff00ffff00
+000000000000000000ffff00ffff00000000000000000000000000000000000000000000
+000000ffff00ffff00ffff00ffff00ffff00ffff00000000
+000000ffff00ffff00ffff00ffff00ffff00ffff00000000000000000000ffff00ffff00
+000000000000000000ffff00ffff00000000000000000000000000000000000000000000
+000000ffff00ffff00ffff00ffff00ffff00ffff00000000
+000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00
+ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00
+ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000
+000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00
+ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00
+ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000
+000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00
+ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00
+ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000
+000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00
+ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00
+ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000
+000000000000000000000000000000000000000000000000000000000000000000000000
+000000000000000000000000000000000000000000000000000000000000000000000000
+000000000000000000000000000000000000000000000000
+
+showpage
+
+% stop using temporary dictionary
+end
+
+% restore original state
+origstate restore
+
+%%Trailer
diff --git a/system/doc/efficiency_guide/drivers.xml b/system/doc/efficiency_guide/drivers.xml
new file mode 100644
index 0000000000..9fe54fb19a
--- /dev/null
+++ b/system/doc/efficiency_guide/drivers.xml
@@ -0,0 +1,148 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE chapter SYSTEM "chapter.dtd">
+
+<chapter>
+ <header>
+ <copyright>
+ <year>2009</year><year>2009</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ </legalnotice>
+
+ <title>Drivers</title>
+ <prepared>Bjorn Gustavsson</prepared>
+ <docno></docno>
+ <date>2009-11-16</date>
+ <rev></rev>
+ <file>drivers.xml</file>
+ </header>
+
+ <p>This chapter provides a (very) brief overview on how to write efficient
+ drivers. It is assumed that you already have a good understanding of
+ drivers.</p>
+
+ <section>
+ <title>Drivers and concurrency</title>
+
+ <p>The run-time system will always take a lock before running
+ any code in a driver.</p>
+
+ <p>By default, that lock will be at the driver level, meaning that
+ if several ports has been opened to the same driver, only code for
+ one port at the same time can be running.</p>
+
+ <p>A driver can be configured to instead have one lock for each port.</p>
+
+ <p>If a driver is used in a functional way (i.e. it holds no state,
+ but only does some heavy calculation and returns a result), several
+ ports with registered names can be opened beforehand and the port to
+ be used can be chosen based on the scheduler ID like this:</p>
+
+ <code type="none">
+-define(PORT_NAMES(),
+ {some_driver_01, some_driver_02, some_driver_03, some_driver_04,
+ some_driver_05, some_driver_06, some_driver_07, some_driver_08,
+ some_driver_09, some_driver_10, some_driver_11, some_driver_12,
+ some_driver_13, some_driver_14, some_driver_15, some_driver_16}).
+
+client_port() ->
+ element(erlang:system_info(scheduler_id) rem tuple_size(?PORT_NAMES()) + 1,
+ ?PORT_NAMES()).</code>
+
+ <p>As long as there are no more than 16 schedulers, there will never
+ be any lock contention on the port lock for the driver.</p>
+
+ </section>
+
+ <section>
+ <title>Avoiding copying of binaries when calling a driver</title>
+
+ <p>There are basically two ways to avoid copying a binary that is
+ sent to a driver.</p>
+
+ <p>If the <c>Data</c> argument for
+ <seealso marker="erts:erlang#port_control/3">port_control/3</seealso>
+ is a binary, the driver will be passed a pointer to the contents of
+ the binary and the binary will not be copied.
+ If the <c>Data</c> argument is an iolist (list of binaries and lists),
+ all binaries in the iolist will be copied.</p>
+
+ <p>Therefore, if you want to send both a pre-existing binary and some
+ additional data to a driver without copying the binary, you must call
+ <c>port_control/3</c> twice; once with the binary and once with the
+ additional data. However, that will only work if there is only one
+ process communicating with the port (because otherwise another process
+ could call the driver in-between the calls).</p>
+
+ <p>Another way to avoid copying binaries is to implement an <c>outputv</c>
+ callback (instead of an <c>output</c> callback) in the driver.
+ If a driver has an <c>outputv</c> callback, refc binaries passed
+ in an iolist in the <c>Data</c> argument for
+ <seealso marker="erts:erlang#port_command/2">port_command/2</seealso>
+ will be passed as references to the driver.</p>
+ </section>
+
+ <section>
+ <title>Returning small binaries from a driver</title>
+
+ <p>The run-time system can represent binaries up to 64 bytes as
+ heap binaries. They will always be copied when sent in a messages,
+ but they will require less memory if they are not sent to another
+ process and garbage collection is cheaper.</p>
+
+ <p>If you know that the binaries you return are always small,
+ you should use driver API calls that do not require a pre-allocated
+ binary, for instance
+ <seealso marker="erts:erl_driver#int driver_output-3">driver_output()</seealso>
+ or
+ <seealso marker="erts:erl_driver#int driver_output_term-3">driver_output_term()</seealso>
+ using the <c>ERL_DRV_BUF2BINARY</c> format,
+ to allow the run-time to construct a heap binary.</p>
+
+ </section>
+
+ <section>
+ <title>Returning big binaries without copying from a driver</title>
+
+ <p>To avoid copying data when a big binary is sent or returned from
+ the driver to an Erlang process, the driver must first allocate the
+ binary and then send it to an Erlang process in some way.</p>
+
+ <p>Use <seealso marker="erts:erl_driver#ErlDrvBinary* driver_alloc_binary-1">driver_alloc_binary()</seealso> to allocate a binary.</p>
+
+ <p>There are several ways to send a binary created with
+ <c>driver_alloc_binary()</c>.</p>
+
+ <list type="bulleted">
+ <item><p>From the <c>control</c> callback, a binary can be returned provided
+ that
+ <seealso marker="erts:erl_driver#void set_port_control_flags-2">set_port_control()</seealso>
+ has been called with the flag value <c>PORT_CONTROL_FLAG_BINARY</c>.</p>
+ </item>
+
+ <item><p>A single binary can be sent with
+ <seealso marker="erts:erl_driver#int driver_output_binary-6">driver_output_binary()</seealso>.</p></item>
+
+ <item><p>Using
+ <seealso marker="erts:erl_driver#int driver_output_term-3">driver_output_term()</seealso>
+ or
+ <seealso marker="erts:erl_driver#int driver_send_term-4">driver_send_term()</seealso>,
+ a binary can be included in an Erlang term.</p>
+ </item>
+ </list>
+
+ </section>
+
+</chapter>
diff --git a/system/doc/efficiency_guide/efficiency_guide.erl b/system/doc/efficiency_guide/efficiency_guide.erl
new file mode 100644
index 0000000000..e982bdae65
--- /dev/null
+++ b/system/doc/efficiency_guide/efficiency_guide.erl
@@ -0,0 +1,214 @@
+-module(efficiency_guide).
+-compile(export_all).
+
+%% DO NOT
+naive_reverse([H|T]) ->
+ naive_reverse(T)++[H];
+naive_reverse([]) ->
+ [].
+
+%% OK
+naive_but_ok_reverse([H|T], Acc) ->
+ naive_but_ok_reverse(T, [H]++Acc);
+naive_but_ok_reverse([], Acc) ->
+ Acc.
+
+%% DO
+vanilla_reverse([H|T], Acc) ->
+ vanilla_reverse(T, [H|Acc]);
+vanilla_reverse([], Acc) ->
+ Acc.
+
+
+multiple_setelement(T0) ->
+ T1 = setelement(9, T0, bar),
+ T2 = setelement(7, T1, foobar),
+ setelement(5, T2, new_value).
+
+
+my_list_to_binary(List) ->
+ my_list_to_binary(List, <<>>).
+
+my_list_to_binary([H|T], Acc) ->
+ my_list_to_binary(T, <<Acc/binary,H>>);
+my_list_to_binary([], Acc) ->
+ Acc.
+
+my_old_list_to_binary(List) ->
+ my_old_list_to_binary(List, []).
+
+my_old_list_to_binary([H|T], Acc) ->
+ my_old_list_to_binary(T, [Acc,H]);
+my_old_list_to_binary([], Acc) ->
+ list_to_binary(Acc).
+
+my_binary_to_list(<<H,T/binary>>) ->
+ [H|my_binary_to_list(T)];
+my_binary_to_list(<<>>) -> [].
+
+my_complicated_binary_to_list(Bin) ->
+ my_complicated_binary_to_list(Bin, 0).
+
+my_complicated_binary_to_list(Bin, Skip) ->
+ case Bin of
+ <<_:Skip/binary,Byte,_/binary>> ->
+ [Byte|my_complicated_binary_to_list(Bin, Skip+1)];
+ <<_:Skip/binary>> ->
+ []
+ end.
+
+after_zero(<<0,T/binary>>) ->
+ T;
+after_zero(<<_,T/binary>>) ->
+ after_zero(T);
+after_zero(<<>>) ->
+ <<>>.
+
+all_but_zeroes_to_list(Buffer, Acc, 0) ->
+ {lists:reverse(Acc),Buffer};
+all_but_zeroes_to_list(<<0,T/binary>>, Acc, Remaining) ->
+ all_but_zeroes_to_list(T, Acc, Remaining-1);
+all_but_zeroes_to_list(<<Byte,T/binary>>, Acc, Remaining) ->
+ all_but_zeroes_to_list(T, [Byte|Acc], Remaining-1).
+
+non_opt_eq([H|T1], <<H,T2/binary>>) ->
+ non_opt_eq(T1, T2);
+non_opt_eq([_|_], <<_,_/binary>>) ->
+ false;
+non_opt_eq([], <<>>) ->
+ true.
+
+opt_eq(<<H,T1/binary>>, [H|T2]) ->
+ opt_eq(T1, T2);
+opt_eq(<<_,_/binary>>, [_|_]) ->
+ false;
+opt_eq(<<>>, []) ->
+ true.
+
+match_head(List, <<_:10,Data/binary>>) ->
+ match_body(List, Data).
+
+match_body([0|_], <<H,_/binary>>) ->
+ done;
+match_body([H|T1], <<H,T2/binary>>) ->
+ {T1,T2}.
+
+count1(<<_,T/binary>>, Count) -> count1(T, Count+1);
+count1(<<>>, Count) -> Count.
+
+count2(<<H,T/binary>>, Count) -> count2(T, Count+1);
+count2(<<>>, Count) -> Count.
+
+count3(<<_H,T/binary>>, Count) -> count3(T, Count+1);
+count3(<<>>, Count) -> Count.
+
+fib(N) ->
+ fib(N, 0, 1, []).
+
+fib(0, _Current, _Next, Fibs) ->
+ lists:reverse(Fibs);
+fib(N, Current, Next, Fibs) ->
+ fib(N - 1, Next, Current + Next, [Current|Fibs]).
+
+recursive_fib(N) ->
+ recursive_fib(N, 0, 1).
+
+recursive_fib(0, _Current, _Next) ->
+ [];
+recursive_fib(N, Current, Next) ->
+ [Current|recursive_fib(N - 1, Next, Current + Next)].
+
+bad_fib(N) ->
+ bad_fib(N, 0, 1, []).
+
+bad_fib(0, _Current, _Next, Fibs) ->
+ Fibs;
+bad_fib(N, Current, Next, Fibs) ->
+ bad_fib(N - 1, Next, Current + Next, Fibs ++ [Current]).
+
+tail_recursive_fib(N) ->
+ tail_recursive_fib(N, 0, 1, []).
+
+tail_recursive_fib(0, _Current, _Next, Fibs) ->
+ lists:reverse(Fibs);
+tail_recursive_fib(N, Current, Next, Fibs) ->
+ tail_recursive_fib(N - 1, Next, Current + Next, [Current|Fibs]).
+
+append([H|T], Tail) ->
+ [H|append(T, Tail)];
+append([], Tail) ->
+ Tail.
+
+kilo_byte() ->
+ kilo_byte(10, [42]).
+
+kilo_byte(0, Acc) ->
+ Acc;
+kilo_byte(N, Acc) ->
+ kilo_byte(N-1, [Acc|Acc]).
+
+recursive_sum([H|T]) ->
+ H+recursive_sum(T);
+recursive_sum([]) -> 0.
+
+sum(L) -> sum(L, 0).
+
+sum([H|T], Sum) -> sum(T, Sum + H);
+sum([], Sum) -> Sum.
+
+days_in_month(M) ->
+ element(M, {31,28,31,30,31,30,31,31,30,31,30,31}).
+
+atom_map1(one) -> 1;
+atom_map1(two) -> 2;
+atom_map1(three) -> 3;
+atom_map1(Int) when is_integer(Int) -> Int;
+atom_map1(four) -> 4;
+atom_map1(five) -> 5;
+atom_map1(six) -> 6.
+
+atom_map2(one) -> 1;
+atom_map2(two) -> 2;
+atom_map2(three) -> 3;
+atom_map2(four) -> 4;
+atom_map2(five) -> 5;
+atom_map2(six) -> 6;
+atom_map2(Int) when is_integer(Int) -> Int.
+
+atom_map3(Int) when is_integer(Int) -> Int;
+atom_map3(one) -> 1;
+atom_map3(two) -> 2;
+atom_map3(three) -> 3;
+atom_map3(four) -> 4;
+atom_map3(five) -> 5;
+atom_map3(six) -> 6.
+
+
+map_pairs1(_Map, [], Ys) ->
+ Ys;
+map_pairs1(_Map, Xs, [] ) ->
+ Xs;
+map_pairs1(Map, [X|Xs], [Y|Ys]) ->
+ [Map(X, Y)|map_pairs1(Map, Xs, Ys)].
+
+map_pairs2(_Map, [], Ys) ->
+ Ys;
+map_pairs2(_Map, [_|_]=Xs, [] ) ->
+ Xs;
+map_pairs2(Map, [X|Xs], [Y|Ys]) ->
+ [Map(X, Y)|map_pairs2(Map, Xs, Ys)].
+
+explicit_map_pairs(Map, Xs0, Ys0) ->
+ case Xs0 of
+ [X|Xs] ->
+ case Ys0 of
+ [Y|Ys] ->
+ [Map(X, Y)|explicit_map_pairs(Map, Xs, Ys)];
+ [] ->
+ Xs0
+ end;
+ [] ->
+ Ys0
+ end.
+
+
diff --git a/system/doc/efficiency_guide/functions.xml b/system/doc/efficiency_guide/functions.xml
new file mode 100644
index 0000000000..d55b60e39c
--- /dev/null
+++ b/system/doc/efficiency_guide/functions.xml
@@ -0,0 +1,234 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE chapter SYSTEM "chapter.dtd">
+
+<chapter>
+ <header>
+ <copyright>
+ <year>2001</year><year>2009</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ </legalnotice>
+
+ <title>Functions</title>
+ <prepared>Bjorn Gustavsson</prepared>
+ <docno></docno>
+ <date>2007-11-22</date>
+ <rev></rev>
+ <file>functions.xml</file>
+ </header>
+
+ <section>
+ <title>Pattern matching</title>
+ <p>Pattern matching in function head and in <c>case</c> and <c>receive</c>
+ clauses are optimized by the compiler. With a few exceptions, there is nothing
+ to gain by rearranging clauses.</p>
+
+ <p>One exception is pattern matching of binaries. The compiler
+ will not rearrange clauses that match binaries. Placing the
+ clause that matches against the empty binary <em>last</em> will usually
+ be slightly faster than placing it <em>first</em>.</p>
+
+ <p>Here is a rather contrived example to show another exception:</p>
+
+ <p><em>DO NOT</em></p>
+ <code type="erl">
+atom_map1(one) -> 1;
+atom_map1(two) -> 2;
+atom_map1(three) -> 3;
+atom_map1(Int) when is_integer(Int) -> Int;
+atom_map1(four) -> 4;
+atom_map1(five) -> 5;
+atom_map1(six) -> 6.</code>
+
+ <p>The problem is the clause with the variable <c>Int</c>.
+ Since a variable can match anything, including the atoms
+ <c>four</c>, <c>five</c>, and <c>six</c> that the following clauses
+ also will match, the compiler must generate sub-optimal code that will
+ execute as follows:</p>
+
+ <p>First the input value is compared to <c>one</c>, <c>two</c>, and
+ <c>three</c> (using a single instruction that does a binary search;
+ thus, quite efficient even if there are many values) to select which
+ one of the first three clauses to execute (if any).</p>
+
+ <p>If none of the first three clauses matched, the fourth clause
+ will match since a variable always matches. If the guard test
+ <c>is_integer(Int)</c> succeeds, the fourth clause will be
+ executed.</p>
+
+ <p>If the guard test failed, the input value is compared to
+ <c>four</c>, <c>five</c>, and <c>six</c>, and the appropriate clause
+ is selected. (There will be a <c>function_clause</c> exception if
+ none of the values matched.)</p>
+
+ <p>Rewriting to either</p>
+
+ <p><em>DO</em></p>
+ <code type="erl"><![CDATA[
+atom_map2(one) -> 1;
+atom_map2(two) -> 2;
+atom_map2(three) -> 3;
+atom_map2(four) -> 4;
+atom_map2(five) -> 5;
+atom_map2(six) -> 6;
+atom_map2(Int) when is_integer(Int) -> Int.]]></code>
+
+ <p>or</p>
+
+ <p><em>DO</em></p>
+ <code type="erl"><![CDATA[
+atom_map3(Int) when is_integer(Int) -> Int;
+atom_map3(one) -> 1;
+atom_map3(two) -> 2;
+atom_map3(three) -> 3;
+atom_map3(four) -> 4;
+atom_map3(five) -> 5;
+atom_map3(six) -> 6.]]></code>
+
+ <p>will give slightly more efficient matching code.</p>
+
+ <p>Here is a less contrived example:</p>
+
+ <p><em>DO NOT</em></p>
+ <code type="erl"><![CDATA[
+map_pairs1(_Map, [], Ys) ->
+ Ys;
+map_pairs1(_Map, Xs, [] ) ->
+ Xs;
+map_pairs1(Map, [X|Xs], [Y|Ys]) ->
+ [Map(X, Y)|map_pairs1(Map, Xs, Ys)].]]></code>
+
+ <p>The first argument is <em>not</em> a problem. It is variable, but it
+ is a variable in all clauses. The problem is the variable in the second
+ argument, <c>Xs</c>, in the middle clause. Because the variable can
+ match anything, the compiler is not allowed to rearrange the clauses,
+ but must generate code that matches them in the order written.</p>
+
+ <p>If the function is rewritten like this</p>
+
+ <p><em>DO</em></p>
+ <code type="erl"><![CDATA[
+map_pairs2(_Map, [], Ys) ->
+ Ys;
+map_pairs2(_Map, [_|_]=Xs, [] ) ->
+ Xs;
+map_pairs2(Map, [X|Xs], [Y|Ys]) ->
+ [Map(X, Y)|map_pairs2(Map, Xs, Ys)].]]></code>
+
+ <p>the compiler is free rearrange the clauses. It will generate code
+ similar to this</p>
+
+ <p><em>DO NOT (already done by the compiler)</em></p>
+ <code type="erl"><![CDATA[
+explicit_map_pairs(Map, Xs0, Ys0) ->
+ case Xs0 of
+ [X|Xs] ->
+ case Ys0 of
+ [Y|Ys] ->
+ [Map(X, Y)|explicit_map_pairs(Map, Xs, Ys)];
+ [] ->
+ Xs0
+ end;
+ [] ->
+ Ys0
+ end.]]></code>
+
+ <p>which should be slightly faster for presumably the most common case
+ that the input lists are not empty or very short.
+ (Another advantage is that Dialyzer is able to deduce a better type
+ for the variable <c>Xs</c>.)</p>
+ </section>
+
+ <section>
+ <title>Function Calls </title>
+
+ <p>Here is an intentionally rough guide to the relative costs of
+ different kinds of calls. It is based on benchmark figures run on
+ Solaris/Sparc:</p>
+
+ <list type="bulleted">
+ <item>Calls to local or external functions (<c>foo()</c>, <c>m:foo()</c>)
+ are the fastest kind of calls.</item>
+ <item>Calling or applying a fun (<c>Fun()</c>, <c>apply(Fun, [])</c>)
+ is about <em>three times</em> as expensive as calling a local function.</item>
+ <item>Applying an exported function (<c>Mod:Name()</c>,
+ <c>apply(Mod, Name, [])</c>) is about twice as expensive as calling a fun,
+ or about <em>six times</em> as expensive as calling a local function.</item>
+ </list>
+
+ <section>
+ <title>Notes and implementation details</title>
+
+ <p>Calling and applying a fun does not involve any hash-table lookup.
+ A fun contains an (indirect) pointer to the function that implements
+ the fun.</p>
+
+ <warning><p><em>Tuples are not fun(s)</em>.
+ A "tuple fun", <c>{Module,Function}</c>, is not a fun.
+ The cost for calling a "tuple fun" is similar to that
+ of <c>apply/3</c> or worse. Using "tuple funs" is <em>strongly discouraged</em>,
+ as they may not be supported in a future release.</p></warning>
+
+ <p><c>apply/3</c> must look up the code for the function to execute
+ in a hash table. Therefore, it will always be slower than a
+ direct call or a fun call.</p>
+
+ <p>It no longer matters (from a performance point of view)
+ whether you write</p>
+
+ <code type="erl">
+Module:Function(Arg1, Arg2)</code>
+
+ <p>or</p>
+
+ <code type="erl">
+apply(Module, Function, [Arg1,Arg2])</code>
+
+ <p>(The compiler internally rewrites the latter code into the former.)</p>
+
+ <p>The following code</p>
+
+ <code type="erl">
+apply(Module, Function, Arguments)</code>
+
+ <p>is slightly slower because the shape of the list of arguments
+ is not known at compile time.</p>
+ </section>
+ </section>
+
+ <section>
+ <title>Memory usage in recursion</title>
+ <p>When writing recursive functions it is preferable to make them
+ tail-recursive so that they can execute in constant memory space.</p>
+ <p><em>DO</em></p>
+ <code type="none">
+list_length(List) ->
+ list_length(List, 0).
+
+list_length([], AccLen) ->
+ AccLen; % Base case
+
+list_length([_|Tail], AccLen) ->
+ list_length(Tail, AccLen + 1). % Tail-recursive</code>
+ <p><em>DO NOT</em></p>
+ <code type="none">
+list_length([]) ->
+ 0. % Base case
+list_length([_ | Tail]) ->
+ list_length(Tail) + 1. % Not tail-recursive</code>
+ </section>
+
+</chapter>
+
diff --git a/system/doc/efficiency_guide/introduction.xml b/system/doc/efficiency_guide/introduction.xml
new file mode 100644
index 0000000000..ba942c75c2
--- /dev/null
+++ b/system/doc/efficiency_guide/introduction.xml
@@ -0,0 +1,69 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE chapter SYSTEM "chapter.dtd">
+
+<chapter>
+ <header>
+ <copyright>
+ <year>2001</year><year>2009</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ </legalnotice>
+
+ <title>Introduction</title>
+ <prepared>Bjorn Gustavsson</prepared>
+ <docno></docno>
+ <date>2007-11-21</date>
+ <rev></rev>
+ <file>introduction.xml</file>
+ </header>
+
+ <section>
+ <title>Purpose</title>
+
+ <quote><p>Premature optimization is the root of all evil. -- D.E. Knuth</p></quote>
+
+ <p>Efficient code can be well-structured and clean code, based on
+ on a sound overall architecture and sound algorithms.
+ Efficient code can be highly implementation-code that bypasses
+ documented interfaces and takes advantage of obscure quirks in
+ the current implementation.</p>
+
+ <p>Ideally, your code should only contain the first kind of efficient
+ code. If that turns out to be too slow, you should profile the application
+ to find out where the performance bottlenecks are and optimize only the
+ bottlenecks. Other code should stay as clean as possible.</p>
+
+ <p>Fortunately, compiler and run-time optimizations introduced in
+ R12B makes it easier to write code that is both clean and
+ efficient. For instance, the ugly workarounds needed in R11B and earlier
+ releases to get the most speed out of binary pattern matching are
+ no longer necessary. In fact, the ugly code is slower
+ than the clean code (because the clean code has become faster, not
+ because the uglier code has become slower).</p>
+
+ <p>This Efficiency Guide cannot really learn you how to write efficient
+ code. It can give you a few pointers about what to avoid and what to use,
+ and some understanding of how certain language features are implemented.
+ We have generally not included general tips about optimization that will
+ work in any language, such as moving common calculations out of loops.</p>
+ </section>
+
+ <section>
+ <title>Prerequisites</title>
+ <p>It is assumed that the reader is familiar with the Erlang
+ programming language and concepts of OTP.</p>
+ </section>
+</chapter>
+
diff --git a/system/doc/efficiency_guide/listhandling.xml b/system/doc/efficiency_guide/listhandling.xml
new file mode 100644
index 0000000000..e9d2dfe556
--- /dev/null
+++ b/system/doc/efficiency_guide/listhandling.xml
@@ -0,0 +1,241 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE chapter SYSTEM "chapter.dtd">
+
+<chapter>
+ <header>
+ <copyright>
+ <year>2001</year><year>2009</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ </legalnotice>
+
+ <title>List handling</title>
+ <prepared>Bjorn Gustavsson</prepared>
+ <docno></docno>
+ <date>2007-11-16</date>
+ <rev></rev>
+ <file>listHandling.xml</file>
+ </header>
+
+ <section>
+ <title>Creating a list</title>
+
+ <p>Lists can only be built starting from the end and attaching
+ list elements at the beginning. If you use the <c>++</c> operator
+ like this</p>
+
+ <code type="erl">
+List1 ++ List2</code>
+
+ <p>you will create a new list which is copy of the elements in <c>List1</c>,
+ followed by <c>List2</c>. Looking at how <c>lists:append/1</c> or <c>++</c> would be
+ implemented in plain Erlang, it can be seen clearly that the first list
+ is copied:</p>
+
+ <code type="erl">
+append([H|T], Tail) ->
+ [H|append(T, Tail)];
+append([], Tail) ->
+ Tail.</code>
+
+ <p>So the important thing when recursing and building a list is to
+ make sure that you attach the new elements to the beginning of the list,
+ so that you build <em>a</em> list, and not hundreds or thousands of
+ copies of the growing result list.</p>
+
+ <p>Let us first look at how it should not be done:</p>
+
+ <p><em>DO NOT</em></p>
+ <code type="erl"><![CDATA[
+bad_fib(N) ->
+ bad_fib(N, 0, 1, []).
+
+bad_fib(0, _Current, _Next, Fibs) ->
+ Fibs;
+bad_fib(N, Current, Next, Fibs) ->
+ bad_fib(N - 1, Next, Current + Next, Fibs ++ [Current]).]]></code>
+
+ <p>Here we are not a building a list; in each iteration step we
+ create a new list that is one element longer than the new previous list.</p>
+
+ <p>To avoid copying the result in each iteration, we must build the list in
+ reverse order and reverse the list when we are done:</p>
+
+ <p><em>DO</em></p>
+ <code type="erl"><![CDATA[
+tail_recursive_fib(N) ->
+ tail_recursive_fib(N, 0, 1, []).
+
+tail_recursive_fib(0, _Current, _Next, Fibs) ->
+ lists:reverse(Fibs);
+tail_recursive_fib(N, Current, Next, Fibs) ->
+ tail_recursive_fib(N - 1, Next, Current + Next, [Current|Fibs]).]]></code>
+
+ </section>
+
+ <section>
+ <title>List comprehensions</title>
+
+ <p>Lists comprehensions still have a reputation for being slow.
+ They used to be implemented using funs, which used to be slow.</p>
+
+ <p>In recent Erlang/OTP releases (including R12B), a list comprehension</p>
+
+ <code type="erl"><![CDATA[
+[Expr(E) || E <- List]]]></code>
+
+ <p>is basically translated to a local function</p>
+
+ <code type="erl">
+'lc^0'([E|Tail], Expr) ->
+ [Expr(E)|'lc^0'(Tail, Expr)];
+'lc^0'([], _Expr) -> [].</code>
+
+ <p>In R12B, if the result of the list comprehension will <em>obviously</em> not be used,
+ a list will not be constructed. For instance, in this code</p>
+
+ <code type="erl"><![CDATA[
+[io:put_chars(E) || E <- List],
+ok.]]></code>
+
+ <p>or in this code</p>
+
+ <code type="erl"><![CDATA[
+.
+.
+.
+case Var of
+ ... ->
+ [io:put_chars(E) || E <- List];
+ ... ->
+end,
+some_function(...),
+.
+.
+.]]></code>
+
+ <p>the value is neither assigned to a variable, nor passed to another function,
+ nor returned, so there is no need to construct a list and the compiler will simplify
+ the code for the list comprehension to</p>
+
+ <code type="erl">
+'lc^0'([E|Tail], Expr) ->
+ Expr(E),
+ 'lc^0'(Tail, Expr);
+'lc^0'([], _Expr) -> [].</code>
+
+ </section>
+
+ <section>
+ <title>Deep and flat lists</title>
+
+ <p><seealso marker="stdlib:lists#flatten/1">lists:flatten/1</seealso>
+ builds an entirely new list. Therefore, it is expensive, and even
+ <em>more</em> expensive than the <c>++</c> (which copies its left argument,
+ but not its right argument).</p>
+
+ <p>In the following situations, you can easily avoid calling <c>lists:flatten/1</c>:</p>
+
+ <list type="bulleted">
+ <item>When sending data to a port. Ports understand deep lists
+ so there is no reason to flatten the list before sending it to
+ the port.</item>
+ <item>When calling BIFs that accept deep lists, such as
+ <seealso marker="erts:erlang#list_to_binary/1">list_to_binary/1</seealso> or
+ <seealso marker="erts:erlang#iolist_to_binary/1">iolist_to_binary/1</seealso>.</item>
+ <item>When you know that your list is only one level deep, you can can use
+ <seealso marker="stdlib:lists#append/1">lists:append/1</seealso>.</item>
+ </list>
+
+ <p><em>Port example</em></p>
+ <p><em>DO</em></p>
+ <pre>
+ ...
+ port_command(Port, DeepList)
+ ...</pre>
+ <p><em>DO NOT</em></p>
+ <pre>
+ ...
+ port_command(Port, lists:flatten(DeepList))
+ ...</pre>
+
+ <p>A common way to send a zero-terminated string to a port is the following:</p>
+
+ <p><em>DO NOT</em></p>
+ <pre>
+ ...
+ TerminatedStr = String ++ [0], % String="foo" => [$f, $o, $o, 0]
+ port_command(Port, TerminatedStr)
+ ...</pre>
+
+ <p>Instead do like this:</p>
+
+ <p><em>DO</em></p>
+ <pre>
+ ...
+ TerminatedStr = [String, 0], % String="foo" => [[$f, $o, $o], 0]
+ port_command(Port, TerminatedStr)
+ ...</pre>
+
+ <p><em>Append example</em></p>
+ <p><em>DO</em></p>
+ <pre>
+ > lists:append([[1], [2], [3]]).
+ [1,2,3]
+ ></pre>
+ <p><em>DO NOT</em></p>
+ <pre>
+ > lists:flatten([[1], [2], [3]]).
+ [1,2,3]
+ ></pre>
+ </section>
+
+ <section>
+ <title>Why you should not worry about recursive lists functions</title>
+
+ <p>In the performance myth chapter, the following myth was exposed:
+ <seealso marker="myths#tail_recursive">Tail-recursive functions
+ are MUCH faster than recursive functions</seealso>.</p>
+
+ <p>To summarize, in R12B there is usually not much difference between
+ a body-recursive list function and tail-recursive function that reverses
+ the list at the end. Therefore, concentrate on writing beautiful code
+ and forget about the performance of your list functions. In the time-critical
+ parts of your code (and only there), <em>measure</em> before rewriting
+ your code.</p>
+
+ <p><em>Important note</em>: This section talks about lists functions that
+ <em>construct</em> lists. A tail-recursive function that does not construct
+ a list runs in constant space, while the corresponding body-recursive
+ function uses stack space proportional to the length of the list.
+ For instance, a function that sums a list of integers, should <em>not</em> be
+ written like this</p>
+
+ <p><em>DO NOT</em></p>
+ <code type="erl">
+recursive_sum([H|T]) -> H+recursive_sum(T);
+recursive_sum([]) -> 0.</code>
+
+ <p>but like this</p>
+
+ <p><em>DO</em></p>
+ <code type="erl">
+sum(L) -> sum(L, 0).
+
+sum([H|T], Sum) -> sum(T, Sum + H);
+sum([], Sum) -> Sum.</code>
+ </section>
+</chapter>
+
diff --git a/system/doc/efficiency_guide/make.dep b/system/doc/efficiency_guide/make.dep
new file mode 100644
index 0000000000..afa3bd0516
--- /dev/null
+++ b/system/doc/efficiency_guide/make.dep
@@ -0,0 +1,16 @@
+# ----------------------------------------------------
+# >>>> Do not edit this file <<<<
+# This file was automaticly generated by
+# /home/otp/bin/docdepend
+# ----------------------------------------------------
+
+
+# ----------------------------------------------------
+# TeX files that the DVI file depend on
+# ----------------------------------------------------
+
+book.dvi: advanced.tex binaryhandling.tex book.tex commoncaveats.tex \
+ drivers.tex functions.tex introduction.tex listhandling.tex \
+ myths.tex part.tex processes.tex profiling.tex \
+ tablesDatabases.tex
+
diff --git a/system/doc/efficiency_guide/myths.xml b/system/doc/efficiency_guide/myths.xml
new file mode 100644
index 0000000000..76a72368bb
--- /dev/null
+++ b/system/doc/efficiency_guide/myths.xml
@@ -0,0 +1,203 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE chapter SYSTEM "chapter.dtd">
+
+<chapter>
+ <header>
+ <copyright>
+ <year>2007</year>
+ <year>2007</year>
+ <holder>Ericsson AB, All Rights Reserved</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ The Initial Developer of the Original Code is Ericsson AB.
+ </legalnotice>
+
+ <title>The Eight Myths of Erlang Performance</title>
+ <prepared>Bjorn Gustavsson</prepared>
+ <docno></docno>
+ <date>2007-11-10</date>
+ <rev></rev>
+ <file>myths.xml</file>
+ </header>
+
+ <p>Some truths seem to live on well beyond their best-before date,
+ perhaps because "information" spreads more rapidly from person-to-person
+ faster than a single release note that notes, for instance, that funs
+ have become faster.</p>
+
+ <p>Here we try to kill the old truths (or semi-truths) that have
+ become myths.</p>
+
+ <section>
+ <title>Myth: Funs are slow</title>
+ <p>Yes, funs used to be slow. Very slow. Slower than <c>apply/3</c>.
+ Originally, funs were implemented using nothing more than
+ compiler trickery, ordinary tuples, <c>apply/3</c>, and a great
+ deal of ingenuity.</p>
+
+ <p>But that is ancient history. Funs was given its own data type
+ in the R6B release and was further optimized in the R7B release.
+ Now the cost for a fun call falls roughly between the cost for a call to
+ local function and <c>apply/3</c>.</p>
+ </section>
+
+ <section>
+ <title>Myth: List comprehensions are slow</title>
+
+ <p>List comprehensions used to be implemented using funs, and in the
+ bad old days funs were really slow.</p>
+
+ <p>Nowadays the compiler rewrites list comprehensions into an ordinary
+ recursive function. Of course, using a tail-recursive function with
+ a reverse at the end would be still faster. Or would it?
+ That leads us to the next myth.</p>
+ </section>
+
+ <section>
+ <title>Myth: Tail-recursive functions are MUCH faster
+ than recursive functions</title>
+
+ <p><marker id="tail_recursive"></marker>According to the myth,
+ recursive functions leave references
+ to dead terms on the stack and the garbage collector will have to
+ copy all those dead terms, while tail-recursive functions immediately
+ discard those terms.</p>
+
+ <p>That used to be true before R7B. In R7B, the compiler started
+ to generate code that overwrites references to terms that will never
+ be used with an empty list, so that the garbage collector would not
+ keep dead values any longer than necessary.</p>
+
+ <p>Even after that optimization, a tail-recursive function would
+ still most of the time be faster than a body-recursive function. Why?</p>
+
+ <p>It has to do with how many words of stack that are used in each
+ recursive call. In most cases, a recursive function would use more words
+ on the stack for each recursion than the number of words a tail-recursive
+ would allocate on the heap. Since more memory is used, the garbage
+ collector will be invoked more frequently, and it will have more work traversing
+ the stack.</p>
+
+ <p>In R12B and later releases, there is an optimization that will
+ in many cases reduces the number of words used on the stack in
+ body-recursive calls, so that a body-recursive list function and
+ tail-recursive function that calls <seealso
+ marker="stdlib:lists#reverse/1">lists:reverse/1</seealso> at the
+ end will use exactly the same amount of memory.
+ <c>lists:map/2</c>, <c>lists:filter/2</c>, list comprehensions,
+ and many other recursive functions now use the same amount of space
+ as their tail-recursive equivalents.</p>
+
+ <p>So which is faster?</p>
+
+ <p>It depends. On Solaris/Sparc, the body-recursive function seems to
+ be slightly faster, even for lists with very many elements. On the x86
+ architecture, tail-recursion was up to about 30 percent faster.</p>
+
+ <p>So the choice is now mostly a matter of taste. If you really do need
+ the utmost speed, you must <em>measure</em>. You can no longer be
+ absolutely sure that the tail-recursive list function will be the fastest
+ in all circumstances.</p>
+
+ <p>Note: A tail-recursive function that does not need to reverse the
+ list at the end is, of course, faster than a body-recursive function,
+ as are tail-recursive functions that do not construct any terms at all
+ (for instance, a function that sums all integers in a list).</p>
+ </section>
+
+ <section>
+ <title>Myth: '++' is always bad</title>
+
+ <p>The <c>++</c> operator has, somewhat undeservedly, got a very bad reputation.
+ It probably has something to do with code like</p>
+
+ <p><em>DO NOT</em></p>
+ <code type="erl">
+naive_reverse([H|T]) ->
+ naive_reverse(T)++[H];
+naive_reverse([]) ->
+ [].</code>
+
+ <p>which is the most inefficient way there is to reverse a list.
+ Since the <c>++</c> operator copies its left operand, the result
+ will be copied again and again and again... leading to quadratic
+ complexity.</p>
+
+ <p>On the other hand, using <c>++</c> like this</p>
+
+ <p><em>OK</em></p>
+ <code type="erl">
+naive_but_ok_reverse([H|T], Acc) ->
+ naive_but_ok_reverse(T, [H]++Acc);
+naive_but_ok_reverse([], Acc) ->
+ Acc.</code>
+
+ <p>is not bad. Each list element will only be copied once.
+ The growing result <c>Acc</c> is the right operand
+ for the <c>++</c> operator, and it will <em>not</em> be copied.</p>
+
+ <p>Of course, experienced Erlang programmers would actually write</p>
+
+ <p><em>DO</em></p>
+ <code type="erl">
+vanilla_reverse([H|T], Acc) ->
+ vanilla_reverse(T, [H|Acc]);
+vanilla_reverse([], Acc) ->
+ Acc.</code>
+
+ <p>which is slightly more efficient because you don't build a
+ list element only to directly copy it. (Or it would be more efficient
+ if the the compiler did not automatically rewrite <c>[H]++Acc</c>
+ to <c>[H|Acc]</c>.)</p>
+ </section>
+
+ <section>
+ <title>Myth: Strings are slow</title>
+
+ <p>Actually, string handling could be slow if done improperly.
+ In Erlang, you'll have to think a little more about how the strings
+ are used and choose an appropriate representation and use
+ the <seealso marker="stdlib:re">re</seealso> instead of the obsolete
+ <c>regexp</c> module if you are going to use regualr expressions.</p>
+ </section>
+
+ <section>
+ <title>Myth: Repairing a Dets file is very slow</title>
+
+ <p>The repair time is still proportional to the number of records
+ in the file, but Dets repairs used to be much, much slower in the past.
+ Dets has been massively rewritten and improved.</p>
+ </section>
+
+ <section>
+ <title>Myth: BEAM is a stack-based byte-code virtual machine (and therefore slow)</title>
+
+ <p>BEAM is a register-based virtual machine. It has 1024 virtual registers
+ that are used for holding temporary values and for passing arguments when
+ calling functions. Variables that need to survive a function call are saved
+ to the stack.</p>
+
+ <p>BEAM is a threaded-code interpreter. Each instruction is word pointing
+ directly to executable C-code, making instruction dispatching very fast.</p>
+ </section>
+
+ <section>
+ <title>Myth: Use '_' to speed up your program when a variable is not used</title>
+
+ <p>That was once true, but since R6B the BEAM compiler is quite capable of seeing itself
+ that a variable is not used.</p>
+ </section>
+
+</chapter>
+
diff --git a/system/doc/efficiency_guide/part.xml b/system/doc/efficiency_guide/part.xml
new file mode 100644
index 0000000000..2b78f35e2a
--- /dev/null
+++ b/system/doc/efficiency_guide/part.xml
@@ -0,0 +1,42 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE part SYSTEM "part.dtd">
+
+<part xmlns:xi="http://www.w3.org/2001/XInclude">
+ <header>
+ <copyright>
+ <year>2001</year><year>2009</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ </legalnotice>
+
+ <title>Efficiency Guide </title>
+ <prepared>Ingela Anderton</prepared>
+ <docno></docno>
+ <date>2002-09-23</date>
+ <rev></rev>
+ </header>
+ <xi:include href="introduction.xml"/>
+ <xi:include href="myths.xml"/>
+ <xi:include href="commoncaveats.xml"/>
+ <xi:include href="binaryhandling.xml"/>
+ <xi:include href="listhandling.xml"/>
+ <xi:include href="functions.xml"/>
+ <xi:include href="tablesDatabases.xml"/>
+ <xi:include href="processes.xml"/>
+ <xi:include href="drivers.xml"/>
+ <xi:include href="advanced.xml"/>
+ <xi:include href="profiling.xml"/>
+</part>
+
diff --git a/system/doc/efficiency_guide/processes.xml b/system/doc/efficiency_guide/processes.xml
new file mode 100644
index 0000000000..a25ec53370
--- /dev/null
+++ b/system/doc/efficiency_guide/processes.xml
@@ -0,0 +1,264 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE chapter SYSTEM "chapter.dtd">
+
+<chapter>
+ <header>
+ <copyright>
+ <year>2001</year><year>2009</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ </legalnotice>
+
+ <title>Processes</title>
+ <prepared>Bjorn Gustavsson</prepared>
+ <docno></docno>
+ <date>2007-11-21</date>
+ <rev></rev>
+ <file>processes.xml</file>
+ </header>
+
+ <section>
+ <title>Creation of an Erlang process</title>
+
+ <p>An Erlang process is lightweight compared to operating
+ systems threads and processes.</p>
+
+ <p>A newly spawned Erlang process uses 309 words of memory
+ in the non-SMP emulator without HiPE support. (SMP support
+ and HiPE support will both add to this size.) The size can
+ be found out like this:</p>
+
+ <pre>
+Erlang (BEAM) emulator version 5.6 [async-threads:0] [kernel-poll:false]
+
+Eshell V5.6 (abort with ^G)
+1> <input>Fun = fun() -> receive after infinity -> ok end end.</input>
+#Fun&lt;...>
+2> <input>{_,Bytes} = process_info(spawn(Fun), memory).</input>
+{memory,1232}
+3> <input>Bytes div erlang:system_info(wordsize).</input>
+309</pre>
+
+ <p>The size includes 233 words for the heap area (which includes the stack).
+ The garbage collector will increase the heap as needed.</p>
+
+ <p>The main (outer) loop for a process <em>must</em> be tail-recursive.
+ If not, the stack will grow until the process terminates.</p>
+
+ <p><em>DO NOT</em></p>
+ <code type="erl">
+loop() ->
+ receive
+ {sys, Msg} ->
+ handle_sys_msg(Msg),
+ loop();
+ {From, Msg} ->
+ Reply = handle_msg(Msg),
+ From ! Reply,
+ loop()
+ end,
+ io:format("Message is processed~n", []).</code>
+
+ <p>The call to <c>io:format/2</c> will never be executed, but a
+ return address will still be pushed to the stack each time
+ <c>loop/0</c> is called recursively. The correct tail-recursive
+ version of the function looks like this:</p>
+
+ <p><em>DO</em></p>
+<code type="erl">
+ loop() ->
+ receive
+ {sys, Msg} ->
+ handle_sys_msg(Msg),
+ loop();
+ {From, Msg} ->
+ Reply = handle_msg(Msg),
+ From ! Reply,
+ loop()
+ end.</code>
+
+ <section>
+ <title>Initial heap size</title>
+
+ <p>The default initial heap size of 233 words is quite conservative
+ in order to support Erlang systems with hundreds of thousands or
+ even millions of processes. The garbage collector will grow and
+ shrink the heap as needed.</p>
+
+ <p>In a system that use comparatively few processes, performance
+ <em>might</em> be improved by increasing the minimum heap size using either
+ the <c>+h</c> option for
+ <seealso marker="erts:erl">erl</seealso> or on a process-per-process
+ basis using the <c>min_heap_size</c> option for
+ <seealso marker="erts:erlang#spawn_opt/4">spawn_opt/4</seealso>.</p>
+
+ <p>The gain is twofold: Firstly, although the garbage collector will
+ grow the heap, it will it grow it step by step, which will be more
+ costly than directly establishing a larger heap when the process
+ is spawned. Secondly, the garbage collector may also shrink the
+ heap if it is much larger than the amount of data stored on it;
+ setting the minimum heap size will prevent that.</p>
+
+ <warning><p>The emulator will probably use more memory, and because garbage
+ collections occur less frequently, huge binaries could be
+ kept much longer.</p></warning>
+
+ <p>In systems with many processes, computation tasks that run
+ for a short time could be spawned off into a new process with
+ a higher minimum heap size. When the process is done, it will
+ send the result of the computation to another process and terminate.
+ If the minimum heap size is calculated properly, the process may not
+ have to do any garbage collections at all.
+ <em>This optimization should not be attempted
+ without proper measurements.</em></p>
+ </section>
+
+ </section>
+
+ <section>
+ <title>Process messages</title>
+
+ <p>All data in messages between Erlang processes is copied, with
+ the exception of
+ <seealso marker="binaryhandling#refc_binary">refc binaries</seealso>
+ on the same Erlang node.</p>
+
+ <p>When a message is sent to a process on another Erlang node,
+ it will first be encoded to the Erlang External Format before
+ being sent via an TCP/IP socket. The receiving Erlang node decodes
+ the message and distributes it to the right process.</p>
+
+ <section>
+ <title>The constant pool</title>
+
+ <p>Constant Erlang terms (also called <em>literals</em>) are now
+ kept in constant pools; each loaded module has its own pool.
+ The following function</p>
+
+ <p><em>DO</em> (in R12B and later)</p>
+ <code type="erl">
+days_in_month(M) ->
+ element(M, {31,28,31,30,31,30,31,31,30,31,30,31}).</code>
+
+ <p>will no longer build the tuple every time it is called (only
+ to have it discarded the next time the garbage collector was run), but
+ the tuple will be located in the module's constant pool.</p>
+
+ <p>But if a constant is sent to another process (or stored in
+ an ETS table), it will be <em>copied</em>.
+ The reason is that the run-time system must be able
+ to keep track of all references to constants in order to properly
+ unload code containing constants. (When the code is unloaded,
+ the constants will be copied to the heap of the processes that refer
+ to them.) The copying of constants might be eliminated in a future
+ release.</p>
+ </section>
+
+ <section>
+ <title>Loss of sharing</title>
+
+ <p>Shared sub-terms are <em>not</em> preserved when a term is sent
+ to another process, passed as the initial process arguments in
+ the <c>spawn</c> call, or stored in an ETS table.
+ That is an optimization. Most applications do not send message
+ with shared sub-terms.</p>
+
+ <p>Here is an example of how a shared sub-term can be created:</p>
+
+ <code type="erl">
+kilo_byte() ->
+ kilo_byte(10, [42]).
+
+kilo_byte(0, Acc) ->
+ Acc;
+kilo_byte(N, Acc) ->
+ kilo_byte(N-1, [Acc|Acc]).</code>
+
+ <p><c>kilo_byte/1</c> creates a deep list. If we call
+ <c>list_to_binary/1</c>, we can convert the deep list to a binary
+ of 1024 bytes:</p>
+
+ <pre>
+1> <input>byte_size(list_to_binary(efficiency_guide:kilo_byte())).</input>
+1024</pre>
+
+ <p>Using the <c>erts_debug:size/1</c> BIF we can see that the
+ deep list only requires 22 words of heap space:</p>
+
+ <pre>
+2> <input>erts_debug:size(efficiency_guide:kilo_byte()).</input>
+22</pre>
+
+ <p>Using the <c>erts_debug:flat_size/1</c> BIF, we can calculate
+ the size of the deep list if sharing is ignored. It will be
+ the size of the list when it has been sent to another process
+ or stored in an ETS table:</p>
+
+ <pre>
+3> <input>erts_debug:flat_size(efficiency_guide:kilo_byte()).</input>
+4094</pre>
+
+ <p>We can verify that sharing will be lost if we insert the
+ data into an ETS table:</p>
+
+ <pre>
+4> <input>T = ets:new(tab, []).</input>
+17
+5> <input>ets:insert(T, {key,efficiency_guide:kilo_byte()}).</input>
+true
+6> <input>erts_debug:size(element(2, hd(ets:lookup(T, key)))).</input>
+4094
+7> <input>erts_debug:flat_size(element(2, hd(ets:lookup(T, key)))).</input>
+4094</pre>
+
+ <p>When the data has passed through an ETS table,
+ <c>erts_debug:size/1</c> and <c>erts_debug:flat_size/1</c>
+ return the same value. Sharing has been lost.</p>
+
+ <p>In a future release of Erlang/OTP, we might implement a
+ way to (optionally) preserve sharing. We have no plans to make
+ preserving of sharing the default behaviour, since that would
+ penalize the vast majority of Erlang applications.</p>
+ </section>
+ </section>
+
+ <section>
+ <title>The SMP emulator</title>
+
+ <p>The SMP emulator (introduced in R11B) will take advantage of
+ multi-core or multi-CPU computer by running several Erlang schedulers
+ threads (typically, the same as the number of cores). Each scheduler
+ thread schedules Erlang processes in the same way as the Erlang scheduler
+ in the non-SMP emulator.</p>
+
+ <p>To gain performance by using the SMP emulator, your application
+ <em>must have more than one runnable Erlang process</em> most of the time.
+ Otherwise, the Erlang emulator can still only run one Erlang process
+ at the time, but you must still pay the overhead for locking. Although
+ we try to reduce the locking overhead as much as possible, it will never
+ become exactly zero.</p>
+
+ <p>Benchmarks that may seem to be concurrent are often sequential.
+ The estone benchmark, for instance, is entirely sequential. So is also
+ the most common implementation of the "ring benchmark"; usually one process
+ is active, while the others wait in a <c>receive</c> statement.</p>
+
+ <p>The <seealso marker="percept:percept">percept</seealso> application
+ can be used to profile your application to see how much potential (or lack
+ thereof) it has for concurrency.</p>
+ </section>
+
+</chapter>
+
diff --git a/system/doc/efficiency_guide/profiling.xml b/system/doc/efficiency_guide/profiling.xml
new file mode 100644
index 0000000000..65d13408bc
--- /dev/null
+++ b/system/doc/efficiency_guide/profiling.xml
@@ -0,0 +1,258 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE chapter SYSTEM "chapter.dtd">
+
+<chapter>
+ <header>
+ <copyright>
+ <year>2001</year><year>2009</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ </legalnotice>
+
+ <title>Profiling</title>
+ <prepared>Ingela Anderton</prepared>
+ <docno></docno>
+ <date>2001-11-02</date>
+ <rev></rev>
+ <file>profiling.xml</file>
+ </header>
+
+ <section>
+ <title>Do not guess about performance - profile</title>
+
+ <p>Even experienced software developers often guess wrong about where
+ the performance bottlenecks are in their programs.</p>
+
+ <p>Therefore, profile your program to see where the performance
+ bottlenecks are and concentrate on optimizing them.</p>
+
+ <p>Erlang/OTP contains several tools to help finding bottlenecks.</p>
+
+ <p><c>fprof</c> and <c>eprof</c> provide the most detailed information
+ about where the time is spent, but they significantly slow downs the
+ programs they profile.</p>
+
+ <p>If the program is too big to be profiled by <c>fprof</c> or <c>eprof</c>,
+ <c>cover</c> and <c>cprof</c> could be used to locate parts of the
+ code that should be more thoroughly profiled using <c>fprof</c> or
+ <c>eprof</c>.</p>
+
+ <p><c>cover</c> provides execution counts per line per process,
+ with less overhead than <c>fprof/eprof</c>. Execution counts can
+ with some caution be used to locate potential performance bottlenecks.
+ The most lightweight tool is <c>cprof</c>, but it only provides execution
+ counts on a function basis (for all processes, not per process).</p>
+ </section>
+
+ <section>
+ <title>Big systems</title>
+ <p>If you have a big system it might be interesting to run profiling
+ on a simulated and limited scenario to start with. But bottlenecks
+ have a tendency to only appear or cause problems when
+ there are many things going on at the same time, and when there
+ are many nodes involved. Therefore it is desirable to also run
+ profiling in a system test plant on a real target system.</p>
+ <p>When your system is big you do not want to run the profiling
+ tools on the whole system. You want to concentrate on processes
+ and modules that you know are central and stand for a big part of the
+ execution.</p>
+ </section>
+
+ <section>
+ <title>What to look for</title>
+ <p>When analyzing the result file from the profiling activity
+ you should look for functions that are called many
+ times and have a long "own" execution time (time excluded calls
+ to other functions). Functions that just are called very
+ many times can also be interesting, as even small things can add
+ up to quite a bit if they are repeated often. Then you need to
+ ask yourself what can I do to reduce this time. Appropriate
+ types of questions to ask yourself are: </p>
+ <list type="bulleted">
+ <item>Can I reduce the number of times the function is called?</item>
+ <item>Are there tests that can be run less often if I change
+ the order of tests?</item>
+ <item>Are there redundant tests that can be removed? </item>
+ <item>Is there some expression calculated giving the same result
+ each time? </item>
+ <item>Is there other ways of doing this that are equivalent and
+ more efficient?</item>
+ <item>Can I use another internal data representation to make
+ things more efficient? </item>
+ </list>
+ <p>These questions are not always trivial to answer. You might
+ need to do some benchmarks to back up your theory, to avoid
+ making things slower if your theory is wrong. See <seealso marker="#benchmark">benchmarking</seealso>.</p>
+ </section>
+
+ <section>
+ <title>Tools</title>
+
+ <section>
+ <title>fprof</title>
+ <p><c>fprof</c> measures the execution time for each function,
+ both own time i.e how much time a function has used for its
+ own execution, and accumulated time i.e. including called
+ functions. The values are displayed per process. You also get
+ to know how many times each function has been
+ called. <c>fprof</c> is based on trace to file in order to
+ minimize runtime performance impact. Using fprof is just a
+ matter of calling a few library functions, see fprof manual
+ page under the application tools.</p>
+ <p><c>fprof</c> was introduced in version R8 of Erlang/OTP. Its
+ predecessor <c>eprof</c> that is based on the Erlang trace BIFs,
+ is still available, see eprof manual page under the
+ application tools. Eprof shows how much time has been used by
+ each process, and in which function calls this time has been
+ spent. Time is shown as percentage of total time, not as
+ absolute time.</p>
+ </section>
+
+ <section>
+ <title>cover</title>
+ <p><c>cover</c>'s primary use is coverage analysis to verify
+ test cases, making sure all relevant code is covered.
+ <c>cover</c> counts how many times each executable line of
+ code is executed when a program is run. This is done on a per
+ module basis. Of course this information can be used to
+ determine what code is run very frequently and could therefore
+ be subject for optimization. Using cover is just a matter of
+ calling a few library functions, see cover manual
+ page under the application tools.</p>
+ </section>
+
+ <section>
+ <title>cprof</title>
+ <p><c>cprof</c> is something in between <c>fprof</c> and
+ <c>cover</c> regarding features. It counts how many times each
+ function is called when the program is run, on a per module
+ basis. <c>cprof</c> has a low performance degradation (versus
+ <c>fprof</c> and <c>eprof</c>) and does not need to recompile
+ any modules to profile (versus <c>cover</c>).</p>
+ </section>
+
+ <section>
+ <title>Tool summarization</title>
+ <table>
+ <row>
+ <cell align="center" valign="middle">Tool</cell>
+ <cell align="center" valign="middle">Results</cell>
+ <cell align="center" valign="middle">Size of result</cell>
+ <cell align="center" valign="middle">Effects on program execution time</cell>
+ <cell align="center" valign="middle">Records number of calls</cell>
+ <cell align="center" valign="middle">Records Execution time</cell>
+ <cell align="center" valign="middle">Records called by</cell>
+ <cell align="center" valign="middle">Records garbage collection</cell>
+ </row>
+ <row>
+ <cell align="left" valign="middle"><c>fprof </c></cell>
+ <cell align="left" valign="middle">per process to screen/file </cell>
+ <cell align="left" valign="middle">large </cell>
+ <cell align="left" valign="middle">significant slowdown </cell>
+ <cell align="left" valign="middle">yes </cell>
+ <cell align="left" valign="middle">total and own</cell>
+ <cell align="left" valign="middle">yes </cell>
+ <cell align="left" valign="middle">yes </cell>
+ </row>
+ <row>
+ <cell align="left" valign="middle"><c>eprof </c></cell>
+ <cell align="left" valign="middle">per process/function to screen/file </cell>
+ <cell align="left" valign="middle">medium </cell>
+ <cell align="left" valign="middle">significant slowdown </cell>
+ <cell align="left" valign="middle">yes </cell>
+ <cell align="left" valign="middle">only total </cell>
+ <cell align="left" valign="middle">no </cell>
+ <cell align="left" valign="middle">no </cell>
+ </row>
+ <row>
+ <cell align="left" valign="middle"><c>cover </c></cell>
+ <cell align="left" valign="middle">per module to screen/file</cell>
+ <cell align="left" valign="middle">small </cell>
+ <cell align="left" valign="middle">moderate slowdown</cell>
+ <cell align="left" valign="middle">yes, per line </cell>
+ <cell align="left" valign="middle">no </cell>
+ <cell align="left" valign="middle">no </cell>
+ <cell align="left" valign="middle">no </cell>
+ </row>
+ <row>
+ <cell align="left" valign="middle"><c>cprof </c></cell>
+ <cell align="left" valign="middle">per module to caller</cell>
+ <cell align="left" valign="middle">small </cell>
+ <cell align="left" valign="middle">small slowdown </cell>
+ <cell align="left" valign="middle">yes </cell>
+ <cell align="left" valign="middle">no </cell>
+ <cell align="left" valign="middle">no </cell>
+ <cell align="left" valign="middle">no </cell>
+ </row>
+ <tcaption></tcaption>
+ </table>
+ </section>
+ </section>
+
+ <section>
+ <marker id="benchmark"></marker>
+ <title>Benchmarking</title>
+
+ <p>The main purpose of benchmarking is to find out which
+ implementation of a given algorithm or function is the fastest.
+ Benchmarking is far from an exact science. Today's operating systems
+ generally run background tasks that are difficult to turn off.
+ Caches and multiple CPU cores doesn't make it any easier.
+ It would be best to run Unix-computers in single-user mode when
+ benchmarking, but that is inconvenient to say the least for casual
+ testing.</p>
+
+ <p>Benchmarks can measure wall-clock time or CPU time.</p>
+
+ <p><seealso marker="stdlib:timer#tc/3">timer:tc/3</seealso> measures
+ wall-clock time. The advantage with wall-clock time is that I/O,
+ swapping, and other activities in the operating-system kernel are
+ included in the measurements. The disadvantage is that the
+ the measurements will vary wildly. Usually it is best to run the
+ benchmark several times and note the shortest time - that time should
+ be the minimum time that is possible to achieve under the best of
+ circumstances.</p>
+
+ <p><seealso marker="erts:erlang#statistics/1">statistics/1</seealso>
+ with the argument <c>runtime</c> measures CPU time spent in the Erlang
+ virtual machine. The advantage is that the results are more
+ consistent from run to run. The disadvantage is that the time
+ spent in the operating system kernel (such as swapping and I/O)
+ are not included. Therefore, measuring CPU time is misleading if
+ any I/O (file or sockets) are involved.</p>
+
+ <p>It is probably a good idea to do both wall-clock measurements and
+ CPU time measurements.</p>
+
+ <p>Some additional advice:</p>
+
+ <list type="bulleted">
+ <item>The granularity of both types measurement could be quite
+ high so you should make sure that each individual measurement
+ lasts for at least several seconds.</item>
+
+ <item>To make the test fair, each new test run should run in its own,
+ newly created Erlang process. Otherwise, if all tests runs in the
+ same process, the later tests would start out with larger heap sizes
+ and therefore probably does less garbage collections. You could
+ also consider restarting the Erlang emulator between each test.</item>
+
+ <item>Do not assume that the fastest implementation of a given algorithm
+ on computer architecture X also is the fast on computer architecture Y.</item>
+
+ </list>
+ </section>
+</chapter>
+
diff --git a/system/doc/efficiency_guide/tablesDatabases.xml b/system/doc/efficiency_guide/tablesDatabases.xml
new file mode 100644
index 0000000000..4b53348c4c
--- /dev/null
+++ b/system/doc/efficiency_guide/tablesDatabases.xml
@@ -0,0 +1,379 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE chapter SYSTEM "chapter.dtd">
+
+<chapter>
+ <header>
+ <copyright>
+ <year>2001</year><year>2009</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ </legalnotice>
+
+ <title>Tables and databases</title>
+ <prepared>Ingela Anderton</prepared>
+ <docno></docno>
+ <date>2001-08-07</date>
+ <rev></rev>
+ <file>tablesDatabases.xml</file>
+ </header>
+
+ <section>
+ <title>Ets, Dets and Mnesia</title>
+ <p>Every example using Ets has a corresponding example in
+ Mnesia. In general all Ets examples also apply to Dets tables.</p>
+
+ <section>
+ <title>Select/Match operations</title>
+ <p>Select/Match operations on Ets and Mnesia tables can become
+ very expensive operations. They usually need to scan the complete
+ table. You should try to structure your
+ data so that you minimize the need for select/match
+ operations. However, if you really need a select/match operation,
+ it will still be more efficient than using <c>tab2list</c>.
+ Examples of this and also of ways to avoid select/match will be provided in
+ some of the following sections. The functions
+ <c>ets:select/2</c> and <c>mnesia:select/3</c> should be preferred over
+ <c>ets:match/2</c>,<c>ets:match_object/2</c>, and <c>mnesia:match_object/3</c>.</p>
+ <note>
+ <p>There are exceptions when the complete table is not
+ scanned, for instance if part of the key is bound when searching an
+ <c>ordered_set</c> table, or if it is a Mnesia
+ table and there is a secondary index on the field that is
+ selected/matched. If the key is fully bound there will, of course, be
+ no point in doing a select/match, unless you have a bag table and
+ you are only interested in a sub-set of the elements with
+ the specific key.</p>
+ </note>
+ <p>When creating a record to be used in a select/match operation you
+ want most of the fields to have the value '_'. The easiest and fastest way
+ to do that is as follows:</p>
+ <pre>
+#person{age = 42, _ = '_'}. </pre>
+ </section>
+
+ <section>
+ <title>Deleting an element</title>
+ <p>The delete operation is considered
+ successful if the element was not present in the table. Hence
+ all attempts to check that the element is present in the
+ Ets/Mnesia table before deletion are unnecessary. Here follows
+ an example for Ets tables.</p>
+ <p><em>DO</em></p>
+ <pre>
+...
+ets:delete(Tab, Key),
+...</pre>
+ <p><em>DO NOT</em></p>
+ <pre>
+...
+case ets:lookup(Tab, Key) of
+ [] ->
+ ok;
+ [_|_] ->
+ ets:delete(Tab, Key)
+end,
+...</pre>
+ </section>
+
+ <section>
+ <title>Data fetching</title>
+ <p>Do not fetch data that you already have! Consider that you
+ have a module that handles the abstract data type Person. You
+ export the interface function <c>print_person/1</c> that uses the internal functions
+ <c>print_name/1</c>, <c>print_age/1</c>, <c>print_occupation/1</c>.</p>
+ <note>
+ <p>If the functions <c>print_name/1</c> and so on, had been interface
+ functions the matter comes in to a whole new light, as you
+ do not want the user of the interface to know about the
+ internal data representation. </p>
+ </note>
+ <p><em>DO</em></p>
+ <code type="erl">
+%%% Interface function
+print_person(PersonId) ->
+ %% Look up the person in the named table person,
+ case ets:lookup(person, PersonId) of
+ [Person] ->
+ print_name(Person),
+ print_age(Person),
+ print_occupation(Person);
+ [] ->
+ io:format("No person with ID = ~p~n", [PersonID])
+ end.
+
+%%% Internal functions
+print_name(Person) ->
+ io:format("No person ~p~n", [Person#person.name]).
+
+print_age(Person) ->
+ io:format("No person ~p~n", [Person#person.age]).
+
+print_occupation(Person) ->
+ io:format("No person ~p~n", [Person#person.occupation]).</code>
+ <p><em>DO NOT</em></p>
+ <code type="erl">
+%%% Interface function
+print_person(PersonId) ->
+ %% Look up the person in the named table person,
+ case ets:lookup(person, PersonId) of
+ [Person] ->
+ print_name(PersonID),
+ print_age(PersonID),
+ print_occupation(PersonID);
+ [] ->
+ io:format("No person with ID = ~p~n", [PersonID])
+ end.
+
+%%% Internal functionss
+print_name(PersonID) ->
+ [Person] = ets:lookup(person, PersonId),
+ io:format("No person ~p~n", [Person#person.name]).
+
+print_age(PersonID) ->
+ [Person] = ets:lookup(person, PersonId),
+ io:format("No person ~p~n", [Person#person.age]).
+
+print_occupation(PersonID) ->
+ [Person] = ets:lookup(person, PersonId),
+ io:format("No person ~p~n", [Person#person.occupation]).</code>
+ </section>
+
+ <section>
+ <title>Non-persistent data storage </title>
+ <p>For non-persistent database storage, prefer Ets tables over
+ Mnesia local_content tables. Even the Mnesia <c>dirty_write</c>
+ operations carry a fixed overhead compared to Ets writes.
+ Mnesia must check if the table is replicated or has indices,
+ this involves at least one Ets lookup for each
+ <c>dirty_write</c>. Thus, Ets writes will always be faster than
+ Mnesia writes.</p>
+ </section>
+
+ <section>
+ <title>tab2list</title>
+ <p>Assume we have an Ets-table, which uses <c>idno</c> as key,
+ and contains:</p>
+ <pre>
+[#person{idno = 1, name = "Adam", age = 31, occupation = "mailman"},
+ #person{idno = 2, name = "Bryan", age = 31, occupation = "cashier"},
+ #person{idno = 3, name = "Bryan", age = 35, occupation = "banker"},
+ #person{idno = 4, name = "Carl", age = 25, occupation = "mailman"}]</pre>
+ <p>If we <em>must</em> return all data stored in the Ets-table we
+ can use <c>ets:tab2list/1</c>. However, usually we are only
+ interested in a subset of the information in which case
+ <c>ets:tab2list/1</c> is expensive. If we only want to extract
+ one field from each record, e.g., the age of every person, we
+ should use:</p>
+ <p><em>DO</em></p>
+ <pre>
+...
+ets:select(Tab,[{ #person{idno='_',
+ name='_',
+ age='$1',
+ occupation = '_'},
+ [],
+ ['$1']}]),
+...</pre>
+ <p><em>DO NOT</em></p>
+ <pre>
+...
+TabList = ets:tab2list(Tab),
+lists:map(fun(X) -> X#person.age end, TabList),
+...</pre>
+ <p>If we are only interested in the age of all persons named
+ Bryan, we should:</p>
+ <p><em>DO</em></p>
+ <pre>
+...
+ets:select(Tab,[{ #person{idno='_',
+ name="Bryan",
+ age='$1',
+ occupation = '_'},
+ [],
+ ['$1']}]),
+...</pre>
+ <p><em>DO NOT</em></p>
+ <pre>
+...
+TabList = ets:tab2list(Tab),
+lists:foldl(fun(X, Acc) -> case X#person.name of
+ "Bryan" ->
+ [X#person.age|Acc];
+ _ ->
+ Acc
+ end
+ end, [], TabList),
+...</pre>
+ <p><em>REALLY DO NOT</em></p>
+ <pre>
+...
+TabList = ets:tab2list(Tab),
+BryanList = lists:filter(fun(X) -> X#person.name == "Bryan" end,
+ TabList),
+lists:map(fun(X) -> X#person.age end, BryanList),
+...</pre>
+ <p>If we need all information stored in the Ets table about
+ persons named Bryan we should:</p>
+ <p><em>DO</em></p>
+ <pre>
+...
+ets:select(Tab, [{#person{idno='_',
+ name="Bryan",
+ age='_',
+ occupation = '_'}, [], ['$_']}]),
+...</pre>
+ <p><em>DO NOT</em></p>
+ <pre>
+...
+TabList = ets:tab2list(Tab),
+lists:filter(fun(X) -> X#person.name == "Bryan" end, TabList),
+...</pre>
+ </section>
+
+ <section>
+ <title>Ordered_set tables</title>
+ <p>If the data in the table should be accessed so that the order
+ of the keys in the table is significant, the table type
+ <c>ordered_set</c> could be used instead of the more usual
+ <c>set</c> table type. An <c>ordered_set</c> is always
+ traversed in Erlang term order with regard to the key field
+ so that return values from functions such as <c>select</c>,
+ <c>match_object</c>, and <c>foldl</c> are ordered by the key
+ values. Traversing an <c>ordered_set</c> with the <c>first</c> and
+ <c>next</c> operations also returns the keys ordered.</p>
+ <note>
+ <p>An <c>ordered_set</c> only guarantees that
+ objects are processed in <em>key</em> order. Results from functions as
+ <c>ets:select/2</c> appear in the <em>key</em> order even if
+ the key is not included in the result.</p>
+ </note>
+ </section>
+ </section>
+
+ <section>
+ <title>Ets specific</title>
+
+ <section>
+ <title>Utilizing the keys of the Ets table</title>
+ <p>An Ets table is a single key table (either a hash table or a
+ tree ordered by the key) and should be used as one. In other
+ words, use the key to look up things whenever possible. A
+ lookup by a known key in a set Ets table is constant and for a
+ ordered_set Ets table it is O(logN). A key lookup is always
+ preferable to a call where the whole table has to be
+ scanned. In the examples above, the field <c>idno</c> is the
+ key of the table and all lookups where only the name is known
+ will result in a complete scan of the (possibly large) table
+ for a matching result.</p>
+ <p>A simple solution would be to use the <c>name</c> field as
+ the key instead of the <c>idno</c> field, but that would cause
+ problems if the names were not unique. A more general solution
+ would be create a second table with name as key and idno as
+ data, i.e. to index (invert) the table with regards to the
+ <c>name</c> field. The second table would of course have to be
+ kept consistent with the master table. Mnesia could do this
+ for you, but a home brew index table could be very efficient
+ compared to the overhead involved in using Mnesia.</p>
+ <p>An index table for the table in the previous examples would
+ have to be a bag (as keys would appear more than once) and could
+ have the following contents:</p>
+ <pre>
+
+[#index_entry{name="Adam", idno=1},
+ #index_entry{name="Bryan", idno=2},
+ #index_entry{name="Bryan", idno=3},
+ #index_entry{name="Carl", idno=4}]</pre>
+ <p>Given this index table a lookup of the <c>age</c> fields for
+ all persons named "Bryan" could be done like this:</p>
+ <pre>
+...
+MatchingIDs = ets:lookup(IndexTable,"Bryan"),
+lists:map(fun(#index_entry{idno = ID}) ->
+ [#person{age = Age}] = ets:lookup(PersonTable, ID),
+ Age
+ end,
+ MatchingIDs),
+...</pre>
+ <p>Note that the code above never uses <c>ets:match/2</c> but
+ instead utilizes the <c>ets:lookup/2</c> call. The
+ <c>lists:map/2</c> call is only used to traverse the <c>idno</c>s
+ matching the name "Bryan" in the table; therefore the number of lookups
+ in the master table is minimized.</p>
+ <p>Keeping an index table introduces some overhead when
+ inserting records in the table, therefore the number of operations
+ gained from the table has to be weighted against the number of
+ operations inserting objects in the table. However, note that the gain when
+ the key can be used to lookup elements is significant.</p>
+ </section>
+ </section>
+
+ <section>
+ <title>Mnesia specific</title>
+
+ <section>
+ <title>Secondary index</title>
+ <p>If you frequently do a lookup on a field that is not the
+ key of the table, you will lose performance using
+ "mnesia:select/match_object" as this function will traverse the
+ whole table. You may create a secondary index instead and
+ use "mnesia:index_read" to get faster access, however this
+ will require more memory. Example:</p>
+ <pre>
+-record(person, {idno, name, age, occupation}).
+ ...
+{atomic, ok} =
+mnesia:create_table(person, [{index,[#person.age]},
+ {attributes,
+ record_info(fields, person)}]),
+{atomic, ok} = mnesia:add_table_index(person, age),
+...
+
+PersonsAge42 =
+ mnesia:dirty_index_read(person, 42, #person.age),
+...</pre>
+ </section>
+
+ <section>
+ <title>Transactions </title>
+ <p>Transactions is a way to guarantee that the distributed
+ Mnesia database remains consistent, even when many different
+ processes update it in parallel. However if you have
+ real time requirements it is recommended to use dirty
+ operations instead of transactions. When using the dirty
+ operations you lose the consistency guarantee, this is usually
+ solved by only letting one process update the table. Other
+ processes have to send update requests to that process.</p>
+ <pre>
+...
+% Using transaction
+
+Fun = fun() ->
+ [mnesia:read({Table, Key}),
+ mnesia:read({Table2, Key2})]
+ end,
+
+{atomic, [Result1, Result2]} = mnesia:transaction(Fun),
+...
+
+% Same thing using dirty operations
+...
+
+Result1 = mnesia:dirty_read({Table, Key}),
+Result2 = mnesia:dirty_read({Table2, Key2}),
+...</pre>
+ </section>
+ </section>
+</chapter>
+
diff --git a/system/doc/efficiency_guide/xmlfiles.mk b/system/doc/efficiency_guide/xmlfiles.mk
new file mode 100644
index 0000000000..fa2fe44eec
--- /dev/null
+++ b/system/doc/efficiency_guide/xmlfiles.mk
@@ -0,0 +1,32 @@
+#
+# %CopyrightBegin%
+#
+# Copyright Ericsson AB 2009. All Rights Reserved.
+#
+# The contents of this file are subject to the Erlang Public License,
+# Version 1.1, (the "License"); you may not use this file except in
+# compliance with the License. You should have received a copy of the
+# Erlang Public License along with this software. If not, it can be
+# retrieved online at http://www.erlang.org/.
+#
+# Software distributed under the License is distributed on an "AS IS"
+# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+# the License for the specific language governing rights and limitations
+# under the License.
+#
+# %CopyrightEnd%
+#
+EFF_GUIDE_CHAPTER_FILES = \
+ advanced.xml \
+ commoncaveats.xml \
+ binaryhandling.xml \
+ functions.xml \
+ introduction.xml \
+ listhandling.xml \
+ myths.xml \
+ part.xml \
+ processes.xml \
+ profiling.xml \
+ tablesDatabases.xml \
+ drivers.xml
+