author | Erlang/OTP <[email protected]> | 2009-11-20 14:54:40 +0000
committer | Erlang/OTP <[email protected]> | 2009-11-20 14:54:40 +0000
commit | 84adefa331c4159d432d22840663c38f155cd4c1 (patch)
tree | bff9a9c66adda4df2106dfd0e5c053ab182a12bd /system/doc/efficiency_guide
The R13B03 release. (tag: OTP_R13B03)
Diffstat (limited to 'system/doc/efficiency_guide')
25 files changed, 4338 insertions, 0 deletions
diff --git a/system/doc/efficiency_guide/Makefile b/system/doc/efficiency_guide/Makefile new file mode 100644 index 0000000000..f51313de84 --- /dev/null +++ b/system/doc/efficiency_guide/Makefile @@ -0,0 +1,117 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2001-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# +# +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include $(ERL_TOP)/erts/vsn.mk +#VSN=$(SYSTEM_VSN) + +APPLICATION=otp-system-documentation +# ---------------------------------------------------- +# Release directory specification +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/doc/efficiency_guide + +# ---------------------------------------------------- +# Target Specs +# ---------------------------------------------------- +XML_PART_FILES = part.xml + +include xmlfiles.mk + +XML_CHAPTER_FILES = $(EFF_GUIDE_CHAPTER_FILES) + +TOPDOCDIR=.. + +BOOK_FILES = book.xml + +PS_FILES = digger.ps + +XML_FILES = \ + $(BOOK_FILES) $(XML_CHAPTER_FILES) \ + $(XML_PART_FILES) + +# ---------------------------------------------------- + +C_FILES = \ + +ERL_FILES = all.erl\ + bench.erl\ + call_bm.erl + +HRL_FILES = bench.hrl + + +MISC_FILES = call_result.html\ + README + +HTML_FILES = \ + $(XML_PART_FILES:%.xml=%.html) + +HTMLDIR = ../html/efficiency_guide + +EXTRA_FILES = $(ERL_FILES) \ + $(HRL_FILES) \ + $(MISC_FILES) + +HTML_UG_FILE = $(HTMLDIR)/users_guide.html + +# ---------------------------------------------------- +# FLAGS +# ---------------------------------------------------- +XML_FLAGS += +DVIPS_FLAGS += + +# ---------------------------------------------------- +# Targets +# ---------------------------------------------------- +docs: html + +local_docs: PDFDIR=../../pdf + +html: $(GIF_FILES) $(HTML_UG_FILE) + +debug opt: + +clean clean_docs: + rm -rf $(HTMLDIR) + rm -f $(TOP_PDF_FILE) $(TOP_PDF_FILE:%.pdf=%.fo) + rm -f errs core *~ + +# ---------------------------------------------------- +# Release Target +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +release_docs_spec: docs +# $(INSTALL_DIR) $(RELEASE_PATH)/pdf +# $(INSTALL_DATA) $(TOP_PDF_FILE) $(RELEASE_PATH)/pdf + $(INSTALL_DIR) $(RELSYSDIR) + $(INSTALL_DATA) $(GIF_FILES) $(EXTRA_FILES) $(HTMLDIR)/*.html \ + $(RELSYSDIR) + + +release_spec: + + + diff --git a/system/doc/efficiency_guide/README b/system/doc/efficiency_guide/README new file mode 100644 index 0000000000..6830a46d9e --- /dev/null +++ b/system/doc/efficiency_guide/README @@ -0,0 +1,122 @@ +Benchmark framework +------------------- + +This benchmark framework consists of the files: +bench.erl - see bench module below +bench.hrl - Defines some useful macros +all.erl - see all module below + +bench module +----------- + +The module bench is a 
generic module that measures execution time +of functions in callback modules and writes an html-report on the outcome. + +When you execute the function bench:run/0 it will compile and run all +benchmark modules in the current directory. + +all module +----------- + +In the all module there is a function called releases/0 that you can +edit to contain all your erlang installations and then you can +run your benchmarks on several erlang versions using only one command i.e. +all:run(). + +Requirements on callback modules +--------------------------------- + +* A callback module must be named <callbackModuleName>_bm.erl + +* The module must export the function benchmarks/0 that must return: + {Iterations, [Name1,Name2...]} where Iterations is the number of + times each benchmark should be run. Name1, Name2 and so one are the + name of exported functions in the module. + +* The exported functions Name1 etc. must take one argument i.e. the number + of iterations and should return the atom ok. + +* The functions in a benchmark module should represent different + ways/different sequential algorithms for doing something. And the + result will be how fast they are compared to each other. + +Files created +-------------- + +Files that are created in the current directory are *.bmres and +index.html. The file(s) with the extension "bmres" are an intermediate +representation of the benchmark results and is only meant to be read +by the reporting mechanism defined in bench.erl. The index.html file +is the report telling you how good the benchmarks are in comparison to +each other. If you run your test on several erlang releases the +html-file will include the result for all versions. + + +Pitfalls +--------- +To get meaningful measurements, you should make sure that: + +* The total execution time is at least several seconds. + +* That any time spent in setup before entering the measurement loop is very + small compared to the total time. + +* That time spent by the loop itself is small compared to the total execution + time + +Consider the following example of a benchmark function that does +a local function call. + +local_call(0) -> ok; +local_call(Iter) -> + foo(), % Local function call + local_call(Iter-1). + +The problem is that both "foo()" and "local_call(Iter-1)" takes about +the same amount of time. To get meaningful figures you'll need to make +sure that the loop overhead will not be visible. In this case we can +take help of a macro in bench.hrl to repeat the local function call +many times, making sure that time spent calling the local function is +relatively much longer than the time spent iterating. Of course, all +benchmarks in the same module must be repeated the same number of +times; thus external_call will look like + +external_call(0) -> ok; +external_call(Iter) -> + ?rep20(?MODULE:foo()), + external_call(Iter-1). + +This technique is only necessary if the operation we are testing executes +really fast. + +If you for instance want to test a sort routine we can keep it simple: + +sorted(Iter) -> + do_sort(Iter, lists:seq(0, 63)). + +do_sort(0, List) -> ok; +do_sort(Iter, List) -> + lists:sort(List), + do_sort(Iter-1, List). + +The call to lists:seq/2 is only done once. The loop overhead in the +do_sort/2 function is small compared to the execution time of lists:sort/1. + +Error handling +--------------- + +Any error enforced by a callback module will result in exit of the benchmark +program and an errormessage that should give a good idea of what is wrong. 
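
Putting the rules above together, a complete callback module might look
like the following sketch. The module name sample_bm, the iteration
count and the measured operations are only illustrative examples, not
part of this framework; the only fixed points are the _bm suffix, the
benchmarks/0 contract and the ?rep20 macro from bench.hrl.

-module(sample_bm).
-include("bench.hrl").

-export([benchmarks/0]).
-export([local_call/1, external_call/1, foo/0]).

%% Run each benchmark function 100000 times.
benchmarks() ->
    {100000, [local_call, external_call]}.

%% Local call, repeated 20 times per iteration so that the loop
%% overhead is small compared to the measured operation.
local_call(0) -> ok;
local_call(Iter) ->
    ?rep20(foo()),
    local_call(Iter-1).

%% Fully qualified (external) call, repeated the same number of times.
external_call(0) -> ok;
external_call(Iter) ->
    ?rep20(?MODULE:foo()),
    external_call(Iter-1).

foo() -> ok.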
+ + + + + + + + + + + + + diff --git a/system/doc/efficiency_guide/advanced.xml b/system/doc/efficiency_guide/advanced.xml new file mode 100644 index 0000000000..0ec3afbd59 --- /dev/null +++ b/system/doc/efficiency_guide/advanced.xml @@ -0,0 +1,204 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE chapter SYSTEM "chapter.dtd"> + +<chapter> + <header> + <copyright> + <year>2001</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + </legalnotice> + + <title>Advanced</title> + <prepared>Kenneth Lundin</prepared> + <docno></docno> + <date>2001-08-21</date> + <rev></rev> + <file>advanced.xml</file> + </header> + + <section> + <title>Memory</title> + <p>A good start when programming efficiently is to have knowledge about + how much memory different data types and operations require. It is + implementation-dependent how much memory the Erlang data types and + other items consume, but here are some figures for + erts-5.2 system (OTP release R9B). (There have been no significant + changes in R13.)</p> + + <p>The unit of measurement is memory words. There exists both a 32-bit + and a 64-bit implementation, and a word is therefore, 4 bytes or + 8 bytes, respectively.</p> + <table> + <row> + <cell align="center" valign="middle">Data type</cell> + <cell align="center" valign="middle">Memory size</cell> + </row> + <row> + <cell align="left" valign="middle">Integer (-16#7FFFFFF < i <16#7FFFFFF)</cell> + <cell align="left" valign="middle">1 word</cell> + </row> + <row> + <cell align="left" valign="middle">Integer (big numbers)</cell> + <cell align="left" valign="middle">3..N words</cell> + </row> + <row> + <cell align="left" valign="middle">Atom</cell> + <cell align="left" valign="middle">1 word. Note: an atom refers into + an atom table which also consumes memory. + The atom text is stored once for each unique atom in this table. + The atom table is <em>not</em> garbage-collected.</cell> + </row> + <row> + <cell align="left" valign="middle">Float</cell> + <cell align="left" valign="middle">On 32-bit architectures: 4 words <br></br> +On 64-bit architectures: 3 words</cell> + </row> + <row> + <cell align="left" valign="middle">Binary</cell> + <cell align="left" valign="middle">3..6 + data (can be shared)</cell> + </row> + <row> + <cell align="left" valign="middle">List</cell> + <cell align="left" valign="middle">1 word per element + the size of each element</cell> + </row> + <row> + <cell align="left" valign="middle">String (is the same as a list of integers)</cell> + <cell align="left" valign="middle">2 words per character</cell> + </row> + <row> + <cell align="left" valign="middle">Tuple</cell> + <cell align="left" valign="middle">2 words + the size of each element</cell> + </row> + <row> + <cell align="left" valign="middle">Pid</cell> + <cell align="left" valign="middle">1 word for a process identifier from the current local node, and 5 words for a process identifier from another node. 
Note: a process identifier refers into a process table and a node table which also consumes memory.</cell> + </row> + <row> + <cell align="left" valign="middle">Port</cell> + <cell align="left" valign="middle">1 word for a port identifier from the current local node, and 5 words for a port identifier from another node. Note: a port identifier refers into a port table and a node table which also consumes memory.</cell> + </row> + <row> + <cell align="left" valign="middle">Reference</cell> + <cell align="left" valign="middle">On 32-bit architectures: 5 words for a reference from the current local node, and 7 words for a reference from another node. <br></br> +On 64-bit architectures: 4 words for a reference from the current local node, and 6 words for a reference from another node. Note: a reference refers into a node table which also consumes memory.</cell> + </row> + <row> + <cell align="left" valign="middle">Fun</cell> + <cell align="left" valign="middle">9..13 words + size of environment. Note: a fun refers into a fun table which also consumes memory.</cell> + </row> + <row> + <cell align="left" valign="middle">Ets table</cell> + <cell align="left" valign="middle">Initially 768 words + the size of each element (6 words + size of Erlang data). The table will grow when necessary.</cell> + </row> + <row> + <cell align="left" valign="middle">Erlang process</cell> + <cell align="left" valign="middle">327 words when spawned including a heap of 233 words.</cell> + </row> + <tcaption>Memory size of different data types</tcaption> + </table> + </section> + + <section> + <title>System limits</title> + <p>The Erlang language specification puts no limits on number of processes, + length of atoms etc., but for performance and memory saving reasons, + there will always be limits in a practical implementation of the Erlang + language and execution environment.</p> + <taglist> + <tag><em>Processes</em></tag> + <item> + <p>The maximum number of simultaneously alive Erlang processes is + by default 32768. This limit can be raised up to at most 268435456 + processes at startup (see documentation of the system flag + <seealso marker="erts:erl#max_processes">+P</seealso> in the + <seealso marker="erts:erl">erl(1)</seealso> documentation). + The maximum limit of 268435456 processes will at least on a 32-bit + architecture be impossible to reach due to memory shortage.</p> + </item> + <tag><em>Distributed nodes</em></tag> + <item> + <taglist> + <tag>Known nodes</tag> + <item> + <p>A remote node Y has to be known to node X if there exist + any pids, ports, references, or funs (Erlang data types) from Y + on X, or if X and Y are connected. The maximum number of remote + nodes simultaneously/ever known to a node is limited by the + <seealso marker="#atoms">maximum number of atoms</seealso> + available for node names. All data concerning remote nodes, + except for the node name atom, are garbage-collected.</p> + </item> + <tag>Connected nodes</tag> + <item>The maximum number of simultaneously connected nodes is limited by + either the maximum number of simultaneously known remote nodes, + <seealso marker="#ports">the maximum number of (Erlang) ports</seealso> + available, or + <seealso marker="#files_sockets">the maximum number of sockets</seealso> + available.</item> + </taglist> + </item> + <tag><em>Characters in an atom</em></tag> + <item>255</item> + <tag><em>Atoms </em></tag> + <item> <marker id="atoms"></marker> +The maximum number of atoms is 1048576. 
</item> + <tag><em>Ets-tables</em></tag> + <item>The default is 1400, can be changed with the environment variable <c>ERL_MAX_ETS_TABLES</c>.</item> + <tag><em>Elements in a tuple</em></tag> + <item>The maximum number of elements in a tuple is 67108863 (26 bit unsigned integer). Other factors + such as the available memory can of course make it hard to create a tuple of that size. </item> + <tag><em>Size of binary</em></tag> + <item>In the 32-bit implementation of Erlang, 536870911 bytes is the + largest binary that can be constructed or matched using the bit syntax. + (In the 64-bit implementation, the maximum size is 2305843009213693951 bytes.) + If the limit is exceeded, bit syntax construction will fail with a + <c>system_limit</c> exception, while any attempt to match a binary that is + too large will fail. + This limit is enforced starting with the R11B-4 release; in earlier releases, + operations on too large binaries would in general either fail or give incorrect + results. + In future releases of Erlang/OTP, other operations that create binaries (such as + <c>list_to_binary/1</c>) will probably also enforce the same limit.</item> + <tag><em>Total amount of data allocated by an Erlang node</em></tag> + <item>The Erlang runtime system can use the complete 32 (or 64) bit address space, + but the operating system often limits a single process to use less than that.</item> + <tag><em>length of a node name</em></tag> + <item>An Erlang node name has the form host@shortname or host@longname. The node name is + used as an atom within the system so the maximum size of 255 holds for the node name too.</item> + <tag><em>Open ports</em></tag> + <item> + <marker id="ports"></marker> + <p>The maximum number of simultaneously open Erlang ports is + by default 1024. This limit can be raised up to at most 268435456 + at startup (see environment variable + <seealso marker="erts:erlang#ERL_MAX_PORTS">ERL_MAX_PORTS</seealso> + in <seealso marker="erts:erlang">erlang(3)</seealso>) + The maximum limit of 268435456 open ports will at least on a 32-bit + architecture be impossible to reach due to memory shortage.</p> + </item> + <tag><em>Open files, and sockets</em></tag> + <item> <marker id="files_sockets"></marker> + + The maximum number of simultaneously open files and sockets + depend on + <seealso marker="#ports">the maximum number of Erlang ports</seealso> + available, and operating system specific settings and limits.</item> + <tag><em>Number of arguments to a function or fun</em></tag> + <item>256</item> + </taglist> + </section> +</chapter> + diff --git a/system/doc/efficiency_guide/all.erl b/system/doc/efficiency_guide/all.erl new file mode 100644 index 0000000000..a0f7809422 --- /dev/null +++ b/system/doc/efficiency_guide/all.erl @@ -0,0 +1,106 @@ +%% ``The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved via the world wide web at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% The Initial Developer of the Original Code is Ericsson Utvecklings AB. +%% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings +%% AB. 
All Rights Reserved.'' +%% +%% $Id$ +%% +-module(all). + +%% User interface +-export([run/0]). + +%% Interna constants +-define(NORMAL, 0). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% Interface +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%--------------------------------------------------------------------------- +%% run() -> _ +%% +%% Runs all benchmark modules in the current directory on all erlang +%% installations specified by releases/0 +%%--------------------------------------------------------------------------- +run() -> + %% Delete previous intermediate test result files. + lists:foreach(fun(F) -> file:delete(F) end, filelib:wildcard("*.bmres")), + lists:foreach(fun run/1, releases()). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% Internal functions +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%--------------------------------------------------------------------------- +%% run(Release) -> _ +%% Release = string() - Erlang release +%% Help functions to run/0 +%%--------------------------------------------------------------------------- +run(Release) -> + command(Release ++ " -noshell -compile bench -s erlang halt"), + command(Release ++ " -noshell -s bench run -s erlang halt"). +%%--------------------------------------------------------------------------- +%% command(Command) -> _ +%% Command = string() - is the name and arguments of the external +%% program which will be run +%%--------------------------------------------------------------------------- +command(Command) -> + io:format("~s\n", [Command]), % Progress info to user + Port = open_port({spawn,Command}, [exit_status, in]), + print_output(Port). +%%--------------------------------------------------------------------------- +%% print_output(Port) -> _ +%% Port = port() +%% Print data from the port i.e. output from external program, +%% on standard out. +%%--------------------------------------------------------------------------- +print_output(Port) -> + receive + {Port, {data,Bytes}} -> + io:put_chars(Bytes), + print_output(Port); + {Port, {exit_status, ?NORMAL}} -> + ok + end. +%%--------------------------------------------------------------------------- +%% run() -> Releases +%% Releases = [Release |_] +%% Release = string() - Erlang release +%% Defines which erlang releases to run on +%% --- Change this function to reflect your own erlang installations --- +%%--------------------------------------------------------------------------- +releases() -> + ["/usr/local/otp/releases/otp_beam_sunos5_r7b01_patched/bin/erl", + "/usr/local/otp/releases/otp_beam_sunos5_r8b_patched/bin/erl"]. + + + + + + + + + + + + + + + + + + + + + diff --git a/system/doc/efficiency_guide/appendix.xml b/system/doc/efficiency_guide/appendix.xml new file mode 100644 index 0000000000..631ef9bee7 --- /dev/null +++ b/system/doc/efficiency_guide/appendix.xml @@ -0,0 +1,72 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE chapter SYSTEM "chapter.dtd"> + +<chapter> + <header> + <copyright> + <year>2002</year> + <year>2007</year> + <holder>Ericsson AB, All Rights Reserved</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. 
If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + The Initial Developer of the Original Code is Ericsson AB. + </legalnotice> + + <title>Appendix - Programs</title> + <prepared>Ingela Anderton</prepared> + <docno></docno> + <date>2002-09-23</date> + <rev></rev> + <file>appendix.sgml</file> + </header> + <p>This appendix contains the programs referred to in the previous + chapters within the Efficiency Guide.</p> + + <section> + <marker id="bench"></marker> + <title>bench.erl</title> + <codeinclude file="bench.erl" tag="" type="none"></codeinclude> + </section> + + <section> + <marker id="bench_hrl"></marker> + <title>bench.hrl</title> + <codeinclude file="bench.hrl" tag="" type="none"></codeinclude> + </section> + + <section> + <marker id="all"></marker> + <title>all.erl</title> + <codeinclude file="all.erl" tag="" type="none"></codeinclude> + </section> + + <section> + <marker id="readme"></marker> + <title>README</title> + <codeinclude file="README" tag="" type="none"></codeinclude> + </section> + + <section> + <marker id="call_bm"></marker> + <title>call_bm.erl</title> + <codeinclude file="call_bm.erl" tag="" type="none"></codeinclude> + </section> + + <section> + <marker id="call_result"></marker> + <title>index.html</title> + <codeinclude file="call_result.html" tag="" type="none"></codeinclude> + </section> +</chapter> + diff --git a/system/doc/efficiency_guide/bench.erl b/system/doc/efficiency_guide/bench.erl new file mode 100644 index 0000000000..8f26b8d0af --- /dev/null +++ b/system/doc/efficiency_guide/bench.erl @@ -0,0 +1,488 @@ +%% ``The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved via the world wide web at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% The Initial Developer of the Original Code is Ericsson Utvecklings AB. +%% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings +%% AB. All Rights Reserved.'' +%% +%% $Id$ +%% + +-module(bench). + +%% User interface +-export([run/0]). + +%% Exported to be used in spawn +-export([measure/4]). + +%% Internal constants +-define(MAX, 999999999999999). +-define(RANGE_MAX, 16#7ffffff). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% Interface +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%--------------------------------------------------------------------------- +%% run() -> _ +%% +%% Compiles and runs all benchmarks in the current directory, +%% and creates a report +%%--------------------------------------------------------------------------- +run() -> + run(compiler_options()). 
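
%% Illustrative usage (assuming an Erlang shell started in the
%% directory that holds bench.erl, bench.hrl and the *_bm modules):
%%
%%     1> bench:run().
%%
%% This compiles the modules in the current directory, runs the
%% benchmark functions of every *_bm module and writes the index.html
%% report. To compare several Erlang installations, edit
%% all:releases/0 and call all:run() instead.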
+ + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% Generic Benchmark functions +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%--------------------------------------------------------------------------- +%% compiler_options() -> OptionsList +%% OptionsList = list() - See Erlang/OTP module compile +%%--------------------------------------------------------------------------- +compiler_options() -> + [report_errors, report_warnings]. + +%%--------------------------------------------------------------------------- +%% run(OptionsList) -> +%% OptionsList = list() - See Erlang/OTP module compile +%% +%% Help function to run/0. +%%--------------------------------------------------------------------------- +run(OptionsList) -> + Bms = compile_benchmarks(OptionsList), + run_benchmarks(Bms), + report(). + +%%--------------------------------------------------------------------------- +%% compile_benchmarks(OptionsList) -> [BmInfo| _] +%% OptionsList = list() - See Erlang/OTP module compile +%% BmInfo = {Module, Iterations, [BmFunctionName| _]} +%% Module = atom() +%% Iterations = integer() +%% BmFunctionName = atom() +%% +%% Compiles all benchmark modules in the current directory and +%% returns info about the benchmarks. +%%--------------------------------------------------------------------------- +compile_benchmarks(OptionsList) -> + {ok, FilesInCurrentDir} = file:list_dir("."), + ErlFiles = [ErlFile || ErlFile <- lists:sort(FilesInCurrentDir), + lists:suffix(".erl", ErlFile)], + lists:foldr(fun(File, BmInfoAcc) -> + case lists:suffix("_bm.erl", File) of + true -> + BmInfo = bm_compile(File, OptionsList), + [BmInfo | BmInfoAcc]; + false -> + just_compile(File, OptionsList), + BmInfoAcc + end + end, [], ErlFiles). + +%%--------------------------------------------------------------------------- +%% just_compile(FileName, OptionsList) -> ok +%% FileName = string() +%% OptionsList = list() - See Erlang/OTP module compile +%% +%% Compiles a support module. +%%--------------------------------------------------------------------------- +just_compile(FileName, OptionsList) -> + io:format("Compiling ~s...\n", [FileName]), % Progress info to user + case c:c(FileName, OptionsList) of + {ok, _Mod} -> + ok; + %% If compilation fails there is no point in trying to continue + error -> + Reason = + lists:flatten( + io_lib:format("Could not compile file ~s", [FileName])), + exit(self(), Reason) + end. +%%--------------------------------------------------------------------------- +%% bm_compile(FileName, OptionsList) -> BmInfo +%% FileName = string() +%% OptionsList = list() - See Erlang/OTP module compile +%% BmInfo = {Module, Iterations, [BmFunctionName| _]} +%% Iterations = integer() +%% Module = atom() +%% BmFunctionName = atom() +%% +%% Compiles the benchmark module implemented in <FileName> and returns +%% information about the benchmark tests. +%%--------------------------------------------------------------------------- +bm_compile(FileName, OptionsList) -> + io:format("Compiling ~s...\n", [FileName]), % Progress info to user + case c:c(FileName, OptionsList) of + {ok, Mod} -> + bm_cases(Mod); + %% If compilation fails there is no point in trying to continue + error -> + Reason = + lists:flatten( + io_lib:format("Could not compile file ~s", [FileName])), + exit(self(), Reason) + end. 
+%%--------------------------------------------------------------------------- +%% bm_cases(Module) -> {Module, Iter, [BmFunctionName |_]} +%% Module = atom() +%% Iter = integer() +%% BmFunctionName = atom() +%% +%% Fetches the number of iterations and the names of the benchmark +%% functions for the module <Module>. +%%--------------------------------------------------------------------------- +bm_cases(Module) -> + case catch Module:benchmarks() of + {Iter, BmList} when integer(Iter), list(BmList) -> + {Module, Iter, BmList}; + %% The benchmark is incorrect implemented there is no point in + %% trying to continue + Other -> + Reason = + lists:flatten( + io_lib:format("Incorrect return value: ~p " + "from ~p:benchmarks()", + [Other, Module])), + exit(self(), Reason) + end. +%%--------------------------------------------------------------------------- +%% run_benchmarks(Bms) -> +%% Bms = [{Module, Iter, [BmFunctionName |_]} | _] +%% Module = atom() +%% Iter = integer() +%% BmFunctionName = atom() +%% +%% Runs all the benchmark tests described in <Bms>. +%%--------------------------------------------------------------------------- +run_benchmarks(Bms) -> + Ver = erlang:system_info(version), + Machine = erlang:system_info(machine), + SysInfo = {Ver,Machine}, + + Res = [bms_run(Mod, Tests, Iter, SysInfo) || {Mod,Iter,Tests} <- Bms], + + %% Create an intermediate file that is later used to generate a bench + %% mark report. + Name = Ver ++ [$.|Machine] ++ ".bmres", + {ok, IntermediatFile} = file:open(Name, [write]), + + %% Create mark that identifies version of the benchmark modules + io:format(IntermediatFile, "~p.\n", [erlang:phash(Bms, ?RANGE_MAX)]), + + io:format(IntermediatFile, "~p.\n", [Res]), + file:close(IntermediatFile). + +%%--------------------------------------------------------------------------- +%% bms_run(Module, BmTests, Iter, Info) -> +%% Module = atom(), +%% BmTests = [BmFunctionName|_], +%% BmFunctionName = atom() +%% Iter = integer(), +%% SysInfo = {Ver, Machine} +%% Ver = string() +%% Machine = string() +%% +%% Runs all benchmark tests in module <Module>. +%%--------------------------------------------------------------------------- +bms_run(Module, BmTests, Iter, SysInfo) -> + io:format("Running ~s:", [Module]), % Progress info to user + Res = + {Module,{SysInfo,[{Bm, bm_run(Module, Bm, Iter)} || Bm <- BmTests]}}, + io:nl(), + Res. +%%--------------------------------------------------------------------------- +%% bm_run(Module, BmTest, Iter) -> Elapsed +%% Module = atom(), +%% BmTest = atom(), +%% Iter = integer() +%% Elapsed = integer() - elapsed time in milliseconds. +%% +%% Runs the benchmark Module:BmTest(Iter) +%%--------------------------------------------------------------------------- +bm_run(Module, BmTest, Iter) -> + io:format(" ~s", [BmTest]), % Progress info to user + spawn_link(?MODULE, measure, [self(), Module, BmTest, Iter]), + receive + {Elapsed, ok} -> + Elapsed; + {_Elapsed, Fault} -> + io:nl(), + Reason = + lists:flatten( + io_lib:format("~w", [Fault])), + exit(self(), Reason) + end. +%%--------------------------------------------------------------------------- +%% measure(Parent, Module, BmTest, Iter) -> _ +%% Parent = pid(), +%% Module = atom(), +%% BmTest = atom(), +%% Iter = integer() +%% +%% Measures the time it take to execute Module:Bm(Iter) +%% and send the result to <Parent>. 
+%%--------------------------------------------------------------------------- +measure(Parent, Module, BmTest, Iter) -> + statistics(runtime), + Res = (catch apply(Module, BmTest, [Iter])), + {_TotalRunTime, TimeSinceLastCall} = statistics(runtime), + Parent ! {TimeSinceLastCall, Res}. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% Report functions +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%--------------------------------------------------------------------------- +%% report() -> _ +%% +%% Creates a report of the bench marking test that appeals to a human. +%% Currently this means creating a html-file. (Other formats could be added) +%%--------------------------------------------------------------------------- +report() -> + {ok, AllFiles} = file:list_dir("."), + BmResultFiles = [File || File <- AllFiles, lists:suffix(".bmres", File)], + + Results = fetch_bmres_data(BmResultFiles), + create_report(Results). + +%%--------------------------------------------------------------------------- +%% fetch_bmres_data(BmResultFiles) -> Results +%% BmResultFiles = [FileName | _] +%% FileName = string() +%% Results = [[{Bm, Res} | _]] +%% Bm = atom() - Name of benchmark module +%% Res = [{VersionInfo, [{Test, Time} | _]}] +%% VersionInfo = {Ver, Machine} +%% Ver = string() +%% Machine = string() +%% Test = atom() +%% Time = integer() +%% +%% Reads result data from intermediate files +%%--------------------------------------------------------------------------- +fetch_bmres_data(BmResultFiles) -> + fetch_bmres_data(BmResultFiles, [], undefined). + +%%--------------------------------------------------------------------------- +%% fetch_bmres_data(BmResultFiles, AccResData, Check) -> Results +%% BmResultFiles = [FileName | _] +%% FileName = string() +%% AccResData = see Results fetch_bmres_data/1 +%% Check = integer() | undefined (first time) +%% +%% Help function to fetch_bmres_data/1 +%%--------------------------------------------------------------------------- +fetch_bmres_data([], AccResData, _Check) -> + AccResData; + +fetch_bmres_data([Name | BmResultFiles], AccResData, Check) -> + {DataList, NewCheck} = read_bmres_file(Name, Check), + fetch_bmres_data(BmResultFiles, [DataList| AccResData], NewCheck). + +%%--------------------------------------------------------------------------- +%% read_bmres_file(Name, Check) -> +%% Name = string() +%% Check = integer() | undefined +%% +%% Reads the data from the result files. Checks that all result +%% files where created with the same set of tests. +%%--------------------------------------------------------------------------- +read_bmres_file(Name, Check) -> + case file:consult(Name) of + {ok, [Check1, List]} when Check =:= undefined, integer(Check1) -> + {List, Check1}; + {ok, [Check, List]} when integer(Check) -> + {List, Check}; + {ok, [Check1, _List]} when integer(Check1) -> + Reason = + lists:flatten( + io_lib:format("Different test setup, remove old setup " + "result by removing *.bmres files and " + "try again", [])), + exit(self(), Reason); + {error, Reason} when atom(Reason) -> + exit(self(), Reason); + {error, Reason} -> + exit(self(), file:format(Reason)) + end. + +%%--------------------------------------------------------------------------- +%% create_report(Results) -> +%% Results = see Results fetch_bmres_data/1 +%% +%% Organizes <Result> so it will be right for create_html_report/1 +%% i.e. 
group results for the same benchmark test, run on different versions +%% of erlang. +%%--------------------------------------------------------------------------- +create_report(Results) -> + Dictionary = + lists:foldl(fun(BmResultList, Dict0) -> + lists:foldl(fun({Bm, VerResult}, Dict1) -> + dict:append(Bm, VerResult, + Dict1) + end,Dict0, BmResultList) + end, + dict:new(), Results), + + create_html_report(dict:to_list(Dictionary)). +%%--------------------------------------------------------------------------- +%% create_html_report(ResultList) -> _ +%% ResultList = [{Bm, Res} | _] +%% Bm = atom() - Name of benchmark module +%% Res = [{VersionInfo, [{Test, Time} | _]} | _] +%% VersionInfo = {Ver, Machine} +%% Ver = string() +%% Machine = string() +%% Test = atom() +%% Time = integer() +%% +%% Writes the result to an html-file +%%--------------------------------------------------------------------------- +create_html_report(ResultList) -> + + {ok, OutputFile} = file:open("index.html", [write]), + + %% Create the begining of the result html-file. + Head = Title = "Benchmark Results", + io:put_chars(OutputFile, "<html>\n"), + io:put_chars(OutputFile, "<head>\n"), + io:format(OutputFile, "<title>~s</title>\n", [Title]), + io:put_chars(OutputFile, "</head>\n"), + io:put_chars(OutputFile, "<body bgcolor=\"#FFFFFF\" text=\"#000000\"" ++ + " link=\"#0000FF\" vlink=\"#800080\" alink=\"#FF0000\">\n"), + io:format(OutputFile, "<h1>~s</h1>\n", [Head]), + + %% Add the result tables + lists:foreach(fun(Element) -> + create_html_table(OutputFile, Element) end, + ResultList), + + %% Put in the end-html tags + io:put_chars(OutputFile, "</body>\n"), + io:put_chars(OutputFile, "</html>\n"), + + file:close(OutputFile). + +%%--------------------------------------------------------------------------- +%% create_html_table(File, {Bm, Res}) -> _ +%% File = file() - html file to write data to. +%% Bm = atom() - Name of benchmark module +%% Res = [{VersionInfo, [{Test, Time} | _]}] +%% VersionInfo = {Ver, Machine} +%% Ver = string() +%% Machine = string() +%% Test = atom() +%% Time = integer() +%% +%% Creates a html table that displays the result of the benchmark <Bm>. +%%--------------------------------------------------------------------------- +create_html_table(File, {Bm, Res}) -> + + {MinTime, Order} = min_time_and_sort(Res), + + io:format(File, "<h2>~s</h2>\n" , [Bm]), + + %% Fun that calculates relative measure values and puts them in + %% a dictionary + RelativeMesureFun = fun({TestName, Time}, Dict1) -> + dict:append(TestName, Time/MinTime, Dict1) + end, + + %% For all erlang versions that the benchmark tests has been run, + %% calculate the relative measure values and put them in a dictionary. 
+ ResultDict = + lists:foldl(fun({_VerInfo, Bms}, Dict0) -> + lists:foldl(RelativeMesureFun, Dict0, Bms) end, + dict:new(), Res), + + %% Create the table and its headings + io:put_chars(File, "<table border=0 cellpadding=1><tr>" + "<td bgcolor=\"#000000\">\n"), + io:put_chars(File, "<table cellpadding=3 border=0 cellspacing=1>\n"), + io:put_chars(File, "<tr bgcolor=white>"), + io:put_chars(File, "<td>Test</td>"), + Heads = table_headers(Res), + lists:foreach(fun({Ver,Machine}) -> io:format(File, "<td>~s<br>~s</td>", + [Ver,Machine]) end, Heads), + io:put_chars(File, "</tr>\n"), + + %% Create table rows + lists:foreach(fun(Name) -> + create_html_row(File, Name, ResultDict) + end, Order), + + %% Tabel end-tags + io:put_chars(File, "</table></td></tr></table>\n"), + + %% Create link to benchmark source code + io:format(File, "<p><a href=\"~s.erl\">Source for ~s.erl</a>\n", + [Bm,Bm]). + +%%--------------------------------------------------------------------------- +%% create_html_row(File, Name, Dict) -> _ +%% File = file() - html file to write data to. +%% Name = atom() - Name of benchmark test +%% Dict = dict() - Dictonary where the relative time measures for +%% the test can be found. +%% +%% Creates an actual html table-row. +%%--------------------------------------------------------------------------- +create_html_row(File, Name, Dict) -> + ReletiveTimes = dict:fetch(Name, Dict), + io:put_chars(File, "<tr bgcolor=white>\n"), + io:format(File, "<td>~s</td>", [Name]), + lists:foreach(fun(Time) -> + io:format(File, "<td>~-8.2f</td>", [Time]) end, + ReletiveTimes), + io:put_chars(File, "</tr>\n"). + +%%--------------------------------------------------------------------------- +%% min_time_and_sort(ResultList) -> {MinTime, Order} +%% ResultList = [{VersionInfo, [{Test, Time} | _]}] +%% MinTime = integer() - The execution time of the fastes test +%% Order = [BmFunctionName|_] - the order of the testcases in +%% increasing execution time. +%% BmFunctionName = atom() +%%--------------------------------------------------------------------------- +min_time_and_sort(ResultList) -> + + %% Use the results from the run on the highest version + %% of Erlang as norm. + {_, TestRes} = + lists:foldl(fun ({{Ver, _}, ResList}, + {CurrentVer, _}) when Ver > CurrentVer -> + {Ver, ResList}; + (_, VerAndRes) -> + VerAndRes + end, {"0", []}, ResultList), + + {lists:foldl(fun ({_, Time0}, Min1) when Time0 < Min1 -> + Time0; + (_, Min1) -> + Min1 + end, ?MAX, TestRes), + [Name || {Name, _} <- lists:keysort(2, TestRes)]}. + +%%--------------------------------------------------------------------------- +%% table_headers(VerResultList) -> SysInfo +%% VerResultList = [{{Ver, Machine},[{BmFunctionName, Time}]} | _] +%% Ver = string() +%% Machine = string() +%% BmFunctionName = atom() +%% Time = integer() +%% SysInfo = {Ver, Machine} +%%--------------------------------------------------------------------------- +table_headers(VerResultList) -> + [SysInfo || {SysInfo, _} <- VerResultList]. diff --git a/system/doc/efficiency_guide/bench.hrl b/system/doc/efficiency_guide/bench.hrl new file mode 100644 index 0000000000..a98ca1c89e --- /dev/null +++ b/system/doc/efficiency_guide/bench.hrl @@ -0,0 +1,22 @@ +%% ``The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. 
If not, it can be +%% retrieved via the world wide web at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% The Initial Developer of the Original Code is Ericsson Utvecklings AB. +%% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings +%% AB. All Rights Reserved.'' +%% +%% $Id$ +%% +-define(rep5(X), X, X, X, X, X). +-define(rep10(X), ?rep5(X), ?rep5(X)). +-define(rep20(X), ?rep10(X), ?rep10(X)). +-define(rep40(X), ?rep20(X), ?rep20(X)). +-define(rep80(X), ?rep40(X), ?rep40(X)). diff --git a/system/doc/efficiency_guide/binaryhandling.xml b/system/doc/efficiency_guide/binaryhandling.xml new file mode 100644 index 0000000000..8746de4b60 --- /dev/null +++ b/system/doc/efficiency_guide/binaryhandling.xml @@ -0,0 +1,528 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE chapter SYSTEM "chapter.dtd"> + +<chapter> + <header> + <copyright> + <year>2007</year> + <year>2007</year> + <holder>Ericsson AB, All Rights Reserved</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + The Initial Developer of the Original Code is Ericsson AB. + </legalnotice> + + <title>Constructing and matching binaries</title> + <prepared>Bjorn Gustavsson</prepared> + <docno></docno> + <date>2007-10-12</date> + <rev></rev> + <file>binaryhandling.xml</file> + </header> + + <p>In R12B, the most natural way to write binary construction and matching is now + significantly faster than in earlier releases.</p> + + <p>To construct at binary, you can simply write</p> + + <p><em>DO</em> (in R12B) / <em>REALLY DO NOT</em> (in earlier releases)</p> + <code type="erl"><![CDATA[ +my_list_to_binary(List) -> + my_list_to_binary(List, <<>>). + +my_list_to_binary([H|T], Acc) -> + my_list_to_binary(T, <<Acc/binary,H>>); +my_list_to_binary([], Acc) -> + Acc.]]></code> + + <p>In releases before R12B, <c>Acc</c> would be copied in every iteration. + In R12B, <c>Acc</c> will be copied only in the first iteration and extra + space will be allocated at the end of the copied binary. In the next iteration, + <c>H</c> will be written in to the extra space. When the extra space runs out, + the binary will be reallocated with more extra space.</p> + + <p>The extra space allocated (or reallocated) will be twice the size of the + existing binary data, or 256, whichever is larger.</p> + + <p>The most natural way to match binaries is now the fastest:</p> + + <p><em>DO</em> (in R12B)</p> + <code type="erl"><![CDATA[ +my_binary_to_list(<<H,T/binary>>) -> + [H|my_binary_to_list(T)]; +my_binary_to_list(<<>>) -> [].]]></code> + + <section> + <title>How binaries are implemented</title> + + <p>Internally, binaries and bitstrings are implemented in the same way. 
+ In this section, we will call them <em>binaries</em> since that is what + they are called in the emulator source code.</p> + + <p>There are four types of binary objects internally. Two of them are + containers for binary data and two of them are merely references to + a part of a binary.</p> + + <p>The binary containers are called <em>refc binaries</em> + (short for <em>reference-counted binaries</em>) and <em>heap binaries</em>.</p> + + <p><marker id="refc_binary"></marker><em>Refc binaries</em> + consist of two parts: an object stored on + the process heap, called a <em>ProcBin</em>, and the binary object itself + stored outside all process heaps.</p> + + <p>The binary object can be referenced by any number of ProcBins from any + number of processes; the object contains a reference counter to keep track + of the number of references, so that it can be removed when the last + reference disappears.</p> + + <p>All ProcBin objects in a process are part of a linked list, so that + the garbage collector can keep track of them and decrement the reference + counters in the binary when a ProcBin disappears.</p> + + <p><marker id="heap_binary"></marker><em>Heap binaries</em> are small binaries, + up to 64 bytes, that are stored directly on the process heap. + They will be copied when the process + is garbage collected and when they are sent as a message. They don't + require any special handling by the garbage collector.</p> + + <p>There are two types of reference objects that can reference part of + a refc binary or heap binary. They are called <em>sub binaries</em> and + <em>match contexts</em>.</p> + + <p><marker id="sub_binary"></marker>A <em>sub binary</em> + is created by <c>split_binary/2</c> and when + a binary is matched out in a binary pattern. A sub binary is a reference + into a part of another binary (refc or heap binary, never into a another + sub binary). Therefore, matching out a binary is relatively cheap because + the actual binary data is never copied.</p> + + <p><marker id="match_context"></marker>A <em>match context</em> is + similar to a sub binary, but is optimized + for binary matching; for instance, it contains a direct pointer to the binary + data. For each field that is matched out of a binary, the position in the + match context will be incremented.</p> + + <p>In R11B, a match context was only using during a binary matching + operation.</p> + + <p>In R12B, the compiler tries to avoid generating code that + creates a sub binary, only to shortly afterwards create a new match + context and discard the sub binary. Instead of creating a sub binary, + the match context is kept.</p> + + <p>The compiler can only do this optimization if it can know for sure + that the match context will not be shared. If it would be shared, the + functional properties (also called referential transparency) of Erlang + would break.</p> + </section> + + <section> + <title>Constructing binaries</title> + + <p>In R12B, appending to a binary or bitstring</p> + + <code type="erl"><![CDATA[ +<<Binary/binary, ...>> +<<Binary/bitstring, ...>>]]></code> + + <p>is specially optimized by the <em>run-time system</em>. 
+ Because the run-time system handles the optimization (instead of + the compiler), there are very few circumstances in which the optimization + will not work.</p> + + <p>To explain how it works, we will go through this code</p> + + <code type="erl"><![CDATA[ +Bin0 = <<0>>, %% 1 +Bin1 = <<Bin0/binary,1,2,3>>, %% 2 +Bin2 = <<Bin1/binary,4,5,6>>, %% 3 +Bin3 = <<Bin2/binary,7,8,9>>, %% 4 +Bin4 = <<Bin1/binary,17>>, %% 5 !!! +{Bin4,Bin3} %% 6]]></code> + + <p>line by line.</p> + + <p>The first line (marked with the <c>%% 1</c> comment), assigns + a <seealso marker="#heap_binary">heap binary</seealso> to + the variable <c>Bin0</c>.</p> + + <p>The second line is an append operation. Since <c>Bin0</c> + has not been involved in an append operation, + a new <seealso marker="#refc_binary">refc binary</seealso> + will be created and the contents of <c>Bin0</c> will be copied + into it. The <em>ProcBin</em> part of the refc binary will have + its size set to the size of the data stored in the binary, while + the binary object will have extra space allocated. + The size of the binary object will be either twice the + size of <c>Bin0</c> or 256, whichever is larger. In this case + it will be 256.</p> + + <p>It gets more interesting in the third line. + <c>Bin1</c> <em>has</em> been used in an append operation, + and it has 255 bytes of unused storage at the end, so the three new bytes + will be stored there.</p> + + <p>Same thing in the fourth line. There are 252 bytes left, + so there is no problem storing another three bytes.</p> + + <p>But in the fifth line something <em>interesting</em> happens. + Note that we don't append to the previous result in <c>Bin3</c>, + but to <c>Bin1</c>. We expect that <c>Bin4</c> will be assigned + the value <c><<0,1,2,3,17>></c>. We also expect that + <c>Bin3</c> will retain its value + (<c><<0,1,2,3,4,5,6,7,8,9>></c>). + Clearly, the run-time system cannot write the byte <c>17</c> into the binary, + because that would change the value of <c>Bin3</c> to + <c><<0,1,2,3,4,17,6,7,8,9>></c>.</p> + + <p>What will happen?</p> + + <p>The run-time system will see that <c>Bin1</c> is the result + from a previous append operation (not from the latest append operation), + so it will <em>copy</em> the contents of <c>Bin1</c> to a new binary + and reserve extra storage and so on. (We will not explain here how the + run-time system can know that it is not allowed to write into <c>Bin1</c>; + it is left as an exercise to the curious reader to figure out how it is + done by reading the emulator sources, primarily <c>erl_bits.c</c>.)</p> + + <section> + <title>Circumstances that force copying</title> + + <p>The optimization of the binary append operation requires that + there is a <em>single</em> ProcBin and a <em>single reference</em> to the + ProcBin for the binary. The reason is that the binary object can be + moved (reallocated) during an append operation, and when that happens + the pointer in the ProcBin must be updated. If there would be more than + on ProcBin pointing to the binary object, it would not be possible to + find and update all of them.</p> + + <p>Therefore, certain operations on a binary will mark it so that + any future append operation will be forced to copy the binary. 
+ In most cases, the binary object will be shrunk at the same time + to reclaim the extra space allocated for growing.</p> + + <p>When appending to a binary</p> + + <code type="erl"><![CDATA[ +Bin = <<Bin0,...>>]]></code> + + <p>only the binary returned from the latest append operation will + support further cheap append operations. In the code fragment above, + appending to <c>Bin</c> will be cheap, while appending to <c>Bin0</c> + will force the creation of a new binary and copying of the contents + of <c>Bin0</c>.</p> + + <p>If a binary is sent as a message to a process or port, the binary + will be shrunk and any further append operation will copy the binary + data into a new binary. For instance, in the following code fragment</p> + + <code type="erl"><![CDATA[ +Bin1 = <<Bin0,...>>, +PortOrPid ! Bin1, +Bin = <<Bin1,...>> %% Bin1 will be COPIED +]]></code> + + <p><c>Bin1</c> will be copied in the third line.</p> + + <p>The same thing happens if you insert a binary into an <em>ets</em> + table or send it to a port using <c>erlang:port_command/2</c>.</p> + + <p>Matching a binary will also cause it to shrink and the next append + operation will copy the binary data:</p> + + <code type="erl"><![CDATA[ +Bin1 = <<Bin0,...>>, +<<X,Y,Z,T/binary>> = Bin1, +Bin = <<Bin1,...>> %% Bin1 will be COPIED +]]></code> + + <p>The reason is that a <seealso marker="#match_context">match context</seealso> + contains a direct pointer to the binary data.</p> + + <p>If a process simply keeps binaries (either in "loop data" or in the process + dictionary), the garbage collector may eventually shrink the binaries. + If only one such binary is kept, it will not be shrunk. If the process later + appends to a binary that has been shrunk, the binary object will be reallocated + to make place for the data to be appended.</p> + </section> + + </section> + + <section> + <title>Matching binaries</title> + + <p>We will revisit the example shown earlier</p> + + <p><em>DO</em> (in R12B)</p> + <code type="erl"><![CDATA[ +my_binary_to_list(<<H,T/binary>>) -> + [H|my_binary_to_list(T)]; +my_binary_to_list(<<>>) -> [].]]></code> + + <p>too see what is happening under the hood.</p> + + <p>The very first time <c>my_binary_to_list/1</c> is called, + a <seealso marker="#match_context">match context</seealso> + will be created. The match context will point to the first + byte of the binary. One byte will be matched out and the match context + will be updated to point to the second byte in the binary.</p> + + <p>In R11B, at this point a <seealso marker="#sub_binary">sub binary</seealso> + would be created. In R12B, + the compiler sees that there is no point in creating a sub binary, + because there will soon be a call to a function (in this case, + to <c>my_binary_to_list/1</c> itself) that will immediately + create a new match context and discard the sub binary.</p> + + <p>Therefore, in R12B, <c>my_binary_to_list/1</c> will call itself + with the match context instead of with a sub binary. The instruction + that initializes the matching operation will basically do nothing + when it sees that it was passed a match context instead of a binary.</p> + + <p>When the end of the binary is reached and second clause matches, + the match context will simply be discarded (removed in the next + garbage collection, since there is no longer any reference to it).</p> + + <p>To summarize, <c>my_binary_to_list/1</c> in R12B only needs to create + <em>one</em> match context and no sub binaries. 
In R11B, if the binary + contains <em>N</em> bytes, <em>N+1</em> match contexts and <em>N</em> + sub binaries will be created.</p> + + <p>In R11B, the fastest way to match binaries is:</p> + + <p><em>DO NOT</em> (in R12B)</p> + <code type="erl"><![CDATA[ +my_complicated_binary_to_list(Bin) -> + my_complicated_binary_to_list(Bin, 0). + +my_complicated_binary_to_list(Bin, Skip) -> + case Bin of + <<_:Skip/binary,Byte,_/binary>> -> + [Byte|my_complicated_binary_to_list(Bin, Skip+1)]; + <<_:Skip/binary>> -> + [] + end.]]></code> + + <p>This function cleverly avoids building sub binaries, but it cannot + avoid building a match context in each recursion step. Therefore, in both R11B and R12B, + <c>my_complicated_binary_to_list/1</c> builds <em>N+1</em> match + contexts. (In a future release, the compiler might be able to generate code + that reuses the match context, but don't hold your breath.)</p> + + <p>Returning to <c>my_binary_to_list/1</c>, note that the match context was + discarded when the entire binary had been traversed. What happens if + the iteration stops before it has reached the end of the binary? Will + the optimization still work?</p> + + <code type="erl"><![CDATA[ +after_zero(<<0,T/binary>>) -> + T; +after_zero(<<_,T/binary>>) -> + after_zero(T); +after_zero(<<>>) -> + <<>>. + ]]></code> + + <p>Yes, it will. The compiler will remove the building of the sub binary in the + second clause</p> + + <code type="erl"><![CDATA[ +. +. +. +after_zero(<<_,T/binary>>) -> + after_zero(T); +. +. +.]]></code> + + <p>but will generate code that builds a sub binary in the first clause</p> + + <code type="erl"><![CDATA[ +after_zero(<<0,T/binary>>) -> + T; +. +. +.]]></code> + + <p>Therefore, <c>after_zero/1</c> will build one match context and one sub binary + (assuming it is passed a binary that contains a zero byte).</p> + + <p>Code like the following will also be optimized:</p> + + <code type="erl"><![CDATA[ +all_but_zeroes_to_list(Buffer, Acc, 0) -> + {lists:reverse(Acc),Buffer}; +all_but_zeroes_to_list(<<0,T/binary>>, Acc, Remaining) -> + all_but_zeroes_to_list(T, Acc, Remaining-1); +all_but_zeroes_to_list(<<Byte,T/binary>>, Acc, Remaining) -> + all_but_zeroes_to_list(T, [Byte|Acc], Remaining-1).]]></code> + + <p>The compiler will remove building of sub binaries in the second and third clauses, + and it will add an instruction to the first clause that will convert <c>Buffer</c> + from a match context to a sub binary (or do nothing if <c>Buffer</c> already is a binary).</p> + + <p>Before you begin to think that the compiler can optimize any binary patterns, + here is a function that the compiler (currently, at least) is not able to optimize:</p> + + <code type="erl"><![CDATA[ +non_opt_eq([H|T1], <<H,T2/binary>>) -> + non_opt_eq(T1, T2); +non_opt_eq([_|_], <<_,_/binary>>) -> + false; +non_opt_eq([], <<>>) -> + true.]]></code> + + <p>It was briefly mentioned earlier that the compiler can only delay creation of + sub binaries if it can be sure that the binary will not be shared. In this case, + the compiler cannot be sure.</p> + + <p>We will soon show how to rewrite <c>non_opt_eq/2</c> so that the delayed sub binary + optimization can be applied, and more importantly, we will show how you can find out + whether your code can be optimized.</p> + + <section> + <title>The bin_opt_info option</title> + + <p>Use the <c>bin_opt_info</c> option to have the compiler print a lot of + information about binary optimizations. 
It can be given either to the compiler or + <c>erlc</c></p> + + <code type="erl"><![CDATA[ +erlc +bin_opt_info Mod.erl]]></code> + + <p>or passed via an environment variable</p> + + <code type="erl"><![CDATA[ +export ERL_COMPILER_OPTIONS=bin_opt_info]]></code> + + <p>Note that the <c>bin_opt_info</c> is not meant to be a permanent option added + to your <c>Makefile</c>s, because it is not possible to eliminate all messages that + it generates. Therefore, passing the option through the environment is in most cases + the most practical approach.</p> + + <p>The warnings will look like this:</p> + + <code type="erl"><![CDATA[ +./efficiency_guide.erl:60: Warning: NOT OPTIMIZED: sub binary is used or returned +./efficiency_guide.erl:62: Warning: OPTIMIZED: creation of sub binary delayed]]></code> + + <p>To make it clearer exactly what code the warnings refer to, + in the examples that follow, the warnings are inserted as comments + after the clause they refer to:</p> + + <code type="erl"><![CDATA[ +after_zero(<<0,T/binary>>) -> + %% NOT OPTIMIZED: sub binary is used or returned + T; +after_zero(<<_,T/binary>>) -> + %% OPTIMIZED: creation of sub binary delayed + after_zero(T); +after_zero(<<>>) -> + <<>>.]]></code> + + <p>The warning for the first clause tells us that it is not possible to + delay the creation of a sub binary, because it will be returned. + The warning for the second clause tells us that a sub binary will not be + created (yet).</p> + + <p>It is time to revisit the earlier example of the code that could not + be optimized and find out why:</p> + + <code type="erl"><![CDATA[ +non_opt_eq([H|T1], <<H,T2/binary>>) -> + %% INFO: matching anything else but a plain variable to + %% the left of binary pattern will prevent delayed + %% sub binary optimization; + %% SUGGEST changing argument order + %% NOT OPTIMIZED: called function non_opt_eq/2 does not + %% begin with a suitable binary matching instruction + non_opt_eq(T1, T2); +non_opt_eq([_|_], <<_,_/binary>>) -> + false; +non_opt_eq([], <<>>) -> + true.]]></code> + + <p>The compiler emitted two warnings. The <c>INFO</c> warning refers to the function + <c>non_opt_eq/2</c> as a callee, indicating that any functions that call <c>non_opt_eq/2</c> + will not be able to make delayed sub binary optimization. + There is also a suggestion to change argument order. + The second warning (that happens to refer to the same line) refers to the construction of + the sub binary itself.</p> + + <p>We will soon show another example that should make the distinction between <c>INFO</c> + and <c>NOT OPTIMIZED</c> warnings somewhat clearer, but first we will heed the suggestion + to change argument order:</p> + + <code type="erl"><![CDATA[ +opt_eq(<<H,T1/binary>>, [H|T2]) -> + %% OPTIMIZED: creation of sub binary delayed + opt_eq(T1, T2); +opt_eq(<<_,_/binary>>, [_|_]) -> + false; +opt_eq(<<>>, []) -> + true.]]></code> + + <p>The compiler gives a warning for the following code fragment:</p> + + <code type="erl"><![CDATA[ +match_body([0|_], <<H,_/binary>>) -> + %% INFO: matching anything else but a plain variable to + %% the left of binary pattern will prevent delayed + %% sub binary optimization; + %% SUGGEST changing argument order + done; +. +. +.]]></code> + + <p>The warning means that <em>if</em> there is a call to <c>match_body/2</c> + (from another clause in <c>match_body/2</c> or another function), the + delayed sub binary optimization will not be possible. 
There will be additional + warnings for any place where a sub binary is matched out at the end of and + passed as the second argument to <c>match_body/2</c>. For instance:</p> + + <code type="erl"><![CDATA[ +match_head(List, <<_:10,Data/binary>>) -> + %% NOT OPTIMIZED: called function match_body/2 does not + %% begin with a suitable binary matching instruction + match_body(List, Data).]]></code> + + </section> + + <section> + <title>Unused variables</title> + + <p>The compiler itself figures out if a variable is unused. The same + code is generated for each of the following functions</p> + + <code type="erl"><![CDATA[ +count1(<<_,T/binary>>, Count) -> count1(T, Count+1); +count1(<<>>, Count) -> Count. + +count2(<<H,T/binary>>, Count) -> count2(T, Count+1); +count2(<<>>, Count) -> Count. + +count3(<<_H,T/binary>>, Count) -> count3(T, Count+1); +count3(<<>>, Count) -> Count.]]></code> + + <p>In each iteration, the first 8 bits in the binary will be skipped, not matched out.</p> + + </section> + + </section> + +</chapter> + diff --git a/system/doc/efficiency_guide/book.xml b/system/doc/efficiency_guide/book.xml new file mode 100644 index 0000000000..5df40695bb --- /dev/null +++ b/system/doc/efficiency_guide/book.xml @@ -0,0 +1,42 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE book SYSTEM "book.dtd"> + +<book xmlns:xi="http://www.w3.org/2001/XInclude"> + <header titlestyle="normal"> + <copyright> + <year>2001</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + </legalnotice> + + <title>Efficiency Guide</title> + <prepared>OTP Team</prepared> + <docno></docno> + <date>2001-08-07</date> + <rev>R8A</rev> + </header> + <insidecover> + </insidecover> + <pagetext>Efficiency Guide</pagetext> + <preamble> + <contents level="2"></contents> + </preamble> + <parts lift="no"> + <xi:include href="part.xml"/> + </parts> + <listofterms></listofterms> + <index></index> +</book> + diff --git a/system/doc/efficiency_guide/call_bm.erl b/system/doc/efficiency_guide/call_bm.erl new file mode 100644 index 0000000000..389814f11f --- /dev/null +++ b/system/doc/efficiency_guide/call_bm.erl @@ -0,0 +1,75 @@ +%% ``The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved via the world wide web at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% The Initial Developer of the Original Code is Ericsson Utvecklings AB. +%% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings +%% AB. 
All Rights Reserved.'' +%% +%% $Id$ +%% + +-module(call_bm). + +-include("bench.hrl"). + +-export([benchmarks/0]). +-export([local_call/1,external_call/1,fun_call/1,apply_fun/1, + apply_mfa_implicit/1, apply_mfa_explicit/1]). +-export([foo/0]). + +benchmarks() -> + {400000,[local_call,external_call,fun_call,apply_fun, + apply_mfa_implicit, apply_mfa_explicit]}. + +local_call(0) -> + ok; +local_call(Iter) -> + ?rep40(foo()), + local_call(Iter-1). + +external_call(0) -> + ok; +external_call(Iter) -> + ?rep40(?MODULE:foo()), + external_call(Iter-1). + +fun_call(Iter) -> + fun_call(Iter, fun() -> ok end). +fun_call(0, _) -> + ok; +fun_call(Iter, Fun) -> + ?rep40(Fun()), + fun_call(Iter-1, Fun). + +apply_fun(Iter) -> + apply_fun(Iter, fun() -> ok end). +apply_fun(0, _) -> + ok; +apply_fun(Iter, Fun) -> + ?rep40(apply(Fun, [])), + apply_fun(Iter-1, Fun). + +apply_mfa_explicit(0) -> + ok; +apply_mfa_explicit(Iter) -> + ?rep40(apply(?MODULE, foo, [])), + apply_mfa_explicit(Iter-1). + +apply_mfa_implicit(Iter) -> + apply_mfa_implicit(?MODULE, foo, Iter). + +apply_mfa_implicit(_, _, 0) -> + ok; +apply_mfa_implicit(Module, Function, Iter) -> + ?rep40(Module:Function()), + apply_mfa_implicit(Module, Function, Iter-1). + +foo() -> ok. diff --git a/system/doc/efficiency_guide/call_result.html b/system/doc/efficiency_guide/call_result.html new file mode 100644 index 0000000000..37b8931cdf --- /dev/null +++ b/system/doc/efficiency_guide/call_result.html @@ -0,0 +1,26 @@ +<html> +<head> +<title>Benchmark Results</title> +</head> +<body bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000"> +<h1>Benchmark Results</h1> +<h2>call_bm</h2> +<table border=0 cellpadding=1><tr><td bgcolor="#000000"> +<table cellpadding=3 border=0 cellspacing=1> +<tr bgcolor=white><td>Test</td><td>5.4<br>BEAM</td></tr> +<tr bgcolor=white> +<td>local_call</td><td>1.00 </td></tr> +<tr bgcolor=white> +<td>external_call</td><td>1.08 </td></tr> +<tr bgcolor=white> +<td>fun_call</td><td>2.79 </td></tr> +<tr bgcolor=white> +<td>apply_fun</td><td>3.54 </td></tr> +<tr bgcolor=white> +<td>apply_mfa_implicit</td><td>7.76 </td></tr> +<tr bgcolor=white> +<td>apply_mfa_explicit</td><td>8.21 </td></tr> +</table></td></tr></table> +<p><a href="call_bm.erl">Source for call_bm.erl</a> +</body> +</html> diff --git a/system/doc/efficiency_guide/commoncaveats.xml b/system/doc/efficiency_guide/commoncaveats.xml new file mode 100644 index 0000000000..e18e5aa510 --- /dev/null +++ b/system/doc/efficiency_guide/commoncaveats.xml @@ -0,0 +1,239 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE chapter SYSTEM "chapter.dtd"> + +<chapter> + <header> + <copyright> + <year>2001</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. 
+ + </legalnotice> + + <title>Common Caveats</title> + <prepared>Bjorn Gustavsson</prepared> + <docno></docno> + <date>2001-08-08</date> + <rev></rev> + <file>commoncaveats.xml</file> + </header> + + <p>Here we list a few modules and BIFs to watch out for, and not only + from a performance point of view.</p> + + <section> + <title>The regexp module</title> + + <p>The regular expression functions in the + <seealso marker="stdlib:regexp">regexp</seealso> + module are written in Erlang, not in C, and were + meant for occasional use on small amounts of data, + for instance for validation of configuration files + when starting an application.</p> + + <p>Use the <seealso marker="stdlib:re">re</seealso> module + (introduced in R13A) instead, especially in time-critical code.</p> + </section> + + <section> + <title>The timer module</title> + + <p>Creating timers using <seealso + marker="erts:erlang#erlang:send_after/3">erlang:send_after/3</seealso> + and <seealso marker="erts:erlang#erlang:start_timer/3">erlang:start_timer/3</seealso> + is much more efficient than using the timers provided by the + <seealso marker="stdlib:timer">timer</seealso> module. The + <c>timer</c> module uses a separate process to manage the timers, + and that process can easily become overloaded if many processes + create and cancel timers frequently (especially when using the + SMP emulator).</p> + + <p>The functions in the <c>timer</c> module that do not manage timers (such as + <c>timer:tc/3</c> or <c>timer:sleep/1</c>), do not call the timer-server process + and are therefore harmless.</p> + </section> + + <section> + <title>list_to_atom/1</title> + + <p>Atoms are not garbage-collected. Once an atom is created, it will never + be removed. The emulator will terminate if the limit for the number + of atoms (1048576) is reached.</p> + + <p>Therefore, converting arbitrary input strings to atoms could be + dangerous in a system that will run continuously. + If only certain well-defined atoms are allowed as input, you can use + <seealso marker="erts:erlang#list_to_existing_atom/1">list_to_existing_atom/1</seealso> + to guard against a denial-of-service attack. (All atoms that are allowed + must have been created earlier, for instance by simply using all of them + in a module and loading that module.)</p> + + <p>Using <c>list_to_atom/1</c> to construct an atom that is passed to + <c>apply/3</c> like this</p> + + <code type="erl"> +apply(list_to_atom("some_prefix"++Var), foo, Args)</code> + + <p>is quite expensive and is not recommended in time-critical code.</p> + </section> + + <section> + <title>length/1</title> + + <p>The time for calculating the length of a list is proportional to the + length of the list, as opposed to <c>tuple_size/1</c>, <c>byte_size/1</c>, + and <c>bit_size/1</c>, which all execute in constant time.</p> + + <p>Normally you don't have to worry about the speed of <c>length/1</c>, + because it is efficiently implemented in C. In time critical-code, though, + you might want to avoid it if the input list could potentially be very long.</p> + + <p>Some uses of <c>length/1</c> can be replaced by matching. 
+ For instance, this code</p> + + <code type="erl"> +foo(L) when length(L) >= 3 -> + ...</code> + + <p>can be rewritten to</p> + <code type="erl"> +foo([_,_,_|_]=L) -> + ...</code> + + <p>(One slight difference is that <c>length(L)</c> will fail if the <c>L</c> + is an improper list, while the pattern in the second code fragment will + accept an improper list.)</p> + </section> + + <section> + <title>setelement/3</title> + + <p><seealso marker="erts:erlang#setelement/3">setelement/3</seealso> + copies the tuple it modifies. Therefore, updating a tuple in a loop + using <c>setelement/3</c> will create a new copy of the tuple every time.</p> + + <p>There is one exception to the rule that the tuple is copied. + If the compiler clearly can see that destructively updating the tuple would + give exactly the same result as if the tuple was copied, the call to + <c>setelement/3</c> will be replaced with a special destructive setelement + instruction. In the following code sequence</p> + + <code type="erl"> +multiple_setelement(T0) -> + T1 = setelement(9, T0, bar), + T2 = setelement(7, T1, foobar), + setelement(5, T2, new_value).</code> + + <p>the first <c>setelement/3</c> call will copy the tuple and modify the + ninth element. The two following <c>setelement/3</c> calls will modify + the tuple in place.</p> + + <p>For the optimization to be applied, <em>all</em> of the followings conditions + must be true:</p> + + <list type="bulleted"> + <item>The indices must be integer literals, not variables or expressions.</item> + <item>The indices must be given in descending order.</item> + <item>There must be no calls to other function in between the calls to + <c>setelement/3</c>.</item> + <item>The tuple returned from one <c>setelement/3</c> call must only be used + in the subsequent call to <c>setelement/3</c>.</item> + </list> + + <p>If it is not possible to structure the code as in the <c>multiple_setelement/1</c> + example, the best way to modify multiple elements in a large tuple is to + convert the tuple to a list, modify the list, and convert the list back to + a tuple.</p> + </section> + + <section> + <title>size/1</title> + + <p><c>size/1</c> returns the size for both tuples and binary.</p> + + <p>Using the new BIFs <c>tuple_size/1</c> and <c>byte_size/1</c> introduced + in R12B gives the compiler and run-time system more opportunities for + optimization. A further advantage is that the new BIFs could help Dialyzer + find more bugs in your program.</p> + </section> + + <section> + <title>split_binary/2</title> + <p>It is usually more efficient to split a binary using matching + instead of calling the <c>split_binary/2</c> function. 
+ Furthermore, mixing bit syntax matching and <c>split_binary/2</c> + may prevent some optimizations of bit syntax matching.</p> + + <p><em>DO</em></p> + <code type="none"><![CDATA[ + <<Bin1:Num/binary,Bin2/binary>> = Bin,]]></code> + <p><em>DO NOT</em></p> + <code type="none"> + {Bin1,Bin2} = split_binary(Bin, Num) + </code> + </section> + + <section> + <title>The '--' operator</title> + <p>Note that the '<c>--</c>' operator has a complexity + proportional to the product of the length of its operands, + meaning that it will be very slow if both of its operands + are long lists:</p> + + <p><em>DO NOT</em></p> + <code type="none"><![CDATA[ + HugeList1 -- HugeList2]]></code> + + <p>Instead use the <seealso marker="stdlib:ordsets">ordsets</seealso> + module:</p> + + <p><em>DO</em></p> + <code type="none"> + HugeSet1 = ordsets:from_list(HugeList1), + HugeSet2 = ordsets:from_list(HugeList2), + ordsets:subtract(HugeSet1, HugeSet2) + </code> + + <p>Obviously, that code will not work if the original order + of the list is important. If the order of the list must be + preserved, do like this:</p> + + <p><em>DO</em></p> + <code type="none"><![CDATA[ + Set = gb_sets:from_list(HugeList2), + [E || E <- HugeList1, not gb_sets:is_element(E, Set)]]]></code> + + <p>Subtle note 1: This code behaves differently from '<c>--</c>' + if the lists contain duplicate elements. (One occurrence + of an element in HugeList2 will remove <em>all</em> + occurrences in HugeList1.)</p> + + <p>Subtle note 2: This code compares lists elements using the + '<c>==</c>' operator, while '<c>--</c>' uses the '<c>=:=</c>'. If + that difference is important, <c>sets</c> can be used instead of + <c>gb_sets</c>, but note that <c>sets:from_list/1</c> is much + slower than <c>gb_sets:from_list/1</c> for long lists.</p> + + <p>Using the '<c>--</c>' operator to delete an element + from a list is not a performance problem:</p> + + <p><em>OK</em></p> + <code type="none"> + HugeList1 -- [Element] + </code> + + </section> + +</chapter> + diff --git a/system/doc/efficiency_guide/digger.ps b/system/doc/efficiency_guide/digger.ps new file mode 100644 index 0000000000..07ac8e2fa9 --- /dev/null +++ b/system/doc/efficiency_guide/digger.ps @@ -0,0 +1,197 @@ +%!PS-Adobe-2.0 EPSF-2.0 +%%Title: /clearcase/otp/system/doc/efficiency_guide/digger.ps +%%Creator: XV Version 3.10a Rev: 12/29/94 - by John Bradley +%%BoundingBox: 290 380 322 412 +%%Pages: 1 +%%DocumentFonts: +%%EndComments +%%EndProlog + +%%Page: 1 1 + +% remember original state +/origstate save def + +% build a temporary dictionary +20 dict begin + +% define string to hold a scanline's worth of data +/pix 96 string def + +% define space for color conversions +/grays 32 string def % space for gray scale line +/npixls 0 def +/rgbindx 0 def + +% lower left corner +290 380 translate + +% size of image (on paper, in 1/72inch coords) +31.96800 31.96800 scale + +% define 'colorimage' if it isn't defined +% ('colortogray' and 'mergeprocs' come from xwd2ps +% via xgrab) +/colorimage where % do we know about 'colorimage'? 
+ { pop } % yes: pop off the 'dict' returned + { % no: define one + /colortogray { % define an RGB->I function + /rgbdata exch store % call input 'rgbdata' + rgbdata length 3 idiv + /npixls exch store + /rgbindx 0 store + 0 1 npixls 1 sub { + grays exch + rgbdata rgbindx get 20 mul % Red + rgbdata rgbindx 1 add get 32 mul % Green + rgbdata rgbindx 2 add get 12 mul % Blue + add add 64 idiv % I = .5G + .31R + .18B + put + /rgbindx rgbindx 3 add store + } for + grays 0 npixls getinterval + } bind def + + % Utility procedure for colorimage operator. + % This procedure takes two procedures off the + % stack and merges them into a single procedure. + + /mergeprocs { % def + dup length + 3 -1 roll + dup + length + dup + 5 1 roll + 3 -1 roll + add + array cvx + dup + 3 -1 roll + 0 exch + putinterval + dup + 4 2 roll + putinterval + } bind def + + /colorimage { % def + pop pop % remove 'false 3' operands + {colortogray} mergeprocs + image + } bind def + } ifelse % end of 'false' case + + + +32 32 8 % dimensions of data +[32 0 0 -32 0 32] % mapping matrix +{currentfile pix readhexstring pop} +false 3 colorimage + +000000000000000000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000000000000000000 +000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00 +ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00 +ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000 +000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00 +ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00 +ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000 +000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00 +ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00 +ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000 +000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00 +ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00 +ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000 +000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00 +ffff00ffff00ffff00000000000000000000000000ffff00ffff00ffff00ffff00ffff00 +ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000 +000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00 +ffff00ffff00000000000000000000000000000000000000ffff00ffff00ffff00ffff00 +ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000 +000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00 +ffff00ffff00000000000000000000000000000000000000ffff00ffff00ffff00ffff00 +ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000 +000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00 +ffff00ffff00000000000000000000000000000000000000ffff00ffff00ffff00ffff00 +ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000 +000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00 +ffff00ffff00000000000000000000000000000000000000ffff00ffff00ffff00ffff00 +ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000 +000000ffff00ffff00ffff00ffff00ffff00000000000000000000000000000000000000 +000000000000ffff00000000000000000000000000ffff00ffff00ffff00ffff00ffff00 +ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000 +000000ffff00ffff00ffff00ffff00ffff00000000ffff00ffff00ffff00ffff00000000 +000000000000000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00 +ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000 
+000000ffff00ffff00ffff00ffff00ffff00000000ffff00ffff00ffff00000000000000 +000000000000000000000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00 +ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000 +000000ffff00ffff00ffff00ffff00ffff00000000ffff00000000000000000000000000 +000000000000ffff00000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00 +ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000 +000000ffff00ffff00ffff00ffff00ffff00000000ffff00000000000000000000000000 +000000ffff00ffff00000000ffff00ffff00ffff00ffff00ffff00000000000000000000 +000000ffff00ffff00ffff00ffff00ffff00ffff00000000 +000000ffff00ffff00ffff00ffff00ffff00000000000000000000000000000000000000 +ffff00ffff00ffff00000000ffff00ffff00ffff00ffff00ffff00000000000000000000 +000000000000ffff00ffff00ffff00ffff00ffff00000000 +000000ffff00ffff00ffff00ffff00ffff00000000000000000000000000000000000000 +000000000000000000000000000000000000000000000000000000000000000000000000 +000000000000000000ffff00ffff00ffff00ffff00000000 +000000ffff00ffff00ffff00000000000000ffff00000000000000000000000000000000 +ffff00ffff00ffff00000000ffff00ffff00ffff00ffff00ffff00000000000000000000 +000000000000ffff00ffff00ffff00ffff00ffff00000000 +000000ffff00ffff00ffff00ffff00ffff00ffff00000000000000000000000000000000 +000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000000000000000 +000000ffff00ffff00ffff00ffff00ffff00ffff00000000 +000000ffff00ffff00ffff00ffff00ffff00ffff00000000000000000000000000000000 +000000000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00 +ffff00ffff00000000000000ffff00ffff00ffff00000000 +000000ffff00ffff00ffff00ffff00ffff00ffff00000000000000000000ffff00000000 +000000000000000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00 +ffff00ffff00000000000000ffff00ffff00ffff00000000 +000000ffff00ffff00ffff00ffff00ffff00ffff00000000000000000000ffff00ffff00 +000000000000000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00 +000000ffff00ffff00ffff00ffff00ffff00ffff00000000 +000000ffff00ffff00ffff00ffff00ffff00ffff00000000000000000000ffff00ffff00 +000000000000000000ffff00ffff00ffff00ffff00ffff00000000000000ffff00ffff00 +000000ffff00000000ffff00ffff00ffff00ffff00000000 +000000ffff00ffff00ffff00ffff00ffff00ffff00000000000000000000ffff00ffff00 +000000000000000000ffff00ffff00ffff00ffff00000000000000000000000000ffff00 +ffff00ffff00000000ffff00ffff00ffff00ffff00000000 +000000ffff00ffff00ffff00ffff00ffff00ffff00000000000000ffff00ffff00ffff00 +000000000000ffff00ffff00ffff00ffff00000000000000000000000000000000000000 +ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000 +000000ffff00ffff00ffff00ffff00ffff00ffff00000000000000000000ffff00ffff00 +000000000000000000ffff00ffff00000000000000000000000000000000000000000000 +000000ffff00ffff00ffff00ffff00ffff00ffff00000000 +000000ffff00ffff00ffff00ffff00ffff00ffff00000000000000000000ffff00ffff00 +000000000000000000ffff00ffff00000000000000000000000000000000000000000000 +000000ffff00ffff00ffff00ffff00ffff00ffff00000000 +000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00 +ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00 +ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000 +000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00 +ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00 +ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000 +000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00 +ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00 
+ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000 +000000ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00 +ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00ffff00 +ffff00ffff00ffff00ffff00ffff00ffff00ffff00000000 +000000000000000000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000000000000000000 + +showpage + +% stop using temporary dictionary +end + +% restore original state +origstate restore + +%%Trailer diff --git a/system/doc/efficiency_guide/drivers.xml b/system/doc/efficiency_guide/drivers.xml new file mode 100644 index 0000000000..9fe54fb19a --- /dev/null +++ b/system/doc/efficiency_guide/drivers.xml @@ -0,0 +1,148 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE chapter SYSTEM "chapter.dtd"> + +<chapter> + <header> + <copyright> + <year>2009</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + </legalnotice> + + <title>Drivers</title> + <prepared>Bjorn Gustavsson</prepared> + <docno></docno> + <date>2009-11-16</date> + <rev></rev> + <file>drivers.xml</file> + </header> + + <p>This chapter provides a (very) brief overview on how to write efficient + drivers. It is assumed that you already have a good understanding of + drivers.</p> + + <section> + <title>Drivers and concurrency</title> + + <p>The run-time system will always take a lock before running + any code in a driver.</p> + + <p>By default, that lock will be at the driver level, meaning that + if several ports has been opened to the same driver, only code for + one port at the same time can be running.</p> + + <p>A driver can be configured to instead have one lock for each port.</p> + + <p>If a driver is used in a functional way (i.e. it holds no state, + but only does some heavy calculation and returns a result), several + ports with registered names can be opened beforehand and the port to + be used can be chosen based on the scheduler ID like this:</p> + + <code type="none"> +-define(PORT_NAMES(), + {some_driver_01, some_driver_02, some_driver_03, some_driver_04, + some_driver_05, some_driver_06, some_driver_07, some_driver_08, + some_driver_09, some_driver_10, some_driver_11, some_driver_12, + some_driver_13, some_driver_14, some_driver_15, some_driver_16}). 
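+
+%% The ports could be opened and registered up front, for example like this
+%% (an illustrative sketch, assuming the driver is already loaded; the
+%% driver name "some_driver" and the empty option list are placeholders):
+open_ports() ->
+    Open = fun(I) ->
+                   register(element(I, ?PORT_NAMES()),
+                            open_port({spawn, "some_driver"}, []))
+           end,
+    lists:foreach(Open, lists:seq(1, tuple_size(?PORT_NAMES()))).
+
+%% erlang:system_info(scheduler_id) is 1-based, so the expression
+%% 'rem tuple_size(?PORT_NAMES()) + 1' below always yields a valid
+%% tuple index in the range 1..16.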
+ +client_port() -> + element(erlang:system_info(scheduler_id) rem tuple_size(?PORT_NAMES()) + 1, + ?PORT_NAMES()).</code> + + <p>As long as there are no more than 16 schedulers, there will never + be any lock contention on the port lock for the driver.</p> + + </section> + + <section> + <title>Avoiding copying of binaries when calling a driver</title> + + <p>There are basically two ways to avoid copying a binary that is + sent to a driver.</p> + + <p>If the <c>Data</c> argument for + <seealso marker="erts:erlang#port_control/3">port_control/3</seealso> + is a binary, the driver will be passed a pointer to the contents of + the binary and the binary will not be copied. + If the <c>Data</c> argument is an iolist (list of binaries and lists), + all binaries in the iolist will be copied.</p> + + <p>Therefore, if you want to send both a pre-existing binary and some + additional data to a driver without copying the binary, you must call + <c>port_control/3</c> twice; once with the binary and once with the + additional data. However, that will only work if there is only one + process communicating with the port (because otherwise another process + could call the driver in-between the calls).</p> + + <p>Another way to avoid copying binaries is to implement an <c>outputv</c> + callback (instead of an <c>output</c> callback) in the driver. + If a driver has an <c>outputv</c> callback, refc binaries passed + in an iolist in the <c>Data</c> argument for + <seealso marker="erts:erlang#port_command/2">port_command/2</seealso> + will be passed as references to the driver.</p> + </section> + + <section> + <title>Returning small binaries from a driver</title> + + <p>The run-time system can represent binaries up to 64 bytes as + heap binaries. They will always be copied when sent in a messages, + but they will require less memory if they are not sent to another + process and garbage collection is cheaper.</p> + + <p>If you know that the binaries you return are always small, + you should use driver API calls that do not require a pre-allocated + binary, for instance + <seealso marker="erts:erl_driver#int driver_output-3">driver_output()</seealso> + or + <seealso marker="erts:erl_driver#int driver_output_term-3">driver_output_term()</seealso> + using the <c>ERL_DRV_BUF2BINARY</c> format, + to allow the run-time to construct a heap binary.</p> + + </section> + + <section> + <title>Returning big binaries without copying from a driver</title> + + <p>To avoid copying data when a big binary is sent or returned from + the driver to an Erlang process, the driver must first allocate the + binary and then send it to an Erlang process in some way.</p> + + <p>Use <seealso marker="erts:erl_driver#ErlDrvBinary* driver_alloc_binary-1">driver_alloc_binary()</seealso> to allocate a binary.</p> + + <p>There are several ways to send a binary created with + <c>driver_alloc_binary()</c>.</p> + + <list type="bulleted"> + <item><p>From the <c>control</c> callback, a binary can be returned provided + that + <seealso marker="erts:erl_driver#void set_port_control_flags-2">set_port_control()</seealso> + has been called with the flag value <c>PORT_CONTROL_FLAG_BINARY</c>.</p> + </item> + + <item><p>A single binary can be sent with + <seealso marker="erts:erl_driver#int driver_output_binary-6">driver_output_binary()</seealso>.</p></item> + + <item><p>Using + <seealso marker="erts:erl_driver#int driver_output_term-3">driver_output_term()</seealso> + or + <seealso marker="erts:erl_driver#int 
driver_send_term-4">driver_send_term()</seealso>, + a binary can be included in an Erlang term.</p> + </item> + </list> + + </section> + +</chapter> diff --git a/system/doc/efficiency_guide/efficiency_guide.erl b/system/doc/efficiency_guide/efficiency_guide.erl new file mode 100644 index 0000000000..e982bdae65 --- /dev/null +++ b/system/doc/efficiency_guide/efficiency_guide.erl @@ -0,0 +1,214 @@ +-module(efficiency_guide). +-compile(export_all). + +%% DO NOT +naive_reverse([H|T]) -> + naive_reverse(T)++[H]; +naive_reverse([]) -> + []. + +%% OK +naive_but_ok_reverse([H|T], Acc) -> + naive_but_ok_reverse(T, [H]++Acc); +naive_but_ok_reverse([], Acc) -> + Acc. + +%% DO +vanilla_reverse([H|T], Acc) -> + vanilla_reverse(T, [H|Acc]); +vanilla_reverse([], Acc) -> + Acc. + + +multiple_setelement(T0) -> + T1 = setelement(9, T0, bar), + T2 = setelement(7, T1, foobar), + setelement(5, T2, new_value). + + +my_list_to_binary(List) -> + my_list_to_binary(List, <<>>). + +my_list_to_binary([H|T], Acc) -> + my_list_to_binary(T, <<Acc/binary,H>>); +my_list_to_binary([], Acc) -> + Acc. + +my_old_list_to_binary(List) -> + my_old_list_to_binary(List, []). + +my_old_list_to_binary([H|T], Acc) -> + my_old_list_to_binary(T, [Acc,H]); +my_old_list_to_binary([], Acc) -> + list_to_binary(Acc). + +my_binary_to_list(<<H,T/binary>>) -> + [H|my_binary_to_list(T)]; +my_binary_to_list(<<>>) -> []. + +my_complicated_binary_to_list(Bin) -> + my_complicated_binary_to_list(Bin, 0). + +my_complicated_binary_to_list(Bin, Skip) -> + case Bin of + <<_:Skip/binary,Byte,_/binary>> -> + [Byte|my_complicated_binary_to_list(Bin, Skip+1)]; + <<_:Skip/binary>> -> + [] + end. + +after_zero(<<0,T/binary>>) -> + T; +after_zero(<<_,T/binary>>) -> + after_zero(T); +after_zero(<<>>) -> + <<>>. + +all_but_zeroes_to_list(Buffer, Acc, 0) -> + {lists:reverse(Acc),Buffer}; +all_but_zeroes_to_list(<<0,T/binary>>, Acc, Remaining) -> + all_but_zeroes_to_list(T, Acc, Remaining-1); +all_but_zeroes_to_list(<<Byte,T/binary>>, Acc, Remaining) -> + all_but_zeroes_to_list(T, [Byte|Acc], Remaining-1). + +non_opt_eq([H|T1], <<H,T2/binary>>) -> + non_opt_eq(T1, T2); +non_opt_eq([_|_], <<_,_/binary>>) -> + false; +non_opt_eq([], <<>>) -> + true. + +opt_eq(<<H,T1/binary>>, [H|T2]) -> + opt_eq(T1, T2); +opt_eq(<<_,_/binary>>, [_|_]) -> + false; +opt_eq(<<>>, []) -> + true. + +match_head(List, <<_:10,Data/binary>>) -> + match_body(List, Data). + +match_body([0|_], <<H,_/binary>>) -> + done; +match_body([H|T1], <<H,T2/binary>>) -> + {T1,T2}. + +count1(<<_,T/binary>>, Count) -> count1(T, Count+1); +count1(<<>>, Count) -> Count. + +count2(<<H,T/binary>>, Count) -> count2(T, Count+1); +count2(<<>>, Count) -> Count. + +count3(<<_H,T/binary>>, Count) -> count3(T, Count+1); +count3(<<>>, Count) -> Count. + +fib(N) -> + fib(N, 0, 1, []). + +fib(0, _Current, _Next, Fibs) -> + lists:reverse(Fibs); +fib(N, Current, Next, Fibs) -> + fib(N - 1, Next, Current + Next, [Current|Fibs]). + +recursive_fib(N) -> + recursive_fib(N, 0, 1). + +recursive_fib(0, _Current, _Next) -> + []; +recursive_fib(N, Current, Next) -> + [Current|recursive_fib(N - 1, Next, Current + Next)]. + +bad_fib(N) -> + bad_fib(N, 0, 1, []). + +bad_fib(0, _Current, _Next, Fibs) -> + Fibs; +bad_fib(N, Current, Next, Fibs) -> + bad_fib(N - 1, Next, Current + Next, Fibs ++ [Current]). + +tail_recursive_fib(N) -> + tail_recursive_fib(N, 0, 1, []). 
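+
+%% tail_recursive_fib/4 below accumulates the numbers in reverse order and
+%% reverses the accumulator once at the end; for example,
+%% tail_recursive_fib(10) returns [0,1,1,2,3,5,8,13,21,34].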
+ +tail_recursive_fib(0, _Current, _Next, Fibs) -> + lists:reverse(Fibs); +tail_recursive_fib(N, Current, Next, Fibs) -> + tail_recursive_fib(N - 1, Next, Current + Next, [Current|Fibs]). + +append([H|T], Tail) -> + [H|append(T, Tail)]; +append([], Tail) -> + Tail. + +kilo_byte() -> + kilo_byte(10, [42]). + +kilo_byte(0, Acc) -> + Acc; +kilo_byte(N, Acc) -> + kilo_byte(N-1, [Acc|Acc]). + +recursive_sum([H|T]) -> + H+recursive_sum(T); +recursive_sum([]) -> 0. + +sum(L) -> sum(L, 0). + +sum([H|T], Sum) -> sum(T, Sum + H); +sum([], Sum) -> Sum. + +days_in_month(M) -> + element(M, {31,28,31,30,31,30,31,31,30,31,30,31}). + +atom_map1(one) -> 1; +atom_map1(two) -> 2; +atom_map1(three) -> 3; +atom_map1(Int) when is_integer(Int) -> Int; +atom_map1(four) -> 4; +atom_map1(five) -> 5; +atom_map1(six) -> 6. + +atom_map2(one) -> 1; +atom_map2(two) -> 2; +atom_map2(three) -> 3; +atom_map2(four) -> 4; +atom_map2(five) -> 5; +atom_map2(six) -> 6; +atom_map2(Int) when is_integer(Int) -> Int. + +atom_map3(Int) when is_integer(Int) -> Int; +atom_map3(one) -> 1; +atom_map3(two) -> 2; +atom_map3(three) -> 3; +atom_map3(four) -> 4; +atom_map3(five) -> 5; +atom_map3(six) -> 6. + + +map_pairs1(_Map, [], Ys) -> + Ys; +map_pairs1(_Map, Xs, [] ) -> + Xs; +map_pairs1(Map, [X|Xs], [Y|Ys]) -> + [Map(X, Y)|map_pairs1(Map, Xs, Ys)]. + +map_pairs2(_Map, [], Ys) -> + Ys; +map_pairs2(_Map, [_|_]=Xs, [] ) -> + Xs; +map_pairs2(Map, [X|Xs], [Y|Ys]) -> + [Map(X, Y)|map_pairs2(Map, Xs, Ys)]. + +explicit_map_pairs(Map, Xs0, Ys0) -> + case Xs0 of + [X|Xs] -> + case Ys0 of + [Y|Ys] -> + [Map(X, Y)|explicit_map_pairs(Map, Xs, Ys)]; + [] -> + Xs0 + end; + [] -> + Ys0 + end. + + diff --git a/system/doc/efficiency_guide/functions.xml b/system/doc/efficiency_guide/functions.xml new file mode 100644 index 0000000000..d55b60e39c --- /dev/null +++ b/system/doc/efficiency_guide/functions.xml @@ -0,0 +1,234 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE chapter SYSTEM "chapter.dtd"> + +<chapter> + <header> + <copyright> + <year>2001</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + </legalnotice> + + <title>Functions</title> + <prepared>Bjorn Gustavsson</prepared> + <docno></docno> + <date>2007-11-22</date> + <rev></rev> + <file>functions.xml</file> + </header> + + <section> + <title>Pattern matching</title> + <p>Pattern matching in function head and in <c>case</c> and <c>receive</c> + clauses are optimized by the compiler. With a few exceptions, there is nothing + to gain by rearranging clauses.</p> + + <p>One exception is pattern matching of binaries. The compiler + will not rearrange clauses that match binaries. 
Placing the + clause that matches against the empty binary <em>last</em> will usually + be slightly faster than placing it <em>first</em>.</p> + + <p>Here is a rather contrived example to show another exception:</p> + + <p><em>DO NOT</em></p> + <code type="erl"> +atom_map1(one) -> 1; +atom_map1(two) -> 2; +atom_map1(three) -> 3; +atom_map1(Int) when is_integer(Int) -> Int; +atom_map1(four) -> 4; +atom_map1(five) -> 5; +atom_map1(six) -> 6.</code> + + <p>The problem is the clause with the variable <c>Int</c>. + Since a variable can match anything, including the atoms + <c>four</c>, <c>five</c>, and <c>six</c> that the following clauses + also will match, the compiler must generate sub-optimal code that will + execute as follows:</p> + + <p>First the input value is compared to <c>one</c>, <c>two</c>, and + <c>three</c> (using a single instruction that does a binary search; + thus, quite efficient even if there are many values) to select which + one of the first three clauses to execute (if any).</p> + + <p>If none of the first three clauses matched, the fourth clause + will match since a variable always matches. If the guard test + <c>is_integer(Int)</c> succeeds, the fourth clause will be + executed.</p> + + <p>If the guard test failed, the input value is compared to + <c>four</c>, <c>five</c>, and <c>six</c>, and the appropriate clause + is selected. (There will be a <c>function_clause</c> exception if + none of the values matched.)</p> + + <p>Rewriting to either</p> + + <p><em>DO</em></p> + <code type="erl"><![CDATA[ +atom_map2(one) -> 1; +atom_map2(two) -> 2; +atom_map2(three) -> 3; +atom_map2(four) -> 4; +atom_map2(five) -> 5; +atom_map2(six) -> 6; +atom_map2(Int) when is_integer(Int) -> Int.]]></code> + + <p>or</p> + + <p><em>DO</em></p> + <code type="erl"><![CDATA[ +atom_map3(Int) when is_integer(Int) -> Int; +atom_map3(one) -> 1; +atom_map3(two) -> 2; +atom_map3(three) -> 3; +atom_map3(four) -> 4; +atom_map3(five) -> 5; +atom_map3(six) -> 6.]]></code> + + <p>will give slightly more efficient matching code.</p> + + <p>Here is a less contrived example:</p> + + <p><em>DO NOT</em></p> + <code type="erl"><![CDATA[ +map_pairs1(_Map, [], Ys) -> + Ys; +map_pairs1(_Map, Xs, [] ) -> + Xs; +map_pairs1(Map, [X|Xs], [Y|Ys]) -> + [Map(X, Y)|map_pairs1(Map, Xs, Ys)].]]></code> + + <p>The first argument is <em>not</em> a problem. It is variable, but it + is a variable in all clauses. The problem is the variable in the second + argument, <c>Xs</c>, in the middle clause. Because the variable can + match anything, the compiler is not allowed to rearrange the clauses, + but must generate code that matches them in the order written.</p> + + <p>If the function is rewritten like this</p> + + <p><em>DO</em></p> + <code type="erl"><![CDATA[ +map_pairs2(_Map, [], Ys) -> + Ys; +map_pairs2(_Map, [_|_]=Xs, [] ) -> + Xs; +map_pairs2(Map, [X|Xs], [Y|Ys]) -> + [Map(X, Y)|map_pairs2(Map, Xs, Ys)].]]></code> + + <p>the compiler is free rearrange the clauses. It will generate code + similar to this</p> + + <p><em>DO NOT (already done by the compiler)</em></p> + <code type="erl"><![CDATA[ +explicit_map_pairs(Map, Xs0, Ys0) -> + case Xs0 of + [X|Xs] -> + case Ys0 of + [Y|Ys] -> + [Map(X, Y)|explicit_map_pairs(Map, Xs, Ys)]; + [] -> + Xs0 + end; + [] -> + Ys0 + end.]]></code> + + <p>which should be slightly faster for presumably the most common case + that the input lists are not empty or very short. 
+ (Another advantage is that Dialyzer is able to deduce a better type + for the variable <c>Xs</c>.)</p> + </section> + + <section> + <title>Function Calls </title> + + <p>Here is an intentionally rough guide to the relative costs of + different kinds of calls. It is based on benchmark figures run on + Solaris/Sparc:</p> + + <list type="bulleted"> + <item>Calls to local or external functions (<c>foo()</c>, <c>m:foo()</c>) + are the fastest kind of calls.</item> + <item>Calling or applying a fun (<c>Fun()</c>, <c>apply(Fun, [])</c>) + is about <em>three times</em> as expensive as calling a local function.</item> + <item>Applying an exported function (<c>Mod:Name()</c>, + <c>apply(Mod, Name, [])</c>) is about twice as expensive as calling a fun, + or about <em>six times</em> as expensive as calling a local function.</item> + </list> + + <section> + <title>Notes and implementation details</title> + + <p>Calling and applying a fun does not involve any hash-table lookup. + A fun contains an (indirect) pointer to the function that implements + the fun.</p> + + <warning><p><em>Tuples are not fun(s)</em>. + A "tuple fun", <c>{Module,Function}</c>, is not a fun. + The cost for calling a "tuple fun" is similar to that + of <c>apply/3</c> or worse. Using "tuple funs" is <em>strongly discouraged</em>, + as they may not be supported in a future release.</p></warning> + + <p><c>apply/3</c> must look up the code for the function to execute + in a hash table. Therefore, it will always be slower than a + direct call or a fun call.</p> + + <p>It no longer matters (from a performance point of view) + whether you write</p> + + <code type="erl"> +Module:Function(Arg1, Arg2)</code> + + <p>or</p> + + <code type="erl"> +apply(Module, Function, [Arg1,Arg2])</code> + + <p>(The compiler internally rewrites the latter code into the former.)</p> + + <p>The following code</p> + + <code type="erl"> +apply(Module, Function, Arguments)</code> + + <p>is slightly slower because the shape of the list of arguments + is not known at compile time.</p> + </section> + </section> + + <section> + <title>Memory usage in recursion</title> + <p>When writing recursive functions it is preferable to make them + tail-recursive so that they can execute in constant memory space.</p> + <p><em>DO</em></p> + <code type="none"> +list_length(List) -> + list_length(List, 0). + +list_length([], AccLen) -> + AccLen; % Base case + +list_length([_|Tail], AccLen) -> + list_length(Tail, AccLen + 1). % Tail-recursive</code> + <p><em>DO NOT</em></p> + <code type="none"> +list_length([]) -> + 0. % Base case +list_length([_ | Tail]) -> + list_length(Tail) + 1. % Not tail-recursive</code> + </section> + +</chapter> + diff --git a/system/doc/efficiency_guide/introduction.xml b/system/doc/efficiency_guide/introduction.xml new file mode 100644 index 0000000000..ba942c75c2 --- /dev/null +++ b/system/doc/efficiency_guide/introduction.xml @@ -0,0 +1,69 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE chapter SYSTEM "chapter.dtd"> + +<chapter> + <header> + <copyright> + <year>2001</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. 
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ </legalnotice>
+
+ <title>Introduction</title>
+ <prepared>Bjorn Gustavsson</prepared>
+ <docno></docno>
+ <date>2007-11-21</date>
+ <rev></rev>
+ <file>introduction.xml</file>
+ </header>
+
+ <section>
+ <title>Purpose</title>
+
+ <quote><p>Premature optimization is the root of all evil. -- D.E. Knuth</p></quote>
+
+ <p>Efficient code can be well-structured and clean, based on
+ a sound overall architecture and sound algorithms.
+ Efficient code can be highly implementation-dependent code that bypasses
+ documented interfaces and takes advantage of obscure quirks in
+ the current implementation.</p>
+
+ <p>Ideally, your code should only contain the first kind of efficient
+ code. If that turns out to be too slow, you should profile the application
+ to find out where the performance bottlenecks are and optimize only the
+ bottlenecks. Other code should stay as clean as possible.</p>
+
+ <p>Fortunately, compiler and run-time optimizations introduced in
+ R12B make it easier to write code that is both clean and
+ efficient. For instance, the ugly workarounds needed in R11B and earlier
+ releases to get the most speed out of binary pattern matching are
+ no longer necessary. In fact, the ugly code is slower
+ than the clean code (because the clean code has become faster, not
+ because the uglier code has become slower).</p>
+
+ <p>This Efficiency Guide cannot really teach you how to write efficient
+ code. It can give you a few pointers about what to avoid and what to use,
+ and some understanding of how certain language features are implemented.
+ We have generally not included general tips about optimization that will
+ work in any language, such as moving common calculations out of loops.</p>
+ </section>
+
+ <section>
+ <title>Prerequisites</title>
+ <p>It is assumed that the reader is familiar with the Erlang
+ programming language and concepts of OTP.</p>
+ </section>
+</chapter>
+
diff --git a/system/doc/efficiency_guide/listhandling.xml b/system/doc/efficiency_guide/listhandling.xml
new file mode 100644
index 0000000000..e9d2dfe556
--- /dev/null
+++ b/system/doc/efficiency_guide/listhandling.xml
@@ -0,0 +1,241 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE chapter SYSTEM "chapter.dtd">
+
+<chapter>
+ <header>
+ <copyright>
+ <year>2001</year><year>2009</year>
+ <holder>Ericsson AB. All Rights Reserved.</holder>
+ </copyright>
+ <legalnotice>
+ The contents of this file are subject to the Erlang Public License,
+ Version 1.1, (the "License"); you may not use this file except in
+ compliance with the License. You should have received a copy of the
+ Erlang Public License along with this software. If not, it can be
+ retrieved online at http://www.erlang.org/.
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ the License for the specific language governing rights and limitations
+ under the License.
+
+ </legalnotice>
+
+ <title>List handling</title>
+ <prepared>Bjorn Gustavsson</prepared>
+ <docno></docno>
+ <date>2007-11-16</date>
+ <rev></rev>
+ <file>listHandling.xml</file>
+ </header>
+
+ <section>
+ <title>Creating a list</title>
+
+ <p>Lists can only be built starting from the end and attaching
+ list elements at the beginning.
If you use the <c>++</c> operator + like this</p> + + <code type="erl"> +List1 ++ List2</code> + + <p>you will create a new list which is copy of the elements in <c>List1</c>, + followed by <c>List2</c>. Looking at how <c>lists:append/1</c> or <c>++</c> would be + implemented in plain Erlang, it can be seen clearly that the first list + is copied:</p> + + <code type="erl"> +append([H|T], Tail) -> + [H|append(T, Tail)]; +append([], Tail) -> + Tail.</code> + + <p>So the important thing when recursing and building a list is to + make sure that you attach the new elements to the beginning of the list, + so that you build <em>a</em> list, and not hundreds or thousands of + copies of the growing result list.</p> + + <p>Let us first look at how it should not be done:</p> + + <p><em>DO NOT</em></p> + <code type="erl"><![CDATA[ +bad_fib(N) -> + bad_fib(N, 0, 1, []). + +bad_fib(0, _Current, _Next, Fibs) -> + Fibs; +bad_fib(N, Current, Next, Fibs) -> + bad_fib(N - 1, Next, Current + Next, Fibs ++ [Current]).]]></code> + + <p>Here we are not a building a list; in each iteration step we + create a new list that is one element longer than the new previous list.</p> + + <p>To avoid copying the result in each iteration, we must build the list in + reverse order and reverse the list when we are done:</p> + + <p><em>DO</em></p> + <code type="erl"><![CDATA[ +tail_recursive_fib(N) -> + tail_recursive_fib(N, 0, 1, []). + +tail_recursive_fib(0, _Current, _Next, Fibs) -> + lists:reverse(Fibs); +tail_recursive_fib(N, Current, Next, Fibs) -> + tail_recursive_fib(N - 1, Next, Current + Next, [Current|Fibs]).]]></code> + + </section> + + <section> + <title>List comprehensions</title> + + <p>Lists comprehensions still have a reputation for being slow. + They used to be implemented using funs, which used to be slow.</p> + + <p>In recent Erlang/OTP releases (including R12B), a list comprehension</p> + + <code type="erl"><![CDATA[ +[Expr(E) || E <- List]]]></code> + + <p>is basically translated to a local function</p> + + <code type="erl"> +'lc^0'([E|Tail], Expr) -> + [Expr(E)|'lc^0'(Tail, Expr)]; +'lc^0'([], _Expr) -> [].</code> + + <p>In R12B, if the result of the list comprehension will <em>obviously</em> not be used, + a list will not be constructed. For instance, in this code</p> + + <code type="erl"><![CDATA[ +[io:put_chars(E) || E <- List], +ok.]]></code> + + <p>or in this code</p> + + <code type="erl"><![CDATA[ +. +. +. +case Var of + ... -> + [io:put_chars(E) || E <- List]; + ... -> +end, +some_function(...), +. +. +.]]></code> + + <p>the value is neither assigned to a variable, nor passed to another function, + nor returned, so there is no need to construct a list and the compiler will simplify + the code for the list comprehension to</p> + + <code type="erl"> +'lc^0'([E|Tail], Expr) -> + Expr(E), + 'lc^0'(Tail, Expr); +'lc^0'([], _Expr) -> [].</code> + + </section> + + <section> + <title>Deep and flat lists</title> + + <p><seealso marker="stdlib:lists#flatten/1">lists:flatten/1</seealso> + builds an entirely new list. Therefore, it is expensive, and even + <em>more</em> expensive than the <c>++</c> (which copies its left argument, + but not its right argument).</p> + + <p>In the following situations, you can easily avoid calling <c>lists:flatten/1</c>:</p> + + <list type="bulleted"> + <item>When sending data to a port. 
Ports understand deep lists + so there is no reason to flatten the list before sending it to + the port.</item> + <item>When calling BIFs that accept deep lists, such as + <seealso marker="erts:erlang#list_to_binary/1">list_to_binary/1</seealso> or + <seealso marker="erts:erlang#iolist_to_binary/1">iolist_to_binary/1</seealso>.</item> + <item>When you know that your list is only one level deep, you can can use + <seealso marker="stdlib:lists#append/1">lists:append/1</seealso>.</item> + </list> + + <p><em>Port example</em></p> + <p><em>DO</em></p> + <pre> + ... + port_command(Port, DeepList) + ...</pre> + <p><em>DO NOT</em></p> + <pre> + ... + port_command(Port, lists:flatten(DeepList)) + ...</pre> + + <p>A common way to send a zero-terminated string to a port is the following:</p> + + <p><em>DO NOT</em></p> + <pre> + ... + TerminatedStr = String ++ [0], % String="foo" => [$f, $o, $o, 0] + port_command(Port, TerminatedStr) + ...</pre> + + <p>Instead do like this:</p> + + <p><em>DO</em></p> + <pre> + ... + TerminatedStr = [String, 0], % String="foo" => [[$f, $o, $o], 0] + port_command(Port, TerminatedStr) + ...</pre> + + <p><em>Append example</em></p> + <p><em>DO</em></p> + <pre> + > lists:append([[1], [2], [3]]). + [1,2,3] + ></pre> + <p><em>DO NOT</em></p> + <pre> + > lists:flatten([[1], [2], [3]]). + [1,2,3] + ></pre> + </section> + + <section> + <title>Why you should not worry about recursive lists functions</title> + + <p>In the performance myth chapter, the following myth was exposed: + <seealso marker="myths#tail_recursive">Tail-recursive functions + are MUCH faster than recursive functions</seealso>.</p> + + <p>To summarize, in R12B there is usually not much difference between + a body-recursive list function and tail-recursive function that reverses + the list at the end. Therefore, concentrate on writing beautiful code + and forget about the performance of your list functions. In the time-critical + parts of your code (and only there), <em>measure</em> before rewriting + your code.</p> + + <p><em>Important note</em>: This section talks about lists functions that + <em>construct</em> lists. A tail-recursive function that does not construct + a list runs in constant space, while the corresponding body-recursive + function uses stack space proportional to the length of the list. + For instance, a function that sums a list of integers, should <em>not</em> be + written like this</p> + + <p><em>DO NOT</em></p> + <code type="erl"> +recursive_sum([H|T]) -> H+recursive_sum(T); +recursive_sum([]) -> 0.</code> + + <p>but like this</p> + + <p><em>DO</em></p> + <code type="erl"> +sum(L) -> sum(L, 0). 
+ +sum([H|T], Sum) -> sum(T, Sum + H); +sum([], Sum) -> Sum.</code> + </section> +</chapter> + diff --git a/system/doc/efficiency_guide/make.dep b/system/doc/efficiency_guide/make.dep new file mode 100644 index 0000000000..afa3bd0516 --- /dev/null +++ b/system/doc/efficiency_guide/make.dep @@ -0,0 +1,16 @@ +# ---------------------------------------------------- +# >>>> Do not edit this file <<<< +# This file was automaticly generated by +# /home/otp/bin/docdepend +# ---------------------------------------------------- + + +# ---------------------------------------------------- +# TeX files that the DVI file depend on +# ---------------------------------------------------- + +book.dvi: advanced.tex binaryhandling.tex book.tex commoncaveats.tex \ + drivers.tex functions.tex introduction.tex listhandling.tex \ + myths.tex part.tex processes.tex profiling.tex \ + tablesDatabases.tex + diff --git a/system/doc/efficiency_guide/myths.xml b/system/doc/efficiency_guide/myths.xml new file mode 100644 index 0000000000..76a72368bb --- /dev/null +++ b/system/doc/efficiency_guide/myths.xml @@ -0,0 +1,203 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE chapter SYSTEM "chapter.dtd"> + +<chapter> + <header> + <copyright> + <year>2007</year> + <year>2007</year> + <holder>Ericsson AB, All Rights Reserved</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + The Initial Developer of the Original Code is Ericsson AB. + </legalnotice> + + <title>The Eight Myths of Erlang Performance</title> + <prepared>Bjorn Gustavsson</prepared> + <docno></docno> + <date>2007-11-10</date> + <rev></rev> + <file>myths.xml</file> + </header> + + <p>Some truths seem to live on well beyond their best-before date, + perhaps because "information" spreads more rapidly from person-to-person + faster than a single release note that notes, for instance, that funs + have become faster.</p> + + <p>Here we try to kill the old truths (or semi-truths) that have + become myths.</p> + + <section> + <title>Myth: Funs are slow</title> + <p>Yes, funs used to be slow. Very slow. Slower than <c>apply/3</c>. + Originally, funs were implemented using nothing more than + compiler trickery, ordinary tuples, <c>apply/3</c>, and a great + deal of ingenuity.</p> + + <p>But that is ancient history. Funs was given its own data type + in the R6B release and was further optimized in the R7B release. + Now the cost for a fun call falls roughly between the cost for a call to + local function and <c>apply/3</c>.</p> + </section> + + <section> + <title>Myth: List comprehensions are slow</title> + + <p>List comprehensions used to be implemented using funs, and in the + bad old days funs were really slow.</p> + + <p>Nowadays the compiler rewrites list comprehensions into an ordinary + recursive function. Of course, using a tail-recursive function with + a reverse at the end would be still faster. Or would it? 
+    That leads us to the next myth.</p>
+  </section>
+
+  <section>
+    <title>Myth: Tail-recursive functions are MUCH faster
+    than recursive functions</title>
+
+    <p><marker id="tail_recursive"></marker>According to the myth,
+    recursive functions leave references
+    to dead terms on the stack and the garbage collector will have to
+    copy all those dead terms, while tail-recursive functions immediately
+    discard those terms.</p>
+
+    <p>That used to be true before R7B. In R7B, the compiler started
+    to generate code that overwrites references to terms that will never
+    be used with an empty list, so that the garbage collector would not
+    keep dead values any longer than necessary.</p>
+
+    <p>Even after that optimization, a tail-recursive function would
+    still most of the time be faster than a body-recursive function. Why?</p>
+
+    <p>It has to do with how many words of stack are used in each
+    recursive call. In most cases, a recursive function uses more words
+    on the stack for each recursion than the number of words a tail-recursive
+    function would allocate on the heap. Since more memory is used, the garbage
+    collector will be invoked more frequently, and it will have more work traversing
+    the stack.</p>
+
+    <p>In R12B and later releases, there is an optimization that will
+    in many cases reduce the number of words used on the stack in
+    body-recursive calls, so that a body-recursive list function and a
+    tail-recursive function that calls <seealso
+    marker="stdlib:lists#reverse/1">lists:reverse/1</seealso> at the
+    end will use exactly the same amount of memory.
+    <c>lists:map/2</c>, <c>lists:filter/2</c>, list comprehensions,
+    and many other recursive functions now use the same amount of space
+    as their tail-recursive equivalents.</p>
+
+    <p>So which is faster?</p>
+
+    <p>It depends. On Solaris/Sparc, the body-recursive function seems to
+    be slightly faster, even for lists with very many elements. On the x86
+    architecture, tail recursion was up to about 30 percent faster.</p>
+
+    <p>So the choice is now mostly a matter of taste. If you really do need
+    the utmost speed, you must <em>measure</em>. You can no longer be
+    absolutely sure that the tail-recursive list function will be the fastest
+    in all circumstances.</p>
+
+    <p>Note: A tail-recursive function that does not need to reverse the
+    list at the end is, of course, faster than a body-recursive function,
+    as are tail-recursive functions that do not construct any terms at all
+    (for instance, a function that sums all integers in a list).</p>
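+
+    <p>To make the comparison concrete, here is a minimal sketch of the two
+    equivalent styles discussed above (a made-up list-doubling function,
+    written both ways); in R12B and later they use about the same amount
+    of memory:</p>
+
+    <code type="erl">
+%% Body-recursive version: builds the result while the calls return.
+double_body([H|T]) -> [2*H | double_body(T)];
+double_body([]) -> [].
+
+%% Tail-recursive version: accumulates and reverses at the end.
+double_tail(List) -> double_tail(List, []).
+
+double_tail([H|T], Acc) -> double_tail(T, [2*H | Acc]);
+double_tail([], Acc) -> lists:reverse(Acc).</code>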
+  </section>
+
+  <section>
+    <title>Myth: '++' is always bad</title>
+
+    <p>The <c>++</c> operator has, somewhat undeservedly, got a very bad reputation.
+    It probably has something to do with code like</p>
+
+    <p><em>DO NOT</em></p>
+    <code type="erl">
+naive_reverse([H|T]) ->
+    naive_reverse(T)++[H];
+naive_reverse([]) ->
+    [].</code>
+
+    <p>which is the most inefficient way there is to reverse a list.
+    Since the <c>++</c> operator copies its left operand, the result
+    will be copied again and again and again... leading to quadratic
+    complexity.</p>
+
+    <p>On the other hand, using <c>++</c> like this</p>
+
+    <p><em>OK</em></p>
+    <code type="erl">
+naive_but_ok_reverse([H|T], Acc) ->
+    naive_but_ok_reverse(T, [H]++Acc);
+naive_but_ok_reverse([], Acc) ->
+    Acc.</code>
+
+    <p>is not bad. Each list element will only be copied once.
+    The growing result <c>Acc</c> is the right operand
+    for the <c>++</c> operator, and it will <em>not</em> be copied.</p>
+
+    <p>Of course, experienced Erlang programmers would actually write</p>
+
+    <p><em>DO</em></p>
+    <code type="erl">
+vanilla_reverse([H|T], Acc) ->
+    vanilla_reverse(T, [H|Acc]);
+vanilla_reverse([], Acc) ->
+    Acc.</code>
+
+    <p>which is slightly more efficient because you don't build a
+    list element only to directly copy it. (Or it would be more efficient
+    if the compiler did not automatically rewrite <c>[H]++Acc</c>
+    to <c>[H|Acc]</c>.)</p>
+  </section>
+
+  <section>
+    <title>Myth: Strings are slow</title>
+
+    <p>Actually, string handling can be slow if done improperly.
+    In Erlang, you have to think a little more about how the strings
+    are used, choose an appropriate representation, and use
+    the <seealso marker="stdlib:re">re</seealso> module instead of the obsolete
+    <c>regexp</c> module if you are going to use regular expressions.</p>
+  </section>
+
+  <section>
+    <title>Myth: Repairing a Dets file is very slow</title>
+
+    <p>The repair time is still proportional to the number of records
+    in the file, but Dets repairs used to be much, much slower in the past.
+    Dets has been massively rewritten and improved.</p>
+  </section>
+
+  <section>
+    <title>Myth: BEAM is a stack-based byte-code virtual machine (and therefore slow)</title>
+
+    <p>BEAM is a register-based virtual machine. It has 1024 virtual registers
+    that are used for holding temporary values and for passing arguments when
+    calling functions. Variables that need to survive a function call are saved
+    to the stack.</p>
+
+    <p>BEAM is a threaded-code interpreter. Each instruction is a word pointing
+    directly to executable C code, making instruction dispatching very fast.</p>
+  </section>
+
+  <section>
+    <title>Myth: Use '_' to speed up your program when a variable is not used</title>
+
+    <p>That was once true, but since R6B the BEAM compiler is quite capable of
+    seeing for itself that a variable is not used.</p>
+  </section>
+
+</chapter>
+
diff --git a/system/doc/efficiency_guide/part.xml b/system/doc/efficiency_guide/part.xml
new file mode 100644
index 0000000000..2b78f35e2a
--- /dev/null
+++ b/system/doc/efficiency_guide/part.xml
@@ -0,0 +1,42 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE part SYSTEM "part.dtd">
+
+<part xmlns:xi="http://www.w3.org/2001/XInclude">
+  <header>
+    <copyright>
+      <year>2001</year><year>2009</year>
+      <holder>Ericsson AB. All Rights Reserved.</holder>
+    </copyright>
+    <legalnotice>
+      The contents of this file are subject to the Erlang Public License,
+      Version 1.1, (the "License"); you may not use this file except in
+      compliance with the License. You should have received a copy of the
+      Erlang Public License along with this software. If not, it can be
+      retrieved online at http://www.erlang.org/.
+
+      Software distributed under the License is distributed on an "AS IS"
+      basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+      the License for the specific language governing rights and limitations
+      under the License.
+ + </legalnotice> + + <title>Efficiency Guide </title> + <prepared>Ingela Anderton</prepared> + <docno></docno> + <date>2002-09-23</date> + <rev></rev> + </header> + <xi:include href="introduction.xml"/> + <xi:include href="myths.xml"/> + <xi:include href="commoncaveats.xml"/> + <xi:include href="binaryhandling.xml"/> + <xi:include href="listhandling.xml"/> + <xi:include href="functions.xml"/> + <xi:include href="tablesDatabases.xml"/> + <xi:include href="processes.xml"/> + <xi:include href="drivers.xml"/> + <xi:include href="advanced.xml"/> + <xi:include href="profiling.xml"/> +</part> + diff --git a/system/doc/efficiency_guide/processes.xml b/system/doc/efficiency_guide/processes.xml new file mode 100644 index 0000000000..a25ec53370 --- /dev/null +++ b/system/doc/efficiency_guide/processes.xml @@ -0,0 +1,264 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE chapter SYSTEM "chapter.dtd"> + +<chapter> + <header> + <copyright> + <year>2001</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + </legalnotice> + + <title>Processes</title> + <prepared>Bjorn Gustavsson</prepared> + <docno></docno> + <date>2007-11-21</date> + <rev></rev> + <file>processes.xml</file> + </header> + + <section> + <title>Creation of an Erlang process</title> + + <p>An Erlang process is lightweight compared to operating + systems threads and processes.</p> + + <p>A newly spawned Erlang process uses 309 words of memory + in the non-SMP emulator without HiPE support. (SMP support + and HiPE support will both add to this size.) The size can + be found out like this:</p> + + <pre> +Erlang (BEAM) emulator version 5.6 [async-threads:0] [kernel-poll:false] + +Eshell V5.6 (abort with ^G) +1> <input>Fun = fun() -> receive after infinity -> ok end end.</input> +#Fun<...> +2> <input>{_,Bytes} = process_info(spawn(Fun), memory).</input> +{memory,1232} +3> <input>Bytes div erlang:system_info(wordsize).</input> +309</pre> + + <p>The size includes 233 words for the heap area (which includes the stack). + The garbage collector will increase the heap as needed.</p> + + <p>The main (outer) loop for a process <em>must</em> be tail-recursive. + If not, the stack will grow until the process terminates.</p> + + <p><em>DO NOT</em></p> + <code type="erl"> +loop() -> + receive + {sys, Msg} -> + handle_sys_msg(Msg), + loop(); + {From, Msg} -> + Reply = handle_msg(Msg), + From ! Reply, + loop() + end, + io:format("Message is processed~n", []).</code> + + <p>The call to <c>io:format/2</c> will never be executed, but a + return address will still be pushed to the stack each time + <c>loop/0</c> is called recursively. The correct tail-recursive + version of the function looks like this:</p> + + <p><em>DO</em></p> +<code type="erl"> + loop() -> + receive + {sys, Msg} -> + handle_sys_msg(Msg), + loop(); + {From, Msg} -> + Reply = handle_msg(Msg), + From ! 
Reply,
+            loop()
+    end.</code>
+
+  <section>
+    <title>Initial heap size</title>
+
+    <p>The default initial heap size of 233 words is quite conservative
+    in order to support Erlang systems with hundreds of thousands or
+    even millions of processes. The garbage collector will grow and
+    shrink the heap as needed.</p>
+
+    <p>In a system that uses comparatively few processes, performance
+    <em>might</em> be improved by increasing the minimum heap size, using either
+    the <c>+h</c> option for
+    <seealso marker="erts:erl">erl</seealso> or, on a per-process
+    basis, the <c>min_heap_size</c> option for
+    <seealso marker="erts:erlang#spawn_opt/4">spawn_opt/4</seealso>.</p>
+
+    <p>The gain is twofold: Firstly, although the garbage collector will
+    grow the heap, it will grow it step by step, which is more
+    costly than directly establishing a larger heap when the process
+    is spawned. Secondly, the garbage collector may also shrink the
+    heap if it is much larger than the amount of data stored on it;
+    setting the minimum heap size will prevent that.</p>
+
+    <warning><p>The emulator will probably use more memory, and because garbage
+    collections occur less frequently, huge binaries could be
+    kept much longer.</p></warning>
+
+    <p>In systems with many processes, computation tasks that run
+    for a short time could be spawned off into a new process with
+    a higher minimum heap size. When the process is done, it will
+    send the result of the computation to another process and terminate.
+    If the minimum heap size is calculated properly, the process may not
+    have to do any garbage collections at all.
+    <em>This optimization should not be attempted
+    without proper measurements.</em></p>
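+
+    <p>As a minimal sketch (the <c>10000</c>-word figure and the helper names
+    below are only illustrative, not a recommendation), such a short-lived
+    worker could be spawned like this:</p>
+
+    <code type="erl">
+%% Spawn a short-lived worker with a larger minimum heap, so that it may
+%% finish without any garbage collection. Compute is a fun supplied by
+%% the caller that performs the actual work (hypothetical in this sketch).
+spawn_worker(Parent, Compute) when is_function(Compute, 0) ->
+    spawn_opt(fun() -> Parent ! {result, Compute()} end,
+              [{min_heap_size, 10000}]).</code>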
+  </section>
+
+  </section>
+
+  <section>
+    <title>Process messages</title>
+
+    <p>All data in messages between Erlang processes is copied, with
+    the exception of
+    <seealso marker="binaryhandling#refc_binary">refc binaries</seealso>
+    on the same Erlang node.</p>
+
+    <p>When a message is sent to a process on another Erlang node,
+    it will first be encoded to the Erlang External Format before
+    being sent via a TCP/IP socket. The receiving Erlang node decodes
+    the message and distributes it to the right process.</p>
+
+    <section>
+      <title>The constant pool</title>
+
+      <p>Constant Erlang terms (also called <em>literals</em>) are now
+      kept in constant pools; each loaded module has its own pool.
+      The following function</p>
+
+      <p><em>DO</em> (in R12B and later)</p>
+      <code type="erl">
+days_in_month(M) ->
+    element(M, {31,28,31,30,31,30,31,31,30,31,30,31}).</code>
+
+      <p>will no longer build the tuple every time it is called (only
+      to have it discarded the next time the garbage collector is run), but
+      the tuple will be located in the module's constant pool.</p>
+
+      <p>But if a constant is sent to another process (or stored in
+      an ETS table), it will be <em>copied</em>.
+      The reason is that the run-time system must be able
+      to keep track of all references to constants in order to properly
+      unload code containing constants. (When the code is unloaded,
+      the constants will be copied to the heap of the processes that refer
+      to them.) The copying of constants might be eliminated in a future
+      release.</p>
+    </section>
+
+    <section>
+      <title>Loss of sharing</title>
+
+      <p>Shared sub-terms are <em>not</em> preserved when a term is sent
+      to another process, passed as the initial process arguments in
+      the <c>spawn</c> call, or stored in an ETS table.
+      That is an optimization. Most applications do not send messages
+      with shared sub-terms.</p>
+
+      <p>Here is an example of how a shared sub-term can be created:</p>
+
+      <code type="erl">
+kilo_byte() ->
+    kilo_byte(10, [42]).
+
+kilo_byte(0, Acc) ->
+    Acc;
+kilo_byte(N, Acc) ->
+    kilo_byte(N-1, [Acc|Acc]).</code>
+
+      <p><c>kilo_byte/0</c> creates a deep list. If we call
+      <c>list_to_binary/1</c>, we can convert the deep list to a binary
+      of 1024 bytes:</p>
+
+      <pre>
+1> <input>byte_size(list_to_binary(efficiency_guide:kilo_byte())).</input>
+1024</pre>
+
+      <p>Using the <c>erts_debug:size/1</c> BIF, we can see that the
+      deep list only requires 22 words of heap space:</p>
+
+      <pre>
+2> <input>erts_debug:size(efficiency_guide:kilo_byte()).</input>
+22</pre>
+
+      <p>Using the <c>erts_debug:flat_size/1</c> BIF, we can calculate
+      the size of the deep list if sharing is ignored. It will be
+      the size of the list when it has been sent to another process
+      or stored in an ETS table:</p>
+
+      <pre>
+3> <input>erts_debug:flat_size(efficiency_guide:kilo_byte()).</input>
+4094</pre>
+
+      <p>We can verify that sharing will be lost if we insert the
+      data into an ETS table:</p>
+
+      <pre>
+4> <input>T = ets:new(tab, []).</input>
+17
+5> <input>ets:insert(T, {key,efficiency_guide:kilo_byte()}).</input>
+true
+6> <input>erts_debug:size(element(2, hd(ets:lookup(T, key)))).</input>
+4094
+7> <input>erts_debug:flat_size(element(2, hd(ets:lookup(T, key)))).</input>
+4094</pre>
+
+      <p>When the data has passed through an ETS table,
+      <c>erts_debug:size/1</c> and <c>erts_debug:flat_size/1</c>
+      return the same value. Sharing has been lost.</p>
+
+      <p>In a future release of Erlang/OTP, we might implement a
+      way to (optionally) preserve sharing. We have no plans to make
+      preserving sharing the default behaviour, since that would
+      penalize the vast majority of Erlang applications.</p>
+    </section>
+  </section>
+
+  <section>
+    <title>The SMP emulator</title>
+
+    <p>The SMP emulator (introduced in R11B) will take advantage of a
+    multi-core or multi-CPU computer by running several Erlang scheduler
+    threads (typically, the same as the number of cores). Each scheduler
+    thread schedules Erlang processes in the same way as the Erlang scheduler
+    in the non-SMP emulator.</p>
+
+    <p>To gain performance by using the SMP emulator, your application
+    <em>must have more than one runnable Erlang process</em> most of the time.
+    Otherwise, the Erlang emulator can still only run one Erlang process
+    at a time, but you must still pay the overhead for locking. Although
+    we try to reduce the locking overhead as much as possible, it will never
+    become exactly zero.</p>
+
+    <p>Benchmarks that may seem to be concurrent are often sequential.
+    The estone benchmark, for instance, is entirely sequential.
So is the most common
+    implementation of the "ring benchmark"; usually only one process
+    is active, while the others wait in a <c>receive</c> statement.</p>
+
+    <p>The <seealso marker="percept:percept">percept</seealso> application
+    can be used to profile your application to see how much potential (or lack
+    thereof) it has for concurrency.</p>
+  </section>
+
+</chapter>
+
diff --git a/system/doc/efficiency_guide/profiling.xml b/system/doc/efficiency_guide/profiling.xml
new file mode 100644
index 0000000000..65d13408bc
--- /dev/null
+++ b/system/doc/efficiency_guide/profiling.xml
@@ -0,0 +1,258 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE chapter SYSTEM "chapter.dtd">
+
+<chapter>
+  <header>
+    <copyright>
+      <year>2001</year><year>2009</year>
+      <holder>Ericsson AB. All Rights Reserved.</holder>
+    </copyright>
+    <legalnotice>
+      The contents of this file are subject to the Erlang Public License,
+      Version 1.1, (the "License"); you may not use this file except in
+      compliance with the License. You should have received a copy of the
+      Erlang Public License along with this software. If not, it can be
+      retrieved online at http://www.erlang.org/.
+
+      Software distributed under the License is distributed on an "AS IS"
+      basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+      the License for the specific language governing rights and limitations
+      under the License.
+
+    </legalnotice>
+
+    <title>Profiling</title>
+    <prepared>Ingela Anderton</prepared>
+    <docno></docno>
+    <date>2001-11-02</date>
+    <rev></rev>
+    <file>profiling.xml</file>
+  </header>
+
+  <section>
+    <title>Do not guess about performance - profile</title>
+
+    <p>Even experienced software developers often guess wrong about where
+    the performance bottlenecks are in their programs.</p>
+
+    <p>Therefore, profile your program to see where the performance
+    bottlenecks are and concentrate on optimizing them.</p>
+
+    <p>Erlang/OTP contains several tools to help find bottlenecks.</p>
+
+    <p><c>fprof</c> and <c>eprof</c> provide the most detailed information
+    about where the time is spent, but they significantly slow down the
+    programs they profile.</p>
+
+    <p>If the program is too big to be profiled by <c>fprof</c> or <c>eprof</c>,
+    <c>cover</c> and <c>cprof</c> can be used to locate parts of the
+    code that should be more thoroughly profiled using <c>fprof</c> or
+    <c>eprof</c>.</p>
+
+    <p><c>cover</c> provides execution counts per line per process,
+    with less overhead than <c>fprof/eprof</c>. Execution counts can,
+    with some caution, be used to locate potential performance bottlenecks.
+    The most lightweight tool is <c>cprof</c>, but it only provides execution
+    counts on a function basis (for all processes, not per process).</p>
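+
+    <p>As a minimal sketch of a profiling session (profiling a call to
+    <c>lists:seq/2</c> here merely stands in for running your own code),
+    using <c>fprof</c> from the Erlang shell can look like this:</p>
+
+    <code type="erl">
+%% Trace one function call, build the profile data, and print the analysis.
+fprof:apply(lists, seq, [1, 10000]),
+fprof:profile(),
+fprof:analyse().</code>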
+  </section>
+
+  <section>
+    <title>Big systems</title>
+    <p>If you have a big system, it might be interesting to run profiling
+    on a simulated and limited scenario to start with. But bottlenecks
+    have a tendency to only appear or cause problems when
+    there are many things going on at the same time, and when there
+    are many nodes involved. Therefore it is desirable to also run
+    profiling in a system test plant on a real target system.</p>
+    <p>When your system is big you do not want to run the profiling
+    tools on the whole system. You want to concentrate on processes
+    and modules that you know are central and stand for a big part of the
+    execution.</p>
+  </section>
+
+  <section>
+    <title>What to look for</title>
+    <p>When analyzing the result file from the profiling activity,
+    you should look for functions that are called many
+    times and have a long "own" execution time (time excluding calls
+    to other functions). Functions that are just called very
+    many times can also be interesting, as even small things can add
+    up to quite a bit if they are repeated often. Then you need to
+    ask yourself what you can do to reduce this time. Appropriate
+    types of questions to ask yourself are:</p>
+    <list type="bulleted">
+      <item>Can I reduce the number of times the function is called?</item>
+      <item>Are there tests that can be run less often if I change
+      the order of tests?</item>
+      <item>Are there redundant tests that can be removed?</item>
+      <item>Is some expression calculated that gives the same result
+      each time?</item>
+      <item>Are there other ways of doing this that are equivalent and
+      more efficient?</item>
+      <item>Can I use another internal data representation to make
+      things more efficient?</item>
+    </list>
+    <p>These questions are not always trivial to answer. You might
+    need to do some benchmarks to back up your theory, to avoid
+    making things slower if your theory is wrong. See <seealso marker="#benchmark">benchmarking</seealso>.</p>
+  </section>
+
+  <section>
+    <title>Tools</title>
+
+    <section>
+      <title>fprof</title>
+      <p><c>fprof</c> measures the execution time for each function,
+      both own time, i.e. how much time a function has used for its
+      own execution, and accumulated time, i.e. including called
+      functions. The values are displayed per process. You also get
+      to know how many times each function has been
+      called. <c>fprof</c> is based on trace to file in order to
+      minimize runtime performance impact. Using fprof is just a
+      matter of calling a few library functions; see the fprof manual
+      page in the tools application.</p>
+      <p><c>fprof</c> was introduced in version R8 of Erlang/OTP. Its
+      predecessor <c>eprof</c>, which is based on the Erlang trace BIFs,
+      is still available; see the eprof manual page in the
+      tools application. Eprof shows how much time has been used by
+      each process, and in which function calls this time has been
+      spent. Time is shown as a percentage of total time, not as
+      absolute time.</p>
+    </section>
+
+    <section>
+      <title>cover</title>
+      <p><c>cover</c>'s primary use is coverage analysis to verify
+      test cases, making sure all relevant code is covered.
+      <c>cover</c> counts how many times each executable line of
+      code is executed when a program is run. This is done on a per
+      module basis. Of course this information can be used to
+      determine what code is run very frequently and could therefore
+      be a subject for optimization. Using cover is just a matter of
+      calling a few library functions; see the cover manual
+      page in the tools application.</p>
+    </section>
+
+    <section>
+      <title>cprof</title>
+      <p><c>cprof</c> is something in between <c>fprof</c> and
+      <c>cover</c> regarding features. It counts how many times each
+      function is called when the program is run, on a per module
+      basis.
<c>cprof</c> has a low performance degradation (versus + <c>fprof</c> and <c>eprof</c>) and does not need to recompile + any modules to profile (versus <c>cover</c>).</p> + </section> + + <section> + <title>Tool summarization</title> + <table> + <row> + <cell align="center" valign="middle">Tool</cell> + <cell align="center" valign="middle">Results</cell> + <cell align="center" valign="middle">Size of result</cell> + <cell align="center" valign="middle">Effects on program execution time</cell> + <cell align="center" valign="middle">Records number of calls</cell> + <cell align="center" valign="middle">Records Execution time</cell> + <cell align="center" valign="middle">Records called by</cell> + <cell align="center" valign="middle">Records garbage collection</cell> + </row> + <row> + <cell align="left" valign="middle"><c>fprof </c></cell> + <cell align="left" valign="middle">per process to screen/file </cell> + <cell align="left" valign="middle">large </cell> + <cell align="left" valign="middle">significant slowdown </cell> + <cell align="left" valign="middle">yes </cell> + <cell align="left" valign="middle">total and own</cell> + <cell align="left" valign="middle">yes </cell> + <cell align="left" valign="middle">yes </cell> + </row> + <row> + <cell align="left" valign="middle"><c>eprof </c></cell> + <cell align="left" valign="middle">per process/function to screen/file </cell> + <cell align="left" valign="middle">medium </cell> + <cell align="left" valign="middle">significant slowdown </cell> + <cell align="left" valign="middle">yes </cell> + <cell align="left" valign="middle">only total </cell> + <cell align="left" valign="middle">no </cell> + <cell align="left" valign="middle">no </cell> + </row> + <row> + <cell align="left" valign="middle"><c>cover </c></cell> + <cell align="left" valign="middle">per module to screen/file</cell> + <cell align="left" valign="middle">small </cell> + <cell align="left" valign="middle">moderate slowdown</cell> + <cell align="left" valign="middle">yes, per line </cell> + <cell align="left" valign="middle">no </cell> + <cell align="left" valign="middle">no </cell> + <cell align="left" valign="middle">no </cell> + </row> + <row> + <cell align="left" valign="middle"><c>cprof </c></cell> + <cell align="left" valign="middle">per module to caller</cell> + <cell align="left" valign="middle">small </cell> + <cell align="left" valign="middle">small slowdown </cell> + <cell align="left" valign="middle">yes </cell> + <cell align="left" valign="middle">no </cell> + <cell align="left" valign="middle">no </cell> + <cell align="left" valign="middle">no </cell> + </row> + <tcaption></tcaption> + </table> + </section> + </section> + + <section> + <marker id="benchmark"></marker> + <title>Benchmarking</title> + + <p>The main purpose of benchmarking is to find out which + implementation of a given algorithm or function is the fastest. + Benchmarking is far from an exact science. Today's operating systems + generally run background tasks that are difficult to turn off. + Caches and multiple CPU cores doesn't make it any easier. + It would be best to run Unix-computers in single-user mode when + benchmarking, but that is inconvenient to say the least for casual + testing.</p> + + <p>Benchmarks can measure wall-clock time or CPU time.</p> + + <p><seealso marker="stdlib:timer#tc/3">timer:tc/3</seealso> measures + wall-clock time. The advantage with wall-clock time is that I/O, + swapping, and other activities in the operating-system kernel are + included in the measurements. 
The disadvantage is that
+      the measurements will vary wildly. Usually it is best to run the
+      benchmark several times and note the shortest time - that time should
+      be the minimum time that is possible to achieve under the best of
+      circumstances.</p>
+
+      <p><seealso marker="erts:erlang#statistics/1">statistics/1</seealso>
+      with the argument <c>runtime</c> measures CPU time spent in the Erlang
+      virtual machine. The advantage is that the results are more
+      consistent from run to run. The disadvantage is that the time
+      spent in the operating system kernel (such as swapping and I/O)
+      is not included. Therefore, measuring CPU time is misleading if
+      any I/O (file or sockets) is involved.</p>
+
+      <p>It is probably a good idea to do both wall-clock measurements and
+      CPU time measurements.</p>
+
+      <p>Some additional advice:</p>
+
+      <list type="bulleted">
+      <item>The granularity of both types of measurement can be quite
+      high, so you should make sure that each individual measurement
+      lasts for at least several seconds.</item>
+
+      <item>To make the test fair, each new test run should run in its own,
+      newly created Erlang process. Otherwise, if all tests run in the
+      same process, the later tests will start out with larger heap sizes
+      and therefore probably do fewer garbage collections. You could
+      also consider restarting the Erlang emulator between each test.</item>
+
+      <item>Do not assume that the fastest implementation of a given algorithm
+      on computer architecture X is also the fastest on computer architecture Y.</item>
+
+      </list>
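+
+      <p>As a small sketch of both kinds of measurement (sorting a reversed
+      list here merely stands in for the code you want to benchmark):</p>
+
+      <code type="erl">
+bench() ->
+    L = lists:reverse(lists:seq(1, 100000)),
+    %% Wall-clock time, in microseconds, for one call.
+    {WallMicros, _Sorted} = timer:tc(lists, sort, [L]),
+    %% CPU time, in milliseconds, as reported by the emulator.
+    statistics(runtime),
+    lists:sort(L),
+    {_, CpuMillis} = statistics(runtime),
+    {WallMicros, CpuMillis}.</code>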
+    </section>
+</chapter>
+
diff --git a/system/doc/efficiency_guide/tablesDatabases.xml b/system/doc/efficiency_guide/tablesDatabases.xml
new file mode 100644
index 0000000000..4b53348c4c
--- /dev/null
+++ b/system/doc/efficiency_guide/tablesDatabases.xml
@@ -0,0 +1,379 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE chapter SYSTEM "chapter.dtd">
+
+<chapter>
+  <header>
+    <copyright>
+      <year>2001</year><year>2009</year>
+      <holder>Ericsson AB. All Rights Reserved.</holder>
+    </copyright>
+    <legalnotice>
+      The contents of this file are subject to the Erlang Public License,
+      Version 1.1, (the "License"); you may not use this file except in
+      compliance with the License. You should have received a copy of the
+      Erlang Public License along with this software. If not, it can be
+      retrieved online at http://www.erlang.org/.
+
+      Software distributed under the License is distributed on an "AS IS"
+      basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+      the License for the specific language governing rights and limitations
+      under the License.
+
+    </legalnotice>
+
+    <title>Tables and databases</title>
+    <prepared>Ingela Anderton</prepared>
+    <docno></docno>
+    <date>2001-08-07</date>
+    <rev></rev>
+    <file>tablesDatabases.xml</file>
+  </header>
+
+  <section>
+    <title>Ets, Dets and Mnesia</title>
+    <p>Every example using Ets has a corresponding example in
+    Mnesia. In general, all Ets examples also apply to Dets tables.</p>
+
+    <section>
+      <title>Select/Match operations</title>
+      <p>Select/Match operations on Ets and Mnesia tables can become
+      very expensive operations. They usually need to scan the complete
+      table. You should try to structure your
+      data so that you minimize the need for select/match
+      operations. However, if you really need a select/match operation,
+      it will still be more efficient than using <c>tab2list</c>.
+      Examples of this, and also of ways to avoid select/match, will be provided in
+      some of the following sections. The functions
+      <c>ets:select/2</c> and <c>mnesia:select/3</c> should be preferred over
+      <c>ets:match/2</c>, <c>ets:match_object/2</c>, and <c>mnesia:match_object/3</c>.</p>
+      <note>
+        <p>There are exceptions when the complete table is not
+        scanned, for instance if part of the key is bound when searching an
+        <c>ordered_set</c> table, or if it is a Mnesia
+        table and there is a secondary index on the field that is
+        selected/matched. If the key is fully bound, there is, of course,
+        no point in doing a select/match, unless you have a bag table and
+        you are only interested in a sub-set of the elements with
+        the specific key.</p>
+      </note>
+      <p>When creating a record to be used in a select/match operation, you
+      want most of the fields to have the value '_'. The easiest and fastest way
+      to do that is as follows:</p>
+      <pre>
+#person{age = 42, _ = '_'}. </pre>
+    </section>
+
+    <section>
+      <title>Deleting an element</title>
+      <p>The delete operation is considered
+      successful if the element was not present in the table. Hence
+      all attempts to check that the element is present in the
+      Ets/Mnesia table before deletion are unnecessary. Here follows
+      an example for Ets tables.</p>
+      <p><em>DO</em></p>
+      <pre>
+...
+ets:delete(Tab, Key),
+...</pre>
+      <p><em>DO NOT</em></p>
+      <pre>
+...
+case ets:lookup(Tab, Key) of
+    [] ->
+        ok;
+    [_|_] ->
+        ets:delete(Tab, Key)
+end,
+...</pre>
+    </section>
+
+    <section>
+      <title>Data fetching</title>
+      <p>Do not fetch data that you already have! Consider that you
+      have a module that handles the abstract data type Person. You
+      export the interface function <c>print_person/1</c> that uses the internal functions
+      <c>print_name/1</c>, <c>print_age/1</c>, <c>print_occupation/1</c>.</p>
+      <note>
+        <p>If the functions <c>print_name/1</c> and so on had been interface
+        functions, the matter would come into a whole new light, as you
+        do not want the user of the interface to know about the
+        internal data representation.</p>
+      </note>
+      <p><em>DO</em></p>
+      <code type="erl">
+%%% Interface function
+print_person(PersonId) ->
+    %% Look up the person in the named table person,
+    case ets:lookup(person, PersonId) of
+        [Person] ->
+            print_name(Person),
+            print_age(Person),
+            print_occupation(Person);
+        [] ->
+            io:format("No person with ID = ~p~n", [PersonId])
+    end.
+
+%%% Internal functions
+print_name(Person) ->
+    io:format("Name: ~p~n", [Person#person.name]).
+
+print_age(Person) ->
+    io:format("Age: ~p~n", [Person#person.age]).
+
+print_occupation(Person) ->
+    io:format("Occupation: ~p~n", [Person#person.occupation]).</code>
+      <p><em>DO NOT</em></p>
+      <code type="erl">
+%%% Interface function
+print_person(PersonId) ->
+    %% Look up the person in the named table person,
+    case ets:lookup(person, PersonId) of
+        [Person] ->
+            print_name(PersonId),
+            print_age(PersonId),
+            print_occupation(PersonId);
+        [] ->
+            io:format("No person with ID = ~p~n", [PersonId])
+    end.
+
+%%% Internal functions
+print_name(PersonId) ->
+    [Person] = ets:lookup(person, PersonId),
+    io:format("Name: ~p~n", [Person#person.name]).
+
+print_age(PersonId) ->
+    [Person] = ets:lookup(person, PersonId),
+    io:format("Age: ~p~n", [Person#person.age]).
+
+print_occupation(PersonId) ->
+    [Person] = ets:lookup(person, PersonId),
+    io:format("Occupation: ~p~n", [Person#person.occupation]).</code>
+    </section>
+
+    <section>
+      <title>Non-persistent data storage</title>
+      <p>For non-persistent database storage, prefer Ets tables over
+      Mnesia local_content tables.
Even the Mnesia <c>dirty_write</c> + operations carry a fixed overhead compared to Ets writes. + Mnesia must check if the table is replicated or has indices, + this involves at least one Ets lookup for each + <c>dirty_write</c>. Thus, Ets writes will always be faster than + Mnesia writes.</p> + </section> + + <section> + <title>tab2list</title> + <p>Assume we have an Ets-table, which uses <c>idno</c> as key, + and contains:</p> + <pre> +[#person{idno = 1, name = "Adam", age = 31, occupation = "mailman"}, + #person{idno = 2, name = "Bryan", age = 31, occupation = "cashier"}, + #person{idno = 3, name = "Bryan", age = 35, occupation = "banker"}, + #person{idno = 4, name = "Carl", age = 25, occupation = "mailman"}]</pre> + <p>If we <em>must</em> return all data stored in the Ets-table we + can use <c>ets:tab2list/1</c>. However, usually we are only + interested in a subset of the information in which case + <c>ets:tab2list/1</c> is expensive. If we only want to extract + one field from each record, e.g., the age of every person, we + should use:</p> + <p><em>DO</em></p> + <pre> +... +ets:select(Tab,[{ #person{idno='_', + name='_', + age='$1', + occupation = '_'}, + [], + ['$1']}]), +...</pre> + <p><em>DO NOT</em></p> + <pre> +... +TabList = ets:tab2list(Tab), +lists:map(fun(X) -> X#person.age end, TabList), +...</pre> + <p>If we are only interested in the age of all persons named + Bryan, we should:</p> + <p><em>DO</em></p> + <pre> +... +ets:select(Tab,[{ #person{idno='_', + name="Bryan", + age='$1', + occupation = '_'}, + [], + ['$1']}]), +...</pre> + <p><em>DO NOT</em></p> + <pre> +... +TabList = ets:tab2list(Tab), +lists:foldl(fun(X, Acc) -> case X#person.name of + "Bryan" -> + [X#person.age|Acc]; + _ -> + Acc + end + end, [], TabList), +...</pre> + <p><em>REALLY DO NOT</em></p> + <pre> +... +TabList = ets:tab2list(Tab), +BryanList = lists:filter(fun(X) -> X#person.name == "Bryan" end, + TabList), +lists:map(fun(X) -> X#person.age end, BryanList), +...</pre> + <p>If we need all information stored in the Ets table about + persons named Bryan we should:</p> + <p><em>DO</em></p> + <pre> +... +ets:select(Tab, [{#person{idno='_', + name="Bryan", + age='_', + occupation = '_'}, [], ['$_']}]), +...</pre> + <p><em>DO NOT</em></p> + <pre> +... +TabList = ets:tab2list(Tab), +lists:filter(fun(X) -> X#person.name == "Bryan" end, TabList), +...</pre> + </section> + + <section> + <title>Ordered_set tables</title> + <p>If the data in the table should be accessed so that the order + of the keys in the table is significant, the table type + <c>ordered_set</c> could be used instead of the more usual + <c>set</c> table type. An <c>ordered_set</c> is always + traversed in Erlang term order with regard to the key field + so that return values from functions such as <c>select</c>, + <c>match_object</c>, and <c>foldl</c> are ordered by the key + values. Traversing an <c>ordered_set</c> with the <c>first</c> and + <c>next</c> operations also returns the keys ordered.</p> + <note> + <p>An <c>ordered_set</c> only guarantees that + objects are processed in <em>key</em> order. Results from functions as + <c>ets:select/2</c> appear in the <em>key</em> order even if + the key is not included in the result.</p> + </note> + </section> + </section> + + <section> + <title>Ets specific</title> + + <section> + <title>Utilizing the keys of the Ets table</title> + <p>An Ets table is a single key table (either a hash table or a + tree ordered by the key) and should be used as one. 
In other words, use the key to look up things whenever possible. A
+      lookup by a known key in a set Ets table is constant, and for an
+      ordered_set Ets table it is O(logN). A key lookup is always
+      preferable to a call where the whole table has to be
+      scanned. In the examples above, the field <c>idno</c> is the
+      key of the table, and all lookups where only the name is known
+      will result in a complete scan of the (possibly large) table
+      for a matching result.</p>
+      <p>A simple solution would be to use the <c>name</c> field as
+      the key instead of the <c>idno</c> field, but that would cause
+      problems if the names were not unique. A more general solution
+      would be to create a second table with name as key and idno as
+      data, i.e. to index (invert) the table with regard to the
+      <c>name</c> field. The second table would of course have to be
+      kept consistent with the master table. Mnesia could do this
+      for you, but a home-brewed index table can be very efficient
+      compared to the overhead involved in using Mnesia.</p>
+      <p>An index table for the table in the previous examples would
+      have to be a bag (as keys would appear more than once) and could
+      have the following contents:</p>
+      <pre>
+
+[#index_entry{name="Adam", idno=1},
+ #index_entry{name="Bryan", idno=2},
+ #index_entry{name="Bryan", idno=3},
+ #index_entry{name="Carl", idno=4}]</pre>
+      <p>Given this index table, a lookup of the <c>age</c> fields for
+      all persons named "Bryan" could be done like this:</p>
+      <pre>
+...
+MatchingIDs = ets:lookup(IndexTable,"Bryan"),
+lists:map(fun(#index_entry{idno = ID}) ->
+                 [#person{age = Age}] = ets:lookup(PersonTable, ID),
+                 Age
+          end,
+          MatchingIDs),
+...</pre>
+      <p>Note that the code above never uses <c>ets:match/2</c> but
+      instead utilizes the <c>ets:lookup/2</c> call. The
+      <c>lists:map/2</c> call is only used to traverse the <c>idno</c>s
+      matching the name "Bryan" in the table; therefore the number of lookups
+      in the master table is minimized.</p>
+      <p>Keeping an index table introduces some overhead when
+      inserting records in the table; therefore the number of operations
+      gained from the table has to be weighed against the number of
+      operations inserting objects in the table. However, note that the gain when
+      the key can be used to look up elements is significant.</p>
+    </section>
+  </section>
+
+  <section>
+    <title>Mnesia specific</title>
+
+    <section>
+      <title>Secondary index</title>
+      <p>If you frequently do a lookup on a field that is not the
+      key of the table, you will lose performance using
+      "mnesia:select/match_object", as this function will traverse the
+      whole table. You may create a secondary index instead and
+      use "mnesia:index_read" to get faster access; however, this
+      will require more memory. Example:</p>
+      <pre>
+-record(person, {idno, name, age, occupation}).
+        ...
+{atomic, ok} =
+mnesia:create_table(person, [{index,[#person.age]},
+                              {attributes,
+                                    record_info(fields, person)}]),
+{atomic, ok} = mnesia:add_table_index(person, age),
+...
+
+PersonsAge42 =
+     mnesia:dirty_index_read(person, 42, #person.age),
+...</pre>
+    </section>
+
+    <section>
+      <title>Transactions</title>
+      <p>Transactions are a way to guarantee that the distributed
+      Mnesia database remains consistent, even when many different
+      processes update it in parallel. However, if you have
+      real-time requirements, it is recommended to use dirty
+      operations instead of transactions.
When using the dirty
+      operations you lose the consistency guarantee; this is usually
+      solved by letting only one process update the table. Other
+      processes have to send update requests to that process.</p>
+      <pre>
+...
+% Using a transaction
+
+Fun = fun() ->
+          [mnesia:read({Table, Key}),
+           mnesia:read({Table2, Key2})]
+      end,
+
+{atomic, [Result1, Result2]} = mnesia:transaction(Fun),
+...
+
+% Same thing using dirty operations
+...
+
+Result1 = mnesia:dirty_read({Table, Key}),
+Result2 = mnesia:dirty_read({Table2, Key2}),
+...</pre>
+    </section>
+  </section>
+</chapter>
+
diff --git a/system/doc/efficiency_guide/xmlfiles.mk b/system/doc/efficiency_guide/xmlfiles.mk
new file mode 100644
index 0000000000..fa2fe44eec
--- /dev/null
+++ b/system/doc/efficiency_guide/xmlfiles.mk
@@ -0,0 +1,32 @@
+#
+# %CopyrightBegin%
+#
+# Copyright Ericsson AB 2009. All Rights Reserved.
+#
+# The contents of this file are subject to the Erlang Public License,
+# Version 1.1, (the "License"); you may not use this file except in
+# compliance with the License. You should have received a copy of the
+# Erlang Public License along with this software. If not, it can be
+# retrieved online at http://www.erlang.org/.
+#
+# Software distributed under the License is distributed on an "AS IS"
+# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+# the License for the specific language governing rights and limitations
+# under the License.
+#
+# %CopyrightEnd%
+#
+EFF_GUIDE_CHAPTER_FILES = \
+	advanced.xml \
+	commoncaveats.xml \
+	binaryhandling.xml \
+	functions.xml \
+	introduction.xml \
+	listhandling.xml \
+	myths.xml \
+	part.xml \
+	processes.xml \
+	profiling.xml \
+	tablesDatabases.xml \
+	drivers.xml