From 300b419486c1ca88e33938f182d5d5a8b90fb73f Mon Sep 17 00:00:00 2001
From: Rickard Green <rickard@erlang.org>
Date: Thu, 17 Jun 2010 10:23:50 +0200
Subject: Rewrite ethread library

Large parts of the ethread library have been rewritten. The
ethread library is an Erlang runtime system internal, portable
thread library used by the runtime system itself.

The most notable improvement is a reader optimized rwlock
implementation which dramatically improves the performance of
read-lock/read-unlock operations on multi processor systems by
avoiding ping-ponging of the rwlock cache lines. The reader
optimized rwlock implementation is used by miscellaneous
rwlocks in the runtime system that are known to be read-locked
frequently, and can be enabled on ETS tables by passing the
`{read_concurrency, true}' option upon table creation. See the
documentation of `ets:new/2' for more information.

The ethread library can now also use the libatomic_ops library
for atomic memory accesses. This makes it possible for the
Erlang runtime system to utilize optimized atomic operations
on more platforms than before. Use the
`--with-libatomic_ops=PATH' configure command line argument
when specifying where the libatomic_ops installation is
located. The libatomic_ops library can be downloaded from:
http://www.hpl.hp.com/research/linux/atomic_ops/

The changed API of the ethread library has also caused
modifications in the Erlang runtime system. Preparations for
the upcoming "delayed deallocation" feature have also been made
since it depends on the ethread library.

Note: When building for x86, the ethread library will now use
instructions that first appeared on the pentium 4 processor. If
you want the runtime system to be compatible with older
processors (back to 486) you need to pass the
`--enable-ethread-pre-pentium4-compatibility' configure command
line argument when configuring the system.
---
 lib/stdlib/doc/src/ets.xml    | 34 +++++++++++++++++++++++++++--
 lib/stdlib/test/ets_SUITE.erl | 50 ++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 77 insertions(+), 7 deletions(-)

(limited to 'lib/stdlib')
diff --git a/lib/stdlib/doc/src/ets.xml b/lib/stdlib/doc/src/ets.xml
index ee1befc882..5df60a92e5 100644
--- a/lib/stdlib/doc/src/ets.xml
+++ b/lib/stdlib/doc/src/ets.xml
@@ -1039,15 +1039,22 @@ ets:select(Table,MatchSpec),</code>
               the owner terminates.</p>
           </item>
           <item>
+            <marker id="new_2_write_concurrency"></marker>
             <p><c>{write_concurrency,bool()}</c>
-              Performance tuning. Default is <c>false</c>, which means that the table
-              is optimized towards concurrent read access. An operation that
+              Performance tuning. Default is <c>false</c>. An operation that
               mutates (writes to) the table will obtain exclusive access,
               blocking any concurrent access of the same table until finished.
               If set to <c>true</c>, the table is optimized towards concurrent
               write access. Different objects of the same table can be mutated
               (and read) by concurrent processes. This is achieved to some degree
               at the expense of single access and concurrent reader performance.
+	      The <c>write_concurrency</c> option can be combined with the
+	      <seealso marker="#new_2_read_concurrency">read_concurrency</seealso>
+	      option. You typically want to combine these when large concurrent
+	      read bursts and large concurrent write bursts are common (see the
+	      documentation of the
+	      <seealso marker="#new_2_read_concurrency">read_concurrency</seealso>
+	      option for more information).
               Note that this option does not change any guarantees about 
               <seealso marker="#concurrency">atomicy and isolation</seealso>.
               Functions that makes such promises over several objects (like
@@ -1055,6 +1062,29 @@ ets:select(Table,MatchSpec),</code>
              <p>Table type <c>ordered_set</c> is not affected by this option in current
               implementation.</p>
           </item>
+          <item>
+            <marker id="new_2_read_concurrency"></marker>
+	    <p><c>{read_concurrency,bool()}</c>
+              Performance tuning. Default is <c>false</c>. When set to
+	      <c>true</c>, the table is optimized for concurrent read
+	      operations. When this option is enabled on a runtime system with
+	      SMP support, read operations become much cheaper; especially on
+	      systems with multiple physical processors. However, switching
+	      between read and write operations becomes more expensive. You
+	      typically want to enable this option when concurrent read
+	      operations are much more frequent than write operations, or when
+	      concurrent reads and writes come in large read and write
+	      bursts (i.e., lots of reads not interrupted by writes, and lots
+	      of writes not interrupted by reads). You typically do
+	      <em>not</em> want to enable this option when the common access
+	      pattern is a few read operations interleaved with a few write
+	      operations repeatedly. In this case you will get a performance
+	      degradation by enabling this option. The <c>read_concurrency</c>
+	      option can be combined with the
+	      <seealso marker="#new_2_write_concurrency">write_concurrency</seealso>
+	      option. You typically want to combine these when large concurrent
+	      read bursts and large concurrent write bursts are common.</p>
+          </item>
         </list>
       </desc>
     </func>
diff --git a/lib/stdlib/test/ets_SUITE.erl b/lib/stdlib/test/ets_SUITE.erl
index 13c87ca005..1f68c6b8c4 100644
--- a/lib/stdlib/test/ets_SUITE.erl
+++ b/lib/stdlib/test/ets_SUITE.erl
@@ -33,7 +33,7 @@
 -export([misc/1, dups/1, misc1/1, safe_fixtable/1, info/1, tab2list/1]).
 -export([files/1, tab2file/1, tab2file2/1, tab2file3/1, tabfile_ext1/1,
 	tabfile_ext2/1, tabfile_ext3/1, tabfile_ext4/1]).
--export([heavy/1, heavy_lookup/1, heavy_lookup_element/1]).
+-export([heavy/1, heavy_lookup/1, heavy_lookup_element/1, heavy_concurrent/1]).
 -export([lookup_element/1, lookup_element_mult/1]).
 -export([fold/1]).
 -export([foldl_ordered/1, foldr_ordered/1, foldl/1, foldr/1, fold_empty/1]).
@@ -89,7 +89,8 @@
 	 match_delete_do/1, match_delete3_do/1, firstnext_do/1, 
 	 slot_do/1, match1_do/1, match2_do/1, match_object_do/1, match_object2_do/1,
 	 misc1_do/1, safe_fixtable_do/1, info_do/1, dups_do/1, heavy_lookup_do/1,
-	 heavy_lookup_element_do/1, member_do/1, otp_5340_do/1, otp_7665_do/1, meta_wb_do/1
+	 heavy_lookup_element_do/1, member_do/1, otp_5340_do/1, otp_7665_do/1, meta_wb_do/1,
+	 do_heavy_concurrent/1
 	]).
 
 -include("test_server.hrl").
@@ -3877,7 +3878,7 @@ make_sub_binary(List, Num) when is_list(List) ->
     {_,B} = split_binary(Bin, N+1),
     B.
 
-heavy(suite) -> [heavy_lookup, heavy_lookup_element].
+heavy(suite) -> [heavy_lookup, heavy_lookup_element, heavy_concurrent].
 
 %% Lookup stuff like crazy...
 heavy_lookup(doc) -> ["Performs multiple lookups for every key ",
@@ -3940,6 +3941,44 @@ do_lookup_element(Tab, N, M) ->
     end.
 
 
+%% Test case: run do_heavy_concurrent/1 via repeat_for_opts/1; the argument is not used.
+heavy_concurrent(_Config) -> repeat_for_opts(do_heavy_concurrent).
+
+%% Create a public set table with Opts, start 500 linked workers running
+%% do_heavy_concurrent_proc/3 on it, wait for all to exit, then verify_etsmem/1.
+do_heavy_concurrent(Opts) ->
+    ?line Size = 20000,
+    ?line EtsMem = etsmem(),
+    ?line Tab = ets:new(blupp, [set, public, {keypos, 2} | Opts]),
+    ?line ok = fill_tab2(Tab, 0, Size),
+    ?line Procs = lists:map(
+		    fun (N) ->
+			    spawn_link(
+			      fun () ->
+				      do_heavy_concurrent_proc(Tab, Size, N)
+			      end)
+		    end,
+		    lists:seq(1, 500)),
+    ?line lists:foreach(fun (P) ->
+				%% Bind the reference so 'DOWN' is matched against it.
+				Mon = erlang:monitor(process, P),
+				receive {'DOWN', Mon, process, P, _} -> ok end
+			end,
+			Procs),
+    ?line true = ets:delete(Tab),
+    ?line verify_etsmem(EtsMem).
+
+do_heavy_concurrent_proc(_Tab, 0, _Offs) -> done;	% N has counted down to 0
+do_heavy_concurrent_proc(Tab, N, Offs) ->
+    case (N + Offs) rem 100 of
+	0 ->	% N+Offs is a multiple of 100: insert a tuple whose 2nd element is N
+	    Payload = {"here", are, "S O M E ", data, "toooooooooooooooooo", insert,
+		       make_ref(), make_ref(), make_ref()},
+	    true = ets:insert(Tab, {{self(), Payload}, N});
+	_ -> _ = ets:lookup(Tab, N)	% otherwise look N up and discard the result
+    end,
+    do_heavy_concurrent_proc(Tab, N-1, Offs).
+
 fold(suite) -> [foldl_ordered, foldr_ordered,
 		foldl, foldr,
 		fold_empty].
@@ -5336,7 +5375,7 @@ only_if_smp(Schedulers, Func) ->
 %% Repeat test function with different combination of table options
 %%       
 repeat_for_opts(F) ->
-    repeat_for_opts(F, [write_concurrency]).
+    repeat_for_opts(F, [write_concurrency, read_concurrency]).
 
 repeat_for_opts(F, OptGenList) when is_atom(F) ->
     repeat_for_opts(fun(Opts) -> ?MODULE:F(Opts) end, OptGenList);
@@ -5356,6 +5395,7 @@ repeat_for_opts(F, [Atom | Tail], AccList) when is_atom(Atom) ->
     repeat_for_opts(F, [repeat_for_opts_atom2list(Atom) | Tail ], AccList).
 
 repeat_for_opts_atom2list(all_types) -> [set,ordered_set,bag,duplicate_bag];
-repeat_for_opts_atom2list(write_concurrency) -> [{write_concurrency,false},{write_concurrency,true}].
+repeat_for_opts_atom2list(write_concurrency) -> [{write_concurrency,false},{write_concurrency,true}];
+repeat_for_opts_atom2list(read_concurrency) -> [{read_concurrency,false},{read_concurrency,true}].
 
     
-- 
cgit v1.2.3