7 files changed, 322 insertions, 65 deletions
diff --git a/lib/stdlib/doc/src/binary.xml b/lib/stdlib/doc/src/binary.xml
index f3d4edd30f..fd991f258b 100644
--- a/lib/stdlib/doc/src/binary.xml
+++ b/lib/stdlib/doc/src/binary.xml
@@ -505,15 +505,16 @@ store(Binary, GBSet) ->
 &lt;&lt;1,1,1,1,1 ...
 2> byte_size(A).
 100
-3> binary:referenced_byte_size(A)
+3> binary:referenced_byte_size(A).
 100
-4> &lt;&lt;_:10/binary,B:10/binary,_/binary&gt;&gt; = A.
+4> &lt;&lt;B:10/binary, C:90/binary&gt;&gt; = A.
 &lt;&lt;1,1,1,1,1 ...
-5> byte_size(B).
-10
-6> binary:referenced_byte_size(B)
-100</code>
-
+5> {byte_size(B), binary:referenced_byte_size(B)}.
+{10,10}
+6> {byte_size(C), binary:referenced_byte_size(C)}.
+{90,100}</code>
+      <p>In the above example, the small binary <c>B</c> was copied while the
+      larger binary <c>C</c> references binary <c>A</c>.</p>
       <note>
       <p>Binary data is shared among processes. If another process
       still references the larger binary, copying the part this
diff --git a/lib/stdlib/doc/src/gen_statem.xml b/lib/stdlib/doc/src/gen_statem.xml
index 6f6849a19d..ef548ad643 100644
--- a/lib/stdlib/doc/src/gen_statem.xml
+++ b/lib/stdlib/doc/src/gen_statem.xml
@@ -40,7 +40,7 @@
     <p>
       This reference manual describes types generated from the types
       in the <c>gen_statem</c> source code, so they are correct.
-      However, the generated descriptions also reflect the type hiearchy,
+      However, the generated descriptions also reflect the type hierarchy,
       which makes them kind of hard to read.
     </p>
     <p>
diff --git a/lib/stdlib/doc/src/notes.xml b/lib/stdlib/doc/src/notes.xml
index 092056ffde..66624c43be 100644
--- a/lib/stdlib/doc/src/notes.xml
+++ b/lib/stdlib/doc/src/notes.xml
@@ -306,6 +306,40 @@
 
 </section>
 
+<section><title>STDLIB 3.8.2.2</title>
+
+    <section><title>Fixed Bugs and Malfunctions</title>
+      <list>
+        <item>
+	    <p> Fix a bug that could cause a loop when formatting
+	    terms using the control sequences <c>p</c> or <c>P</c>
+	    and limiting the output with the option
+	    <c>chars_limit</c>. </p>
+          <p>
+	    Own Id: OTP-15875 Aux Id: ERL-967 </p>
+        </item>
+      </list>
+    </section>
+
+</section>
+
+<section><title>STDLIB 3.8.2.1</title>
+
+    <section><title>Fixed Bugs and Malfunctions</title>
+      <list>
+        <item>
+	    <p> Fix a bug that could cause a failure when formatting
+	    binaries using the control sequences <c>p</c> or <c>P</c>
+	    and limiting the output with the option
+	    <c>chars_limit</c>. </p>
+          <p>
+	    Own Id: OTP-15847 Aux Id: ERL-957 </p>
+        </item>
+      </list>
+    </section>
+
+</section>
+
 <section><title>STDLIB 3.8.2</title>
 
     <section><title>Fixed Bugs and Malfunctions</title>
diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl
index 726b409d4d..197564b895 100644
--- a/lib/stdlib/src/re.erl
+++ b/lib/stdlib/src/re.erl
@@ -33,6 +33,8 @@
 
 %%% BIFs
 
+-export([internal_run/4]).
+
 -export([version/0, compile/1, compile/2, run/2, run/3, inspect/2]).
 
 -spec version() -> binary().
@@ -100,6 +102,40 @@ run(_, _) ->
 run(_, _, _) ->
     erlang:nif_error(undef).
 
+-spec internal_run(Subject, RE, Options, FirstCall) -> {match, Captured} |
+                                                       match |
+                                                       nomatch |
+                                                       {error, ErrType} when
+      Subject :: iodata() | unicode:charlist(),
+      RE :: mp() | iodata() | unicode:charlist(),
+      Options :: [Option],
+      Option :: anchored | global | notbol | noteol | notempty 
+	      | notempty_atstart | report_errors
+              | {offset, non_neg_integer()} |
+		{match_limit, non_neg_integer()} |
+		{match_limit_recursion, non_neg_integer()} |
+                {newline, NLSpec :: nl_spec()} |
+                bsr_anycrlf | bsr_unicode | {capture, ValueSpec} |
+                {capture, ValueSpec, Type} | CompileOpt,
+      Type :: index | list | binary,
+      ValueSpec :: all | all_but_first | all_names | first | none | ValueList,
+      ValueList :: [ValueID],
+      ValueID :: integer() | string() | atom(),
+      CompileOpt :: compile_option(),
+      Captured :: [CaptureData] | [[CaptureData]],
+      CaptureData :: {integer(), integer()}
+                   | ListConversionData
+                   | binary(),
+      ListConversionData :: string()
+                          | {error, string(), binary()}
+                          | {incomplete, string(), binary()},
+      ErrType :: match_limit | match_limit_recursion | {compile,  CompileErr}, 
+      CompileErr :: {ErrString :: string(), Position :: non_neg_integer()},
+      FirstCall :: boolean().
+
+internal_run(_, _, _, _) -> % stub in the "BIFs" section; NOTE(review): real implementation presumably lives in the runtime - confirm
+    erlang:nif_error(undef).
+
 -spec inspect(MP,Item) -> {namelist, [ binary() ]} when
       MP :: mp(),
       Item :: namelist.
@@ -765,17 +801,17 @@ do_grun(FlatSubject,Subject,Unicode,CRLF,RE,{Options0,NeedClean}) ->
     try
 	postprocess(loopexec(FlatSubject,RE,InitialOffset,
 			     byte_size(FlatSubject),
-			     Unicode,CRLF,StrippedOptions),
+			     Unicode,CRLF,StrippedOptions,true),
 		    SelectReturn,ConvertReturn,FlatSubject,Unicode)
     catch
 	throw:ErrTuple ->
 	    ErrTuple
     end.
 
-loopexec(_,_,X,Y,_,_,_) when X > Y ->
+loopexec(_,_,X,Y,_,_,_,_) when X > Y ->
     {match,[]};
-loopexec(Subject,RE,X,Y,Unicode,CRLF,Options) ->
-    case re:run(Subject,RE,[{offset,X}]++Options) of
+loopexec(Subject,RE,X,Y,Unicode,CRLF,Options, First) ->
+    case re:internal_run(Subject,RE,[{offset,X}]++Options,First) of
 	{error, Err} ->
 	    throw({error,Err});
 	nomatch ->
@@ -784,11 +820,11 @@ loopexec(Subject,RE,X,Y,Unicode,CRLF,Options) ->
 	    {match,Rest} = 
 		case B>0 of
 		    true ->
-			loopexec(Subject,RE,A+B,Y,Unicode,CRLF,Options);
+			loopexec(Subject,RE,A+B,Y,Unicode,CRLF,Options,false);
 		    false ->
 			{match,M} = 
-			    case re:run(Subject,RE,[{offset,X},notempty_atstart,
-						anchored]++Options) of
+			    case re:internal_run(Subject,RE,[{offset,X},notempty_atstart,
+                                                             anchored]++Options,false) of
 				nomatch ->
 				    {match,[]};
 				{match,Other} ->
@@ -801,7 +837,7 @@ loopexec(Subject,RE,X,Y,Unicode,CRLF,Options) ->
 				       forward(Subject,A,1,Unicode,CRLF)
 			       end,
 			{match,MM} = loopexec(Subject,RE,NewA,Y,
-					      Unicode,CRLF,Options),
+					      Unicode,CRLF,Options,false),
 			case M of 
 			    [] ->
 				{match,MM};
diff --git a/lib/stdlib/src/stdlib.app.src b/lib/stdlib/src/stdlib.app.src
index ecb514e9f3..d7d57941c2 100644
--- a/lib/stdlib/src/stdlib.app.src
+++ b/lib/stdlib/src/stdlib.app.src
@@ -108,7 +108,7 @@
                dets]},
   {applications, [kernel]},
   {env, []},
-  {runtime_dependencies, ["sasl-3.0","kernel-6.0","erts-10.4","crypto-3.3",
+  {runtime_dependencies, ["sasl-3.0","kernel-6.0","erts-@OTP-15831:OTP-15836@","crypto-3.3",
 			  "compiler-5.0"]}
 ]}.
 
diff --git a/lib/stdlib/test/ets_SUITE.erl b/lib/stdlib/test/ets_SUITE.erl
index dd49288417..09238ae2b4 100644
--- a/lib/stdlib/test/ets_SUITE.erl
+++ b/lib/stdlib/test/ets_SUITE.erl
@@ -75,7 +75,8 @@
 -export([throughput_benchmark/0,
          throughput_benchmark/1,
          test_throughput_benchmark/1,
-         long_throughput_benchmark/1]).
+         long_throughput_benchmark/1,
+         lookup_catree_par_vs_seq_init_benchmark/0]).
 -export([exit_large_table_owner/1,
 	 exit_many_large_table_owner/1,
 	 exit_many_tables_owner/1,
@@ -6728,6 +6729,14 @@ do_work(WorksDoneSoFar, Table, ProbHelpTab, Range, Operations) ->
     end.
 
 prefill_table(T, KeyRange, Num, ObjFun) ->
+    Parent = self(),
+    spawn_link(fun() -> % run the prefill in a separate, linked helper process
+                       prefill_table_helper(T, KeyRange, Num, ObjFun),
+                       Parent ! done
+               end),
+    receive done -> ok end. % block until the helper reports that it has finished
+
+prefill_table_helper(T, KeyRange, Num, ObjFun) ->
     Seed = rand:uniform(KeyRange),
     %%io:format("prefill_table: Seed = ~p\n", [Seed]),
     RState = unique_rand_start(KeyRange, Seed),
@@ -6740,11 +6749,77 @@ prefill_table_loop(T, RS0, N, ObjFun) ->
     ets:insert(T, ObjFun(Key)),
     prefill_table_loop(T, RS1, N-1, ObjFun).
 
+inserter_proc_starter(T, ToInsert, Parent) -> % entry point of one inserter process
+    receive
+        start -> ok % do nothing until the 'start' message arrives
+    end,
+    inserter_proc(T, ToInsert, [], Parent, false). % CanStop = false: 'stop' is not polled during the first pass
+
+inserter_proc(T, [], Inserted, Parent, _) -> % a full pass over the objects has been completed
+    inserter_proc(T, Inserted, [], Parent, true); % start over with the same objects, now allowing 'stop'
+inserter_proc(T, [I | ToInsert], Inserted, Parent, CanStop) ->
+    Stop =
+        case CanStop of
+            true ->
+                receive
+                    stop -> Parent ! stopped % acknowledge; the send expression evaluates to 'stopped'
+                after 0 -> no_stop % poll the mailbox without blocking
+                end;
+            false -> no_stop
+        end,
+    case Stop of
+        no_stop ->
+            ets:insert(T, I),
+            inserter_proc(T, ToInsert, [I | Inserted], Parent, CanStop);
+        _ -> ok % 'stop' was received and acknowledged: terminate
+    end.
+
+prefill_table_parallel(T, KeyRange, Num, ObjFun) -> % same arity as prefill_table/4, but fills T from several processes
+    Parent = self(),
+    spawn_link(fun() -> % coordinate the inserters from a separate, linked helper process
+                       prefill_table_parallel_helper(T, KeyRange, Num, ObjFun),
+                       Parent ! done
+               end),
+    receive done -> ok end. % block until the helper reports that it has finished
+
+prefill_table_parallel_helper(T, KeyRange, Num, ObjFun) ->
+    NrOfSchedulers = erlang:system_info(schedulers), % used as the number of buckets below
+    Seed = rand:uniform(KeyRange),
+    %%io:format("prefill_table: Seed = ~p\n", [Seed]),
+    RState = unique_rand_start(KeyRange, Seed),
+    InsertMap = prefill_insert_map_loop(T, RState, Num, ObjFun, #{}, NrOfSchedulers),
+    Self = self(), % the inserters send their 'stopped' acknowledgement to this process
+    Pids = [
+        begin
+            InserterFun =
+                fun() ->
+                    inserter_proc_starter(T, ToInsert, Self)
+                end,
+            spawn_link(InserterFun)
+        end
+        || ToInsert <- maps:values(InsertMap)], % one inserter process per bucket of objects
+    [Pid ! start || Pid <- Pids],
+    timer:sleep(1000), % let the inserters run for one second
+    [Pid ! stop || Pid <- Pids],
+    [receive stopped -> ok end || _Pid <- Pids]. % wait for one acknowledgement per inserter
+
+prefill_insert_map_loop(_, _, 0, _, InsertMap, _NrOfSchedulers) ->
+    InsertMap; % map from bucket index to the list of objects assigned to that bucket
+prefill_insert_map_loop(T, RS0, N, ObjFun, InsertMap, NrOfSchedulers) -> % T is only passed along, never read here
+    {Key, RS1} = unique_rand_next(RS0),
+    Sched = N rem NrOfSchedulers, % bucket index, cycling as N counts down
+    PrevInserts = maps:get(Sched, InsertMap, []),
+    NewPrevInserts = [ObjFun(Key) | PrevInserts],
+    NewInsertMap = maps:put(Sched, NewPrevInserts, InsertMap),
+    prefill_insert_map_loop(T, RS1, N-1, ObjFun, NewInsertMap, NrOfSchedulers).
+
 -record(ets_throughput_bench_config,
         {benchmark_duration_ms = 3000,
          recover_time_ms = 1000,
          thread_counts = not_set,
          key_ranges = [1000000],
+         init_functions = [fun prefill_table/4],
+         nr_of_repeats = 1,
          scenarios =
              [
               [
@@ -6838,7 +6913,7 @@ prefill_table_loop(T, RS0, N, ObjFun) ->
          notify_res_fun = fun(_Name, _Throughput) -> ok end,
          print_result_paths_fun =
              fun(ResultPath, _LatestResultPath) ->
-                     Comment = 
+                     Comment =
                          io_lib:format("<a href=\"file:///~s\">Result visualization</a>",[ResultPath]),
                      {comment, Comment}
              end
@@ -6848,7 +6923,7 @@ stdout_notify_res(ResultPath, LatestResultPath) ->
     io:format("Result Location: /~s~n", [ResultPath]),
     io:format("Latest Result Location: ~s~n", [LatestResultPath]).
 
-throughput_benchmark() -> 
+throughput_benchmark() ->
     throughput_benchmark(
       #ets_throughput_bench_config{
          print_result_paths_fun = fun stdout_notify_res/2}).
@@ -6856,9 +6931,11 @@ throughput_benchmark() ->
 throughput_benchmark(
   #ets_throughput_bench_config{
      benchmark_duration_ms  = BenchmarkDurationMs,
-     recover_time_ms        = RecoverTimeMs, 
-     thread_counts          = ThreadCountsOpt, 
-     key_ranges             = KeyRanges, 
+     recover_time_ms        = RecoverTimeMs,
+     thread_counts          = ThreadCountsOpt,
+     key_ranges             = KeyRanges,
+     init_functions         = InitFuns,
+     nr_of_repeats          = NrOfRepeats,
      scenarios              = Scenarios,
      table_types            = TableTypes,
      etsmem_fun             = ETSMemFun,
@@ -6872,21 +6949,21 @@ throughput_benchmark(
                 Start = rand:uniform(KeyRange),
                 Last =
                     lists:foldl(
-                      fun(_, Prev) -> 
+                      fun(_, Prev) ->
                               case Prev of
                                   '$end_of_table'-> ok;
                                   _ ->
                                       try ets:next(T, Prev) of
                                            Normal -> Normal
                                        catch
-                                           error:badarg -> 
+                                           error:badarg ->
                                                % sets (not ordered_sets) cannot handle when the argument
                                                % to next is not in the set
                                                rand:uniform(KeyRange)
                                        end
                               end
                       end,
-                      Start, 
+                      Start,
                       lists:seq(1, SeqSize)),
                 case Last =:= -1 of
                     true -> io:format("Will never be printed");
@@ -6898,26 +6975,26 @@ throughput_benchmark(
                 Start = rand:uniform(KeyRange),
                 Last = Start + SeqSize,
                 case -1 =:= ets:select_count(T,
-                                             ets:fun2ms(fun({X}) when X > Start andalso X =< Last  -> true end)) of  
+                                             ets:fun2ms(fun({X}) when X > Start andalso X =< Last  -> true end)) of
                     true -> io:format("Will never be printed");
                     false -> ok
                 end
 
         end,
     %% Mapping benchmark operation names to their corresponding functions that do them
-    Operations = 
+    Operations =
         #{insert =>
-              fun(T,KeyRange) -> 
+              fun(T,KeyRange) ->
                       Num = rand:uniform(KeyRange),
                       ets:insert(T, {Num})
               end,
           delete =>
-              fun(T,KeyRange) -> 
+              fun(T,KeyRange) ->
                       Num = rand:uniform(KeyRange),
                       ets:delete(T, Num)
               end,
           lookup =>
-              fun(T,KeyRange) -> 
+              fun(T,KeyRange) ->
                       Num = rand:uniform(KeyRange),
                       ets:lookup(T, Num)
               end,
@@ -6928,8 +7005,8 @@ throughput_benchmark(
           nextseq1000 =>
               fun(T,KeyRange) -> NextSeqOp(T,KeyRange,1000) end,
           selectAll =>
-              fun(T,_KeyRange) -> 
-                      case -1 =:= ets:select_count(T, ets:fun2ms(fun(_X) -> true end)) of  
+              fun(T,_KeyRange) ->
+                      case -1 =:= ets:select_count(T, ets:fun2ms(fun(_X) -> true end)) of
                           true -> io:format("Will never be printed");
                           false -> ok
                       end
@@ -6951,7 +7028,7 @@ throughput_benchmark(
                 NewCurrent = Current + OpPropability,
                 [{NewCurrent, OpName}| Calculate(Res, NewCurrent)]
         end,
-    RenderScenario = 
+    RenderScenario =
         fun R([], StringSoFar) ->
                 StringSoFar;
             R([{Fraction, Operation}], StringSoFar) ->
@@ -6978,7 +7055,7 @@ throughput_benchmark(
                     false -> ok
                 end
         end,
-    DataHolder = 
+    DataHolder =
         fun DataHolderFun(Data)->
                 receive
                     {get_data, Pid} -> Pid ! {ets_bench_data, Data};
@@ -6992,18 +7069,21 @@ throughput_benchmark(
                 DataHolderPid ! io_lib:format(Str, List)
         end,
     GetData =
-        fun () -> 
+        fun () ->
                 DataHolderPid ! {get_data, self()},
                 receive {ets_bench_data, Data} -> Data end
         end,
     %% Function that runs a benchmark instance and returns the number
     %% of operations that were performed
     RunBenchmark =
-        fun({NrOfProcs, TableConfig, Scenario, Range, Duration}) ->
+        fun({NrOfProcs, TableConfig, Scenario, Range, Duration, InitFun}) ->
                 ProbHelpTab = CalculateOpsProbHelpTab(Scenario, 0),
                 Table = ets:new(t, TableConfig),
                 Nobj = Range div 2,
-                prefill_table(Table, Range, Nobj, fun(K) -> {K} end),
+                case InitFun of
+                    not_set -> prefill_table(Table, Range, Nobj, fun(K) -> {K} end);
+                    _ -> InitFun(Table, Range, Nobj, fun(K) -> {K} end)
+                end,
                 Nobj = ets:info(Table, size),
                 SafeFixTableIfRequired(Table, Scenario, true),
                 ParentPid = self(),
@@ -7016,12 +7096,14 @@ throughput_benchmark(
                     end,
                 ChildPids =
                     lists:map(fun(_N) ->spawn_link(Worker)end, lists:seq(1, NrOfProcs)),
+                erlang:garbage_collect(),
+                timer:sleep(RecoverTimeMs),
                 lists:foreach(fun(Pid) -> Pid ! start end, ChildPids),
                 timer:sleep(Duration),
                 lists:foreach(fun(Pid) -> Pid ! stop end, ChildPids),
                 TotalWorksDone = lists:foldl(
-                                   fun(_, Sum) -> 
-                                           receive 
+                                   fun(_, Sum) ->
+                                           receive
                                                Count -> Sum + Count
                                            end
                                    end, 0, ChildPids),
@@ -7032,27 +7114,32 @@ throughput_benchmark(
     RunBenchmarkInSepProcess =
         fun(ParameterTuple) ->
                 P = self(),
-                spawn_link(fun()-> P ! {bench_result, RunBenchmark(ParameterTuple)} end),
-                Result = receive {bench_result, Res} -> Res end,
-                timer:sleep(RecoverTimeMs),
-                Result
+                Results =
+                    [begin
+                         spawn_link(fun()-> P ! {bench_result, RunBenchmark(ParameterTuple)} end),
+                         receive {bench_result, Res} -> Res end
+                     end || _ <- lists:seq(1, NrOfRepeats)],
+                lists:sum(Results) / NrOfRepeats
         end,
     RunBenchmarkAndReport =
         fun(ThreadCount,
             TableType,
             Scenario,
             KeyRange,
-            Duration) ->
+            Duration,
+            InitFunName,
+            InitFun) ->
                 Result = RunBenchmarkInSepProcess({ThreadCount,
                                                    TableType,
                                                    Scenario,
                                                    KeyRange,
-                                                   Duration}),
+                                                   Duration,
+                                                   InitFun}),
                 Throughput = Result/(Duration/1000.0),
                 PrintData("; ~f",[Throughput]),
-                Name = io_lib:format("Scenario: ~w, Key Range Size: ~w, "
+                Name = io_lib:format("Scenario: ~s, ~w, Key Range Size: ~w, "
                                      "# of Processes: ~w, Table Type: ~w",
-                                     [Scenario, KeyRange, ThreadCount, TableType]),
+                                     [InitFunName, Scenario, KeyRange, ThreadCount, TableType]),
                 NotifyResFun(Name, Throughput)
         end,
     ThreadCounts =
@@ -7087,17 +7174,29 @@ throughput_benchmark(
                         PrintData("$~n",[]),
                         lists:foreach(
                           fun(TableType) ->
-                                  PrintData("~w ",[TableType]),
                                   lists:foreach(
-                                    fun(ThreadCount) ->
-                                            RunBenchmarkAndReport(ThreadCount,
-                                                                  TableType,
-                                                                  Scenario,
-                                                                  KeyRange,
-                                                                  BenchmarkDurationMs)
+                                    fun(InitFunArg) ->
+                                            {InitFunName, InitFun} =
+                                                case InitFunArg of
+                                                    {FunName, Fun} -> {FunName, Fun};
+                                                    Fun -> {"", Fun}
+                                                end,
+                                            PrintData("~s,~w ",[InitFunName,TableType]),
+                                            lists:foreach(
+                                              fun(ThreadCount) ->
+                                                      RunBenchmarkAndReport(ThreadCount,
+                                                                            TableType,
+                                                                            Scenario,
+                                                                            KeyRange,
+                                                                            BenchmarkDurationMs,
+                                                                            InitFunName,
+                                                                            InitFun)
+                                              end,
+                                              ThreadCounts),
+                                            PrintData("$~n",[])
                                     end,
-                                    ThreadCounts),
-                                  PrintData("$~n",[])
+                                    InitFuns)
+
                           end,
                           TableTypes)
                 end,
@@ -7121,7 +7220,7 @@ throughput_benchmark(
 test_throughput_benchmark(Config) when is_list(Config) ->
     throughput_benchmark(
       #ets_throughput_bench_config{
-         benchmark_duration_ms = 100, 
+         benchmark_duration_ms = 100,
          recover_time_ms = 0,
          thread_counts = [1, erlang:system_info(schedulers)],
          key_ranges = [50000],
@@ -7136,7 +7235,7 @@ long_throughput_benchmark(Config) when is_list(Config) ->
          recover_time_ms = 1000,
          thread_counts = [1, N div 2, N],
          key_ranges = [1000000],
-         scenarios = 
+         scenarios =
              [
               [
                {0.5, insert},
@@ -7171,15 +7270,15 @@ long_throughput_benchmark(Config) when is_list(Config) ->
                {0.01, partial_select1000}
               ]
              ],
-         table_types = 
+         table_types =
              [
               [ordered_set, public, {write_concurrency, true}, {read_concurrency, true}],
               [set, public, {write_concurrency, true}, {read_concurrency, true}]
              ],
          etsmem_fun = fun etsmem/0,
          verify_etsmem_fun = fun verify_etsmem/1,
-         notify_res_fun = 
-             fun(Name, Throughput) -> 
+         notify_res_fun =
+             fun(Name, Throughput) ->
                      SummaryTable =
                          proplists:get_value(ets_benchmark_result_summary_tab, Config),
                      AddToSummaryCounter =
@@ -7209,13 +7308,47 @@ long_throughput_benchmark(Config) when is_list(Config) ->
                                     total_throughput_ordered_set)
                      end,
                      ct_event:notify(
-                          #event{name = benchmark_data, 
+                          #event{name = benchmark_data,
                                  data = [{suite,"ets_bench"},
                                          {name, Name},
                                          {value,Throughput}]})
              end
         }).
 
+%% This function compares the lookup operation's performance for
+%% ordered_set ETS tables with and without write_concurrency enabled
+%% when the data structures have been populated in parallel and
+%% sequentially.
+%%
+%% The main purpose of this function is to check that the
+%% implementation of ordered_set with write_concurrency (CA tree)
+%% adapts its structure to contention even when only lookup operations
+%% are used.
+lookup_catree_par_vs_seq_init_benchmark() ->
+    N = erlang:system_info(schedulers),
+    throughput_benchmark(
+      #ets_throughput_bench_config{
+         benchmark_duration_ms = 600000, % 600000 ms = 10 minutes
+         recover_time_ms = 1000,
+         thread_counts = [1, N div 2, N],
+         key_ranges = [1000000],
+         init_functions = [{"seq_init", fun prefill_table/4},
+                           {"par_init", fun prefill_table_parallel/4}],
+         nr_of_repeats = 1,
+         scenarios =
+             [
+              [
+               {1.0, lookup} % lookups only
+              ]
+             ],
+         table_types =
+             [
+              [ordered_set, public, {write_concurrency, true}],
+              [ordered_set, public]
+             ],
+          print_result_paths_fun = fun stdout_notify_res/2
+        }).
+
 add_lists(L1,L2) ->
     add_lists(L1,L2,[]).
 add_lists([],[],Acc) ->
diff --git a/lib/stdlib/test/re_SUITE.erl b/lib/stdlib/test/re_SUITE.erl
index c9ef9da990..06d8fe9255 100644
--- a/lib/stdlib/test/re_SUITE.erl
+++ b/lib/stdlib/test/re_SUITE.erl
@@ -28,7 +28,8 @@
 	 pcre_compile_workspace_overflow/1,re_infinite_loop/1, 
 	 re_backwards_accented/1,opt_dupnames/1,opt_all_names/1,inspect/1,
 	 opt_no_start_optimize/1,opt_never_utf/1,opt_ucp/1,
-	 match_limit/1,sub_binaries/1,copt/1]).
+	 match_limit/1,sub_binaries/1,copt/1,global_unicode_validation/1,
+         yield_on_subject_validation/1]).
 
 -include_lib("common_test/include/ct.hrl").
 -include_lib("kernel/include/file.hrl").
@@ -45,7 +46,8 @@ all() ->
      pcre_compile_workspace_overflow, re_infinite_loop, 
      re_backwards_accented, opt_dupnames, opt_all_names, 
      inspect, opt_no_start_optimize,opt_never_utf,opt_ucp,
-     match_limit, sub_binaries, re_version].
+     match_limit, sub_binaries, re_version, global_unicode_validation,
+     yield_on_subject_validation].
 
 groups() -> 
     [].
@@ -200,7 +202,58 @@ re_version(_Config) ->
     {match,[Version]} = re:run(Version,"^[0-9]\\.[0-9]{2} 20[0-9]{2}-[0-9]{2}-[0-9]{2}",[{capture,all,binary}]),
     ok.
 
+global_unicode_validation(Config) when is_list(Config) ->
+    %% Test that unicode validation of the whole subject is not
+    %% repeated for every match found by a global run.
+    Bin = binary:copy(<<"abc\n">>,100000),
+    {TimeAscii, _} = take_time(fun () ->
+                                       re:run(Bin, <<"b">>, [global])
+                               end),
+    {TimeUnicode, _} = take_time(fun () ->
+                                         re:run(Bin, <<"b">>, [unicode,global])
+                                 end),
+    if TimeAscii == 0; TimeUnicode == 0 -> % also keeps the division below from dividing by zero
+            {comment, "Not good enough resolution to compare results"};
+       true ->
+            %% The two operations should take times of the same
+            %% order of magnitude. If the whole subject were
+            %% validated for every match, the unicode variant
+            %% would take far longer.
+            true = TimeUnicode div TimeAscii < 10
+    end.
+
+take_time(Fun) -> % Calls Fun() and returns {ElapsedNanoseconds, Result}.
+    Start = erlang:monotonic_time(nanosecond),
+    Res = Fun(),
+    End = erlang:monotonic_time(nanosecond),
+    {End-Start, Res}.
+
+yield_on_subject_validation(Config) when is_list(Config) ->
+    Go = make_ref(),
+    Bin = binary:copy(<<"abc\n">>,100000),
+    {P, M} = spawn_opt(fun () ->
+                               receive Go -> ok end, % Go is sent only after tracing has been switched on
+                               {match,[{1,1}]} = re:run(Bin, <<"b">>, [unicode])
+                       end,
+                       [link, monitor]),
+    1 = erlang:trace(P, true, [running]), % enable the 'running' trace flag on P
+    P ! Go,
+    N = count_re_run_trap_out(P, M),
+    true = N >= 5, % require at least five 'out' events in erlang:re_run_trap/3
+    ok.
 
+count_re_run_trap_out(P, M) when is_reference(M) -> % entry clause: M is the monitor reference for P
+    receive {'DOWN',M,process,P,normal} -> ok end, % wait for P to exit normally
+    TD = erlang:trace_delivered(P),
+    receive {trace_delivered, P, TD} -> ok end, % wait for the trace_delivered reply before counting
+    count_re_run_trap_out(P, 0);
+count_re_run_trap_out(P, N) when is_integer(N) -> % counting clause: N is the number of matches so far
+    receive
+        {trace,P,out,{erlang,re_run_trap,3}} ->
+            count_re_run_trap_out(P, N+1)
+    after 0 -> % no further matching trace message in the mailbox
+            N
+    end.
 
 %% Test compile options given directly to run.
 combined_options(Config) when is_list(Config) ->