From bb20626522a8d5cb92b2379751450905151cab44 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Mon, 20 Apr 2015 08:00:39 +0200
Subject: compilation_SUITE: Unload tested modules using the code server

Don't unload modules using BIFs; use the code server to ensure
that code:all_loaded/0 only lists code that is actually loaded.
---
 lib/compiler/test/compilation_SUITE.erl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/compiler/test/compilation_SUITE.erl b/lib/compiler/test/compilation_SUITE.erl
index 296774e083..51e1da2cb6 100644
--- a/lib/compiler/test/compilation_SUITE.erl
+++ b/lib/compiler/test/compilation_SUITE.erl
@@ -309,8 +309,8 @@ load_and_call(Out, Module) ->
     %% Smoke-test of beam disassembler.
     ?line test_lib:smoke_disasm(Module),
 
-    ?line true = erlang:delete_module(Module),
-    ?line true = erlang:purge_module(Module),
+    _ = code:delete(Module),
+    _ = code:purge(Module),
 
     %% Restore state of trap_exit just in case. (Since the compiler
     %% uses a temporary process, we will get {'EXIT',Pid,normal} messages
-- 
cgit v1.2.3


From 81354ca6651ff23ecff8dc93e1db13c115bb8369 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Mon, 20 Apr 2015 10:06:24 +0200
Subject: test suites: Unload modules compiled from .core or .S

The .core or .S files that are compiled in the test cases
may lack module_info/0,1 functions, which will cause problems if
we (for example) try to run eprof later. To avoid that problem,
unload each module directly after testing it.
---
 lib/compiler/test/core_SUITE.erl      |  5 ++++-
 lib/compiler/test/core_fold_SUITE.erl | 20 +++++++++++---------
 lib/compiler/test/guard_SUITE.erl     |  2 ++
 3 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/lib/compiler/test/core_SUITE.erl b/lib/compiler/test/core_SUITE.erl
index 428ad65364..471f8dc558 100644
--- a/lib/compiler/test/core_SUITE.erl
+++ b/lib/compiler/test/core_SUITE.erl
@@ -87,4 +87,7 @@ try_it(Mod, Conf) ->
 compile_and_load(Src, Opts) ->
     {ok,Mod,Bin} = compile:file(Src, [from_core,report,time,binary|Opts]),
     {module,Mod} = code:load_binary(Mod, Mod, Bin),
-    ok = Mod:Mod().
+    ok = Mod:Mod(),
+    _ = code:delete(Mod),
+    _ = code:purge(Mod),
+    ok.
diff --git a/lib/compiler/test/core_fold_SUITE.erl b/lib/compiler/test/core_fold_SUITE.erl
index bc82eaf5aa..a722f97e81 100644
--- a/lib/compiler/test/core_fold_SUITE.erl
+++ b/lib/compiler/test/core_fold_SUITE.erl
@@ -231,15 +231,17 @@ eq(Config) when is_list(Config) ->
 
 %% OTP-7117.
 nested_call_in_case(Config) when is_list(Config) ->
-    ?line PrivDir = ?config(priv_dir, Config),
-    ?line Dir = filename:dirname(code:which(?MODULE)),
-    ?line Core = filename:join(Dir, "nested_call_in_case"),
-    ?line Opts = [from_core,{outdir,PrivDir}|test_lib:opt_opts(?MODULE)],
-    ?line io:format("~p", [Opts]),
-    ?line {ok,Mod} = c:c(Core, Opts),
-    ?line yes = Mod:a([1,2,3], 2),
-    ?line no = Mod:a([1,2,3], 4),
-    ?line {'EXIT',_} = (catch Mod:a(not_a_list, 42)),
+    PrivDir = ?config(priv_dir, Config),
+    Dir = filename:dirname(code:which(?MODULE)),
+    Core = filename:join(Dir, "nested_call_in_case"),
+    Opts = [from_core,{outdir,PrivDir}|test_lib:opt_opts(?MODULE)],
+    io:format("~p", [Opts]),
+    {ok,Mod} = c:c(Core, Opts),
+    yes = Mod:a([1,2,3], 2),
+    no = Mod:a([1,2,3], 4),
+    {'EXIT',_} = (catch Mod:a(not_a_list, 42)),
+    _ = code:delete(Mod),
+    _ = code:purge(Mod),
     ok.
 
 guard_try_catch(_Config) ->
diff --git a/lib/compiler/test/guard_SUITE.erl b/lib/compiler/test/guard_SUITE.erl
index 08279d9408..d91ee7ea08 100644
--- a/lib/compiler/test/guard_SUITE.erl
+++ b/lib/compiler/test/guard_SUITE.erl
@@ -1614,6 +1614,8 @@ t_tuple_size(Config) when is_list(Config) ->
     ?line {ok,Mod,Code} = compile:file(File, [from_asm,binary]),
     ?line code:load_binary(Mod, File, Code),
     ?line 14 = Mod:t({1,2,3,4}),
+    _ = code:delete(Mod),
+    _ = code:purge(Mod),
     
     ok.
 
-- 
cgit v1.2.3


From 37996d71a60f8aa4dd1078a7903098aa656b9e35 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Mon, 20 Apr 2015 10:19:55 +0200
Subject: test suite: Always place .core files in data directories

For tidiness, always place .core files in data directories.
---
 lib/compiler/test/Makefile                         | 10 +---
 lib/compiler/test/bs_match_SUITE.erl               | 14 +----
 lib/compiler/test/bs_shadowed_size_var.core        | 25 --------
 lib/compiler/test/core_SUITE.erl                   |  9 ++-
 .../test/core_SUITE_data/bs_shadowed_size_var.core | 66 ++++++++++++++++++++++
 lib/compiler/test/core_fold_SUITE.erl              |  4 +-
 .../core_fold_SUITE_data/nested_call_in_case.core  | 18 ++++++
 .../unused_multiple_values_error.core              | 11 ++++
 lib/compiler/test/nested_call_in_case.core         | 21 -------
 .../test/unused_multiple_values_error.core         | 11 ----
 10 files changed, 107 insertions(+), 82 deletions(-)
 delete mode 100644 lib/compiler/test/bs_shadowed_size_var.core
 create mode 100644 lib/compiler/test/core_SUITE_data/bs_shadowed_size_var.core
 create mode 100644 lib/compiler/test/core_fold_SUITE_data/nested_call_in_case.core
 create mode 100644 lib/compiler/test/core_fold_SUITE_data/unused_multiple_values_error.core
 delete mode 100644 lib/compiler/test/nested_call_in_case.core
 delete mode 100644 lib/compiler/test/unused_multiple_values_error.core

diff --git a/lib/compiler/test/Makefile b/lib/compiler/test/Makefile
index 73d52a48bc..e1498e564e 100644
--- a/lib/compiler/test/Makefile
+++ b/lib/compiler/test/Makefile
@@ -79,12 +79,6 @@ INLINE= \
 	receive \
 	record
 
-CORE_MODULES = \
-	bs_shadowed_size_var \
-	unused_multiple_values_error \
-	nested_call_in_case
-
-
 NO_OPT_MODULES= $(NO_OPT:%=%_no_opt_SUITE)
 NO_OPT_ERL_FILES= $(NO_OPT_MODULES:%=%.erl)
 POST_OPT_MODULES= $(NO_OPT:%=%_post_opt_SUITE)
@@ -94,8 +88,6 @@ INLINE_ERL_FILES= $(INLINE_MODULES:%=%.erl)
 
 ERL_FILES= $(MODULES:%=%.erl)
 
-CORE_FILES= $(CORE_MODULES:%=%.core)
-
 ##TARGET_FILES= $(MODULES:%=$(EBIN)/%.$(EMULATOR))
 ##INSTALL_PROGS= $(TARGET_FILES)
 
@@ -162,7 +154,7 @@ release_spec: opt
 release_tests_spec: make_emakefile
 	$(INSTALL_DIR) "$(RELSYSDIR)"
 	$(INSTALL_DATA) compiler.spec compiler.cover \
-		$(EMAKEFILE) $(ERL_FILES) $(CORE_FILES) "$(RELSYSDIR)"
+		$(EMAKEFILE) $(ERL_FILES) "$(RELSYSDIR)"
 	$(INSTALL_DATA) $(NO_OPT_ERL_FILES) $(POST_OPT_ERL_FILES) \
 		$(INLINE_ERL_FILES) "$(RELSYSDIR)"
 	chmod -R u+w "$(RELSYSDIR)"
diff --git a/lib/compiler/test/bs_match_SUITE.erl b/lib/compiler/test/bs_match_SUITE.erl
index f7af56afcc..80d80505a6 100644
--- a/lib/compiler/test/bs_match_SUITE.erl
+++ b/lib/compiler/test/bs_match_SUITE.erl
@@ -24,7 +24,7 @@
 	 init_per_group/2,end_per_group/2,
 	 init_per_testcase/2,end_per_testcase/2,
 	 fun_shadow/1,int_float/1,otp_5269/1,null_fields/1,wiger/1,
-	 bin_tail/1,save_restore/1,shadowed_size_var/1,
+	 bin_tail/1,save_restore/1,
 	 partitioned_bs_match/1,function_clause/1,
 	 unit/1,shared_sub_bins/1,bin_and_float/1,
 	 dec_subidentifiers/1,skip_optional_tag/1,
@@ -50,7 +50,7 @@ all() ->
 groups() -> 
     [{p,[parallel],
       [fun_shadow,int_float,otp_5269,null_fields,wiger,
-       bin_tail,save_restore,shadowed_size_var,
+       bin_tail,save_restore,
        partitioned_bs_match,function_clause,unit,
        shared_sub_bins,bin_and_float,dec_subidentifiers,
        skip_optional_tag,wfbm,degenerated_match,bs_sum,
@@ -322,16 +322,6 @@ bad_float_unpack_match(<<F:64/float>>) -> F;
 bad_float_unpack_match(<<I:64/integer-signed>>) -> I.
 
 
-shadowed_size_var(Config) when is_list(Config) ->
-    ?line PrivDir = ?config(priv_dir, Config),
-    ?line Dir = filename:dirname(code:which(?MODULE)),
-    ?line Core = filename:join(Dir, "bs_shadowed_size_var"),
-    ?line Opts = [from_core,{outdir,PrivDir}|test_lib:opt_opts(?MODULE)],
-    ?line io:format("~p", [Opts]),
-    ?line {ok,Mod} = c:c(Core, Opts),
-    ?line [42|<<"abcde">>] = Mod:filter_essentials([<<42:32>>|<<5:32,"abcde">>]),
-    ok.
-
 partitioned_bs_match(Config) when is_list(Config) ->
     ?line <<1,2,3>> = partitioned_bs_match(blurf, <<42,1,2,3>>),
     ?line error = partitioned_bs_match(10, <<7,8,15,13>>),
diff --git a/lib/compiler/test/bs_shadowed_size_var.core b/lib/compiler/test/bs_shadowed_size_var.core
deleted file mode 100644
index d1d5ebba6d..0000000000
--- a/lib/compiler/test/bs_shadowed_size_var.core
+++ /dev/null
@@ -1,25 +0,0 @@
-module 'bs_shadowed_size_var' ['filter_essentials'/1]
-    attributes []
-
-%% Reduced code from beam_asm inlined using the old inliner.
-
-'filter_essentials'/1 =
-    fun (_cor0) ->
-	case _cor0 of
-	  <[#{#<Sz>(32,1,'integer',['unsigned','big']) }#|T]> when 'true' ->
-	      let <_cor4> =
-                      case T of
-			%% Variable 'Sz' repeated here. Should work.
-			<#{#<Sz>(32,1,'integer',['unsigned','big']),
-			    #<Data>(Sz,8,'binary',['unsigned','big'])}#> when 'true' ->
-			    Data
-			<_cor5> when 'true' ->
-			    primop 'match_fail'
-				({'case_clause',{_cor5}})
-		      end
-	      in  [Sz|_cor4]
-	  <_cor5> when 'true' ->
-	      primop 'match_fail'
-		  ({'function_clause',_cor5})
-	end
-end
diff --git a/lib/compiler/test/core_SUITE.erl b/lib/compiler/test/core_SUITE.erl
index 471f8dc558..c4a7efbfc4 100644
--- a/lib/compiler/test/core_SUITE.erl
+++ b/lib/compiler/test/core_SUITE.erl
@@ -24,7 +24,9 @@
 	 dehydrated_itracer/1,nested_tries/1,
 	 seq_in_guard/1,make_effect_seq/1,eval_is_boolean/1,
 	 unsafe_case/1,nomatch_shadow/1,reversed_annos/1,
-	 map_core_test/1,eval_case/1,bad_boolean_guard/1]).
+	 map_core_test/1,eval_case/1,bad_boolean_guard/1,
+	 bs_shadowed_size_var/1
+	]).
 
 -include_lib("test_server/include/test_server.hrl").
 
@@ -50,7 +52,8 @@ groups() ->
     [{p,test_lib:parallel(),
       [dehydrated_itracer,nested_tries,seq_in_guard,make_effect_seq,
        eval_is_boolean,unsafe_case,nomatch_shadow,reversed_annos,
-       map_core_test,eval_case,bad_boolean_guard
+       map_core_test,eval_case,bad_boolean_guard,
+       bs_shadowed_size_var
    ]}].
 
 
@@ -78,6 +81,8 @@ end_per_group(_GroupName, Config) ->
 ?comp(map_core_test).
 ?comp(eval_case).
 ?comp(bad_boolean_guard).
+?comp(bs_shadowed_size_var).
+
 
 try_it(Mod, Conf) ->
     Src = filename:join(?config(data_dir, Conf), atom_to_list(Mod)),
diff --git a/lib/compiler/test/core_SUITE_data/bs_shadowed_size_var.core b/lib/compiler/test/core_SUITE_data/bs_shadowed_size_var.core
new file mode 100644
index 0000000000..0ade037e05
--- /dev/null
+++ b/lib/compiler/test/core_SUITE_data/bs_shadowed_size_var.core
@@ -0,0 +1,66 @@
+module 'bs_shadowed_size_var'
+   ['filter_essentials'/1,
+    'bs_shadowed_size_var'/0]
+    attributes []
+
+%% bs_shadowed_size_var() ->
+%%    [42|<<"abcde">>] = Mod:filter_essentials([<<42:32>>|<<5:32,"abcde">>]),
+%%    ok.
+
+'bs_shadowed_size_var'/0 =
+    fun () ->
+	case <> of
+	  <> when 'true' ->
+	      case apply 'filter_essentials'/1
+		       ([#{#<0>(8,1,'integer',['unsigned'|['big']]),
+			   #<0>(8,1,'integer',['unsigned'|['big']]),
+			   #<0>(8,1,'integer',['unsigned'|['big']]),
+			   #<42>(8,1,'integer',['unsigned'|['big']])}#|#{#<0>(8,1,'integer',['unsigned'|['big']]),
+									 #<0>(8,1,'integer',['unsigned'|['big']]),
+									 #<0>(8,1,'integer',['unsigned'|['big']]),
+									 #<5>(8,1,'integer',['unsigned'|['big']]),
+									 #<97>(8,1,'integer',['unsigned'|['big']]),
+									 #<98>(8,1,'integer',['unsigned'|['big']]),
+									 #<99>(8,1,'integer',['unsigned'|['big']]),
+									 #<100>(8,1,'integer',['unsigned'|['big']]),
+									 #<101>(8,1,'integer',['unsigned'|['big']])}#]) of
+		<[42|#{#<97>(8,1,'integer',['unsigned'|['big']]),
+		       #<98>(8,1,'integer',['unsigned'|['big']]),
+		       #<99>(8,1,'integer',['unsigned'|['big']]),
+		       #<100>(8,1,'integer',['unsigned'|['big']]),
+		       #<101>(8,1,'integer',['unsigned'|['big']])}#]> when 'true' ->
+		    'ok'
+		( <_cor0> when 'true' ->
+		      primop 'match_fail'
+			  ({'badmatch',_cor0})
+		  -| ['compiler_generated'] )
+	      end
+	  ( <> when 'true' ->
+		( primop 'match_fail'
+		      ({'function_clause'})
+		  -| [{'function_name',{'bs_shadowed_size_var',0}}] )
+	    -| ['compiler_generated'] )
+	end
+
+%% Reduced code from beam_asm inlined using the old inliner.
+
+'filter_essentials'/1 =
+    fun (_cor0) ->
+	case _cor0 of
+	  <[#{#<Sz>(32,1,'integer',['unsigned','big']) }#|T]> when 'true' ->
+	      let <_cor4> =
+                      case T of
+			%% Variable 'Sz' repeated here. Should work.
+			<#{#<Sz>(32,1,'integer',['unsigned','big']),
+			    #<Data>(Sz,8,'binary',['unsigned','big'])}#> when 'true' ->
+			    Data
+			<_cor5> when 'true' ->
+			    primop 'match_fail'
+				({'case_clause',{_cor5}})
+		      end
+	      in  [Sz|_cor4]
+	  <_cor5> when 'true' ->
+	      primop 'match_fail'
+		  ({'function_clause',_cor5})
+	end
+end
diff --git a/lib/compiler/test/core_fold_SUITE.erl b/lib/compiler/test/core_fold_SUITE.erl
index a722f97e81..bff9806bdd 100644
--- a/lib/compiler/test/core_fold_SUITE.erl
+++ b/lib/compiler/test/core_fold_SUITE.erl
@@ -232,7 +232,7 @@ eq(Config) when is_list(Config) ->
 %% OTP-7117.
 nested_call_in_case(Config) when is_list(Config) ->
     PrivDir = ?config(priv_dir, Config),
-    Dir = filename:dirname(code:which(?MODULE)),
+    Dir = test_lib:get_data_dir(Config),
     Core = filename:join(Dir, "nested_call_in_case"),
     Opts = [from_core,{outdir,PrivDir}|test_lib:opt_opts(?MODULE)],
     io:format("~p", [Opts]),
@@ -347,7 +347,7 @@ bsm_an_inlined(_, _) -> error.
 
 unused_multiple_values_error(Config) when is_list(Config) ->
     PrivDir = ?config(priv_dir, Config),
-    Dir = filename:dirname(code:which(?MODULE)),
+    Dir = test_lib:get_data_dir(Config),
     Core = filename:join(Dir, "unused_multiple_values_error"),
     Opts = [no_copt,clint,return,from_core,{outdir,PrivDir}
 	   |test_lib:opt_opts(?MODULE)],
diff --git a/lib/compiler/test/core_fold_SUITE_data/nested_call_in_case.core b/lib/compiler/test/core_fold_SUITE_data/nested_call_in_case.core
new file mode 100644
index 0000000000..c46906b2ed
--- /dev/null
+++ b/lib/compiler/test/core_fold_SUITE_data/nested_call_in_case.core
@@ -0,0 +1,18 @@
+module 'nested_call_in_case' ['a'/2]
+    attributes []
+
+'a'/2 =
+    fun (_x,_y) ->
+        case call 'erlang':'>'
+                 (call 'erlang':'length'
+                      (_x), _y) of
+          <'true'> when 'true' ->
+              'yes'
+          <'false'> when 'true' ->
+              'no'
+          ( <_omega> when 'true' ->
+                primop 'match_fail'
+                    ('if_clause')
+            -| ['compiler_generated'] )
+        end
+end
diff --git a/lib/compiler/test/core_fold_SUITE_data/unused_multiple_values_error.core b/lib/compiler/test/core_fold_SUITE_data/unused_multiple_values_error.core
new file mode 100644
index 0000000000..e06587c936
--- /dev/null
+++ b/lib/compiler/test/core_fold_SUITE_data/unused_multiple_values_error.core
@@ -0,0 +1,11 @@
+module 'unused_multiple_values_error' ['hello'/1]
+    attributes []
+'hello'/1 =
+    fun (_cor0) ->
+	do
+	    case _cor0 of
+	      <_cor0> when 'true' ->
+		  <'ok','ok'>
+	    end
+	    'ok'
+end
diff --git a/lib/compiler/test/nested_call_in_case.core b/lib/compiler/test/nested_call_in_case.core
deleted file mode 100644
index 5c6b6909bd..0000000000
--- a/lib/compiler/test/nested_call_in_case.core
+++ /dev/null
@@ -1,21 +0,0 @@
-module 'nested_call_in_case' ['a'/2]
-    attributes []
-
-'a'/2 =
-    fun (_x,_y) ->
-        case call 'erlang':'>'
-                 (call 'erlang':'length'
-                      (_x), _y) of
-          <'true'> when 'true' ->
-              'yes'
-          <'false'> when 'true' ->
-              'no'
-          ( <_omega> when 'true' ->
-                primop 'match_fail'
-                    ('if_clause')
-            -| ['compiler_generated'] )
-        end
-end
-
-
-
diff --git a/lib/compiler/test/unused_multiple_values_error.core b/lib/compiler/test/unused_multiple_values_error.core
deleted file mode 100644
index e06587c936..0000000000
--- a/lib/compiler/test/unused_multiple_values_error.core
+++ /dev/null
@@ -1,11 +0,0 @@
-module 'unused_multiple_values_error' ['hello'/1]
-    attributes []
-'hello'/1 =
-    fun (_cor0) ->
-	do
-	    case _cor0 of
-	      <_cor0> when 'true' ->
-		  <'ok','ok'>
-	    end
-	    'ok'
-end
-- 
cgit v1.2.3


From 7dd42ae8d9faa195f9a7b39014504fc225895b41 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Mon, 20 Apr 2015 09:56:50 +0200
Subject: Add z_SUITE to validate loaded code

If we want to have test cases that run eprof, we must make sure that
there are no modules loaded that don't have a working module_info/1
function, since eprof calls module_info(functions) to retrieve the
list of functions in the module. Some test cases load modules compiled
from Core Erlang that don't have any module_info/1 functions, so
we will need to make sure that all such modules have been unloaded.

Add z_SUITE:loaded/1 to run after all other test cases to verify that
all modules that the code server considers loaded are indeed loaded and
all have working module_info/0,1 functions.
---
 lib/compiler/test/Makefile    |  1 +
 lib/compiler/test/z_SUITE.erl | 62 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+)
 create mode 100644 lib/compiler/test/z_SUITE.erl

diff --git a/lib/compiler/test/Makefile b/lib/compiler/test/Makefile
index e1498e564e..98125fc84e 100644
--- a/lib/compiler/test/Makefile
+++ b/lib/compiler/test/Makefile
@@ -35,6 +35,7 @@ MODULES= \
 	record_SUITE \
 	trycatch_SUITE \
 	warnings_SUITE \
+	z_SUITE \
 	test_lib
 
 NO_OPT= \
diff --git a/lib/compiler/test/z_SUITE.erl b/lib/compiler/test/z_SUITE.erl
new file mode 100644
index 0000000000..eff8a1877f
--- /dev/null
+++ b/lib/compiler/test/z_SUITE.erl
@@ -0,0 +1,62 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2015. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+-module(z_SUITE).
+
+-export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1,
+	 init_per_group/2,end_per_group/2,
+	 loaded/1]).
+
+suite() -> [{ct_hooks,[ts_install_cth]}].
+
+all() ->
+    test_lib:recompile(?MODULE),
+    [loaded].
+
+groups() ->
+    [].
+
+init_per_suite(Config) ->
+    Config.
+
+end_per_suite(_Config) ->
+    ok.
+
+init_per_group(_GroupName, Config) ->
+    Config.
+
+end_per_group(_GroupName, Config) ->
+    Config.
+
+loaded(_Config) ->
+    0 = do_loaded(code:all_loaded(), 0),
+    ok.
+
+do_loaded([{M,_}|Ms], E0) ->
+    E = try
+	    _ = M:module_info(),
+	    _ = M:module_info(functions),
+	    E0
+	catch
+	    C:Error ->
+		Stk = erlang:get_stacktrace(),
+		io:format("~p:~p\n~p\n", [C,Error,Stk]),
+		E0 + 1
+	end,
+    do_loaded(Ms, E);
+do_loaded([], E) -> E.
-- 
cgit v1.2.3


From f4c7080ba83ff5df0c57d8b517563e6106b8a994 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Thu, 16 Apr 2015 10:55:29 +0200
Subject: compile: Eliminate unnecessary wrappers for compiler passes

Several compiler passes have unnecessary wrapper functions that
can be easily eliminated.
---
 lib/compiler/src/compile.erl | 24 ++++--------------------
 1 file changed, 4 insertions(+), 20 deletions(-)

diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl
index c45c9a1a29..f4ef6fcebf 100644
--- a/lib/compiler/src/compile.erl
+++ b/lib/compiler/src/compile.erl
@@ -606,7 +606,7 @@ standard_passes() ->
      {iff,'to_exp',{done,"E"}},
 
      %% Conversion to Core Erlang.
-     ?pass(core_module),
+     {pass,v3_core},
      {iff,'dcore',{listing,"core"}},
      {iff,'to_core0',{done,"core"}}
      | core_passes()].
@@ -618,7 +618,7 @@ core_passes() ->
       [{unless,no_copt,
        [{core_old_inliner,fun test_old_inliner/1,fun core_old_inliner/1},
 	{iff,doldinline,{listing,"oldinline"}},
-	?pass(core_fold_module),
+	{pass,sys_core_fold},
 	{iff,dcorefold,{listing,"corefold"}},
 	{core_inline_module,fun test_core_inliner/1,fun core_inline_module/1},
 	{iff,dinline,{listing,"inline"}},
@@ -631,14 +631,14 @@ core_passes() ->
 
 kernel_passes() ->
     %% Destructive setelement/3 optimization and core lint.
-    [?pass(core_dsetel_module),
+    [{pass,sys_core_dsetel},
      {iff,dsetel,{listing,"dsetel"}},
 
      {iff,clint,?pass(core_lint_module)},
      {iff,core,?pass(save_core_code)},
 
      %% Kernel Erlang and code generation.
-     ?pass(kernel_module),
+     {pass,v3_kernel},
      {iff,dkern,{listing,"kernel"}},
      {iff,'to_kernel',{done,"kernel"}},
      {pass,v3_life},
@@ -1176,14 +1176,6 @@ expand_module(#compile{code=Code,options=Opts0}=St0) ->
     Opts = expand_opts(Opts1),
     {ok,St0#compile{module=Mod,options=Opts,code={Mod,Exp,Forms}}}.
 
-core_module(#compile{code=Code0,options=Opts}=St) ->
-    {ok,Code,Ws} = v3_core:module(Code0, Opts),
-    {ok,St#compile{code=Code,warnings=St#compile.warnings ++ Ws}}.
-
-core_fold_module(#compile{code=Code0,options=Opts,warnings=Warns}=St) ->
-    {ok,Code,Ws} = sys_core_fold:module(Code0, Opts),
-    {ok,St#compile{code=Code,warnings=Warns ++ Ws}}.
-
 core_fold_module_after_inlining(#compile{code=Code0,options=Opts}=St) ->
     %% Inlining may produce code that generates spurious warnings.
     %% Ignore all warnings.
@@ -1219,14 +1211,6 @@ core_inline_module(#compile{code=Code0,options=Opts}=St) ->
     Code = cerl_inline:core_transform(Code0, Opts),
     {ok,St#compile{code=Code}}.
 
-core_dsetel_module(#compile{code=Code0,options=Opts}=St) ->
-    {ok,Code} = sys_core_dsetel:module(Code0, Opts),
-    {ok,St#compile{code=Code}}.
-
-kernel_module(#compile{code=Code0,options=Opts}=St) ->
-    {ok,Code,Ws} = v3_kernel:module(Code0, Opts),
-    {ok,St#compile{code=Code,warnings=St#compile.warnings ++ Ws}}.
-
 save_abstract_code(#compile{ifile=File}=St) ->
     case abstract_code(St) of
 	{ok,Code} ->
-- 
cgit v1.2.3


From a5d724cf240ac8770c7b465c65c7b50d6583c057 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Thu, 16 Apr 2015 07:36:21 +0200
Subject: compile: Add the {eprof,Pass} option for easy eprof running

To run eprof for a compiler pass:

  erlc +'{eprof,beam_asm}' file.erl

The name of the compiler pass is the name as printed when the
'time' option is used. It is usually, but not always, the module
name for the compiler pass.
---
 lib/compiler/src/compile.erl        | 29 ++++++++++++++++++++++++-----
 lib/compiler/test/compile_SUITE.erl |  2 ++
 2 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl
index f4ef6fcebf..0b021073db 100644
--- a/lib/compiler/src/compile.erl
+++ b/lib/compiler/src/compile.erl
@@ -285,11 +285,20 @@ internal_comp(Passes, File, Suffix, St0) ->
     St1 = St0#compile{filename=File, dir=Dir, base=Base,
 		      ifile=erlfile(Dir, Base, Suffix),
 		      ofile=objfile(Base, St0)},
-    Run = case member(time, St1#compile.options) of
-	      true  ->
-		  io:format("Compiling ~tp\n", [File]),
-		  fun run_tc/2;
-	      false -> fun({_Name,Fun}, St) -> catch Fun(St) end
+    Opts = St1#compile.options,
+    Run0 = case member(time, Opts) of
+	       true  ->
+		   io:format("Compiling ~tp\n", [File]),
+		   fun run_tc/2;
+	       false -> fun({_Name,Fun}, St) -> catch Fun(St) end
+	   end,
+    Run = case keyfind(eprof, 1, Opts) of
+	      {eprof,EprofPass} ->
+		  fun(P, St) ->
+			  run_eprof(P, EprofPass, St)
+		  end;
+	      false ->
+		  Run0
 	  end,
     case fold_comp(Passes, Run, St1) of
 	{ok,St2} -> comp_ret_ok(St2);
@@ -331,6 +340,16 @@ run_tc({Name,Fun}, St) ->
 	      [Name,(After_c-Before_c) / 1000,Mem]),
     Val.
 
+run_eprof({Name,Fun}, Name, St) ->
+    io:format("~p: Running eprof\n", [Name]),
+    eprof:start_profiling([self()]),
+    Val = (catch Fun(St)),
+    eprof:stop_profiling(),
+    eprof:analyze(),
+    Val;
+run_eprof({_,Fun}, _, St) ->
+    catch Fun(St).
+
 comp_ret_ok(#compile{code=Code,warnings=Warn0,module=Mod,options=Opts}=St) ->
     case werror(St) of
         true ->
diff --git a/lib/compiler/test/compile_SUITE.erl b/lib/compiler/test/compile_SUITE.erl
index 1c96abe017..6d4fde662b 100644
--- a/lib/compiler/test/compile_SUITE.erl
+++ b/lib/compiler/test/compile_SUITE.erl
@@ -102,6 +102,8 @@ file_1(Config) when is_list(Config) ->
     ?line compile_and_verify(Simple, Target, [debug_info]),
     ?line {ok,simple} = compile:file(Simple, [no_line_info]), %Coverage
 
+    {ok,simple} = compile:file(Simple, [{eprof,beam_z}]), %Coverage
+
     ?line ok = file:set_cwd(Cwd),
     ?line true = exists(Target),
     ?line passed = run(Target, test, []),
-- 
cgit v1.2.3


From 9bca15422cc76e2145e30b822ccc3599abec278a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Tue, 14 Apr 2015 07:42:21 +0200
Subject: orddict: Eliminate unnecessary consing in store/3 and others

As a minor optimization, eliminate unnecessary cons operations
in store/3, append/3, append_list/3, update/4, and update_counter/3.
---
 lib/stdlib/src/orddict.erl | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/lib/stdlib/src/orddict.erl b/lib/stdlib/src/orddict.erl
index c98d78b34d..af5d917840 100644
--- a/lib/stdlib/src/orddict.erl
+++ b/lib/stdlib/src/orddict.erl
@@ -115,8 +115,8 @@ erase(_, []) -> [].
       Orddict1 :: orddict(),
       Orddict2 :: orddict().
 
-store(Key, New, [{K,_}=E|Dict]) when Key < K ->
-    [{Key,New},E|Dict];
+store(Key, New, [{K,_}|_]=Dict) when Key < K ->
+    [{Key,New}|Dict];
 store(Key, New, [{K,_}=E|Dict]) when Key > K ->
     [E|store(Key, New, Dict)];
 store(Key, New, [{_K,_Old}|Dict]) ->		%Key == K
@@ -129,8 +129,8 @@ store(Key, New, []) -> [{Key,New}].
       Orddict1 :: orddict(),
       Orddict2 :: orddict().
 
-append(Key, New, [{K,_}=E|Dict]) when Key < K ->
-    [{Key,[New]},E|Dict];
+append(Key, New, [{K,_}|_]=Dict) when Key < K ->
+    [{Key,[New]}|Dict];
 append(Key, New, [{K,_}=E|Dict]) when Key > K ->
     [E|append(Key, New, Dict)];
 append(Key, New, [{_K,Old}|Dict]) ->		%Key == K
@@ -143,8 +143,8 @@ append(Key, New, []) -> [{Key,[New]}].
       Orddict1 :: orddict(),
       Orddict2 :: orddict().
 
-append_list(Key, NewList, [{K,_}=E|Dict]) when Key < K ->
-    [{Key,NewList},E|Dict];
+append_list(Key, NewList, [{K,_}|_]=Dict) when Key < K ->
+    [{Key,NewList}|Dict];
 append_list(Key, NewList, [{K,_}=E|Dict]) when Key > K ->
     [E|append_list(Key, NewList, Dict)];
 append_list(Key, NewList, [{_K,Old}|Dict]) ->		%Key == K
@@ -170,8 +170,8 @@ update(Key, Fun, [{K,Val}|Dict]) when Key == K ->
       Orddict1 :: orddict(),
       Orddict2 :: orddict().
 
-update(Key, _, Init, [{K,_}=E|Dict]) when Key < K ->
-    [{Key,Init},E|Dict];
+update(Key, _, Init, [{K,_}|_]=Dict) when Key < K ->
+    [{Key,Init}|Dict];
 update(Key, Fun, Init, [{K,_}=E|Dict]) when Key > K ->
     [E|update(Key, Fun, Init, Dict)];
 update(Key, Fun, _Init, [{_K,Val}|Dict]) ->		%Key == K
@@ -184,8 +184,8 @@ update(Key, _, Init, []) -> [{Key,Init}].
       Orddict1 :: orddict(),
       Orddict2 :: orddict().
 
-update_counter(Key, Incr, [{K,_}=E|Dict]) when Key < K ->
-    [{Key,Incr},E|Dict];
+update_counter(Key, Incr, [{K,_}|_]=Dict) when Key < K ->
+    [{Key,Incr}|Dict];
 update_counter(Key, Incr, [{K,_}=E|Dict]) when Key > K ->
     [E|update_counter(Key, Incr, Dict)];
 update_counter(Key, Incr, [{_K,Val}|Dict]) ->		%Key == K
-- 
cgit v1.2.3


From 3a9828cfe25971b5a6fb2f58f786127e63544bf1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Mon, 13 Apr 2015 17:00:44 +0200
Subject: v3_kernel: Optimize subst_vsub/3

Profiling shows that subst_vsub/3 dominates the running time. It
is therefore worthwhile optimizing it.
---
 lib/compiler/src/v3_kernel.erl | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/lib/compiler/src/v3_kernel.erl b/lib/compiler/src/v3_kernel.erl
index 0ac1aaf158..7dff58582e 100644
--- a/lib/compiler/src/v3_kernel.erl
+++ b/lib/compiler/src/v3_kernel.erl
@@ -836,12 +836,23 @@ get_vsub(V, Vsub) ->
 set_vsub(V, S, Vsub) ->
     orddict:store(V, S, Vsub).
 
-subst_vsub(V, S, Vsub0) ->
-    %% Fold chained substitutions.
-    Vsub1 = orddict:map(fun (_, V1) when V1 =:= V -> S;
-			    (_, V1) -> V1
-			end, Vsub0),
-    orddict:store(V, S, Vsub1).
+subst_vsub(Key, New, [{K,Key}|Dict]) ->
+    %% Fold chained substitution.
+    [{K,New}|subst_vsub(Key, New, Dict)];
+subst_vsub(Key, New, [{K,_}|_]=Dict) when Key < K ->
+    %% Insert the new substitution here, and continue
+    %% looking for chained substitutions.
+    [{Key,New}|subst_vsub_1(Key, New, Dict)];
+subst_vsub(Key, New, [{K,_}=E|Dict]) when Key > K ->
+    [E|subst_vsub(Key, New, Dict)];
+subst_vsub(Key, New, []) -> [{Key,New}].
+
+subst_vsub_1(V, S, [{K,V}|Dict]) ->
+    %% Fold chained substitution.
+    [{K,S}|subst_vsub_1(V, S, Dict)];
+subst_vsub_1(V, S, [E|Dict]) ->
+    [E|subst_vsub_1(V, S, Dict)];
+subst_vsub_1(_, _, []) -> [].
 
 get_fsub(F, A, Fsub) ->
     case orddict:find({F,A}, Fsub) of
-- 
cgit v1.2.3


From 32b85d920529990df2aa7acb091e7d03c520ef8c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Tue, 14 Apr 2015 10:50:25 +0200
Subject: v3_codegen: Optimize "turning" of y registers

Profiling shows that the execution time for "turning" y registers
is noticeable for some modules (e.g. S1AP-PDU-Contents from the
asn1 test suite). We can reduce the impact on running time by
special-casing important instructions. In particular, there is
no need to look for y registers in the list argument for a
select_val instruction.
---
 lib/compiler/src/v3_codegen.erl | 53 ++++++++++++++++++++++++++++++++++-------
 1 file changed, 44 insertions(+), 9 deletions(-)

diff --git a/lib/compiler/src/v3_codegen.erl b/lib/compiler/src/v3_codegen.erl
index 40235d6767..eb7926d3ab 100644
--- a/lib/compiler/src/v3_codegen.erl
+++ b/lib/compiler/src/v3_codegen.erl
@@ -584,7 +584,7 @@ top_level_block(Keis, Bef, MaxRegs, _St) ->
 			(return) ->
 			    [{deallocate,FrameSz},return];
 			(Tuple) when is_tuple(Tuple) ->
-			    [turn_yregs(tuple_size(Tuple), Tuple, MaxY)];
+			    [turn_yregs(Tuple, MaxY)];
 			(Other) ->
 			    [Other]
 		    end, Keis),
@@ -596,14 +596,49 @@ top_level_block(Keis, Bef, MaxRegs, _St) ->
 %%   catches work.  The code generation algorithm gives a lower register
 %%   number to the outer catch, which is wrong.
 
-turn_yregs(0, Tp, _) -> Tp;
-turn_yregs(El, Tp, MaxY) ->
-    turn_yregs(El-1,setelement(El,Tp,turn_yreg(element(El,Tp),MaxY)),MaxY).
-
-turn_yreg({yy,YY},MaxY) -> {y,MaxY-YY};
-turn_yreg({list,Ls},MaxY) -> {list, turn_yreg(Ls,MaxY)};
-turn_yreg(Ts,MaxY) when is_list(Ts) -> [turn_yreg(T,MaxY)||T<-Ts];
-turn_yreg(Other,_MaxY) -> Other.
+turn_yregs({call,_,_}=I, _MaxY) -> I;
+turn_yregs({call_ext,_,_}=I, _MaxY) -> I;
+turn_yregs({jump,_}=I, _MaxY) -> I;
+turn_yregs({label,_}=I, _MaxY) -> I;
+turn_yregs({line,_}=I, _MaxY) -> I;
+turn_yregs({test_heap,_,_}=I, _MaxY) -> I;
+turn_yregs({bif,Op,F,A,B}, MaxY) ->
+    {bif,Op,F,turn_yreg(A, MaxY),turn_yreg(B, MaxY)};
+turn_yregs({gc_bif,Op,F,Live,A,B}, MaxY) when is_integer(Live) ->
+    {gc_bif,Op,F,Live,turn_yreg(A, MaxY),turn_yreg(B, MaxY)};
+turn_yregs({get_tuple_element,S,N,D}, MaxY) ->
+    {get_tuple_element,turn_yreg(S, MaxY),N,turn_yreg(D, MaxY)};
+turn_yregs({put_tuple,Arity,D}, MaxY) ->
+    {put_tuple,Arity,turn_yreg(D, MaxY)};
+turn_yregs({select_val,R,F,L}, MaxY) ->
+    {select_val,turn_yreg(R, MaxY),F,L};
+turn_yregs({test,Op,F,L}, MaxY) ->
+    {test,Op,F,turn_yreg(L, MaxY)};
+turn_yregs({test,Op,F,Live,A,B}, MaxY) when is_integer(Live) ->
+    {test,Op,F,Live,turn_yreg(A, MaxY),turn_yreg(B, MaxY)};
+turn_yregs({Op,A}, MaxY) ->
+    {Op,turn_yreg(A, MaxY)};
+turn_yregs({Op,A,B}, MaxY) ->
+    {Op,turn_yreg(A, MaxY),turn_yreg(B, MaxY)};
+turn_yregs({Op,A,B,C}, MaxY) ->
+    {Op,turn_yreg(A, MaxY),turn_yreg(B, MaxY),turn_yreg(C, MaxY)};
+turn_yregs(Tuple, MaxY) ->
+    turn_yregs(tuple_size(Tuple), Tuple, MaxY).
+
+turn_yregs(1, Tp, _) ->
+    Tp;
+turn_yregs(N, Tp, MaxY) ->
+    E = turn_yreg(element(N, Tp), MaxY),
+    turn_yregs(N-1, setelement(N, Tp, E), MaxY).
+
+turn_yreg({yy,YY}, MaxY) ->
+    {y,MaxY-YY};
+turn_yreg({list,Ls},MaxY) ->
+    {list,turn_yreg(Ls, MaxY)};
+turn_yreg([_|_]=Ts, MaxY) ->
+    [turn_yreg(T, MaxY) || T <- Ts];
+turn_yreg(Other, _MaxY) ->
+    Other.
 
 %% select_cg(Sclause, V, TypeFail, ValueFail, StackReg, State) ->
 %%      {Is,StackReg,State}.
-- 
cgit v1.2.3


From 0971feb23d553bb51b04eb78fbf76c4bc19b4b93 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Tue, 14 Apr 2015 13:01:00 +0200
Subject: v3_codegen: Reduce cost for fixing up bs_match_string instructions

Commit b76588fb5a introduced an optimization of the compile time of
huge functions with many bs_match_string instructions. The
optimization is done in two passes. The first pass coalesces adjacent
bs_match_string instructions. To avoid copying bitstrings multiple
times, the bitstrings in the instructions are combined into a (deep)
list. The second pass goes through all instructions in the function
and combines the list of bitstrings to a single bitstring in all
bs_match_string instructions.

The second pass (fix_bs_match_string) is run on all instructions in
each function, even if there are no bs_match_string instructions in the
function. While fix_bs_match_string is not a bottleneck (it is a
linear pass), its execution time is noticeable when profiling some
modules.

Move the execution of the second pass to the select_binary()
function so that it will only be executed for instructions that
do binary matching. Also take the opportunity to optimize away
uses of bs_restore2 that occur directly after a bs_save2. That
optimization is currently done in beam_block, but it can be
done essentially for free in the same pass that fixes up
bs_match_string instructions.
---
 lib/compiler/src/beam_block.erl |  9 ---------
 lib/compiler/src/v3_codegen.erl | 42 +++++++++++++++++++++++------------------
 2 files changed, 24 insertions(+), 27 deletions(-)

diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl
index 5216f39296..e2639e9cac 100644
--- a/lib/compiler/src/beam_block.erl
+++ b/lib/compiler/src/beam_block.erl
@@ -61,15 +61,6 @@ blockify(Is) ->
 blockify([{loop_rec,{f,Fail},{x,0}},{loop_rec_end,_Lbl},{label,Fail}|Is], Acc) ->
     %% Useless instruction sequence.
     blockify(Is, Acc);
-
-%% New bit syntax matching.
-blockify([{bs_save2,R,Point}=I,{bs_restore2,R,Point}|Is], Acc) ->
-    blockify([I|Is], Acc);
-blockify([{bs_save2,R,Point}=I,{test,is_eq_exact,_,_}=Test,
-	  {bs_restore2,R,Point}|Is], Acc) ->
-    blockify([I,Test|Is], Acc);
-
-%% Do other peep-hole optimizations.
 blockify([{test,is_atom,{f,Fail},[Reg]}=I|
 	  [{select,select_val,Reg,{f,Fail},
 	    [{atom,false},{f,_}=BrFalse,
diff --git a/lib/compiler/src/v3_codegen.erl b/lib/compiler/src/v3_codegen.erl
index eb7926d3ab..15a54a5886 100644
--- a/lib/compiler/src/v3_codegen.erl
+++ b/lib/compiler/src/v3_codegen.erl
@@ -121,24 +121,15 @@ cg_fun(Les, Hvs, Vdb, AtomMod, NameArity, Anno, St0) ->
 					   put_reg(V, Reg)
 				   end, [], Hvs),
 			 stk=[]}, 0, Vdb),
-    {B0,_Aft,St} = cg_list(Les, 0, Vdb, Bef,
+    {B,_Aft,St} = cg_list(Les, 0, Vdb, Bef,
 			  St3#cg{bfail=0,
 				 ultimate_failure=UltimateMatchFail,
 				 is_top_block=true}),
-    B = fix_bs_match_strings(B0),
     {Name,Arity} = NameArity,
     Asm = [{label,Fi},line(Anno),{func_info,AtomMod,{atom,Name},Arity},
 	   {label,Fl}|B++[{label,UltimateMatchFail},if_end]],
     {Asm,Fl,St}.
 
-fix_bs_match_strings([{test,bs_match_string,F,[Ctx,BinList]}|Is])
-  when is_list(BinList) ->
-    I = {test,bs_match_string,F,[Ctx,list_to_bitstring(BinList)]},
-    [I|fix_bs_match_strings(Is)];
-fix_bs_match_strings([I|Is]) ->
-    [I|fix_bs_match_strings(Is)];
-fix_bs_match_strings([]) -> [].
-
 %% cg(Lkexpr, Vdb, StackReg, State) -> {[Ainstr],StackReg,State}.
 %%  Generate code for a kexpr.
 %%  Split function into two steps for clarity, not efficiency.
@@ -717,22 +708,37 @@ select_nil(#l{ke={val_clause,nil,B}}, V, Tf, Vf, Bef, St0) ->
 select_binary(#l{ke={val_clause,{binary,{var,V}},B},i=I,vdb=Vdb},
 	      V, Tf, Vf, Bef, St0) ->
     Int0 = clear_dead(Bef#sr{reg=Bef#sr.reg}, I, Vdb),
-    {Bis,Aft,St1} = match_cg(B, Vf, Int0, St0),
+    {Bis0,Aft,St1} = match_cg(B, Vf, Int0, St0),
     CtxReg = fetch_var(V, Int0),
     Live = max_reg(Bef#sr.reg),
-    {[{test,bs_start_match2,{f,Tf},Live,[CtxReg,V],CtxReg},
-      {bs_save2,CtxReg,{V,V}}|Bis],
-     Aft,St1};
+    Bis1 = [{test,bs_start_match2,{f,Tf},Live,[CtxReg,V],CtxReg},
+	    {bs_save2,CtxReg,{V,V}}|Bis0],
+    Bis = finish_select_binary(Bis1),
+    {Bis,Aft,St1};
 select_binary(#l{ke={val_clause,{binary,{var,Ivar}},B},i=I,vdb=Vdb},
 	      V, Tf, Vf, Bef, St0) ->
     Regs = put_reg(Ivar, Bef#sr.reg),
     Int0 = clear_dead(Bef#sr{reg=Regs}, I, Vdb),
-    {Bis,Aft,St1} = match_cg(B, Vf, Int0, St0),
+    {Bis0,Aft,St1} = match_cg(B, Vf, Int0, St0),
     CtxReg = fetch_var(Ivar, Int0),
     Live = max_reg(Bef#sr.reg),
-    {[{test,bs_start_match2,{f,Tf},Live,[fetch_var(V, Bef),Ivar],CtxReg},
-      {bs_save2,CtxReg,{Ivar,Ivar}}|Bis],
-     Aft,St1}.
+    Bis1 = [{test,bs_start_match2,{f,Tf},Live,[fetch_var(V, Bef),Ivar],CtxReg},
+	    {bs_save2,CtxReg,{Ivar,Ivar}}|Bis0],
+    Bis = finish_select_binary(Bis1),
+    {Bis,Aft,St1}.
+
+finish_select_binary([{bs_save2,R,Point}=I,{bs_restore2,R,Point}|Is]) ->
+    [I|finish_select_binary(Is)];
+finish_select_binary([{bs_save2,R,Point}=I,{test,is_eq_exact,_,_}=Test,
+		      {bs_restore2,R,Point}|Is]) ->
+    [I,Test|finish_select_binary(Is)];
+finish_select_binary([{test,bs_match_string,F,[Ctx,BinList]}|Is])
+  when is_list(BinList) ->
+    I = {test,bs_match_string,F,[Ctx,list_to_bitstring(BinList)]},
+    [I|finish_select_binary(Is)];
+finish_select_binary([I|Is]) ->
+    [I|finish_select_binary(Is)];
+finish_select_binary([]) -> [].
 
 %% New instructions for selection of binary segments.
 
-- 
cgit v1.2.3


From 2e2d583a49939026ec9b959f9b7941d3c2d084f2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Sat, 18 Apr 2015 05:53:16 +0200
Subject: Move rewriting of bs_match from beam_clean to beam_z

The actual bs_match_string instruction has four operands:

   bs_match_string {f,Lbl} Ctxt NumBits {string,ListOfBytes}

However, v3_codegen emits a more compact representation where
the bits to match are packaged in a bitstring:

   bs_match_string {f,Lbl} Ctxt Bitstring

Currently, beam_clean:clean_labels/1 will rewrite the compact
representation to the final representation. That is unfortunate
since clean_labels/1 is called by beam_dead, which means that
the less compact representation will be introduced long before
it is actually needed by beam_asm. It will also complicate any
optimizations that we might want to do.

Move the rewriting of bs_match_string from beam_clean:clean_labels/1
to the beam_z pass, which is the last pass executed before
beam_validator and beam_asm.
---
 lib/compiler/src/beam_a.erl     | 4 ++++
 lib/compiler/src/beam_clean.erl | 8 --------
 lib/compiler/src/beam_dead.erl  | 4 ++--
 lib/compiler/src/beam_z.erl     | 7 +++++++
 4 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/lib/compiler/src/beam_a.erl b/lib/compiler/src/beam_a.erl
index dd7e03dd28..410f598665 100644
--- a/lib/compiler/src/beam_a.erl
+++ b/lib/compiler/src/beam_a.erl
@@ -91,6 +91,10 @@ rename_instr({bs_private_append=I,F,Sz,U,Src,Flags,Dst}) ->
     {bs_init,F,{I,U,Flags},none,[Sz,Src],Dst};
 rename_instr(bs_init_writable=I) ->
     {bs_init,{f,0},I,1,[{x,0}],{x,0}};
+rename_instr({test,Op,F,[Ctx,Bits,{string,Str}]}) ->
+    %% When compiling from a .S file.
+    <<Bs:Bits/bits,_/bits>> = list_to_binary(Str),
+    {test,Op,F,[Ctx,Bs]};
 rename_instr({put_map_assoc,Fail,S,D,R,L}) ->
     {put_map,Fail,assoc,S,D,R,L};
 rename_instr({put_map_exact,Fail,S,D,R,L}) ->
diff --git a/lib/compiler/src/beam_clean.erl b/lib/compiler/src/beam_clean.erl
index b68b8702e0..1d26993103 100644
--- a/lib/compiler/src/beam_clean.erl
+++ b/lib/compiler/src/beam_clean.erl
@@ -184,14 +184,6 @@ function_replace([{function,Name,Arity,Entry,Asm0}|Fs], Dict, Acc) ->
     function_replace(Fs, Dict, [{function,Name,Arity,Entry,Asm}|Acc]);
 function_replace([], _, Acc) -> Acc.
 
-replace([{test,bs_match_string=Op,{f,Lbl},[Ctx,Bin0]}|Is], Acc, D) ->
-    Bits = bit_size(Bin0),
-    Bin = case Bits rem 8 of
-	      0 -> Bin0;
-	      Rem -> <<Bin0/bitstring,0:(8-Rem)>>
-	  end,
-    I = {test,Op,{f,label(Lbl, D)},[Ctx,Bits,{string,binary_to_list(Bin)}]},
-    replace(Is, [I|Acc], D);
 replace([{test,Test,{f,Lbl},Ops}|Is], Acc, D) ->
     replace(Is, [{test,Test,{f,label(Lbl, D)},Ops}|Acc], D);
 replace([{test,Test,{f,Lbl},Live,Ops,Dst}|Is], Acc, D) ->
diff --git a/lib/compiler/src/beam_dead.erl b/lib/compiler/src/beam_dead.erl
index f4515ba2a7..adc3cebc62 100644
--- a/lib/compiler/src/beam_dead.erl
+++ b/lib/compiler/src/beam_dead.erl
@@ -459,8 +459,8 @@ count_bits_matched([{test,_,_,_,[_,Sz,U,{field_flags,_}],_}|Is], SavePoint, Bits
 	{integer,N} -> count_bits_matched(Is, SavePoint, Bits+N*U);
 	_ -> count_bits_matched(Is, SavePoint, Bits)
     end;
-count_bits_matched([{test,bs_match_string,_,[_,Bits,_]}|Is], SavePoint, Bits0) ->
-    count_bits_matched(Is, SavePoint, Bits0+Bits);
+count_bits_matched([{test,bs_match_string,_,[_,Bs]}|Is], SavePoint, Bits) ->
+    count_bits_matched(Is, SavePoint, Bits+bit_size(Bs));
 count_bits_matched([{test,_,_,_}|Is], SavePoint, Bits) ->
     count_bits_matched(Is, SavePoint, Bits);
 count_bits_matched([{bs_save2,Reg,SavePoint}|_], {Reg,SavePoint}, Bits) ->
diff --git a/lib/compiler/src/beam_z.erl b/lib/compiler/src/beam_z.erl
index 0c7bef9183..47e786034d 100644
--- a/lib/compiler/src/beam_z.erl
+++ b/lib/compiler/src/beam_z.erl
@@ -74,6 +74,13 @@ undo_rename({bs_init,F,{I,Extra,U,Flags},Live,[Sz,Src],Dst}) ->
     {I,F,Sz,Extra,Live,U,Src,Flags,Dst};
 undo_rename({bs_init,_,bs_init_writable=I,_,_,_}) ->
     I;
+undo_rename({test,bs_match_string=Op,F,[Ctx,Bin0]}) ->
+    Bits = bit_size(Bin0),
+    Bin = case Bits rem 8 of
+	      0 -> Bin0;
+	      Rem -> <<Bin0/bitstring,0:(8-Rem)>>
+	  end,
+    {test,Op,F,[Ctx,Bits,{string,binary_to_list(Bin)}]};
 undo_rename({put_map,Fail,assoc,S,D,R,L}) ->
     {put_map_assoc,Fail,S,D,R,L};
 undo_rename({put_map,Fail,exact,S,D,R,L}) ->
-- 
cgit v1.2.3


From 37225949b6cf177934848fff21a1a551b7f6faee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Wed, 15 Apr 2015 09:59:05 +0200
Subject: beam_block: Optimize matching of binary literals

When matching a binary literal as in:

  <<"abc">> = Bin

the compiler will produce a sequence of three instructions
(some details in the instructions removed for simplicity):

  bs_start_match2 Fail BinReg CtxtReg
  bs_match_string Fail CtxtReg "abc"
  bs_test_tail2 Fail CtxtReg 0

The sequence can be replaced with:

  is_eq_exact Fail BinReg "abc"
---
 lib/compiler/src/beam_dead.erl       | 10 ++++++++++
 lib/compiler/test/bs_match_SUITE.erl | 14 ++++++++++++--
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/lib/compiler/src/beam_dead.erl b/lib/compiler/src/beam_dead.erl
index adc3cebc62..5932d8ce1d 100644
--- a/lib/compiler/src/beam_dead.erl
+++ b/lib/compiler/src/beam_dead.erl
@@ -255,6 +255,16 @@ backward([{jump,{f,To}}=J|[{bif,Op,_,Ops,Reg}|Is]=Is0], D, Acc) ->
     catch
 	throw:not_possible -> backward(Is0, D, [J|Acc])
     end;
+backward([{test,bs_start_match2,F,_,[R,_],Ctxt}=I|Is], D,
+	 [{test,bs_match_string,F,[Ctxt,Bs]},
+	  {test,bs_test_tail2,F,[Ctxt,0]}|Acc0]=Acc) ->
+    case beam_utils:is_killed(Ctxt, Acc0, D) of
+	true ->
+	    Eq = {test,is_eq_exact,F,[R,{literal,Bs}]},
+	    backward(Is, D, [Eq|Acc0]);
+	false ->
+	    backward(Is, D, [I|Acc])
+    end;
 backward([{test,bs_start_match2,{f,To0},Live,[Src|_]=Info,Dst}|Is], D, Acc) ->
     To = shortcut_bs_start_match(To0, Src, D),
     I = {test,bs_start_match2,{f,To},Live,Info,Dst},
diff --git a/lib/compiler/test/bs_match_SUITE.erl b/lib/compiler/test/bs_match_SUITE.erl
index 80d80505a6..b54db06339 100644
--- a/lib/compiler/test/bs_match_SUITE.erl
+++ b/lib/compiler/test/bs_match_SUITE.erl
@@ -34,7 +34,8 @@
 	 otp_7188/1,otp_7233/1,otp_7240/1,otp_7498/1,
 	 match_string/1,zero_width/1,bad_size/1,haystack/1,
 	 cover_beam_bool/1,matched_out_size/1,follow_fail_branch/1,
-	 no_partition/1,calling_a_binary/1,binary_in_map/1]).
+	 no_partition/1,calling_a_binary/1,binary_in_map/1,
+	 match_string_opt/1]).
 
 -export([coverage_id/1,coverage_external_ignore/2]).
 
@@ -59,7 +60,8 @@ groups() ->
        matching_and_andalso,otp_7188,otp_7233,otp_7240,
        otp_7498,match_string,zero_width,bad_size,haystack,
        cover_beam_bool,matched_out_size,follow_fail_branch,
-       no_partition,calling_a_binary,binary_in_map]}].
+       no_partition,calling_a_binary,binary_in_map,
+       match_string_opt]}].
 
 
 init_per_suite(Config) ->
@@ -1214,6 +1216,14 @@ match_binary_in_map(Map) ->
 	    ok
     end.
 
+match_string_opt(Config) when is_list(Config) ->
+    {x,<<1,2,3>>,{<<1>>,{v,<<1,2,3>>}}} =
+	do_match_string_opt({<<1>>,{v,<<1,2,3>>}}),
+    ok.
+
+do_match_string_opt({<<1>>,{v,V}}=T) ->
+    {x,V,T}.
+
 
 check(F, R) ->
     R = F().
-- 
cgit v1.2.3


From 84705583c517437f59e5b2b3833e1dc1a693830a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Fri, 17 Apr 2015 11:43:20 +0200
Subject: beam_utils: Optimize index_labels_1/2

The execution time for beam_utils:index_labels_1/2 is among
the longest in the beam_bool, beam_bsm, beam_receive, and
beam_trim compiler passes. Therefore it is worthwhile to do
the minor optimization of replacing a call to lists:dropwhile/2
with a special-purpose drop_labels function.
---
 lib/compiler/src/beam_utils.erl | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lib/compiler/src/beam_utils.erl b/lib/compiler/src/beam_utils.erl
index 7704690f86..fd666be41e 100644
--- a/lib/compiler/src/beam_utils.erl
+++ b/lib/compiler/src/beam_utils.erl
@@ -612,13 +612,15 @@ is_reg_used_at_1(R, Lbl, St0) ->
     end.
 
 index_labels_1([{label,Lbl}|Is0], Acc) ->
-    Is = lists:dropwhile(fun({label,_}) -> true;
-			    (_) -> false end, Is0),
+    Is = drop_labels(Is0),
     index_labels_1(Is0, [{Lbl,Is}|Acc]);
 index_labels_1([_|Is], Acc) ->
     index_labels_1(Is, Acc);
 index_labels_1([], Acc) -> gb_trees:from_orddict(sort(Acc)).
 
+drop_labels([{label,_}|Is]) -> drop_labels(Is);
+drop_labels(Is) -> Is.
+
 %% Help functions for combine_heap_needs.
 
 combine_alloc_lists(Al1, Al2) ->
-- 
cgit v1.2.3


From 68fdf06841859afc4de3ccdc26fac7b78e8b47db Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Thu, 16 Apr 2015 13:43:29 +0200
Subject: erl_expand_records: Simplify handling of call_ext instructions

The erl_expand_records module has inherited code from sys_pre_expand.
We can simplify the code for handling the call_ext instruction to
make the code clearer and a smidge faster.
---
 lib/stdlib/src/erl_expand_records.erl | 25 ++++++-------------------
 1 file changed, 6 insertions(+), 19 deletions(-)

diff --git a/lib/stdlib/src/erl_expand_records.erl b/lib/stdlib/src/erl_expand_records.erl
index 64a00acd88..dc74d611a3 100644
--- a/lib/stdlib/src/erl_expand_records.erl
+++ b/lib/stdlib/src/erl_expand_records.erl
@@ -384,21 +384,11 @@ expr({call,Line,{tuple,_,[{atom,_,erlang},{atom,_,is_record}]},
 expr({call,Line,{atom,_La,N}=Atom,As0}, St0) ->
     {As,St1} = expr_list(As0, St0),
     Ar = length(As),
-    case erl_internal:bif(N, Ar) of
-        true ->
-            {{call,Line,Atom,As},St1};
-        false ->
-            case imported(N, Ar, St1) of
-                {yes,_Mod} ->
-                    {{call,Line,Atom,As},St1};
-                no ->
-                    case {N,Ar} of
-                        {record_info,2} ->
-                            record_info_call(Line, As, St1);
-                        _ ->
-                            {{call,Line,Atom,As},St1}
-                    end
-            end
+    case {N,Ar} =:= {record_info,2} andalso not imported(N, Ar, St1) of
+	true ->
+	    record_info_call(Line, As, St1);
+	false ->
+	    {{call,Line,Atom,As},St1}
     end;
 expr({call,Line,{remote,Lr,M,F},As0}, St0) ->
     {[M1,F1 | As1],St1} = expr_list([M,F | As0], St0),
@@ -832,10 +822,7 @@ add_imports(Mod, [F | Fs], Is) ->
 add_imports(_, [], Is) -> Is.
 
 imported(F, A, St) ->
-    case orddict:find({F,A}, St#exprec.imports) of
-        {ok,Mod} -> {yes,Mod};
-        error -> no
-    end.
+    orddict:is_key({F,A}, St#exprec.imports).
 
 %%%
 %%% Replace is_record/3 in guards with matching if possible.
-- 
cgit v1.2.3


From de9c0ab5295fb0a16b05c4df101074ce9cd6695b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Thu, 16 Apr 2015 08:14:28 +0200
Subject: beam_type: Eliminate redundant calls to checkerror_1/2

Profiling shows that the execution time for checkerror_1/2 could
be near the top even for modules without any floating point
operations.

It turns out that the complexity of simplify_float_1/4 is quadratic.
checkerror/1 is called with the growing accumulator for each
iteration. checkerror/1 will traverse the entire accumulated list
*unless* some floating point operations are used.

We can avoid this situation if we only call checkerror/1 when there
are live floating point registers. We can also avoid calling flush/3
if there are no live floating point registers.
---
 lib/compiler/src/beam_type.erl | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/lib/compiler/src/beam_type.erl b/lib/compiler/src/beam_type.erl
index 26c933481a..4731b5e78e 100644
--- a/lib/compiler/src/beam_type.erl
+++ b/lib/compiler/src/beam_type.erl
@@ -149,9 +149,10 @@ simplify_basic_1([], Ts, Acc) ->
 %%
 simplify_float(Is0, Ts0) ->
     {Is1,Ts} = simplify_float_1(Is0, Ts0, [], []),
-    Is2 = flt_need_heap(Is1),
+    Is2 = opt_fmoves(Is1, []),
+    Is3 = flt_need_heap(Is2),
     try
-	{flt_liveness(Is2),Ts}
+	{flt_liveness(Is3),Ts}
     catch
 	throw:not_possible -> not_possible
     end.
@@ -202,14 +203,15 @@ simplify_float_1([{set,_,_,{'catch',_}}=I|Is]=Is0, _Ts, Rs0, Acc0) ->
     simplify_float_1(Is, tdb_new(), Rs0, [I|Acc]);
 simplify_float_1([{set,_,_,{line,_}}=I|Is], Ts, Rs, Acc) ->
     simplify_float_1(Is, Ts, Rs, [I|Acc]);
+simplify_float_1([I|Is], Ts0, [], Acc) ->
+    Ts = update(I, Ts0),
+    simplify_float_1(Is, Ts, [], [I|Acc]);
 simplify_float_1([I|Is]=Is0, Ts0, Rs0, Acc0) ->
     Ts = update(I, Ts0),
     {Rs,Acc} = flush(Rs0, Is0, Acc0),
     simplify_float_1(Is, Ts, Rs, [I|checkerror(Acc)]);
-simplify_float_1([], Ts, Rs, Acc0) ->
-    Acc = checkerror(Acc0),
-    Is0 = reverse(flush_all(Rs, [], Acc)),
-    Is = opt_fmoves(Is0, []),
+simplify_float_1([], Ts, [], Acc) ->
+    Is = reverse(Acc),
     {Is,Ts}.
 
 coerce_to_float({integer,I}=Int) ->
-- 
cgit v1.2.3


From 93ad33ddcdceb201239f9c5133f2c51769006a2a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Sat, 18 Apr 2015 07:24:53 +0200
Subject: beam_bsm: Optimize btb_index()

lists:dropwhile/2 and the fun in btb_index_1/2 show up in the
top 10 list of eprof. Replace dropwhile with a special-purpose
function for a tiny increase in speed.
---
 lib/compiler/src/beam_bsm.erl | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/lib/compiler/src/beam_bsm.erl b/lib/compiler/src/beam_bsm.erl
index d54c2a9fde..427b7071ac 100644
--- a/lib/compiler/src/beam_bsm.erl
+++ b/lib/compiler/src/beam_bsm.erl
@@ -20,7 +20,7 @@
 -module(beam_bsm).
 -export([module/2,format_error/1]).
 
--import(lists, [member/2,foldl/3,reverse/1,sort/1,all/2,dropwhile/2]).
+-import(lists, [member/2,foldl/3,reverse/1,sort/1,all/2]).
 
 %%%
 %%% We optimize bit syntax matching where the tail end of a binary is
@@ -542,16 +542,13 @@ btb_context_regs_1(Regs, N, Tag, Acc) ->
 %%  a binary. MustSave is true if the function may pass the match
 %%  context to the bs_context_to_binary instruction (in which case
 %%  the current position in the binary must have saved into the
-%%  start position using "bs_save_2 Ctx start".
+%%  start position using "bs_save_2 Ctx start").
 
 btb_index(Fs) ->
     btb_index_1(Fs, []).
 
 btb_index_1([{function,_,_,Entry,Is0}|Fs], Acc0) ->
-    [{label,Entry}|Is] =
-	dropwhile(fun({label,L}) when L =:= Entry -> false;
-		     (_) -> true
-		  end, Is0),
+    Is = drop_to_label(Is0, Entry),
     Acc = btb_index_2(Is, Entry, false, Acc0),
     btb_index_1(Fs, Acc);
 btb_index_1([], Acc) -> gb_trees:from_orddict(sort(Acc)).
@@ -566,6 +563,9 @@ btb_index_2(Is0, Entry, _, Acc) ->
 	throw:none -> Acc
     end.
 
+drop_to_label([{label,L}|Is], L) -> Is;
+drop_to_label([_|Is], L) -> drop_to_label(Is, L).
+
 btb_index_find_start_match([{test,_,{f,F},_},{bs_context_to_binary,_}|Is]) ->
     btb_index_find_label(Is, F);
 btb_index_find_start_match(_) ->
-- 
cgit v1.2.3


From f5298679670260be1ea2caba212d20c528701455 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Fri, 17 Apr 2015 12:54:48 +0200
Subject: beam_asm: Eliminate unnecessary use of iolist_to_binary/1

---
 lib/compiler/src/beam_asm.erl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/compiler/src/beam_asm.erl b/lib/compiler/src/beam_asm.erl
index f8cf178d2e..084686def7 100644
--- a/lib/compiler/src/beam_asm.erl
+++ b/lib/compiler/src/beam_asm.erl
@@ -132,10 +132,10 @@ build_file(Code, Attr, Dict, NumLabels, NumFuncs, Abst, SourceFile, Opts) ->
     LiteralChunk = case beam_dict:literal_table(Dict) of
 		       {0,[]} -> [];
 		       {NumLiterals,LitTab0} ->
-			   LitTab1 = iolist_to_binary(LitTab0),
-			   LitTab2 = <<NumLiterals:32,LitTab1/binary>>,
-			   LitTab = iolist_to_binary(zlib:compress(LitTab2)),
-			   chunk(<<"LitT">>, <<(byte_size(LitTab2)):32>>, LitTab)
+			   LitTab1 = [<<NumLiterals:32>>,LitTab0],
+			   LitTab = zlib:compress(LitTab1),
+			   chunk(<<"LitT">>, <<(iolist_size(LitTab1)):32>>,
+				 LitTab)
 		   end,
 
     %% Create the line chunk.
-- 
cgit v1.2.3


From 08708c8327a0e75190a738ceae5080480e2c1e4f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Sat, 18 Apr 2015 06:58:08 +0200
Subject: beam_jump: Replace use of lists:dropwhile/2 with a custom function

The use of lists:dropwhile/2 is noticeable in the eprof results.
---
 lib/compiler/src/beam_jump.erl | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/lib/compiler/src/beam_jump.erl b/lib/compiler/src/beam_jump.erl
index ba71d4efae..52b6464c7f 100644
--- a/lib/compiler/src/beam_jump.erl
+++ b/lib/compiler/src/beam_jump.erl
@@ -127,7 +127,7 @@
 %%% on the program state.
 %%% 
 
--import(lists, [reverse/1,reverse/2,foldl/3,dropwhile/2]).
+-import(lists, [reverse/1,reverse/2,foldl/3]).
 
 module({Mod,Exp,Attr,Fs0,Lc}, _Opt) ->
     Fs = [function(F) || F <- Fs0],
@@ -509,10 +509,7 @@ rem_unused([{label,Lbl}=I|Is0], Used, [Prev|_]=Acc) ->
     case gb_sets:is_member(Lbl, Used) of
 	false ->
 	    Is = case is_unreachable_after(Prev) of
-		     true ->
-			 dropwhile(fun({label,_}) -> false;
-				      (_) -> true
-				   end, Is0);
+		     true -> drop_upto_label(Is0);
 		     false -> Is0
 		 end,
 	    rem_unused(Is, Used, Acc);
@@ -533,6 +530,10 @@ initial_labels([{label,Lbl}|Is], Acc) ->
 initial_labels([{func_info,_,_,_},{label,Lbl}|_], Acc) ->
     gb_sets:from_list([Lbl|Acc]).
 
+drop_upto_label([{label,_}|_]=Is) -> Is;
+drop_upto_label([_|Is]) -> drop_upto_label(Is);
+drop_upto_label([]) -> [].
+
 %% ulbl(Instruction, UsedGbSet) -> UsedGbSet'
 %%  Update the gb_set UsedGbSet with any function-local labels
 %%  (i.e. not with labels in call instructions) referenced by
-- 
cgit v1.2.3


From 81eb0eb3dbd047c926482d011244403f68c5dad4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Sun, 19 Apr 2015 09:45:53 +0200
Subject: v3_life: Optimize updating of the variable data base

Updating of the variable data base takes most of the time.
---
 lib/compiler/src/v3_life.erl | 117 ++++++++++++++++++++-----------------------
 1 file changed, 53 insertions(+), 64 deletions(-)

diff --git a/lib/compiler/src/v3_life.erl b/lib/compiler/src/v3_life.erl
index 75bd188479..4b1f1c3f71 100644
--- a/lib/compiler/src/v3_life.erl
+++ b/lib/compiler/src/v3_life.erl
@@ -45,7 +45,7 @@
 
 -export([vdb_find/2]).
 
--import(lists, [member/2,map/2,foldl/3,reverse/1,sort/1]).
+-import(lists, [member/2,map/2,reverse/1,sort/1]).
 -import(ordsets, [add_element/2,intersection/2,union/2]).
 
 -include("v3_kernel.hrl").
@@ -68,7 +68,7 @@ functions([], Acc) -> reverse(Acc).
 function(#k_fdef{anno=#k{a=Anno},func=F,arity=Ar,vars=Vs,body=Kb}) ->
     try
 	As = var_list(Vs),
-	Vdb0 = foldl(fun ({var,N}, Vdb) -> new_var(N, 0, Vdb) end, [], As),
+	Vdb0 = init_vars(As),
 	%% Force a top-level match!
 	B0 = case Kb of
 		 #k_match{} -> Kb;
@@ -94,14 +94,14 @@ function(#k_fdef{anno=#k{a=Anno},func=F,arity=Ar,vars=Vs,body=Kb}) ->
 body(#k_seq{arg=Ke,body=Kb}, I, Vdb0) ->
     %%ok = io:fwrite("life ~w:~p~n", [?LINE,{Ke,I,Vdb0}]),
     A = get_kanno(Ke),
-    Vdb1 = use_vars(A#k.us, I, new_vars(A#k.ns, I, Vdb0)),
+    Vdb1 = use_vars(union(A#k.us, A#k.ns), I, Vdb0),
     {Es,MaxI,Vdb2} = body(Kb, I+1, Vdb1),
     E = expr(Ke, I, Vdb2),
     {[E|Es],MaxI,Vdb2};
 body(Ke, I, Vdb0) ->
     %%ok = io:fwrite("life ~w:~p~n", [?LINE,{Ke,I,Vdb0}]),
     A = get_kanno(Ke),
-    Vdb1 = use_vars(A#k.us, I, new_vars(A#k.ns, I, Vdb0)),
+    Vdb1 = use_vars(union(A#k.us, A#k.ns), I, Vdb0),
     E = expr(Ke, I, Vdb1),
     {[E],I,Vdb1}.
 
@@ -150,12 +150,12 @@ expr(#k_try_enter{anno=A,arg=Ka,vars=Vs,body=Kb,evars=Evs,handler=Kh}, I, Vdb) -
     %% the body and handler. Add try tag 'variable'.
     Ab = get_kanno(Kb),
     Ah = get_kanno(Kh),
-    Tdb1 = use_vars(Ab#k.us, I+3, use_vars(Ah#k.us, I+3, Tdb0)),
+    Tdb1 = use_vars(union(Ab#k.us, Ah#k.us), I+3, Tdb0),
     Tdb2 = vdb_sub(I, I+2, Tdb1),
     Vnames = fun (Kvar) -> Kvar#k_var.name end,	%Get the variable names
     {Aes,_,Adb} = body(Ka, I+2, add_var({catch_tag,I+1}, I+1, 1000000, Tdb2)),
-    {Bes,_,Bdb} = body(Kb, I+4, new_vars(map(Vnames, Vs), I+3, Tdb2)),
-    {Hes,_,Hdb} = body(Kh, I+4, new_vars(map(Vnames, Evs), I+3, Tdb2)),
+    {Bes,_,Bdb} = body(Kb, I+4, new_vars(sort(map(Vnames, Vs)), I+3, Tdb2)),
+    {Hes,_,Hdb} = body(Kh, I+4, new_vars(sort(map(Vnames, Evs)), I+3, Tdb2)),
     #l{ke={try_enter,#l{ke={block,Aes},i=I+1,vdb=Adb,a=[]},
 	   var_list(Vs),#l{ke={block,Bes},i=I+3,vdb=Bdb,a=[]},
 	   var_list(Evs),#l{ke={block,Hes},i=I+3,vdb=Hdb,a=[]}},
@@ -171,7 +171,7 @@ expr(#k_receive{anno=A,var=V,body=Kb,timeout=T,action=Ka,ret=Rs}, I, Vdb) ->
     %% Work out imported variables which need to be locked.
     Rdb = vdb_sub(I, I+1, Vdb),
     M = match(Kb, add_element(V#k_var.name, A#k.us), I+1, [],
- 	      new_var(V#k_var.name, I, Rdb)),
+	      new_vars([V#k_var.name], I, Rdb)),
     {Tes,_,Adb} = body(Ka, I+1, Rdb),
     #l{ke={receive_loop,atomic(T),variable(V),M,
 	   #l{ke=Tes,i=I+1,vdb=Adb,a=[]},var_list(Rs)},
@@ -199,12 +199,12 @@ body_try(#k_try{anno=A,arg=Ka,vars=Vs,body=Kb,evars=Evs,handler=Kh,ret=Rs},
     %% the body and handler. Add try tag 'variable'.
     Ab = get_kanno(Kb),
     Ah = get_kanno(Kh),
-    Tdb1 = use_vars(Ab#k.us, I+3, use_vars(Ah#k.us, I+3, Tdb0)),
+    Tdb1 = use_vars(union(Ab#k.us, Ah#k.us), I+3, Tdb0),
     Tdb2 = vdb_sub(I, I+2, Tdb1),
     Vnames = fun (Kvar) -> Kvar#k_var.name end,	%Get the variable names
     {Aes,_,Adb} = body(Ka, I+2, add_var({catch_tag,I+1}, I+1, locked, Tdb2)),
-    {Bes,_,Bdb} = body(Kb, I+4, new_vars(map(Vnames, Vs), I+3, Tdb2)),
-    {Hes,_,Hdb} = body(Kh, I+4, new_vars(map(Vnames, Evs), I+3, Tdb2)),
+    {Bes,_,Bdb} = body(Kb, I+4, new_vars(sort(map(Vnames, Vs)), I+3, Tdb2)),
+    {Hes,_,Hdb} = body(Kh, I+4, new_vars(sort(map(Vnames, Evs)), I+3, Tdb2)),
     #l{ke={'try',#l{ke={block,Aes},i=I+1,vdb=Adb,a=[]},
 	   var_list(Vs),#l{ke={block,Bes},i=I+3,vdb=Bdb,a=[]},
 	   var_list(Evs),#l{ke={block,Hes},i=I+3,vdb=Hdb,a=[]},
@@ -400,79 +400,68 @@ is_gc_bif(Bif, Arity) ->
 	 erl_internal:new_type_test(Bif, Arity) orelse
 	 erl_internal:comp_op(Bif, Arity)).
 
-%% new_var(VarName, I, Vdb) -> Vdb.
+%% Keep track of life time for variables.
+%%
+%% init_vars([{var,VarName}]) -> Vdb.
 %% new_vars([VarName], I, Vdb) -> Vdb.
-%% use_var(VarName, I, Vdb) -> Vdb.
 %% use_vars([VarName], I, Vdb) -> Vdb.
 %% add_var(VarName, F, L, Vdb) -> Vdb.
+%%
+%% The list of variable names for new_vars/3 and use_vars/3
+%% must be sorted.
 
-new_var(V, I, Vdb) ->
-    vdb_store_new(V, I, I, Vdb).
+init_vars(Vs) ->
+    sort([{V,0,0} || {var,V} <- Vs]).
 
-new_vars(Vs, I, Vdb0) ->
-    foldl(fun (V, Vdb) -> new_var(V, I, Vdb) end, Vdb0, Vs).
+new_vars([], _, Vdb) -> Vdb;
+new_vars([V], I, Vdb) -> vdb_store_new(V, {V,I,I}, Vdb);
+new_vars(Vs, I, Vdb) -> vdb_update_vars(Vs, Vdb, I).
 
-use_var(V, I, Vdb) ->
+use_vars([], _, Vdb) ->
+    Vdb;
+use_vars([V], I, Vdb) ->
     case vdb_find(V, Vdb) of
-	{V,F,L} when I > L -> vdb_update(V, F, I, Vdb);
+	{V,F,L} when I > L -> vdb_update(V, {V,F,I}, Vdb);
 	{V,_,_} -> Vdb;
-	error -> vdb_store_new(V, I, I, Vdb)
-    end.
-
-use_vars([], _, Vdb) -> Vdb;
-use_vars([V], I, Vdb) -> use_var(V, I, Vdb);
-use_vars(Vs, I, Vdb) ->
-    Res = use_vars_1(sort(Vs), Vdb, I),
-    %% The following line can be used as an assertion.
-    %%   Res = foldl(fun (V, Vdb) -> use_var(V, I, Vdb) end, Vdb, Vs),
-    Res.
-
-%% Measurements show that it is worthwhile having this special
-%% function that updates/inserts several variables at once.
-
-use_vars_1([V|_]=Vs, [{V1,_,_}=Vd|Vdb], I) when V > V1 ->
-    [Vd|use_vars_1(Vs, Vdb, I)];
-use_vars_1([V|Vs], [{V1,_,_}|_]=Vdb, I) when V < V1 ->
-    %% New variable.
-    [{V,I,I}|use_vars_1(Vs, Vdb, I)];
-use_vars_1([V|Vs], [{_,F,L}=Vd|Vdb], I) ->
-    %% Existing variable.
-    if
-	I > L ->[{V,F,I}|use_vars_1(Vs, Vdb, I)];
-	true -> [Vd|use_vars_1(Vs, Vdb, I)]
+	error -> vdb_store_new(V, {V,I,I}, Vdb)
     end;
-use_vars_1([V|Vs], [], I) ->
-    %% New variable.
-    [{V,I,I}|use_vars_1(Vs, [], I)];
-use_vars_1([], Vdb, _) -> Vdb.
+use_vars(Vs, I, Vdb) -> vdb_update_vars(Vs, Vdb, I).
 
 add_var(V, F, L, Vdb) ->
-    vdb_store_new(V, F, L, Vdb).
+    vdb_store_new(V, {V,F,L}, Vdb).
 
 vdb_find(V, Vdb) ->
-    %% Performance note: Profiling shows that this function accounts for
-    %% a lot of the execution time when huge constant terms are built.
-    %% Using the BIF lists:keyfind/3 is a lot faster than the
-    %% original Erlang version.
     case lists:keyfind(V, 1, Vdb) of
 	false -> error;
 	Vd -> Vd
     end.
 
-%vdb_find(V, [{V1,F,L}=Vd|Vdb]) when V < V1 -> error;
-%vdb_find(V, [{V1,F,L}=Vd|Vdb]) when V == V1 -> Vd;
-%vdb_find(V, [{V1,F,L}=Vd|Vdb]) when V > V1 -> vdb_find(V, Vdb);
-%vdb_find(V, []) -> error.
+vdb_update(V, Update, [{V,_,_}|Vdb]) ->
+    [Update|Vdb];
+vdb_update(V, Update, [Vd|Vdb]) ->
+    [Vd|vdb_update(V, Update, Vdb)].
 
-vdb_update(V, F, L, [{V1,_,_}=Vd|Vdb]) when V > V1 ->
-    [Vd|vdb_update(V, F, L, Vdb)];
-vdb_update(V, F, L, [{V1,_,_}|Vdb]) when V == V1 ->
-    [{V,F,L}|Vdb].
+vdb_store_new(V, New, [{V1,_,_}=Vd|Vdb]) when V > V1 ->
+    [Vd|vdb_store_new(V, New, Vdb)];
+vdb_store_new(V, New, [{V1,_,_}|_]=Vdb) when V < V1 ->
+    [New|Vdb];
+vdb_store_new(_, New, []) -> [New].
 
-vdb_store_new(V, F, L, [{V1,_,_}=Vd|Vdb]) when V > V1 ->
-    [Vd|vdb_store_new(V, F, L, Vdb)];
-vdb_store_new(V, F, L, [{V1,_,_}|_]=Vdb) when V < V1 -> [{V,F,L}|Vdb];
-vdb_store_new(V, F, L, []) -> [{V,F,L}].
+vdb_update_vars([V|_]=Vs, [{V1,_,_}=Vd|Vdb], I) when V > V1 ->
+    [Vd|vdb_update_vars(Vs, Vdb, I)];
+vdb_update_vars([V|Vs], [{V1,_,_}|_]=Vdb, I) when V < V1 ->
+    %% New variable.
+    [{V,I,I}|vdb_update_vars(Vs, Vdb, I)];
+vdb_update_vars([V|Vs], [{_,F,L}=Vd|Vdb], I) ->
+    %% Existing variable.
+    if
+	I > L -> [{V,F,I}|vdb_update_vars(Vs, Vdb, I)];
+	true ->  [Vd|vdb_update_vars(Vs, Vdb, I)]
+    end;
+vdb_update_vars([V|Vs], [], I) ->
+    %% New variable.
+    [{V,I,I}|vdb_update_vars(Vs, [], I)];
+vdb_update_vars([], Vdb, _) -> Vdb.
 
 %% vdb_sub(Min, Max, Vdb) -> Vdb.
 %%  Extract variables which are used before and after Min.  Lock
-- 
cgit v1.2.3