1 files changed, 179 insertions, 51 deletions
diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl
index 6510571441..0325c714d0 100644
--- a/lib/compiler/src/compile.erl
+++ b/lib/compiler/src/compile.erl
@@ -31,6 +31,9 @@
 %% Erlc interface.
 -export([compile/3,compile_beam/3,compile_asm/3,compile_core/3]).
 
+%% Utility functions for compiler passes.
+-export([run_sub_passes/2]).
+
 -export_type([option/0]).
 
 -include("erl_compile.hrl").
@@ -39,6 +42,8 @@
 -import(lists, [member/2,reverse/1,reverse/2,keyfind/3,last/1,
 		map/2,flatmap/2,foreach/2,foldr/3,any/2]).
 
+-define(SUB_PASS_TIMES, compile__sub_pass_times).
+
 %%----------------------------------------------------------------------
 
 -type abstract_code() :: [erl_parse:abstract_form()].
@@ -64,6 +69,7 @@
 -type err_ret()  :: 'error' | {'error', errors(), warnings()}.
 -type comp_ret() :: mod_ret() | bin_ret() | err_ret().
 
+
 %%----------------------------------------------------------------------
 
 %%
@@ -143,6 +149,30 @@ noenv_output_generated(Opts) ->
 
 env_compiler_options() -> env_default_opts().
 
+
+%%%
+%%% Run sub passes from a compiler pass.
+%%%
+
+-spec run_sub_passes([term()], term()) -> term().
+
+run_sub_passes(Ps, St) ->
+    case get(?SUB_PASS_TIMES) of
+        undefined ->
+            Runner = fun(_Name, Run, S) -> Run(S) end,
+            run_sub_passes_1(Ps, Runner, St);
+        Times when is_list(Times) ->
+            Runner = fun(Name, Run, S0) ->
+                             T1 = erlang:monotonic_time(),
+                             S = Run(S0),
+                             T2 = erlang:monotonic_time(),
+                             put(?SUB_PASS_TIMES,
+                                 [{Name,T2-T1}|get(?SUB_PASS_TIMES)]),
+                             S
+                     end,
+            run_sub_passes_1(Ps, Runner, St)
+    end.
+
 %%
 %%  Local functions
 %%
@@ -180,8 +210,11 @@ do_compile(Input, Opts0) ->
                               {error,Reason}
                       end
              end,
-    %% Dialyzer has already spawned workers.
-    case lists:member(dialyzer, Opts) of
+    %% Some tools, like Dialyzer, has already spawned workers
+    %% and spawning extra workers actually slow the compilation
+    %% down instead of speeding it up, so we provide a mechanism
+    %% to bypass the compiler process.
+    case lists:member(no_spawn_compiler_process, Opts) of
         true ->
             IntFun();
         false ->
@@ -218,23 +251,36 @@ expand_opt(report, Os) ->
     [report_errors,report_warnings|Os];
 expand_opt(return, Os) ->
     [return_errors,return_warnings|Os];
+expand_opt(no_bsm3, Os) ->
+    %% The new bsm pass requires bsm3 instructions.
+    [no_bsm3,no_bsm_opt|Os];
 expand_opt(r16, Os) ->
-    [no_get_hd_tl,no_record_opt,no_utf8_atoms|Os];
+    expand_opt_before_21(Os);
 expand_opt(r17, Os) ->
-    [no_get_hd_tl,no_record_opt,no_utf8_atoms|Os];
+    expand_opt_before_21(Os);
 expand_opt(r18, Os) ->
-    [no_get_hd_tl,no_record_opt,no_utf8_atoms|Os];
+    expand_opt_before_21(Os);
 expand_opt(r19, Os) ->
-    [no_get_hd_tl,no_record_opt,no_utf8_atoms|Os];
+    expand_opt_before_21(Os);
 expand_opt(r20, Os) ->
-    [no_get_hd_tl,no_record_opt,no_utf8_atoms|Os];
+    expand_opt_before_21(Os);
+expand_opt(r21, Os) ->
+    [no_put_tuple2 | expand_opt(no_bsm3, Os)];
 expand_opt({debug_info_key,_}=O, Os) ->
     [encrypt_debug_info,O|Os];
-expand_opt(no_float_opt, Os) ->
-    %%Turn off the entire type optimization pass.
-    [no_topt|Os];
+expand_opt(no_type_opt=O, Os) ->
+    %% Be sure to keep the no_type_opt option so that it will
+    %% be recorded in the BEAM file, allowing the test suites
+    %% to recompile the file with this option.
+    [O,no_ssa_opt_type_start,
+     no_ssa_opt_type_continue,
+     no_ssa_opt_type_finish | Os];
 expand_opt(O, Os) -> [O|Os].
 
+expand_opt_before_21(Os) ->
+    [no_put_tuple2, no_get_hd_tl, no_ssa_opt_record,
+     no_utf8_atoms | expand_opt(no_bsm3, Os)].
+
 %% format_error(ErrorDescriptor) -> string()
 
 -spec format_error(term()) -> iolist().
@@ -247,6 +293,10 @@ format_error(bad_crypto_key) ->
     "invalid crypto key.";
 format_error(no_crypto_key) ->
     "no crypto key supplied.";
+format_error({unimplemented_instruction,Instruction}) ->
+    io_lib:fwrite("native-code compilation failed because of an "
+                  "unimplemented instruction (~s).",
+		  [Instruction]);
 format_error({native, E}) ->
     io_lib:fwrite("native-code compilation failed with reason: ~tP.",
 		  [E, 25]);
@@ -387,17 +437,57 @@ fold_comp([{Name,Pass}|Ps], Run, Code0, St0) ->
     end;
 fold_comp([], _Run, Code, St) -> {ok,Code,St}.
 
+run_sub_passes_1([{Name,Run}|Ps], Runner, St0)
+  when is_atom(Name), is_function(Run, 1) ->
+    try Runner(Name, Run, St0) of
+        St ->
+            run_sub_passes_1(Ps, Runner, St)
+    catch
+        C:E:Stk ->
+            io:format("Sub pass ~s\n", [Name]),
+            erlang:raise(C, E, Stk)
+    end;
+run_sub_passes_1([], _, St) -> St.
+
 run_tc({Name,Fun}, Code, St) ->
+    put(?SUB_PASS_TIMES, []),
     T1 = erlang:monotonic_time(),
     Val = (catch Fun(Code, St)),
     T2 = erlang:monotonic_time(),
-    Elapsed = erlang:convert_time_unit(T2 - T1, native, millisecond),
+    Times = erase(?SUB_PASS_TIMES),
+    Elapsed = erlang:convert_time_unit(T2 - T1, native, microsecond),
     Mem0 = erts_debug:flat_size(Val)*erlang:system_info(wordsize),
     Mem = lists:flatten(io_lib:format("~.1f kB", [Mem0/1024])),
     io:format(" ~-30s: ~10.3f s ~12s\n",
-	      [Name,Elapsed/1000,Mem]),
+	      [Name,Elapsed/1000000,Mem]),
+    print_times(Times, Name),
     Val.
 
+print_times(Times0, Name) ->
+    Fam0 = sofs:relation(Times0),
+    Fam1 = sofs:rel2fam(Fam0),
+    Fam2 = sofs:to_external(Fam1),
+    Fam3 = [{W,lists:sum(Times)} || {W,Times} <- Fam2],
+    Fam = reverse(lists:keysort(2, Fam3)),
+    Total = case lists:sum([T || {_,T} <- Fam]) of
+                0 -> 1;
+                Total0 -> Total0
+            end,
+    case Fam of
+        [] ->
+            ok;
+        [_|_] ->
+            io:format("    %% Sub passes of ~s from slowest to fastest:\n", [Name]),
+            print_times_1(Fam, Total)
+    end.
+
+print_times_1([{Name,T}|Ts], Total) ->
+    Elapsed = erlang:convert_time_unit(T, native, microsecond),
+    io:format("    ~-27s: ~10.3f s ~3w %\n",
+              [Name,Elapsed/1000000,round(100*T/Total)]),
+    print_times_1(Ts, Total);
+print_times_1([], _Total) -> ok.
+
 run_eprof({Name,Fun}, Code, Name, St) ->
     io:format("~p: Running eprof\n", [Name]),
     c:appcall(tools, eprof, start_profiling, [[self()]]),
@@ -731,8 +821,6 @@ kernel_passes() ->
     %% Optimizations that must be done after all other optimizations.
     [{pass,sys_core_bsm},
      {iff,dcbsm,{listing,"core_bsm"}},
-     {pass,sys_core_dsetel},
-     {iff,dsetel,{listing,"dsetel"}},
 
      {iff,clint,?pass(core_lint_module)},
      {iff,core,?pass(save_core_code)},
@@ -741,8 +829,30 @@ kernel_passes() ->
      ?pass(v3_kernel),
      {iff,dkern,{listing,"kernel"}},
      {iff,'to_kernel',{done,"kernel"}},
-     {pass,v3_codegen},
-     {iff,dcg,{listing,"codegen"}}
+     {pass,beam_kernel_to_ssa},
+     {iff,dssa,{listing,"ssa"}},
+     {iff,ssalint,{pass,beam_ssa_lint}},
+     {delay,
+      [{unless,no_share_opt,{pass,beam_ssa_share}},
+       {iff,dssashare,{listing,"ssashare"}},
+       {iff,ssalint,{pass,beam_ssa_lint}},
+       {unless,no_bsm_opt,{pass,beam_ssa_bsm}},
+       {iff,dssabsm,{listing,"ssabsm"}},
+       {iff,ssalint,{pass,beam_ssa_lint}},
+       {unless,no_fun_opt,{pass,beam_ssa_funs}},
+       {iff,dssafuns,{listing,"ssafuns"}},
+       {iff,ssalint,{pass,beam_ssa_lint}},
+       {unless,no_ssa_opt,{pass,beam_ssa_opt}},
+       {iff,dssaopt,{listing,"ssaopt"}},
+       {iff,ssalint,{pass,beam_ssa_lint}},
+       {unless,no_recv_opt,{pass,beam_ssa_recv}},
+       {iff,drecv,{listing,"recv"}}]},
+     {pass,beam_ssa_pre_codegen},
+     {iff,dprecg,{listing,"precodegen"}},
+     {iff,ssalint,{pass,beam_ssa_lint}},
+     {pass,beam_ssa_codegen},
+     {iff,dcg,{listing,"codegen"}},
+     {iff,doldcg,{listing,"codegen"}}
      | asm_passes()].
 
 asm_passes() ->
@@ -751,34 +861,16 @@ asm_passes() ->
       [{pass,beam_a},
        {iff,da,{listing,"a"}},
        {unless,no_postopt,
-	[{unless,no_reorder,{pass,beam_reorder}},
-	 {iff,dre,{listing,"reorder"}},
-	 {pass,beam_block},
+	[{pass,beam_block},
 	 {iff,dblk,{listing,"block"}},
 	 {unless,no_except,{pass,beam_except}},
 	 {iff,dexcept,{listing,"except"}},
-	 {unless,no_bs_opt,{pass,beam_bs}},
-	 {iff,dbs,{listing,"bs"}},
-	 {unless,no_topt,{pass,beam_type}},
-	 {iff,dtype,{listing,"type"}},
-	 {pass,beam_split},
-	 {iff,dsplit,{listing,"split"}},
-	 {unless,no_dead,{pass,beam_dead}},
-	 {iff,ddead,{listing,"dead"}},
 	 {unless,no_jopt,{pass,beam_jump}},
 	 {iff,djmp,{listing,"jump"}},
 	 {unless,no_peep_opt,{pass,beam_peep}},
 	 {iff,dpeep,{listing,"peep"}},
 	 {pass,beam_clean},
 	 {iff,dclean,{listing,"clean"}},
-	 {unless,no_bsm_opt,{pass,beam_bsm}},
-	 {iff,dbsm,{listing,"bsm"}},
-	 {unless,no_recv_opt,{pass,beam_receive}},
-	 {iff,drecv,{listing,"recv"}},
-	 {unless,no_record_opt,{pass,beam_record}},
-	 {iff,drecord,{listing,"record"}},
-         {unless,no_blk2,?pass(block2)},
-	 {iff,dblk2,{listing,"block2"}},
 	 {unless,no_stack_trimming,{pass,beam_trim}},
 	 {iff,dtrim,{listing,"trim"}},
 	 {pass,beam_flatten}]},
@@ -787,7 +879,9 @@ asm_passes() ->
        %% need to do a few clean-ups to code.
        {iff,no_postopt,[{pass,beam_clean}]},
 
+       {iff,diffable,?pass(diffable)},
        {pass,beam_z},
+       {iff,diffable,{listing,"S"}},
        {iff,dz,{listing,"z"}},
        {iff,dopt,{listing,"optimize"}},
        {iff,'S',{listing,"S"}},
@@ -1360,10 +1454,6 @@ v3_kernel(Code0, #compile{options=Opts,warnings=Ws0}=St) ->
 	    {ok,Code,St}
     end.
 
-block2(Code0, #compile{options=Opts}=St) ->
-    {ok,Code} = beam_block:module(Code0, [no_blockify|Opts]),
-    {ok,Code,St}.
-
 test_old_inliner(#compile{options=Opts}) ->
     %% The point of this test is to avoid loading the old inliner
     %% if we know that it will not be used.
@@ -1568,18 +1658,22 @@ native_compile_1(Code, St) ->
 	    case IgnoreErrors of
 		true ->
 		    Ws = [{St#compile.ifile,[{none,?MODULE,{native,R}}]}],
-		    {ok,St#compile{warnings=St#compile.warnings ++ Ws}};
+		    {ok,Code,St#compile{warnings=St#compile.warnings ++ Ws}};
 		false ->
 		    Es = [{St#compile.ifile,[{none,?MODULE,{native,R}}]}],
 		    {error,St#compile{errors=St#compile.errors ++ Es}}
 	    end
     catch
+        exit:{unimplemented_instruction,_}=Unimplemented ->
+            Ws = [{St#compile.ifile,
+                   [{none,?MODULE,Unimplemented}]}],
+            {ok,Code,St#compile{warnings=St#compile.warnings ++ Ws}};
 	Class:R:Stack ->
 	    case IgnoreErrors of
 		true ->
 		    Ws = [{St#compile.ifile,
 			   [{none,?MODULE,{native_crash,R,Stack}}]}],
-		    {ok,St#compile{warnings=St#compile.warnings ++ Ws}};
+		    {ok,Code,St#compile{warnings=St#compile.warnings ++ Ws}};
 		false ->
 		    erlang:raise(Class, R, Stack)
 	    end
@@ -1849,6 +1943,39 @@ restore_expand_module([F|Fs]) ->
     [F|restore_expand_module(Fs)];
 restore_expand_module([]) -> [].
 
+%%%
+%%% Transform the BEAM code to make it more friendly for
+%%% diffing: using function names instead of labels for
+%%% local calls and number labels relative to each function.
+%%%
+
+diffable(Code0, St) ->
+    {Mod,Exp,Attr,Fs0,NumLabels} = Code0,
+    EntryLabels0 = [{Entry,{Name,Arity}} ||
+                       {function,Name,Arity,Entry,_} <- Fs0],
+    EntryLabels = maps:from_list(EntryLabels0),
+    Fs = [diffable_fix_function(F, EntryLabels) || F <- Fs0],
+    Code = {Mod,Exp,Attr,Fs,NumLabels},
+    {ok,Code,St}.
+
+diffable_fix_function({function,Name,Arity,Entry0,Is0}, LabelMap0) ->
+    Entry = maps:get(Entry0, LabelMap0),
+    {Is1,LabelMap} = diffable_label_map(Is0, 1, LabelMap0, []),
+    Fb = fun(Old) -> error({no_fb,Old}) end,
+    Is = beam_utils:replace_labels(Is1, [], LabelMap, Fb),
+    {function,Name,Arity,Entry,Is}.
+
+diffable_label_map([{label,Old}|Is], New, Map, Acc) ->
+    case Map of
+        #{Old:=NewLabel} ->
+            diffable_label_map(Is, New, Map, [{label,NewLabel}|Acc]);
+        #{} ->
+            diffable_label_map(Is, New+1, Map#{Old=>New}, [{label,New}|Acc])
+    end;
+diffable_label_map([I|Is], New, Map, Acc) ->
+    diffable_label_map(Is, New, Map, [I|Acc]);
+diffable_label_map([], _New, Map, Acc) ->
+    {Acc,Map}.
 
 -spec options() -> 'ok'.
 
@@ -1969,22 +2096,25 @@ pre_load() ->
     L = [beam_a,
 	 beam_asm,
 	 beam_block,
-	 beam_bs,
-	 beam_bsm,
 	 beam_clean,
-	 beam_dead,
 	 beam_dict,
 	 beam_except,
 	 beam_flatten,
 	 beam_jump,
+	 beam_kernel_to_ssa,
 	 beam_opcodes,
 	 beam_peep,
-	 beam_receive,
-         beam_record,
-	 beam_reorder,
-	 beam_split,
+	 beam_ssa,
+	 beam_ssa_bsm,
+	 beam_ssa_codegen,
+	 beam_ssa_dead,
+         beam_ssa_funs,
+	 beam_ssa_opt,
+	 beam_ssa_pre_codegen,
+	 beam_ssa_recv,
+	 beam_ssa_share,
+	 beam_ssa_type,
 	 beam_trim,
-	 beam_type,
 	 beam_utils,
 	 beam_validator,
 	 beam_z,
@@ -2001,9 +2131,7 @@ pre_load() ->
 	 erl_scan,
 	 sys_core_alias,
 	 sys_core_bsm,
-	 sys_core_dsetel,
 	 sys_core_fold,
-	 v3_codegen,
 	 v3_core,
 	 v3_kernel],
     _ = code:ensure_modules_loaded(L),