From 63528ed4fa6463097dffdc951b46321bac37350f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Muska=C5=82a?= <michal@muskala.eu>
Date: Wed, 9 Aug 2017 13:50:21 +0200
Subject: Introduce beam_utils:replace_labels/4

---
 lib/compiler/src/beam_clean.erl | 58 +++--------------------------------
 lib/compiler/src/beam_utils.erl | 68 +++++++++++++++++++++++++++++++++++++++--
 2 files changed, 70 insertions(+), 56 deletions(-)

(limited to 'lib/compiler')

diff --git a/lib/compiler/src/beam_clean.erl b/lib/compiler/src/beam_clean.erl
index b736d39f9c..e094c2c320 100644
--- a/lib/compiler/src/beam_clean.erl
+++ b/lib/compiler/src/beam_clean.erl
@@ -24,7 +24,7 @@
 -export([module/2]).
 -export([bs_clean_saves/1]).
 -export([clean_labels/1]).
--import(lists, [map/2,foldl/3,reverse/1,filter/2]).
+-import(lists, [foldl/3,reverse/1,filter/2]).
 
 -spec module(beam_utils:module_code(), [compile:option()]) ->
                     {'ok',beam_utils:module_code()}.
@@ -118,7 +118,7 @@ add_to_work_list(F, {Fs,Used}=Sets) ->
 clean_labels(Fs0) ->
     St0 = #st{lmap=[],entry=1,lc=1},
     {Fs1,#st{lmap=Lmap0,lc=Lc}} = function_renumber(Fs0, St0, []),
-    Lmap = gb_trees:from_orddict(ordsets:from_list(Lmap0)),
+    Lmap = maps:from_list(Lmap0),
     Fs = function_replace(Fs1, Lmap, []),
     {Fs,Lc}.
 
@@ -187,7 +187,8 @@ is_record_tuple(_, _, _) -> no.
 
 function_replace([{function,Name,Arity,Entry,Asm0}|Fs], Dict, Acc) ->
     Asm = try
-	      replace(Asm0, [], Dict)
+              Fb = fun(Old) -> throw({error,{undefined_label,Old}}) end,
+              beam_utils:replace_labels(Asm0, [], Dict, Fb)
 	  catch
 	      throw:{error,{undefined_label,Lbl}=Reason} ->
 		  io:format("Function ~s/~w refers to undefined label ~w\n",
@@ -197,57 +198,6 @@ function_replace([{function,Name,Arity,Entry,Asm0}|Fs], Dict, Acc) ->
     function_replace(Fs, Dict, [{function,Name,Arity,Entry,Asm}|Acc]);
 function_replace([], _, Acc) -> Acc.
 
-replace([{test,Test,{f,Lbl},Ops}|Is], Acc, D) ->
-    replace(Is, [{test,Test,{f,label(Lbl, D)},Ops}|Acc], D);
-replace([{test,Test,{f,Lbl},Live,Ops,Dst}|Is], Acc, D) ->
-    replace(Is, [{test,Test,{f,label(Lbl, D)},Live,Ops,Dst}|Acc], D);
-replace([{select,I,R,{f,Fail0},Vls0}|Is], Acc, D) ->
-    Vls = map(fun ({f,L}) -> {f,label(L, D)};
-		   (Other) -> Other
-	      end, Vls0),
-    Fail = label(Fail0, D),
-    replace(Is, [{select,I,R,{f,Fail},Vls}|Acc], D);
-replace([{'try',R,{f,Lbl}}|Is], Acc, D) ->
-    replace(Is, [{'try',R,{f,label(Lbl, D)}}|Acc], D);
-replace([{'catch',R,{f,Lbl}}|Is], Acc, D) ->
-    replace(Is, [{'catch',R,{f,label(Lbl, D)}}|Acc], D);
-replace([{jump,{f,Lbl}}|Is], Acc, D) ->
-    replace(Is, [{jump,{f,label(Lbl, D)}}|Acc], D);
-replace([{loop_rec,{f,Lbl},R}|Is], Acc, D) ->
-    replace(Is, [{loop_rec,{f,label(Lbl, D)},R}|Acc], D);
-replace([{loop_rec_end,{f,Lbl}}|Is], Acc, D) ->
-    replace(Is, [{loop_rec_end,{f,label(Lbl, D)}}|Acc], D);
-replace([{wait,{f,Lbl}}|Is], Acc, D) ->
-    replace(Is, [{wait,{f,label(Lbl, D)}}|Acc], D);
-replace([{wait_timeout,{f,Lbl},To}|Is], Acc, D) ->
-    replace(Is, [{wait_timeout,{f,label(Lbl, D)},To}|Acc], D);
-replace([{bif,Name,{f,Lbl},As,R}|Is], Acc, D) when Lbl =/= 0 ->
-    replace(Is, [{bif,Name,{f,label(Lbl, D)},As,R}|Acc], D);
-replace([{gc_bif,Name,{f,Lbl},Live,As,R}|Is], Acc, D) when Lbl =/= 0 ->
-    replace(Is, [{gc_bif,Name,{f,label(Lbl, D)},Live,As,R}|Acc], D);
-replace([{call,Ar,{f,Lbl}}|Is], Acc, D) ->
-    replace(Is, [{call,Ar,{f,label(Lbl,D)}}|Acc], D);
-replace([{make_fun2,{f,Lbl},U1,U2,U3}|Is], Acc, D) ->
-    replace(Is, [{make_fun2,{f,label(Lbl, D)},U1,U2,U3}|Acc], D);
-replace([{bs_init,{f,Lbl},Info,Live,Ss,Dst}|Is], Acc, D) when Lbl =/= 0 ->
-    replace(Is, [{bs_init,{f,label(Lbl, D)},Info,Live,Ss,Dst}|Acc], D);
-replace([{bs_put,{f,Lbl},Info,Ss}|Is], Acc, D) when Lbl =/= 0 ->
-    replace(Is, [{bs_put,{f,label(Lbl, D)},Info,Ss}|Acc], D);
-replace([{put_map=I,{f,Lbl},Op,Src,Dst,Live,List}|Is], Acc, D)
-  when Lbl =/= 0 ->
-    replace(Is, [{I,{f,label(Lbl, D)},Op,Src,Dst,Live,List}|Acc], D);
-replace([{get_map_elements=I,{f,Lbl},Src,List}|Is], Acc, D) when Lbl =/= 0 ->
-    replace(Is, [{I,{f,label(Lbl, D)},Src,List}|Acc], D);
-replace([I|Is], Acc, D) ->
-    replace(Is, [I|Acc], D);
-replace([], Acc, _) -> Acc.
-
-label(Old, D) ->
-    case gb_trees:lookup(Old, D) of
-	{value,Val} -> Val;
-	none -> throw({error,{undefined_label,Old}})
-    end.
-
 %%%
 %%% Final fixup of bs_start_match2/5,bs_save2/bs_restore2 instructions for
 %%% new bit syntax matching (introduced in R11B).
diff --git a/lib/compiler/src/beam_utils.erl b/lib/compiler/src/beam_utils.erl
index cc6e54ca16..df41e35c82 100644
--- a/lib/compiler/src/beam_utils.erl
+++ b/lib/compiler/src/beam_utils.erl
@@ -23,14 +23,14 @@
 -module(beam_utils).
 -export([is_killed_block/2,is_killed/3,is_killed_at/3,
 	 is_not_used/3,
-	 empty_label_index/0,index_label/3,index_labels/1,
+	 empty_label_index/0,index_label/3,index_labels/1,replace_labels/4,
 	 code_at/2,bif_to_test/3,is_pure_test/1,
 	 live_opt/1,delete_live_annos/1,combine_heap_needs/2,
 	 split_even/1]).
 
 -export_type([code_index/0,module_code/0,instruction/0]).
 
--import(lists, [member/2,sort/1,reverse/1,splitwith/2]).
+-import(lists, [map/2,member/2,sort/1,reverse/1,splitwith/2]).
 
 %% instruction() describes all instructions that are used during optimzation
 %% (from beam_a to beam_z).
@@ -160,6 +160,18 @@ index_label(Lbl, Is0, Acc) ->
 code_at(L, Ll) ->
     gb_trees:get(L, Ll).
 
+%% replace_labels(FunctionIs, Tail, ReplaceDb, Fallback) -> FunctionIs.
+%%  Replace all labels in instructions according to the ReplaceDb.
+%%  If label is not found the Fallback is called with the label to
+%%  produce a new one.
+
+-spec replace_labels([instruction()],
+                     [instruction()],
+                     #{beam_asm:label() => beam_asm:label()},
+                     fun((beam_asm:label()) -> term())) -> [instruction()].
+replace_labels(Is, Acc, D, Fb) ->
+    replace_labels_1(Is, Acc, D, Fb).
+
 %% bif_to_test(Bif, [Op], Fail) -> {test,Test,Fail,[Op]}
 %%  Convert a BIF to a test. Fail if not possible.
 
@@ -643,6 +655,58 @@ index_labels_1([], Acc) -> gb_trees:from_orddict(sort(Acc)).
 drop_labels([{label,_}|Is]) -> drop_labels(Is);
 drop_labels(Is) -> Is.
 
+
+replace_labels_1([{test,Test,{f,Lbl},Ops}|Is], Acc, D, Fb) ->
+    replace_labels_1(Is, [{test,Test,{f,label(Lbl, D, Fb)},Ops}|Acc], D, Fb);
+replace_labels_1([{test,Test,{f,Lbl},Live,Ops,Dst}|Is], Acc, D, Fb) ->
+    replace_labels_1(Is, [{test,Test,{f,label(Lbl, D, Fb)},Live,Ops,Dst}|Acc], D, Fb);
+replace_labels_1([{select,I,R,{f,Fail0},Vls0}|Is], Acc, D, Fb) ->
+    Vls = map(fun ({f,L}) -> {f,label(L, D, Fb)};
+		   (Other) -> Other
+	      end, Vls0),
+    Fail = label(Fail0, D, Fb),
+    replace_labels_1(Is, [{select,I,R,{f,Fail},Vls}|Acc], D, Fb);
+replace_labels_1([{'try',R,{f,Lbl}}|Is], Acc, D, Fb) ->
+    replace_labels_1(Is, [{'try',R,{f,label(Lbl, D, Fb)}}|Acc], D, Fb);
+replace_labels_1([{'catch',R,{f,Lbl}}|Is], Acc, D, Fb) ->
+    replace_labels_1(Is, [{'catch',R,{f,label(Lbl, D, Fb)}}|Acc], D, Fb);
+replace_labels_1([{jump,{f,Lbl}}|Is], Acc, D, Fb) ->
+    replace_labels_1(Is, [{jump,{f,label(Lbl, D, Fb)}}|Acc], D, Fb);
+replace_labels_1([{loop_rec,{f,Lbl},R}|Is], Acc, D, Fb) ->
+    replace_labels_1(Is, [{loop_rec,{f,label(Lbl, D, Fb)},R}|Acc], D, Fb);
+replace_labels_1([{loop_rec_end,{f,Lbl}}|Is], Acc, D, Fb) ->
+    replace_labels_1(Is, [{loop_rec_end,{f,label(Lbl, D, Fb)}}|Acc], D, Fb);
+replace_labels_1([{wait,{f,Lbl}}|Is], Acc, D, Fb) ->
+    replace_labels_1(Is, [{wait,{f,label(Lbl, D, Fb)}}|Acc], D, Fb);
+replace_labels_1([{wait_timeout,{f,Lbl},To}|Is], Acc, D, Fb) ->
+    replace_labels_1(Is, [{wait_timeout,{f,label(Lbl, D, Fb)},To}|Acc], D, Fb);
+replace_labels_1([{bif,Name,{f,Lbl},As,R}|Is], Acc, D, Fb) when Lbl =/= 0 ->
+    replace_labels_1(Is, [{bif,Name,{f,label(Lbl, D, Fb)},As,R}|Acc], D, Fb);
+replace_labels_1([{gc_bif,Name,{f,Lbl},Live,As,R}|Is], Acc, D, Fb) when Lbl =/= 0 ->
+    replace_labels_1(Is, [{gc_bif,Name,{f,label(Lbl, D, Fb)},Live,As,R}|Acc], D, Fb);
+replace_labels_1([{call,Ar,{f,Lbl}}|Is], Acc, D, Fb) ->
+    replace_labels_1(Is, [{call,Ar,{f,label(Lbl, D, Fb)}}|Acc], D, Fb);
+replace_labels_1([{make_fun2,{f,Lbl},U1,U2,U3}|Is], Acc, D, Fb) ->
+    replace_labels_1(Is, [{make_fun2,{f,label(Lbl, D, Fb)},U1,U2,U3}|Acc], D, Fb);
+replace_labels_1([{bs_init,{f,Lbl},Info,Live,Ss,Dst}|Is], Acc, D, Fb) when Lbl =/= 0 ->
+    replace_labels_1(Is, [{bs_init,{f,label(Lbl, D, Fb)},Info,Live,Ss,Dst}|Acc], D, Fb);
+replace_labels_1([{bs_put,{f,Lbl},Info,Ss}|Is], Acc, D, Fb) when Lbl =/= 0 ->
+    replace_labels_1(Is, [{bs_put,{f,label(Lbl, D, Fb)},Info,Ss}|Acc], D, Fb);
+replace_labels_1([{put_map=I,{f,Lbl},Op,Src,Dst,Live,List}|Is], Acc, D, Fb)
+  when Lbl =/= 0 ->
+    replace_labels_1(Is, [{I,{f,label(Lbl, D, Fb)},Op,Src,Dst,Live,List}|Acc], D, Fb);
+replace_labels_1([{get_map_elements=I,{f,Lbl},Src,List}|Is], Acc, D, Fb) when Lbl =/= 0 ->
+    replace_labels_1(Is, [{I,{f,label(Lbl, D, Fb)},Src,List}|Acc], D, Fb);
+replace_labels_1([I|Is], Acc, D, Fb) ->
+    replace_labels_1(Is, [I|Acc], D, Fb);
+replace_labels_1([], Acc, _, _) -> Acc.
+
+label(Old, D, Fb) ->
+    case D of
+        #{Old := New} -> New;
+        _ -> Fb(Old)
+    end.
+
 %% Help functions for combine_heap_needs.
 
 combine_alloc_lists(Al1, Al2) ->
-- 
cgit v1.2.3


From f7c9383f4c3d4b6819b5ba4d54c7093df806fe4a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Muska=C5=82a?= <michal@muskala.eu>
Date: Wed, 9 Aug 2017 13:51:16 +0200
Subject: Run the sharing optimisation in beam_jump until fixpoint

This is especially useful after inlining a function with a case.

Today the compiler would most probably be able to unify all the leafs of the
case during the sharing optimisation, but it would fail to unify the pattern
matching itself.

Naively running the optimisation multiple times wouldn't be able to find the
common code either, because it would differ in jump/fail targets of various
instructions.

To remedy this, after doing each sharing pass we traverse the code backwards
when reversing and update all the jump targets with the new targets that were
discovered during the unification pass. This allows running the optimisation
until fixpoint and makes sure all sharing opportunities will be discovered.

This optimisation also helps with the Elixir's `with/else` construct.
---
 lib/compiler/src/beam_jump.erl | 65 ++++++++++++++++++++++++++----------------
 1 file changed, 40 insertions(+), 25 deletions(-)

(limited to 'lib/compiler')

diff --git a/lib/compiler/src/beam_jump.erl b/lib/compiler/src/beam_jump.erl
index 4365451356..427f605ce2 100644
--- a/lib/compiler/src/beam_jump.erl
+++ b/lib/compiler/src/beam_jump.erl
@@ -156,41 +156,46 @@ function({function,Name,Arity,CLabel,Asm0}) ->
 %%%
 
 share(Is0) ->
-    %% We will get more sharing if we never fall through to a label.
-    Is = eliminate_fallthroughs(Is0, []),
-    share_1(Is, #{}, [], []).
+    Is1 = eliminate_fallthroughs(Is0, []),
+    Is2 = find_fixpoint(fun(Is) ->
+                                share_1(Is, #{}, #{}, [], [])
+                        end, Is1),
+    reverse(Is2).
 
-share_1([{label,L}=Lbl|Is], Dict0, [_|_]=Seq, Acc) ->
+share_1([{label,L}=Lbl|Is], Dict0, Lbls0, [_|_]=Seq, Acc) ->
     case maps:find(Seq, Dict0) of
 	error ->
 	    Dict = maps:put(Seq, L, Dict0),
-	    share_1(Is, Dict, [], [Lbl|Seq ++ Acc]);
+	    share_1(Is, Dict, Lbls0, [], [Lbl|Seq ++ Acc]);
 	{ok,Label} ->
-	    share_1(Is, Dict0, [], [Lbl,{jump,{f,Label}}|Acc])
+            Lbls = maps:put(L, Label, Lbls0),
+	    share_1(Is, Dict0, Lbls, [], [Lbl,{jump,{f,Label}}|Acc])
     end;
-share_1([{func_info,_,_,_}=I|Is], _, [], Acc) ->
-    reverse(Is, [I|Acc]);
-share_1([{'catch',_,_}=I|Is], Dict0, Seq, Acc) ->
-    Dict = clean_non_sharable(Dict0),
-    share_1(Is, Dict, [I|Seq], Acc);
-share_1([{'try',_,_}=I|Is], Dict0, Seq, Acc) ->
-    Dict = clean_non_sharable(Dict0),
-    share_1(Is, Dict, [I|Seq], Acc);
-share_1([{try_case,_}=I|Is], Dict0, Seq, Acc) ->
-    Dict = clean_non_sharable(Dict0),
-    share_1(Is, Dict, [I|Seq], Acc);
-share_1([{catch_end,_}=I|Is], Dict0, Seq, Acc) ->
-    Dict = clean_non_sharable(Dict0),
-    share_1(Is, Dict, [I|Seq], Acc);
-share_1([I|Is], Dict, Seq, Acc) ->
+share_1([{func_info,_,_,_}|_]=Is, _, Lbls, [], Acc) when Lbls =/= #{} ->
+    beam_utils:replace_labels(Acc, Is, Lbls, fun(Old) -> Old end);
+share_1([{func_info,_,_,_}|_]=Is, _, Lbls, [], Acc) when Lbls =:= #{} ->
+    reverse(Acc, Is);
+share_1([{'catch',_,_}=I|Is], Dict0, Lbls0, Seq, Acc) ->
+    {Dict,Lbls} = clean_non_sharable(Dict0, Lbls0),
+    share_1(Is, Dict, Lbls, [I|Seq], Acc);
+share_1([{'try',_,_}=I|Is], Dict0, Lbls0, Seq, Acc) ->
+    {Dict,Lbls} = clean_non_sharable(Dict0, Lbls0),
+    share_1(Is, Dict, Lbls, [I|Seq], Acc);
+share_1([{try_case,_}=I|Is], Dict0, Lbls0, Seq, Acc) ->
+    {Dict,Lbls} = clean_non_sharable(Dict0, Lbls0),
+    share_1(Is, Dict, Lbls, [I|Seq], Acc);
+share_1([{catch_end,_}=I|Is], Dict0, Lbls0, Seq, Acc) ->
+    {Dict,Lbls} = clean_non_sharable(Dict0, Lbls0),
+    share_1(Is, Dict, Lbls, [I|Seq], Acc);
+share_1([I|Is], Dict, Lbls, Seq, Acc) ->
     case is_unreachable_after(I) of
 	false ->
-	    share_1(Is, Dict, [I|Seq], Acc);
+	    share_1(Is, Dict, Lbls, [I|Seq], Acc);
 	true ->
-	    share_1(Is, Dict, [I], Acc)
+	    share_1(Is, Dict, Lbls, [I], Acc)
     end.
 
-clean_non_sharable(Dict) ->
+clean_non_sharable(Dict0, Lbls0) ->
     %% We are passing in or out of a 'catch' or 'try' block. Remove
     %% sequences that should not be shared over the boundaries of the
     %% block. Since the end of the sequence must match, the only
@@ -198,7 +203,17 @@ clean_non_sharable(Dict) ->
     %% the 'catch'/'try' block is a sequence that ends with an
     %% instruction that causes an exception. Any sequence that causes
     %% an exception must contain a line/1 instruction.
-    maps:filter(fun(K, _V) -> sharable_with_try(K) end, Dict).
+    Dict1 = maps:to_list(Dict0),
+    Lbls1 = maps:to_list(Lbls0),
+    {Dict2,Lbls2} = foldl(fun({K, V}, {Dict,Lbls}) ->
+                                  case sharable_with_try(K) of
+                                      true ->
+                                          {[{K,V}|Dict],lists:keydelete(V, 2, Lbls)};
+                                      false ->
+                                          {Dict,Lbls}
+                                  end
+                          end, {[],Lbls1}, Dict1),
+    {maps:from_list(Dict2),maps:from_list(Lbls2)}.
 
 sharable_with_try([{line,_}|_]) ->
     %% This sequence may cause an exception and may potentially
-- 
cgit v1.2.3


From cf95a4a8b6a0d57423a89465e950beb16c9606a1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Muska=C5=82a?= <michal@muskala.eu>
Date: Sun, 13 Aug 2017 14:15:06 +0200
Subject: Enhance elimination of useless tests in beam_jump

It can happen we have the following situation:

    {test,is_tuple,Fail,[R1]}
    {test,test_arity,Fail,[R1,N1]}
    {get_tuple_element,R1,N2,R2}
    {test,is_eq_exaqct,Fail,[R2,Atom]}
    {jump,Fail}

Previously, the optimisation would eliminate the last is_eq_exact test, but
we can do more. If the register R2 is not used in Fail, we can eliminate the
get_tuple_element instruction as well as all the preceding tests. Ultimately,
the whole sequence can be replaced by:

    {jump,Fail}
---
 lib/compiler/src/beam_jump.erl | 62 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 58 insertions(+), 4 deletions(-)

(limited to 'lib/compiler')

diff --git a/lib/compiler/src/beam_jump.erl b/lib/compiler/src/beam_jump.erl
index 427f605ce2..72bf1e21dd 100644
--- a/lib/compiler/src/beam_jump.erl
+++ b/lib/compiler/src/beam_jump.erl
@@ -291,13 +291,14 @@ extract_seq_1(_, _) -> no.
 	{
 	  entry :: beam_asm:label(), %Entry label (must not be moved).
 	  mlbl :: #{beam_asm:label() := [beam_asm:label()]}, %Moved labels.
-	  labels :: cerl_sets:set()         %Set of referenced labels.
+	  labels :: cerl_sets:set(),         %Set of referenced labels.
+          index :: beam_utils:code_index() | {lazy,[beam_utils:instruction()]} %Index build lazily only if needed
 	}).
 
 opt(Is0, CLabel) ->
     find_fixpoint(fun(Is) ->
 			  Lbls = initial_labels(Is),
-			  St = #st{entry=CLabel,mlbl=#{},labels=Lbls},
+			  St = #st{entry=CLabel,mlbl=#{},labels=Lbls,index={lazy,Is}},
 			  opt(Is, [], St)
 		  end, Is0).
 
@@ -307,7 +308,7 @@ find_fixpoint(OptFun, Is0) ->
 	Is -> find_fixpoint(OptFun, Is)
     end.
 
-opt([{test,_,{f,L}=Lbl,_}=I|[{jump,{f,L}}|_]=Is], Acc, St) ->
+opt([{test,_,{f,L}=Lbl,_}=I|[{jump,{f,L}}|_]=Is], Acc0, St0) ->
     %% We have
     %%    Test Label Ops
     %%    jump Label
@@ -316,10 +317,23 @@ opt([{test,_,{f,L}=Lbl,_}=I|[{jump,{f,L}}|_]=Is], Acc, St) ->
     case beam_utils:is_pure_test(I) of
 	false ->
 	    %% Test is not pure; we must keep it.
-	    opt(Is, [I|Acc], label_used(Lbl, St));
+	    opt(Is, [I|Acc0], label_used(Lbl, St0));
 	true ->
 	    %% The test is pure and its failure label is the same
 	    %% as in the jump that follows -- thus it is not needed.
+            %% Check if any of the previous instructions could also be eliminated.
+            {Acc,St} = opt_useless_loads(Acc0, L, St0),
+	    opt(Is, Acc, St)
+    end;
+opt([{test,_,{f,L}=Lbl,_}=I|[{label,L}|_]=Is], Acc0, St0) ->
+    %% Similar to the above, except we have a fall-through rather than jump
+    %%    Test Label Ops
+    %%    label Label
+    case beam_utils:is_pure_test(I) of
+	false ->
+	    opt(Is, [I|Acc0], label_used(Lbl, St0));
+	true ->
+            {Acc,St} = opt_useless_loads(Acc0, L, St0),
 	    opt(Is, Acc, St)
     end;
 opt([{test,Test0,{f,L}=Lbl,Ops}=I|[{jump,To}|Is]=Is0], Acc, St) ->
@@ -398,6 +412,46 @@ insert_fc_labels([{label,L}=I|Is0], Mlbl) ->
     end;
 insert_fc_labels([_|_]=Is, _) -> Is.
 
+%% After eliminating a test, it might happen, that a register was only used
+%% in this test. Let's check if that was the case and if it was so, we can
+%% eliminate the load into the register completely.
+opt_useless_loads([{block,_}|_]=Is, L, #st{index={lazy,FIs}}=St) ->
+    opt_useless_loads(Is, L, St#st{index=beam_utils:index_labels(FIs)});
+opt_useless_loads([{block,Block0}|Is], L, #st{index=Index}=St) ->
+    case opt_useless_block_loads(Block0, L, Index) of
+        [] ->
+            opt_useless_loads(Is, L, St);
+        [_|_]=Block ->
+            {[{block,Block}|Is],St}
+    end;
+%% After eliminating the test and useless blocks, it might happen,
+%% that the previous test could also be eliminated.
+%% It might be that the label was already marked as used, even if ultimately,
+%% it never will be - we can't do much about it at that point, though
+opt_useless_loads([{test,_,{f,L},_}=I|Is], L, St) ->
+    case beam_utils:is_pure_test(I) of
+        false ->
+            {[I|Is],St};
+        true ->
+            opt_useless_loads(Is, L, St)
+    end;
+opt_useless_loads(Is, _L, St) ->
+    {Is,St}.
+
+opt_useless_block_loads([{set,[Dst],_,_}=I|Is], L, Index) ->
+    BlockJump = [{block,Is},{jump,{f,L}}],
+    case beam_utils:is_killed(Dst, BlockJump, Index) of
+        true ->
+            %% The register is killed and not used, we can remove the load
+            opt_useless_block_loads(Is, L, Index);
+        false ->
+            [I|opt_useless_block_loads(Is, L, Index)]
+    end;
+opt_useless_block_loads([I|Is], L, Index) ->
+    [I|opt_useless_block_loads(Is, L, Index)];
+opt_useless_block_loads([], _L, _Index) ->
+    [].
+
 maps_append_list(K,Vs,M) ->
     case M of
         #{K:=Vs0} -> M#{K:=Vs0++Vs}; % same order as dict
-- 
cgit v1.2.3


From 104c69e2153c41d5fb4eea86d18852d065258083 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Muska=C5=82a?= <michal@muskala.eu>
Date: Sun, 13 Aug 2017 17:42:09 +0200
Subject: Replace labels instead of inserting duplicates in beam_jump

This makes other optimisations more efficient since we have less labels overall.
---
 lib/compiler/src/beam_jump.erl | 89 +++++++++++++++---------------------------
 1 file changed, 32 insertions(+), 57 deletions(-)

(limited to 'lib/compiler')

diff --git a/lib/compiler/src/beam_jump.erl b/lib/compiler/src/beam_jump.erl
index 72bf1e21dd..c33de217bd 100644
--- a/lib/compiler/src/beam_jump.erl
+++ b/lib/compiler/src/beam_jump.erl
@@ -71,9 +71,9 @@
 %%%
 %%%    jump L2
 %%%          . . .
-%%%    L1:
 %%%    L2: ...
 %%%
+%%%    and all preceding uses of L1 renamed to L2.
 %%%    If the jump is unreachable, it will be removed according to (1).
 %%%
 %%% (5) In
@@ -290,15 +290,15 @@ extract_seq_1(_, _) -> no.
 -record(st,
 	{
 	  entry :: beam_asm:label(), %Entry label (must not be moved).
-	  mlbl :: #{beam_asm:label() := [beam_asm:label()]}, %Moved labels.
+	  replace :: #{beam_asm:label() := beam_asm:label()}, %Labels to replace.
 	  labels :: cerl_sets:set(),         %Set of referenced labels.
-          index :: beam_utils:code_index() | {lazy,[beam_utils:instruction()]} %Index build lazily only if needed
+          index :: beam_utils:code_index() | {lazy,[beam_utils:instruction()]} %Index built lazily only if needed
 	}).
 
 opt(Is0, CLabel) ->
     find_fixpoint(fun(Is) ->
 			  Lbls = initial_labels(Is),
-			  St = #st{entry=CLabel,mlbl=#{},labels=Lbls,index={lazy,Is}},
+			  St = #st{entry=CLabel,replace=#{},labels=Lbls,index={lazy,Is}},
 			  opt(Is, [], St)
 		  end, Is0).
 
@@ -355,30 +355,16 @@ opt([{test,_,{f,_}=Lbl,_,_,_}=I|Is], Acc, St) ->
     opt(Is, [I|Acc], label_used(Lbl, St));
 opt([{select,_,_R,Fail,Vls}=I|Is], Acc, St) ->
     skip_unreachable(Is, [I|Acc], label_used([Fail|Vls], St));
-opt([{label,Lbl}=I|Is], Acc, #st{mlbl=Mlbl}=St0) ->
-    case maps:find(Lbl, Mlbl) of
-	{ok,Lbls} ->
-	    %% Essential to remove the list of labels from the dictionary,
-	    %% since we will rescan the inserted labels.  We MUST rescan.
-	    St = St0#st{mlbl=maps:remove(Lbl, Mlbl)},
-	    insert_labels([Lbl|Lbls], Is, Acc, St);
-	error ->
-	    opt(Is, [I|Acc], St0)
-    end;
+opt([{label,From}=I,{label,To}|Is], Acc, #st{replace=Replace}=St) ->
+    opt([I|Is], Acc, St#st{replace=Replace#{To => From}});
 opt([{jump,{f,_}=X}|[{label,_},{jump,X}|_]=Is], Acc, St) ->
     opt(Is, Acc, St);
 opt([{jump,{f,Lbl}}|[{label,Lbl}|_]=Is], Acc, St) ->
     opt(Is, Acc, St);
-opt([{jump,{f,L}=Lbl}=I|Is], Acc0, #st{mlbl=Mlbl0}=St0) ->
-    %% All labels before this jump instruction should now be
-    %% moved to the location of the jump's target.
-    {Lbls,Acc} = collect_labels(Acc0, St0),
-    St = case Lbls of
-	     [] -> St0;
-	     [_|_] ->
-		 Mlbl = maps_append_list(L, Lbls, Mlbl0),
-		 St0#st{mlbl=Mlbl}
-	 end,
+opt([{jump,{f,L}=Lbl}=I|Is], Acc0, St0) ->
+    %% Replace all labels before this jump instruction into the
+    %% location of the jump's target.
+    {Acc,St} = collect_labels(Acc0, L, St0),
     skip_unreachable(Is, [I|Acc], label_used(Lbl, St));
 %% Optimization: quickly handle some common instructions that don't
 %% have any failure labels and where is_unreachable_after(I) =:= false.
@@ -398,19 +384,21 @@ opt([I|Is], Acc, #st{labels=Used0}=St0) ->
 	true  -> skip_unreachable(Is, [I|Acc], St);
 	false -> opt(Is, [I|Acc], St)
     end;
-opt([], Acc, #st{mlbl=Mlbl}) ->
-    Code = reverse(Acc),
-    insert_fc_labels(Code, Mlbl).
-
-insert_fc_labels([{label,L}=I|Is0], Mlbl) ->
-    case maps:find(L, Mlbl) of
-	error ->
-	    [I|insert_fc_labels(Is0, Mlbl)];
-	{ok,Lbls} ->
-	    Is = [{label,Lb} || Lb <- Lbls] ++ Is0,
-	    [I|insert_fc_labels(Is, maps:remove(L, Mlbl))]
+opt([], Acc, #st{replace=Replace0}) when Replace0 =/= #{} ->
+    Replace = normalize_replace(maps:to_list(Replace0), Replace0, []),
+    beam_utils:replace_labels(Acc, [], Replace, fun(Old) -> Old end);
+opt([], Acc, #st{replace=Replace}) when Replace =:= #{} ->
+    reverse(Acc).
+
+normalize_replace([{From,To0}|Rest], Replace, Acc) ->
+    case Replace of
+        #{To0 := To} ->
+            normalize_replace([{From,To}|Rest], Replace, Acc);
+        _ ->
+            normalize_replace(Rest, Replace, [{From,To0}|Acc])
     end;
-insert_fc_labels([_|_]=Is, _) -> Is.
+normalize_replace([], _Replace, Acc) ->
+    maps:from_list(Acc).
 
 %% After eliminating a test, it might happen, that a register was only used
 %% in this test. Let's check if that was the case and if it was so, we can
@@ -452,22 +440,16 @@ opt_useless_block_loads([I|Is], L, Index) ->
 opt_useless_block_loads([], _L, _Index) ->
     [].
 
-maps_append_list(K,Vs,M) ->
-    case M of
-        #{K:=Vs0} -> M#{K:=Vs0++Vs}; % same order as dict
-        _ -> M#{K => Vs}
-    end.
-
-collect_labels(Is, #st{entry=Entry}) ->
-    collect_labels_1(Is, Entry, []).
+collect_labels(Is, Label, #st{entry=Entry,replace=Replace} = St) ->
+    collect_labels_1(Is, Label, Entry, Replace, St).
 
-collect_labels_1([{label,Entry}|_]=Is, Entry, Acc) ->
+collect_labels_1([{label,Entry}|_]=Is, _Label, Entry, Acc, St) ->
     %% Never move the entry label.
-    {Acc,Is};
-collect_labels_1([{label,L}|Is], Entry, Acc) ->
-    collect_labels_1(Is, Entry, [L|Acc]);
-collect_labels_1(Is, _Entry, Acc) ->
-    {Acc,Is}.
+    {Is,St#st{replace=Acc}};
+collect_labels_1([{label,L}|Is], Label, Entry, Acc, St) ->
+    collect_labels_1(Is, Label, Entry, Acc#{L => Label}, St);
+collect_labels_1(Is, _Label, _Entry, Acc, St) ->
+    {Is,St#st{replace=Acc}}.
 
 %% label_defined(Is, Label) -> true | false.
 %%  Test whether the label Label is defined at the start of the instruction
@@ -487,13 +469,6 @@ invert_test(is_eq_exact) -> is_ne_exact;
 invert_test(is_ne_exact) -> is_eq_exact;
 invert_test(_) ->           not_possible.
 
-insert_labels([L|Ls], Is, [{jump,{f,L}}|Acc], St) ->
-    insert_labels(Ls, [{label,L}|Is], Acc, St);
-insert_labels([L|Ls], Is, Acc, St) ->
-    insert_labels(Ls, [{label,L}|Is], Acc, St);
-insert_labels([], Is, Acc, St) ->
-    opt(Is, Acc, St).
-
 %% skip_unreachable([Instruction], St).
 %%  Remove all instructions (including definitions of labels
 %%  that have not been referenced yet) up to the next
-- 
cgit v1.2.3


From cac51274eb9a550d5a3cc0e1b60591a9c2c1ffde Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Muska=C5=82a?= <michal@muskala.eu>
Date: Sun, 13 Aug 2017 17:54:41 +0200
Subject: Apply the redundant test optimisation also in case of fall-through

Even though, it's not possible to have fall-throughs when entering the otp
pass, it can produce them itself and we're running the pass until fixpoint.
---
 lib/compiler/src/beam_jump.erl | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'lib/compiler')

diff --git a/lib/compiler/src/beam_jump.erl b/lib/compiler/src/beam_jump.erl
index c33de217bd..0bcec9ce19 100644
--- a/lib/compiler/src/beam_jump.erl
+++ b/lib/compiler/src/beam_jump.erl
@@ -336,6 +336,17 @@ opt([{test,_,{f,L}=Lbl,_}=I|[{label,L}|_]=Is], Acc0, St0) ->
             {Acc,St} = opt_useless_loads(Acc0, L, St0),
 	    opt(Is, Acc, St)
     end;
+opt([{test,_,{f,L}=Lbl,_}=I|[{label,L}|_]=Is], Acc0, St0) ->
+    %% Similar to the above, except we have a fall-through rather than jump
+    %%    Test Label Ops
+    %%    label Label
+    case beam_utils:is_pure_test(I) of
+	false ->
+	    opt(Is, [I|Acc0], label_used(Lbl, St0));
+	true ->
+            {Acc,St} = opt_useless_loads(Acc0, L, St0),
+	    opt(Is, Acc, St)
+    end;
 opt([{test,Test0,{f,L}=Lbl,Ops}=I|[{jump,To}|Is]=Is0], Acc, St) ->
     case is_label_defined(Is, L) of
 	false ->
-- 
cgit v1.2.3