Diffstat (limited to 'lib/compiler/src')
-rw-r--r--  lib/compiler/src/Makefile                 |    1
-rw-r--r--  lib/compiler/src/beam_block.erl           |   38
-rw-r--r--  lib/compiler/src/beam_jump.erl            |   55
-rw-r--r--  lib/compiler/src/beam_peep.erl            |   12
-rw-r--r--  lib/compiler/src/beam_ssa.erl             |    1
-rw-r--r--  lib/compiler/src/beam_ssa_codegen.erl     |   39
-rw-r--r--  lib/compiler/src/beam_ssa_dead.erl        |   59
-rw-r--r--  lib/compiler/src/beam_ssa_opt.erl         |   20
-rw-r--r--  lib/compiler/src/beam_ssa_pre_codegen.erl |   22
-rw-r--r--  lib/compiler/src/beam_ssa_share.erl       |  370
-rw-r--r--  lib/compiler/src/beam_ssa_type.erl        |  103
-rw-r--r--  lib/compiler/src/beam_validator.erl       |    6
-rw-r--r--  lib/compiler/src/compile.erl              |    4
-rw-r--r--  lib/compiler/src/compiler.app.src         |    1
-rw-r--r--  lib/compiler/src/sys_core_fold.erl        |   20
15 files changed, 662 insertions(+), 89 deletions(-)
diff --git a/lib/compiler/src/Makefile b/lib/compiler/src/Makefile
index d475e5a19a..961dacc6c9 100644
--- a/lib/compiler/src/Makefile
+++ b/lib/compiler/src/Makefile
@@ -69,6 +69,7 @@ MODULES = \
 	beam_ssa_pp \
 	beam_ssa_pre_codegen \
 	beam_ssa_recv \
+	beam_ssa_share \
 	beam_ssa_type \
 	beam_kernel_to_ssa \
 	beam_trim \
diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl
index 9d8d5b2b0c..707974b2c1 100644
--- a/lib/compiler/src/beam_block.erl
+++ b/lib/compiler/src/beam_block.erl
@@ -22,7 +22,7 @@
 -module(beam_block).
 -export([module/2]).
 
--import(lists, [reverse/1,splitwith/2]).
+-import(lists, [keysort/2,reverse/1,splitwith/2]).
 
 -spec module(beam_utils:module_code(), [compile:option()]) ->
@@ -53,7 +53,8 @@ blockify([I|Is0]=IsAll, Acc) ->
     case collect(I) of
         error -> blockify(Is0, [I|Acc]);
         Instr when is_tuple(Instr) ->
-            {Block,Is} = collect_block(IsAll),
+            {Block0,Is} = collect_block(IsAll),
+            Block = sort_moves(Block0),
             blockify(Is, [{block,Block}|Acc])
     end;
 blockify([], Acc) -> reverse(Acc).
@@ -117,3 +118,36 @@ embed_lines([{block,B1},{line,_}=Line|T], Acc) ->
 embed_lines([I|Is], Acc) ->
     embed_lines(Is, [I|Acc]);
 embed_lines([], Acc) -> Acc.
+
+%% sort_moves([Instruction]) -> [Instruction].
+%%  Sort move instructions on the Y register to give the loader
+%%  more opportunities for combining instructions.
+
+sort_moves([{set,[{x,_}],[{y,_}],move}=I|Is0]) ->
+    {Moves,Is} = sort_moves_1(Is0, x, y, [I]),
+    Moves ++ sort_moves(Is);
+sort_moves([{set,[{y,_}],[{x,_}],move}=I|Is0]) ->
+    {Moves,Is} = sort_moves_1(Is0, y, x, [I]),
+    Moves ++ sort_moves(Is);
+sort_moves([I|Is]) ->
+    [I|sort_moves(Is)];
+sort_moves([]) -> [].
+
+sort_moves_1([{set,[{x,0}],[_],move}=I|Is], _DTag, _STag, Acc) ->
+    %% The loader sometimes combines a move to x0 with the
+    %% instruction that follows, producing, for example, a move_call
+    %% instruction. Therefore, we don't want to include this move
+    %% instruction in the sorting.
+    {sort_on_yreg(Acc)++[I],Is};
+sort_moves_1([{set,[{DTag,_}],[{STag,_}],move}=I|Is], DTag, STag, Acc) ->
+    sort_moves_1(Is, DTag, STag, [I|Acc]);
+sort_moves_1(Is, _DTag, _STag, Acc) ->
+    {sort_on_yreg(Acc),Is}.
+
+sort_on_yreg([{set,[Dst],[Src],move}|_]=Moves) ->
+    case {Dst,Src} of
+        {{y,_},{x,_}} ->
+            keysort(2, Moves);
+        {{x,_},{y,_}} ->
+            keysort(3, Moves)
+    end.
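To see the kind of code the new sort_moves/1 pass targets, here is a small module (hypothetical; not part of this commit). Saving several arguments across a call produces a run of consecutive x-to-y moves in one block, which the pass can sort by Y register so the loader can fuse adjacent moves:

-module(sort_moves_example).
-export([f/3]).

%% Hypothetical input: A, B, and C must survive the call to g/0, so
%% the entry block stores them in Y registers with consecutive `move`
%% instructions. Sorting that run on the Y register gives the loader
%% a better chance to combine adjacent moves into one instruction.
f(A, B, C) ->
    Res = g(),
    {Res, A, B, C}.

g() -> ok.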
diff --git a/lib/compiler/src/beam_jump.erl b/lib/compiler/src/beam_jump.erl
index d3a618d211..8b0e3e32f8 100644
--- a/lib/compiler/src/beam_jump.erl
+++ b/lib/compiler/src/beam_jump.erl
@@ -101,6 +101,10 @@
 %%%    always keep the label. (beam_clean will remove any unused
 %%%    labels.)
 %%%
+%%% (7) Replace a jump to a return instruction with a return instruction.
+%%%     Similarly, replace a jump to deallocate + return with those
+%%%     instructions.
+%%%
 %%% Note: This module depends on (almost) all branches and jumps only
 %%% going forward, so that we can remove instructions (including definition
 %%% of labels) after any label that has not been referenced by the code
@@ -150,7 +154,8 @@ function({function,Name,Arity,CLabel,Asm0}, Lc0) ->
         Asm3 = share(Asm2),
         Asm4 = move(Asm3),
         Asm5 = opt(Asm4, CLabel),
-        Asm = remove_unused_labels(Asm5),
+        Asm6 = unshare(Asm5),
+        Asm = remove_unused_labels(Asm6),
         {{function,Name,Arity,CLabel,Asm},Lc}
     catch
         Class:Error:Stack ->
@@ -393,14 +398,13 @@ extract_seq_1(_, _) -> no.
 
 -record(st,
 	{
 	 entry :: beam_asm:label(),          %Entry label (must not be moved).
 	 replace :: #{beam_asm:label() := beam_asm:label()}, %Labels to replace.
-	 labels :: cerl_sets:set(),          %Set of referenced labels.
-	 index :: beam_utils:code_index() | {lazy,[beam_utils:instruction()]} %Index built lazily only if needed
+	 labels :: cerl_sets:set()           %Set of referenced labels.
 	}).
 
 opt(Is0, CLabel) ->
     find_fixpoint(fun(Is) ->
                           Lbls = initial_labels(Is),
-                          St = #st{entry=CLabel,replace=#{},labels=Lbls,index={lazy,Is}},
+                          St = #st{entry=CLabel,replace=#{},labels=Lbls},
                           opt(Is, [], St)
                   end, Is0).
 
@@ -425,16 +429,6 @@ opt([{test,_,{f,L}=Lbl,_}=I|[{jump,{f,L}}|_]=Is], Acc, St) ->
             %% as in the jump that follows -- thus it is not needed.
             opt(Is, Acc, St)
     end;
-opt([{test,_,{f,L}=Lbl,_}=I|[{label,L}|_]=Is], Acc, St) ->
-    %% Similar to the above, except we have a fall-through rather than a jump:
-    %%    Test Label Ops
-    %%    label Label
-    case beam_utils:is_pure_test(I) of
-        false ->
-            opt(Is, [I|Acc], label_used(Lbl, St));
-        true ->
-            opt(Is, Acc, St)
-    end;
 opt([{test,Test0,{f,L}=Lbl,Ops}=I|[{jump,To}|Is]=Is0], Acc, St) ->
     case is_label_defined(Is, L) of
         false ->
@@ -625,6 +619,39 @@ drop_upto_label([{label,_}|_]=Is) -> Is;
 drop_upto_label([_|Is]) -> drop_upto_label(Is);
 drop_upto_label([]) -> [].
 
+%% unshare([Instruction]) -> [Instruction].
+%%  Replace a jump to a return sequence (a `return` instruction
+%%  optionally preceded by a `deallocate` instruction) with the return
+%%  sequence. This always saves execution time and may also save code
+%%  space (depending on the architecture). Eliminating `jump`
+%%  instructions also gives beam_trim more opportunities to trim the
+%%  stack.
+
+unshare(Is) ->
+    Short = unshare_collect_short(Is, #{}),
+    unshare_short(Is, Short).
+
+unshare_collect_short([{label,L},return|Is], Map) ->
+    unshare_collect_short(Is, Map#{L=>[return]});
+unshare_collect_short([{label,L},{deallocate,_}=D,return|Is], Map) ->
+    %% `deallocate` and `return` are combined into one instruction by
+    %% the loader.
+    unshare_collect_short(Is, Map#{L=>[D,return]});
+unshare_collect_short([_|Is], Map) ->
+    unshare_collect_short(Is, Map);
+unshare_collect_short([], Map) -> Map.
+
+unshare_short([{jump,{f,F}}=I|Is], Map) ->
+    case Map of
+        #{F:=Seq} ->
+            Seq ++ unshare_short(Is, Map);
+        #{} ->
+            [I|unshare_short(Is, Map)]
+    end;
+unshare_short([I|Is], Map) ->
+    [I|unshare_short(Is, Map)];
+unshare_short([], _Map) -> [].
+
 %% ulbl(Instruction, UsedCerlSet) -> UsedCerlSet'
 %%  Update the cerl_set UsedCerlSet with any function-local labels
 %%  (i.e. not with labels in call instructions) referenced by
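A small illustration of what unshare/1 undoes (hypothetical module, for illustration only): the earlier share/1 pass may have made several clauses jump to one shared `return` (or `deallocate` + `return`) sequence, and unshare/1 copies that short sequence back to each jump site, trading a little code size for speed and better stack trimming:

-module(unshare_example).
-export([check/1]).

%% Hypothetical input: all three clauses end in an identical return
%% sequence. After unshare/1, each clause returns directly instead of
%% jumping to a shared return label.
check(X) when is_atom(X) -> atom;
check(X) when is_integer(X) -> integer;
check(_) -> other.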
diff --git a/lib/compiler/src/beam_peep.erl b/lib/compiler/src/beam_peep.erl
index 2323a439e9..5730e9704e 100644
--- a/lib/compiler/src/beam_peep.erl
+++ b/lib/compiler/src/beam_peep.erl
@@ -94,30 +94,26 @@ peep([{gc_bif,_,_,_,_,Dst}=I|Is], SeenTests0, Acc) ->
 peep([{jump,{f,L}},{label,L}=I|Is], _, Acc) ->
     %% Sometimes beam_jump has missed this optimization.
     peep(Is, gb_sets:empty(), [I|Acc]);
-peep([{select,Op,R,F,Vls0}|Is], SeenTests0, Acc0) ->
+peep([{select,select_val,R,F,Vls0}|Is], SeenTests0, Acc0) ->
     case prune_redundant_values(Vls0, F) of
         [] ->
             %% No values left. Must convert to plain jump.
             I = {jump,F},
             peep([I|Is], gb_sets:empty(), Acc0);
-        [{atom,_}=Value,Lbl] when Op =:= select_val ->
+        [{atom,_}=Value,Lbl] ->
             %% Single value left. Convert to regular test.
             Is1 = [{test,is_eq_exact,F,[R,Value]},{jump,Lbl}|Is],
             peep(Is1, SeenTests0, Acc0);
-        [{integer,_}=Value,Lbl] when Op =:= select_val ->
+        [{integer,_}=Value,Lbl] ->
             %% Single value left. Convert to regular test.
             Is1 = [{test,is_eq_exact,F,[R,Value]},{jump,Lbl}|Is],
             peep(Is1, SeenTests0, Acc0);
-        [Arity,Lbl] when Op =:= select_tuple_arity ->
-            %% Single value left. Convert to regular test.
-            Is1 = [{test,test_arity,F,[R,Arity]},{jump,Lbl}|Is],
-            peep(Is1, SeenTests0, Acc0);
         [{atom,B1},Lbl,{atom,B2},Lbl] when B1 =:= not B2 ->
             %% Replace with is_boolean test.
             Is1 = [{test,is_boolean,F,[R]},{jump,Lbl}|Is],
             peep(Is1, SeenTests0, Acc0);
         [_|_]=Vls ->
-            I = {select,Op,R,F,Vls},
+            I = {select,select_val,R,F,Vls},
             peep(Is, gb_sets:empty(), [I|Acc0])
     end;
 peep([{get_map_elements,Fail,Src,List}=I|Is], _SeenTests, Acc0) ->
diff --git a/lib/compiler/src/beam_ssa.erl b/lib/compiler/src/beam_ssa.erl
index c5e23d2ae0..b491e340b7 100644
--- a/lib/compiler/src/beam_ssa.erl
+++ b/lib/compiler/src/beam_ssa.erl
@@ -194,6 +194,7 @@ no_side_effect(#b_set{op=Op}) ->
         extract -> true;
         get_hd -> true;
         get_tl -> true;
+        get_map_element -> true;
         get_tuple_element -> true;
         has_map_field -> true;
         is_nonempty_list -> true;
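For context, a sketch of source code that exercises the is_boolean clause kept in beam_peep above (hypothetical module; names are illustrative). A select_val over the atoms `true` and `false` that branches to the same label for both values can be collapsed into one is_boolean test:

-module(peep_example).
-export([is_bool/1]).

%% Hypothetical input: `true` and `false` both select the same code,
%% matching the [{atom,B1},Lbl,{atom,B2},Lbl] pattern above, so the
%% select can be replaced by a single is_boolean test.
is_bool(X) ->
    case X of
        true -> yes;
        false -> yes;
        _ -> no
    end.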
diff --git a/lib/compiler/src/beam_ssa_codegen.erl b/lib/compiler/src/beam_ssa_codegen.erl
index 3c14062d0b..d3facc5911 100644
--- a/lib/compiler/src/beam_ssa_codegen.erl
+++ b/lib/compiler/src/beam_ssa_codegen.erl
@@ -747,7 +747,16 @@ need_live_anno(Op) ->
     end.
 
 %%%
-%%% Add annotations for defined Y registers.
+%%% Add the following annotations for Y registers:
+%%%
+%%%   def_yregs   An ordset with variables that refer to live Y registers.
+%%%               That is, Y registers that have been killed are not
+%%%               included. This annotation is added to all instructions
+%%%               that require Y registers to be initialized.
+%%%
+%%%   kill_yregs  This annotation is added to call instructions. It is
+%%%               an ordset containing variables referring to Y registers
+%%%               that will no longer be used after the call instruction.
 %%%
 
 defined(Linear, #cg{regs=Regs}) ->
@@ -863,13 +872,35 @@ opt_allocate(Linear, #cg{regs=Regs}) ->
 
 opt_allocate_1([{L,#cg_blk{is=[#cg_alloc{stack=Stk}=I0|Is]}=Blk0}|Bs]=Bs0, Regs)
   when is_integer(Stk) ->
-    Yregs = opt_alloc_def(Bs0, gb_sets:singleton(L), []),
-    I = I0#cg_alloc{def_yregs=Yregs},
-    [{L,Blk0#cg_blk{is=[I|Is]}}|opt_allocate_1(Bs, Regs)];
+    %% Collect the variables that are initialized by copy
+    %% instructions in this block.
+    case ordsets:from_list(opt_allocate_defs(Is, Regs)) of
+        Yregs when length(Yregs) =:= Stk ->
+            %% Those copy instructions are sufficient to fully
+            %% initialize the stack frame.
+            I = I0#cg_alloc{def_yregs=Yregs},
+            [{L,Blk0#cg_blk{is=[I|Is]}}|opt_allocate_1(Bs, Regs)];
+        Yregs0 ->
+            %% Determine a conservative approximation of the Y
+            %% registers that are guaranteed to be initialized by all
+            %% successors of this block, and add to it the variables
+            %% initialized by copy instructions in this block.
+            Yregs1 = opt_alloc_def(Bs0, gb_sets:singleton(L), []),
+            Yregs = ordsets:union(Yregs0, Yregs1),
+            I = I0#cg_alloc{def_yregs=Yregs},
+            [{L,Blk0#cg_blk{is=[I|Is]}}|opt_allocate_1(Bs, Regs)]
+    end;
 opt_allocate_1([B|Bs], Regs) ->
     [B|opt_allocate_1(Bs, Regs)];
 opt_allocate_1([], _) -> [].
 
+opt_allocate_defs([#cg_set{op=copy,dst=Dst}|Is], Regs) ->
+    case is_yreg(Dst, Regs) of
+        true -> [Dst|opt_allocate_defs(Is, Regs)];
+        false -> []
+    end;
+opt_allocate_defs(_, _Regs) -> [].
+
 opt_alloc_def([{L,#cg_blk{is=Is,last=Last}}|Bs], Ws0, Def0) ->
     case gb_sets:is_member(L, Ws0) of
        false ->
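A rough illustration of the fast path added in opt_allocate_1/2 (hypothetical module; whether a given function actually takes this path depends on register allocation). When leading copy instructions initialize the whole stack frame, def_yregs can be computed from this block alone, with no search of successor blocks:

-module(yregs_example).
-export([f/2]).

%% Hypothetical input: both arguments are stored in the stack frame
%% right after allocation, so the copy instructions alone may fully
%% determine the def_yregs annotation for the allocate instruction.
f(A, B) ->
    ok = g(),
    {A, B}.

g() -> ok.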
diff --git a/lib/compiler/src/beam_ssa_dead.erl b/lib/compiler/src/beam_ssa_dead.erl
index c20652580d..067d9a6741 100644
--- a/lib/compiler/src/beam_ssa_dead.erl
+++ b/lib/compiler/src/beam_ssa_dead.erl
@@ -135,7 +135,8 @@ shortcut_terminator(Last, _Is, _Bs, _St) ->
     Last.
 
 shortcut_switch([{Lit,L0}|T], Bool, Bs, St0) ->
-    St = St0#st{rel_op=normalize_op({bif,'=:='}, [Bool,Lit])},
+    RelOp = {'=:=',Bool,Lit},
+    St = St0#st{rel_op=RelOp},
     #b_br{bool=#b_literal{val=true},succ=L} =
         shortcut(L0, bind_var(Bool, Lit, Bs), St#st{target=one_way}),
     [{Lit,L}|shortcut_switch(T, Bool, Bs, St0)];
@@ -388,41 +389,43 @@ eval_terminator(#b_switch{arg=Arg,fail=Fail,list=List}=Sw, Bs, St) ->
             %% Literal argument. Simplify to a `br`.
             beam_ssa:normalize(Sw#b_switch{arg=Val});
         #b_var{} ->
-            case St of
-                #st{rel_op=none} ->
-                    %% No previous relational operator is stored.
-                    %% Give up.
+            %% Try optimizing the switch.
+            case eval_switch(List, Arg, St, Fail) of
+                none ->
                     none;
-                #st{} ->
-                    %% There is a previous relational operator stored.
-                    %% Try optimizing the switch.
-                    case eval_switch(List, Arg, St, Fail) of
-                        none ->
-                            none;
-                        To when is_integer(To) ->
-                            %% Either one of the values in the switch
-                            %% matched a previous value in a '=:=' test, or
-                            %% none of the values matched a previous test.
-                            #b_br{bool=#b_literal{val=true},succ=To,fail=To}
-                    end
+                To when is_integer(To) ->
+                    %% Either one of the values in the switch
+                    %% matched a previous value in a '=:=' test, or
+                    %% none of the values matched a previous test.
+                    #b_br{bool=#b_literal{val=true},succ=To,fail=To}
             end
     end;
 eval_terminator(#b_ret{}, _Bs, _St) ->
     none.
 
-eval_switch([{Lit,Lbl}|T], Arg, St, Fail) ->
-    case eval_rel_op({bif,'=:='}, [Arg,Lit], St) of
-        none ->
-            %% This label could be reached.
-            eval_switch(T, Arg, St, none);
-        #b_literal{val=false} ->
-            %% This branch will never be taken.
-            eval_switch(T, Arg, St, Fail);
-        #b_literal{val=true} ->
+eval_switch(List, Arg, #st{rel_op={_,Arg,_}=PrevOp}, Fail) ->
+    %% There is a previous relational operator testing the same variable.
+    %% Optimization may be possible.
+    eval_switch_1(List, Arg, PrevOp, Fail);
+eval_switch(_, _, _, _) ->
+    %% There is either no previous relational operator, or it tests
+    %% a different variable. Nothing to optimize.
+    none.
+
+eval_switch_1([{Lit,Lbl}|T], Arg, PrevOp, Fail) ->
+    RelOp = {'=:=',Arg,Lit},
+    case will_succeed(PrevOp, RelOp) of
+        yes ->
             %% Success. This branch will always be taken.
-            Lbl
+            Lbl;
+        no ->
+            %% This branch will never be taken.
+            eval_switch_1(T, Arg, PrevOp, Fail);
+        maybe ->
+            %% This label could be reached.
+            eval_switch_1(T, Arg, PrevOp, none)
    end;
-eval_switch([], _Arg, _St, Fail) ->
+eval_switch_1([], _Arg, _PrevOp, Fail) ->
     %% Fail is now either the failure label or 'none'.
     Fail.
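A sketch of the pattern the reworked eval_switch handles (hypothetical module; the exact SSA shape depends on earlier passes). Once a `=:=` test on a variable has been recorded as the current relational operator, will_succeed/2 can decide each branch of a later switch on the same variable:

-module(dead_example).
-export([f/1]).

%% Hypothetical input: after the outer test records {'=:=',X,a},
%% the inner case on X can be shortcut: the `a` branch always
%% succeeds and the other branches can never be taken.
f(X) ->
    case X =:= a of
        true ->
            case X of
                a -> one;
                b -> two;
                _ -> three
            end;
        false ->
            other
    end.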
diff --git a/lib/compiler/src/beam_ssa_opt.erl b/lib/compiler/src/beam_ssa_opt.erl
index ac2d943fef..2dda67eac6 100644
--- a/lib/compiler/src/beam_ssa_opt.erl
+++ b/lib/compiler/src/beam_ssa_opt.erl
@@ -22,7 +22,8 @@
 -export([module/2]).
 
 -include("beam_ssa.hrl").
--import(lists, [all/2,append/1,foldl/3,keyfind/3,member/2,reverse/1,reverse/2,
+-import(lists, [all/2,append/1,foldl/3,keyfind/3,member/2,
+                reverse/1,reverse/2,
                 splitwith/2,takewhile/2,unzip/1]).
 
 -spec module(beam_ssa:b_module(), [compile:option()]) ->
@@ -787,15 +788,20 @@ float_flush_regs(#fs{regs=Rs}) ->
 %%% with cheaper instructions
 %%%
 
-ssa_opt_live(#st{ssa=Linear}=St) ->
-    St#st{ssa=live_opt(reverse(Linear), #{}, [])}.
+ssa_opt_live(#st{ssa=Linear0}=St) ->
+    RevLinear = reverse(Linear0),
+    Blocks0 = maps:from_list(RevLinear),
+    Blocks = live_opt(RevLinear, #{}, Blocks0),
+    Linear = beam_ssa:linearize(Blocks),
+    St#st{ssa=Linear}.
 
-live_opt([{L,Blk0}|Bs], LiveMap0, Acc) ->
-    Successors = beam_ssa:successors(Blk0),
+live_opt([{L,Blk0}|Bs], LiveMap0, Blocks) ->
+    Blk1 = beam_ssa_share:block(Blk0, Blocks),
+    Successors = beam_ssa:successors(Blk1),
     Live0 = live_opt_succ(Successors, L, LiveMap0),
-    {Blk,Live} = live_opt_blk(Blk0, Live0),
+    {Blk,Live} = live_opt_blk(Blk1, Live0),
     LiveMap = live_opt_phis(Blk#b_blk.is, L, Live, LiveMap0),
-    live_opt(Bs, LiveMap, [{L,Blk}|Acc]);
+    live_opt(Bs, LiveMap, Blocks#{L:=Blk});
 live_opt([], _, Acc) -> Acc.
 
 live_opt_succ([S|Ss], L, LiveMap) ->
diff --git a/lib/compiler/src/beam_ssa_pre_codegen.erl b/lib/compiler/src/beam_ssa_pre_codegen.erl
index 9175931375..56fe9b4793 100644
--- a/lib/compiler/src/beam_ssa_pre_codegen.erl
+++ b/lib/compiler/src/beam_ssa_pre_codegen.erl
@@ -1478,6 +1478,10 @@ copy_retval_is([#b_set{op=put_tuple_elements,args=Args0}=I0],
                false, _Yregs, Copy, Count, Acc) ->
     I = I0#b_set{args=copy_sub_args(Args0, Copy)},
     {reverse(Acc, [I|acc_copy([], Copy)]),Count};
+copy_retval_is([#b_set{op=Op}=I0], false, Yregs, Copy, Count0, Acc0)
+  when Op =:= call; Op =:= make_fun ->
+    {I,Count,Acc} = place_retval_copy(I0, Yregs, Copy, Count0, Acc0),
+    {reverse(Acc, [I]),Count};
 copy_retval_is([#b_set{}]=Is, false, _Yregs, Copy, Count, Acc) ->
     {reverse(Acc, acc_copy(Is, Copy)),Count};
 copy_retval_is([#b_set{},#b_set{op=succeeded}]=Is, false, _Yregs, Copy, Count, Acc) ->
@@ -1990,13 +1994,15 @@ reserve_zregs(Blocks, Intervals, Res) ->
     beam_ssa:fold_rpo(F, [0], Res, Blocks).
 
 reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst},
-              #b_set{op={bif,'=:='},args=[Dst,Val]}], _Last, ShortLived, A0) ->
-    case Val of
-        #b_literal{val=Arity} when Arity bsr 32 =:= 0 ->
+              #b_set{op={bif,'=:='},args=[Dst,Val]}], Last, ShortLived, A0) ->
+    case {Val,Last} of
+        {#b_literal{val=Arity},#b_br{}} when Arity bsr 32 =:= 0 ->
             %% These two instructions can be combined to a test_arity
             %% instruction provided that the arity variable is short-lived.
             reserve_zreg_1(Dst, ShortLived, A0);
-        _ ->
+        {_,_} ->
+            %% Either the arity is too big, or the boolean value from
+            %% '=:=' will be returned.
             A0
     end;
 reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst}],
@@ -2211,7 +2217,13 @@ linear_scan(#st{intervals=Intervals0,res=Res}=St0) ->
     Free = init_free(maps:to_list(Res)),
     Intervals1 = [init_interval(Int, Res) || Int <- Intervals0],
     Intervals = sort(Intervals1),
-    IsReserved = fun (#i{reg=Reg}) -> Reg =/= none end,
+    IsReserved = fun(#i{reg=Reg}) ->
+                         case Reg of
+                             none -> false;
+                             {prefer,{_,_}} -> false;
+                             {_,_} -> true
+                         end
+                 end,
     {UnhandledRes,Unhandled} = partition(IsReserved, Intervals),
     L = #l{unhandled_res=UnhandledRes,
            unhandled_any=Unhandled,free=Free},
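The reserve_zreg change above distinguishes a tuple_size comparison used only for branching from one whose boolean result escapes. A small sketch of both cases (hypothetical module; names are illustrative):

-module(zreg_example).
-export([f/1,g/1]).

%% Hypothetical input: in f/1 the result of the comparison is used
%% only for branching, so tuple_size + '=:=' can be combined into a
%% test_arity instruction. In g/1 the boolean value from '=:=' is
%% returned, the case the new {_,_} clause above now rejects.
f(T) when tuple_size(T) =:= 2 -> pair;
f(_) -> other.

g(T) -> tuple_size(T) =:= 2.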
diff --git a/lib/compiler/src/beam_ssa_share.erl b/lib/compiler/src/beam_ssa_share.erl
new file mode 100644
index 0000000000..426efa2cc9
--- /dev/null
+++ b/lib/compiler/src/beam_ssa_share.erl
@@ -0,0 +1,370 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2018. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+%% Share code for semantically equivalent blocks referred to by
+%% `br` and `switch` instructions.
+%%
+%% A similar optimization is done in beam_jump, but doing it here as
+%% well is beneficial as it may enable other optimizations. If there
+%% are many semantically equivalent clauses, this optimization can
+%% substantially decrease compilation times.
+%%
+%% block/2 is called from the liveness optimization pass in
+%% beam_ssa_opt, as code sharing helps the liveness pass and vice
+%% versa.
+%%
+
+-module(beam_ssa_share).
+-export([module/2,block/2]).
+
+-include("beam_ssa.hrl").
+
+-import(lists, [keyfind/3,reverse/1,sort/1]).
+
+-spec module(beam_ssa:b_module(), [compile:option()]) ->
+                    {'ok',beam_ssa:b_module()}.
+
+module(#b_module{body=Fs0}=Module, _Opts) ->
+    Fs = [function(F) || F <- Fs0],
+    {ok,Module#b_module{body=Fs}}.
+
+-spec block(Blk0, Blocks0) -> Blk when
+      Blk0 :: beam_ssa:b_blk(),
+      Blocks0 :: beam_ssa:block_map(),
+      Blk :: beam_ssa:b_blk().
+
+block(#b_blk{last=Last0}=Blk, Blocks) ->
+    case share_terminator(Last0, Blocks) of
+        none -> Blk;
+        Last -> Blk#b_blk{last=beam_ssa:normalize(Last)}
+    end.
+
+%%%
+%%% Local functions.
+%%%
+
+function(#b_function{anno=Anno,bs=Blocks0}=F) ->
+    try
+        PO = reverse(beam_ssa:rpo(Blocks0)),
+        {Blocks1,Changed} = blocks(PO, Blocks0, false),
+        Blocks = case Changed of
+                     true ->
+                         beam_ssa:trim_unreachable(Blocks1);
+                     false ->
+                         Blocks0
+                 end,
+        F#b_function{bs=Blocks}
+    catch
+        Class:Error:Stack ->
+            #{func_info:={_,Name,Arity}} = Anno,
+            io:fwrite("Function: ~w/~w\n", [Name,Arity]),
+            erlang:raise(Class, Error, Stack)
+    end.
+
+blocks([L|Ls], Blocks, Changed) ->
+    #b_blk{last=Last0} = Blk0 = map_get(L, Blocks),
+    case block(Blk0, Blocks) of
+        #b_blk{last=Last0} ->
+            blocks(Ls, Blocks, Changed);
+        #b_blk{}=Blk ->
+            blocks(Ls, Blocks#{L:=Blk}, true)
+    end;
+blocks([], Blocks, Changed) ->
+    {Blocks,Changed}.
+
+share_terminator(#b_br{bool=#b_var{},succ=Succ0,fail=Fail0}=Br, Blocks) ->
+    {Succ,SuccBlk} = shortcut_nonempty_block(Succ0, Blocks),
+    {Fail,FailBlk} = shortcut_nonempty_block(Fail0, Blocks),
+    case are_equivalent(Succ, SuccBlk, Fail, FailBlk, Blocks) of
+        true ->
+            %% The blocks are semantically equivalent.
+            Br#b_br{succ=Succ,fail=Succ};
+        false ->
+            if
+                Succ =:= Succ0, Fail =:= Fail0 ->
+                    %% Neither of the blocks was cut short.
+                    none;
+                true ->
+                    %% One or both labels were cut short
+                    %% to avoid jumping to an empty block.
+                    Br#b_br{succ=Succ,fail=Fail}
+            end
+    end;
+share_terminator(#b_switch{}=Sw, Blocks) ->
+    share_switch(Sw, Blocks);
+share_terminator(_Last, _Blocks) -> none.
+
+%% Test whether the two blocks are semantically equivalent. This
+%% function is specially optimized to return `false` as fast as
+%% possible if the blocks are not equivalent, as that is the common
+%% case.
+
+are_equivalent(_Succ, _, ?BADARG_BLOCK, _, _Blocks) ->
+    %% ?BADARG_BLOCK is special. Sharing could be incorrect.
+    false;
+are_equivalent(_Succ, #b_blk{is=Is1,last=#b_ret{arg=RetVal1}=Ret1},
+               _Fail, #b_blk{is=Is2,last=#b_ret{arg=RetVal2}=Ret2}, _Blocks) ->
+    case {RetVal1,RetVal2} of
+        {#b_literal{},#b_literal{}} ->
+            case RetVal1 =:= RetVal2 of
+                true ->
+                    %% The return values are identical literals. We
+                    %% only need to compare the canonicalized bodies.
+                    Can1 = canonical_is(Is1),
+                    Can2 = canonical_is(Is2),
+                    Can1 =:= Can2;
+                false ->
+                    %% Non-equal literals.
+                    false
+            end;
+        {#b_var{},#b_var{}} ->
+            %% The return values are variables. We must canonicalize
+            %% the blocks (including returns) and compare them.
+            Can1 = canonical_is(Is1 ++ [Ret1]),
+            Can2 = canonical_is(Is2 ++ [Ret2]),
+            Can1 =:= Can2;
+        {_,_} ->
+            %% One literal and one variable.
+            false
+    end;
+are_equivalent(Succ,
+               #b_blk{is=Is1,
+                      last=#b_br{bool=#b_literal{val=true},
+                                 succ=Target}},
+               Fail,
+               #b_blk{is=Is2,
+                      last=#b_br{bool=#b_literal{val=true},
+                                 succ=Target}},
+               Blocks) ->
+    %% Both blocks end with an unconditional branch to the
+    %% same target block. If the target block has phi nodes,
+    %% we must pick up the values from the phi nodes and
+    %% compare them.
+    #b_blk{is=Is} = map_get(Target, Blocks),
+    Phis1 = canonical_terminator_phis(Is, Succ),
+    Phis2 = canonical_terminator_phis(Is, Fail),
+    case {Phis1,Phis2} of
+        {[#b_set{args=[#b_literal{}]}|_],_} when Phis1 =/= Phis2 ->
+            %% Different values are used in the phi nodes.
+            false;
+        {_,[#b_set{args=[#b_literal{}]}|_]} when Phis1 =/= Phis2 ->
+            %% Different values are used in the phi nodes.
+            false;
+        {_,_} ->
+            %% The values in the phi nodes are variables or identical
+            %% literals. We must canonicalize the blocks and compare
+            %% them.
+            Can1 = canonical_is(Is1 ++ Phis1),
+            Can2 = canonical_is(Is2 ++ Phis2),
+            Can1 =:= Can2
+    end;
+are_equivalent(Succ0, #b_blk{is=Is1,last=#b_br{bool=#b_var{},fail=Same}},
+               Fail0, #b_blk{is=Is2,last=#b_br{bool=#b_var{},fail=Same}},
+               Blocks) ->
+    %% Two-way branches with identical failure labels. First compare the
+    %% canonicalized bodies of the blocks.
+    case canonical_is(Is1) =:= canonical_is(Is2) of
+        false ->
+            %% Different bodies.
+            false;
+        true ->
+            %% Bodies were equal. That is fairly uncommon, so to keep
+            %% the code simple we will rewrite the `br` to a `switch`
+            %% and let share_switch/2 do the work of following the
+            %% branches.
+            Sw = #b_switch{arg=#b_var{name=not_used},fail=Fail0,
+                           list=[{#b_literal{},Succ0}]},
+            #b_switch{fail=Fail,list=[{_,Succ}]} = share_switch(Sw, Blocks),
+            Fail =:= Succ
+    end;
+are_equivalent(_, _, _, _, _) -> false.
+
+share_switch(#b_switch{fail=Fail0,list=List0}=Sw, Blocks) ->
+    Prep = share_prepare_sw([{value,Fail0}|List0], Blocks, 0, []),
+    Res = do_share_switch(Prep, Blocks, []),
+    [{_,Fail}|List] = [VL || {_,VL} <- sort(Res)],
+    Sw#b_switch{fail=Fail,list=List}.
+
+share_prepare_sw([{V,L0}|T], Blocks, N, Acc) ->
+    {L,_Blk} = shortcut_nonempty_block(L0, Blocks),
+    share_prepare_sw(T, Blocks, N+1, [{{L,#{}},{N,{V,L}}}|Acc]);
+share_prepare_sw([], _, _, Acc) -> Acc.
+
+do_share_switch(Prep, Blocks, Acc) ->
+    Map = share_switch_1(Prep, Blocks, #{}),
+    share_switch_2(maps:values(Map), Blocks, Acc).
+
+share_switch_1([{Next0,Res}|T], Blocks, Map) ->
+    {Can,Next} = canonical_block(Next0, Blocks),
+    case Map of
+        #{Can:=Ls} ->
+            share_switch_1(T, Blocks, Map#{Can:=[{Next,Res}|Ls]});
+        #{} ->
+            share_switch_1(T, Blocks, Map#{Can=>[{Next,Res}]})
+    end;
+share_switch_1([], _Blocks, Map) -> Map.
+
+share_switch_2([[{_,{N,Res}}]|T], Blocks, Acc) ->
+    %% This block is not equivalent to any other block.
+    share_switch_2(T, Blocks, [{N,Res}|Acc]);
+share_switch_2([[{done,{_,{_,Common}}}|_]=Eqs|T], Blocks, Acc0) ->
+    %% Two or more blocks are semantically equivalent, and all blocks
+    %% are either terminated with a `ret` or a `br` to the same target
+    %% block. Replace the labels in the `switch` for all of those
+    %% blocks with the label for the first of the blocks.
+    Acc = [{N,{V,Common}} || {done,{N,{V,_}}} <- Eqs] ++ Acc0,
+    share_switch_2(T, Blocks, Acc);
+share_switch_2([[{_,_}|_]=Prep|T], Blocks, Acc0) ->
+    %% Two or more blocks are semantically equivalent, but they have
+    %% different successful successor blocks. Now we must check
+    %% recursively whether the successor blocks are equivalent too.
+    Acc = do_share_switch(Prep, Blocks, Acc0),
+    share_switch_2(T, Blocks, Acc);
+share_switch_2([], _, Acc) -> Acc.
+
+canonical_block({L,VarMap0}, Blocks) ->
+    #b_blk{is=Is,last=Last0} = map_get(L, Blocks),
+    case canonical_terminator(L, Last0, Blocks) of
+        none ->
+            %% The block has a terminator that we don't handle.
+            {{none,L},done};
+        {Last,done} ->
+            %% The block ends with a `ret` or an unconditional `br` to
+            %% another block.
+            {Can,_VarMap} = canonical_is(Is ++ Last, VarMap0, []),
+            {Can,done};
+        {Last,Next} ->
+            %% The block ends with a conditional branch.
+            {Can,VarMap} = canonical_is(Is ++ Last, VarMap0, []),
+            {Can,{Next,VarMap}}
+    end.
+
+%% Translate a sequence of instructions to a canonical representation. If the
+%% canonical representations of two blocks compare equal, the blocks are
+%% semantically equivalent. The following translations are done:
+%%
+%%   * Variables defined in the instruction sequence are replaced with
+%%     {var,0}, {var,1}, and so on. Free variables are not changed.
+%%
+%%   * `location` annotations that would produce a `line` instruction are
+%%     kept. All other annotations are cleared.
+%%
+%%   * Instructions are repackaged into tuples instead of into the
+%%     usual records. The main reason is to avoid violating the types for
+%%     the SSA records. We can simplify things a little by linking the
+%%     instructions directly instead of putting them into a list.
+
+canonical_is(Is) ->
+    {Can,_} = canonical_is(Is, #{}, []),
+    Can.
+
+canonical_is([#b_set{op=Op,dst=Dst,args=Args0}=I|Is], VarMap0, Acc) ->
+    Args = [canonical_arg(Arg, VarMap0) || Arg <- Args0],
+    Var = {var,map_size(VarMap0)},
+    VarMap = VarMap0#{Dst=>Var},
+    LineAnno = case Op of
+                   bs_match ->
+                       %% The location annotation for a bs_match instruction
+                       %% is only used in warnings, never to emit a `line`
+                       %% instruction. Therefore, it should not be included.
+                       [];
+                   _ ->
+                       %% The location annotation will be used in a `line`
+                       %% instruction. It must be included.
+                       beam_ssa:get_anno(location, I, none)
+               end,
+    canonical_is(Is, VarMap, {Op,LineAnno,Var,Args,Acc});
+canonical_is([#b_ret{arg=Arg}], VarMap, Acc0) ->
+    Acc1 = case Acc0 of
+               {call,_Anno,Var,[#b_local{}|_]=Args,PrevAcc} ->
+                   %% This is a tail-recursive call to a local function.
+                   %% There will be no line instruction generated;
+                   %% thus, the annotation is not significant.
+                   {call,[],Var,Args,PrevAcc};
+               _ ->
+                   Acc0
+           end,
+    {{ret,canonical_arg(Arg, VarMap),Acc1},VarMap};
+canonical_is([#b_br{bool=#b_var{},fail=Fail}], VarMap, Acc) ->
+    {{br,succ,Fail,Acc},VarMap};
+canonical_is([#b_br{succ=Succ}], VarMap, Acc) ->
+    {{br,Succ,Acc},VarMap};
+canonical_is([], VarMap, Acc) ->
+    {Acc,VarMap}.
+
+canonical_terminator(_L, #b_ret{}=Ret, _Blocks) ->
+    {[Ret],done};
+canonical_terminator(L, #b_br{bool=#b_literal{val=true},succ=Succ}=Br, Blocks) ->
+    #b_blk{is=Is} = map_get(Succ, Blocks),
+    case canonical_terminator_phis(Is, L) of
+        [] ->
+            {[],Succ};
+        [_|_]=Phis ->
+            {Phis ++ [Br],done}
+    end;
+canonical_terminator(_L, #b_br{bool=#b_var{},succ=Succ}=Br, _Blocks) ->
+    {[Br],Succ};
+canonical_terminator(_, _, _) -> none.
+
+canonical_terminator_phis([#b_set{op=phi,args=PhiArgs}=Phi|Is], L) ->
+    {Value,L} = keyfind(L, 2, PhiArgs),
+    [Phi#b_set{op=copy,args=[Value]}|canonical_terminator_phis(Is, L)];
+canonical_terminator_phis([#b_set{op=peek_message}=I|_], L) ->
+    %% We could get stuck in an infinite loop if we allowed the
+    %% comparisons to continue into this block. Force an unequal
+    %% compare with all other predecessors of this block.
+    [I#b_set{op=copy,args=[#b_literal{val=L}]}];
+canonical_terminator_phis(_, _) -> [].
+
+canonical_arg(#b_var{}=Var, VarMap) ->
+    case VarMap of
+        #{Var:=CanonicalVar} ->
+            CanonicalVar;
+        #{} ->
+            Var
+    end;
+canonical_arg(#b_remote{mod=Mod,name=Name}, VarMap) ->
+    {remote,canonical_arg(Mod, VarMap),
+     canonical_arg(Name, VarMap)};
+canonical_arg(Other, _VarMap) -> Other.
+
+%% Shortcut branches to empty blocks if safe.
+
+shortcut_nonempty_block(L, Blocks) ->
+    case map_get(L, Blocks) of
+        #b_blk{is=[],last=#b_br{bool=#b_literal{val=true},succ=Succ}}=Blk ->
+            %% This block is empty.
+            case is_forbidden(Succ, Blocks) of
+                false ->
+                    shortcut_nonempty_block(Succ, Blocks);
+                true ->
+                    {L,Blk}
+            end;
+        #b_blk{}=Blk ->
+            {L,Blk}
+    end.
+
+is_forbidden(L, Blocks) ->
+    case map_get(L, Blocks) of
+        #b_blk{is=[#b_set{op=phi}|_]} -> true;
+        #b_blk{is=[#b_set{op=peek_message}|_]} -> true;
+        #b_blk{} -> false
+    end.
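The kind of source the new beam_ssa_share pass benefits from is easy to sketch (hypothetical module; the effect is most pronounced with many equivalent clauses, per the header comment above). All clause bodies below are semantically equivalent, so their switch labels can be collapsed to one shared block:

-module(share_example).
-export([f/1]).

%% Hypothetical input: the three case branches have identical bodies
%% and identical literal return values, so after canonicalization the
%% switch can point every value at a single shared block.
f(X) ->
    case X of
        1 -> io:format("same~n");
        2 -> io:format("same~n");
        _ -> io:format("same~n")
    end.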
diff --git a/lib/compiler/src/beam_ssa_type.erl b/lib/compiler/src/beam_ssa_type.erl
index 18e6e73a46..95fc3bb0e9 100644
--- a/lib/compiler/src/beam_ssa_type.erl
+++ b/lib/compiler/src/beam_ssa_type.erl
@@ -27,9 +27,10 @@
 
 -define(UNICODE_INT, #t_integer{elements={0,16#10FFFF}}).
 
--record(d, {ds :: #{beam_ssa:var_name():=beam_ssa:b_set()},
+-record(d, {ds :: #{beam_ssa:b_var():=beam_ssa:b_set()},
             ls :: #{beam_ssa:label():=type_db()},
-            sub :: #{beam_ssa:var_name():=beam_ssa:value()}
+            once :: cerl_sets:set(beam_ssa:b_var()),
+            sub :: #{beam_ssa:b_var():=beam_ssa:value()}
            }).
 
 -define(ATOM_SET_SIZE, 5).
@@ -56,13 +57,15 @@
       Block :: beam_ssa:b_blk().
 
 opt(Linear, Args) ->
+    UsedOnce = used_once(Linear, Args),
     Ts = maps:from_list([{V,any} || #b_var{}=V <- Args]),
     FakeCall = #b_set{op=call,args=[#b_remote{mod=#b_literal{val=unknown},
                                               name=#b_literal{val=unknown},
                                               arity=0}]},
     Defs = maps:from_list([{Var,FakeCall#b_set{dst=Var}} ||
                               #b_var{}=Var <- Args]),
-    D = #d{ds=Defs,ls=#{0=>Ts},sub=#{}},
+    D = #d{ds=Defs,ls=#{0=>Ts,?BADARG_BLOCK=>#{}},
+           once=UsedOnce,sub=#{}},
     opt_1(Linear, D).
 
 opt_1([{L,Blk}|Bs], #d{ls=Ls}=D) ->
@@ -425,16 +428,43 @@ opt_terminator(#b_ret{}=Ret, _Ts, _Ds) -> Ret.
 update_successors(#b_br{bool=#b_literal{val=true},succ=S}, Ts, D) ->
     update_successor(S, Ts, D);
-update_successors(#b_br{bool=#b_var{}=Bool,succ=Succ,fail=Fail}, Ts, D0) ->
-    D = update_successor_bool(Bool, false, Fail, Ts, D0),
-    SuccTs = infer_types(Bool, Ts, D0),
-    update_successor_bool(Bool, true, Succ, SuccTs, D);
-update_successors(#b_switch{arg=#b_var{}=V,fail=Fail,list=List}, Ts, D0) ->
-    D = update_successor(Fail, Ts, D0),
-    foldl(fun({Val,S}, A) ->
-                  T = get_type(Val, Ts),
-                  update_successor(S, Ts#{V=>T}, A)
-          end, D, List);
+update_successors(#b_br{bool=#b_var{}=Bool,succ=Succ,fail=Fail}, Ts0, D0) ->
+    case cerl_sets:is_element(Bool, D0#d.once) of
+        true ->
+            %% This variable is defined in this block and is only
+            %% referenced by this br terminator. Therefore, there is
+            %% no need to include it in the type database passed on
+            %% to the successors of this block.
+            Ts = maps:remove(Bool, Ts0),
+            D = update_successor(Fail, Ts, D0),
+            SuccTs = infer_types(Bool, Ts, D0),
+            update_successor(Succ, SuccTs, D);
+        false ->
+            D = update_successor_bool(Bool, false, Fail, Ts0, D0),
+            SuccTs = infer_types(Bool, Ts0, D0),
+            update_successor_bool(Bool, true, Succ, SuccTs, D)
+    end;
+update_successors(#b_switch{arg=#b_var{}=V,fail=Fail,list=List}, Ts0, D0) ->
+    case cerl_sets:is_element(V, D0#d.once) of
+        true ->
+            %% This variable is defined in this block and is only
+            %% referenced by this switch terminator. Therefore, there
+            %% is no need to include it in the type database passed on
+            %% to the successors of this block.
+            Ts = maps:remove(V, Ts0),
+            D = update_successor(Fail, Ts, D0),
+            F = fun({_Val,S}, A) ->
+                        update_successor(S, Ts, A)
+                end,
+            foldl(F, D, List);
+        false ->
+            D = update_successor(Fail, Ts0, D0),
+            F = fun({Val,S}, A) ->
+                        T = get_type(Val, Ts0),
+                        update_successor(S, Ts0#{V=>T}, A)
+                end,
+            foldl(F, D, List)
+    end;
 update_successors(#b_ret{}, _Ts, D) -> D.
 
 update_successor_bool(#b_var{}=Var, BoolValue, S, Ts, D) ->
@@ -447,6 +477,11 @@ update_successor_bool(#b_var{}=Var, BoolValue, S, Ts, D) ->
             update_successor(S, Ts, D)
     end.
 
+update_successor(?BADARG_BLOCK, _Ts, #d{}=D) ->
+    %% We KNOW that no variables are used in the ?BADARG_BLOCK,
+    %% so there is no need to update the type information. That
+    %% can be a huge timesaver for huge functions.
+    D;
 update_successor(S, Ts0, #d{ls=Ls}=D) ->
     case Ls of
         #{S:=Ts1} ->
@@ -766,6 +801,48 @@ simplify_not(#b_br{bool=#b_var{}=V,succ=Succ,fail=Fail}=Br0, Ts, Ds) ->
             Br0
     end.
 
+%%%
+%%% Calculate the set of variables that are only used once in the
+%%% block where they are defined. That allows us to discard type
+%%% information for variables that will never be referenced by the
+%%% successor blocks, potentially improving compilation times.
+%%%
+
+used_once(Linear, Args) ->
+    Map0 = used_once_1(reverse(Linear), #{}),
+    Map = maps:without(Args, Map0),
+    cerl_sets:from_list(maps:keys(Map)).
+
+used_once_1([{L,#b_blk{is=Is,last=Last}}|Bs], Uses0) ->
+    Uses = used_once_2([Last|reverse(Is)], L, Uses0),
+    used_once_1(Bs, Uses);
+used_once_1([], Uses) -> Uses.
+
+used_once_2([I|Is], L, Uses0) ->
+    Uses = used_once_uses(beam_ssa:used(I), L, Uses0),
+    case I of
+        #b_set{dst=Dst} ->
+            case Uses of
+                #{Dst:=[L]} ->
+                    used_once_2(Is, L, Uses);
+                #{} ->
+                    used_once_2(Is, L, maps:remove(Dst, Uses))
+            end;
+        _ ->
+            used_once_2(Is, L, Uses)
+    end;
+used_once_2([], _, Uses) -> Uses.
+
+used_once_uses([V|Vs], L, Uses) ->
+    case Uses of
+        #{V:=Us} ->
+            used_once_uses(Vs, L, Uses#{V:=[L|Us]});
+        #{} ->
+            used_once_uses(Vs, L, Uses#{V=>[L]})
+    end;
+used_once_uses([], _, Uses) -> Uses.
+
+
 get_types(Values, Ts) ->
     [get_type(Val, Ts) || Val <- Values].
 
 -spec get_type(beam_ssa:value(), type_db()) -> type().
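A sketch of the situation the new used_once/2 analysis exploits (hypothetical module; for illustration only). A boolean defined and consumed inside a single block never needs a type entry in the databases handed to the successor blocks:

-module(once_example).
-export([f/1]).

%% Hypothetical input: the boolean produced by `X > 0` is referenced
%% only by the br terminator of the block that defines it, so it ends
%% up in the `once` set and its entry is removed from the type
%% database before the successors are updated.
f(X) ->
    case X > 0 of
        true -> positive;
        false -> not_positive
    end.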
diff --git a/lib/compiler/src/beam_validator.erl b/lib/compiler/src/beam_validator.erl
index 7d908df3bf..1945faba7f 100644
--- a/lib/compiler/src/beam_validator.erl
+++ b/lib/compiler/src/beam_validator.erl
@@ -1366,8 +1366,10 @@ kill_aliases(Reg, #st{aliases=Aliases0}=St) ->
             St
     end.
 
-set_type(Type, {x,_}=Reg, Vst) -> set_type_reg(Type, Reg, Vst);
-set_type(Type, {y,_}=Reg, Vst) -> set_type_y(Type, Reg, Vst);
+set_type(Type, {x,_}=Reg, Vst) ->
+    set_type_reg(Type, Reg, Reg, Vst);
+set_type(Type, {y,_}=Reg, Vst) ->
+    set_type_reg(Type, Reg, Reg, Vst);
 set_type(_, _, #vst{}=Vst) -> Vst.
 
 set_type_reg(Type, Src, Dst, Vst) ->
diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl
index 65c4f140c9..14c8c5b4ab 100644
--- a/lib/compiler/src/compile.erl
+++ b/lib/compiler/src/compile.erl
@@ -823,6 +823,9 @@ kernel_passes() ->
      {pass,beam_kernel_to_ssa},
      {iff,dssa,{listing,"ssa"}},
      {iff,ssalint,{pass,beam_ssa_lint}},
+     {unless,no_share_opt,{pass,beam_ssa_share}},
+     {iff,dssashare,{listing,"ssashare"}},
+     {iff,ssalint,{pass,beam_ssa_lint}},
      {unless,no_bsm_opt,{pass,beam_ssa_bsm}},
      {iff,dssabsm,{listing,"ssabsm"}},
      {iff,ssalint,{pass,beam_ssa_lint}},
@@ -2098,6 +2101,7 @@ pre_load() ->
              beam_ssa_opt,
              beam_ssa_pre_codegen,
              beam_ssa_recv,
+             beam_ssa_share,
              beam_ssa_type,
              beam_trim,
              beam_utils,
diff --git a/lib/compiler/src/compiler.app.src b/lib/compiler/src/compiler.app.src
index 86259270bd..1472e3fde1 100644
--- a/lib/compiler/src/compiler.app.src
+++ b/lib/compiler/src/compiler.app.src
@@ -45,6 +45,7 @@
             beam_ssa_pp,
             beam_ssa_pre_codegen,
             beam_ssa_recv,
+            beam_ssa_share,
             beam_ssa_type,
             beam_trim,
             beam_utils,
diff --git a/lib/compiler/src/sys_core_fold.erl b/lib/compiler/src/sys_core_fold.erl
index d848cd8f19..43c99be982 100644
--- a/lib/compiler/src/sys_core_fold.erl
+++ b/lib/compiler/src/sys_core_fold.erl
@@ -2667,12 +2667,20 @@ opt_build_stacktrace(#c_let{vars=[#c_var{name=Cooked}],
         #c_call{module=#c_literal{val=erlang},
                 name=#c_literal{val=raise},
                 args=[Class,Exp,#c_var{name=Cooked}]} ->
-            %% The stacktrace is only used in a call to erlang:raise/3.
-            %% There is no need to build the stacktrace. Replace the
-            %% call to erlang:raise/3 with the raw_raise/3 instruction,
-            %% which will use a raw stacktrace.
-            #c_primop{name=#c_literal{val=raw_raise},
-                      args=[Class,Exp,RawStk]};
+            case core_lib:is_var_used(Cooked, #c_cons{hd=Class,tl=Exp}) of
+                true ->
+                    %% Not safe. The stacktrace is used in the class or
+                    %% reason.
+                    Let;
+                false ->
+                    %% The stacktrace is only used in the last
+                    %% argument for erlang:raise/3. There is no need
+                    %% to build the stacktrace. Replace the call to
+                    %% erlang:raise/3 with the raw_raise/3
+                    %% instruction, which will use a raw stacktrace.
+                    #c_primop{name=#c_literal{val=raw_raise},
+                              args=[Class,Exp,RawStk]}
+            end;
         #c_let{vars=[#c_var{name=V}],arg=Arg,body=B0} when V =/= Cooked ->
             case core_lib:is_var_used(Cooked, Arg) of
                 false ->
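Both sides of the new safety check in opt_build_stacktrace can be sketched with a small module (hypothetical; for illustration only). In reraise/1 the stacktrace only flows into the last argument of erlang:raise/3, so the raw_raise rewrite applies; in bad/1 the stacktrace also appears in the reason, the unsafe case the added core_lib:is_var_used/2 test now rejects:

-module(raise_example).
-export([reraise/1,bad/1]).

%% Hypothetical input: safe case. The stacktrace Stk is used only as
%% the third argument of erlang:raise/3, so it never needs to be
%% built as a cooked term.
reraise(F) ->
    try F()
    catch C:R:Stk -> erlang:raise(C, R, Stk)
    end.

%% Hypothetical input: unsafe case. Stk is also part of the reason
%% term, so the rewrite to raw_raise must not be performed.
bad(F) ->
    try F()
    catch C:R:Stk -> erlang:raise(C, {R,Stk}, Stk)
    end.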