Diffstat (limited to 'lib/compiler/src')
-rw-r--r--  lib/compiler/src/Makefile                  |   1
-rw-r--r--  lib/compiler/src/beam_block.erl            |  38
-rw-r--r--  lib/compiler/src/beam_jump.erl             |  55
-rw-r--r--  lib/compiler/src/beam_peep.erl             |  12
-rw-r--r--  lib/compiler/src/beam_ssa.erl              |   1
-rw-r--r--  lib/compiler/src/beam_ssa_codegen.erl      |  39
-rw-r--r--  lib/compiler/src/beam_ssa_dead.erl         |  59
-rw-r--r--  lib/compiler/src/beam_ssa_opt.erl          |  20
-rw-r--r--  lib/compiler/src/beam_ssa_pre_codegen.erl  |  22
-rw-r--r--  lib/compiler/src/beam_ssa_share.erl        | 370
-rw-r--r--  lib/compiler/src/beam_ssa_type.erl         | 103
-rw-r--r--  lib/compiler/src/beam_validator.erl        |   6
-rw-r--r--  lib/compiler/src/compile.erl               |   4
-rw-r--r--  lib/compiler/src/compiler.app.src          |   1
-rw-r--r--  lib/compiler/src/sys_core_fold.erl         |  20
15 files changed, 662 insertions, 89 deletions
diff --git a/lib/compiler/src/Makefile b/lib/compiler/src/Makefile
index d475e5a19a..961dacc6c9 100644
--- a/lib/compiler/src/Makefile
+++ b/lib/compiler/src/Makefile
@@ -69,6 +69,7 @@ MODULES = \
beam_ssa_pp \
beam_ssa_pre_codegen \
beam_ssa_recv \
+ beam_ssa_share \
beam_ssa_type \
beam_kernel_to_ssa \
beam_trim \
diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl
index 9d8d5b2b0c..707974b2c1 100644
--- a/lib/compiler/src/beam_block.erl
+++ b/lib/compiler/src/beam_block.erl
@@ -22,7 +22,7 @@
-module(beam_block).
-export([module/2]).
--import(lists, [reverse/1,splitwith/2]).
+-import(lists, [keysort/2,reverse/1,splitwith/2]).
-spec module(beam_utils:module_code(), [compile:option()]) ->
{'ok',beam_utils:module_code()}.
@@ -53,7 +53,8 @@ blockify([I|Is0]=IsAll, Acc) ->
case collect(I) of
error -> blockify(Is0, [I|Acc]);
Instr when is_tuple(Instr) ->
- {Block,Is} = collect_block(IsAll),
+ {Block0,Is} = collect_block(IsAll),
+ Block = sort_moves(Block0),
blockify(Is, [{block,Block}|Acc])
end;
blockify([], Acc) -> reverse(Acc).
@@ -117,3 +118,36 @@ embed_lines([{block,B1},{line,_}=Line|T], Acc) ->
embed_lines([I|Is], Acc) ->
embed_lines(Is, [I|Acc]);
embed_lines([], Acc) -> Acc.
+
+%% sort_moves([Instruction]) -> [Instruction].
+%% Sort move instructions on the Y register to give the loader
+%% more opportunities for combining instructions.
+
+sort_moves([{set,[{x,_}],[{y,_}],move}=I|Is0]) ->
+ {Moves,Is} = sort_moves_1(Is0, x, y, [I]),
+ Moves ++ sort_moves(Is);
+sort_moves([{set,[{y,_}],[{x,_}],move}=I|Is0]) ->
+ {Moves,Is} = sort_moves_1(Is0, y, x, [I]),
+ Moves ++ sort_moves(Is);
+sort_moves([I|Is]) ->
+ [I|sort_moves(Is)];
+sort_moves([]) -> [].
+
+sort_moves_1([{set,[{x,0}],[_],move}=I|Is], _DTag, _STag, Acc) ->
+ %% The loader sometimes combines a move to x0 with the
+ %% instruction that follows, producing, for example, a move_call
+ %% instruction. Therefore, we don't want to include this move
+ %% instruction in the sorting.
+ {sort_on_yreg(Acc)++[I],Is};
+sort_moves_1([{set,[{DTag,_}],[{STag,_}],move}=I|Is], DTag, STag, Acc) ->
+ sort_moves_1(Is, DTag, STag, [I|Acc]);
+sort_moves_1(Is, _DTag, _STag, Acc) ->
+ {sort_on_yreg(Acc),Is}.
+
+sort_on_yreg([{set,[Dst],[Src],move}|_]=Moves) ->
+ case {Dst,Src} of
+ {{y,_},{x,_}} ->
+ keysort(2, Moves);
+ {{x,_},{y,_}} ->
+ keysort(3, Moves)
+ end.
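
The effect of sort_moves/1 is easiest to see on concrete block instructions. A minimal shell sketch of the underlying keysort (the {set,...} tuples follow the shapes matched above; the session itself is illustrative, not part of the patch):

    1> Moves = [{set,[{y,2}],[{x,0}],move},
                {set,[{y,0}],[{x,1}],move},
                {set,[{y,1}],[{x,2}],move}].
    2> lists:keysort(2, Moves).
    [{set,[{y,0}],[{x,1}],move},
     {set,[{y,1}],[{x,2}],move},
     {set,[{y,2}],[{x,0}],move}]

For x-to-y moves the sort key is element 2 (the destination), ordering the stores by Y register; for y-to-x moves it is element 3 (the source). Either way the loader sees runs of consecutive Y registers, which gives it more opportunities to combine the moves.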
diff --git a/lib/compiler/src/beam_jump.erl b/lib/compiler/src/beam_jump.erl
index d3a618d211..8b0e3e32f8 100644
--- a/lib/compiler/src/beam_jump.erl
+++ b/lib/compiler/src/beam_jump.erl
@@ -101,6 +101,10 @@
%%% always keep the label. (beam_clean will remove any unused
%%% labels.)
%%%
+%%% (7) Replace a jump to a return instruction with a return instruction.
+%%% Similarly, replace a jump to deallocate + return with those
+%%% instructions.
+%%%
%%% Note: This module depends on (almost) all branches and jumps only
%%% going forward, so that we can remove instructions (including definition
%%% of labels) after any label that has not been referenced by the code
@@ -150,7 +154,8 @@ function({function,Name,Arity,CLabel,Asm0}, Lc0) ->
Asm3 = share(Asm2),
Asm4 = move(Asm3),
Asm5 = opt(Asm4, CLabel),
- Asm = remove_unused_labels(Asm5),
+ Asm6 = unshare(Asm5),
+ Asm = remove_unused_labels(Asm6),
{{function,Name,Arity,CLabel,Asm},Lc}
catch
Class:Error:Stack ->
@@ -393,14 +398,13 @@ extract_seq_1(_, _) -> no.
{
entry :: beam_asm:label(), %Entry label (must not be moved).
replace :: #{beam_asm:label() := beam_asm:label()}, %Labels to replace.
- labels :: cerl_sets:set(), %Set of referenced labels.
- index :: beam_utils:code_index() | {lazy,[beam_utils:instruction()]} %Index built lazily only if needed
+ labels :: cerl_sets:set() %Set of referenced labels.
}).
opt(Is0, CLabel) ->
find_fixpoint(fun(Is) ->
Lbls = initial_labels(Is),
- St = #st{entry=CLabel,replace=#{},labels=Lbls,index={lazy,Is}},
+ St = #st{entry=CLabel,replace=#{},labels=Lbls},
opt(Is, [], St)
end, Is0).
@@ -425,16 +429,6 @@ opt([{test,_,{f,L}=Lbl,_}=I|[{jump,{f,L}}|_]=Is], Acc, St) ->
%% as in the jump that follows -- thus it is not needed.
opt(Is, Acc, St)
end;
-opt([{test,_,{f,L}=Lbl,_}=I|[{label,L}|_]=Is], Acc, St) ->
- %% Similar to the above, except we have a fall-through rather than jump
- %% Test Label Ops
- %% label Label
- case beam_utils:is_pure_test(I) of
- false ->
- opt(Is, [I|Acc], label_used(Lbl, St));
- true ->
- opt(Is, Acc, St)
- end;
opt([{test,Test0,{f,L}=Lbl,Ops}=I|[{jump,To}|Is]=Is0], Acc, St) ->
case is_label_defined(Is, L) of
false ->
@@ -625,6 +619,39 @@ drop_upto_label([{label,_}|_]=Is) -> Is;
drop_upto_label([_|Is]) -> drop_upto_label(Is);
drop_upto_label([]) -> [].
+%% unshare([Instruction]) -> [Instruction].
+%% Replace a jump to a return sequence (a `return` instruction
+%% optionally preceded by a `deallocate` instruction) with the return
+%% sequence. This always saves execution time and may also save code
+%% space (depending on the architecture). Eliminating `jump`
+%% instructions also gives beam_trim more opportunities to trim the
+%% stack.
+
+unshare(Is) ->
+ Short = unshare_collect_short(Is, #{}),
+ unshare_short(Is, Short).
+
+unshare_collect_short([{label,L},return|Is], Map) ->
+ unshare_collect_short(Is, Map#{L=>[return]});
+unshare_collect_short([{label,L},{deallocate,_}=D,return|Is], Map) ->
+ %% `deallocate` and `return` are combined into one instruction by
+ %% the loader.
+ unshare_collect_short(Is, Map#{L=>[D,return]});
+unshare_collect_short([_|Is], Map) ->
+ unshare_collect_short(Is, Map);
+unshare_collect_short([], Map) -> Map.
+
+unshare_short([{jump,{f,F}}=I|Is], Map) ->
+ case Map of
+ #{F:=Seq} ->
+ Seq ++ unshare_short(Is, Map);
+ #{} ->
+ [I|unshare_short(Is, Map)]
+ end;
+unshare_short([I|Is], Map) ->
+ [I|unshare_short(Is, Map)];
+unshare_short([], _Map) -> [].
+
%% ulbl(Instruction, UsedCerlSet) -> UsedCerlSet'
%% Update the cerl_set UsedCerlSet with any function-local labels
%% (i.e. not with labels in call instructions) referenced by
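
Since unshare/1 and its helpers are pure functions over the instruction list, the new optimization can be exercised standalone. A minimal sketch, with the two helpers copied from the hunk above and an invented instruction sequence (module name and label values are made up):

    -module(unshare_demo).
    -export([run/0]).

    unshare(Is) ->
        Short = unshare_collect_short(Is, #{}),
        unshare_short(Is, Short).

    unshare_collect_short([{label,L},return|Is], Map) ->
        unshare_collect_short(Is, Map#{L=>[return]});
    unshare_collect_short([{label,L},{deallocate,_}=D,return|Is], Map) ->
        unshare_collect_short(Is, Map#{L=>[D,return]});
    unshare_collect_short([_|Is], Map) ->
        unshare_collect_short(Is, Map);
    unshare_collect_short([], Map) -> Map.

    unshare_short([{jump,{f,F}}=I|Is], Map) ->
        case Map of
            #{F:=Seq} -> Seq ++ unshare_short(Is, Map);
            #{} -> [I|unshare_short(Is, Map)]
        end;
    unshare_short([I|Is], Map) ->
        [I|unshare_short(Is, Map)];
    unshare_short([], _Map) -> [].

    run() ->
        Is = [{jump,{f,5}},{label,5},{deallocate,1},return],
        %% The jump is replaced by the deallocate+return sequence;
        %% the shared sequence itself is kept and becomes dead code
        %% once the label is no longer referenced.
        [{deallocate,1},return,{label,5},{deallocate,1},return] = unshare(Is),
        ok.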
diff --git a/lib/compiler/src/beam_peep.erl b/lib/compiler/src/beam_peep.erl
index 2323a439e9..5730e9704e 100644
--- a/lib/compiler/src/beam_peep.erl
+++ b/lib/compiler/src/beam_peep.erl
@@ -94,30 +94,26 @@ peep([{gc_bif,_,_,_,_,Dst}=I|Is], SeenTests0, Acc) ->
peep([{jump,{f,L}},{label,L}=I|Is], _, Acc) ->
%% Sometimes beam_jump has missed this optimization.
peep(Is, gb_sets:empty(), [I|Acc]);
-peep([{select,Op,R,F,Vls0}|Is], SeenTests0, Acc0) ->
+peep([{select,select_val,R,F,Vls0}|Is], SeenTests0, Acc0) ->
case prune_redundant_values(Vls0, F) of
[] ->
%% No values left. Must convert to plain jump.
I = {jump,F},
peep([I|Is], gb_sets:empty(), Acc0);
- [{atom,_}=Value,Lbl] when Op =:= select_val ->
+ [{atom,_}=Value,Lbl] ->
%% Single value left. Convert to regular test.
Is1 = [{test,is_eq_exact,F,[R,Value]},{jump,Lbl}|Is],
peep(Is1, SeenTests0, Acc0);
- [{integer,_}=Value,Lbl] when Op =:= select_val ->
+ [{integer,_}=Value,Lbl] ->
%% Single value left. Convert to regular test.
Is1 = [{test,is_eq_exact,F,[R,Value]},{jump,Lbl}|Is],
peep(Is1, SeenTests0, Acc0);
- [Arity,Lbl] when Op =:= select_tuple_arity ->
- %% Single value left. Convert to regular test
- Is1 = [{test,test_arity,F,[R,Arity]},{jump,Lbl}|Is],
- peep(Is1, SeenTests0, Acc0);
[{atom,B1},Lbl,{atom,B2},Lbl] when B1 =:= not B2 ->
%% Replace with is_boolean test.
Is1 = [{test,is_boolean,F,[R]},{jump,Lbl}|Is],
peep(Is1, SeenTests0, Acc0);
[_|_]=Vls ->
- I = {select,Op,R,F,Vls},
+ I = {select,select_val,R,F,Vls},
peep(Is, gb_sets:empty(), [I|Acc0])
end;
peep([{get_map_elements,Fail,Src,List}=I|Is], _SeenTests, Acc0) ->
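
On concrete instruction terms, the is_boolean clause above performs the following rewrite (register and label values invented for illustration):

    %% before
    {select,select_val,{x,0},{f,9},[{atom,true},{f,4},{atom,false},{f,4}]}

    %% after
    {test,is_boolean,{f,9},[{x,0}]},
    {jump,{f,4}}

Both boolean values branch to the same label, so a single is_boolean test followed by an unconditional jump is equivalent and cheaper.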
diff --git a/lib/compiler/src/beam_ssa.erl b/lib/compiler/src/beam_ssa.erl
index c5e23d2ae0..b491e340b7 100644
--- a/lib/compiler/src/beam_ssa.erl
+++ b/lib/compiler/src/beam_ssa.erl
@@ -194,6 +194,7 @@ no_side_effect(#b_set{op=Op}) ->
extract -> true;
get_hd -> true;
get_tl -> true;
+ get_map_element -> true;
get_tuple_element -> true;
has_map_field -> true;
is_nonempty_list -> true;
diff --git a/lib/compiler/src/beam_ssa_codegen.erl b/lib/compiler/src/beam_ssa_codegen.erl
index 3c14062d0b..d3facc5911 100644
--- a/lib/compiler/src/beam_ssa_codegen.erl
+++ b/lib/compiler/src/beam_ssa_codegen.erl
@@ -747,7 +747,16 @@ need_live_anno(Op) ->
end.
%%%
-%%% Add annotations for defined Y registers.
+%%% Add the following annotations for Y registers:
+%%%
+%%% def_yregs An ordset with variables that refer to live Y registers.
+%%% That is, Y registers that have been killed
+%%% are not included. This annotation is added to all
+%%% instructions that require Y registers to be initialized.
+%%%
+%%% kill_yregs This annotation is added to call instructions. It is
+%%% an ordset containing variables referring to Y registers
+%%% that will no longer be used after the call instruction.
%%%
defined(Linear, #cg{regs=Regs}) ->
@@ -863,13 +872,35 @@ opt_allocate(Linear, #cg{regs=Regs}) ->
opt_allocate_1([{L,#cg_blk{is=[#cg_alloc{stack=Stk}=I0|Is]}=Blk0}|Bs]=Bs0, Regs)
when is_integer(Stk) ->
- Yregs = opt_alloc_def(Bs0, gb_sets:singleton(L), []),
- I = I0#cg_alloc{def_yregs=Yregs},
- [{L,Blk0#cg_blk{is=[I|Is]}}|opt_allocate_1(Bs, Regs)];
+ %% Collect the variables that are initialized by copy
+ %% instructions in this block.
+ case ordsets:from_list(opt_allocate_defs(Is, Regs)) of
+ Yregs when length(Yregs) =:= Stk ->
+ %% Those copy instructions are sufficient to fully
+ %% initialize the stack frame.
+ I = I0#cg_alloc{def_yregs=Yregs},
+ [{L,Blk0#cg_blk{is=[I|Is]}}|opt_allocate_1(Bs, Regs)];
+ Yregs0 ->
+ %% Determine a conservative approximation of the Y
+ %% registers that are guaranteed to be initialized by all
+ %% successors of this block, and add to it the variables
+ %% initialized by copy instructions in this block.
+ Yregs1 = opt_alloc_def(Bs0, gb_sets:singleton(L), []),
+ Yregs = ordsets:union(Yregs0, Yregs1),
+ I = I0#cg_alloc{def_yregs=Yregs},
+ [{L,Blk0#cg_blk{is=[I|Is]}}|opt_allocate_1(Bs, Regs)]
+ end;
opt_allocate_1([B|Bs], Regs) ->
[B|opt_allocate_1(Bs, Regs)];
opt_allocate_1([], _) -> [].
+opt_allocate_defs([#cg_set{op=copy,dst=Dst}|Is], Regs) ->
+ case is_yreg(Dst, Regs) of
+ true -> [Dst|opt_allocate_defs(Is, Regs)];
+ false -> []
+ end;
+opt_allocate_defs(_, _Regs) -> [].
+
opt_alloc_def([{L,#cg_blk{is=Is,last=Last}}|Bs], Ws0, Def0) ->
case gb_sets:is_member(L, Ws0) of
false ->
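
The fast path added above only needs the leading copy instructions of the block. A toy model of the opt_allocate_defs/2 idea, with {copy,Dst} tuples standing in for #cg_set{op=copy,dst=Dst} and Y registers written directly instead of being looked up via Regs (all names invented):

    -module(alloc_demo).
    -export([run/0]).

    %% Collect destinations of the leading copies into Y registers;
    %% stop at the first instruction that is not such a copy.
    leading_yreg_copies([{copy,{y,_}=Dst}|Is]) ->
        [Dst|leading_yreg_copies(Is)];
    leading_yreg_copies(_) -> [].

    run() ->
        Is = [{copy,{y,0}},{copy,{y,1}},{call,foo}],
        Yregs = ordsets:from_list(leading_yreg_copies(Is)),
        %% A stack frame of size 2 is fully initialized by these
        %% copies alone, so the more expensive walk over the
        %% successor blocks can be skipped.
        2 = length(Yregs),
        ok.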
diff --git a/lib/compiler/src/beam_ssa_dead.erl b/lib/compiler/src/beam_ssa_dead.erl
index c20652580d..067d9a6741 100644
--- a/lib/compiler/src/beam_ssa_dead.erl
+++ b/lib/compiler/src/beam_ssa_dead.erl
@@ -135,7 +135,8 @@ shortcut_terminator(Last, _Is, _Bs, _St) ->
Last.
shortcut_switch([{Lit,L0}|T], Bool, Bs, St0) ->
- St = St0#st{rel_op=normalize_op({bif,'=:='}, [Bool,Lit])},
+ RelOp = {'=:=',Bool,Lit},
+ St = St0#st{rel_op=RelOp},
#b_br{bool=#b_literal{val=true},succ=L} =
shortcut(L0, bind_var(Bool, Lit, Bs), St#st{target=one_way}),
[{Lit,L}|shortcut_switch(T, Bool, Bs, St0)];
@@ -388,41 +389,43 @@ eval_terminator(#b_switch{arg=Arg,fail=Fail,list=List}=Sw, Bs, St) ->
%% Literal argument. Simplify to a `br`.
beam_ssa:normalize(Sw#b_switch{arg=Val});
#b_var{} ->
- case St of
- #st{rel_op=none} ->
- %% No previous relational operator is stored.
- %% Give up.
+ %% Try optimizing the switch.
+ case eval_switch(List, Arg, St, Fail) of
+ none ->
none;
- #st{} ->
- %% There is a previous relational operator stored.
- %% Try optimizing the switch.
- case eval_switch(List, Arg, St, Fail) of
- none ->
- none;
- To when is_integer(To) ->
- %% Either one of the values in the switch
- %% matched a previous value in a '=:=' test, or
- %% none of the values matched a previous test.
- #b_br{bool=#b_literal{val=true},succ=To,fail=To}
- end
+ To when is_integer(To) ->
+ %% Either one of the values in the switch
+ %% matched a previous value in a '=:=' test, or
+ %% none of the values matched a previous test.
+ #b_br{bool=#b_literal{val=true},succ=To,fail=To}
end
end;
eval_terminator(#b_ret{}, _Bs, _St) ->
none.
-eval_switch([{Lit,Lbl}|T], Arg, St, Fail) ->
- case eval_rel_op({bif,'=:='}, [Arg,Lit], St) of
- none ->
- %% This label could be reached.
- eval_switch(T, Arg, St, none);
- #b_literal{val=false} ->
- %% This branch will never be taken.
- eval_switch(T, Arg, St, Fail);
- #b_literal{val=true} ->
+eval_switch(List, Arg, #st{rel_op={_,Arg,_}=PrevOp}, Fail) ->
+ %% There is a previous relational operator testing the same variable.
+ %% Optimization may be possible.
+ eval_switch_1(List, Arg, PrevOp, Fail);
+eval_switch(_, _, _, _) ->
+ %% There is either no previous relational operator, or it tests
+ %% a different variable. Nothing to optimize.
+ none.
+
+eval_switch_1([{Lit,Lbl}|T], Arg, PrevOp, Fail) ->
+ RelOp = {'=:=',Arg,Lit},
+ case will_succeed(PrevOp, RelOp) of
+ yes ->
%% Success. This branch will always be taken.
- Lbl
+ Lbl;
+ no ->
+ %% This branch will never be taken.
+ eval_switch_1(T, Arg, PrevOp, Fail);
+ maybe ->
+ %% This label could be reached.
+ eval_switch_1(T, Arg, PrevOp, none)
end;
-eval_switch([], _Arg, _St, Fail) ->
+eval_switch_1([], _Arg, _PrevOp, Fail) ->
%% Fail is now either the failure label or 'none'.
Fail.
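
The rewritten eval_switch/eval_switch_1 can be followed with a small standalone sketch. The toy will_succeed/2 below only understands pairs of the {'=:=',Var,Lit} operators built above and never answers `maybe` (the real will_succeed/2 in this module handles many more relational operators and all three answers):

    -module(eval_switch_demo).
    -export([run/0]).

    %% Two '=:=' tests on the same variable succeed together
    %% exactly when the literals are equal.
    will_succeed({'=:=',Var,Lit1}, {'=:=',Var,Lit2}) when Lit1 =:= Lit2 ->
        yes;
    will_succeed({'=:=',Var,_}, {'=:=',Var,_}) ->
        no.

    eval_switch_1([{Lit,Lbl}|T], Arg, PrevOp, Fail) ->
        case will_succeed(PrevOp, {'=:=',Arg,Lit}) of
            yes -> Lbl;                                %% always taken
            no -> eval_switch_1(T, Arg, PrevOp, Fail)  %% never taken
        end;
    eval_switch_1([], _Arg, _PrevOp, Fail) -> Fail.

    run() ->
        List = [{a,1},{b,2},{c,3}],
        %% A preceding `X =:= b` test means the switch must go to 2.
        2 = eval_switch_1(List, x, {'=:=',x,b}, 99),
        %% No value can match after `X =:= d`: the fail label wins.
        99 = eval_switch_1(List, x, {'=:=',x,d}, 99),
        ok.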
diff --git a/lib/compiler/src/beam_ssa_opt.erl b/lib/compiler/src/beam_ssa_opt.erl
index ac2d943fef..2dda67eac6 100644
--- a/lib/compiler/src/beam_ssa_opt.erl
+++ b/lib/compiler/src/beam_ssa_opt.erl
@@ -22,7 +22,8 @@
-export([module/2]).
-include("beam_ssa.hrl").
--import(lists, [all/2,append/1,foldl/3,keyfind/3,member/2,reverse/1,reverse/2,
+-import(lists, [all/2,append/1,foldl/3,keyfind/3,member/2,
+ reverse/1,reverse/2,
splitwith/2,takewhile/2,unzip/1]).
-spec module(beam_ssa:b_module(), [compile:option()]) ->
@@ -787,15 +788,20 @@ float_flush_regs(#fs{regs=Rs}) ->
%%% with cheaper instructions
%%%
-ssa_opt_live(#st{ssa=Linear}=St) ->
- St#st{ssa=live_opt(reverse(Linear), #{}, [])}.
+ssa_opt_live(#st{ssa=Linear0}=St) ->
+ RevLinear = reverse(Linear0),
+ Blocks0 = maps:from_list(RevLinear),
+ Blocks = live_opt(RevLinear, #{}, Blocks0),
+ Linear = beam_ssa:linearize(Blocks),
+ St#st{ssa=Linear}.
-live_opt([{L,Blk0}|Bs], LiveMap0, Acc) ->
- Successors = beam_ssa:successors(Blk0),
+live_opt([{L,Blk0}|Bs], LiveMap0, Blocks) ->
+ Blk1 = beam_ssa_share:block(Blk0, Blocks),
+ Successors = beam_ssa:successors(Blk1),
Live0 = live_opt_succ(Successors, L, LiveMap0),
- {Blk,Live} = live_opt_blk(Blk0, Live0),
+ {Blk,Live} = live_opt_blk(Blk1, Live0),
LiveMap = live_opt_phis(Blk#b_blk.is, L, Live, LiveMap0),
- live_opt(Bs, LiveMap, [{L,Blk}|Acc]);
+ live_opt(Bs, LiveMap, Blocks#{L:=Blk});
live_opt([], _, Acc) -> Acc.
live_opt_succ([S|Ss], L, LiveMap) ->
diff --git a/lib/compiler/src/beam_ssa_pre_codegen.erl b/lib/compiler/src/beam_ssa_pre_codegen.erl
index 9175931375..56fe9b4793 100644
--- a/lib/compiler/src/beam_ssa_pre_codegen.erl
+++ b/lib/compiler/src/beam_ssa_pre_codegen.erl
@@ -1478,6 +1478,10 @@ copy_retval_is([#b_set{op=put_tuple_elements,args=Args0}=I0], false, _Yregs,
Copy, Count, Acc) ->
I = I0#b_set{args=copy_sub_args(Args0, Copy)},
{reverse(Acc, [I|acc_copy([], Copy)]),Count};
+copy_retval_is([#b_set{op=Op}=I0], false, Yregs, Copy, Count0, Acc0)
+ when Op =:= call; Op =:= make_fun ->
+ {I,Count,Acc} = place_retval_copy(I0, Yregs, Copy, Count0, Acc0),
+ {reverse(Acc, [I]),Count};
copy_retval_is([#b_set{}]=Is, false, _Yregs, Copy, Count, Acc) ->
{reverse(Acc, acc_copy(Is, Copy)),Count};
copy_retval_is([#b_set{},#b_set{op=succeeded}]=Is, false, _Yregs, Copy, Count, Acc) ->
@@ -1990,13 +1994,15 @@ reserve_zregs(Blocks, Intervals, Res) ->
beam_ssa:fold_rpo(F, [0], Res, Blocks).
reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst},
- #b_set{op={bif,'=:='},args=[Dst,Val]}], _Last, ShortLived, A0) ->
- case Val of
- #b_literal{val=Arity} when Arity bsr 32 =:= 0 ->
+ #b_set{op={bif,'=:='},args=[Dst,Val]}], Last, ShortLived, A0) ->
+ case {Val,Last} of
+ {#b_literal{val=Arity},#b_br{}} when Arity bsr 32 =:= 0 ->
%% These two instructions can be combined to a test_arity
%% instruction provided that the arity variable is short-lived.
reserve_zreg_1(Dst, ShortLived, A0);
- _ ->
+ {_,_} ->
+ %% Either the arity is too big, or the boolean value from
+ %% '=:=' will be returned.
A0
end;
reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst}],
@@ -2211,7 +2217,13 @@ linear_scan(#st{intervals=Intervals0,res=Res}=St0) ->
Free = init_free(maps:to_list(Res)),
Intervals1 = [init_interval(Int, Res) || Int <- Intervals0],
Intervals = sort(Intervals1),
- IsReserved = fun (#i{reg=Reg}) -> Reg =/= none end,
+ IsReserved = fun(#i{reg=Reg}) ->
+ case Reg of
+ none -> false;
+ {prefer,{_,_}} -> false;
+ {_,_} -> true
+ end
+ end,
{UnhandledRes,Unhandled} = partition(IsReserved, Intervals),
L = #l{unhandled_res=UnhandledRes,
unhandled_any=Unhandled,free=Free},
diff --git a/lib/compiler/src/beam_ssa_share.erl b/lib/compiler/src/beam_ssa_share.erl
new file mode 100644
index 0000000000..426efa2cc9
--- /dev/null
+++ b/lib/compiler/src/beam_ssa_share.erl
@@ -0,0 +1,370 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2018. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+%% Share code for semantically equivalent blocks referred to
+%% by `br` and `switch` instructions.
+%%
+%% A similar optimization is done in beam_jump, but doing it here as
+%% well is beneficial as it may enable other optimizations. If there
+%% are many semantically equivalent clauses, this optimization can
+%% substantially decrease compilation times.
+%%
+%% block/2 is called from the liveness optimization pass in
+%% beam_ssa_opt, as code sharing helps the liveness pass and vice
+%% versa.
+%%
+
+-module(beam_ssa_share).
+-export([module/2,block/2]).
+
+-include("beam_ssa.hrl").
+
+-import(lists, [keyfind/3,reverse/1,sort/1]).
+
+-spec module(beam_ssa:b_module(), [compile:option()]) ->
+ {'ok',beam_ssa:b_module()}.
+
+module(#b_module{body=Fs0}=Module, _Opts) ->
+ Fs = [function(F) || F <- Fs0],
+ {ok,Module#b_module{body=Fs}}.
+
+-spec block(Blk0, Blocks0) -> Blk when
+ Blk0 :: beam_ssa:b_blk(),
+ Blocks0 :: beam_ssa:block_map(),
+ Blk :: beam_ssa:b_blk().
+
+block(#b_blk{last=Last0}=Blk, Blocks) ->
+ case share_terminator(Last0, Blocks) of
+ none -> Blk;
+ Last -> Blk#b_blk{last=beam_ssa:normalize(Last)}
+ end.
+
+%%%
+%%% Local functions.
+%%%
+
+function(#b_function{anno=Anno,bs=Blocks0}=F) ->
+ try
+ PO = reverse(beam_ssa:rpo(Blocks0)),
+ {Blocks1,Changed} = blocks(PO, Blocks0, false),
+ Blocks = case Changed of
+ true ->
+ beam_ssa:trim_unreachable(Blocks1);
+ false ->
+ Blocks0
+ end,
+ F#b_function{bs=Blocks}
+ catch
+ Class:Error:Stack ->
+ #{func_info:={_,Name,Arity}} = Anno,
+ io:fwrite("Function: ~w/~w\n", [Name,Arity]),
+ erlang:raise(Class, Error, Stack)
+ end.
+
+blocks([L|Ls], Blocks, Changed) ->
+ #b_blk{last=Last0} = Blk0 = map_get(L, Blocks),
+ case block(Blk0, Blocks) of
+ #b_blk{last=Last0} ->
+ blocks(Ls, Blocks, Changed);
+ #b_blk{}=Blk ->
+ blocks(Ls, Blocks#{L:=Blk}, true)
+ end;
+blocks([], Blocks, Changed) ->
+ {Blocks,Changed}.
+
+share_terminator(#b_br{bool=#b_var{},succ=Succ0,fail=Fail0}=Br, Blocks) ->
+ {Succ,SuccBlk} = shortcut_nonempty_block(Succ0, Blocks),
+ {Fail,FailBlk} = shortcut_nonempty_block(Fail0, Blocks),
+ case are_equivalent(Succ, SuccBlk, Fail, FailBlk, Blocks) of
+ true ->
+ %% The blocks are semantically equivalent.
+ Br#b_br{succ=Succ,fail=Succ};
+ false ->
+ if
+ Succ =:= Succ0, Fail =:= Fail0 ->
+ %% None of the blocks were cut short.
+ none;
+ true ->
+ %% One or both labels were cut short
+ %% to avoid jumping to an empty block.
+ Br#b_br{succ=Succ,fail=Fail}
+ end
+ end;
+share_terminator(#b_switch{}=Sw, Blocks) ->
+ share_switch(Sw, Blocks);
+share_terminator(_Last, _Blocks) -> none.
+
+%% Test whether the two blocks are semantically equivalent. This
+%% function is specially optimized to return `false` as fast as
+%% possible if the blocks are not equivalent, as that is the common
+%% case.
+
+are_equivalent(_Succ, _, ?BADARG_BLOCK, _, _Blocks) ->
+ %% ?BADARG_BLOCK is special. Sharing could be incorrect.
+ false;
+are_equivalent(_Succ, #b_blk{is=Is1,last=#b_ret{arg=RetVal1}=Ret1},
+ _Fail, #b_blk{is=Is2,last=#b_ret{arg=RetVal2}=Ret2}, _Blocks) ->
+ case {RetVal1,RetVal2} of
+ {#b_literal{},#b_literal{}} ->
+ case RetVal1 =:= RetVal2 of
+ true ->
+ %% The return values are identical literals. We
+ %% only need to compare the canonicalized bodies.
+ Can1 = canonical_is(Is1),
+ Can2 = canonical_is(Is2),
+ Can1 =:= Can2;
+ false ->
+ %% Non-equal literals.
+ false
+ end;
+ {#b_var{},#b_var{}} ->
+ %% The return values are variables. We must canonicalize
+ %% the blocks (including returns) and compare them.
+ Can1 = canonical_is(Is1 ++ [Ret1]),
+ Can2 = canonical_is(Is2 ++ [Ret2]),
+ Can1 =:= Can2;
+ {_,_} ->
+ %% One literal and one variable.
+ false
+ end;
+are_equivalent(Succ,
+ #b_blk{is=Is1,
+ last=#b_br{bool=#b_literal{val=true},
+ succ=Target}},
+ Fail,
+ #b_blk{is=Is2,
+ last=#b_br{bool=#b_literal{val=true},
+ succ=Target}},
+ Blocks) ->
+ %% Both blocks end with an unconditional branch to the
+ %% same target block. If the target block has phi nodes,
+ %% we must pick up the values from the phi nodes and
+ %% compare them.
+ #b_blk{is=Is} = map_get(Target, Blocks),
+ Phis1 = canonical_terminator_phis(Is, Succ),
+ Phis2 = canonical_terminator_phis(Is, Fail),
+ case {Phis1,Phis2} of
+ {[#b_set{args=[#b_literal{}]}|_],_} when Phis1 =/= Phis2 ->
+ %% Different values are used in the phi nodes.
+ false;
+ {_,[#b_set{args=[#b_literal{}]}|_]} when Phis1 =/= Phis2 ->
+ %% Different values are used in the phi nodes.
+ false;
+ {_,_} ->
+ %% The values in the phi nodes are variables or identical
+ %% literals. We must canonicalize the blocks and compare
+ %% them.
+ Can1 = canonical_is(Is1 ++ Phis1),
+ Can2 = canonical_is(Is2 ++ Phis2),
+ Can1 =:= Can2
+ end;
+are_equivalent(Succ0, #b_blk{is=Is1,last=#b_br{bool=#b_var{},fail=Same}},
+ Fail0, #b_blk{is=Is2,last=#b_br{bool=#b_var{},fail=Same}},
+ Blocks) ->
+ %% Two-way branches with identical failure labels. First compare the
+ %% canonicalized bodies of the blocks.
+ case canonical_is(Is1) =:= canonical_is(Is2) of
+ false ->
+ %% Different bodies.
+ false;
+ true ->
+ %% Bodies were equal. That is fairly uncommon, so to keep
+ %% the code simple we will rewrite the `br` to a `switch`
+ %% and let share_switch/2 do the work of following the
+ %% branches.
+ Sw = #b_switch{arg=#b_var{name=not_used},fail=Fail0,
+ list=[{#b_literal{},Succ0}]},
+ #b_switch{fail=Fail,list=[{_,Succ}]} = share_switch(Sw, Blocks),
+ Fail =:= Succ
+ end;
+are_equivalent(_, _, _, _, _) -> false.
+
+share_switch(#b_switch{fail=Fail0,list=List0}=Sw, Blocks) ->
+ Prep = share_prepare_sw([{value,Fail0}|List0], Blocks, 0, []),
+ Res = do_share_switch(Prep, Blocks, []),
+ [{_,Fail}|List] = [VL || {_,VL} <- sort(Res)],
+ Sw#b_switch{fail=Fail,list=List}.
+
+share_prepare_sw([{V,L0}|T], Blocks, N, Acc) ->
+ {L,_Blk} = shortcut_nonempty_block(L0, Blocks),
+ share_prepare_sw(T, Blocks, N+1, [{{L,#{}},{N,{V,L}}}|Acc]);
+share_prepare_sw([], _, _, Acc) -> Acc.
+
+do_share_switch(Prep, Blocks, Acc) ->
+ Map = share_switch_1(Prep, Blocks, #{}),
+ share_switch_2(maps:values(Map), Blocks, Acc).
+
+share_switch_1([{Next0,Res}|T], Blocks, Map) ->
+ {Can,Next} = canonical_block(Next0, Blocks),
+ case Map of
+ #{Can:=Ls} ->
+ share_switch_1(T, Blocks, Map#{Can:=[{Next,Res}|Ls]});
+ #{} ->
+ share_switch_1(T, Blocks, Map#{Can=>[{Next,Res}]})
+ end;
+share_switch_1([], _Blocks, Map) -> Map.
+
+share_switch_2([[{_,{N,Res}}]|T], Blocks, Acc) ->
+ %% This block is not equivalent to any other block.
+ share_switch_2(T, Blocks, [{N,Res}|Acc]);
+share_switch_2([[{done,{_,{_,Common}}}|_]=Eqs|T], Blocks, Acc0) ->
+ %% Two or more blocks are semantically equivalent, and all blocks
+ %% are either terminated with a `ret` or a `br` to the same target
+ %% block. Replace the labels in the `switch` for all of those
+ %% blocks with the label for the first of the blocks.
+ Acc = [{N,{V,Common}} || {done,{N,{V,_}}} <- Eqs] ++ Acc0,
+ share_switch_2(T, Blocks, Acc);
+share_switch_2([[{_,_}|_]=Prep|T], Blocks, Acc0) ->
+ %% Two or more blocks are semantically equivalent, but they have
+ %% different successful successor blocks. Now we must check
+ %% recursively whether the successor blocks are equivalent too.
+ Acc = do_share_switch(Prep, Blocks, Acc0),
+ share_switch_2(T, Blocks, Acc);
+share_switch_2([], _, Acc) -> Acc.
+
+canonical_block({L,VarMap0}, Blocks) ->
+ #b_blk{is=Is,last=Last0} = map_get(L, Blocks),
+ case canonical_terminator(L, Last0, Blocks) of
+ none ->
+ %% The block has a terminator that we don't handle.
+ {{none,L},done};
+ {Last,done} ->
+ %% The block ends with a `ret` or an unconditional `br` to
+ %% another block.
+ {Can,_VarMap} = canonical_is(Is ++ Last, VarMap0, []),
+ {Can,done};
+ {Last,Next} ->
+ %% The block ends with a conditional branch.
+ {Can,VarMap} = canonical_is(Is ++ Last, VarMap0, []),
+ {Can,{Next,VarMap}}
+ end.
+
+%% Translate a sequence of instructions to a canonical representation. If the
+%% canonical representations of two blocks compare equal, the blocks are
+%% semantically equivalent. The following translations are done:
+%%
+%% * Variables defined in the instruction sequence are replaced with
+%% {var,0}, {var,1}, and so on. Free variables are not changed.
+%%
+%% * `location` annotations that would produce a `line` instruction are
+%% kept. All other annotations are cleared.
+%%
+%% * Instructions are repackaged into tuples instead of into the
+%% usual records. The main reason is to avoid violating the types for
+%% the SSA records. We can simplify things a little by linking the
+%% instructions directly instead of putting them into a list.
+
+canonical_is(Is) ->
+ {Can,_} = canonical_is(Is, #{}, []),
+ Can.
+
+canonical_is([#b_set{op=Op,dst=Dst,args=Args0}=I|Is], VarMap0, Acc) ->
+ Args = [canonical_arg(Arg, VarMap0) || Arg <- Args0],
+ Var = {var,map_size(VarMap0)},
+ VarMap = VarMap0#{Dst=>Var},
+ LineAnno = case Op of
+ bs_match ->
+ %% The location annotation for a bs_match instruction
+ %% is only used in warnings, never to emit a `line`
+ %% instruction. Therefore, it should not be included.
+ [];
+ _ ->
+ %% The location annotation will be used in a `line`
+ %% instruction. It must be included.
+ beam_ssa:get_anno(location, I, none)
+ end,
+ canonical_is(Is, VarMap, {Op,LineAnno,Var,Args,Acc});
+canonical_is([#b_ret{arg=Arg}], VarMap, Acc0) ->
+ Acc1 = case Acc0 of
+ {call,_Anno,Var,[#b_local{}|_]=Args,PrevAcc} ->
+ %% This is a tail-recursive call to a local function.
+ %% There will be no line instruction generated;
+ %% thus, the annotation is not significant.
+ {call,[],Var,Args,PrevAcc};
+ _ ->
+ Acc0
+ end,
+ {{ret,canonical_arg(Arg, VarMap),Acc1},VarMap};
+canonical_is([#b_br{bool=#b_var{},fail=Fail}], VarMap, Acc) ->
+ {{br,succ,Fail,Acc},VarMap};
+canonical_is([#b_br{succ=Succ}], VarMap, Acc) ->
+ {{br,Succ,Acc},VarMap};
+canonical_is([], VarMap, Acc) ->
+ {Acc,VarMap}.
+
+canonical_terminator(_L, #b_ret{}=Ret, _Blocks) ->
+ {[Ret],done};
+canonical_terminator(L, #b_br{bool=#b_literal{val=true},succ=Succ}=Br, Blocks) ->
+ #b_blk{is=Is} = map_get(Succ, Blocks),
+ case canonical_terminator_phis(Is, L) of
+ [] ->
+ {[],Succ};
+ [_|_]=Phis ->
+ {Phis ++ [Br],done}
+ end;
+canonical_terminator(_L, #b_br{bool=#b_var{},succ=Succ}=Br, _Blocks) ->
+ {[Br],Succ};
+canonical_terminator(_, _, _) -> none.
+
+canonical_terminator_phis([#b_set{op=phi,args=PhiArgs}=Phi|Is], L) ->
+ {Value,L} = keyfind(L, 2, PhiArgs),
+ [Phi#b_set{op=copy,args=[Value]}|canonical_terminator_phis(Is, L)];
+canonical_terminator_phis([#b_set{op=peek_message}=I|_], L) ->
+ %% We could get stuck in an infinite loop if we allowed the
+ %% comparisons to continue into this block. Force an unequal
+ %% compare with all other predecessors of this block.
+ [I#b_set{op=copy,args=[#b_literal{val=L}]}];
+canonical_terminator_phis(_, _) -> [].
+
+canonical_arg(#b_var{}=Var, VarMap) ->
+ case VarMap of
+ #{Var:=CanonicalVar} ->
+ CanonicalVar;
+ #{} ->
+ Var
+ end;
+canonical_arg(#b_remote{mod=Mod,name=Name}, VarMap) ->
+ {remote,canonical_arg(Mod, VarMap),
+ canonical_arg(Name, VarMap)};
+canonical_arg(Other, _VarMap) -> Other.
+
+%% Shortcut branches to empty blocks if safe.
+
+shortcut_nonempty_block(L, Blocks) ->
+ case map_get(L, Blocks) of
+ #b_blk{is=[],last=#b_br{bool=#b_literal{val=true},succ=Succ}}=Blk ->
+ %% This block is empty.
+ case is_forbidden(Succ, Blocks) of
+ false ->
+ shortcut_nonempty_block(Succ, Blocks);
+ true ->
+ {L,Blk}
+ end;
+ #b_blk{}=Blk ->
+ {L,Blk}
+ end.
+
+is_forbidden(L, Blocks) ->
+ case map_get(L, Blocks) of
+ #b_blk{is=[#b_set{op=phi}|_]} -> true;
+ #b_blk{is=[#b_set{op=peek_message}|_]} -> true;
+ #b_blk{} -> false
+ end.
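
The heart of the module is canonical_is/1: two blocks can share code exactly when their canonical forms compare equal. A stripped-down model of the renaming (the {set,Dst,Op,Args} shape is invented; the real pass works on #b_set{} records, keeps `location` annotations, and chains the instructions instead of building a list):

    -module(canon_demo).
    -export([run/0]).

    %% Destinations are renamed to {var,0}, {var,1}, ... in
    %% definition order; free variables and literals pass through.
    canon(Is) -> canon(Is, #{}, []).

    canon([{set,Dst,Op,Args0}|Is], VarMap0, Acc) ->
        Args = [maps:get(A, VarMap0, A) || A <- Args0],
        Var = {var,map_size(VarMap0)},
        canon(Is, VarMap0#{Dst=>Var}, [{Op,Var,Args}|Acc]);
    canon([], _VarMap, Acc) -> lists:reverse(Acc).

    run() ->
        %% The same computation under different variable names:
        A = [{set,x,add,[a,1]},{set,y,mul,[x,2]}],
        B = [{set,p,add,[a,1]},{set,q,mul,[p,2]}],
        true = canon(A) =:= canon(B),
        ok.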
diff --git a/lib/compiler/src/beam_ssa_type.erl b/lib/compiler/src/beam_ssa_type.erl
index 18e6e73a46..95fc3bb0e9 100644
--- a/lib/compiler/src/beam_ssa_type.erl
+++ b/lib/compiler/src/beam_ssa_type.erl
@@ -27,9 +27,10 @@
-define(UNICODE_INT, #t_integer{elements={0,16#10FFFF}}).
--record(d, {ds :: #{beam_ssa:var_name():=beam_ssa:b_set()},
+-record(d, {ds :: #{beam_ssa:b_var():=beam_ssa:b_set()},
ls :: #{beam_ssa:label():=type_db()},
- sub :: #{beam_ssa:var_name():=beam_ssa:value()}
+ once :: cerl_sets:set(beam_ssa:b_var()),
+ sub :: #{beam_ssa:b_var():=beam_ssa:value()}
}).
-define(ATOM_SET_SIZE, 5).
@@ -56,13 +57,15 @@
Block :: beam_ssa:b_blk().
opt(Linear, Args) ->
+ UsedOnce = used_once(Linear, Args),
Ts = maps:from_list([{V,any} || #b_var{}=V <- Args]),
FakeCall = #b_set{op=call,args=[#b_remote{mod=#b_literal{val=unknown},
name=#b_literal{val=unknown},
arity=0}]},
Defs = maps:from_list([{Var,FakeCall#b_set{dst=Var}} ||
#b_var{}=Var <- Args]),
- D = #d{ds=Defs,ls=#{0=>Ts},sub=#{}},
+ D = #d{ds=Defs,ls=#{0=>Ts,?BADARG_BLOCK=>#{}},
+ once=UsedOnce,sub=#{}},
opt_1(Linear, D).
opt_1([{L,Blk}|Bs], #d{ls=Ls}=D) ->
@@ -425,16 +428,43 @@ opt_terminator(#b_ret{}=Ret, _Ts, _Ds) -> Ret.
update_successors(#b_br{bool=#b_literal{val=true},succ=S}, Ts, D) ->
update_successor(S, Ts, D);
-update_successors(#b_br{bool=#b_var{}=Bool,succ=Succ,fail=Fail}, Ts, D0) ->
- D = update_successor_bool(Bool, false, Fail, Ts, D0),
- SuccTs = infer_types(Bool, Ts, D0),
- update_successor_bool(Bool, true, Succ, SuccTs, D);
-update_successors(#b_switch{arg=#b_var{}=V,fail=Fail,list=List}, Ts, D0) ->
- D = update_successor(Fail, Ts, D0),
- foldl(fun({Val,S}, A) ->
- T = get_type(Val, Ts),
- update_successor(S, Ts#{V=>T}, A)
- end, D, List);
+update_successors(#b_br{bool=#b_var{}=Bool,succ=Succ,fail=Fail}, Ts0, D0) ->
+ case cerl_sets:is_element(Bool, D0#d.once) of
+ true ->
+ %% This variable is defined in this block and is only
+ %% referenced by this br terminator. Therefore, there is
+ %% no need to include it in the type database passed on to the
+ %% successors of this block.
+ Ts = maps:remove(Bool, Ts0),
+ D = update_successor(Fail, Ts, D0),
+ SuccTs = infer_types(Bool, Ts, D0),
+ update_successor(Succ, SuccTs, D);
+ false ->
+ D = update_successor_bool(Bool, false, Fail, Ts0, D0),
+ SuccTs = infer_types(Bool, Ts0, D0),
+ update_successor_bool(Bool, true, Succ, SuccTs, D)
+ end;
+update_successors(#b_switch{arg=#b_var{}=V,fail=Fail,list=List}, Ts0, D0) ->
+ case cerl_sets:is_element(V, D0#d.once) of
+ true ->
+ %% This variable is defined in this block and is only
+ %% referenced by this switch terminator. Therefore, there is
+ %% no need to include it in the type database passed on to the
+ %% successors of this block.
+ Ts = maps:remove(V, Ts0),
+ D = update_successor(Fail, Ts, D0),
+ F = fun({_Val,S}, A) ->
+ update_successor(S, Ts, A)
+ end,
+ foldl(F, D, List);
+ false ->
+ D = update_successor(Fail, Ts0, D0),
+ F = fun({Val,S}, A) ->
+ T = get_type(Val, Ts0),
+ update_successor(S, Ts0#{V=>T}, A)
+ end,
+ foldl(F, D, List)
+ end;
update_successors(#b_ret{}, _Ts, D) -> D.
update_successor_bool(#b_var{}=Var, BoolValue, S, Ts, D) ->
@@ -447,6 +477,11 @@ update_successor_bool(#b_var{}=Var, BoolValue, S, Ts, D) ->
update_successor(S, Ts, D)
end.
+update_successor(?BADARG_BLOCK, _Ts, #d{}=D) ->
+ %% We KNOW that no variables are used in the ?BADARG_BLOCK,
+ %% so there is no need to update the type information. That
+ %% can be a huge time saver for large functions.
+ D;
update_successor(S, Ts0, #d{ls=Ls}=D) ->
case Ls of
#{S:=Ts1} ->
@@ -766,6 +801,48 @@ simplify_not(#b_br{bool=#b_var{}=V,succ=Succ,fail=Fail}=Br0, Ts, Ds) ->
Br0
end.
+%%%
+%%% Calculate the set of variables that are only used once in the
+%%% block that they are defined in. That will allow us to discard type
+%%% information for variables that will never be referenced by the
+%%% successor blocks, potentially improving compilation times.
+%%%
+
+used_once(Linear, Args) ->
+ Map0 = used_once_1(reverse(Linear), #{}),
+ Map = maps:without(Args, Map0),
+ cerl_sets:from_list(maps:keys(Map)).
+
+used_once_1([{L,#b_blk{is=Is,last=Last}}|Bs], Uses0) ->
+ Uses = used_once_2([Last|reverse(Is)], L, Uses0),
+ used_once_1(Bs, Uses);
+used_once_1([], Uses) -> Uses.
+
+used_once_2([I|Is], L, Uses0) ->
+ Uses = used_once_uses(beam_ssa:used(I), L, Uses0),
+ case I of
+ #b_set{dst=Dst} ->
+ case Uses of
+ #{Dst:=[L]} ->
+ used_once_2(Is, L, Uses);
+ #{} ->
+ used_once_2(Is, L, maps:remove(Dst, Uses))
+ end;
+ _ ->
+ used_once_2(Is, L, Uses)
+ end;
+used_once_2([], _, Uses) -> Uses.
+
+used_once_uses([V|Vs], L, Uses) ->
+ case Uses of
+ #{V:=Us} ->
+ used_once_uses(Vs, L, Uses#{V:=[L|Us]});
+ #{} ->
+ used_once_uses(Vs, L, Uses#{V=>[L]})
+ end;
+used_once_uses([], _, Uses) -> Uses.
+
+
get_types(Values, Ts) ->
[get_type(Val, Ts) || Val <- Values].
-spec get_type(beam_ssa:value(), type_db()) -> type().
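
The used_once bookkeeping can also be modelled in isolation. A toy version in which a block is {Label, Defs, Uses} with plain atoms as variables (the real pass walks #b_blk{} records in reverse and extracts uses with beam_ssa:used/1):

    -module(used_once_demo).
    -export([run/0]).

    used_once(Blocks) ->
        Uses = lists:foldl(fun({L,_,Us}, Acc0) ->
                                   lists:foldl(fun(V, Acc) ->
                                                       Ls = maps:get(V, Acc, []),
                                                       Acc#{V => [L|Ls]}
                                               end, Acc0, Us)
                           end, #{}, Blocks),
        %% A variable qualifies if its complete use list is a single
        %% use in its defining block.
        [V || {L,Defs,_} <- Blocks, V <- Defs,
              maps:get(V, Uses, []) =:= [L]].

    run() ->
        Blocks = [{1,[x,bool],[bool]},  %% bool feeds only block 1's terminator
                  {2,[],[x]}],          %% x is live into block 2
        %% bool's type can be dropped before updating successors:
        [bool] = used_once(Blocks),
        ok.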
diff --git a/lib/compiler/src/beam_validator.erl b/lib/compiler/src/beam_validator.erl
index 7d908df3bf..1945faba7f 100644
--- a/lib/compiler/src/beam_validator.erl
+++ b/lib/compiler/src/beam_validator.erl
@@ -1366,8 +1366,10 @@ kill_aliases(Reg, #st{aliases=Aliases0}=St) ->
St
end.
-set_type(Type, {x,_}=Reg, Vst) -> set_type_reg(Type, Reg, Vst);
-set_type(Type, {y,_}=Reg, Vst) -> set_type_y(Type, Reg, Vst);
+set_type(Type, {x,_}=Reg, Vst) ->
+ set_type_reg(Type, Reg, Reg, Vst);
+set_type(Type, {y,_}=Reg, Vst) ->
+ set_type_reg(Type, Reg, Reg, Vst);
set_type(_, _, #vst{}=Vst) -> Vst.
set_type_reg(Type, Src, Dst, Vst) ->
diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl
index 65c4f140c9..14c8c5b4ab 100644
--- a/lib/compiler/src/compile.erl
+++ b/lib/compiler/src/compile.erl
@@ -823,6 +823,9 @@ kernel_passes() ->
{pass,beam_kernel_to_ssa},
{iff,dssa,{listing,"ssa"}},
{iff,ssalint,{pass,beam_ssa_lint}},
+ {unless,no_share_opt,{pass,beam_ssa_share}},
+ {iff,dssashare,{listing,"ssashare"}},
+ {iff,ssalint,{pass,beam_ssa_lint}},
{unless,no_bsm_opt,{pass,beam_ssa_bsm}},
{iff,dssabsm,{listing,"ssabsm"}},
{iff,ssalint,{pass,beam_ssa_lint}},
@@ -2098,6 +2101,7 @@ pre_load() ->
beam_ssa_opt,
beam_ssa_pre_codegen,
beam_ssa_recv,
+ beam_ssa_share,
beam_ssa_type,
beam_trim,
beam_utils,
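
The two new options follow the existing pass conventions in the list above: no_share_opt skips the pass, and dssashare dumps its output as a listing. A sketch of typical usage (module name invented):

    %% Compile without the new sharing optimization.
    compile:file(mymod, [no_share_opt]).

    %% Dump the SSA code after the sharing pass; with the usual
    %% {listing,Ext} convention this should end up in "mymod.ssashare".
    compile:file(mymod, [dssashare]).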
diff --git a/lib/compiler/src/compiler.app.src b/lib/compiler/src/compiler.app.src
index 86259270bd..1472e3fde1 100644
--- a/lib/compiler/src/compiler.app.src
+++ b/lib/compiler/src/compiler.app.src
@@ -45,6 +45,7 @@
beam_ssa_pp,
beam_ssa_pre_codegen,
beam_ssa_recv,
+ beam_ssa_share,
beam_ssa_type,
beam_trim,
beam_utils,
diff --git a/lib/compiler/src/sys_core_fold.erl b/lib/compiler/src/sys_core_fold.erl
index d848cd8f19..43c99be982 100644
--- a/lib/compiler/src/sys_core_fold.erl
+++ b/lib/compiler/src/sys_core_fold.erl
@@ -2667,12 +2667,20 @@ opt_build_stacktrace(#c_let{vars=[#c_var{name=Cooked}],
#c_call{module=#c_literal{val=erlang},
name=#c_literal{val=raise},
args=[Class,Exp,#c_var{name=Cooked}]} ->
- %% The stacktrace is only used in a call to erlang:raise/3.
- %% There is no need to build the stacktrace. Replace the
- %% call to erlang:raise/3 with the the raw_raise/3 instruction,
- %% which will use a raw stacktrace.
- #c_primop{name=#c_literal{val=raw_raise},
- args=[Class,Exp,RawStk]};
+ case core_lib:is_var_used(Cooked, #c_cons{hd=Class,tl=Exp}) of
+ true ->
+ %% Not safe. The stacktrace is used in the class or
+ %% reason.
+ Let;
+ false ->
+ %% The stacktrace is only used as the last
+ %% argument of erlang:raise/3. There is no need
+ %% to build the stacktrace. Replace the call to
+ %% erlang:raise/3 with the raw_raise/3
+ %% instruction, which will use a raw stacktrace.
+ #c_primop{name=#c_literal{val=raw_raise},
+ args=[Class,Exp,RawStk]}
+ end;
#c_let{vars=[#c_var{name=V}],arg=Arg,body=B0} when V =/= Cooked ->
case core_lib:is_var_used(Cooked, Arg) of
false ->
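
At the Erlang level, the guard added above distinguishes re-throws like the following (a minimal illustration; function names invented):

    %% raw_raise applies: Stk only occurs as the third argument,
    %% so the symbolic stacktrace never needs to be built.
    rethrow(F) ->
        try F() catch C:R:Stk -> erlang:raise(C, R, Stk) end.

    %% Not safe: Stk is also part of the reason term, so the
    %% optimization is skipped and the stacktrace is built.
    rethrow_wrapped(F) ->
        try F() catch C:R:Stk -> erlang:raise(C, {R,Stk}, Stk) end.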