aboutsummaryrefslogtreecommitdiffstats
path: root/lib/compiler/src
diff options
context:
space:
mode:
Diffstat (limited to 'lib/compiler/src')
-rw-r--r--lib/compiler/src/Makefile1
-rw-r--r--lib/compiler/src/beam_block.erl38
-rw-r--r--lib/compiler/src/beam_except.erl96
-rw-r--r--lib/compiler/src/beam_jump.erl133
-rw-r--r--lib/compiler/src/beam_listing.erl2
-rw-r--r--lib/compiler/src/beam_peep.erl12
-rw-r--r--lib/compiler/src/beam_ssa.erl1
-rw-r--r--lib/compiler/src/beam_ssa_codegen.erl39
-rw-r--r--lib/compiler/src/beam_ssa_dead.erl59
-rw-r--r--lib/compiler/src/beam_ssa_opt.erl20
-rw-r--r--lib/compiler/src/beam_ssa_pre_codegen.erl22
-rw-r--r--lib/compiler/src/beam_ssa_share.erl370
-rw-r--r--lib/compiler/src/beam_ssa_type.erl103
-rw-r--r--lib/compiler/src/beam_trim.erl316
-rw-r--r--lib/compiler/src/beam_utils.erl542
-rw-r--r--lib/compiler/src/beam_validator.erl6
-rw-r--r--lib/compiler/src/compile.erl39
-rw-r--r--lib/compiler/src/compiler.app.src1
-rw-r--r--lib/compiler/src/erl_bifs.erl1
-rw-r--r--lib/compiler/src/sys_core_fold.erl20
20 files changed, 1023 insertions, 798 deletions
diff --git a/lib/compiler/src/Makefile b/lib/compiler/src/Makefile
index d475e5a19a..961dacc6c9 100644
--- a/lib/compiler/src/Makefile
+++ b/lib/compiler/src/Makefile
@@ -69,6 +69,7 @@ MODULES = \
beam_ssa_pp \
beam_ssa_pre_codegen \
beam_ssa_recv \
+ beam_ssa_share \
beam_ssa_type \
beam_kernel_to_ssa \
beam_trim \
diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl
index 9d8d5b2b0c..707974b2c1 100644
--- a/lib/compiler/src/beam_block.erl
+++ b/lib/compiler/src/beam_block.erl
@@ -22,7 +22,7 @@
-module(beam_block).
-export([module/2]).
--import(lists, [reverse/1,splitwith/2]).
+-import(lists, [keysort/2,reverse/1,splitwith/2]).
-spec module(beam_utils:module_code(), [compile:option()]) ->
{'ok',beam_utils:module_code()}.
@@ -53,7 +53,8 @@ blockify([I|Is0]=IsAll, Acc) ->
case collect(I) of
error -> blockify(Is0, [I|Acc]);
Instr when is_tuple(Instr) ->
- {Block,Is} = collect_block(IsAll),
+ {Block0,Is} = collect_block(IsAll),
+ Block = sort_moves(Block0),
blockify(Is, [{block,Block}|Acc])
end;
blockify([], Acc) -> reverse(Acc).
@@ -117,3 +118,36 @@ embed_lines([{block,B1},{line,_}=Line|T], Acc) ->
embed_lines([I|Is], Acc) ->
embed_lines(Is, [I|Acc]);
embed_lines([], Acc) -> Acc.
+
+%% sort_moves([Instruction]) -> [Instruction].
+%% Sort move instructions on the Y register to give the loader
+%% more opportunities for combining instructions.
+
+sort_moves([{set,[{x,_}],[{y,_}],move}=I|Is0]) ->
+ {Moves,Is} = sort_moves_1(Is0, x, y, [I]),
+ Moves ++ sort_moves(Is);
+sort_moves([{set,[{y,_}],[{x,_}],move}=I|Is0]) ->
+ {Moves,Is} = sort_moves_1(Is0, y, x, [I]),
+ Moves ++ sort_moves(Is);
+sort_moves([I|Is]) ->
+ [I|sort_moves(Is)];
+sort_moves([]) -> [].
+
+sort_moves_1([{set,[{x,0}],[_],move}=I|Is], _DTag, _STag, Acc) ->
+ %% The loader sometimes combines a move to x0 with the
+ %% instruction that follows, producing, for example, a move_call
+ %% instruction. Therefore, we don't want include this move
+ %% instruction in the sorting.
+ {sort_on_yreg(Acc)++[I],Is};
+sort_moves_1([{set,[{DTag,_}],[{STag,_}],move}=I|Is], DTag, STag, Acc) ->
+ sort_moves_1(Is, DTag, STag, [I|Acc]);
+sort_moves_1(Is, _DTag, _STag, Acc) ->
+ {sort_on_yreg(Acc),Is}.
+
+sort_on_yreg([{set,[Dst],[Src],move}|_]=Moves) ->
+ case {Dst,Src} of
+ {{y,_},{x,_}} ->
+ keysort(2, Moves);
+ {{x,_},{y,_}} ->
+ keysort(3, Moves)
+ end.
diff --git a/lib/compiler/src/beam_except.erl b/lib/compiler/src/beam_except.erl
index 98831d87a7..49bfb5606f 100644
--- a/lib/compiler/src/beam_except.erl
+++ b/lib/compiler/src/beam_except.erl
@@ -31,7 +31,7 @@
%%% erlang:error(function_clause, Args) => jump FuncInfoLabel
%%%
--import(lists, [reverse/1,seq/2]).
+-import(lists, [reverse/1,seq/2,splitwith/2]).
-spec module(beam_utils:module_code(), [compile:option()]) ->
{'ok',beam_utils:module_code()}.
@@ -74,13 +74,13 @@ translate([I|Is], St, Acc) ->
translate([], _, Acc) ->
reverse(Acc).
-translate_1(Ar, I, Is, St, [{line,_}=Line|Acc1]=Acc0) ->
- case dig_out(Ar, Acc1) of
+translate_1(Ar, I, Is, #st{arity=Arity}=St, [{line,_}=Line|Acc1]=Acc0) ->
+ case dig_out(Ar, Arity, Acc1) of
no ->
translate(Is, St, [I|Acc0]);
- {yes,{function_clause,Arity},Acc2} ->
+ {yes,function_clause,Acc2} ->
case {Line,St} of
- {{line,Loc},#st{lbl=Fi,loc=Loc,arity=Arity}} ->
+ {{line,Loc},#st{lbl=Fi,loc=Loc}} ->
Instr = {jump,{f,Fi}},
translate(Is, St, [Instr|Acc2]);
{_,_} ->
@@ -92,9 +92,13 @@ translate_1(Ar, I, Is, St, [{line,_}=Line|Acc1]=Acc0) ->
translate(Is, St, [Instr,Line|Acc2])
end.
-dig_out(Ar, [{kill,_}|Is]) ->
- dig_out(Ar, Is);
-dig_out(1, [{block,Bl0}|Is]) ->
+dig_out(1, _Arity, Is) ->
+ dig_out(Is);
+dig_out(2, Arity, Is) ->
+ dig_out_fc(Arity, Is);
+dig_out(_, _, _) -> no.
+
+dig_out([{block,Bl0}|Is]) ->
case dig_out_block(reverse(Bl0)) of
no -> no;
{yes,What,[]} ->
@@ -102,12 +106,7 @@ dig_out(1, [{block,Bl0}|Is]) ->
{yes,What,Bl} ->
{yes,What,[{block,Bl}|Is]}
end;
-dig_out(2, [{block,Bl}|Is]) ->
- case dig_out_block_fc(Bl) of
- no -> no;
- {yes,What} -> {yes,What,Is}
- end;
-dig_out(_, _) -> no.
+dig_out(_) -> no.
dig_out_block([{set,[{x,0}],[{atom,if_clause}],move}]) ->
{yes,if_end,[]};
@@ -141,33 +140,64 @@ fix_block_1([{set,[],[],{alloc,Live,{F1,F2,Needed0,F3}}}|Is], Words) ->
fix_block_1([I|Is], Words) ->
[I|fix_block_1(Is, Words)].
-dig_out_block_fc([{set,[],[],{alloc,Live,_}}|Bl]) ->
- Regs = maps:from_list([{{x,X},{arg,X}} || X <- seq(0, Live-1)]),
- dig_out_fc(Bl, Regs);
-dig_out_block_fc(_) -> no.
-dig_out_fc([{set,[Dst],[Hd,Tl],put_list}|Is], Regs0) ->
+dig_out_fc(Arity, Is0) ->
+ Regs0 = maps:from_list([{{x,X},{arg,X}} || X <- seq(0, Arity-1)]),
+ {Is,Acc0} = splitwith(fun({label,_}) -> false;
+ ({test,_,_,_}) -> false;
+ (_) -> true
+ end, Is0),
+ {Regs,Acc} = dig_out_fc_1(reverse(Is), Regs0, Acc0),
+ case is_fc(Arity, Regs) of
+ true ->
+ {yes,function_clause,Acc};
+ false ->
+ no
+ end.
+
+dig_out_fc_1([{block,Bl}|Is], Regs0, Acc) ->
+ Regs = dig_out_fc_block(Bl, Regs0),
+ dig_out_fc_1(Is, Regs, Acc);
+dig_out_fc_1([{bs_set_position,_,_}=I|Is], Regs, Acc) ->
+ dig_out_fc_1(Is, Regs, [I|Acc]);
+dig_out_fc_1([{bs_get_tail,_,_,Live}=I|Is], Regs0, Acc) ->
+ Regs = prune_xregs(Live, Regs0),
+ dig_out_fc_1(Is, Regs, [I|Acc]);
+dig_out_fc_1([_|_], _Regs, _Acc) ->
+ {#{},[]};
+dig_out_fc_1([], Regs, Acc) ->
+ {Regs,Acc}.
+
+dig_out_fc_block([{set,[],[],{alloc,Live,_}}|Is], Regs0) ->
+ Regs = prune_xregs(Live, Regs0),
+ dig_out_fc_block(Is, Regs);
+dig_out_fc_block([{set,[Dst],[Hd,Tl],put_list}|Is], Regs0) ->
Regs = Regs0#{Dst=>{cons,get_reg(Hd, Regs0),get_reg(Tl, Regs0)}},
- dig_out_fc(Is, Regs);
-dig_out_fc([{set,[Dst],[Src],move}|Is], Regs0) ->
+ dig_out_fc_block(Is, Regs);
+dig_out_fc_block([{set,[Dst],[Src],move}|Is], Regs0) ->
Regs = Regs0#{Dst=>get_reg(Src, Regs0)},
- dig_out_fc(Is, Regs);
-dig_out_fc([{set,_,_,_}|_], _Regs) ->
- %% Unknown instruction. It is not a function_clause error.
- no;
-dig_out_fc([], Regs) ->
+ dig_out_fc_block(Is, Regs);
+dig_out_fc_block([{set,_,_,_}|_], _Regs) ->
+ %% Unknown instruction. Fail.
+ #{};
+dig_out_fc_block([], Regs) -> Regs.
+
+prune_xregs(Live, Regs) ->
+ maps:filter(fun({x,X}, _) -> X < Live end, Regs).
+
+is_fc(Arity, Regs) ->
case Regs of
#{{x,0}:={atom,function_clause},{x,1}:=Args} ->
- dig_out_fc_1(Args, 0);
+ is_fc_1(Args, 0) =:= Arity;
#{} ->
- no
+ false
end.
-dig_out_fc_1({cons,{arg,I},T}, I) ->
- dig_out_fc_1(T, I+1);
-dig_out_fc_1(nil, I) ->
- {yes,{function_clause,I}};
-dig_out_fc_1(_, _) -> no.
+is_fc_1({cons,{arg,I},T}, I) ->
+ is_fc_1(T, I+1);
+is_fc_1(nil, I) ->
+ I;
+is_fc_1(_, _) -> -1.
get_reg(R, Regs) ->
case Regs of
diff --git a/lib/compiler/src/beam_jump.erl b/lib/compiler/src/beam_jump.erl
index fbff4cfd79..8b0e3e32f8 100644
--- a/lib/compiler/src/beam_jump.erl
+++ b/lib/compiler/src/beam_jump.erl
@@ -101,6 +101,10 @@
%%% always keep the label. (beam_clean will remove any unused
%%% labels.)
%%%
+%%% (7) Replace a jump to a return instruction with a return instruction.
+%%% Similarly, replace a jump to deallocate + return with those
+%%% instructions.
+%%%
%%% Note: This modules depends on (almost) all branches and jumps only
%%% going forward, so that we can remove instructions (including definition
%%% of labels) after any label that has not been referenced by the code
@@ -144,13 +148,20 @@ module({Mod,Exp,Attr,Fs0,Lc0}, _Opt) ->
%%
%% NOTE: This function assumes that there are no labels inside blocks.
function({function,Name,Arity,CLabel,Asm0}, Lc0) ->
- Asm1 = eliminate_moves(Asm0),
- {Asm2,Lc} = insert_labels(Asm1, Lc0, []),
- Asm3 = share(Asm2),
- Asm4 = move(Asm3),
- Asm5 = opt(Asm4, CLabel),
- Asm = remove_unused_labels(Asm5),
- {{function,Name,Arity,CLabel,Asm},Lc}.
+ try
+ Asm1 = eliminate_moves(Asm0),
+ {Asm2,Lc} = insert_labels(Asm1, Lc0, []),
+ Asm3 = share(Asm2),
+ Asm4 = move(Asm3),
+ Asm5 = opt(Asm4, CLabel),
+ Asm6 = unshare(Asm5),
+ Asm = remove_unused_labels(Asm6),
+ {{function,Name,Arity,CLabel,Asm},Lc}
+ catch
+ Class:Error:Stack ->
+ io:fwrite("Function: ~w/~w\n", [Name,Arity]),
+ erlang:raise(Class, Error, Stack)
+ end.
%%%
%%% Scan instructions in execution order and remove redundant 'move'
@@ -196,22 +207,19 @@ no_fallthrough([I|_]) ->
is_unreachable_after(I).
already_has_value(Lit, Lbl, Reg, D) ->
- Key = {Lbl,Reg},
case D of
- #{Lbl:=unsafe} ->
- false;
- #{Key:=Lit} ->
+ #{Lbl:={Reg,Lit}} ->
true;
#{} ->
false
end.
update_value_dict([Lit,{f,Lbl}|T], Reg, D0) ->
- Key = {Lbl,Reg},
D = case D0 of
- #{Key := inconsistent} -> D0;
- #{Key := _} -> D0#{Key := inconsistent};
- _ -> D0#{Key => Lit}
+ #{Lbl:=unsafe} -> D0;
+ #{Lbl:={Reg,Lit}} -> D0;
+ #{Lbl:=_} -> D0#{Lbl:=unsafe};
+ #{} -> D0#{Lbl=>{Reg,Lit}}
end,
update_value_dict(T, Reg, D);
update_value_dict([], _, D) -> D.
@@ -390,14 +398,13 @@ extract_seq_1(_, _) -> no.
{
entry :: beam_asm:label(), %Entry label (must not be moved).
replace :: #{beam_asm:label() := beam_asm:label()}, %Labels to replace.
- labels :: cerl_sets:set(), %Set of referenced labels.
- index :: beam_utils:code_index() | {lazy,[beam_utils:instruction()]} %Index built lazily only if needed
+ labels :: cerl_sets:set() %Set of referenced labels.
}).
opt(Is0, CLabel) ->
find_fixpoint(fun(Is) ->
Lbls = initial_labels(Is),
- St = #st{entry=CLabel,replace=#{},labels=Lbls,index={lazy,Is}},
+ St = #st{entry=CLabel,replace=#{},labels=Lbls},
opt(Is, [], St)
end, Is0).
@@ -407,7 +414,7 @@ find_fixpoint(OptFun, Is0) ->
Is -> find_fixpoint(OptFun, Is)
end.
-opt([{test,_,{f,L}=Lbl,_}=I|[{jump,{f,L}}|_]=Is], Acc0, St0) ->
+opt([{test,_,{f,L}=Lbl,_}=I|[{jump,{f,L}}|_]=Is], Acc, St) ->
%% We have
%% Test Label Ops
%% jump Label
@@ -416,23 +423,10 @@ opt([{test,_,{f,L}=Lbl,_}=I|[{jump,{f,L}}|_]=Is], Acc0, St0) ->
case beam_utils:is_pure_test(I) of
false ->
%% Test is not pure; we must keep it.
- opt(Is, [I|Acc0], label_used(Lbl, St0));
+ opt(Is, [I|Acc], label_used(Lbl, St));
true ->
%% The test is pure and its failure label is the same
%% as in the jump that follows -- thus it is not needed.
- %% Check if any of the previous instructions could also be eliminated.
- {Acc,St} = opt_useless_loads(Acc0, L, St0),
- opt(Is, Acc, St)
- end;
-opt([{test,_,{f,L}=Lbl,_}=I|[{label,L}|_]=Is], Acc0, St0) ->
- %% Similar to the above, except we have a fall-through rather than jump
- %% Test Label Ops
- %% label Label
- case beam_utils:is_pure_test(I) of
- false ->
- opt(Is, [I|Acc0], label_used(Lbl, St0));
- true ->
- {Acc,St} = opt_useless_loads(Acc0, L, St0),
opt(Is, Acc, St)
end;
opt([{test,Test0,{f,L}=Lbl,Ops}=I|[{jump,To}|Is]=Is0], Acc, St) ->
@@ -499,46 +493,6 @@ normalize_replace([{From,To0}|Rest], Replace, Acc) ->
normalize_replace([], _Replace, Acc) ->
maps:from_list(Acc).
-%% After eliminating a test, it might happen, that a register was only used
-%% in this test. Let's check if that was the case and if it was so, we can
-%% eliminate the load into the register completely.
-opt_useless_loads([{block,_}|_]=Is, L, #st{index={lazy,FIs}}=St) ->
- opt_useless_loads(Is, L, St#st{index=beam_utils:index_labels(FIs)});
-opt_useless_loads([{block,Block0}|Is], L, #st{index=Index}=St) ->
- case opt_useless_block_loads(Block0, L, Index) of
- [] ->
- opt_useless_loads(Is, L, St);
- [_|_]=Block ->
- {[{block,Block}|Is],St}
- end;
-%% After eliminating the test and useless blocks, it might happen,
-%% that the previous test could also be eliminated.
-%% It might be that the label was already marked as used, even if ultimately,
-%% it never will be - we can't do much about it at that point, though
-opt_useless_loads([{test,_,{f,L},_}=I|Is], L, St) ->
- case beam_utils:is_pure_test(I) of
- false ->
- {[I|Is],St};
- true ->
- opt_useless_loads(Is, L, St)
- end;
-opt_useless_loads(Is, _L, St) ->
- {Is,St}.
-
-opt_useless_block_loads([{set,[Dst],_,_}=I|Is], L, Index) ->
- BlockJump = [{block,Is},{jump,{f,L}}],
- case beam_utils:is_killed(Dst, BlockJump, Index) of
- true ->
- %% The register is killed and not used, we can remove the load
- opt_useless_block_loads(Is, L, Index);
- false ->
- [I|opt_useless_block_loads(Is, L, Index)]
- end;
-opt_useless_block_loads([I|Is], L, Index) ->
- [I|opt_useless_block_loads(Is, L, Index)];
-opt_useless_block_loads([], _L, _Index) ->
- [].
-
collect_labels(Is, Label, #st{entry=Entry,replace=Replace} = St) ->
collect_labels_1(Is, Label, Entry, Replace, St).
@@ -665,6 +619,39 @@ drop_upto_label([{label,_}|_]=Is) -> Is;
drop_upto_label([_|Is]) -> drop_upto_label(Is);
drop_upto_label([]) -> [].
+%% unshare([Instruction]) -> [Instruction].
+%% Replace a jump to a return sequence (a `return` instruction
+%% optionally preced by a `deallocate` instruction) with the return
+%% sequence. This always saves execution time and may also save code
+%% space (depending on the architecture). Eliminating `jump`
+%% instructions also gives beam_trim more opportunities to trim the
+%% stack.
+
+unshare(Is) ->
+ Short = unshare_collect_short(Is, #{}),
+ unshare_short(Is, Short).
+
+unshare_collect_short([{label,L},return|Is], Map) ->
+ unshare_collect_short(Is, Map#{L=>[return]});
+unshare_collect_short([{label,L},{deallocate,_}=D,return|Is], Map) ->
+ %% `deallocate` and `return` are combined into one instruction by
+ %% the loader.
+ unshare_collect_short(Is, Map#{L=>[D,return]});
+unshare_collect_short([_|Is], Map) ->
+ unshare_collect_short(Is, Map);
+unshare_collect_short([], Map) -> Map.
+
+unshare_short([{jump,{f,F}}=I|Is], Map) ->
+ case Map of
+ #{F:=Seq} ->
+ Seq ++ unshare_short(Is, Map);
+ #{} ->
+ [I|unshare_short(Is, Map)]
+ end;
+unshare_short([I|Is], Map) ->
+ [I|unshare_short(Is, Map)];
+unshare_short([], _Map) -> [].
+
%% ulbl(Instruction, UsedCerlSet) -> UsedCerlSet'
%% Update the cerl_set UsedCerlSet with any function-local labels
%% (i.e. not with labels in call instructions) referenced by
diff --git a/lib/compiler/src/beam_listing.erl b/lib/compiler/src/beam_listing.erl
index 8a0ce5b50a..6121593b11 100644
--- a/lib/compiler/src/beam_listing.erl
+++ b/lib/compiler/src/beam_listing.erl
@@ -66,7 +66,7 @@ module(Stream, [_|_]=Fs) ->
foreach(fun (F) -> io:format(Stream, "~p.\n", [F]) end, Fs).
format_asm([{label,L}|Is]) ->
- [" {label,",integer_to_list(L),"}.\n"|format_asm(Is)];
+ [io_lib:format(" {label,~p}.\n", [L])|format_asm(Is)];
format_asm([I|Is]) ->
[io_lib:format(" ~p", [I]),".\n"|format_asm(Is)];
format_asm([]) -> [].
diff --git a/lib/compiler/src/beam_peep.erl b/lib/compiler/src/beam_peep.erl
index 2323a439e9..5730e9704e 100644
--- a/lib/compiler/src/beam_peep.erl
+++ b/lib/compiler/src/beam_peep.erl
@@ -94,30 +94,26 @@ peep([{gc_bif,_,_,_,_,Dst}=I|Is], SeenTests0, Acc) ->
peep([{jump,{f,L}},{label,L}=I|Is], _, Acc) ->
%% Sometimes beam_jump has missed this optimization.
peep(Is, gb_sets:empty(), [I|Acc]);
-peep([{select,Op,R,F,Vls0}|Is], SeenTests0, Acc0) ->
+peep([{select,select_val,R,F,Vls0}|Is], SeenTests0, Acc0) ->
case prune_redundant_values(Vls0, F) of
[] ->
%% No values left. Must convert to plain jump.
I = {jump,F},
peep([I|Is], gb_sets:empty(), Acc0);
- [{atom,_}=Value,Lbl] when Op =:= select_val ->
+ [{atom,_}=Value,Lbl] ->
%% Single value left. Convert to regular test.
Is1 = [{test,is_eq_exact,F,[R,Value]},{jump,Lbl}|Is],
peep(Is1, SeenTests0, Acc0);
- [{integer,_}=Value,Lbl] when Op =:= select_val ->
+ [{integer,_}=Value,Lbl] ->
%% Single value left. Convert to regular test.
Is1 = [{test,is_eq_exact,F,[R,Value]},{jump,Lbl}|Is],
peep(Is1, SeenTests0, Acc0);
- [Arity,Lbl] when Op =:= select_tuple_arity ->
- %% Single value left. Convert to regular test
- Is1 = [{test,test_arity,F,[R,Arity]},{jump,Lbl}|Is],
- peep(Is1, SeenTests0, Acc0);
[{atom,B1},Lbl,{atom,B2},Lbl] when B1 =:= not B2 ->
%% Replace with is_boolean test.
Is1 = [{test,is_boolean,F,[R]},{jump,Lbl}|Is],
peep(Is1, SeenTests0, Acc0);
[_|_]=Vls ->
- I = {select,Op,R,F,Vls},
+ I = {select,select_val,R,F,Vls},
peep(Is, gb_sets:empty(), [I|Acc0])
end;
peep([{get_map_elements,Fail,Src,List}=I|Is], _SeenTests, Acc0) ->
diff --git a/lib/compiler/src/beam_ssa.erl b/lib/compiler/src/beam_ssa.erl
index c5e23d2ae0..b491e340b7 100644
--- a/lib/compiler/src/beam_ssa.erl
+++ b/lib/compiler/src/beam_ssa.erl
@@ -194,6 +194,7 @@ no_side_effect(#b_set{op=Op}) ->
extract -> true;
get_hd -> true;
get_tl -> true;
+ get_map_element -> true;
get_tuple_element -> true;
has_map_field -> true;
is_nonempty_list -> true;
diff --git a/lib/compiler/src/beam_ssa_codegen.erl b/lib/compiler/src/beam_ssa_codegen.erl
index 3c14062d0b..d3facc5911 100644
--- a/lib/compiler/src/beam_ssa_codegen.erl
+++ b/lib/compiler/src/beam_ssa_codegen.erl
@@ -747,7 +747,16 @@ need_live_anno(Op) ->
end.
%%%
-%%% Add annotations for defined Y registers.
+%%% Add the following annotations for Y registers:
+%%%
+%%% def_yregs An ordset with variables that refer to live Y registers.
+%%% That is, Y registers that that have been killed
+%%% are not included. This annotation is added to all
+%%% instructions that require Y registers to be initialized.
+%%%
+%%% kill_yregs This annotation is added to call instructions. It is
+%%% an ordset containing variables referring to Y registers
+%%% that will no longer be used after the call instruction.
%%%
defined(Linear, #cg{regs=Regs}) ->
@@ -863,13 +872,35 @@ opt_allocate(Linear, #cg{regs=Regs}) ->
opt_allocate_1([{L,#cg_blk{is=[#cg_alloc{stack=Stk}=I0|Is]}=Blk0}|Bs]=Bs0, Regs)
when is_integer(Stk) ->
- Yregs = opt_alloc_def(Bs0, gb_sets:singleton(L), []),
- I = I0#cg_alloc{def_yregs=Yregs},
- [{L,Blk0#cg_blk{is=[I|Is]}}|opt_allocate_1(Bs, Regs)];
+ %% Collect the variables that are initialized by copy
+ %% instruction in this block.
+ case ordsets:from_list(opt_allocate_defs(Is, Regs)) of
+ Yregs when length(Yregs) =:= Stk ->
+ %% Those copy instructions are sufficient to fully
+ %% initialize the stack frame.
+ I = I0#cg_alloc{def_yregs=Yregs},
+ [{L,Blk0#cg_blk{is=[I|Is]}}|opt_allocate_1(Bs, Regs)];
+ Yregs0 ->
+ %% Determine a conservative approximation of the Y
+ %% registers that are guaranteed to be initialized by all
+ %% successors of this block, and to it add the variables
+ %% initialized by copy instructions in this block.
+ Yregs1 = opt_alloc_def(Bs0, gb_sets:singleton(L), []),
+ Yregs = ordsets:union(Yregs0, Yregs1),
+ I = I0#cg_alloc{def_yregs=Yregs},
+ [{L,Blk0#cg_blk{is=[I|Is]}}|opt_allocate_1(Bs, Regs)]
+ end;
opt_allocate_1([B|Bs], Regs) ->
[B|opt_allocate_1(Bs, Regs)];
opt_allocate_1([], _) -> [].
+opt_allocate_defs([#cg_set{op=copy,dst=Dst}|Is], Regs) ->
+ case is_yreg(Dst, Regs) of
+ true -> [Dst|opt_allocate_defs(Is, Regs)];
+ false -> []
+ end;
+opt_allocate_defs(_, _Regs) -> [].
+
opt_alloc_def([{L,#cg_blk{is=Is,last=Last}}|Bs], Ws0, Def0) ->
case gb_sets:is_member(L, Ws0) of
false ->
diff --git a/lib/compiler/src/beam_ssa_dead.erl b/lib/compiler/src/beam_ssa_dead.erl
index c20652580d..067d9a6741 100644
--- a/lib/compiler/src/beam_ssa_dead.erl
+++ b/lib/compiler/src/beam_ssa_dead.erl
@@ -135,7 +135,8 @@ shortcut_terminator(Last, _Is, _Bs, _St) ->
Last.
shortcut_switch([{Lit,L0}|T], Bool, Bs, St0) ->
- St = St0#st{rel_op=normalize_op({bif,'=:='}, [Bool,Lit])},
+ RelOp = {'=:=',Bool,Lit},
+ St = St0#st{rel_op=RelOp},
#b_br{bool=#b_literal{val=true},succ=L} =
shortcut(L0, bind_var(Bool, Lit, Bs), St#st{target=one_way}),
[{Lit,L}|shortcut_switch(T, Bool, Bs, St0)];
@@ -388,41 +389,43 @@ eval_terminator(#b_switch{arg=Arg,fail=Fail,list=List}=Sw, Bs, St) ->
%% Literal argument. Simplify to a `br`.
beam_ssa:normalize(Sw#b_switch{arg=Val});
#b_var{} ->
- case St of
- #st{rel_op=none} ->
- %% No previous relational operator is stored.
- %% Give up.
+ %% Try optimizing the switch.
+ case eval_switch(List, Arg, St, Fail) of
+ none ->
none;
- #st{} ->
- %% There is a previous relational operator stored.
- %% Try optimizing the switch.
- case eval_switch(List, Arg, St, Fail) of
- none ->
- none;
- To when is_integer(To) ->
- %% Either one of the values in the switch
- %% matched a previous value in a '=:=' test, or
- %% none of the values matched a previous test.
- #b_br{bool=#b_literal{val=true},succ=To,fail=To}
- end
+ To when is_integer(To) ->
+ %% Either one of the values in the switch
+ %% matched a previous value in a '=:=' test, or
+ %% none of the values matched a previous test.
+ #b_br{bool=#b_literal{val=true},succ=To,fail=To}
end
end;
eval_terminator(#b_ret{}, _Bs, _St) ->
none.
-eval_switch([{Lit,Lbl}|T], Arg, St, Fail) ->
- case eval_rel_op({bif,'=:='}, [Arg,Lit], St) of
- none ->
- %% This label could be reached.
- eval_switch(T, Arg, St, none);
- #b_literal{val=false} ->
- %% This branch will never be taken.
- eval_switch(T, Arg, St, Fail);
- #b_literal{val=true} ->
+eval_switch(List, Arg, #st{rel_op={_,Arg,_}=PrevOp}, Fail) ->
+ %% There is a previous relational operator testing the same variable.
+ %% Optimization may be possible.
+ eval_switch_1(List, Arg, PrevOp, Fail);
+eval_switch(_, _, _, _) ->
+ %% There is either no previous relational operator, or it tests
+ %% a different variable. Nothing to optimize.
+ none.
+
+eval_switch_1([{Lit,Lbl}|T], Arg, PrevOp, Fail) ->
+ RelOp = {'=:=',Arg,Lit},
+ case will_succeed(PrevOp, RelOp) of
+ yes ->
%% Success. This branch will always be taken.
- Lbl
+ Lbl;
+ no ->
+ %% This branch will never be taken.
+ eval_switch_1(T, Arg, PrevOp, Fail);
+ maybe ->
+ %% This label could be reached.
+ eval_switch_1(T, Arg, PrevOp, none)
end;
-eval_switch([], _Arg, _St, Fail) ->
+eval_switch_1([], _Arg, _PrevOp, Fail) ->
%% Fail is now either the failure label or 'none'.
Fail.
diff --git a/lib/compiler/src/beam_ssa_opt.erl b/lib/compiler/src/beam_ssa_opt.erl
index ac2d943fef..2dda67eac6 100644
--- a/lib/compiler/src/beam_ssa_opt.erl
+++ b/lib/compiler/src/beam_ssa_opt.erl
@@ -22,7 +22,8 @@
-export([module/2]).
-include("beam_ssa.hrl").
--import(lists, [all/2,append/1,foldl/3,keyfind/3,member/2,reverse/1,reverse/2,
+-import(lists, [all/2,append/1,foldl/3,keyfind/3,member/2,
+ reverse/1,reverse/2,
splitwith/2,takewhile/2,unzip/1]).
-spec module(beam_ssa:b_module(), [compile:option()]) ->
@@ -787,15 +788,20 @@ float_flush_regs(#fs{regs=Rs}) ->
%%% with a cheaper instructions
%%%
-ssa_opt_live(#st{ssa=Linear}=St) ->
- St#st{ssa=live_opt(reverse(Linear), #{}, [])}.
+ssa_opt_live(#st{ssa=Linear0}=St) ->
+ RevLinear = reverse(Linear0),
+ Blocks0 = maps:from_list(RevLinear),
+ Blocks = live_opt(RevLinear, #{}, Blocks0),
+ Linear = beam_ssa:linearize(Blocks),
+ St#st{ssa=Linear}.
-live_opt([{L,Blk0}|Bs], LiveMap0, Acc) ->
- Successors = beam_ssa:successors(Blk0),
+live_opt([{L,Blk0}|Bs], LiveMap0, Blocks) ->
+ Blk1 = beam_ssa_share:block(Blk0, Blocks),
+ Successors = beam_ssa:successors(Blk1),
Live0 = live_opt_succ(Successors, L, LiveMap0),
- {Blk,Live} = live_opt_blk(Blk0, Live0),
+ {Blk,Live} = live_opt_blk(Blk1, Live0),
LiveMap = live_opt_phis(Blk#b_blk.is, L, Live, LiveMap0),
- live_opt(Bs, LiveMap, [{L,Blk}|Acc]);
+ live_opt(Bs, LiveMap, Blocks#{L:=Blk});
live_opt([], _, Acc) -> Acc.
live_opt_succ([S|Ss], L, LiveMap) ->
diff --git a/lib/compiler/src/beam_ssa_pre_codegen.erl b/lib/compiler/src/beam_ssa_pre_codegen.erl
index 9175931375..56fe9b4793 100644
--- a/lib/compiler/src/beam_ssa_pre_codegen.erl
+++ b/lib/compiler/src/beam_ssa_pre_codegen.erl
@@ -1478,6 +1478,10 @@ copy_retval_is([#b_set{op=put_tuple_elements,args=Args0}=I0], false, _Yregs,
Copy, Count, Acc) ->
I = I0#b_set{args=copy_sub_args(Args0, Copy)},
{reverse(Acc, [I|acc_copy([], Copy)]),Count};
+copy_retval_is([#b_set{op=Op}=I0], false, Yregs, Copy, Count0, Acc0)
+ when Op =:= call; Op =:= make_fun ->
+ {I,Count,Acc} = place_retval_copy(I0, Yregs, Copy, Count0, Acc0),
+ {reverse(Acc, [I]),Count};
copy_retval_is([#b_set{}]=Is, false, _Yregs, Copy, Count, Acc) ->
{reverse(Acc, acc_copy(Is, Copy)),Count};
copy_retval_is([#b_set{},#b_set{op=succeeded}]=Is, false, _Yregs, Copy, Count, Acc) ->
@@ -1990,13 +1994,15 @@ reserve_zregs(Blocks, Intervals, Res) ->
beam_ssa:fold_rpo(F, [0], Res, Blocks).
reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst},
- #b_set{op={bif,'=:='},args=[Dst,Val]}], _Last, ShortLived, A0) ->
- case Val of
- #b_literal{val=Arity} when Arity bsr 32 =:= 0 ->
+ #b_set{op={bif,'=:='},args=[Dst,Val]}], Last, ShortLived, A0) ->
+ case {Val,Last} of
+ {#b_literal{val=Arity},#b_br{}} when Arity bsr 32 =:= 0 ->
%% These two instructions can be combined to a test_arity
%% instruction provided that the arity variable is short-lived.
reserve_zreg_1(Dst, ShortLived, A0);
- _ ->
+ {_,_} ->
+ %% Either the arity is too big, or the boolean value from
+ %% '=:=' will be returned.
A0
end;
reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst}],
@@ -2211,7 +2217,13 @@ linear_scan(#st{intervals=Intervals0,res=Res}=St0) ->
Free = init_free(maps:to_list(Res)),
Intervals1 = [init_interval(Int, Res) || Int <- Intervals0],
Intervals = sort(Intervals1),
- IsReserved = fun (#i{reg=Reg}) -> Reg =/= none end,
+ IsReserved = fun(#i{reg=Reg}) ->
+ case Reg of
+ none -> false;
+ {prefer,{_,_}} -> false;
+ {_,_} -> true
+ end
+ end,
{UnhandledRes,Unhandled} = partition(IsReserved, Intervals),
L = #l{unhandled_res=UnhandledRes,
unhandled_any=Unhandled,free=Free},
diff --git a/lib/compiler/src/beam_ssa_share.erl b/lib/compiler/src/beam_ssa_share.erl
new file mode 100644
index 0000000000..426efa2cc9
--- /dev/null
+++ b/lib/compiler/src/beam_ssa_share.erl
@@ -0,0 +1,370 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2018. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+%% Share code for semantically equivalent blocks referred to
+%% to by `br` and `switch` instructions.
+%%
+%% A similar optimization is done in beam_jump, but doing it here as
+%% well is beneficial as it may enable other optimizations. If there
+%% are many semantically equivalent clauses, this optimization can
+%% substanstially decrease compilation times.
+%%
+%% block/2 is called from the liveness optimization pass in
+%% beam_ssa_opt, as code sharing helps the liveness pass and vice
+%% versa.
+%%
+
+-module(beam_ssa_share).
+-export([module/2,block/2]).
+
+-include("beam_ssa.hrl").
+
+-import(lists, [keyfind/3,reverse/1,sort/1]).
+
+-spec module(beam_ssa:b_module(), [compile:option()]) ->
+ {'ok',beam_ssa:b_module()}.
+
+module(#b_module{body=Fs0}=Module, _Opts) ->
+ Fs = [function(F) || F <- Fs0],
+ {ok,Module#b_module{body=Fs}}.
+
+-spec block(Blk0, Blocks0) -> Blk when
+ Blk0 :: beam_ssa:b_blk(),
+ Blocks0 :: beam_ssa:block_map(),
+ Blk :: beam_ssa:b_blk().
+
+block(#b_blk{last=Last0}=Blk, Blocks) ->
+ case share_terminator(Last0, Blocks) of
+ none -> Blk;
+ Last -> Blk#b_blk{last=beam_ssa:normalize(Last)}
+ end.
+
+%%%
+%%% Local functions.
+%%%
+
+function(#b_function{anno=Anno,bs=Blocks0}=F) ->
+ try
+ PO = reverse(beam_ssa:rpo(Blocks0)),
+ {Blocks1,Changed} = blocks(PO, Blocks0, false),
+ Blocks = case Changed of
+ true ->
+ beam_ssa:trim_unreachable(Blocks1);
+ false ->
+ Blocks0
+ end,
+ F#b_function{bs=Blocks}
+ catch
+ Class:Error:Stack ->
+ #{func_info:={_,Name,Arity}} = Anno,
+ io:fwrite("Function: ~w/~w\n", [Name,Arity]),
+ erlang:raise(Class, Error, Stack)
+ end.
+
+blocks([L|Ls], Blocks, Changed) ->
+ #b_blk{last=Last0} = Blk0 = map_get(L, Blocks),
+ case block(Blk0, Blocks) of
+ #b_blk{last=Last0} ->
+ blocks(Ls, Blocks, Changed);
+ #b_blk{}=Blk ->
+ blocks(Ls, Blocks#{L:=Blk}, true)
+ end;
+blocks([], Blocks, Changed) ->
+ {Blocks,Changed}.
+
+share_terminator(#b_br{bool=#b_var{},succ=Succ0,fail=Fail0}=Br, Blocks) ->
+ {Succ,SuccBlk} = shortcut_nonempty_block(Succ0, Blocks),
+ {Fail,FailBlk} = shortcut_nonempty_block(Fail0, Blocks),
+ case are_equivalent(Succ, SuccBlk, Fail, FailBlk, Blocks) of
+ true ->
+ %% The blocks are semantically equivalent.
+ Br#b_br{succ=Succ,fail=Succ};
+ false ->
+ if
+ Succ =:= Succ0, Fail =:= Fail0 ->
+ %% None of blocks were cut short.
+ none;
+ true ->
+ %% One or both labels were cut short
+ %% to avoid jumping to an empty block.
+ Br#b_br{succ=Succ,fail=Fail}
+ end
+ end;
+share_terminator(#b_switch{}=Sw, Blocks) ->
+ share_switch(Sw, Blocks);
+share_terminator(_Last, _Blocks) -> none.
+
+%% Test whether the two blocks are semantically equivalent. This
+%% function is specially optimized to return `false` as fast as
+%% possible if the blocks are not equivalent, as that is the common
+%% case.
+
+are_equivalent(_Succ, _, ?BADARG_BLOCK, _, _Blocks) ->
+ %% ?BADARG_BLOCK is special. Sharing could be incorrect.
+ false;
+are_equivalent(_Succ, #b_blk{is=Is1,last=#b_ret{arg=RetVal1}=Ret1},
+ _Fail, #b_blk{is=Is2,last=#b_ret{arg=RetVal2}=Ret2}, _Blocks) ->
+ case {RetVal1,RetVal2} of
+ {#b_literal{},#b_literal{}} ->
+ case RetVal1 =:= RetVal2 of
+ true ->
+ %% The return values are identical literals. We
+ %% only need to compare the canonicalized bodies.
+ Can1 = canonical_is(Is1),
+ Can2 = canonical_is(Is2),
+ Can1 =:= Can2;
+ false ->
+ %% Non-equal literals.
+ false
+ end;
+ {#b_var{},#b_var{}} ->
+ %% The return values are varibles. We must canonicalize
+ %% the blocks (including returns) and compare them.
+ Can1 = canonical_is(Is1 ++ [Ret1]),
+ Can2 = canonical_is(Is2 ++ [Ret2]),
+ Can1 =:= Can2;
+ {_,_} ->
+ %% One literal and one variable.
+ false
+ end;
+are_equivalent(Succ,
+ #b_blk{is=Is1,
+ last=#b_br{bool=#b_literal{val=true},
+ succ=Target}},
+ Fail,
+ #b_blk{is=Is2,
+ last=#b_br{bool=#b_literal{val=true},
+ succ=Target}},
+ Blocks) ->
+ %% Both blocks end with an unconditional branch to the
+ %% same target block. If the target block has phi nodes,
+ %% we must pick up the values from the phi nodes and
+ %% compare them.
+ #b_blk{is=Is} = map_get(Target, Blocks),
+ Phis1 = canonical_terminator_phis(Is, Succ),
+ Phis2 = canonical_terminator_phis(Is, Fail),
+ case {Phis1,Phis2} of
+ {[#b_set{args=[#b_literal{}]}|_],_} when Phis1 =/= Phis2 ->
+ %% Different values are used in the phi nodes.
+ false;
+ {_,[#b_set{args=[#b_literal{}]}|_]} when Phis1 =/= Phis2 ->
+ %% Different values are used in the phi nodes.
+ false;
+ {_,_} ->
+ %% The values in the phi nodes are variables or identical
+ %% literals. We must canonicalize the blocks and compare
+ %% them.
+ Can1 = canonical_is(Is1 ++ Phis1),
+ Can2 = canonical_is(Is2 ++ Phis2),
+ Can1 =:= Can2
+ end;
+are_equivalent(Succ0, #b_blk{is=Is1,last=#b_br{bool=#b_var{},fail=Same}},
+ Fail0, #b_blk{is=Is2,last=#b_br{bool=#b_var{},fail=Same}},
+ Blocks) ->
+ %% Two-way branches with identical failure labels. First compare the
+ %% canonicalized bodies of the blocks.
+ case canonical_is(Is1) =:= canonical_is(Is2) of
+ false ->
+ %% Different bodies.
+ false;
+ true ->
+ %% Bodies were equal. That is fairly uncommon, so to keep
+ %% the code simple we will rewrite the `br` to a `switch`
+ %% and let share_switch/2 do the work of following the
+ %% branches.
+ Sw = #b_switch{arg=#b_var{name=not_used},fail=Fail0,
+ list=[{#b_literal{},Succ0}]},
+ #b_switch{fail=Fail,list=[{_,Succ}]} = share_switch(Sw, Blocks),
+ Fail =:= Succ
+ end;
+are_equivalent(_, _, _, _, _) -> false.
+
+share_switch(#b_switch{fail=Fail0,list=List0}=Sw, Blocks) ->
+ Prep = share_prepare_sw([{value,Fail0}|List0], Blocks, 0, []),
+ Res = do_share_switch(Prep, Blocks, []),
+ [{_,Fail}|List] = [VL || {_,VL} <- sort(Res)],
+ Sw#b_switch{fail=Fail,list=List}.
+
+share_prepare_sw([{V,L0}|T], Blocks, N, Acc) ->
+ {L,_Blk} = shortcut_nonempty_block(L0, Blocks),
+ share_prepare_sw(T, Blocks, N+1, [{{L,#{}},{N,{V,L}}}|Acc]);
+share_prepare_sw([], _, _, Acc) -> Acc.
+
+do_share_switch(Prep, Blocks, Acc) ->
+ Map = share_switch_1(Prep, Blocks, #{}),
+ share_switch_2(maps:values(Map), Blocks, Acc).
+
+share_switch_1([{Next0,Res}|T], Blocks, Map) ->
+ {Can,Next} = canonical_block(Next0, Blocks),
+ case Map of
+ #{Can:=Ls} ->
+ share_switch_1(T, Blocks, Map#{Can:=[{Next,Res}|Ls]});
+ #{} ->
+ share_switch_1(T, Blocks, Map#{Can=>[{Next,Res}]})
+ end;
+share_switch_1([], _Blocks, Map) -> Map.
+
+share_switch_2([[{_,{N,Res}}]|T], Blocks, Acc) ->
+ %% This block is not equivalent to any other block.
+ share_switch_2(T, Blocks, [{N,Res}|Acc]);
+share_switch_2([[{done,{_,{_,Common}}}|_]=Eqs|T], Blocks, Acc0) ->
+ %% Two or more blocks are semantically equivalent, and all blocks
+ %% are either terminated with a `ret` or a `br` to the same target
+ %% block. Replace the labels in the `switch` for all of those
+ %% blocks with the label for the first of the blocks.
+ Acc = [{N,{V,Common}} || {done,{N,{V,_}}} <- Eqs] ++ Acc0,
+ share_switch_2(T, Blocks, Acc);
+share_switch_2([[{_,_}|_]=Prep|T], Blocks, Acc0) ->
+ %% Two or more blocks are semantically equivalent, but they have
+ %% different successful successor blocks. Now we must check
+ %% recursively whether the successor blocks are equivalent too.
+ Acc = do_share_switch(Prep, Blocks, Acc0),
+ share_switch_2(T, Blocks, Acc);
+share_switch_2([], _, Acc) -> Acc.
+
+canonical_block({L,VarMap0}, Blocks) ->
+ #b_blk{is=Is,last=Last0} = map_get(L, Blocks),
+ case canonical_terminator(L, Last0, Blocks) of
+ none ->
+ %% The block has a terminator that we don't handle.
+ {{none,L},done};
+ {Last,done} ->
+ %% The block ends with a `ret` or an unconditional `br` to
+ %% another block.
+ {Can,_VarMap} = canonical_is(Is ++ Last, VarMap0, []),
+ {Can,done};
+ {Last,Next} ->
+ %% The block ends with a conditional branch.
+ {Can,VarMap} = canonical_is(Is ++ Last, VarMap0, []),
+ {Can,{Next,VarMap}}
+ end.
+
+%% Translate a sequence of instructions to a canonical representation. If the
+%% canonical representation of two blocks compare equal, the blocks are
+%% semantically equivalent. The following translations are done:
+%%
+%% * Variables defined in the instruction sequence are replaced with
+%% {var,0}, {var,1}, and so on. Free variables are not changed.
+%%
+%% * `location` annotations that would produce a `line` instruction are
+%% kept. All other annotations are cleared.
+%%
+%% * Instructions are repackaged into tuples instead of into the
+%% usual records. The main reason is to avoid violating the types for
+%% the SSA records. We can simplify things a little by linking the
+%% instructions directly instead of putting them into a list.
+
+canonical_is(Is) ->
+ {Can,_} = canonical_is(Is, #{}, []),
+ Can.
+
+canonical_is([#b_set{op=Op,dst=Dst,args=Args0}=I|Is], VarMap0, Acc) ->
+ Args = [canonical_arg(Arg, VarMap0) || Arg <-Args0],
+ Var = {var,map_size(VarMap0)},
+ VarMap = VarMap0#{Dst=>Var},
+ LineAnno = case Op of
+ bs_match ->
+ %% The location annotation for a bs_match instruction
+ %% is only used in warnings, never to emit a `line`
+ %% instruction. Therefore, it should not be included.
+ [];
+ _ ->
+ %% The location annotation will be used in a `line`
+ %% instruction. It must be included.
+ beam_ssa:get_anno(location, I, none)
+ end,
+ canonical_is(Is, VarMap, {Op,LineAnno,Var,Args,Acc});
+canonical_is([#b_ret{arg=Arg}], VarMap, Acc0) ->
+ Acc1 = case Acc0 of
+ {call,_Anno,Var,[#b_local{}|_]=Args,PrevAcc} ->
+ %% This is a tail-recursive call to a local function.
+ %% There will be no line instruction generated;
+ %% thus, the annotation is not significant.
+ {call,[],Var,Args,PrevAcc};
+ _ ->
+ Acc0
+ end,
+ {{ret,canonical_arg(Arg, VarMap),Acc1},VarMap};
+canonical_is([#b_br{bool=#b_var{},fail=Fail}], VarMap, Acc) ->
+ {{br,succ,Fail,Acc},VarMap};
+canonical_is([#b_br{succ=Succ}], VarMap, Acc) ->
+ {{br,Succ,Acc},VarMap};
+canonical_is([], VarMap, Acc) ->
+ {Acc,VarMap}.
+
+canonical_terminator(_L, #b_ret{}=Ret, _Blocks) ->
+ {[Ret],done};
+canonical_terminator(L, #b_br{bool=#b_literal{val=true},succ=Succ}=Br, Blocks) ->
+ #b_blk{is=Is} = map_get(Succ, Blocks),
+ case canonical_terminator_phis(Is, L) of
+ [] ->
+ {[],Succ};
+ [_|_]=Phis ->
+ {Phis ++ [Br],done}
+ end;
+canonical_terminator(_L, #b_br{bool=#b_var{},succ=Succ}=Br, _Blocks) ->
+ {[Br],Succ};
+canonical_terminator(_, _, _) -> none.
+
+canonical_terminator_phis([#b_set{op=phi,args=PhiArgs}=Phi|Is], L) ->
+ {Value,L} = keyfind(L, 2, PhiArgs),
+ [Phi#b_set{op=copy,args=[Value]}|canonical_terminator_phis(Is, L)];
+canonical_terminator_phis([#b_set{op=peek_message}=I|_], L) ->
+ %% We could get stuck into an infinite loop if we allowed the
+ %% comparisons to continue into this block. Force an unequal
+ %% compare with all other predecessors of this block.
+ [I#b_set{op=copy,args=[#b_literal{val=L}]}];
+canonical_terminator_phis(_, _) -> [].
+
+canonical_arg(#b_var{}=Var, VarMap) ->
+ case VarMap of
+ #{Var:=CanonicalVar} ->
+ CanonicalVar;
+ #{} ->
+ Var
+ end;
+canonical_arg(#b_remote{mod=Mod,name=Name}, VarMap) ->
+ {remote,canonical_arg(Mod, VarMap),
+ canonical_arg(Name, VarMap)};
+canonical_arg(Other, _VarMap) -> Other.
+
+%% Shortcut branches to empty blocks if safe.
+
+shortcut_nonempty_block(L, Blocks) ->
+ case map_get(L, Blocks) of
+ #b_blk{is=[],last=#b_br{bool=#b_literal{val=true},succ=Succ}}=Blk ->
+ %% This block is empty.
+ case is_forbidden(Succ, Blocks) of
+ false ->
+ shortcut_nonempty_block(Succ, Blocks);
+ true ->
+ {L,Blk}
+ end;
+ #b_blk{}=Blk ->
+ {L,Blk}
+ end.
+
+is_forbidden(L, Blocks) ->
+ case map_get(L, Blocks) of
+ #b_blk{is=[#b_set{op=phi}|_]} -> true;
+ #b_blk{is=[#b_set{op=peek_message}|_]} -> true;
+ #b_blk{} -> false
+ end.
diff --git a/lib/compiler/src/beam_ssa_type.erl b/lib/compiler/src/beam_ssa_type.erl
index 18e6e73a46..95fc3bb0e9 100644
--- a/lib/compiler/src/beam_ssa_type.erl
+++ b/lib/compiler/src/beam_ssa_type.erl
@@ -27,9 +27,10 @@
-define(UNICODE_INT, #t_integer{elements={0,16#10FFFF}}).
--record(d, {ds :: #{beam_ssa:var_name():=beam_ssa:b_set()},
+-record(d, {ds :: #{beam_ssa:b_var():=beam_ssa:b_set()},
ls :: #{beam_ssa:label():=type_db()},
- sub :: #{beam_ssa:var_name():=beam_ssa:value()}
+ once :: cerl_sets:set(beam_ssa:b_var()),
+ sub :: #{beam_ssa:b_var():=beam_ssa:value()}
}).
-define(ATOM_SET_SIZE, 5).
@@ -56,13 +57,15 @@
Block :: beam_ssa:b_blk().
opt(Linear, Args) ->
+ UsedOnce = used_once(Linear, Args),
Ts = maps:from_list([{V,any} || #b_var{}=V <- Args]),
FakeCall = #b_set{op=call,args=[#b_remote{mod=#b_literal{val=unknown},
name=#b_literal{val=unknown},
arity=0}]},
Defs = maps:from_list([{Var,FakeCall#b_set{dst=Var}} ||
#b_var{}=Var <- Args]),
- D = #d{ds=Defs,ls=#{0=>Ts},sub=#{}},
+ D = #d{ds=Defs,ls=#{0=>Ts,?BADARG_BLOCK=>#{}},
+ once=UsedOnce,sub=#{}},
opt_1(Linear, D).
opt_1([{L,Blk}|Bs], #d{ls=Ls}=D) ->
@@ -425,16 +428,43 @@ opt_terminator(#b_ret{}=Ret, _Ts, _Ds) -> Ret.
update_successors(#b_br{bool=#b_literal{val=true},succ=S}, Ts, D) ->
update_successor(S, Ts, D);
-update_successors(#b_br{bool=#b_var{}=Bool,succ=Succ,fail=Fail}, Ts, D0) ->
- D = update_successor_bool(Bool, false, Fail, Ts, D0),
- SuccTs = infer_types(Bool, Ts, D0),
- update_successor_bool(Bool, true, Succ, SuccTs, D);
-update_successors(#b_switch{arg=#b_var{}=V,fail=Fail,list=List}, Ts, D0) ->
- D = update_successor(Fail, Ts, D0),
- foldl(fun({Val,S}, A) ->
- T = get_type(Val, Ts),
- update_successor(S, Ts#{V=>T}, A)
- end, D, List);
+update_successors(#b_br{bool=#b_var{}=Bool,succ=Succ,fail=Fail}, Ts0, D0) ->
+ case cerl_sets:is_element(Bool, D0#d.once) of
+ true ->
+ %% This variable is defined in this block and is only
+ %% referenced by this br terminator. Therefore, there is
+ %% no need to include the type database passed on to the
+ %% successors of this block.
+ Ts = maps:remove(Bool, Ts0),
+ D = update_successor(Fail, Ts, D0),
+ SuccTs = infer_types(Bool, Ts, D0),
+ update_successor(Succ, SuccTs, D);
+ false ->
+ D = update_successor_bool(Bool, false, Fail, Ts0, D0),
+ SuccTs = infer_types(Bool, Ts0, D0),
+ update_successor_bool(Bool, true, Succ, SuccTs, D)
+ end;
+update_successors(#b_switch{arg=#b_var{}=V,fail=Fail,list=List}, Ts0, D0) ->
+ case cerl_sets:is_element(V, D0#d.once) of
+ true ->
+ %% This variable is defined in this block and is only
+ %% referenced by this switch terminator. Therefore, there is
+ %% no need to include the type database passed on to the
+ %% successors of this block.
+ Ts = maps:remove(V, Ts0),
+ D = update_successor(Fail, Ts, D0),
+ F = fun({_Val,S}, A) ->
+ update_successor(S, Ts, A)
+ end,
+ foldl(F, D, List);
+ false ->
+ D = update_successor(Fail, Ts0, D0),
+ F = fun({Val,S}, A) ->
+ T = get_type(Val, Ts0),
+ update_successor(S, Ts0#{V=>T}, A)
+ end,
+ foldl(F, D, List)
+ end;
update_successors(#b_ret{}, _Ts, D) -> D.
update_successor_bool(#b_var{}=Var, BoolValue, S, Ts, D) ->
@@ -447,6 +477,11 @@ update_successor_bool(#b_var{}=Var, BoolValue, S, Ts, D) ->
update_successor(S, Ts, D)
end.
+update_successor(?BADARG_BLOCK, _Ts, #d{}=D) ->
+ %% We KNOW that no variables are used in the ?BADARG_BLOCK,
+ %% so there is no need to update the type information. That
+ %% can be a huge timesaver for huge functions.
+ D;
update_successor(S, Ts0, #d{ls=Ls}=D) ->
case Ls of
#{S:=Ts1} ->
@@ -766,6 +801,48 @@ simplify_not(#b_br{bool=#b_var{}=V,succ=Succ,fail=Fail}=Br0, Ts, Ds) ->
Br0
end.
+%%%
+%%% Calculate the set of variables that are only used once in the
+%%% block that they are defined in. That will allow us to discard type
+%%% information for variables that will never be referenced by the
+%%% successor blocks, potentially improving compilation times.
+%%%
+
+used_once(Linear, Args) ->
+ Map0 = used_once_1(reverse(Linear), #{}),
+ Map = maps:without(Args, Map0),
+ cerl_sets:from_list(maps:keys(Map)).
+
+used_once_1([{L,#b_blk{is=Is,last=Last}}|Bs], Uses0) ->
+ Uses = used_once_2([Last|reverse(Is)], L, Uses0),
+ used_once_1(Bs, Uses);
+used_once_1([], Uses) -> Uses.
+
+used_once_2([I|Is], L, Uses0) ->
+ Uses = used_once_uses(beam_ssa:used(I), L, Uses0),
+ case I of
+ #b_set{dst=Dst} ->
+ case Uses of
+ #{Dst:=[L]} ->
+ used_once_2(Is, L, Uses);
+ #{} ->
+ used_once_2(Is, L, maps:remove(Dst, Uses))
+ end;
+ _ ->
+ used_once_2(Is, L, Uses)
+ end;
+used_once_2([], _, Uses) -> Uses.
+
+used_once_uses([V|Vs], L, Uses) ->
+ case Uses of
+ #{V:=Us} ->
+ used_once_uses(Vs, L, Uses#{V:=[L|Us]});
+ #{} ->
+ used_once_uses(Vs, L, Uses#{V=>[L]})
+ end;
+used_once_uses([], _, Uses) -> Uses.
+
+
get_types(Values, Ts) ->
[get_type(Val, Ts) || Val <- Values].
-spec get_type(beam_ssa:value(), type_db()) -> type().
diff --git a/lib/compiler/src/beam_trim.erl b/lib/compiler/src/beam_trim.erl
index 1acbedd45b..51ff580a7a 100644
--- a/lib/compiler/src/beam_trim.erl
+++ b/lib/compiler/src/beam_trim.erl
@@ -21,12 +21,11 @@
-module(beam_trim).
-export([module/2]).
--import(lists, [reverse/1,reverse/2,splitwith/2,sort/1]).
+-import(lists, [any/2,member/2,reverse/1,reverse/2,splitwith/2,sort/1]).
-record(st,
- {safe :: gb_sets:set(beam_asm:label()), %Safe labels.
- lbl :: beam_utils:code_index() %Code at each label.
- }).
+ {safe :: cerl_sets:set(beam_asm:label()) %Safe labels.
+ }).
-spec module(beam_utils:module_code(), [compile:option()]) ->
{'ok',beam_utils:module_code()}.
@@ -36,10 +35,15 @@ module({Mod,Exp,Attr,Fs0,Lc}, _Opts) ->
{ok,{Mod,Exp,Attr,Fs,Lc}}.
function({function,Name,Arity,CLabel,Is0}) ->
- %%ok = io:fwrite("~w: ~p\n", [?LINE,{Name,Arity}]),
- St = #st{safe=safe_labels(Is0, []),lbl=beam_utils:index_labels(Is0)},
- Is = trim(Is0, St, []),
- {function,Name,Arity,CLabel,Is}.
+ try
+ St = #st{safe=safe_labels(Is0, [])},
+ Is = trim(Is0, St, []),
+ {function,Name,Arity,CLabel,Is}
+ catch
+ Class:Error:Stack ->
+ io:fwrite("Function: ~w/~w\n", [Name,Arity]),
+ erlang:raise(Class, Error, Stack)
+ end.
trim([{kill,_}|_]=Is0, St, Acc) ->
{Kills0,Is1} = splitwith(fun({kill,_}) -> true;
@@ -47,14 +51,33 @@ trim([{kill,_}|_]=Is0, St, Acc) ->
end, Is0),
Kills = sort(Kills0),
try
- {FrameSize,Layout} = frame_layout(Is1, Kills, St),
- Configs = trim_instructions(Layout),
- try_remap(Configs, Is1, FrameSize)
- of
+ %% Find out the size and layout of the stack frame.
+ %% Example of a layout:
+ %%
+ %% [{kill,{y,0}},{dead,{y,1},{live,{y,2}},{kill,{y,3}}]
+ %%
+ %% That means that y0 and y3 are to be killed, that y1
+ %% has been killed previously, and that y2 is live.
+ {FrameSize,Layout} = frame_layout(Is1, Kills, St),
+
+ %% Calculate all recipes that are not worse in terms
+ %% of estimated execution time. The recipes are ordered
+ %% in descending order from how much they trim.
+ Recipes = trim_recipes(Layout),
+
+ %% Try the recipes in order. A recipe may not work out because
+ %% a register that was previously killed may be
+ %% resurrected. If that happens, the next recipe, which trims
+ %% less, will be tried.
+ try_remap(Recipes, Is1, FrameSize)
+ of
{Is,TrimInstr} ->
+ %% One of the recipes was applied.
trim(Is, St, reverse(TrimInstr)++Acc)
catch
not_possible ->
+ %% No recipe worked out. Use the original kill
+ %% instructions.
trim(Is1, St, reverse(Kills, Acc))
end;
trim([I|Is], St, Acc) ->
@@ -62,34 +85,42 @@ trim([I|Is], St, Acc) ->
trim([], _, Acc) ->
reverse(Acc).
-%% trim_instructions([{kill,R}|{live,R}|{dead,R}]) -> {[Instruction],MapFun}
-%% Figure out the sequence of moves and trim to use.
+%% trim_recipes([{kill,R}|{live,R}|{dead,R}]) -> [Recipe].
+%% Recipe = {Kills,NumberToTrim,Moves}
+%% Kills = [{kill,Y}]
+%% Moves = [{move,SrcY,DstY}]
+%%
+%% Calculate how to best trim the stack and kill the correct
+%% Y registers. Return a list of possible recipes. The best
+%% recipe (the one that trims the most) is first in the list.
+%% All of the recipes are no worse in estimated execution time
+%% than the original sequences of kill instructions.
-trim_instructions(Layout) ->
+trim_recipes(Layout) ->
Cost = length([I || {kill,_}=I <- Layout]),
- trim_instructions_1(Layout, 0, [], {Cost,[]}).
+ trim_recipes_1(Layout, 0, [], {Cost,[]}).
-trim_instructions_1([{kill,{y,Trim0}}|Ks], Trim0, Moves, Config0) ->
+trim_recipes_1([{kill,{y,Trim0}}|Ks], Trim0, Moves, Recipes0) ->
Trim = Trim0 + 1,
- Config = save_config(Ks, Trim, Moves, Config0),
- trim_instructions_1(Ks, Trim, Moves, Config);
-trim_instructions_1([{dead,{y,Trim0}}|Ks], Trim0, Moves, Config0) ->
+ Recipes = save_recipe(Ks, Trim, Moves, Recipes0),
+ trim_recipes_1(Ks, Trim, Moves, Recipes);
+trim_recipes_1([{dead,{y,Trim0}}|Ks], Trim0, Moves, Recipes0) ->
Trim = Trim0 + 1,
- Config = save_config(Ks, Trim, Moves, Config0),
- trim_instructions_1(Ks, Trim, Moves, Config);
-trim_instructions_1([{live,{y,Trim0}=Src}|Ks0], Trim0, Moves0, Config0) ->
+ Recipes = save_recipe(Ks, Trim, Moves, Recipes0),
+ trim_recipes_1(Ks, Trim, Moves, Recipes);
+trim_recipes_1([{live,{y,Trim0}=Src}|Ks0], Trim0, Moves0, Recipes0) ->
case take_last_dead(Ks0) of
none ->
- {_,ConfigList} = Config0,
- ConfigList;
+ {_,RecipesList} = Recipes0,
+ RecipesList;
{Dst,Ks} ->
Trim = Trim0 + 1,
Moves = [{move,Src,Dst}|Moves0],
- Config = save_config(Ks, Trim, Moves, Config0),
- trim_instructions_1(Ks, Trim, Moves, Config)
+ Recipes = save_recipe(Ks, Trim, Moves, Recipes0),
+ trim_recipes_1(Ks, Trim, Moves, Recipes)
end;
-trim_instructions_1([], _, _, {_,ConfigList}) ->
- ConfigList.
+trim_recipes_1([], _, _, {_,RecipesList}) ->
+ RecipesList.
take_last_dead(L) ->
take_last_dead_1(reverse(L)).
@@ -100,28 +131,48 @@ take_last_dead_1([{dead,Reg}|Is]) ->
{Reg,reverse(Is)};
take_last_dead_1(_) -> none.
-save_config(Ks, Trim, Moves, {MaxCost,Acc}=Config) ->
- case config_cost(Ks, Moves) of
- Cost when Cost =< MaxCost ->
- {MaxCost,[{Ks,Trim,Moves}|Acc]};
+save_recipe(Ks, Trim, Moves, {MaxCost,Acc}=Recipes) ->
+ case recipe_cost(Ks, Moves) of
+ Cost when Cost =< MaxCost ->
+ %% The price is right.
+ {MaxCost,[{Ks,Trim,Moves}|Acc]};
_Cost ->
- Config
+ %% Too expensive.
+ Recipes
end.
-config_cost(Ks, Moves) ->
+recipe_cost(Ks, Moves) ->
%% We estimate that a {move,{y,_},{y,_}} instruction is roughly twice as
%% expensive as a {kill,{y,_}} instruction. A {trim,_} instruction is
%% roughly as expensive as a {kill,{y,_}} instruction.
- config_cost_1(Ks, 1+2*length(Moves)).
+ recipe_cost_1(Ks, 1+2*length(Moves)).
-config_cost_1([{kill,_}|Ks], Cost) ->
- config_cost_1(Ks, Cost+1);
-config_cost_1([_|Ks], Cost) ->
- config_cost_1(Ks, Cost);
-config_cost_1([], Cost) -> Cost.
+recipe_cost_1([{kill,_}|Ks], Cost) ->
+ recipe_cost_1(Ks, Cost+1);
+recipe_cost_1([_|Ks], Cost) ->
+ recipe_cost_1(Ks, Cost);
+recipe_cost_1([], Cost) -> Cost.
+
+%% try_remap([Recipe], [Instruction], FrameSize) ->
+%% {[Instruction],[TrimInstruction]}.
+%% Try to renumber Y registers in the instruction stream. The
+%% first rececipe that works will be used.
+%%
+%% This function will issue a `not_possible` exception if none
+%% of the recipes were possible to apply.
+
+try_remap([R|Rs], Is, FrameSize) ->
+ {TrimInstr,Map} = expand_recipe(R, FrameSize),
+ try
+ {remap(Is, Map, []),TrimInstr}
+ catch
+ throw:not_possible ->
+ try_remap(Rs, Is, FrameSize)
+ end;
+try_remap([], _, _) -> throw(not_possible).
-expand_config({Layout,Trim,Moves}, FrameSize) ->
+expand_recipe({Layout,Trim,Moves}, FrameSize) ->
Kills = [Kill || {kill,_}=Kill <- Layout],
{Kills++reverse(Moves, [{trim,Trim,FrameSize-Trim}]),create_map(Trim, Moves)}.
@@ -132,16 +183,16 @@ create_map(Trim, []) ->
(Any) -> Any
end;
create_map(Trim, Moves) ->
- GbTree0 = [{Src,Dst-Trim} || {move,{y,Src},{y,Dst}} <- Moves],
- GbTree = gb_trees:from_orddict(sort(GbTree0)),
- IllegalTargets = gb_sets:from_list([Dst || {move,_,{y,Dst}} <- Moves]),
+ Map0 = [{Src,Dst-Trim} || {move,{y,Src},{y,Dst}} <- Moves],
+ Map = maps:from_list(Map0),
+ IllegalTargets = cerl_sets:from_list([Dst || {move,_,{y,Dst}} <- Moves]),
fun({y,Y0}) when Y0 < Trim ->
- case gb_trees:lookup(Y0, GbTree) of
- {value,Y} -> {y,Y};
- none -> throw(not_possible)
- end;
+ case Map of
+ #{Y0:=Y} -> {y,Y};
+ #{} -> throw(not_possible)
+ end;
({y,Y}) ->
- case gb_sets:is_element(Y, IllegalTargets) of
+ case cerl_sets:is_element(Y, IllegalTargets) of
true -> throw(not_possible);
false -> {y,Y-Trim}
end;
@@ -149,19 +200,15 @@ create_map(Trim, Moves) ->
(Any) -> Any
end.
-try_remap([C|Cs], Is, FrameSize) ->
- {TrimInstr,Map} = expand_config(C, FrameSize),
- try
- {remap(Is, Map, []),TrimInstr}
- catch
- throw:not_possible ->
- try_remap(Cs, Is, FrameSize)
- end;
-try_remap([], _, _) -> throw(not_possible).
-
remap([{block,Bl0}|Is], Map, Acc) ->
Bl = remap_block(Bl0, Map, []),
remap(Is, Map, [{block,Bl}|Acc]);
+remap([{bs_get_tail,Src,Dst,Live}|Is], Map, Acc) ->
+ I = {bs_get_tail,Map(Src),Map(Dst),Live},
+ remap(Is, Map, [I|Acc]);
+remap([{bs_set_position,Src1,Src2}|Is], Map, Acc) ->
+ I = {bs_set_position,Map(Src1),Map(Src2)},
+ remap(Is, Map, [I|Acc]);
remap([{call_fun,_}=I|Is], Map, Acc) ->
remap(Is, Map, [I|Acc]);
remap([{call,_,_}=I|Is], Map, Acc) ->
@@ -205,35 +252,66 @@ remap([return|_]=Is, _, Acc) ->
reverse(Acc, Is);
remap([{line,_}=I|Is], Map, Acc) ->
remap(Is, Map, [I|Acc]).
-
+
remap_block([{set,Ds0,Ss0,Info}|Is], Map, Acc) ->
Ds = [Map(D) || D <- Ds0],
Ss = [Map(S) || S <- Ss0],
remap_block(Is, Map, [{set,Ds,Ss,Info}|Acc]);
remap_block([], _, Acc) -> reverse(Acc).
-
-safe_labels([{label,L},{line,_},{badmatch,{Tag,_}}|Is], Acc) when Tag =/= y ->
- safe_labels(Is, [L|Acc]);
-safe_labels([{label,L},{line,_},{case_end,{Tag,_}}|Is], Acc) when Tag =/= y ->
- safe_labels(Is, [L|Acc]);
-safe_labels([{label,L},{line,_},if_end|Is], Acc) ->
- safe_labels(Is, [L|Acc]);
-safe_labels([{label,L},
- {block,[{set,[{x,0}],[{Tag,_}],move}]},
- {line,_},
- {call_ext,1,{extfunc,erlang,error,1}}|Is], Acc) when Tag =/= y ->
- safe_labels(Is, [L|Acc]);
+
+%% safe_labels([Instruction], Accumulator) -> gb_set()
+%% Build a gb_set of safe labels. The code at a safe
+%% label does not depend on the values in a specific
+%% Y register, only that all Y registers are initialized
+%% so that it safe to scan the stack when an exception
+%% is generated.
+%%
+%% In other words, code at a safe label will continue
+%% to work if Y registers have been renumbered and
+%% the size of the stack frame has changed.
+
+safe_labels([{label,L}|Is], Acc) ->
+ case is_safe_label(Is) of
+ true -> safe_labels(Is, [L|Acc]);
+ false -> safe_labels(Is, Acc)
+ end;
safe_labels([_|Is], Acc) ->
safe_labels(Is, Acc);
-safe_labels([], Acc) -> gb_sets:from_list(Acc).
+safe_labels([], Acc) -> cerl_sets:from_list(Acc).
+
+is_safe_label([{line,_}|Is]) ->
+ is_safe_label(Is);
+is_safe_label([{badmatch,{Tag,_}}|_]) ->
+ Tag =/= y;
+is_safe_label([{case_end,{Tag,_}}|_]) ->
+ Tag =/= y;
+is_safe_label([{try_case_end,{Tag,_}}|_]) ->
+ Tag =/= y;
+is_safe_label([if_end|_]) ->
+ true;
+is_safe_label([{block,Bl}|Is]) ->
+ is_safe_label_block(Bl) andalso is_safe_label(Is);
+is_safe_label([{call_ext,_,{extfunc,M,F,A}}|_]) ->
+ erl_bifs:is_exit_bif(M, F, A);
+is_safe_label(_) -> false.
+
+is_safe_label_block([{set,Ds,Ss,_}|Is]) ->
+ IsYreg = fun({y,_}) -> true;
+ (_) -> false
+ end,
+ %% This instruction is safe if the instruction
+ %% neither reads or writes Y registers.
+ not (any(IsYreg, Ss) orelse any(IsYreg, Ds)) andalso
+ is_safe_label_block(Is);
+is_safe_label_block([]) -> true.
%% frame_layout([Instruction], [{kill,_}], St) ->
%% [{kill,Reg} | {live,Reg} | {dead,Reg}]
%% Figure out the layout of the stack frame.
-frame_layout(Is, Kills, #st{safe=Safe,lbl=D}) ->
+frame_layout(Is, Kills, #st{safe=Safe}) ->
N = frame_size(Is, Safe),
- IsKilled = fun(R) -> beam_utils:is_not_used(R, Is, D) end,
+ IsKilled = fun(R) -> is_not_used(R, Is) end,
{N,frame_layout_1(Kills, 0, N, IsKilled, [])}.
frame_layout_1([{kill,{y,Y}}=I|Ks], Y, N, IsKilled, Acc) ->
@@ -253,6 +331,11 @@ frame_layout_2(Is) -> reverse(Is).
%% frame_size([Instruction], SafeLabels) -> FrameSize
%% Find out the frame size by looking at the code that follows.
+%%
+%% Implicitly, also check that the instructions are a straight
+%% sequence of code that ends in a return. Any branches are
+%% to safe labels (i.e., the code at those labels don't depend
+%% on the contents of any Y register).
frame_size([{block,_}|Is], Safe) ->
frame_size(Is, Safe);
@@ -285,15 +368,92 @@ frame_size([{make_fun2,_,_,_,_}|Is], Safe) ->
frame_size(Is, Safe);
frame_size([{get_map_elements,{f,L},_,_}|Is], Safe) ->
frame_size_branch(L, Is, Safe);
-frame_size([{deallocate,N}|_], _) -> N;
+frame_size([{deallocate,N}|_], _) ->
+ N;
frame_size([{line,_}|Is], Safe) ->
frame_size(Is, Safe);
+frame_size([{bs_set_position,_,_}|Is], Safe) ->
+ frame_size(Is, Safe);
+frame_size([{bs_get_tail,_,_,_}|Is], Safe) ->
+ frame_size(Is, Safe);
frame_size(_, _) -> throw(not_possible).
frame_size_branch(0, Is, Safe) ->
frame_size(Is, Safe);
frame_size_branch(L, Is, Safe) ->
- case gb_sets:is_member(L, Safe) of
+ case cerl_sets:is_element(L, Safe) of
false -> throw(not_possible);
true -> frame_size(Is, Safe)
end.
+
+%% is_not_used(Y, [Instruction]) -> true|false.
+%% Test whether the value of Y is unused in the instruction sequence.
+%% Return true if the value of Y is not used, and false if it is used.
+%%
+%% This function handles the same instructions as frame_size/2. It
+%% assumes that any labels in the instructions are safe labels.
+
+is_not_used(Y, [{apply,_}|Is]) ->
+ is_not_used(Y, Is);
+is_not_used(Y, [{bif,_,{f,_},Ss,Dst}|Is]) ->
+ is_not_used_ss_dst(Y, Ss, Dst, Is);
+is_not_used(Y, [{block,Bl}|Is]) ->
+ case is_not_used_block(Y, Bl) of
+ used -> false;
+ killed -> true;
+ transparent -> is_not_used(Y, Is)
+ end;
+is_not_used(Y, [{bs_get_tail,Src,Dst,_}|Is]) ->
+ is_not_used_ss_dst(Y, [Src], Dst, Is);
+is_not_used(Y, [{bs_init,_,_,_,Ss,Dst}|Is]) ->
+ is_not_used_ss_dst(Y, Ss, Dst, Is);
+is_not_used(Y, [{bs_put,{f,_},_,Ss}|Is]) ->
+ not member(Y, Ss) andalso is_not_used(Y, Is);
+is_not_used(Y, [{bs_set_position,Src1,Src2}|Is]) ->
+ Y =/= Src1 andalso Y =/= Src2 andalso
+ is_not_used(Y, Is);
+is_not_used(Y, [{call,_,_}|Is]) ->
+ is_not_used(Y, Is);
+is_not_used(Y, [{call_ext,_,_}=I|Is]) ->
+ beam_jump:is_exit_instruction(I) orelse is_not_used(Y, Is);
+is_not_used(Y, [{call_fun,_}|Is]) ->
+ is_not_used(Y, Is);
+is_not_used(_Y, [{deallocate,_}|_]) ->
+ true;
+is_not_used(Y, [{gc_bif,_,{f,_},_Live,Ss,Dst}|Is]) ->
+ is_not_used_ss_dst(Y, Ss, Dst, Is);
+is_not_used(Y, [{get_map_elements,{f,_},S,{list,List}}|Is]) ->
+ {Ss,Ds} = beam_utils:split_even(List),
+ case member(Y, [S|Ss]) of
+ true ->
+ false;
+ false ->
+ member(Y, Ds) orelse is_not_used(Y, Is)
+ end;
+is_not_used(Y, [{kill,Yreg}|Is]) ->
+ Y =:= Yreg orelse is_not_used(Y, Is);
+is_not_used(Y, [{line,_}|Is]) ->
+ is_not_used(Y, Is);
+is_not_used(Y, [{make_fun2,_,_,_,_}|Is]) ->
+ is_not_used(Y, Is);
+is_not_used(Y, [{test,_,_,Ss}|Is]) ->
+ not member(Y, Ss) andalso is_not_used(Y, Is);
+is_not_used(Y, [{test,_Op,{f,_},_Live,Ss,Dst}|Is]) ->
+ is_not_used_ss_dst(Y, Ss, Dst, Is).
+
+is_not_used_block(Y, [{set,Ds,Ss,_}|Is]) ->
+ case member(Y, Ss) of
+ true ->
+ used;
+ false ->
+ case member(Y, Ds) of
+ true ->
+ killed;
+ false ->
+ is_not_used_block(Y, Is)
+ end
+ end;
+is_not_used_block(_Y, []) -> transparent.
+
+is_not_used_ss_dst(Y, Ss, Dst, Is) ->
+ not member(Y, Ss) andalso (Y =:= Dst orelse is_not_used(Y, Is)).
diff --git a/lib/compiler/src/beam_utils.erl b/lib/compiler/src/beam_utils.erl
index 5156a04f6b..6e6574c0b3 100644
--- a/lib/compiler/src/beam_utils.erl
+++ b/lib/compiler/src/beam_utils.erl
@@ -18,23 +18,14 @@
%% %CopyrightEnd%
%%
%% Purpose : Common utilities used by several optimization passes.
-%%
+%%
-module(beam_utils).
--export([is_killed/3,is_killed_at/3,is_not_used/3,
- empty_label_index/0,index_label/3,index_labels/1,replace_labels/4,
- code_at/2,is_pure_test/1,
- split_even/1]).
+-export([replace_labels/4,is_pure_test/1,split_even/1]).
-export_type([code_index/0,module_code/0,instruction/0]).
--import(lists, [map/2,member/2,sort/1,reverse/1]).
-
--define(is_const(Val), (Val =:= nil orelse
- element(1, Val) =:= integer orelse
- element(1, Val) =:= float orelse
- element(1, Val) =:= atom orelse
- element(1, Val) =:= literal)).
+-import(lists, [map/2,reverse/1]).
%% instruction() describes all instructions that are used during optimization
%% (from beam_a to beam_z).
@@ -52,97 +43,6 @@
-type fail() :: beam_asm:fail() | 'fail'.
-type test() :: {'test',atom(),fail(),[beam_asm:src()]} |
{'test',atom(),fail(),integer(),list(),beam_asm:reg()}.
--type result_cache() :: gb_trees:tree(beam_asm:label(), 'killed' | 'used').
-
--record(live,
- {lbl :: code_index(), %Label to code index.
- res :: result_cache()}). %Result cache for each label.
-
-%% is_killed(Register, [Instruction], State) -> true|false
-%% Determine whether a register is killed by the instruction sequence.
-%% If true is returned, it means that the register will not be
-%% referenced in ANY way (not even indirectly by an allocate instruction);
-%% i.e. it is OK to enter the instruction sequence with Register
-%% containing garbage.
-%%
-%% The state (constructed by index_instructions/1) is used to allow us
-%% to determine the kill state across branches.
-
--spec is_killed(beam_asm:reg(), [instruction()], code_index()) -> boolean().
-
-is_killed(R, Is, D) ->
- St = #live{lbl=D,res=gb_trees:empty()},
- case check_liveness(R, Is, St) of
- {killed,_} -> true;
- {exit_not_used,_} -> false;
- {_,_} -> false
- end.
-
-%% is_killed_at(Reg, Lbl, State) -> true|false
-%% Determine whether Reg is killed at label Lbl.
-
--spec is_killed_at(beam_asm:reg(), beam_asm:label(), code_index()) -> boolean().
-
-is_killed_at(R, Lbl, D) when is_integer(Lbl) ->
- St0 = #live{lbl=D,res=gb_trees:empty()},
- case check_liveness_at(R, Lbl, St0) of
- {killed,_} -> true;
- {exit_not_used,_} -> false;
- {_,_} -> false
- end.
-
-%% is_not_used(Register, [Instruction], State) -> true|false
-%% Determine whether a register is never used in the instruction sequence
-%% (it could still be referenced by an allocate instruction, meaning that
-%% it MUST be initialized, but that its value does not matter).
-%% The state is used to allow us to determine the usage state
-%% across branches.
-
--spec is_not_used(beam_asm:reg(), [instruction()], code_index()) -> boolean().
-
-is_not_used(R, Is, D) ->
- St = #live{lbl=D,res=gb_trees:empty()},
- case check_liveness(R, Is, St) of
- {used,_} -> false;
- {exit_not_used,_} -> true;
- {_,_} -> true
- end.
-
-%% index_labels(FunctionIs) -> State
-%% Index the instruction sequence so that we can quickly
-%% look up the instruction following a specific label.
-
--spec index_labels([instruction()]) -> code_index().
-
-index_labels(Is) ->
- index_labels_1(Is, []).
-
-%% empty_label_index() -> State
-%% Create an empty label index.
-
--spec empty_label_index() -> code_index().
-
-empty_label_index() ->
- gb_trees:empty().
-
-%% index_label(Label, [Instruction], State) -> State
-%% Add an index for a label.
-
--spec index_label(beam_asm:label(), [instruction()], code_index()) ->
- code_index().
-
-index_label(Lbl, Is0, Acc) ->
- Is = drop_labels(Is0),
- gb_trees:enter(Lbl, Is, Acc).
-
-
-%% code_at(Label, State) -> [I].
-%% Retrieve the code at the given label.
-
--spec code_at(beam_asm:label(), code_index()) -> [instruction()].
-
-code_at(L, Ll) ->
- gb_trees:get(L, Ll).
%% replace_labels(FunctionIs, Tail, ReplaceDb, Fallback) -> FunctionIs.
%% Replace all labels in instructions according to the ReplaceDb.
@@ -175,7 +75,7 @@ is_pure_test({test,test_arity,_,[_,_]}) -> true;
is_pure_test({test,has_map_fields,_,[_|_]}) -> true;
is_pure_test({test,is_bitstr,_,[_]}) -> true;
is_pure_test({test,is_function2,_,[_,_]}) -> true;
-is_pure_test({test,Op,_,Ops}) ->
+is_pure_test({test,Op,_,Ops}) ->
erl_internal:new_type_test(Op, length(Ops)).
%% split_even/1
@@ -189,438 +89,6 @@ split_even(Rs) -> split_even(Rs, [], []).
%%% Local functions.
%%%
-
-%% check_liveness(Reg, [Instruction], #live{}) ->
-%% {killed | not_used | used, #live{}}
-%% Find out whether Reg is used or killed in instruction sequence.
-%%
-%% killed - Reg is assigned or killed by an allocation instruction.
-%% not_used - the value of Reg is not used, but Reg must not be garbage
-%% exit_not_used - the value of Reg is not used, but must not be garbage
-%% because the stack will be scanned because an
-%% exit BIF will raise an exception
-%% used - Reg is used
-
-check_liveness({fr,_}, _, St) ->
- %% Conservatively always consider the floating point register used.
- {used,St};
-check_liveness(R, [{block,Blk}|Is], St0) ->
- case check_liveness_block(R, Blk, St0) of
- {transparent,St1} ->
- check_liveness(R, Is, St1);
- {alloc_used,St1} ->
- %% Used by an allocating instruction, but value not referenced.
- %% Must check the rest of the instructions.
- not_used(check_liveness(R, Is, St1));
- {Other,_}=Res when is_atom(Other) ->
- Res
- end;
-check_liveness(R, [{label,_}|Is], St) ->
- check_liveness(R, Is, St);
-check_liveness(R, [{test,_,{f,Fail},As}|Is], St0) ->
- case member(R, As) of
- true ->
- {used,St0};
- false ->
- case check_liveness_at(R, Fail, St0) of
- {killed,St1} ->
- check_liveness(R, Is, St1);
- {exit_not_used,St1} ->
- not_used(check_liveness(R, Is, St1));
- {not_used,St1} ->
- not_used(check_liveness(R, Is, St1));
- {used,_}=Used ->
- Used
- end
- end;
-check_liveness(R, [{test,Op,Fail,Live,Ss,Dst}|Is], St) ->
- %% Check this instruction as a block to get a less conservative
- %% result if the caller is is_not_used/3.
- Block = [{set,[Dst],Ss,{alloc,Live,{bif,Op,Fail}}}],
- check_liveness(R, [{block,Block}|Is], St);
-check_liveness(R, [{select,_,R,_,_}|_], St) ->
- {used,St};
-check_liveness(R, [{select,_,_,Fail,Branches}|_], St) ->
- check_liveness_everywhere(R, [Fail|Branches], St);
-check_liveness(R, [{jump,{f,F}}|_], St) ->
- check_liveness_at(R, F, St);
-check_liveness(R, [{case_end,Used}|_], St) ->
- check_liveness_exit(R, Used, St);
-check_liveness(R, [{try_case_end,Used}|_], St) ->
- check_liveness_exit(R, Used, St);
-check_liveness(R, [{badmatch,Used}|_], St) ->
- check_liveness_exit(R, Used, St);
-check_liveness(R, [if_end|_], St) ->
- check_liveness_exit(R, ignore, St);
-check_liveness(R, [{func_info,_,_,Ar}|_], St) ->
- case R of
- {x,X} when X < Ar -> {used,St};
- _ -> {killed,St}
- end;
-check_liveness(R, [{kill,R}|_], St) ->
- {killed,St};
-check_liveness(R, [{kill,_}|Is], St) ->
- check_liveness(R, Is, St);
-check_liveness(R, [{bs_init,_,_,none,Ss,Dst}|Is], St) ->
- case member(R, Ss) of
- true ->
- {used,St};
- false ->
- if
- R =:= Dst -> {killed,St};
- true -> check_liveness(R, Is, St)
- end
- end;
-check_liveness(R, [{bs_init,_,_,Live,Ss,Dst}|Is], St) ->
- case R of
- {x,X} ->
- case member(R, Ss) of
- true ->
- {used,St};
- false ->
- if
- X < Live ->
- not_used(check_liveness(R, Is, St));
- true ->
- {killed,St}
- end
- end;
- {y,_} ->
- case member(R, Ss) of
- true -> {used,St};
- false ->
- %% If the exception is taken, the stack may
- %% be scanned. Therefore the register is not
- %% guaranteed to be killed.
- if
- R =:= Dst -> {not_used,St};
- true -> not_used(check_liveness(R, Is, St))
- end
- end
- end;
-check_liveness(R, [{deallocate,_}|Is], St) ->
- case R of
- {y,_} -> {killed,St};
- _ -> check_liveness(R, Is, St)
- end;
-check_liveness({x,_}=R, [return|_], St) ->
- case R of
- {x,0} -> {used,St};
- {x,_} -> {killed,St}
- end;
-check_liveness(R, [{call,Live,_}|Is], St) ->
- case R of
- {x,X} when X < Live -> {used,St};
- {x,_} -> {killed,St};
- {y,_} -> not_used(check_liveness(R, Is, St))
- end;
-check_liveness(R, [{call_ext,Live,_}=I|Is], St) ->
- case R of
- {x,X} when X < Live ->
- {used,St};
- {x,_} ->
- {killed,St};
- {y,_} ->
- case beam_jump:is_exit_instruction(I) of
- false ->
- not_used(check_liveness(R, Is, St));
- true ->
- %% We must make sure we don't check beyond this
- %% instruction or we will fall through into random
- %% unrelated code and get stuck in a loop.
- {exit_not_used,St}
- end
- end;
-check_liveness(R, [{call_fun,Live}|Is], St) ->
- case R of
- {x,X} when X =< Live -> {used,St};
- {x,_} -> {killed,St};
- {y,_} -> not_used(check_liveness(R, Is, St))
- end;
-check_liveness(R, [{apply,Args}|Is], St) ->
- case R of
- {x,X} when X < Args+2 -> {used,St};
- {x,_} -> {killed,St};
- {y,_} -> not_used(check_liveness(R, Is, St))
- end;
-check_liveness(R, [{bif,Op,Fail,Ss,D}|Is], St) ->
- Set = {set,[D],Ss,{bif,Op,Fail}},
- check_liveness(R, [{block,[Set]}|Is], St);
-check_liveness(R, [{gc_bif,Op,{f,Fail},Live,Ss,D}|Is], St) ->
- Set = {set,[D],Ss,{alloc,Live,{gc_bif,Op,Fail}}},
- check_liveness(R, [{block,[Set]}|Is], St);
-check_liveness(R, [{bs_put,{f,0},_,Ss}|Is], St) ->
- case member(R, Ss) of
- true -> {used,St};
- false -> check_liveness(R, Is, St)
- end;
-check_liveness(R, [{bs_restore2,S,_}|Is], St) ->
- case R of
- S -> {used,St};
- _ -> check_liveness(R, Is, St)
- end;
-check_liveness(R, [{bs_save2,S,_}|Is], St) ->
- case R of
- S -> {used,St};
- _ -> check_liveness(R, Is, St)
- end;
-check_liveness(R, [{move,S,D}|Is], St) ->
- case R of
- S -> {used,St};
- D -> {killed,St};
- _ -> check_liveness(R, Is, St)
- end;
-check_liveness(R, [{make_fun2,_,_,_,NumFree}|Is], St) ->
- case R of
- {x,X} when X < NumFree -> {used,St};
- {x,_} -> {killed,St};
- {y,_} -> not_used(check_liveness(R, Is, St))
- end;
-check_liveness(R, [{'catch'=Op,Y,Fail}|Is], St) ->
- Set = {set,[Y],[],{try_catch,Op,Fail}},
- check_liveness(R, [{block,[Set]}|Is], St);
-check_liveness(R, [{'try'=Op,Y,Fail}|Is], St) ->
- Set = {set,[Y],[],{try_catch,Op,Fail}},
- check_liveness(R, [{block,[Set]}|Is], St);
-check_liveness(R, [{try_end,Y}|Is], St) ->
- case R of
- Y ->
- {killed,St};
- {y,_} ->
- %% y registers will be used if an exception occurs and
- %% control transfers to the label given in the previous
- %% try/2 instruction.
- {used,St};
- _ ->
- check_liveness(R, Is, St)
- end;
-check_liveness(R, [{catch_end,Y}|Is], St) ->
- case R of
- Y -> {killed,St};
- _ -> check_liveness(R, Is, St)
- end;
-check_liveness(R, [{get_tuple_element,S,_,D}|Is], St) ->
- case R of
- S -> {used,St};
- D -> {killed,St};
- _ -> check_liveness(R, Is, St)
- end;
-check_liveness(R, [{loop_rec,{f,_},{x,0}}|_], St) ->
- case R of
- {x,_} ->
- {killed,St};
- _ ->
- %% y register. Rarely happens. Be very conversative and
- %% assume it's used.
- {used,St}
- end;
-check_liveness(R, [{loop_rec_end,{f,Fail}}|_], St) ->
- check_liveness_at(R, Fail, St);
-check_liveness(R, [{line,_}|Is], St) ->
- check_liveness(R, Is, St);
-check_liveness(R, [{get_map_elements,{f,Fail},S,{list,L}}|Is], St0) ->
- {Ss,Ds} = split_even(L),
- case member(R, [S|Ss]) of
- true ->
- {used,St0};
- false ->
- case check_liveness_at(R, Fail, St0) of
- {killed,St}=Killed ->
- case member(R, Ds) of
- true -> Killed;
- false -> check_liveness(R, Is, St)
- end;
- Other ->
- Other
- end
- end;
-check_liveness(R, [{put_map,F,Op,S,D,Live,{list,Puts}}|Is], St) ->
- Set = {set,[D],[S|Puts],{alloc,Live,{put_map,Op,F}}},
- check_liveness(R, [{block,[Set]}||Is], St);
-check_liveness(R, [{put_tuple,Ar,D}|Is], St) ->
- Set = {set,[D],[],{put_tuple,Ar}},
- check_liveness(R, [{block,[Set]}||Is], St);
-check_liveness(R, [{put_list,S1,S2,D}|Is], St) ->
- Set = {set,[D],[S1,S2],put_list},
- check_liveness(R, [{block,[Set]}||Is], St);
-check_liveness(R, [{test_heap,N,Live}|Is], St) ->
- I = {block,[{set,[],[],{alloc,Live,{nozero,nostack,N,[]}}}]},
- check_liveness(R, [I|Is], St);
-check_liveness(R, [{allocate_zero,N,Live}|Is], St) ->
- I = {block,[{set,[],[],{alloc,Live,{zero,N,0,[]}}}]},
- check_liveness(R, [I|Is], St);
-check_liveness(R, [{get_hd,S,D}|Is], St) ->
- I = {block,[{set,[D],[S],get_hd}]},
- check_liveness(R, [I|Is], St);
-check_liveness(R, [{get_tl,S,D}|Is], St) ->
- I = {block,[{set,[D],[S],get_tl}]},
- check_liveness(R, [I|Is], St);
-check_liveness(R, [remove_message|Is], St) ->
- check_liveness(R, Is, St);
-check_liveness({x,X}, [build_stacktrace|_], St) when X > 0 ->
- {killed,St};
-check_liveness(R, [{recv_mark,_}|Is], St) ->
- check_liveness(R, Is, St);
-check_liveness(R, [{recv_set,_}|Is], St) ->
- check_liveness(R, Is, St);
-check_liveness(R, [{'%',_}|Is], St) ->
- check_liveness(R, Is, St);
-check_liveness(_R, Is, St) when is_list(Is) ->
- %% Not implemented. Conservatively assume that the register is used.
- {used,St}.
-
-check_liveness_everywhere(R, Lbls, St0) ->
- check_liveness_everywhere_1(R, Lbls, killed, St0).
-
-check_liveness_everywhere_1(R, [{f,Lbl}|T], Res0, St0) ->
- {Res1,St} = check_liveness_at(R, Lbl, St0),
- Res = case Res1 of
- killed -> Res0;
- _ -> Res1
- end,
- case Res of
- used -> {used,St};
- _ -> check_liveness_everywhere_1(R, T, Res, St)
- end;
-check_liveness_everywhere_1(R, [_|T], Res, St) ->
- check_liveness_everywhere_1(R, T, Res, St);
-check_liveness_everywhere_1(_, [], Res, St) ->
- {Res,St}.
-
-check_liveness_at(R, Lbl, #live{lbl=Ll,res=ResMemorized}=St0) ->
- case gb_trees:lookup(Lbl, ResMemorized) of
- {value,Res} ->
- {Res,St0};
- none ->
- {Res,St} = case gb_trees:lookup(Lbl, Ll) of
- {value,Is} -> check_liveness(R, Is, St0);
- none -> {used,St0}
- end,
- {Res,St#live{res=gb_trees:insert(Lbl, Res, St#live.res)}}
- end.
-
-not_used({used,_}=Res) -> Res;
-not_used({_,St}) -> {not_used,St}.
-
-check_liveness_exit(R, R, St) -> {used,St};
-check_liveness_exit({x,_}, _, St) -> {killed,St};
-check_liveness_exit({y,_}, _, St) -> {exit_not_used,St}.
-
-%% check_liveness_block(Reg, [Instruction], State) ->
-%% {killed | not_used | used | alloc_used | transparent,State'}
-%% Finds out how Reg is used in the instruction sequence inside a block.
-%% Returns one of:
-%% killed - Reg is assigned a new value or killed by an
-%% allocation instruction
-%% not_used - The value is not used, but the register is referenced
-%% e.g. by an allocation instruction
-%% transparent - Reg is neither used nor killed
-%% alloc_used - Used only in an allocate instruction
-%% used - Reg is explicitly used by an instruction
-%%
-%% Annotations are not allowed.
-%%
-%% (Unknown instructions will cause an exception.)
-
-check_liveness_block({x,X}=R, [{set,Ds,Ss,{alloc,Live,Op}}|Is], St0) ->
- if
- X >= Live ->
- {killed,St0};
- true ->
- case check_liveness_block_1(R, Ss, Ds, Op, Is, St0) of
- {transparent,St} -> {alloc_used,St};
- {_,_}=Res -> not_used(Res)
- end
- end;
-check_liveness_block({y,_}=R, [{set,Ds,Ss,{alloc,_Live,Op}}|Is], St0) ->
- case check_liveness_block_1(R, Ss, Ds, Op, Is, St0) of
- {transparent,St} -> {alloc_used,St};
- {_,_}=Res -> not_used(Res)
- end;
-check_liveness_block({y,_}=R, [{set,Ds,Ss,{try_catch,_,Op}}|Is], St0) ->
- case Ds of
- [R] ->
- {killed,St0};
- _ ->
- case check_liveness_block_1(R, Ss, Ds, Op, Is, St0) of
- {exit_not_used,St} ->
- {used,St};
- {transparent,St} ->
- %% Conservatively assumed that it is used.
- {used,St};
- {_,_}=Res ->
- Res
- end
- end;
-check_liveness_block(R, [{set,Ds,Ss,Op}|Is], St) ->
- check_liveness_block_1(R, Ss, Ds, Op, Is, St);
-check_liveness_block(_, [], St) -> {transparent,St}.
-
-check_liveness_block_1(R, Ss, Ds, Op, Is, St0) ->
- case member(R, Ss) of
- true ->
- {used,St0};
- false ->
- case check_liveness_block_2(R, Op, Ss, St0) of
- {killed,St} ->
- case member(R, Ds) of
- true -> {killed,St};
- false -> check_liveness_block(R, Is, St)
- end;
- {exit_not_used,St} ->
- case member(R, Ds) of
- true -> {exit_not_used,St};
- false -> check_liveness_block(R, Is, St)
- end;
- {not_used,St} ->
- not_used(case member(R, Ds) of
- true -> {killed,St};
- false -> check_liveness_block(R, Is, St)
- end);
- {used,St} ->
- {used,St}
- end
- end.
-
-check_liveness_block_2(R, {gc_bif,Op,{f,Lbl}}, Ss, St) ->
- check_liveness_block_3(R, Lbl, {Op,length(Ss)}, St);
-check_liveness_block_2(R, {bif,Op,{f,Lbl}}, Ss, St) ->
- Arity = length(Ss),
- case erl_internal:comp_op(Op, Arity) orelse
- erl_internal:new_type_test(Op, Arity) of
- true ->
- {killed,St};
- false ->
- check_liveness_block_3(R, Lbl, {Op,length(Ss)}, St)
- end;
-check_liveness_block_2(R, {put_map,_Op,{f,Lbl}}, _Ss, St) ->
- check_liveness_block_3(R, Lbl, {unsafe,0}, St);
-check_liveness_block_2(_, _, _, St) ->
- {killed,St}.
-
-check_liveness_block_3({x,_}, 0, _FA, St) ->
- {killed,St};
-check_liveness_block_3({y,_}, 0, {F,A}, St) ->
- %% If the exception is thrown, the stack may be scanned,
- %% thus implicitly using the y register.
- case erl_bifs:is_safe(erlang, F, A) of
- true -> {killed,St};
- false -> {used,St}
- end;
-check_liveness_block_3(R, Lbl, _FA, St0) ->
- check_liveness_at(R, Lbl, St0).
-
-index_labels_1([{label,Lbl}|Is0], Acc) ->
- Is = drop_labels(Is0),
- index_labels_1(Is0, [{Lbl,Is}|Acc]);
-index_labels_1([_|Is], Acc) ->
- index_labels_1(Is, Acc);
-index_labels_1([], Acc) -> gb_trees:from_orddict(sort(Acc)).
-
-drop_labels([{label,_}|Is]) -> drop_labels(Is);
-drop_labels(Is) -> Is.
-
-
replace_labels_1([{test,Test,{f,Lbl},Ops}|Is], Acc, D, Fb) ->
replace_labels_1(Is, [{test,Test,{f,label(Lbl, D, Fb)},Ops}|Acc], D, Fb);
replace_labels_1([{test,Test,{f,Lbl},Live,Ops,Dst}|Is], Acc, D, Fb) ->
@@ -676,8 +144,6 @@ label(Old, D, Fb) ->
_ -> Fb(Old)
end.
-%% live_opt/4.
-
split_even([], Ss, Ds) ->
{reverse(Ss),reverse(Ds)};
split_even([S,D|Rs], Ss, Ds) ->
diff --git a/lib/compiler/src/beam_validator.erl b/lib/compiler/src/beam_validator.erl
index 7d908df3bf..1945faba7f 100644
--- a/lib/compiler/src/beam_validator.erl
+++ b/lib/compiler/src/beam_validator.erl
@@ -1366,8 +1366,10 @@ kill_aliases(Reg, #st{aliases=Aliases0}=St) ->
St
end.
-set_type(Type, {x,_}=Reg, Vst) -> set_type_reg(Type, Reg, Vst);
-set_type(Type, {y,_}=Reg, Vst) -> set_type_y(Type, Reg, Vst);
+set_type(Type, {x,_}=Reg, Vst) ->
+ set_type_reg(Type, Reg, Reg, Vst);
+set_type(Type, {y,_}=Reg, Vst) ->
+ set_type_reg(Type, Reg, Reg, Vst);
set_type(_, _, #vst{}=Vst) -> Vst.
set_type_reg(Type, Src, Dst, Vst) ->
diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl
index 27e6e8fe00..14c8c5b4ab 100644
--- a/lib/compiler/src/compile.erl
+++ b/lib/compiler/src/compile.erl
@@ -823,6 +823,9 @@ kernel_passes() ->
{pass,beam_kernel_to_ssa},
{iff,dssa,{listing,"ssa"}},
{iff,ssalint,{pass,beam_ssa_lint}},
+ {unless,no_share_opt,{pass,beam_ssa_share}},
+ {iff,dssashare,{listing,"ssashare"}},
+ {iff,ssalint,{pass,beam_ssa_lint}},
{unless,no_bsm_opt,{pass,beam_ssa_bsm}},
{iff,dssabsm,{listing,"ssabsm"}},
{iff,ssalint,{pass,beam_ssa_lint}},
@@ -868,7 +871,9 @@ asm_passes() ->
%% need to do a few clean-ups to code.
{iff,no_postopt,[{pass,beam_clean}]},
+ {iff,diffable,?pass(diffable)},
{pass,beam_z},
+ {iff,diffable,{listing,"S"}},
{iff,dz,{listing,"z"}},
{iff,dopt,{listing,"optimize"}},
{iff,'S',{listing,"S"}},
@@ -1926,6 +1931,39 @@ restore_expand_module([F|Fs]) ->
[F|restore_expand_module(Fs)];
restore_expand_module([]) -> [].
+%%%
+%%% Transform the BEAM code to make it more friendly for
+%%% diffing: using function names instead of labels for
+%%% local calls and number labels relative to each function.
+%%%
+
+diffable(Code0, St) ->
+ {Mod,Exp,Attr,Fs0,NumLabels} = Code0,
+ EntryLabels0 = [{Entry,{Name,Arity}} ||
+ {function,Name,Arity,Entry,_} <- Fs0],
+ EntryLabels = maps:from_list(EntryLabels0),
+ Fs = [diffable_fix_function(F, EntryLabels) || F <- Fs0],
+ Code = {Mod,Exp,Attr,Fs,NumLabels},
+ {ok,Code,St}.
+
+diffable_fix_function({function,Name,Arity,Entry0,Is0}, LabelMap0) ->
+ Entry = maps:get(Entry0, LabelMap0),
+ {Is1,LabelMap} = diffable_label_map(Is0, 1, LabelMap0, []),
+ Fb = fun(Old) -> error({no_fb,Old}) end,
+ Is = beam_utils:replace_labels(Is1, [], LabelMap, Fb),
+ {function,Name,Arity,Entry,Is}.
+
+diffable_label_map([{label,Old}|Is], New, Map, Acc) ->
+ case Map of
+ #{Old:=NewLabel} ->
+ diffable_label_map(Is, New, Map, [{label,NewLabel}|Acc]);
+ #{} ->
+ diffable_label_map(Is, New+1, Map#{Old=>New}, [{label,New}|Acc])
+ end;
+diffable_label_map([I|Is], New, Map, Acc) ->
+ diffable_label_map(Is, New, Map, [I|Acc]);
+diffable_label_map([], _New, Map, Acc) ->
+ {Acc,Map}.
-spec options() -> 'ok'.
@@ -2063,6 +2101,7 @@ pre_load() ->
beam_ssa_opt,
beam_ssa_pre_codegen,
beam_ssa_recv,
+ beam_ssa_share,
beam_ssa_type,
beam_trim,
beam_utils,
diff --git a/lib/compiler/src/compiler.app.src b/lib/compiler/src/compiler.app.src
index 86259270bd..1472e3fde1 100644
--- a/lib/compiler/src/compiler.app.src
+++ b/lib/compiler/src/compiler.app.src
@@ -45,6 +45,7 @@
beam_ssa_pp,
beam_ssa_pre_codegen,
beam_ssa_recv,
+ beam_ssa_share,
beam_ssa_type,
beam_trim,
beam_utils,
diff --git a/lib/compiler/src/erl_bifs.erl b/lib/compiler/src/erl_bifs.erl
index 71ab0e872a..ce9762899e 100644
--- a/lib/compiler/src/erl_bifs.erl
+++ b/lib/compiler/src/erl_bifs.erl
@@ -108,6 +108,7 @@ is_pure(erlang, list_to_atom, 1) -> true;
is_pure(erlang, list_to_binary, 1) -> true;
is_pure(erlang, list_to_float, 1) -> true;
is_pure(erlang, list_to_integer, 1) -> true;
+is_pure(erlang, list_to_integer, 2) -> true;
is_pure(erlang, list_to_pid, 1) -> true;
is_pure(erlang, list_to_tuple, 1) -> true;
is_pure(erlang, max, 2) -> true;
diff --git a/lib/compiler/src/sys_core_fold.erl b/lib/compiler/src/sys_core_fold.erl
index d848cd8f19..43c99be982 100644
--- a/lib/compiler/src/sys_core_fold.erl
+++ b/lib/compiler/src/sys_core_fold.erl
@@ -2667,12 +2667,20 @@ opt_build_stacktrace(#c_let{vars=[#c_var{name=Cooked}],
#c_call{module=#c_literal{val=erlang},
name=#c_literal{val=raise},
args=[Class,Exp,#c_var{name=Cooked}]} ->
- %% The stacktrace is only used in a call to erlang:raise/3.
- %% There is no need to build the stacktrace. Replace the
- %% call to erlang:raise/3 with the the raw_raise/3 instruction,
- %% which will use a raw stacktrace.
- #c_primop{name=#c_literal{val=raw_raise},
- args=[Class,Exp,RawStk]};
+ case core_lib:is_var_used(Cooked, #c_cons{hd=Class,tl=Exp}) of
+ true ->
+ %% Not safe. The stacktrace is used in the class or
+ %% reason.
+ Let;
+ false ->
+ %% The stacktrace is only used in the last
+ %% argument for erlang:raise/3. There is no need
+ %% to build the stacktrace. Replace the call to
+ %% erlang:raise/3 with the the raw_raise/3
+ %% instruction, which will use a raw stacktrace.
+ #c_primop{name=#c_literal{val=raw_raise},
+ args=[Class,Exp,RawStk]}
+ end;
#c_let{vars=[#c_var{name=V}],arg=Arg,body=B0} when V =/= Cooked ->
case core_lib:is_var_used(Cooked, Arg) of
false ->