Diffstat (limited to 'lib/compiler')
 lib/compiler/src/beam_ssa_opt.erl          | 164
 lib/compiler/src/beam_ssa_pre_codegen.erl  | 195
 lib/compiler/src/beam_validator.erl        |   2
 lib/compiler/test/beam_validator_SUITE.erl |  17
 4 files changed, 334 insertions(+), 44 deletions(-)
diff --git a/lib/compiler/src/beam_ssa_opt.erl b/lib/compiler/src/beam_ssa_opt.erl
index 3685c09a2b..2c898ba6f8 100644
--- a/lib/compiler/src/beam_ssa_opt.erl
+++ b/lib/compiler/src/beam_ssa_opt.erl
@@ -39,7 +39,7 @@
-include("beam_ssa_opt.hrl").
--import(lists, [append/1,duplicate/2,foldl/3,keyfind/3,member/2,
+-import(lists, [all/2,append/1,duplicate/2,foldl/3,keyfind/3,member/2,
reverse/1,reverse/2,
splitwith/2,sort/1,takewhile/2,unzip/1]).
@@ -142,6 +142,7 @@ finish_1([], _StMap) ->
prologue_passes(Opts) ->
Ps = [?PASS(ssa_opt_split_blocks),
?PASS(ssa_opt_coalesce_phis),
+ ?PASS(ssa_opt_tail_phis),
?PASS(ssa_opt_element),
?PASS(ssa_opt_linearize),
?PASS(ssa_opt_tuple_size),
@@ -157,6 +158,7 @@ repeated_passes(Opts) ->
?PASS(ssa_opt_bs_puts),
?PASS(ssa_opt_dead),
?PASS(ssa_opt_cse),
+ ?PASS(ssa_opt_tail_phis),
?PASS(ssa_opt_type_continue)], %Must run after ssa_opt_dead to
%clean up phi nodes.
passes_1(Ps, Opts).
@@ -431,6 +433,160 @@ c_fix_branches([{_,Pred}|As], L, Blocks0) ->
c_fix_branches([], _, Blocks) -> Blocks.
%%%
+%%% Eliminate phi nodes in the tail of a function.
+%%%
+%%% Try to eliminate short blocks that start with a phi node
+%%% and end in a return. For example:
+%%%
+%%% Result = phi { Res1, 4 }, { literal true, 5 }
+%%% Ret = put_tuple literal ok, Result
+%%% ret Ret
+%%%
+%%% The code in this block can be inserted at the end of blocks
+%%% 4 and 5. Thus, the following code can be inserted into block 4:
+%%%
+%%% Ret:1 = put_tuple literal ok, Res1
+%%% ret Ret:1
+%%%
+%%% And the following code into block 5:
+%%%
+%%% Ret:2 = put_tuple literal ok, literal true
+%%% ret Ret:2
+%%%
+%%% Which can be further simplified to:
+%%%
+%%% ret literal {ok, true}
+%%%
+%%% This transformation may enable further optimizations:
+%%%
+%%% - Stack trimming
+%%% - Fewer test_heap instructions
+%%% - Smaller stack frames
+%%%
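+%%% As a source-level sketch (a hypothetical example; compute/1
+%%% stands for any local function), a function such as:
+%%%
+%%%     f(X) ->
+%%%         Res = case X of
+%%%                   a -> compute(X);
+%%%                   b -> true
+%%%               end,
+%%%         {ok, Res}.
+%%%
+%%% compiles to exactly this kind of tail block: a phi node for
+%%% Res, a put_tuple, and a return, as in the example above.
+%%%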
+
+ssa_opt_tail_phis({#st{ssa=SSA0,cnt=Count0}=St, FuncDb}) ->
+ {SSA,Count} = opt_tail_phis(SSA0, Count0),
+ {St#st{ssa=SSA,cnt=Count}, FuncDb}.
+
+opt_tail_phis(Blocks, Count) when is_map(Blocks) ->
+ opt_tail_phis(maps:values(Blocks), Blocks, Count);
+opt_tail_phis(Linear0, Count0) when is_list(Linear0) ->
+ Blocks0 = maps:from_list(Linear0),
+ {Blocks,Count} = opt_tail_phis(Blocks0, Count0),
+ {beam_ssa:linearize(Blocks),Count}.
+
+opt_tail_phis([#b_blk{is=Is0,last=Last}|Bs], Blocks0, Count0) ->
+ case {Is0,Last} of
+ {[#b_set{op=phi,args=[_,_|_]}|_],#b_ret{arg=#b_var{}}=Ret} ->
+ {Phis,Is} = splitwith(fun(#b_set{op=Op}) -> Op =:= phi end, Is0),
+ case suitable_tail_ops(Is) of
+ true ->
+ {Blocks,Count} = opt_tail_phi(Phis, Is, Ret,
+ Blocks0, Count0),
+ opt_tail_phis(Bs, Blocks, Count);
+ false ->
+ opt_tail_phis(Bs, Blocks0, Count0)
+ end;
+ {_,_} ->
+ opt_tail_phis(Bs, Blocks0, Count0)
+ end;
+opt_tail_phis([], Blocks, Count) ->
+ {Blocks,Count}.
+
+opt_tail_phi(Phis0, Is, Ret, Blocks0, Count0) ->
+ Phis = rel2fam(reduce_phis(Phis0)),
+ {Blocks,Count,Cost} =
+ foldl(fun(PhiArg, Acc) ->
+ opt_tail_phi_arg(PhiArg, Is, Ret, Acc)
+ end, {Blocks0,Count0,0}, Phis),
+ MaxCost = length(Phis) * 3 + 2,
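+ %% Worked example of the cost limit (hypothetical numbers): with
+ %% two predecessors, MaxCost is 2 * 3 + 2 = 8; duplicating a
+ %% three-element put_tuple with a non-literal argument into both
+ %% predecessors costs 2 * (3 + 1) = 8 and is therefore accepted.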
+ if
+ Cost =< MaxCost ->
+ %% The transformation will cause at most a slight
+ %% increase in code size, even if no further
+ %% optimizations can be applied.
+ {Blocks,Count};
+ true ->
+ %% The code size would be increased too much.
+ {Blocks0,Count0}
+ end.
+
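+%% reduce_phis/1 rewrites a list of phi nodes into per-predecessor
+%% pairs. For example, the phi node from the comment at the top of
+%% this section:
+%%
+%%   Result = phi { Res1, 4 }, { literal true, 5 }
+%%
+%% becomes [{4,{Result,Res1}},{5,{Result,literal true}}], which
+%% rel2fam/1 then groups by predecessor label.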
+reduce_phis([#b_set{dst=PhiDst,args=PhiArgs}|Is]) ->
+ [{L,{PhiDst,Val}} || {Val,L} <- PhiArgs] ++ reduce_phis(Is);
+reduce_phis([]) -> [].
+
+opt_tail_phi_arg({PredL,Sub0}, Is0, Ret0, {Blocks0,Count0,Cost0}) ->
+ Blk0 = map_get(PredL, Blocks0),
+ #b_blk{is=IsPrefix,last=#b_br{succ=Next,fail=Next}} = Blk0,
+ case is_exit_bif(IsPrefix) of
+ false ->
+ Sub1 = maps:from_list(Sub0),
+ {Is1,Count,Sub} = new_names(Is0, Sub1, Count0, []),
+ Is2 = [sub(I, Sub) || I <- Is1],
+ Cost = build_cost(Is2, Cost0),
+ Is = IsPrefix ++ Is2,
+ Ret = sub(Ret0, Sub),
+ Blk = Blk0#b_blk{is=Is,last=Ret},
+ Blocks = Blocks0#{PredL:=Blk},
+ {Blocks,Count,Cost};
+ true ->
+ %% The block ends in a call to a function that
+ %% will cause an exception.
+ {Blocks0,Count0,Cost0+3}
+ end.
+
+is_exit_bif([#b_set{op=call,
+ args=[#b_remote{mod=#b_literal{val=Mod},
+ name=#b_literal{val=Name}}|Args]}]) ->
+ erl_bifs:is_exit_bif(Mod, Name, length(Args));
+is_exit_bif(_) -> false.
+
+new_names([#b_set{dst=Dst}=I|Is], Sub0, Count0, Acc) ->
+ {NewDst,Count} = new_var(Dst, Count0),
+ Sub = Sub0#{Dst=>NewDst},
+ new_names(Is, Sub, Count, [I#b_set{dst=NewDst}|Acc]);
+new_names([], Sub, Count, Acc) ->
+ {reverse(Acc),Count,Sub}.
+
+suitable_tail_ops(Is) ->
+ all(fun(#b_set{op=Op}) ->
+ is_suitable_tail_op(Op)
+ end, Is).
+
+is_suitable_tail_op({bif,_}) -> true;
+is_suitable_tail_op(put_list) -> true;
+is_suitable_tail_op(put_tuple) -> true;
+is_suitable_tail_op(_) -> false.
+
+build_cost([#b_set{op=put_list,args=Args}|Is], Cost) ->
+ case are_all_literals(Args) of
+ true ->
+ build_cost(Is, Cost);
+ false ->
+ build_cost(Is, Cost + 1)
+ end;
+build_cost([#b_set{op=put_tuple,args=Args}|Is], Cost) ->
+ case are_all_literals(Args) of
+ true ->
+ build_cost(Is, Cost);
+ false ->
+ build_cost(Is, Cost + length(Args) + 1)
+ end;
+build_cost([#b_set{op={bif,_},args=Args}|Is], Cost) ->
+ case are_all_literals(Args) of
+ true ->
+ build_cost(Is, Cost);
+ false ->
+ build_cost(Is, Cost + 1)
+ end;
+build_cost([], Cost) -> Cost.
+
+are_all_literals(Args) ->
+ all(fun(#b_literal{}) -> true;
+ (_) -> false
+ end, Args).
+
+%%%
%%% Order element/2 calls.
%%%
%%% Order an unbroken chain of element/2 calls for the same tuple
@@ -2116,3 +2272,9 @@ sub_arg(Old, Sub) ->
#{Old:=New} -> New;
#{} -> Old
end.
+
+new_var(#b_var{name={Base,N}}, Count) ->
+ true = is_integer(N), %Assertion.
+ {#b_var{name={Base,Count}},Count+1};
+new_var(#b_var{name=Base}, Count) ->
+ {#b_var{name={Base,Count}},Count+1}.
diff --git a/lib/compiler/src/beam_ssa_pre_codegen.erl b/lib/compiler/src/beam_ssa_pre_codegen.erl
index 9b2bb65911..fde1118c29 100644
--- a/lib/compiler/src/beam_ssa_pre_codegen.erl
+++ b/lib/compiler/src/beam_ssa_pre_codegen.erl
@@ -2086,23 +2086,95 @@ reserve_freg([], Res) -> Res.
%% will allocate the lowest free X register for the variable.
reserve_xregs(Blocks, Res) ->
- F = fun(L, #b_blk{is=Is,last=Last}, R) ->
- {Xs0,Used0} = reserve_terminator(L, Last, Blocks, R),
- reserve_xregs_is(reverse(Is), R, Xs0, Used0)
- end,
- beam_ssa:fold_po(F, Res, Blocks).
-
+ Ls = reverse(beam_ssa:rpo(Blocks)),
+ reserve_xregs(Ls, Blocks, #{}, Res).
+
+reserve_xregs([L|Ls], Blocks, XsMap0, Res0) ->
+ #b_blk{anno=Anno,is=Is0,last=Last} = map_get(L, Blocks),
+
+ %% Calculate mapping from variable name to the preferred
+ %% register.
+ Xs0 = reserve_terminator(L, Is0, Last, Blocks, XsMap0, Res0),
+
+ %% We need to figure out where the code generator will
+ %% place instructions that will do a garbage collection.
+ %% Insert 'gc' markers as pseudo-instructions in the
+ %% instruction sequence.
+ Is1 = reverse(Is0),
+ Is2 = res_place_gc_instrs(Is1, []),
+ Is = res_place_allocate(Anno, Is2),
+
+ %% Add register hints for variables that are defined
+ %% in the (reversed) instruction sequence.
+ {Res,Xs} = reserve_xregs_is(Is, Res0, Xs0, []),
+
+ XsMap = XsMap0#{L=>Xs},
+ reserve_xregs(Ls, Blocks, XsMap, Res);
+reserve_xregs([], _, _, Res) -> Res.
+
+%% Insert explicit 'gc' markers at the points where there
+%% will be a garbage collection. (Note that the instruction
+%% sequence passed to this function is reversed.)
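+%%
+%% A sketch of the intended placement (a hypothetical, simplified
+%% instruction sequence, shown in source order even though the
+%% function itself works on a reversed sequence):
+%%
+%%     x = put_list ...
+%%     y = call ...
+%%     z = put_tuple ...
+%%
+%% becomes:
+%%
+%%     gc               %Covers the test_heap for put_list.
+%%     x = put_list ...
+%%     y = call ...
+%%     gc               %The call collects; the marker also
+%%                      %covers the test_heap for put_tuple.
+%%     z = put_tuple ...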
+
+res_place_gc_instrs([#b_set{op=phi}=I|Is], Acc) ->
+ res_place_gc_instrs(Is, [I|Acc]);
+res_place_gc_instrs([#b_set{op=Op}=I|Is], Acc)
+ when Op =:= call; Op =:= make_fun ->
+ case Acc of
+ [] ->
+ res_place_gc_instrs(Is, [I|Acc]);
+ [GC|_] when GC =:= gc; GC =:= test_heap ->
+ res_place_gc_instrs(Is, [I,gc|Acc]);
+ [_|_] ->
+ res_place_gc_instrs(Is, [I,gc|Acc])
+ end;
+res_place_gc_instrs([#b_set{op=Op,args=Args}=I|Is], Acc0) ->
+ case beam_ssa_codegen:classify_heap_need(Op, Args) of
+ neutral ->
+ case Acc0 of
+ [test_heap|Acc] ->
+ res_place_gc_instrs(Is, [test_heap,I|Acc]);
+ Acc ->
+ res_place_gc_instrs(Is, [I|Acc])
+ end;
+ {put,_} ->
+ case Acc0 of
+ [test_heap|Acc] ->
+ res_place_gc_instrs(Is, [test_heap,I|Acc]);
+ Acc ->
+ res_place_gc_instrs(Is, [test_heap,I|Acc])
+ end;
+ _ ->
+ res_place_gc_instrs(Is, [gc,I|Acc0])
+ end;
+res_place_gc_instrs([], Acc) ->
+ %% Reverse and replace 'test_heap' markers with 'gc'.
+ %% (The distinction is no longer useful.)
+ res_place_gc_instrs_rev(Acc, []).
+
+res_place_gc_instrs_rev([test_heap|Is], [gc|_]=Acc) ->
+ res_place_gc_instrs_rev(Is, Acc);
+res_place_gc_instrs_rev([test_heap|Is], Acc) ->
+ res_place_gc_instrs_rev(Is, [gc|Acc]);
+res_place_gc_instrs_rev([gc|Is], [gc|_]=Acc) ->
+ res_place_gc_instrs_rev(Is, Acc);
+res_place_gc_instrs_rev([I|Is], Acc) ->
+ res_place_gc_instrs_rev(Is, [I|Acc]);
+res_place_gc_instrs_rev([], Acc) -> Acc.
+
+res_place_allocate(#{yregs:=_}, Is) ->
+ %% There will be an 'allocate' instruction inserted here.
+ Is ++ [gc];
+res_place_allocate(#{}, Is) -> Is.
+
+reserve_xregs_is([gc|Is], Res, Xs0, Used) ->
+ %% At this point, the code generator will place an instruction
+ %% that does a garbage collection. We must prune the remembered
+ %% registers.
+ Xs = res_xregs_prune(Xs0, Used, Res),
+ reserve_xregs_is(Is, Res, Xs, Used);
reserve_xregs_is([#b_set{op=Op,dst=Dst,args=Args}=I|Is], Res0, Xs0, Used0) ->
- Xs1 = case is_gc_safe(I) of
- true ->
- Xs0;
- false ->
- %% There may be a garbage collection after executing this
- %% instruction. We will need prune the list of preferred
- %% X registers.
- res_xregs_prune(Xs0, Used0, Res0)
- end,
- Res = reserve_xreg(Dst, Xs1, Res0),
+ Res = reserve_xreg(Dst, Xs0, Res0),
Used1 = ordsets:union(Used0, beam_ssa:used(I)),
Used = ordsets:del_element(Dst, Used1),
case Op of
@@ -2113,28 +2185,74 @@ reserve_xregs_is([#b_set{op=Op,dst=Dst,args=Args}=I|Is], Res0, Xs0, Used0) ->
Xs = reserve_call_args(tl(Args)),
reserve_xregs_is(Is, Res, Xs, Used);
_ ->
- reserve_xregs_is(Is, Res, Xs1, Used)
+ reserve_xregs_is(Is, Res, Xs0, Used)
end;
-reserve_xregs_is([], Res, _Xs, _Used) -> Res.
-
-reserve_terminator(L, #b_br{bool=#b_literal{val=true},succ=Succ}, Blocks, Res) ->
- case maps:get(Succ, Blocks) of
+reserve_xregs_is([], Res, Xs, _Used) ->
+ {Res,Xs}.
+
+%% Pick up register hints from the successors of this block.
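+%%
+%% (One shape handled below: a two-way branch where both targets
+%% are empty blocks that branch on to a common block containing
+%% the phi nodes; for instance, a hypothetical "br Bool, ^4, ^5"
+%% where blocks 4 and 5 are empty and both transfer to block 6.)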
+reserve_terminator(_L, _Is, #b_br{bool=#b_var{},succ=Succ,fail=?BADARG_BLOCK},
+ _Blocks, XsMap, _Res) ->
+ %% We know that no variables are used at ?BADARG_BLOCK, so
+ %% any register hints from the success block are safe to use.
+ map_get(Succ, XsMap);
+reserve_terminator(L, Is, #b_br{bool=#b_var{},succ=Succ,fail=Fail},
+ Blocks, XsMap, Res) when Succ =/= Fail ->
+ #{Succ:=SuccBlk,Fail:=FailBlk} = Blocks,
+ case {SuccBlk,FailBlk} of
+ {#b_blk{is=[],last=#b_br{succ=PhiL,fail=PhiL}},
+ #b_blk{is=[],last=#b_br{succ=PhiL,fail=PhiL}}} ->
+ %% Both branches ultimately transfer to the same
+ %% block (via two blocks with no instructions).
+ %% Pick up register hints from the phi nodes
+ %% in the common block.
+ #{PhiL:=#b_blk{is=PhiIs}} = Blocks,
+ Xs = res_xregs_from_phi(PhiIs, Succ, Res, #{}),
+ res_xregs_from_phi(PhiIs, Fail, Res, Xs);
+ {_,_} when Is =/= [] ->
+ case last(Is) of
+ #b_set{op=succeeded,args=[Arg]} ->
+ %% We know that Arg will not be used at the failure
+ %% label, so we can pick up register hints from the
+ %% success label.
+ Br = #b_br{bool=#b_literal{val=true},succ=Succ,fail=Succ},
+ case reserve_terminator(L, [], Br, Blocks, XsMap, Res) of
+ #{Arg:=Reg} -> #{Arg=>Reg};
+ #{} -> #{}
+ end;
+ _ ->
+ %% Register hints from the success block may not
+ %% be safe at the failure block, and vice versa.
+ #{}
+ end;
+ {_,_} ->
+ %% Register hints from the success block may not
+ %% be safe at the failure block, and vice versa.
+ #{}
+ end;
+reserve_terminator(L, Is, #b_br{bool=#b_literal{val=true},succ=Succ},
+ Blocks, XsMap, Res) ->
+ case map_get(Succ, Blocks) of
#b_blk{is=[],last=Last} ->
- reserve_terminator(Succ, Last, Blocks, Res);
- #b_blk{is=[_|_]=Is} ->
- {res_xregs_from_phi(Is, L, Res, #{}),[]}
+ reserve_terminator(Succ, Is, Last, Blocks, XsMap, Res);
+ #b_blk{is=[_|_]=PhiIs} ->
+ res_xregs_from_phi(PhiIs, L, Res, #{})
end;
-reserve_terminator(_, Last, _, _) ->
- {#{},beam_ssa:used(Last)}.
+reserve_terminator(_, _, _, _, _, _) -> #{}.
+
+%% Pick up a reservation from a phi node.
res_xregs_from_phi([#b_set{op=phi,dst=Dst,args=Args}|Is],
Pred, Res, Acc) ->
case [V || {#b_var{}=V,L} <- Args, L =:= Pred] of
[] ->
+ %% The value of the phi node for this predecessor
+ %% is a literal. Nothing to do here.
res_xregs_from_phi(Is, Pred, Res, Acc);
[V] ->
case Res of
#{Dst:={prefer,Reg}} ->
+ %% Try placing V in the same register as for
+ %% the phi node.
res_xregs_from_phi(Is, Pred, Res, Acc#{V=>Reg});
#{Dst:=_} ->
res_xregs_from_phi(Is, Pred, Res, Acc)
@@ -2154,12 +2272,12 @@ reserve_call_args([], _, Xs) -> Xs.
reserve_xreg(V, Xs, Res) ->
case Res of
#{V:=_} ->
- %% Already reserved.
+ %% Already reserved (but not as an X register).
Res;
#{} ->
case Xs of
#{V:=X} ->
- %% Add a hint that a specific X register is
+ %% Add a hint that this specific X register is
%% preferred, unless it is already in use.
Res#{V=>{prefer,X}};
#{} ->
@@ -2168,23 +2286,15 @@ reserve_xreg(V, Xs, Res) ->
end
end.
-is_gc_safe(#b_set{op=phi}) ->
- false;
-is_gc_safe(#b_set{op=Op,args=Args}) ->
- case beam_ssa_codegen:classify_heap_need(Op, Args) of
- neutral -> true;
- {put,_} -> true;
- _ -> false
- end.
-
%% res_xregs_prune(PreferredRegs, Used, Res) -> PreferredRegs.
-%% Prune the list of preferred to only include X registers that
-%% are guaranteed to survice a garbage collection.
+%% Prune the list of preferred registers, to make sure that
+%% there are no "holes" (uninitialized X registers) when
+%% invoking the garbage collector.
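+%%
+%% A worked example (hypothetical): if exactly two of the
+%% variables used after the GC point have X register
+%% reservations, NumSafe is 2, and only hints for {x,0} and
+%% {x,1} are kept; a hint for, say, {x,3} is pruned.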
-res_xregs_prune(Xs, Used, Res) ->
+res_xregs_prune(Xs, Used, Res) when map_size(Xs) =/= 0 ->
%% The number of safe registers is the number of the X registers
%% used after this point. The actual number of safe registers may
- %% be highter than this number, but this is a conservative safe
+ %% be higher than this number, but this is a conservative safe
%% estimate.
NumSafe = foldl(fun(V, N) ->
case Res of
@@ -2196,7 +2306,8 @@ res_xregs_prune(Xs, Used, Res) ->
%% Remove unsafe registers from the list of potential
%% preferred registers.
- maps:filter(fun(_, {x,X}) -> X < NumSafe end, Xs).
+ maps:filter(fun(_, {x,X}) -> X < NumSafe end, Xs);
+res_xregs_prune(Xs, _Used, _Res) -> Xs.
%%%
%%% Register allocation using linear scan.
diff --git a/lib/compiler/src/beam_validator.erl b/lib/compiler/src/beam_validator.erl
index 15ed267c54..4081e366a5 100644
--- a/lib/compiler/src/beam_validator.erl
+++ b/lib/compiler/src/beam_validator.erl
@@ -871,7 +871,7 @@ valfun_4({test,is_map,{f,Lbl},[Src]}, Vst0) ->
valfun_4({test,is_nil,{f,Lbl},[Src]}, Vst0) ->
Vst = case get_term_type(Src, Vst0) of
list ->
- branch_state(Lbl, set_type_reg(cons, Src, Vst0));
+ branch_state(Lbl, set_aliased_type(cons, Src, Vst0));
_ ->
branch_state(Lbl, Vst0)
end,
diff --git a/lib/compiler/test/beam_validator_SUITE.erl b/lib/compiler/test/beam_validator_SUITE.erl
index c9df066958..585d0e7191 100644
--- a/lib/compiler/test/beam_validator_SUITE.erl
+++ b/lib/compiler/test/beam_validator_SUITE.erl
@@ -586,6 +586,9 @@ aliased_types(Config) ->
{1,1} = aliased_types_2(Seq),
{42,none} = aliased_types_2([]),
+ gurka = aliased_types_3([gurka]),
+ gaffel = aliased_types_3([gaffel]),
+
ok.
%% ERL-735: validator failed to track types on aliased registers, rejecting
@@ -614,6 +617,20 @@ aliased_types_2(Bug) ->
_ -> hd(Bug)
end}.
+%% ERL-832 part deux; validator failed to realize that an aliased register was
+%% a cons.
+aliased_types_3(Bug) ->
+ List = [Y || Y <- Bug],
+ case List of
+ [] -> Bug;
+ _ ->
+ if
+ hd(List) -> a:a();
+ true -> ok
+ end,
+ hd(List)
+ end.
+
%%%-------------------------------------------------------------------------
transform_remove(Remove, Module) ->