diff options
author | Björn Gustavsson <[email protected]> | 2019-01-19 06:26:50 +0100 |
---|---|---|
committer | Björn Gustavsson <[email protected]> | 2019-01-24 10:15:32 +0100 |
commit | 17f9cfaf93f7b46087eff39c4cf4da7b07df5b11 (patch) | |
tree | 3f671b8d5e33a63372d48d1c2a740a205475c2d4 /lib | |
parent | a0104bc16c8c6f57c2725d07b811bf3bcb0a2455 (diff) | |
download | otp-17f9cfaf93f7b46087eff39c4cf4da7b07df5b11.tar.gz otp-17f9cfaf93f7b46087eff39c4cf4da7b07df5b11.tar.bz2 otp-17f9cfaf93f7b46087eff39c4cf4da7b07df5b11.zip |
Introduce optimizations of tail phis
Try to eliminate short blocks that starts with a phi node
and end in a return. For example:
Result = phi { Res1, 4 }, { literal true, 5 }
Ret = put_tuple literal ok, Result
ret Ret
The code in this block can be inserted at the end blocks 4 and
5. Thus, the following code can be inserted into block 4:
Ret:1 = put_tuple literal ok, Res1
ret Ret:1
And the following code into block 5:
Ret:2 = put_tuple literal ok, literal true
ret Ret:2
Which can be further simplified to:
ret literal {ok, true}
This transformation may lead to more code improvements, for example:
* Stack trimming
* Fewer test_heap instructions
* Smaller stack frames
Diffstat (limited to 'lib')
-rw-r--r-- | lib/compiler/src/beam_ssa_opt.erl | 164 |
1 files changed, 163 insertions, 1 deletions
diff --git a/lib/compiler/src/beam_ssa_opt.erl b/lib/compiler/src/beam_ssa_opt.erl index 3685c09a2b..2c898ba6f8 100644 --- a/lib/compiler/src/beam_ssa_opt.erl +++ b/lib/compiler/src/beam_ssa_opt.erl @@ -39,7 +39,7 @@ -include("beam_ssa_opt.hrl"). --import(lists, [append/1,duplicate/2,foldl/3,keyfind/3,member/2, +-import(lists, [all/2,append/1,duplicate/2,foldl/3,keyfind/3,member/2, reverse/1,reverse/2, splitwith/2,sort/1,takewhile/2,unzip/1]). @@ -142,6 +142,7 @@ finish_1([], _StMap) -> prologue_passes(Opts) -> Ps = [?PASS(ssa_opt_split_blocks), ?PASS(ssa_opt_coalesce_phis), + ?PASS(ssa_opt_tail_phis), ?PASS(ssa_opt_element), ?PASS(ssa_opt_linearize), ?PASS(ssa_opt_tuple_size), @@ -157,6 +158,7 @@ repeated_passes(Opts) -> ?PASS(ssa_opt_bs_puts), ?PASS(ssa_opt_dead), ?PASS(ssa_opt_cse), + ?PASS(ssa_opt_tail_phis), ?PASS(ssa_opt_type_continue)], %Must run after ssa_opt_dead to %clean up phi nodes. passes_1(Ps, Opts). @@ -431,6 +433,160 @@ c_fix_branches([{_,Pred}|As], L, Blocks0) -> c_fix_branches([], _, Blocks) -> Blocks. %%% +%%% Eliminate phi nodes in the tail of a function. +%%% +%%% Try to eliminate short blocks that starts with a phi node +%%% and end in a return. For example: +%%% +%%% Result = phi { Res1, 4 }, { literal true, 5 } +%%% Ret = put_tuple literal ok, Result +%%% ret Ret +%%% +%%% The code in this block can be inserted at the end blocks 4 and +%%% 5. Thus, the following code can be inserted into block 4: +%%% +%%% Ret:1 = put_tuple literal ok, Res1 +%%% ret Ret:1 +%%% +%%% And the following code into block 5: +%%% +%%% Ret:2 = put_tuple literal ok, literal true +%%% ret Ret:2 +%%% +%%% Which can be further simplified to: +%%% +%%% ret literal {ok, true} +%%% +%%% This transformation may lead to more code improvements: +%%% +%%% - Stack trimming +%%% - Fewer test_heap instructions +%%% - Smaller stack frames +%%% + +ssa_opt_tail_phis({#st{ssa=SSA0,cnt=Count0}=St, FuncDb}) -> + {SSA,Count} = opt_tail_phis(SSA0, Count0), + {St#st{ssa=SSA,cnt=Count}, FuncDb}. + +opt_tail_phis(Blocks, Count) when is_map(Blocks) -> + opt_tail_phis(maps:values(Blocks), Blocks, Count); +opt_tail_phis(Linear0, Count0) when is_list(Linear0) -> + Blocks0 = maps:from_list(Linear0), + {Blocks,Count} = opt_tail_phis(Blocks0, Count0), + {beam_ssa:linearize(Blocks),Count}. + +opt_tail_phis([#b_blk{is=Is0,last=Last}|Bs], Blocks0, Count0) -> + case {Is0,Last} of + {[#b_set{op=phi,args=[_,_|_]}|_],#b_ret{arg=#b_var{}}=Ret} -> + {Phis,Is} = splitwith(fun(#b_set{op=Op}) -> Op =:= phi end, Is0), + case suitable_tail_ops(Is) of + true -> + {Blocks,Count} = opt_tail_phi(Phis, Is, Ret, + Blocks0, Count0), + opt_tail_phis(Bs, Blocks, Count); + false -> + opt_tail_phis(Bs, Blocks0, Count0) + end; + {_,_} -> + opt_tail_phis(Bs, Blocks0, Count0) + end; +opt_tail_phis([], Blocks, Count) -> + {Blocks,Count}. + +opt_tail_phi(Phis0, Is, Ret, Blocks0, Count0) -> + Phis = rel2fam(reduce_phis(Phis0)), + {Blocks,Count,Cost} = + foldl(fun(PhiArg, Acc) -> + opt_tail_phi_arg(PhiArg, Is, Ret, Acc) + end, {Blocks0,Count0,0}, Phis), + MaxCost = length(Phis) * 3 + 2, + if + Cost =< MaxCost -> + %% The transformation would cause at most a slight + %% increase in code size if no more optimizations + %% can be applied. + {Blocks,Count}; + true -> + %% The code size would be increased too much. + {Blocks0,Count0} + end. + +reduce_phis([#b_set{dst=PhiDst,args=PhiArgs}|Is]) -> + [{L,{PhiDst,Val}} || {Val,L} <- PhiArgs] ++ reduce_phis(Is); +reduce_phis([]) -> []. + +opt_tail_phi_arg({PredL,Sub0}, Is0, Ret0, {Blocks0,Count0,Cost0}) -> + Blk0 = map_get(PredL, Blocks0), + #b_blk{is=IsPrefix,last=#b_br{succ=Next,fail=Next}} = Blk0, + case is_exit_bif(IsPrefix) of + false -> + Sub1 = maps:from_list(Sub0), + {Is1,Count,Sub} = new_names(Is0, Sub1, Count0, []), + Is2 = [sub(I, Sub) || I <- Is1], + Cost = build_cost(Is2, Cost0), + Is = IsPrefix ++ Is2, + Ret = sub(Ret0, Sub), + Blk = Blk0#b_blk{is=Is,last=Ret}, + Blocks = Blocks0#{PredL:=Blk}, + {Blocks,Count,Cost}; + true -> + %% The block ends in a call to a function that + %% will cause an exception. + {Blocks0,Count0,Cost0+3} + end. + +is_exit_bif([#b_set{op=call, + args=[#b_remote{mod=#b_literal{val=Mod}, + name=#b_literal{val=Name}}|Args]}]) -> + erl_bifs:is_exit_bif(Mod, Name, length(Args)); +is_exit_bif(_) -> false. + +new_names([#b_set{dst=Dst}=I|Is], Sub0, Count0, Acc) -> + {NewDst,Count} = new_var(Dst, Count0), + Sub = Sub0#{Dst=>NewDst}, + new_names(Is, Sub, Count, [I#b_set{dst=NewDst}|Acc]); +new_names([], Sub, Count, Acc) -> + {reverse(Acc),Count,Sub}. + +suitable_tail_ops(Is) -> + all(fun(#b_set{op=Op}) -> + is_suitable_tail_op(Op) + end, Is). + +is_suitable_tail_op({bif,_}) -> true; +is_suitable_tail_op(put_list) -> true; +is_suitable_tail_op(put_tuple) -> true; +is_suitable_tail_op(_) -> false. + +build_cost([#b_set{op=put_list,args=Args}|Is], Cost) -> + case are_all_literals(Args) of + true -> + build_cost(Is, Cost); + false -> + build_cost(Is, Cost + 1) + end; +build_cost([#b_set{op=put_tuple,args=Args}|Is], Cost) -> + case are_all_literals(Args) of + true -> + build_cost(Is, Cost); + false -> + build_cost(Is, Cost + length(Args) + 1) + end; +build_cost([#b_set{op={bif,_},args=Args}|Is], Cost) -> + case are_all_literals(Args) of + true -> + build_cost(Is, Cost); + false -> + build_cost(Is, Cost + 1) + end; +build_cost([], Cost) -> Cost. + +are_all_literals(Args) -> + all(fun(#b_literal{}) -> true; + (_) -> false + end, Args). + +%%% %%% Order element/2 calls. %%% %%% Order an unbroken chain of element/2 calls for the same tuple @@ -2116,3 +2272,9 @@ sub_arg(Old, Sub) -> #{Old:=New} -> New; #{} -> Old end. + +new_var(#b_var{name={Base,N}}, Count) -> + true = is_integer(N), %Assertion. + {#b_var{name={Base,Count}},Count+1}; +new_var(#b_var{name=Base}, Count) -> + {#b_var{name={Base,Count}},Count+1}. |