%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2018. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%
%% %CopyrightEnd%
%%
-module(beam_ssa_opt).
-export([module/2]).
-include("beam_ssa.hrl").
-import(lists, [all/2,append/1,foldl/3,keyfind/3,member/2,reverse/1,reverse/2,
splitwith/2,takewhile/2,unzip/1]).
-spec module(beam_ssa:b_module(), [compile:option()]) ->
{'ok',beam_ssa:b_module()}.
module(#b_module{body=Fs0}=Module, Opts) ->
Ps = passes(Opts),
Fs = functions(Fs0, Ps),
{ok,Module#b_module{body=Fs}}.
functions([F|Fs], Ps) ->
[function(F, Ps)|functions(Fs, Ps)];
functions([], _Ps) -> [].
-type b_blk() :: beam_ssa:b_blk().
-type b_var() :: beam_ssa:b_var().
-type label() :: beam_ssa:label().
-record(st, {ssa :: beam_ssa:block_map() | [{label(),b_blk()}],
args :: [b_var()],
cnt :: label()}).
-define(PASS(N), {N,fun N/1}).
passes(Opts0) ->
Ps = [?PASS(ssa_opt_split_blocks),
?PASS(ssa_opt_coalesce_phis),
?PASS(ssa_opt_element),
?PASS(ssa_opt_linearize),
?PASS(ssa_opt_record),
%% Run ssa_opt_cse twice, because it will help ssa_opt_dead,
%% and ssa_opt_dead will help ssa_opt_cse. Run ssa_opt_live
%% twice, because it will help ssa_opt_dead and ssa_opt_dead
%% will help ssa_opt_live.
?PASS(ssa_opt_cse),
?PASS(ssa_opt_type),
?PASS(ssa_opt_live),
?PASS(ssa_opt_dead),
?PASS(ssa_opt_cse), %Second time.
?PASS(ssa_opt_float),
?PASS(ssa_opt_live), %Second time.
?PASS(ssa_opt_bsm),
?PASS(ssa_opt_bsm_units),
?PASS(ssa_opt_bsm_shortcut),
?PASS(ssa_opt_misc),
?PASS(ssa_opt_tuple_size),
?PASS(ssa_opt_sw),
?PASS(ssa_opt_blockify),
?PASS(ssa_opt_sink),
?PASS(ssa_opt_merge_blocks),
?PASS(ssa_opt_trim_unreachable)],
Negations = [{list_to_atom("no_"++atom_to_list(N)),N} ||
{N,_} <- Ps],
Opts = proplists:substitute_negations(Negations, Opts0),
[case proplists:get_value(Name, Opts, true) of
true ->
P;
false ->
{NoName,Name} = keyfind(Name, 2, Negations),
{NoName,fun(S) -> S end}
end || {Name,_}=P <- Ps].
function(#b_function{anno=Anno,bs=Blocks0,args=Args,cnt=Count0}=F, Ps) ->
try
St = #st{ssa=Blocks0,args=Args,cnt=Count0},
#st{ssa=Blocks,cnt=Count} = compile:run_sub_passes(Ps, St),
F#b_function{bs=Blocks,cnt=Count}
catch
Class:Error:Stack ->
#{func_info:={_,Name,Arity}} = Anno,
io:fwrite("Function: ~w/~w\n", [Name,Arity]),
erlang:raise(Class, Error, Stack)
end.
%%%
%%% Trivial sub passes.
%%%
ssa_opt_dead(#st{ssa=Linear}=St) ->
St#st{ssa=beam_ssa_dead:opt(Linear)}.
ssa_opt_linearize(#st{ssa=Blocks}=St) ->
St#st{ssa=beam_ssa:linearize(Blocks)}.
ssa_opt_type(#st{ssa=Linear,args=Args}=St) ->
St#st{ssa=beam_ssa_type:opt(Linear, Args)}.
ssa_opt_blockify(#st{ssa=Linear}=St) ->
St#st{ssa=maps:from_list(Linear)}.
ssa_opt_trim_unreachable(#st{ssa=Blocks}=St) ->
St#st{ssa=beam_ssa:trim_unreachable(Blocks)}.
%%%
%%% Split blocks before certain instructions to enable more optimizations.
%%%
%%% Splitting before element/2 enables the optimization that swaps
%%% element/2 instructions.
%%%
%%% Splitting before call and make_fun instructions gives more opportunities
%%% for sinking get_tuple_element instructions.
%%%
ssa_opt_split_blocks(#st{ssa=Blocks0,cnt=Count0}=St) ->
P = fun(#b_set{op={bif,element}}) -> true;
(#b_set{op=call}) -> true;
(#b_set{op=make_fun}) -> true;
(_) -> false
end,
{Blocks,Count} = beam_ssa:split_blocks(P, Blocks0, Count0),
St#st{ssa=Blocks,cnt=Count}.
%%%
%%% Coalesce phi nodes.
%%%
%%% Nested cases can led to code such as this:
%%%
%%% 10:
%%% _1 = phi {literal value1, label 8}, {Var, label 9}
%%% br 11
%%%
%%% 11:
%%% _2 = phi {_1, label 10}, {literal false, label 3}
%%%
%%% The phi nodes can be coalesced like this:
%%%
%%% 11:
%%% _2 = phi {literal value1, label 8}, {Var, label 9}, {literal false, label 3}
%%%
%%% Coalescing can help other optimizations, and can in some cases reduce register
%%% shuffling (if the phi variables for two phi nodes happens to be allocated to
%%% different registers).
%%%
ssa_opt_coalesce_phis(#st{ssa=Blocks0}=St) ->
Ls = beam_ssa:rpo(Blocks0),
Blocks = c_phis_1(Ls, Blocks0),
St#st{ssa=Blocks}.
c_phis_1([L|Ls], Blocks0) ->
case maps:get(L, Blocks0) of
#b_blk{is=[#b_set{op=phi}|_]}=Blk ->
Blocks = c_phis_2(L, Blk, Blocks0),
c_phis_1(Ls, Blocks);
#b_blk{} ->
c_phis_1(Ls, Blocks0)
end;
c_phis_1([], Blocks) -> Blocks.
c_phis_2(L, #b_blk{is=Is0}=Blk0, Blocks0) ->
case c_phis_args(Is0, Blocks0) of
none ->
Blocks0;
{_,_,Preds}=Info ->
Is = c_rewrite_phis(Is0, Info),
Blk = Blk0#b_blk{is=Is},
Blocks = Blocks0#{L:=Blk},
c_fix_branches(Preds, L, Blocks)
end.
c_phis_args([#b_set{op=phi,args=Args0}|Is], Blocks) ->
case c_phis_args_1(Args0, Blocks) of
none ->
c_phis_args(Is, Blocks);
Res ->
Res
end;
c_phis_args(_, _Blocks) -> none.
c_phis_args_1([{Var,Pred}|As], Blocks) ->
case c_get_pred_vars(Var, Pred, Blocks) of
none ->
c_phis_args_1(As, Blocks);
Result ->
Result
end;
c_phis_args_1([], _Blocks) -> none.
c_get_pred_vars(Var, Pred, Blocks) ->
case maps:get(Pred, Blocks) of
#b_blk{is=[#b_set{op=phi,dst=Var,args=Args}]} ->
{Var,Pred,Args};
#b_blk{} ->
none
end.
c_rewrite_phis([#b_set{op=phi,args=Args0}=I|Is], Info) ->
Args = c_rewrite_phi(Args0, Info),
[I#b_set{args=Args}|c_rewrite_phis(Is, Info)];
c_rewrite_phis(Is, _Info) -> Is.
c_rewrite_phi([{Var,Pred}|As], {Var,Pred,Values}) ->
Values ++ As;
c_rewrite_phi([{Value,Pred}|As], {_,Pred,Values}) ->
[{Value,P} || {_,P} <- Values] ++ As;
c_rewrite_phi([A|As], Info) ->
[A|c_rewrite_phi(As, Info)];
c_rewrite_phi([], _Info) -> [].
c_fix_branches([{_,Pred}|As], L, Blocks0) ->
#b_blk{last=Last0} = Blk0 = maps:get(Pred, Blocks0),
#b_br{bool=#b_literal{val=true}} = Last0, %Assertion.
Last = Last0#b_br{bool=#b_literal{val=true},succ=L,fail=L},
Blk = Blk0#b_blk{last=Last},
Blocks = Blocks0#{Pred:=Blk},
c_fix_branches(As, L, Blocks);
c_fix_branches([], _, Blocks) -> Blocks.
%%%
%%% Order element/2 calls.
%%%
%%% Order an unbroken chain of element/2 calls for the same tuple
%%% with the same failure label so that the highest element is
%%% retrieved first. That will allow the other element/2 calls to
%%% be replaced with get_tuple_element/3 instructions.
%%%
ssa_opt_element(#st{ssa=Blocks}=St) ->
%% Collect the information about element instructions in this
%% function.
GetEls = collect_element_calls(beam_ssa:linearize(Blocks)),
%% Collect the element instructions into chains. The
%% element calls in each chain are ordered in reverse
%% execution order.
Chains = collect_chains(GetEls, []),
%% For each chain, swap the first element call with the
%% element call with the highest index.
St#st{ssa=swap_element_calls(Chains, Blocks)}.
collect_element_calls([{L,#b_blk{is=Is0,last=Last}}|Bs]) ->
case {Is0,Last} of
{[#b_set{op={bif,element},dst=Element,
args=[#b_literal{val=N},#b_var{}=Tuple]},
#b_set{op=succeeded,dst=Bool,args=[Element]}],
#b_br{bool=Bool,succ=Succ,fail=Fail}} ->
Info = {L,Succ,{Tuple,Fail},N},
[Info|collect_element_calls(Bs)];
{_,_} ->
collect_element_calls(Bs)
end;
collect_element_calls([]) -> [].
collect_chains([{This,_,V,_}=El|Els], [{_,This,V,_}|_]=Chain) ->
%% Add to the previous chain.
collect_chains(Els, [El|Chain]);
collect_chains([El|Els], [_,_|_]=Chain) ->
%% Save the previous chain and start a new chain.
[Chain|collect_chains(Els, [El])];
collect_chains([El|Els], _Chain) ->
%% The previous chain is too short; discard it and start a new.
collect_chains(Els, [El]);
collect_chains([], [_,_|_]=Chain) ->
%% Save the last chain.
[Chain];
collect_chains([], _) -> [].
swap_element_calls([[{L,_,_,N}|_]=Chain|Chains], Blocks0) ->
Blocks = swap_element_calls_1(Chain, {N,L}, Blocks0),
swap_element_calls(Chains, Blocks);
swap_element_calls([], Blocks) -> Blocks.
swap_element_calls_1([{L1,_,_,N1}], {N2,L2}, Blocks) when N2 > N1 ->
%% We have reached the end of the chain, and the first
%% element instrution to be executed. Its index is lower
%% than the maximum index found while traversing the chain,
%% so we will need to swap the instructions.
#{L1:=Blk1,L2:=Blk2} = Blocks,
[#b_set{dst=Dst1}=GetEl1,Succ1] = Blk1#b_blk.is,
[#b_set{dst=Dst2}=GetEl2,Succ2] = Blk2#b_blk.is,
Is1 = [GetEl2,Succ1#b_set{args=[Dst2]}],
Is2 = [GetEl1,Succ2#b_set{args=[Dst1]}],
Blocks#{L1:=Blk1#b_blk{is=Is1},L2:=Blk2#b_blk{is=Is2}};
swap_element_calls_1([{L,_,_,N1}|Els], {N2,_}, Blocks) when N1 > N2 ->
swap_element_calls_1(Els, {N2,L}, Blocks);
swap_element_calls_1([_|Els], Highest, Blocks) ->
swap_element_calls_1(Els, Highest, Blocks);
swap_element_calls_1([], _, Blocks) ->
%% Nothing to do. The element call with highest index
%% is already the first one to be executed.
Blocks.
%%%
%%% Record optimization.
%%%
%%% Replace tuple matching with an is_tagged_tuple instruction
%%% when applicable.
%%%
ssa_opt_record(#st{ssa=Linear}=St) ->
Blocks = maps:from_list(Linear),
St#st{ssa=record_opt(Linear, Blocks)}.
record_opt([{L,#b_blk{is=Is0,last=Last}=Blk0}|Bs], Blocks) ->
Is = record_opt_is(Is0, Last, Blocks),
Blk = Blk0#b_blk{is=Is},
[{L,Blk}|record_opt(Bs, Blocks)];
record_opt([], _Blocks) -> [].
record_opt_is([#b_set{op={bif,is_tuple},dst=Bool,args=[Tuple]}=Set],
Last, Blocks) ->
case is_tagged_tuple(Tuple, Bool, Last, Blocks) of
{yes,Size,Tag} ->
Args = [Tuple,Size,Tag],
[Set#b_set{op=is_tagged_tuple,args=Args}];
no ->
[Set]
end;
record_opt_is([I|Is], Last, Blocks) ->
[I|record_opt_is(Is, Last, Blocks)];
record_opt_is([], _Last, _Blocks) -> [].
is_tagged_tuple(#b_var{}=Tuple, Bool,
#b_br{bool=Bool,succ=Succ,fail=Fail},
Blocks) ->
SuccBlk = maps:get(Succ, Blocks),
is_tagged_tuple_1(SuccBlk, Tuple, Fail, Blocks);
is_tagged_tuple(_, _, _, _) -> no.
is_tagged_tuple_1(#b_blk{is=Is,last=Last}, Tuple, Fail, Blocks) ->
case Is of
[#b_set{op={bif,tuple_size},dst=ArityVar,
args=[#b_var{}=Tuple]},
#b_set{op={bif,'=:='},
dst=Bool,
args=[ArityVar, #b_literal{val=ArityVal}=Arity]}]
when is_integer(ArityVal) ->
case Last of
#b_br{bool=Bool,succ=Succ,fail=Fail} ->
SuccBlk = maps:get(Succ, Blocks),
case is_tagged_tuple_2(SuccBlk, Tuple, Fail) of
no ->
no;
{yes,Tag} ->
{yes,Arity,Tag}
end;
_ ->
no
end;
_ ->
no
end.
is_tagged_tuple_2(#b_blk{is=Is,
last=#b_br{bool=#b_var{}=Bool,fail=Fail}},
Tuple, Fail) ->
is_tagged_tuple_3(Is, Bool, Tuple);
is_tagged_tuple_2(#b_blk{}, _, _) -> no.
is_tagged_tuple_3([#b_set{op=get_tuple_element,
dst=TagVar,
args=[#b_var{}=Tuple,#b_literal{val=0}]}|Is],
Bool, Tuple) ->
is_tagged_tuple_4(Is, Bool, TagVar);
is_tagged_tuple_3([_|Is], Bool, Tuple) ->
is_tagged_tuple_3(Is, Bool, Tuple);
is_tagged_tuple_3([], _, _) -> no.
is_tagged_tuple_4([#b_set{op={bif,'=:='},dst=Bool,
args=[#b_var{}=TagVar,
#b_literal{val=TagVal}=Tag]}],
Bool, TagVar) when is_atom(TagVal) ->
{yes,Tag};
is_tagged_tuple_4([_|Is], Bool, TagVar) ->
is_tagged_tuple_4(Is, Bool, TagVar);
is_tagged_tuple_4([], _, _) -> no.
%%%
%%% Common subexpression elimination (CSE).
%%%
%%% Eliminate repeated evaluation of identical expressions. To avoid
%%% increasing the size of the stack frame, we don't eliminate
%%% subexpressions across instructions that clobber the X registers.
%%%
ssa_opt_cse(#st{ssa=Linear}=St) ->
M = #{0=>#{}},
St#st{ssa=cse(Linear, #{}, M)}.
cse([{L,#b_blk{is=Is0,last=Last0}=Blk}|Bs], Sub0, M0) ->
Es0 = maps:get(L, M0),
{Is1,Es,Sub} = cse_is(Is0, Es0, Sub0, []),
Last = sub(Last0, Sub),
M = cse_successors(Is1, Blk, Es, M0),
Is = reverse(Is1),
[{L,Blk#b_blk{is=Is,last=Last}}|cse(Bs, Sub, M)];
cse([], _, _) -> [].
cse_successors([#b_set{op=succeeded,args=[Src]},Bif|_], Blk, EsSucc, M0) ->
case cse_suitable(Bif) of
true ->
%% The previous instruction only has a valid value at the success branch.
%% We must remove the substitution for Src from the failure branch.
#b_blk{last=#b_br{succ=Succ,fail=Fail}} = Blk,
M = cse_successors_1([Succ], EsSucc, M0),
EsFail = maps:filter(fun(_, Val) -> Val =/= Src end, EsSucc),
cse_successors_1([Fail], EsFail, M);
false ->
%% There can't be any replacement for Src in EsSucc. No need for
%% any special handling.
cse_successors_1(beam_ssa:successors(Blk), EsSucc, M0)
end;
cse_successors(_Is, Blk, Es, M) ->
cse_successors_1(beam_ssa:successors(Blk), Es, M).
cse_successors_1([L|Ls], Es0, M) ->
case M of
#{L:=Es1} when map_size(Es1) =:= 0 ->
%% The map is already empty. No need to do anything
%% since the intersection will be empty.
cse_successors_1(Ls, Es0, M);
#{L:=Es1} ->
%% Calculate the intersection of the two maps.
%% Both keys and values must match.
Es = maps:filter(fun(Key, Value) ->
case Es1 of
#{Key:=Value} -> true;
#{} -> false
end
end, Es0),
cse_successors_1(Ls, Es0, M#{L:=Es});
#{} ->
cse_successors_1(Ls, Es0, M#{L=>Es0})
end;
cse_successors_1([], _, M) -> M.
cse_is([#b_set{op=succeeded,dst=Bool,args=[Src]}=I0|Is], Es, Sub0, Acc) ->
I = sub(I0, Sub0),
case I of
#b_set{args=[Src]} ->
cse_is(Is, Es, Sub0, [I|Acc]);
#b_set{} ->
%% The previous instruction has been eliminated. Eliminate the
%% 'succeeded' instruction too.
Sub = Sub0#{Bool=>#b_literal{val=true}},
cse_is(Is, Es, Sub, Acc)
end;
cse_is([#b_set{dst=Dst}=I0|Is], Es0, Sub0, Acc) ->
I = sub(I0, Sub0),
case beam_ssa:clobbers_xregs(I) of
true ->
%% Retaining the expressions map across calls and other
%% clobbering instructions would work, but it would cause
%% the common subexpressions to be saved to Y registers,
%% which would probably increase the size of the stack
%% frame.
cse_is(Is, #{}, Sub0, [I|Acc]);
false ->
case cse_expr(I) of
none ->
%% Not suitable for CSE.
cse_is(Is, Es0, Sub0, [I|Acc]);
{ok,ExprKey} ->
case Es0 of
#{ExprKey:=Src} ->
Sub = Sub0#{Dst=>Src},
cse_is(Is, Es0, Sub, Acc);
#{} ->
Es = Es0#{ExprKey=>Dst},
cse_is(Is, Es, Sub0, [I|Acc])
end
end
end;
cse_is([], Es, Sub, Acc) ->
{Acc,Es,Sub}.
cse_expr(#b_set{op=Op,args=Args}=I) ->
case cse_suitable(I) of
true -> {ok,{Op,Args}};
false -> none
end.
cse_suitable(#b_set{op=get_hd}) -> true;
cse_suitable(#b_set{op=get_tl}) -> true;
cse_suitable(#b_set{op=put_list}) -> true;
cse_suitable(#b_set{op=put_tuple}) -> true;
cse_suitable(#b_set{op={bif,tuple_size}}) ->
%% Doing CSE for tuple_size/1 can prevent the
%% creation of test_arity and select_tuple_arity
%% instructions. That could decrease performance
%% and beam_validator could fail to understand
%% that tuple operations that follow are safe.
false;
cse_suitable(#b_set{anno=Anno,op={bif,Name},args=Args}) ->
%% Doing CSE for floating point operators is unsafe.
%% Doing CSE for comparison operators would prevent
%% creation of 'test' instructions.
Arity = length(Args),
not (is_map_key(float_op, Anno) orelse
erl_internal:new_type_test(Name, Arity) orelse
erl_internal:comp_op(Name, Arity) orelse
erl_internal:bool_op(Name, Arity));
cse_suitable(#b_set{}) -> false.
%%%
%%% Using floating point instructions.
%%%
%%% Use the special floating points version of arithmetic
%%% instructions, if the operands are known to be floats or the result
%%% of the operation will be a float.
%%%
%%% The float instructions were never used in guards before, so we
%%% will take special care to keep not using them in guards. Using
%%% them in guards would require a new version of the 'fconv'
%%% instruction that would take a failure label. Since it is unlikely
%%% that using float instructions in guards would be benefical, why
%%% bother implementing a new instruction? Also, implementing float
%%% instructions in guards in HiPE could turn out to be a lot of work.
%%%
-record(fs,
{s=undefined :: 'undefined' | 'cleared',
regs=#{} :: #{beam_ssa:b_var():=beam_ssa:b_var()},
fail=none :: 'none' | beam_ssa:label(),
non_guards :: gb_sets:set(beam_ssa:label()),
bs :: beam_ssa:block_map()
}).
ssa_opt_float(#st{ssa=Linear0,cnt=Count0}=St) ->
NonGuards0 = float_non_guards(Linear0),
NonGuards = gb_sets:from_list(NonGuards0),
Blocks = maps:from_list(Linear0),
Fs = #fs{non_guards=NonGuards,bs=Blocks},
{Linear,Count} = float_opt(Linear0, Count0, Fs),
St#st{ssa=Linear,cnt=Count}.
float_non_guards([{L,#b_blk{is=Is}}|Bs]) ->
case Is of
[#b_set{op=landingpad}|_] ->
[L|float_non_guards(Bs)];
_ ->
float_non_guards(Bs)
end;
float_non_guards([]) -> [?BADARG_BLOCK].
float_opt([{L,#b_blk{last=#b_br{fail=F}}=Blk}|Bs0],
Count0, #fs{non_guards=NonGuards}=Fs) ->
case gb_sets:is_member(F, NonGuards) of
true ->
%% This block is not inside a guard.
%% We can do the optimization.
float_opt_1(L, Blk, Bs0, Count0, Fs);
false ->
%% This block is inside a guard. Don't do
%% any floating point optimizations.
{Bs,Count} = float_opt(Bs0, Count0, Fs),
{[{L,Blk}|Bs],Count}
end;
float_opt([{L,Blk}|Bs], Count, Fs) ->
float_opt_1(L, Blk, Bs, Count, Fs);
float_opt([], Count, _Fs) ->
{[],Count}.
float_opt_1(L, #b_blk{is=Is0}=Blk0, Bs0, Count0, Fs0) ->
case float_opt_is(Is0, Fs0, Count0, []) of
{Is1,Fs1,Count1} ->
Fs2 = float_fail_label(Blk0, Fs1),
Fail = Fs2#fs.fail,
{Flush,Blk,Fs,Count2} = float_maybe_flush(Blk0, Fs2, Count1),
Split = float_split_conv(Is1, Blk),
{Blks0,Count3} = float_number(Split, L, Count2),
{Blks,Count4} = float_conv(Blks0, Fail, Count3),
{Bs,Count} = float_opt(Bs0, Count4, Fs),
{Blks++Flush++Bs,Count};
none ->
{Bs,Count} = float_opt(Bs0, Count0, Fs0),
{[{L,Blk0}|Bs],Count}
end.
%% Split {float,convert} instructions into individual blocks.
float_split_conv(Is0, Blk) ->
Br = #b_br{bool=#b_literal{val=true},succ=0,fail=0},
case splitwith(fun(#b_set{op=Op}) ->
Op =/= {float,convert}
end, Is0) of
{Is,[]} ->
[Blk#b_blk{is=Is}];
{[_|_]=Is1,[#b_set{op={float,convert}}=Conv|Is2]} ->
[#b_blk{is=Is1,last=Br},
#b_blk{is=[Conv],last=Br}|float_split_conv(Is2, Blk)];
{[],[#b_set{op={float,convert}}=Conv|Is1]} ->
[#b_blk{is=[Conv],last=Br}|float_split_conv(Is1, Blk)]
end.
%% Number the blocks that were split.
float_number([B|Bs0], FirstL, Count0) ->
{Bs,Count} = float_number(Bs0, Count0),
{[{FirstL,B}|Bs],Count}.
float_number([B|Bs0], Count0) ->
{Bs,Count} = float_number(Bs0, Count0+1),
{[{Count0,B}|Bs],Count};
float_number([], Count) ->
{[],Count}.
%% Insert 'succeeded' instructions after each {float,convert}
%% instruction.
float_conv([{L,#b_blk{is=Is0}=Blk0}|Bs0], Fail, Count0) ->
case Is0 of
[#b_set{op={float,convert}}=Conv] ->
{Bool0,Count1} = new_reg('@ssa_bool', Count0),
Bool = #b_var{name=Bool0},
Succeeded = #b_set{op=succeeded,dst=Bool,
args=[Conv#b_set.dst]},
Is = [Conv,Succeeded],
[{NextL,_}|_] = Bs0,
Br = #b_br{bool=Bool,succ=NextL,fail=Fail},
Blk = Blk0#b_blk{is=Is,last=Br},
{Bs,Count} = float_conv(Bs0, Fail, Count1),
{[{L,Blk}|Bs],Count};
[_|_] ->
case Bs0 of
[{NextL,_}|_] ->
Br = #b_br{bool=#b_literal{val=true},
succ=NextL,fail=NextL},
Blk = Blk0#b_blk{last=Br},
{Bs,Count} = float_conv(Bs0, Fail, Count0),
{[{L,Blk}|Bs],Count};
[] ->
{[{L,Blk0}],Count0}
end
end.
float_maybe_flush(Blk0, #fs{s=cleared,fail=Fail,bs=Blocks}=Fs0, Count0) ->
#b_blk{last=#b_br{bool=#b_var{},succ=Succ}=Br} = Blk0,
#b_blk{is=Is} = maps:get(Succ, Blocks),
case Is of
[#b_set{anno=#{float_op:=_}}|_] ->
%% The next operation is also a floating point operation.
%% No flush needed.
{[],Blk0,Fs0,Count0};
_ ->
%% Flush needed.
{Bool0,Count1} = new_reg('@ssa_bool', Count0),
Bool = #b_var{name=Bool0},
%% Allocate block numbers.
CheckL = Count1, %For checkerror.
FlushL = Count1 + 1, %For flushing of float regs.
Count = Count1 + 2,
Blk = Blk0#b_blk{last=Br#b_br{succ=CheckL}},
%% Build the block with the checkerror instruction.
CheckIs = [#b_set{op={float,checkerror},dst=Bool}],
CheckBr = #b_br{bool=Bool,succ=FlushL,fail=Fail},
CheckBlk = #b_blk{is=CheckIs,last=CheckBr},
%% Build the block that flushes all registers.
FlushIs = float_flush_regs(Fs0),
FlushBr = #b_br{bool=#b_literal{val=true},succ=Succ,fail=Succ},
FlushBlk = #b_blk{is=FlushIs,last=FlushBr},
%% Update state and blocks.
Fs = Fs0#fs{s=undefined,regs=#{},fail=none},
FlushBs = [{CheckL,CheckBlk},{FlushL,FlushBlk}],
{FlushBs,Blk,Fs,Count}
end;
float_maybe_flush(Blk, Fs, Count) ->
{[],Blk,Fs,Count}.
float_opt_is([#b_set{op=succeeded,args=[Src]}=I0],
#fs{regs=Rs}=Fs, Count, Acc) ->
case Rs of
#{Src:=Fr} ->
I = I0#b_set{args=[Fr]},
{reverse(Acc, [I]),Fs,Count};
#{} ->
{reverse(Acc, [I0]),Fs,Count}
end;
float_opt_is([#b_set{anno=Anno0}=I0|Is0], Fs0, Count0, Acc) ->
case Anno0 of
#{float_op:=FTypes} ->
Anno = maps:remove(float_op, Anno0),
I1 = I0#b_set{anno=Anno},
{Is,Fs,Count} = float_make_op(I1, FTypes, Fs0, Count0),
float_opt_is(Is0, Fs, Count, reverse(Is, Acc));
#{} ->
float_opt_is(Is0, Fs0#fs{regs=#{}}, Count0, [I0|Acc])
end;
float_opt_is([], Fs, _Count, _Acc) ->
#fs{s=undefined} = Fs, %Assertion.
none.
float_make_op(#b_set{op={bif,Op},dst=Dst,args=As0}=I0,
Ts, #fs{s=S,regs=Rs0}=Fs, Count0) ->
{As1,Rs1,Count1} = float_load(As0, Ts, Rs0, Count0, []),
{As,Is0} = unzip(As1),
{Fr,Count2} = new_reg('@fr', Count1),
FrDst = #b_var{name=Fr},
I = I0#b_set{op={float,Op},dst=FrDst,args=As},
Rs = Rs1#{Dst=>FrDst},
Is = append(Is0) ++ [I],
case S of
undefined ->
{Ignore,Count} = new_reg('@ssa_ignore', Count2),
C = #b_set{op={float,clearerror},dst=#b_var{name=Ignore}},
{[C|Is],Fs#fs{s=cleared,regs=Rs},Count};
cleared ->
{Is,Fs#fs{regs=Rs},Count2}
end.
float_load([A|As], [T|Ts], Rs0, Count0, Acc) ->
{Load,Rs,Count} = float_reg_arg(A, T, Rs0, Count0),
float_load(As, Ts, Rs, Count, [Load|Acc]);
float_load([], [], Rs, Count, Acc) ->
{reverse(Acc),Rs,Count}.
float_reg_arg(A, T, Rs, Count0) ->
case Rs of
#{A:=Fr} ->
{{Fr,[]},Rs,Count0};
#{} ->
{Fr,Count} = new_float_copy_reg(Count0),
Dst = #b_var{name=Fr},
I = float_load_reg(T, A, Dst),
{{Dst,[I]},Rs#{A=>Dst},Count}
end.
float_load_reg(convert, #b_var{}=Src, Dst) ->
#b_set{op={float,convert},dst=Dst,args=[Src]};
float_load_reg(convert, #b_literal{val=Val}=Src, Dst) ->
try float(Val) of
F ->
#b_set{op={float,put},dst=Dst,args=[#b_literal{val=F}]}
catch
error:_ ->
%% Let the exception happen at runtime.
#b_set{op={float,convert},dst=Dst,args=[Src]}
end;
float_load_reg(float, Src, Dst) ->
#b_set{op={float,put},dst=Dst,args=[Src]}.
new_float_copy_reg(Count) ->
new_reg('@fr_copy', Count).
new_reg(Base, Count) ->
Fr = {Base,Count},
{Fr,Count+1}.
float_fail_label(#b_blk{last=Last}, Fs) ->
case Last of
#b_br{bool=#b_var{},fail=Fail} ->
Fs#fs{fail=Fail};
_ ->
Fs
end.
float_flush_regs(#fs{regs=Rs}) ->
maps:fold(fun(_, #b_var{name={'@fr_copy',_}}, Acc) ->
Acc;
(Dst, Fr, Acc) ->
[#b_set{op={float,get},dst=Dst,args=[Fr]}|Acc]
end, [], Rs).
%%%
%%% Live optimization.
%%%
%%% Optimize instructions whose values are not used. They could be
%%% removed if they have no side effects, or in a few cases replaced
%%% with a cheaper instructions
%%%
ssa_opt_live(#st{ssa=Linear}=St) ->
St#st{ssa=live_opt(reverse(Linear), #{}, [])}.
live_opt([{L,Blk0}|Bs], LiveMap0, Acc) ->
Successors = beam_ssa:successors(Blk0),
Live0 = live_opt_succ(Successors, L, LiveMap0),
{Blk,Live} = live_opt_blk(Blk0, Live0),
LiveMap = live_opt_phis(Blk#b_blk.is, L, Live, LiveMap0),
live_opt(Bs, LiveMap, [{L,Blk}|Acc]);
live_opt([], _, Acc) -> Acc.
live_opt_succ([S|Ss], L, LiveMap) ->
Live0 = live_opt_succ(Ss, L, LiveMap),
Key = {S,L},
case LiveMap of
#{Key:=Live} ->
gb_sets:union(Live, Live0);
#{S:=Live} ->
gb_sets:union(Live, Live0);
#{} ->
Live0
end;
live_opt_succ([], _, _) ->
gb_sets:empty().
live_opt_phis(Is, L, Live0, LiveMap0) ->
LiveMap = LiveMap0#{L=>Live0},
Phis = takewhile(fun(#b_set{op=Op}) -> Op =:= phi end, Is),
case Phis of
[] ->
LiveMap;
[_|_] ->
PhiArgs = append([Args || #b_set{args=Args} <- Phis]),
case [{P,V} || {#b_var{}=V,P} <- PhiArgs] of
[_|_]=PhiVars ->
PhiLive0 = rel2fam(PhiVars),
PhiLive = [{{L,P},gb_sets:union(gb_sets:from_list(Vs), Live0)} ||
{P,Vs} <- PhiLive0],
maps:merge(LiveMap, maps:from_list(PhiLive));
[] ->
%% There were only literals in the phi node(s).
LiveMap
end
end.
live_opt_blk(#b_blk{is=Is0,last=Last}=Blk, Live0) ->
Live1 = gb_sets:union(Live0, gb_sets:from_ordset(beam_ssa:used(Last))),
{Is,Live} = live_opt_is(reverse(Is0), Live1, []),
{Blk#b_blk{is=Is},Live}.
live_opt_is([#b_set{op=phi,dst=Dst}=I|Is], Live, Acc) ->
case gb_sets:is_member(Dst, Live) of
true ->
live_opt_is(Is, Live, [I|Acc]);
false ->
live_opt_is(Is, Live, Acc)
end;
live_opt_is([#b_set{op=succeeded,dst=SuccDst=SuccDstVar,
args=[Dst]}=SuccI,
#b_set{dst=Dst}=I|Is], Live0, Acc) ->
case gb_sets:is_member(Dst, Live0) of
true ->
case gb_sets:is_member(SuccDst, Live0) of
true ->
Live1 = gb_sets:add(Dst, Live0),
Live = gb_sets:delete_any(SuccDst, Live1),
live_opt_is([I|Is], Live, [SuccI|Acc]);
false ->
live_opt_is([I|Is], Live0, Acc)
end;
false ->
case live_opt_unused(I) of
{replace,NewI0} ->
NewI = NewI0#b_set{dst=SuccDstVar},
live_opt_is([NewI|Is], Live0, Acc);
keep ->
case gb_sets:is_member(SuccDst, Live0) of
true ->
Live1 = gb_sets:add(Dst, Live0),
Live = gb_sets:delete_any(SuccDst, Live1),
live_opt_is([I|Is], Live, [SuccI|Acc]);
false ->
live_opt_is([I|Is], Live0, Acc)
end
end
end;
live_opt_is([#b_set{dst=Dst}=I|Is], Live0, Acc) ->
case gb_sets:is_member(Dst, Live0) of
true ->
Live1 = gb_sets:union(Live0, gb_sets:from_ordset(beam_ssa:used(I))),
Live = gb_sets:delete_any(Dst, Live1),
live_opt_is(Is, Live, [I|Acc]);
false ->
case beam_ssa:no_side_effect(I) of
true ->
live_opt_is(Is, Live0, Acc);
false ->
Live = gb_sets:union(Live0, gb_sets:from_ordset(beam_ssa:used(I))),
live_opt_is(Is, Live, [I|Acc])
end
end;
live_opt_is([], Live, Acc) ->
{Acc,Live}.
live_opt_unused(#b_set{op=get_map_element}=Set) ->
{replace,Set#b_set{op=has_map_field}};
live_opt_unused(_) -> keep.
%%%
%%% Optimize binary matching instructions.
%%%
ssa_opt_bsm(#st{ssa=Linear}=St) ->
Extracted0 = bsm_extracted(Linear),
Extracted = cerl_sets:from_list(Extracted0),
St#st{ssa=bsm_skip(Linear, Extracted)}.
bsm_skip([{L,#b_blk{is=Is0}=Blk}|Bs], Extracted) ->
Is = bsm_skip_is(Is0, Extracted),
[{L,Blk#b_blk{is=Is}}|bsm_skip(Bs, Extracted)];
bsm_skip([], _) -> [].
bsm_skip_is([I0|Is], Extracted) ->
case I0 of
#b_set{op=bs_match,args=[#b_literal{val=string}|_]} ->
[I0|bsm_skip_is(Is, Extracted)];
#b_set{op=bs_match,dst=Ctx,args=[Type,PrevCtx|Args0]} ->
I = case cerl_sets:is_element(Ctx, Extracted) of
true ->
I0;
false ->
%% The value is never extracted.
Args = [#b_literal{val=skip},PrevCtx,Type|Args0],
I0#b_set{args=Args}
end,
[I|Is];
#b_set{} ->
[I0|bsm_skip_is(Is, Extracted)]
end;
bsm_skip_is([], _) -> [].
bsm_extracted([{_,#b_blk{is=Is}}|Bs]) ->
case Is of
[#b_set{op=bs_extract,args=[Ctx]}|_] ->
[Ctx|bsm_extracted(Bs)];
_ ->
bsm_extracted(Bs)
end;
bsm_extracted([]) -> [].
%%%
%%% Short-cutting binary matching instructions.
%%%
ssa_opt_bsm_shortcut(#st{ssa=Linear}=St) ->
Positions = bsm_positions(Linear, #{}),
case map_size(Positions) of
0 ->
%% No binary matching instructions.
St;
_ ->
St#st{ssa=bsm_shortcut(Linear, Positions)}
end.
bsm_positions([{L,#b_blk{is=Is,last=Last}}|Bs], PosMap0) ->
PosMap = bsm_positions_is(Is, PosMap0),
case {Is,Last} of
{[#b_set{op=bs_test_tail,dst=Bool,args=[Ctx,#b_literal{val=Bits0}]}],
#b_br{bool=Bool,fail=Fail}} ->
Bits = Bits0 + maps:get(Ctx, PosMap0),
bsm_positions(Bs, PosMap#{L=>{Bits,Fail}});
{_,_} ->
bsm_positions(Bs, PosMap)
end;
bsm_positions([], PosMap) -> PosMap.
bsm_positions_is([#b_set{op=bs_start_match,dst=New}|Is], PosMap0) ->
PosMap = PosMap0#{New=>0},
bsm_positions_is(Is, PosMap);
bsm_positions_is([#b_set{op=bs_match,dst=New,args=Args}|Is], PosMap0) ->
[_,Old|_] = Args,
#{Old:=Bits0} = PosMap0,
Bits = bsm_update_bits(Args, Bits0),
PosMap = PosMap0#{New=>Bits},
bsm_positions_is(Is, PosMap);
bsm_positions_is([_|Is], PosMap) ->
bsm_positions_is(Is, PosMap);
bsm_positions_is([], PosMap) -> PosMap.
bsm_update_bits([#b_literal{val=string},_,#b_literal{val=String}], Bits) ->
Bits + bit_size(String);
bsm_update_bits([#b_literal{val=utf8}|_], Bits) ->
Bits + 8;
bsm_update_bits([#b_literal{val=utf16}|_], Bits) ->
Bits + 16;
bsm_update_bits([#b_literal{val=utf32}|_], Bits) ->
Bits + 32;
bsm_update_bits([_,_,_,#b_literal{val=Sz},#b_literal{val=U}], Bits)
when is_integer(Sz) ->
Bits + Sz*U;
bsm_update_bits(_, Bits) -> Bits.
bsm_shortcut([{L,#b_blk{is=Is,last=Last0}=Blk}|Bs], PosMap) ->
case {Is,Last0} of
{[#b_set{op=bs_match,dst=New,args=[_,Old|_]},
#b_set{op=succeeded,dst=Bool,args=[New]}],
#b_br{bool=Bool,fail=Fail}} ->
case PosMap of
#{Old:=Bits,Fail:={TailBits,NextFail}} when Bits > TailBits ->
Last = Last0#b_br{fail=NextFail},
[{L,Blk#b_blk{last=Last}}|bsm_shortcut(Bs, PosMap)];
#{} ->
[{L,Blk}|bsm_shortcut(Bs, PosMap)]
end;
{_,_} ->
[{L,Blk}|bsm_shortcut(Bs, PosMap)]
end;
bsm_shortcut([], _PosMap) -> [].
%%%
%%% Eliminate redundant bs_test_unit2 instructions.
%%%
ssa_opt_bsm_units(#st{ssa=Linear}=St) ->
St#st{ssa=bsm_units(Linear, #{})}.
bsm_units([{L,#b_blk{last=#b_br{succ=Succ,fail=Fail}}=Block0} | Bs], UnitMaps0) ->
UnitsIn = maps:get(L, UnitMaps0, #{}),
{Block, UnitsOut} = bsm_units_skip(Block0, UnitsIn),
UnitMaps1 = bsm_units_join(Succ, UnitsOut, UnitMaps0),
UnitMaps = bsm_units_join(Fail, UnitsIn, UnitMaps1),
[{L, Block} | bsm_units(Bs, UnitMaps)];
bsm_units([{L,#b_blk{last=#b_switch{fail=Fail,list=Switch}}=Block} | Bs], UnitMaps0) ->
UnitsIn = maps:get(L, UnitMaps0, #{}),
Labels = [Fail | [Lbl || {_Arg, Lbl} <- Switch]],
UnitMaps = foldl(fun(Lbl, UnitMaps) ->
bsm_units_join(Lbl, UnitsIn, UnitMaps)
end, UnitMaps0, Labels),
[{L, Block} | bsm_units(Bs, UnitMaps)];
bsm_units([{L, Block} | Bs], UnitMaps) ->
[{L, Block} | bsm_units(Bs, UnitMaps)];
bsm_units([], _UnitMaps) ->
[].
bsm_units_skip(Block, Units) ->
bsm_units_skip_1(Block#b_blk.is, Block, Units).
bsm_units_skip_1([#b_set{op=bs_start_match,dst=New}|_], Block, Units) ->
%% We bail early since there can't be more than one match per block.
{Block, Units#{ New => 1 }};
bsm_units_skip_1([#b_set{op=bs_match,
dst=New,
args=[#b_literal{val=skip},
Ctx,
#b_literal{val=binary},
_Flags,
#b_literal{val=all},
#b_literal{val=OpUnit}]}=Skip | Test],
Block0, Units) ->
[#b_set{op=succeeded,dst=Bool,args=[New]}] = Test, %Assertion.
#b_br{bool=Bool} = Last0 = Block0#b_blk.last, %Assertion.
CtxUnit = maps:get(Ctx, Units),
if
CtxUnit rem OpUnit =:= 0 ->
Is = takewhile(fun(I) -> I =/= Skip end, Block0#b_blk.is),
Last = Last0#b_br{bool=#b_literal{val=true}},
Block = Block0#b_blk{is=Is,last=Last},
{Block, Units#{ New => CtxUnit }};
CtxUnit rem OpUnit =/= 0 ->
{Block0, Units#{ New => OpUnit, Ctx => OpUnit }}
end;
bsm_units_skip_1([#b_set{op=bs_match,dst=New,args=Args}|_], Block, Units) ->
[_,Ctx|_] = Args,
CtxUnit = maps:get(Ctx, Units),
OpUnit = bsm_op_unit(Args),
{Block, Units#{ New => gcd(OpUnit, CtxUnit) }};
bsm_units_skip_1([_I | Is], Block, Units) ->
bsm_units_skip_1(Is, Block, Units);
bsm_units_skip_1([], Block, Units) ->
{Block, Units}.
bsm_op_unit([_,_,_,Size,#b_literal{val=U}]) ->
case Size of
#b_literal{val=Sz} when is_integer(Sz) -> Sz*U;
_ -> U
end;
bsm_op_unit([#b_literal{val=string},_,#b_literal{val=String}]) ->
bit_size(String);
bsm_op_unit([#b_literal{val=utf8}|_]) ->
8;
bsm_op_unit([#b_literal{val=utf16}|_]) ->
16;
bsm_op_unit([#b_literal{val=utf32}|_]) ->
32;
bsm_op_unit(_) ->
1.
%% Several paths can lead to the same match instruction and the inferred units
%% may differ between them, so we can only keep the information that is common
%% to all paths.
bsm_units_join(Lbl, MapA, UnitMaps0) when is_map_key(Lbl, UnitMaps0) ->
MapB = maps:get(Lbl, UnitMaps0),
Merged = if
map_size(MapB) =< map_size(MapA) ->
bsm_units_join_1(maps:keys(MapB), MapA, MapB);
map_size(MapB) > map_size(MapA) ->
bsm_units_join_1(maps:keys(MapA), MapB, MapA)
end,
maps:put(Lbl, Merged, UnitMaps0);
bsm_units_join(Lbl, MapA, UnitMaps0) when MapA =/= #{} ->
maps:put(Lbl, MapA, UnitMaps0);
bsm_units_join(_Lbl, _MapA, UnitMaps0) ->
UnitMaps0.
bsm_units_join_1([Key | Keys], Left, Right) when is_map_key(Key, Left) ->
UnitA = maps:get(Key, Left),
UnitB = maps:get(Key, Right),
bsm_units_join_1(Keys, Left, maps:put(Key, gcd(UnitA, UnitB), Right));
bsm_units_join_1([Key | Keys], Left, Right) ->
bsm_units_join_1(Keys, Left, maps:remove(Key, Right));
bsm_units_join_1([], _MapA, Right) ->
Right.
%%%
%%% Miscellanous optimizations in execution order.
%%%
ssa_opt_misc(#st{ssa=Linear}=St) ->
St#st{ssa=misc_opt(Linear, #{})}.
misc_opt([{L,#b_blk{is=Is0,last=Last0}=Blk0}|Bs], Sub0) ->
{Is,Sub} = misc_opt_is(Is0, Sub0, []),
Last = sub(Last0, Sub),
Blk = Blk0#b_blk{is=Is,last=Last},
[{L,Blk}|misc_opt(Bs, Sub)];
misc_opt([], _) -> [].
misc_opt_is([#b_set{op=phi}=I0|Is], Sub0, Acc) ->
#b_set{dst=Dst,args=Args} = I = sub(I0, Sub0),
case all_same(Args) of
true ->
%% Eliminate the phi node if there is just one source
%% value or if the values are identical.
[{Val,_}|_] = Args,
Sub = Sub0#{Dst=>Val},
misc_opt_is(Is, Sub, Acc);
false ->
misc_opt_is(Is, Sub0, [I|Acc])
end;
misc_opt_is([#b_set{op={bif,'and'}}=I0], Sub, Acc) ->
#b_set{dst=Dst,args=Args} = I = sub(I0, Sub),
case eval_and(Args) of
error ->
misc_opt_is([], Sub, [I|Acc]);
Val ->
misc_opt_is([], Sub#{Dst=>Val}, Acc)
end;
misc_opt_is([#b_set{op={bif,'or'}}=I0], Sub, Acc) ->
#b_set{dst=Dst,args=Args} = I = sub(I0, Sub),
case eval_or(Args) of
error ->
misc_opt_is([], Sub, [I|Acc]);
Val ->
misc_opt_is([], Sub#{Dst=>Val}, Acc)
end;
misc_opt_is([#b_set{}=I0|Is], Sub, Acc) ->
#b_set{op=Op,dst=Dst,args=Args} = I = sub(I0, Sub),
case make_literal(Op, Args) of
#b_literal{}=Literal ->
misc_opt_is(Is, Sub#{Dst=>Literal}, Acc);
error ->
misc_opt_is(Is, Sub, [I|Acc])
end;
misc_opt_is([], Sub, Acc) ->
{reverse(Acc),Sub}.
all_same([{H,_}|T]) ->
all(fun({E,_}) -> E =:= H end, T).
make_literal(put_tuple, Args) ->
case make_literal_list(Args, []) of
error ->
error;
List ->
#b_literal{val=list_to_tuple(List)}
end;
make_literal(put_list, [#b_literal{val=H},#b_literal{val=T}]) ->
#b_literal{val=[H|T]};
make_literal(_, _) -> error.
make_literal_list([#b_literal{val=H}|T], Acc) ->
make_literal_list(T, [H|Acc]);
make_literal_list([_|_], _) ->
error;
make_literal_list([], Acc) ->
reverse(Acc).
eval_and(Args) ->
case Args of
[_,#b_literal{val=false}=Res] -> Res;
[Res,#b_literal{val=true}] -> Res;
[_,_] -> error
end.
eval_or(Args) ->
case Args of
[Res,#b_literal{val=false}] -> Res;
[_,#b_literal{val=true}=Res] -> Res;
[_,_] -> error
end.
%%%
%%% Optimize expressions such as "tuple_size(Var) =:= 2".
%%%
%%% Consider this code:
%%%
%%% 0:
%%% .
%%% .
%%% .
%%% Size = bif:tuple_size Var
%%% BoolVar1 = succeeded Size
%%% br BoolVar1, label 4, label 3
%%%
%%% 4:
%%% BoolVar2 = bif:'=:=' Size, literal 2
%%% br BoolVar2, label 6, label 3
%%%
%%% 6: ... %% OK
%%%
%%% 3: ... %% Not a tuple of size 2
%%%
%%% The BEAM code will look this:
%%%
%%% {bif,tuple_size,{f,3},[{x,0}],{x,0}}.
%%% {test,is_eq_exact,{f,3},[{x,0},{integer,2}]}.
%%%
%%% Better BEAM code will be produced if we transform the
%%% code like this:
%%%
%%% 0:
%%% .
%%% .
%%% .
%%% br label 10
%%%
%%% 10:
%%% NewBoolVar = bif:is_tuple Var
%%% br NewBoolVar, label 11, label 3
%%%
%%% 11:
%%% Size = bif:tuple_size Var
%%% br label 4
%%%
%%% 4:
%%% BoolVar2 = bif:'=:=' Size, literal 2
%%% br BoolVar2, label 6, label 3
%%%
%%% (The key part of the transformation is the removal of
%%% the 'succeeded' instruction to signal to the code generator
%%% that the call to tuple_size/1 can't fail.)
%%%
%%% The BEAM code will look like:
%%%
%%% {test,is_tuple,{f,3},[{x,0}]}.
%%% {test_arity,{f,3},[{x,0},2]}.
%%%
%%% Those two instructions will be combined into a single
%%% is_tuple_of_arity instruction by the loader.
%%%
ssa_opt_tuple_size(#st{ssa=Linear0,cnt=Count0}=St) ->
{Linear,Count} = opt_tup_size(Linear0, Count0, []),
St#st{ssa=Linear,cnt=Count}.
opt_tup_size([{L,#b_blk{is=Is,last=Last}=Blk}|Bs], Count0, Acc0) ->
case {Is,Last} of
{[#b_set{op={bif,'=:='},dst=Bool,args=[#b_var{}=Tup,#b_literal{val=Arity}]}],
#b_br{bool=Bool}} when is_integer(Arity), Arity >= 0 ->
{Acc,Count} = opt_tup_size_1(Tup, L, Count0, Acc0),
opt_tup_size(Bs, Count, [{L,Blk}|Acc]);
{_,_} ->
opt_tup_size(Bs, Count0, [{L,Blk}|Acc0])
end;
opt_tup_size([], Count, Acc) ->
{reverse(Acc),Count}.
opt_tup_size_1(Size, EqL, Count0, [{L,Blk0}|Acc]) ->
case Blk0 of
#b_blk{is=Is0,last=#b_br{bool=Bool,succ=EqL,fail=Fail}} ->
case opt_tup_size_is(Is0, Bool, Size, []) of
none ->
{[{L,Blk0}|Acc],Count0};
{PreIs,TupleSizeIs,Tuple} ->
opt_tup_size_2(PreIs, TupleSizeIs, L, EqL,
Tuple, Fail, Count0, Acc)
end;
#b_blk{} ->
{[{L,Blk0}|Acc],Count0}
end;
opt_tup_size_1(_, _, Count, Acc) ->
{Acc,Count}.
opt_tup_size_2(PreIs, TupleSizeIs, PreL, EqL, Tuple, Fail, Count0, Acc) ->
IsTupleL = Count0,
TupleSizeL = Count0 + 1,
Bool = #b_var{name={'@ssa_bool',Count0+2}},
Count = Count0 + 3,
True = #b_literal{val=true},
PreBr = #b_br{bool=True,succ=IsTupleL,fail=IsTupleL},
PreBlk = #b_blk{is=PreIs,last=PreBr},
IsTupleIs = [#b_set{op={bif,is_tuple},dst=Bool,args=[Tuple]}],
IsTupleBr = #b_br{bool=Bool,succ=TupleSizeL,fail=Fail},
IsTupleBlk = #b_blk{is=IsTupleIs,last=IsTupleBr},
TupleSizeBr = #b_br{bool=True,succ=EqL,fail=EqL},
TupleSizeBlk = #b_blk{is=TupleSizeIs,last=TupleSizeBr},
{[{TupleSizeL,TupleSizeBlk},
{IsTupleL,IsTupleBlk},
{PreL,PreBlk}|Acc],Count}.
opt_tup_size_is([#b_set{op={bif,tuple_size},dst=Size,args=[Tuple]}=I,
#b_set{op=succeeded,dst=Bool,args=[Size]}],
Bool, Size, Acc) ->
{reverse(Acc),[I],Tuple};
opt_tup_size_is([I|Is], Bool, Size, Acc) ->
opt_tup_size_is(Is, Bool, Size, [I|Acc]);
opt_tup_size_is([], _, _, _Acc) -> none.
%%%
%%% Optimize #b_switch{} instructions.
%%%
%%% If the argument for a #b_switch{} comes from a phi node with all
%%% literals, any values in the switch list which are not in the phi
%%% node can be removed.
%%%
%%% If the values in the phi node and switch list are the same,
%%% the failure label can't be reached and be eliminated.
%%%
%%% A #b_switch{} with only one value can be rewritten to
%%% a #b_br{}. A switch that only verifies that the argument
%%% is 'true' or 'false' can be rewritten to a is_boolean test.
%%%
ssa_opt_sw(#st{ssa=Linear0,cnt=Count0}=St) ->
{Linear,Count} = opt_sw(Linear0, #{}, Count0, []),
St#st{ssa=Linear,cnt=Count}.
opt_sw([{L,#b_blk{is=Is,last=#b_switch{}=Last0}=Blk0}|Bs], Phis0, Count0, Acc) ->
Phis = opt_sw_phis(Is, Phis0),
case opt_sw_last(Last0, Phis) of
#b_switch{arg=Arg,fail=Fail,list=[{Lit,Lbl}]} ->
%% Rewrite a single value switch to a br.
Bool = #b_var{name={'@ssa_bool',Count0}},
Count = Count0 + 1,
IsEq = #b_set{op={bif,'=:='},dst=Bool,args=[Arg,Lit]},
Br = #b_br{bool=Bool,succ=Lbl,fail=Fail},
Blk = Blk0#b_blk{is=Is++[IsEq],last=Br},
opt_sw(Bs, Phis, Count, [{L,Blk}|Acc]);
#b_switch{arg=Arg,fail=Fail,
list=[{#b_literal{val=B1},Lbl},{#b_literal{val=B2},Lbl}]}
when B1 =:= not B2 ->
%% Replace with is_boolean test.
Bool = #b_var{name={'@ssa_bool',Count0}},
Count = Count0 + 1,
IsBool = #b_set{op={bif,is_boolean},dst=Bool,args=[Arg]},
Br = #b_br{bool=Bool,succ=Lbl,fail=Fail},
Blk = Blk0#b_blk{is=Is++[IsBool],last=Br},
opt_sw(Bs, Phis, Count, [{L,Blk}|Acc]);
Last0 ->
opt_sw(Bs, Phis, Count0, [{L,Blk0}|Acc]);
Last ->
Blk = Blk0#b_blk{last=Last},
opt_sw(Bs, Phis, Count0, [{L,Blk}|Acc])
end;
opt_sw([{L,#b_blk{is=Is}=Blk}|Bs], Phis0, Count, Acc) ->
Phis = opt_sw_phis(Is, Phis0),
opt_sw(Bs, Phis, Count, [{L,Blk}|Acc]);
opt_sw([], _Phis, Count, Acc) ->
{reverse(Acc),Count}.
opt_sw_phis([#b_set{op=phi,dst=Dst,args=Args}|Is], Phis) ->
case opt_sw_literals(Args, []) of
error ->
opt_sw_phis(Is, Phis);
Literals ->
opt_sw_phis(Is, Phis#{Dst=>Literals})
end;
opt_sw_phis(_, Phis) -> Phis.
opt_sw_last(#b_switch{arg=Arg,fail=Fail,list=List0}=Sw0, Phis) ->
case Phis of
#{Arg:=Values0} ->
Values = gb_sets:from_list(Values0),
%% Prune the switch list to only contain the possible values.
List1 = [P || {Lit,_}=P <- List0, gb_sets:is_member(Lit, Values)],
%% Now test whether the failure label can ever be reached.
Sw = case gb_sets:size(Values) =:= length(List1) of
true ->
%% The switch list has the same number of values as the phi node.
%% The values must be the same, because the values that were not
%% possible were pruned from the switch list. Therefore, the
%% failure label can't possibly be reached, and we can choose a
%% a new failure label by picking a value from the list.
case List1 of
[{#b_literal{},Lbl}|List] ->
Sw0#b_switch{fail=Lbl,list=List};
[] ->
Sw0#b_switch{list=List1}
end;
false ->
%% There are some values in the phi node that are not in the
%% switch list; thus, the failure label can still be reached.
Sw0
end,
beam_ssa:normalize(Sw);
#{} ->
%% Ensure that no label in the switch list is the same
%% as the failure label.
List = [{Val,Lbl} || {Val,Lbl} <- List0, Lbl =/= Fail],
Sw = Sw0#b_switch{list=List},
beam_ssa:normalize(Sw)
end.
opt_sw_literals([{#b_literal{}=Lit,_}|T], Acc) ->
opt_sw_literals(T, [Lit|Acc]);
opt_sw_literals([_|_], _Acc) ->
error;
opt_sw_literals([], Acc) -> Acc.
%%%
%%% Merge blocks.
%%%
ssa_opt_merge_blocks(#st{ssa=Blocks}=St) ->
Preds = beam_ssa:predecessors(Blocks),
St#st{ssa=merge_blocks_1(beam_ssa:rpo(Blocks), Preds, Blocks)}.
merge_blocks_1([L|Ls], Preds0, Blocks0) ->
case Preds0 of
#{L:=[P]} ->
#{P:=Blk0,L:=Blk1} = Blocks0,
case is_merge_allowed(L, Blk0, Blk1) of
true ->
#b_blk{is=Is0} = Blk0,
#b_blk{is=Is1} = Blk1,
Is = Is0 ++ Is1,
Blk = Blk1#b_blk{is=Is},
Blocks1 = maps:remove(L, Blocks0),
Blocks2 = maps:put(P, Blk, Blocks1),
Successors = beam_ssa:successors(Blk),
Blocks = beam_ssa:update_phi_labels(Successors, L, P, Blocks2),
Preds = merge_update_preds(Successors, L, P, Preds0),
merge_blocks_1(Ls, Preds, Blocks);
false ->
merge_blocks_1(Ls, Preds0, Blocks0)
end;
#{} ->
merge_blocks_1(Ls, Preds0, Blocks0)
end;
merge_blocks_1([], _Preds, Blocks) -> Blocks.
merge_update_preds([L|Ls], From, To, Preds0) ->
Ps = [rename_label(P, From, To) || P <- maps:get(L, Preds0)],
Preds = maps:put(L, Ps, Preds0),
merge_update_preds(Ls, From, To, Preds);
merge_update_preds([], _, _, Preds) -> Preds.
rename_label(From, From, To) -> To;
rename_label(Lbl, _, _) -> Lbl.
is_merge_allowed(_, _, #b_blk{is=[#b_set{op=peek_message}|_]}) ->
false;
is_merge_allowed(L, Blk0, #b_blk{}) ->
case beam_ssa:successors(Blk0) of
[L] -> true;
[_|_] -> false
end.
%%%
%%% When a tuple is matched, the pattern matching compiler generates a
%%% get_tuple_element instruction for every tuple element that will
%%% ever be used in the rest of the function. That often forces the
%%% extracted tuple elements to be stored in Y registers until it's
%%% time to use them. It could also mean that there could be execution
%%% paths that will never use the extracted elements.
%%%
%%% This optimization will sink get_tuple_element instructions, that
%%% is, move them forward in the execution stream to the last possible
%%% block there they will still dominate all uses. That may reduce the
%%% size of stack frames, reduce register shuffling, and avoid
%%% extracting tuple elements on execution paths that never use the
%%% extracted values.
%%%
ssa_opt_sink(#st{ssa=Blocks0}=St) ->
Linear = beam_ssa:linearize(Blocks0),
%% Create a map with all variables that define get_tuple_element
%% instructions. The variable name map to the block it is defined in.
Defs = maps:from_list(def_blocks(Linear)),
%% Now find all the blocks that use variables defined by get_tuple_element
%% instructions.
Used = used_blocks(Linear, Defs, []),
%% Calculate dominators.
Dom0 = beam_ssa:dominators(Blocks0),
%% It is not safe to move get_tuple_element instructions to blocks
%% that begin with certain instructions. It is also unsafe to move
%% the instructions into any part of a receive. To avoid such
%% unsafe moves, pretend that the unsuitable blocks are not
%% dominators.
Unsuitable = unsuitable(Linear, Blocks0),
Dom = case gb_sets:is_empty(Unsuitable) of
true ->
Dom0;
false ->
F = fun(_, DomBy) ->
[L || L <- DomBy,
not gb_sets:is_element(L, Unsuitable)]
end,
maps:map(F, Dom0)
end,
%% Calculate new positions for get_tuple_element instructions. The new
%% position is a block that dominates all uses of the variable.
DefLoc = new_def_locations(Used, Defs, Dom),
%% Now move all suitable get_tuple_element instructions to their
%% new blocks.
Blocks = foldl(fun({V,To}, A) ->
From = maps:get(V, Defs),
move_defs(V, From, To, A)
end, Blocks0, DefLoc),
St#st{ssa=Blocks}.
def_blocks([{L,#b_blk{is=Is}}|Bs]) ->
def_blocks_is(Is, L, def_blocks(Bs));
def_blocks([]) -> [].
def_blocks_is([#b_set{op=get_tuple_element,dst=Dst}|Is], L, Acc) ->
def_blocks_is(Is, L, [{Dst,L}|Acc]);
def_blocks_is([_|Is], L, Acc) ->
def_blocks_is(Is, L, Acc);
def_blocks_is([], _, Acc) -> Acc.
used_blocks([{L,Blk}|Bs], Def, Acc0) ->
Used = beam_ssa:used(Blk),
Acc = [{V,L} || V <- Used, maps:is_key(V, Def)] ++ Acc0,
used_blocks(Bs, Def, Acc);
used_blocks([], _Def, Acc) ->
rel2fam(Acc).
%% unsuitable(Linear, Blocks) -> Unsuitable.
%% Return an ordset of block labels for the blocks that are not
%% suitable for sinking of get_tuple_element instructions.
unsuitable(Linear, Blocks) ->
Predecessors = beam_ssa:predecessors(Blocks),
Unsuitable0 = unsuitable_1(Linear),
Unsuitable1 = unsuitable_recv(Linear, Blocks, Predecessors),
gb_sets:from_list(Unsuitable0 ++ Unsuitable1).
unsuitable_1([{L,#b_blk{is=[#b_set{op=Op}|_]}}|Bs]) ->
Unsuitable = case Op of
bs_extract -> true;
bs_put -> true;
{float,_} -> true;
landingpad -> true;
peek_message -> true;
wait_timeout -> true;
_ -> false
end,
case Unsuitable of
true ->
[L|unsuitable_1(Bs)];
false ->
unsuitable_1(Bs)
end;
unsuitable_1([{_,#b_blk{}}|Bs]) ->
unsuitable_1(Bs);
unsuitable_1([]) -> [].
unsuitable_recv([{L,#b_blk{is=[#b_set{op=Op}|_]}}|Bs], Blocks, Predecessors) ->
Ls = case Op of
remove_message ->
unsuitable_loop(L, Blocks, Predecessors);
recv_next ->
unsuitable_loop(L, Blocks, Predecessors);
_ ->
[]
end,
Ls ++ unsuitable_recv(Bs, Blocks, Predecessors);
unsuitable_recv([_|Bs], Blocks, Predecessors) ->
unsuitable_recv(Bs, Blocks, Predecessors);
unsuitable_recv([], _, _) -> [].
unsuitable_loop(L, Blocks, Predecessors) ->
unsuitable_loop(L, Blocks, Predecessors, []).
unsuitable_loop(L, Blocks, Predecessors, Acc) ->
Ps = maps:get(L, Predecessors),
unsuitable_loop_1(Ps, Blocks, Predecessors, Acc).
unsuitable_loop_1([P|Ps], Blocks, Predecessors, Acc0) ->
case maps:get(P, Blocks) of
#b_blk{is=[#b_set{op=peek_message}|_]} ->
unsuitable_loop_1(Ps, Blocks, Predecessors, Acc0);
#b_blk{} ->
case ordsets:is_element(P, Acc0) of
false ->
Acc1 = ordsets:add_element(P, Acc0),
Acc = unsuitable_loop(P, Blocks, Predecessors, Acc1),
unsuitable_loop_1(Ps, Blocks, Predecessors, Acc);
true ->
unsuitable_loop_1(Ps, Blocks, Predecessors, Acc0)
end
end;
unsuitable_loop_1([], _, _, Acc) -> Acc.
%% new_def_locations([{Variable,[UsedInBlock]}|Vs], Defs, Dominators) ->
%% [{Variable,NewDefinitionBlock}]
%% Calculate new locations for get_tuple_element instructions. For each
%% variable, the new location is a block that dominates all uses of
%% variable and as near to the uses of as possible. If no such block
%% distinct from the block where the instruction currently is, the
%% variable will not be included in the result list.
new_def_locations([{V,UsedIn}|Vs], Defs, Dom) ->
DefIn = maps:get(V, Defs),
case common_dom(UsedIn, DefIn, Dom) of
[] ->
new_def_locations(Vs, Defs, Dom);
[_|_]=BetterDef ->
L = most_dominated(BetterDef, Dom),
[{V,L}|new_def_locations(Vs, Defs, Dom)]
end;
new_def_locations([], _, _) -> [].
common_dom([L|Ls], DefIn, Dom) ->
DomBy0 = maps:get(L, Dom),
DomBy = ordsets:subtract(DomBy0, maps:get(DefIn, Dom)),
common_dom_1(Ls, Dom, DomBy).
common_dom_1(_, _, []) ->
[];
common_dom_1([L|Ls], Dom, [_|_]=DomBy0) ->
DomBy1 = maps:get(L, Dom),
DomBy = ordsets:intersection(DomBy0, DomBy1),
common_dom_1(Ls, Dom, DomBy);
common_dom_1([], _, DomBy) -> DomBy.
most_dominated([L|Ls], Dom) ->
most_dominated(Ls, L, maps:get(L, Dom), Dom).
most_dominated([L|Ls], L0, DomBy, Dom) ->
case member(L, DomBy) of
true ->
most_dominated(Ls, L0, DomBy, Dom);
false ->
most_dominated(Ls, L, maps:get(L, Dom), Dom)
end;
most_dominated([], L, _, _) -> L.
%% Move get_tuple_element instructions to their new locations.
move_defs(V, From, To, Blocks) ->
#{From:=FromBlk0,To:=ToBlk0} = Blocks,
{Def,FromBlk} = remove_def(V, FromBlk0),
try insert_def(V, Def, ToBlk0) of
ToBlk ->
%%io:format("~p: ~p => ~p\n", [V,From,To]),
Blocks#{From:=FromBlk,To:=ToBlk}
catch
throw:not_possible ->
Blocks
end.
remove_def(V, #b_blk{is=Is0}=Blk) ->
{Def,Is} = remove_def_is(Is0, V, []),
{Def,Blk#b_blk{is=Is}}.
remove_def_is([#b_set{dst=Dst}=Def|Is], Dst, Acc) ->
{Def,reverse(Acc, Is)};
remove_def_is([I|Is], Dst, Acc) ->
remove_def_is(Is, Dst, [I|Acc]).
insert_def(V, Def, #b_blk{is=Is0}=Blk) ->
Is = insert_def_is(Is0, V, Def),
Blk#b_blk{is=Is}.
insert_def_is([#b_set{op=phi}=I|Is], V, Def) ->
case member(V, beam_ssa:used(I)) of
true ->
throw(not_possible);
false ->
[I|insert_def_is(Is, V, Def)]
end;
insert_def_is([#b_set{op=Op}=I|Is]=Is0, V, Def) ->
Action0 = case Op of
call -> beyond;
'catch_end' -> beyond;
set_tuple_element -> beyond;
timeout -> beyond;
_ -> here
end,
Action = case Is of
[#b_set{op=succeeded}|_] -> here;
_ -> Action0
end,
case Action of
beyond ->
case member(V, beam_ssa:used(I)) of
true ->
%% The variable is used by this instruction. We must
%% place the definition before this instruction.
[Def|Is0];
false ->
%% Place it beyond the current instruction.
[I|insert_def_is(Is, V, Def)]
end;
here ->
[Def|Is0]
end;
insert_def_is([], _V, Def) ->
[Def].
%%%
%%% Common utilities.
%%%
gcd(A, B) ->
case A rem B of
0 -> B;
X -> gcd(B, X)
end.
rel2fam(S0) ->
S1 = sofs:relation(S0),
S = sofs:rel2fam(S1),
sofs:to_external(S).
sub(I, Sub) ->
beam_ssa:normalize(sub_1(I, Sub)).
sub_1(#b_set{op=phi,args=Args}=I, Sub) ->
I#b_set{args=[{sub_arg(A, Sub),P} || {A,P} <- Args]};
sub_1(#b_set{args=Args}=I, Sub) ->
I#b_set{args=[sub_arg(A, Sub) || A <- Args]};
sub_1(#b_br{bool=#b_var{}=Old}=Br, Sub) ->
New = sub_arg(Old, Sub),
Br#b_br{bool=New};
sub_1(#b_switch{arg=#b_var{}=Old}=Sw, Sub) ->
New = sub_arg(Old, Sub),
Sw#b_switch{arg=New};
sub_1(#b_ret{arg=#b_var{}=Old}=Ret, Sub) ->
New = sub_arg(Old, Sub),
Ret#b_ret{arg=New};
sub_1(Last, _) -> Last.
sub_arg(#b_remote{mod=Mod,name=Name}=Rem, Sub) ->
Rem#b_remote{mod=sub_arg(Mod, Sub),name=sub_arg(Name, Sub)};
sub_arg(Old, Sub) ->
case Sub of
#{Old:=New} -> New;
#{} -> Old
end.