%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2018. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%
%% %CopyrightEnd%
%%
%%%
%%% If a fun is defined locally and only used for calls, it can be replaced
%%% with direct calls to the relevant function. This greatly speeds up "named
%%% functions" (which rely on make_fun to recreate themselves) and macros that
%%% wrap their body in a fun.
%%%
-module(beam_ssa_funs).
-export([module/2]).
-include("beam_ssa.hrl").
-import(lists, [foldl/3]).
-spec module(Module, Options) -> Result when
Module :: beam_ssa:b_module(),
Options :: [compile:option()],
Result :: {ok, beam_ssa:b_module()}.
module(#b_module{body=Fs0}=Module, _Opts) ->
Trampolines = foldl(fun find_trampolines/2, #{}, Fs0),
Fs = [lfo(F, Trampolines) || F <- Fs0],
{ok, Module#b_module{body=Fs}}.
%% If a function does absolutely nothing beyond calling another function with
%% the same arguments in the same order, we can shave off a call by short-
%% circuiting it.
find_trampolines(#b_function{args=Args,bs=Blocks}=F, Trampolines) ->
case map_get(0, Blocks) of
#b_blk{is=[#b_set{op=call,
args=[#b_local{}=Actual | Args],
dst=Dst}],
last=#b_ret{arg=Dst}} ->
{_, Name, Arity} = beam_ssa:get_anno(func_info, F),
Trampoline = #b_local{name=#b_literal{val=Name},arity=Arity},
Trampolines#{Trampoline => Actual};
_ ->
Trampolines
end.
lfo(#b_function{bs=Blocks0}=F, Trampolines) ->
Linear0 = beam_ssa:linearize(Blocks0),
Linear = lfo_optimize(Linear0, lfo_analyze(Linear0, #{}), Trampolines),
F#b_function{bs=maps:from_list(Linear)}.
%% Gather a map of the locally defined funs that are only used for calls.
lfo_analyze([{_L,#b_blk{is=Is,last=Last}}|Bs], LFuns0) ->
LFuns = lfo_analyze_last(Last, lfo_analyze_is(Is, LFuns0)),
lfo_analyze(Bs, LFuns);
lfo_analyze([], LFuns) ->
LFuns.
lfo_analyze_is([#b_set{op=make_fun,
dst=Dst,
args=[#b_local{} | FreeVars]}=Def | Is],
LFuns0) ->
LFuns = maps:put(Dst, Def, maps:without(FreeVars, LFuns0)),
lfo_analyze_is(Is, LFuns);
lfo_analyze_is([#b_set{op=call,
args=[Fun | CallArgs]} | Is],
LFuns) when is_map_key(Fun, LFuns) ->
#b_set{args=[#b_local{arity=Arity} | FreeVars]} = map_get(Fun, LFuns),
case length(CallArgs) + length(FreeVars) of
Arity ->
lfo_analyze_is(Is, maps:without(CallArgs, LFuns));
_ ->
%% This will `badarity` at runtime, and it's easier to disable the
%% optimization than to simulate it.
lfo_analyze_is(Is, maps:without([Fun | CallArgs], LFuns))
end;
lfo_analyze_is([#b_set{args=Args} | Is], LFuns) when map_size(LFuns) =/= 0 ->
%% We disqualify funs that are used outside calls because this forces them
%% to be created anyway, and the slight performance gain from direct calls
%% is not enough to offset the potential increase in stack frame size (the
%% free variables need to be kept alive until the call).
%%
%% This is also a kludge to make HiPE work, as the latter will generate
%% code with the assumption that the functions referenced in a make_fun
%% will only be used by funs, which will not be the case if we mix it with
%% direct calls. See cerl_cconv.erl for details.
%%
%% Future optimizations like delaying fun creation until use may require us
%% to copy affected functions so that HiPE gets its own to play with (until
%% HiPE is fixed anyway).
lfo_analyze_is(Is, maps:without(Args, LFuns));
lfo_analyze_is([_ | Is], LFuns) ->
lfo_analyze_is(Is, LFuns);
lfo_analyze_is([], LFuns) ->
LFuns.
lfo_analyze_last(#b_switch{arg=Arg}, LFuns) ->
maps:remove(Arg, LFuns);
lfo_analyze_last(#b_ret{arg=Arg}, LFuns) ->
maps:remove(Arg, LFuns);
lfo_analyze_last(_, LFuns) ->
LFuns.
%% Replace all calls of suitable funs with a direct call to their
%% implementation. Liveness optimization will get rid of the make_fun
%% instruction.
lfo_optimize(Linear, LFuns, _Trampolines) when map_size(LFuns) =:= 0 ->
Linear;
lfo_optimize(Linear, LFuns, Trampolines) ->
lfo_optimize_1(Linear, LFuns, Trampolines).
lfo_optimize_1([{L,#b_blk{is=Is0}=Blk}|Bs], LFuns, Trampolines) ->
Is = lfo_optimize_is(Is0, LFuns, Trampolines),
[{L,Blk#b_blk{is=Is}} | lfo_optimize_1(Bs, LFuns, Trampolines)];
lfo_optimize_1([], _LFuns, _Trampolines) ->
[].
lfo_optimize_is([#b_set{op=call,
args=[Fun | CallArgs]}=Call0 | Is],
LFuns, Trampolines) when is_map_key(Fun, LFuns) ->
#b_set{args=[Local | FreeVars]} = map_get(Fun, LFuns),
Args = [lfo_short_circuit(Local, Trampolines) | CallArgs ++ FreeVars],
Call = beam_ssa:add_anno(local_fun_opt, Fun, Call0#b_set{args=Args}),
[Call | lfo_optimize_is(Is, LFuns, Trampolines)];
lfo_optimize_is([I | Is], LFuns, Trampolines) ->
[I | lfo_optimize_is(Is, LFuns, Trampolines)];
lfo_optimize_is([], _LFuns, _Trampolines) ->
[].
lfo_short_circuit(Call, Trampolines) ->
case maps:find(Call, Trampolines) of
{ok, Other} -> lfo_short_circuit(Other, Trampolines);
error -> Call
end.