aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJohn Högberg <[email protected]>2018-10-01 15:23:17 +0200
committerJohn Högberg <[email protected]>2018-10-03 13:42:10 +0200
commit31a4c1d65c24d8240d5d46d8cffe81097ebb28bf (patch)
tree39f2593781ab8d9029fe161b0da6bdad5943bdbd
parent25d15bc981659947899499bbce6040868d73418f (diff)
downloadotp-31a4c1d65c24d8240d5d46d8cffe81097ebb28bf.tar.gz
otp-31a4c1d65c24d8240d5d46d8cffe81097ebb28bf.tar.bz2
otp-31a4c1d65c24d8240d5d46d8cffe81097ebb28bf.zip
Optimize named funs and fun-wrapped macros
If a fun is defined locally and only used for calls, it can be replaced with direct calls to the relevant function. This greatly speeds up "named functions" (which rely on make_fun to recreate themselves) and macros that wrap their body in a fun.
-rw-r--r--lib/compiler/src/Makefile1
-rw-r--r--lib/compiler/src/beam_ssa.erl1
-rw-r--r--lib/compiler/src/beam_ssa_funs.erl149
-rw-r--r--lib/compiler/src/compile.erl4
-rw-r--r--lib/compiler/src/compiler.app.src1
5 files changed, 156 insertions, 0 deletions
diff --git a/lib/compiler/src/Makefile b/lib/compiler/src/Makefile
index 26ae6566e6..d475e5a19a 100644
--- a/lib/compiler/src/Makefile
+++ b/lib/compiler/src/Makefile
@@ -63,6 +63,7 @@ MODULES = \
beam_ssa_bsm \
beam_ssa_codegen \
beam_ssa_dead \
+ beam_ssa_funs \
beam_ssa_lint \
beam_ssa_opt \
beam_ssa_pp \
diff --git a/lib/compiler/src/beam_ssa.erl b/lib/compiler/src/beam_ssa.erl
index 1a2e759965..a71802cf80 100644
--- a/lib/compiler/src/beam_ssa.erl
+++ b/lib/compiler/src/beam_ssa.erl
@@ -198,6 +198,7 @@ no_side_effect(#b_set{op=Op}) ->
has_map_field -> true;
is_nonempty_list -> true;
is_tagged_tuple -> true;
+ make_fun -> true;
put_map -> true;
put_list -> true;
put_tuple -> true;
diff --git a/lib/compiler/src/beam_ssa_funs.erl b/lib/compiler/src/beam_ssa_funs.erl
new file mode 100644
index 0000000000..38df50fd74
--- /dev/null
+++ b/lib/compiler/src/beam_ssa_funs.erl
@@ -0,0 +1,149 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2018. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%%
+%%% If a fun is defined locally and only used for calls, it can be replaced
+%%% with direct calls to the relevant function. This greatly speeds up "named
+%%% functions" (which rely on make_fun to recreate themselves) and macros that
+%%% wrap their body in a fun.
+%%%
+
+-module(beam_ssa_funs).
+
+-export([module/2]).
+
+-include("beam_ssa.hrl").
+
+-import(lists, [foldl/3]).
+
+-spec module(Module, Options) -> Result when
+ Module :: beam_ssa:b_module(),
+ Options :: [compile:option()],
+ Result :: {ok, beam_ssa:b_module()}.
+
+module(#b_module{body=Fs0}=Module, _Opts) ->
+ Trampolines = foldl(fun find_trampolines/2, #{}, Fs0),
+ Fs = [lfo(F, Trampolines) || F <- Fs0],
+ {ok, Module#b_module{body=Fs}}.
+
+%% If a function does absolutely nothing beyond calling another function with
+%% the same arguments in the same order, we can shave off a call by short-
+%% circuiting it.
+find_trampolines(#b_function{args=Args,bs=Blocks}=F, Trampolines) ->
+ case maps:get(0, Blocks) of
+ #b_blk{is=[#b_set{op=call,
+ args=[#b_local{}=Actual | Args],
+ dst=Dst}],
+ last=#b_ret{arg=Dst}} ->
+ {_, Name, Arity} = beam_ssa:get_anno(func_info, F),
+ Trampoline = #b_local{name=#b_literal{val=Name},arity=Arity},
+ maps:put(Trampoline, Actual, Trampolines);
+ _ ->
+ Trampolines
+ end.
+
+lfo(#b_function{bs=Blocks0}=F, Trampolines) ->
+ Linear0 = beam_ssa:linearize(Blocks0),
+ Linear = lfo_optimize(Linear0, lfo_analyze(Linear0, #{}), Trampolines),
+ F#b_function{bs=maps:from_list(Linear)}.
+
+%% Gather a map of the locally defined funs that are only used for calls.
+lfo_analyze([{_L,#b_blk{is=Is,last=Last}}|Bs], LFuns0) ->
+ LFuns = lfo_analyze_last(Last, lfo_analyze_is(Is, LFuns0)),
+ lfo_analyze(Bs, LFuns);
+lfo_analyze([], LFuns) ->
+ LFuns.
+
+lfo_analyze_is([#b_set{op=make_fun,
+ dst=Dst,
+ args=[#b_local{} | FreeVars]}=Def | Is],
+ LFuns0) ->
+ LFuns = maps:put(Dst, Def, maps:without(FreeVars, LFuns0)),
+ lfo_analyze_is(Is, LFuns);
+lfo_analyze_is([#b_set{op=call,
+ args=[Fun | CallArgs]} | Is],
+ LFuns) when is_map_key(Fun, LFuns) ->
+ #b_set{args=[#b_local{arity=Arity} | FreeVars]} = maps:get(Fun, LFuns),
+ case length(CallArgs) + length(FreeVars) of
+ Arity ->
+ lfo_analyze_is(Is, maps:without(CallArgs, LFuns));
+ _ ->
+ %% This will `badarity` at runtime, and it's easier to disable the
+ %% optimization than to simulate it.
+ lfo_analyze_is(Is, maps:without([Fun | CallArgs], LFuns))
+ end;
+lfo_analyze_is([#b_set{args=Args} | Is], LFuns) when map_size(LFuns) =/= 0 ->
+ %% We disqualify funs that are used outside calls because this forces them
+ %% to be created anyway, and the slight performance gain from direct calls
+ %% is not enough to offset the potential increase in stack frame size (the
+ %% free variables need to be kept alive until the call).
+ %%
+ %% This is also a kludge to make HiPE work, as the latter will generate
+ %% code with the assumption that the functions referenced in a make_fun
+ %% will only be used by funs, which will not be the case if we mix it with
+ %% direct calls. See cerl_cconv.erl for details.
+ %%
+ %% Future optimizations like delaying fun creation until use may require us
+ %% to copy affected functions so that HiPE gets its own to play with (until
+ %% HiPE is fixed anyway).
+ lfo_analyze_is(Is, maps:without(Args, LFuns));
+lfo_analyze_is([_ | Is], LFuns) ->
+ lfo_analyze_is(Is, LFuns);
+lfo_analyze_is([], LFuns) ->
+ LFuns.
+
+lfo_analyze_last(#b_switch{arg=Arg}, LFuns) ->
+ maps:remove(Arg, LFuns);
+lfo_analyze_last(#b_ret{arg=Arg}, LFuns) ->
+ maps:remove(Arg, LFuns);
+lfo_analyze_last(_, LFuns) ->
+ LFuns.
+
+%% Replace all calls of suitable funs with a direct call to their
+%% implementation. Liveness optimization will get rid of the make_fun
+%% instruction.
+lfo_optimize(Linear, LFuns, _Trampolines) when map_size(LFuns) =:= 0 ->
+ Linear;
+lfo_optimize(Linear, LFuns, Trampolines) ->
+ lfo_optimize_1(Linear, LFuns, Trampolines).
+
+lfo_optimize_1([{L,#b_blk{is=Is0}=Blk}|Bs], LFuns, Trampolines) ->
+ Is = lfo_optimize_is(Is0, LFuns, Trampolines),
+ [{L,Blk#b_blk{is=Is}} | lfo_optimize_1(Bs, LFuns, Trampolines)];
+lfo_optimize_1([], _LFuns, _Trampolines) ->
+ [].
+
+lfo_optimize_is([#b_set{op=call,
+ args=[Fun | CallArgs]}=Call0 | Is],
+ LFuns, Trampolines) when is_map_key(Fun, LFuns) ->
+ #b_set{args=[Local | FreeVars]} = maps:get(Fun, LFuns),
+ Args = [lfo_short_circuit(Local, Trampolines) | CallArgs ++ FreeVars],
+ Call = beam_ssa:add_anno(local_fun_opt, Fun, Call0#b_set{args=Args}),
+ [Call | lfo_optimize_is(Is, LFuns, Trampolines)];
+lfo_optimize_is([I | Is], LFuns, Trampolines) ->
+ [I | lfo_optimize_is(Is, LFuns, Trampolines)];
+lfo_optimize_is([], _LFuns, _Trampolines) ->
+ [].
+
+lfo_short_circuit(Call, Trampolines) ->
+ case maps:find(Call, Trampolines) of
+ {ok, Other} -> lfo_short_circuit(Other, Trampolines);
+ error -> Call
+ end.
diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl
index d894694c79..b4a446e532 100644
--- a/lib/compiler/src/compile.erl
+++ b/lib/compiler/src/compile.erl
@@ -823,6 +823,9 @@ kernel_passes() ->
{unless,no_bsm_opt,{pass,beam_ssa_bsm}},
{iff,dssabsm,{listing,"ssabsm"}},
{iff,ssalint,{pass,beam_ssa_lint}},
+ {unless,no_fun_opt,{pass,beam_ssa_funs}},
+ {iff,dssafuns,{listing,"ssafuns"}},
+ {iff,ssalint,{pass,beam_ssa_lint}},
{unless,no_ssa_opt,{pass,beam_ssa_opt}},
{iff,dssaopt,{listing,"ssaopt"}},
{iff,ssalint,{pass,beam_ssa_lint}},
@@ -2047,6 +2050,7 @@ pre_load() ->
beam_ssa_bsm,
beam_ssa_codegen,
beam_ssa_dead,
+ beam_ssa_funs,
beam_ssa_opt,
beam_ssa_pre_codegen,
beam_ssa_recv,
diff --git a/lib/compiler/src/compiler.app.src b/lib/compiler/src/compiler.app.src
index 7b802fdd62..86259270bd 100644
--- a/lib/compiler/src/compiler.app.src
+++ b/lib/compiler/src/compiler.app.src
@@ -39,6 +39,7 @@
beam_ssa_bsm,
beam_ssa_codegen,
beam_ssa_dead,
+ beam_ssa_funs,
beam_ssa_lint,
beam_ssa_opt,
beam_ssa_pp,