aboutsummaryrefslogtreecommitdiffstats
path: root/lib/dialyzer/test/options1_tests_SUITE_data/src/compiler/cerl_inline.erl
diff options
context:
space:
mode:
authorStavros Aronis <[email protected]>2010-06-18 03:44:25 +0300
committerLukas Larsson <[email protected]>2011-02-18 12:03:18 +0100
commit98de31e836a04ccc8f5f9acd90b9ba0803a24ab5 (patch)
tree3f26237297b0b2d9040de1b97eeb7cd75bce2dfe /lib/dialyzer/test/options1_tests_SUITE_data/src/compiler/cerl_inline.erl
parent08cec89bb1e781157a75c13e72562258b271b469 (diff)
downloadotp-98de31e836a04ccc8f5f9acd90b9ba0803a24ab5.tar.gz
otp-98de31e836a04ccc8f5f9acd90b9ba0803a24ab5.tar.bz2
otp-98de31e836a04ccc8f5f9acd90b9ba0803a24ab5.zip
Test suites for Dialyzer
This is a transcription of most of the cvs.srv.it.uu.se:/hipe repository dialyzer_tests into test suites that use the test server framework. See README for information on how to use the included scripts for modifications and updates. When testing Dialyzer it's important that several OTP modules are included in the plt. The suites takes care of that too.
Diffstat (limited to 'lib/dialyzer/test/options1_tests_SUITE_data/src/compiler/cerl_inline.erl')
-rw-r--r--lib/dialyzer/test/options1_tests_SUITE_data/src/compiler/cerl_inline.erl2762
1 files changed, 2762 insertions, 0 deletions
diff --git a/lib/dialyzer/test/options1_tests_SUITE_data/src/compiler/cerl_inline.erl b/lib/dialyzer/test/options1_tests_SUITE_data/src/compiler/cerl_inline.erl
new file mode 100644
index 0000000000..e040904a19
--- /dev/null
+++ b/lib/dialyzer/test/options1_tests_SUITE_data/src/compiler/cerl_inline.erl
@@ -0,0 +1,2762 @@
+%% ``The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved via the world wide web at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Initial Developer of the Original Code is Richard Carlsson.
+%% Copyright (C) 1999-2002 Richard Carlsson.
+%% Portions created by Ericsson are Copyright 2001, Ericsson Utvecklings
+%% AB. All Rights Reserved.''
+%%
+%% $Id: cerl_inline.erl,v 1.1 2008/12/17 09:53:41 mikpe Exp $
+%%
+%% Core Erlang inliner.
+
+%% =====================================================================
+%%
+%% This is an implementation of the algorithm by Waddell and Dybvig
+%% ("Fast and Effective Procedure Inlining", International Static
+%% Analysis Symposium 1997), adapted to the Core Erlang language.
+%%
+%% Instead of always renaming variables and function variables, this
+%% implementation uses the "no-shadowing strategy" of Peyton Jones and
+%% Marlow ("Secrets of the Glasgow Haskell Compiler Inliner", 1999).
+%%
+%% =====================================================================
+
+%% TODO: inline single-source-reference operands without size limit.
+
+-module(cerl_inline).
+
+-export([core_transform/2, transform/1, transform/2]).
+
+-import(cerl, [abstract/1, alias_pat/1, alias_var/1, apply_args/1,
+ apply_op/1, atom_name/1, atom_val/1, bitstr_val/1,
+ bitstr_size/1, bitstr_unit/1, bitstr_type/1,
+ bitstr_flags/1, binary_segments/1, update_c_alias/3,
+ update_c_apply/3, update_c_binary/2, update_c_bitstr/6,
+ update_c_call/4, update_c_case/3, update_c_catch/2,
+ update_c_clause/4, c_fun/2, c_int/1, c_let/3,
+ update_c_let/4, update_c_letrec/3, update_c_module/5,
+ update_c_primop/3, update_c_receive/4, update_c_seq/3,
+ c_seq/2, update_c_try/6, c_tuple/1, update_c_values/2,
+ c_values/1, c_var/1, call_args/1, call_module/1,
+ call_name/1, case_arity/1, case_arg/1, case_clauses/1,
+ catch_body/1, clause_body/1, clause_guard/1,
+ clause_pats/1, clause_vars/1, concrete/1, cons_hd/1,
+ cons_tl/1, data_arity/1, data_es/1, data_type/1,
+ fun_body/1, fun_vars/1, get_ann/1, int_val/1,
+ is_c_atom/1, is_c_cons/1, is_c_fun/1, is_c_int/1,
+ is_c_list/1, is_c_seq/1, is_c_tuple/1, is_c_var/1,
+ is_data/1, is_literal/1, is_literal_term/1, let_arg/1,
+ let_body/1, let_vars/1, letrec_body/1, letrec_defs/1,
+ list_length/1, list_elements/1, update_data/3,
+ make_list/1, make_data_skel/2, module_attrs/1,
+ module_defs/1, module_exports/1, module_name/1,
+ primop_args/1, primop_name/1, receive_action/1,
+ receive_clauses/1, receive_timeout/1, seq_arg/1,
+ seq_body/1, set_ann/2, try_arg/1, try_body/1, try_vars/1,
+ try_evars/1, try_handler/1, tuple_es/1, tuple_arity/1,
+ type/1, values_es/1, var_name/1]).
+
+-import(lists, [foldl/3, foldr/3, mapfoldl/3, reverse/1]).
+
+%%
+%% Constants
+%%
+
+debug_runtime() -> false.
+debug_counters() -> false.
+
+%% Normal execution times for inlining are between 0.1 and 0.3 seconds
+%% (on the author's current equipment). The default effort limit of 150
+%% is high enough that most normal programs never hit the limit even
+%% once, and for difficult programs, it generally keeps the execution
+%% times below 2-5 seconds. Using an effort counter of 1000 will thus
+%% have no further effect on most programs, but some programs may take
+%% as much as 10 seconds or more. Effort counts larger than 2500 have
+%% never been observed even on very ill-conditioned programs.
+%%
+%% Size limits between 6 and 18 tend to actually shrink the code,
+%% because of the simplifications made possible by inlining. A limit of
+%% 16 seems to be optimal for this purpose, often shrinking the
+%% executable code by up to 10%. Size limits between 18 and 30 generally
+%% give the same code size as if no inlining was done (i.e., code
+%% duplication balances out the simplifications at these levels). A size
+%% limit between 1 and 5 tends to inline small functions and propagate
+%% constants, but does not cause much simplifications do be done, so the
+%% net effect will be a slight increase in code size. For size limits
+%% above 30, the executable code size tends to increase with about 10%
+%% per 100 units, with some variations depending on the sizes of
+%% functions in the source code.
+%%
+%% Typically, about 90% of the maximum speedup achievable is already
+%% reached using a size limit of 30, and 98% is reached at limits around
+%% 100-150; there is rarely any point in letting the code size increase
+%% by more than 10-15%. If too large functions are inlined, cache
+%% effects will slow the program down.
+
+default_effort() -> 150.
+default_size() -> 24.
+
+%% Base costs/weights for different kinds of expressions. If these are
+%% modified, the size limits above may have to be adjusted.
+
+weight(var) -> 0; % We count no cost for variable accesses.
+weight(values) -> 0; % Value aggregates have no cost in themselves.
+weight(literal) -> 1; % We assume efficient handling of constants.
+weight(data) -> 1; % Base cost; add 1 per element.
+weight(element) -> 1; % Cost of storing/fetching an element.
+weight(argument) -> 1; % Cost of passing a function argument.
+weight('fun') -> 6; % Base cost + average number of free vars.
+weight('let') -> 0; % Count no cost for let-bindings.
+weight(letrec) -> 0; % Like a let-binding.
+weight('case') -> 0; % Case switches have no base cost.
+weight(clause) -> 1; % Count one jump at the end of each clause body.
+weight('receive') -> 9; % Initialization/cleanup cost.
+weight('try') -> 1; % Assume efficient implementation.
+weight('catch') -> 1; % See `try'.
+weight(apply) -> 3; % Average base cost: call/return.
+weight(call) -> 3; % Assume remote-calls as efficient as `apply'.
+weight(primop) -> 2; % Assume more efficient than `apply'.
+weight(binary) -> 4; % Initialisation base cost.
+weight(bitstr) -> 3; % Coding/decoding a value; like a primop.
+weight(module) -> 1. % Like a letrec with a constant body
+
+%% These "reference" structures are used for variables and function
+%% variables. They keep track of the variable name, any bound operand,
+%% and the associated store location.
+
+-record(ref, {name, opnd, loc}).
+
+%% Operand structures contain the operand expression, the renaming and
+%% environment, the state location, and the effort counter at the call
+%% site (cf. `visit').
+
+-record(opnd, {expr, ren, env, loc, effort}).
+
+%% Since expressions are only visited in `effect' context when they are
+%% not bound to a referenced variable, only expressions visited in
+%% 'value' context are cached.
+
+-record(cache, {expr, size}).
+
+%% The context flags for an application structure are kept separate from
+%% the structure itself. Note that the original algorithm had exactly
+%% one operand in each application context structure, while we can have
+%% several, or none.
+
+-record(app, {opnds, ctxt, loc}).
+
+
+%%
+%% Interface functions
+%%
+
+%% Use compile option `{core_transform, inline}' to insert this as a
+%% compilation pass.
+
+core_transform(Code, Opts) ->
+ cerl:to_records(transform(cerl:from_records(Code), Opts)).
+
+transform(Tree) ->
+ transform(Tree, []).
+
+transform(Tree, Opts) ->
+ main(Tree, value, Opts).
+
+main(Tree, Ctxt, Opts) ->
+ %% We spawn a new process to do the work, so we don't have to worry
+ %% about cluttering the process dictionary with debugging info, or
+ %% proper deallocation of ets-tables.
+ Opts1 = Opts ++ [{inline_size, default_size()},
+ {inline_effort, default_effort()}],
+ Reply = self(),
+ Pid = spawn_link(fun () -> start(Reply, Tree, Ctxt, Opts1) end),
+ receive
+ {Pid1, Tree1} when Pid1 == Pid ->
+ Tree1
+ end.
+
+start(Reply, Tree, Ctxt, Opts) ->
+ init_debug(),
+ case debug_runtime() of
+ true ->
+ put(inline_start_time,
+ element(1, erlang:statistics(runtime)));
+ _ ->
+ ok
+ end,
+ Size = max(1, proplists:get_value(inline_size, Opts)),
+ Effort = max(1, proplists:get_value(inline_effort, Opts)),
+ case proplists:get_bool(verbose, Opts) of
+ true ->
+ io:fwrite("Inlining: inline_size=~w inline_effort=~w\n",
+ [Size, Effort]);
+ false ->
+ ok
+ end,
+
+ %% Note that the counters of the new state are passive.
+ S = st__new(Effort, Size),
+
+%%% Initialization is not needed at present. Note that the code in
+%%% `inline_init' is not up-to-date with this module.
+%%% {Tree1, S1} = inline_init:init(Tree, S),
+%%% {Tree2, _S2} = i(Tree1, Ctxt, S1),
+ {Tree2, _S2} = i(Tree, Ctxt, S),
+ report_debug(),
+ Reply ! {self(), Tree2}.
+
+init_debug() ->
+ case debug_counters() of
+ true ->
+ put(counter_effort_triggers, 0),
+ put(counter_effort_max, 0),
+ put(counter_size_triggers, 0),
+ put(counter_size_max, 0);
+ _ ->
+ ok
+ end.
+
+report_debug() ->
+ case debug_runtime() of
+ true ->
+ {Time, _} = erlang:statistics(runtime),
+ report("Total run time for inlining: ~.2.0f s.\n",
+ [(Time - get(inline_start_time))/1000]);
+ _ ->
+ ok
+ end,
+ case debug_counters() of
+ true ->
+ counter_stats();
+ _ ->
+ ok
+ end.
+
+counter_stats() ->
+ T1 = get(counter_effort_triggers),
+ T2 = get(counter_size_triggers),
+ E = get(counter_effort_max),
+ S = get(counter_size_max),
+ M1 = io_lib:fwrite("\tNumber of triggered "
+ "effort counters: ~p.\n", [T1]),
+ M2 = io_lib:fwrite("\tNumber of triggered "
+ "size counters: ~p.\n", [T2]),
+ M3 = io_lib:fwrite("\tLargest active effort counter: ~p.\n",
+ [E]),
+ M4 = io_lib:fwrite("\tLargest active size counter: ~p.\n",
+ [S]),
+ report("Counter statistics:\n~s", [[M1, M2, M3, M4]]).
+
+
+%% =====================================================================
+%% The main inlining function
+%%
+%% i(E :: coreErlang(),
+%% Ctxt :: value | effect | #app{}
+%% Ren :: renaming(),
+%% Env :: environment(),
+%% S :: state())
+%% -> {E', S'}
+%%
+%% Note: It is expected that the input source code ('E') does not
+%% contain free variables. If it does, there is a risk of accidental
+%% name capture, in case a generated "new" variable name happens to be
+%% the same as the name of a variable that is free further below in the
+%% tree; the algorithm only consults the current environment to check if
+%% a name already exists.
+%%
+%% The renaming maps names of source-code variable and function
+%% variables to new names as necessary to avoid clashes, according to
+%% the "no-shadowing" strategy. The environment maps *residual-code*
+%% variables and function variables to operands and global information.
+%% Separating the renaming from the environment, and using the
+%% residual-code variables instead of the source-code variables as its
+%% domain, improves the behaviour of the algorithm when code needs to be
+%% traversed more than once.
+%%
+%% Note that there is no such thing as a `test' context for expressions
+%% in (Core) Erlang (see `i_case' below for details).
+
+i(E, Ctxt, S) ->
+ i(E, Ctxt, ren__identity(), env__empty(), S).
+
+i(E, Ctxt, Ren, Env, S0) ->
+ %% Count one unit of effort on each pass.
+ S = count_effort(1, S0),
+ case is_data(E) of
+ true ->
+ i_data(E, Ctxt, Ren, Env, S);
+ false ->
+ case type(E) of
+ var ->
+ i_var(E, Ctxt, Ren, Env, S);
+ values ->
+ i_values(E, Ctxt, Ren, Env, S);
+ 'fun' ->
+ i_fun(E, Ctxt, Ren, Env, S);
+ seq ->
+ i_seq(E, Ctxt, Ren, Env, S);
+ 'let' ->
+ i_let(E, Ctxt, Ren, Env, S);
+ letrec ->
+ i_letrec(E, Ctxt, Ren, Env, S);
+ 'case' ->
+ i_case(E, Ctxt, Ren, Env, S);
+ 'receive' ->
+ i_receive(E, Ctxt, Ren, Env, S);
+ apply ->
+ i_apply(E, Ctxt, Ren, Env, S);
+ call ->
+ i_call(E, Ctxt, Ren, Env, S);
+ primop ->
+ i_primop(E, Ren, Env, S);
+ 'try' ->
+ i_try(E, Ctxt, Ren, Env, S);
+ 'catch' ->
+ i_catch(E, Ctxt, Ren, Env, S);
+ binary ->
+ i_binary(E, Ren, Env, S);
+ module ->
+ i_module(E, Ctxt, Ren, Env, S)
+ end
+ end.
+
+i_data(E, Ctxt, Ren, Env, S) ->
+ case is_literal(E) of
+ true ->
+ %% This is the `(const c)' case of the original algorithm:
+ %% literal terms which (regardless of size) do not need to
+ %% be constructed dynamically at runtime - boldly assuming
+ %% that the compiler/runtime system can handle this.
+ case Ctxt of
+ effect ->
+ %% Reduce useless constants to a simple value.
+ {void(), count_size(weight(literal), S)};
+ _ ->
+ %% (In Erlang, we cannot set all non-`false'
+ %% constants to `true' in a `test' context, like we
+ %% could do in Lisp or C, so the above is the only
+ %% special case to be handled here.)
+ {E, count_size(weight(literal), S)}
+ end;
+ false ->
+ %% Data constructors are like to calls to safe built-in
+ %% functions, for which we can "decide to inline"
+ %% immediately; there is no need to create operand
+ %% structures. In `effect' context, we can simply make a
+ %% sequence of the argument expressions, also visited in
+ %% `effect' context. In all other cases, the arguments are
+ %% visited for value.
+ case Ctxt of
+ effect ->
+ %% Note that this will count the sizes of the
+ %% subexpressions, even though some or all of them
+ %% might be discarded by the sequencing afterwards.
+ {Es1, S1} = mapfoldl(fun (E, S) ->
+ i(E, effect, Ren, Env,
+ S)
+ end,
+ S, data_es(E)),
+ E1 = foldl(fun (E1, E2) -> make_seq(E1, E2) end,
+ void(), Es1),
+ {E1, S1};
+ _ ->
+ {Es1, S1} = mapfoldl(fun (E, S) ->
+ i(E, value, Ren, Env,
+ S)
+ end,
+ S, data_es(E)),
+ %% The total size/cost is the base cost for a data
+ %% constructor plus the cost for storing each
+ %% element.
+ N = weight(data) + length(Es1) * weight(element),
+ S2 = count_size(N, S1),
+ {update_data(E, data_type(E), Es1), S2}
+ end
+ end.
+
+%% This is the `(ref x)' (variable use) case of the original algorithm.
+%% Note that binding occurrences are always handled in the respective
+%% cases of the binding constructs.
+
+i_var(E, Ctxt, Ren, Env, S) ->
+ case Ctxt of
+ effect ->
+ %% Reduce useless variable references to a simple constant.
+ %% This also avoids useless visiting of bound operands.
+ {void(), count_size(weight(literal), S)};
+ _ ->
+ Name = var_name(E),
+ case env__lookup(ren__map(Name, Ren), Env) of
+ {ok, R} ->
+ case R#ref.opnd of
+ undefined ->
+ %% The variable is not associated with an
+ %% argument expression; just residualize it.
+ residualize_var(R, S);
+ Opnd ->
+ i_var_1(R, Opnd, Ctxt, Env, S)
+ end;
+ error ->
+ %% The variable is unbound. (It has not been
+ %% accidentally captured, however, or it would have
+ %% been in the environment.) We leave it as it is,
+ %% without any warning.
+ {E, count_size(weight(var), S)}
+ end
+ end.
+
+%% This first visits the bound operand and then does copy propagation.
+%% Note that we must first set the "inner-pending" flag, and clear the
+%% flag afterwards.
+
+i_var_1(R, Opnd, Ctxt, Env, S) ->
+ %% If the operand is already "inner-pending", it is residualised.
+ %% (In Lisp/C, if the variable might be assigned to, it should also
+ %% be residualised.)
+ L = Opnd#opnd.loc,
+ case st__test_inner_pending(L, S) of
+ true ->
+ residualize_var(R, S);
+ false ->
+ S1 = st__mark_inner_pending(L, S),
+ case catch {ok, visit(Opnd, S1)} of
+ {ok, {E, S2}} ->
+ %% Note that we pass the current environment and
+ %% context to `copy', but not the current renaming.
+ S3 = st__clear_inner_pending(L, S2),
+ copy(R, Opnd, E, Ctxt, Env, S3);
+ {'EXIT', X} ->
+ exit(X);
+ X ->
+ %% If we use destructive update for the
+ %% `inner-pending' flag, we must make sure to clear
+ %% it also if we make a nonlocal return.
+ st__clear_inner_pending(Opnd#opnd.loc, S1),
+ throw(X)
+ end
+ end.
+
+%% A multiple-value aggregate `<e1, ..., en>'. This is very much like a
+%% tuple data constructor `{e1, ..., en}'; cf. `i_data' for details.
+
+i_values(E, Ctxt, Ren, Env, S) ->
+ case values_es(E) of
+ [E1] ->
+ %% Single-value aggregates can be dropped; they are simply
+ %% notation.
+ i(E1, Ctxt, Ren, Env, S);
+ Es ->
+ %% In `effect' context, we can simply make a sequence of the
+ %% argument expressions, also visited in `effect' context.
+ %% In all other cases, the arguments are visited for value.
+ case Ctxt of
+ effect ->
+ {Es1, S1} =
+ mapfoldl(fun (E, S) ->
+ i(E, effect, Ren, Env, S)
+ end,
+ S, Es),
+ E1 = foldl(fun (E1, E2) ->
+ make_seq(E1, E2)
+ end,
+ void(), Es1),
+ {E1, S1}; % drop annotations on E
+ _ ->
+ {Es1, S1} = mapfoldl(fun (E, S) ->
+ i(E, value, Ren, Env,
+ S)
+ end,
+ S, Es),
+ %% Aggregating values does not write them to memory,
+ %% so we count no extra cost per element.
+ S2 = count_size(weight(values), S1),
+ {update_c_values(E, Es1), S2}
+ end
+ end.
+
+%% A let-expression `let <v1,...,vn> = e0 in e1' is semantically
+%% equivalent to a case-expression `case e0 of <v1,...,vn> when 'true'
+%% -> e1 end'. As a special case, `let <v> = e0 in e1' is also
+%% equivalent to `apply fun (v) -> e0 (e1)'. However, for efficiency,
+%% and in order to allow the handling of `case' clauses to introduce new
+%% let-expressions without entering an infinite rewrite loop, we handle
+%% these directly.
+
+%%% %% Rewriting a `let' to an equivalent expression.
+%%% i_let(E, Ctxt, Ren, Env, S) ->
+%%% case let_vars(E) of
+%%% [V] ->
+%%% E1 = update_c_apply(E, c_fun([V], let_body(E)), [let_arg(E)]),
+%%% i(E1, Ctxt, Ren, Env, S);
+%%% Vs ->
+%%% C = c_clause(Vs, abstract(true), let_body(E)),
+%%% E1 = update_c_case(E, let_arg(E), [C]),
+%%% i(E1, Ctxt, Ren, Env, S)
+%%% end.
+
+i_let(E, Ctxt, Ren, Env, S) ->
+ case let_vars(E) of
+ [V] ->
+ i_let_1(V, E, Ctxt, Ren, Env, S);
+ Vs ->
+ %% Visit the argument expression in `value' context, to
+ %% simplify it as far as possible.
+ {A, S1} = i(let_arg(E), value, Ren, Env, S),
+ case get_components(length(Vs), result(A)) of
+ {true, As} ->
+ %% Note that only the components of the result of
+ %% `A' are passed on; any effects are hoisted.
+ {E1, S2} = i_let_2(Vs, As, E, Ctxt, Ren, Env, S1),
+ {hoist_effects(A, E1), S2};
+ false ->
+ %% We cannot do anything with this `let', since the
+ %% variables cannot be matched against the argument
+ %% components. Just visit the variables for renaming
+ %% and visit the body for value (cf. `i_fun').
+ {_, Ren1, Env1, S2} = bind_locals(Vs, Ren, Env, S1),
+ Vs1 = i_params(Vs, Ren1, Env1),
+ %% The body is always visited for value here.
+ {B, S3} = i(let_body(E), value, Ren1, Env1, S2),
+ S4 = count_size(weight('let'), S3),
+ {update_c_let(E, Vs1, A, B), S4}
+ end
+ end.
+
+%% Single-variable `let' binding.
+
+i_let_1(V, E, Ctxt, Ren, Env, S) ->
+ %% Make an operand structure for the argument expression, create a
+ %% local binding from the parameter to the operand structure, and
+ %% visit the body. Finally create necessary bindings and/or set
+ %% flags.
+ {Opnd, S1} = make_opnd(let_arg(E), Ren, Env, S),
+ {[R], Ren1, Env1, S2} = bind_locals([V], [Opnd], Ren, Env, S1),
+ {E1, S3} = i(let_body(E), Ctxt, Ren1, Env1, S2),
+ i_let_3([R], [Opnd], E1, S3).
+
+%% Multi-variable `let' binding.
+
+i_let_2(Vs, As, E, Ctxt, Ren, Env, S) ->
+ %% Make operand structures for the argument components. Note that
+ %% since the argument has already been visited at this point, we use
+ %% the identity renaming for the operands.
+ {Opnds, S1} = mapfoldl(fun (E, S) ->
+ make_opnd(E, ren__identity(), Env, S)
+ end,
+ S, As),
+ %% Create local bindings from the parameters to their respective
+ %% operand structures, and visit the body.
+ {Rs, Ren1, Env1, S2} = bind_locals(Vs, Opnds, Ren, Env, S1),
+ {E1, S3} = i(let_body(E), Ctxt, Ren1, Env1, S2),
+ i_let_3(Rs, Opnds, E1, S3).
+
+i_let_3(Rs, Opnds, E, S) ->
+ %% Create necessary bindings and/or set flags.
+ {E1, S1} = make_let_bindings(Rs, E, S),
+
+ %% We must also create evaluation for effect, for any unused
+ %% operands, as after an application expression.
+ residualize_operands(Opnds, E1, S1).
+
+%% A sequence `do e1 e2', written `(seq e1 e2)' in the original
+%% algorithm, where `e1' is evaluated for effect only (since its value
+%% is not used), and `e2' yields the final value. Note that we use
+%% `make_seq' to recompose the sequence after visiting the parts.
+
+i_seq(E, Ctxt, Ren, Env, S) ->
+ {E1, S1} = i(seq_arg(E), effect, Ren, Env, S),
+ {E2, S2} = i(seq_body(E), Ctxt, Ren, Env, S1),
+ %% A sequence has no cost in itself.
+ {make_seq(E1, E2), S2}.
+
+
+%% The `case' switch of Core Erlang is rather different from the boolean
+%% `(if e1 e2 e3)' case of the original algorithm, but the central idea
+%% is the same: if, given the simplified switch expression (which is
+%% visited in `value' context - a boolean `test' context would not be
+%% generally useful), there is a clause which could definitely be
+%% selected, such that no clause before it can possibly be selected,
+%% then we can eliminate all other clauses. (And even if this is not the
+%% case, some clauses can often be eliminated.) Furthermore, if a clause
+%% can be selected, we can replace the case-expression (including the
+%% switch expression) with the body of the clause and a set of zero or
+%% more let-bindings of subexpressions of the switch expression. (In the
+%% simplest case, the switch expression is evaluated only for effect.)
+
+i_case(E, Ctxt, Ren, Env, S) ->
+ %% First visit the switch expression in `value' context, to simplify
+ %% it as far as possible. Note that only the result part is passed
+ %% on to the clause matching below; any effects are hoisted.
+ {A, S1} = i(case_arg(E), value, Ren, Env, S),
+ A1 = result(A),
+
+ %% Propagating an application context into the branches could cause
+ %% the arguments of the application to be evaluated *after* the
+ %% switch expression, but *before* the body of the selected clause.
+ %% Such interleaving is not allowed in general, and it does not seem
+ %% worthwile to make a more powerful transformation here. Therefore,
+ %% the clause bodies are conservatively visited for value if the
+ %% context is `application'.
+ Ctxt1 = safe_context(Ctxt),
+ {E1, S2} = case get_components(case_arity(E), A1) of
+ {true, As} ->
+ i_case_1(As, E, Ctxt1, Ren, Env, S1);
+ false ->
+ i_case_1([], E, Ctxt1, Ren, Env, S1)
+ end,
+ {hoist_effects(A, E1), S2}.
+
+i_case_1(As, E, Ctxt, Ren, Env, S) ->
+ case i_clauses(As, case_clauses(E), Ctxt, Ren, Env, S) of
+ {false, {As1, Vs, Env1, Cs}, S1} ->
+ %% We still have a list of clauses. Sanity check:
+ if Cs == [] ->
+ report_warning("empty list of clauses "
+ "in residual program!.\n");
+ true ->
+ ok
+ end,
+ {A, S2} = i(c_values(As1), value, ren__identity(), Env1,
+ S1),
+ {E1, S3} = i_case_2(Cs, A, E, S2),
+ i_case_3(Vs, Env1, E1, S3);
+ {true, {_, Vs, Env1, [C]}, S1} ->
+ %% A single clause was selected; we just take the body.
+ i_case_3(Vs, Env1, clause_body(C), S1)
+ end.
+
+%% Check if all clause bodies are actually equivalent expressions that
+%% do not depent on pattern variables (this sometimes occurs as a
+%% consequence of inlining, e.g., all branches might yield 'true'), and
+%% if so, replace the `case' with a sequence, first evaluating the
+%% clause selection for effect, then evaluating one of the clause bodies
+%% for its value. (Unless the switch contains a catch-all clause, the
+%% clause selection must be evaluated for effect, since there is no
+%% guarantee that any of the clauses will actually match. Assuming that
+%% some clause always matches could make an undefined program produce a
+%% value.) This makes the final size less than what was accounted for
+%% when visiting the clauses, but currently we don't try to adjust for
+%% this.
+
+i_case_2(Cs, A, E, S) ->
+ case equivalent_clauses(Cs) of
+ false ->
+ %% Count the base sizes for the remaining clauses; pattern
+ %% and guard sizes are already counted.
+ N = weight('case') + weight(clause) * length(Cs),
+ S1 = count_size(N, S),
+ {update_c_case(E, A, Cs), S1};
+ true ->
+ case cerl_clauses:any_catchall(Cs) of
+ true ->
+ %% We know that some clause must be selected, so we
+ %% can drop all the testing as well.
+ E1 = make_seq(A, clause_body(hd(Cs))),
+ {E1, S};
+ false ->
+ %% The clause selection must be performed for
+ %% effect.
+ E1 = update_c_case(E, A,
+ set_clause_bodies(Cs, void())),
+ {make_seq(E1, clause_body(hd(Cs))), S}
+ end
+ end.
+
+i_case_3(Vs, Env, E, S) ->
+ %% For the variables bound to the switch expression subexpressions,
+ %% make let bindings or create evaluation for effect.
+ Rs = [env__get(var_name(V), Env) || V <- Vs],
+ {E1, S1} = make_let_bindings(Rs, E, S),
+ Opnds = [R#ref.opnd || R <- Rs],
+ residualize_operands(Opnds, E1, S1).
+
+%% This function takes a sequence of switch expressions `Es' (which can
+%% be the empty list if these are unknown) and a list `Cs' of clauses,
+%% and returns `{Match, {As, Vs, Env1, Cs1}, S1}' where `As' is a list
+%% of residual switch expressions, `Vs' the list of variables used in
+%% the templates, `Env1' the environment for the templates, and `Cs1'
+%% the list of residual clauses. `Match' is `true' if some clause could
+%% be shown to definitely match (in this case, `Cs1' contains exactly
+%% one element), and `false' otherwise. `S1' is the new state. The given
+%% `Ctxt' is the context to be used for visiting the body of clauses.
+%%
+%% Visiting a clause basically amounts to extending the environment for
+%% all variables in the pattern, as for a `fun' (cf. `i_fun'),
+%% propagating match information if possible, and visiting the guard and
+%% body in the new environment.
+%%
+%% To make it cheaper to do handle a set of clauses, and to avoid
+%% unnecessarily exceeding the size limit, we avoid visiting the bodies
+%% of clauses which are subsequently removed, by dividing the visiting
+%% of a clause into two stages: first construct the environment(s) and
+%% visit the pattern (for renaming) and the guard (for value), then
+%% reduce the switch as much as possible, and lastly visit the body.
+
+i_clauses(Cs, Ctxt, Ren, Env, S) ->
+ i_clauses([], Cs, Ctxt, Ren, Env, S).
+
+i_clauses(Es, Cs, Ctxt, Ren, Env, S) ->
+ %% Create templates for the switch expressions.
+ {Ts, {Vs, Env0}} = mapfoldl(fun (E, {Vs, Env}) ->
+ {T, Vs1, Env1} =
+ make_template(E, Env),
+ {T, {Vs1 ++ Vs, Env1}}
+ end,
+ {[], Env}, Es),
+
+ %% Make operand structures for the switch subexpression templates
+ %% (found in `Env0') and add proper ref-structure bindings to the
+ %% environment. Since the subexpressions in general can be
+ %% interdependent (Vs is in reverse-dependency order), the
+ %% environment (and renaming) must be created incrementally. Note
+ %% that since the switch expressions have been visited already, the
+ %% identity renaming is used for the operands.
+ Vs1 = lists:reverse(Vs),
+ {Ren1, Env1, S1} =
+ foldl(fun (V, {Ren, Env, S}) ->
+ E = env__get(var_name(V), Env0),
+ {Opnd, S_1} = make_opnd(E, ren__identity(), Env,
+ S),
+ {_, Ren1, Env1, S_2} = bind_locals([V], [Opnd],
+ Ren, Env, S_1),
+ {Ren1, Env1, S_2}
+ end,
+ {Ren, Env, S}, Vs1),
+
+ %% First we visit the head of each individual clause, renaming
+ %% pattern variables, inserting let-bindings in the guard and body,
+ %% and visiting the guard. The information used for visiting the
+ %% clause body will be prefixed to the clause annotations.
+ {Cs1, S2} = mapfoldl(fun (C, S) ->
+ i_clause_head(C, Ts, Ren1, Env1, S)
+ end,
+ S1, Cs),
+
+ %% Now that the clause guards have been reduced as far as possible,
+ %% we can attempt to reduce the clauses.
+ As = [hd(get_ann(T)) || T <- Ts],
+ case cerl_clauses:reduce(Cs1, Ts) of
+ {false, Cs2} ->
+ %% We still have one or more clauses (with associated
+ %% extended environments). Their bodies have not yet been
+ %% visited, so we do that (in the respective safe
+ %% environments, adding the sizes of the visited heads to
+ %% the current size counter) and return the final list of
+ %% clauses.
+ {Cs3, S3} = mapfoldl(
+ fun (C, S) ->
+ i_clause_body(C, Ctxt, S)
+ end,
+ S2, Cs2),
+ {false, {As, Vs1, Env1, Cs3}, S3};
+ {true, {C, _}} ->
+ %% A clause C could be selected (the bindings have already
+ %% been added to the guard/body). Note that since the clause
+ %% head will probably be discarded, its size is not counted.
+ {C1, Ren2, Env2, _} = get_clause_extras(C),
+ {B, S3} = i(clause_body(C), Ctxt, Ren2, Env2, S2),
+ C2 = update_c_clause(C1, clause_pats(C1), clause_guard(C1), B),
+ {true, {As, Vs1, Env1, [C2]}, S3}
+ end.
+
+%% This visits the head of a clause, renames pattern variables, inserts
+%% let-bindings in the guard and body, and does inlining on the guard
+%% expression. Returns a list of pairs `{NewClause, Data}', where `Data'
+%% is `{Renaming, Environment, Size}' used for visiting the body of the
+%% new clause.
+
+i_clause_head(C, Ts, Ren, Env, S) ->
+ %% Match the templates against the (non-renamed) patterns to get the
+ %% available information about matching subexpressions. We don't
+ %% care at this point whether an exact match/nomatch is detected.
+ Ps = clause_pats(C),
+ Bs = case cerl_clauses:match_list(Ps, Ts) of
+ {_, Bs1} -> Bs1;
+ none -> []
+ end,
+
+ %% The patterns must be visited for renaming; cf. `i_pattern'. We
+ %% use a passive size counter for visiting the patterns and the
+ %% guard (cf. `visit'), because we do not know at this stage whether
+ %% the clause will be kept or not; the final value of the counter is
+ %% included in the returned value below.
+ {_, Ren1, Env1, S1} = bind_locals(clause_vars(C), Ren, Env, S),
+ S2 = new_passive_size(get_size_limit(S1), S1),
+ {Ps1, S3} = mapfoldl(fun (P, S) ->
+ i_pattern(P, Ren1, Env1, Ren, Env, S)
+ end,
+ S2, Ps),
+
+ %% Rewrite guard and body and visit the guard for value. Discard the
+ %% latter size count if the guard turns out to be a constant.
+ G = add_match_bindings(Bs, clause_guard(C)),
+ B = add_match_bindings(Bs, clause_body(C)),
+ {G1, S4} = i(G, value, Ren1, Env1, S3),
+ S5 = case is_literal(G1) of
+ true ->
+ revert_size(S3, S4);
+ false ->
+ S4
+ end,
+
+ %% Revert to the size counter we had on entry to this function. The
+ %% environment and renaming, together with the size of the clause
+ %% head, are prefixed to the annotations for later use.
+ Size = get_size_value(S5),
+ C1 = update_c_clause(C, Ps1, G1, B),
+ {set_clause_extras(C1, Ren1, Env1, Size), revert_size(S, S5)}.
+
+add_match_bindings(Bs, E) ->
+ %% Don't waste time if the variables definitely cannot be used.
+ %% (Most guards are simply `true'.)
+ case is_literal(E) of
+ true ->
+ E;
+ false ->
+ Vs = [V || {V, E} <- Bs, E /= any],
+ Es = [hd(get_ann(E)) || {_V, E} <- Bs, E /= any],
+ c_let(Vs, c_values(Es), E)
+ end.
+
+i_clause_body(C0, Ctxt, S) ->
+ {C, Ren, Env, Size} = get_clause_extras(C0),
+ S1 = count_size(Size, S),
+ {B, S2} = i(clause_body(C), Ctxt, Ren, Env, S1),
+ C1 = update_c_clause(C, clause_pats(C), clause_guard(C), B),
+ {C1, S2}.
+
+get_clause_extras(C) ->
+ [{Ren, Env, Size} | As] = get_ann(C),
+ {set_ann(C, As), Ren, Env, Size}.
+
+set_clause_extras(C, Ren, Env, Size) ->
+ As = [{Ren, Env, Size} | get_ann(C)],
+ set_ann(C, As).
+
+%% This is the `(lambda x e)' case of the original algorithm. A
+%% `fun' is like a lambda expression, but with a varying number of
+%% parameters; possibly zero.
+
+i_fun(E, Ctxt, Ren, Env, S) ->
+ case Ctxt of
+ effect ->
+ %% Reduce useless `fun' expressions to a simple constant;
+ %% visiting the body would be a waste of time, and could
+ %% needlessly mark variables as referenced.
+ {void(), count_size(weight(literal), S)};
+ value ->
+ %% Note that the variables are visited as patterns.
+ Vs = fun_vars(E),
+ {_, Ren1, Env1, S1} = bind_locals(Vs, Ren, Env, S),
+ Vs1 = i_params(Vs, Ren1, Env1),
+
+ %% The body is always visited for value.
+ {B, S2} = i(fun_body(E), value, Ren1, Env1, S1),
+
+ %% We don't bother to include the exact number of free
+ %% variables in the cost for creating a fun-value.
+ S3 = count_size(weight('fun'), S2),
+
+ %% Inlining might have duplicated code, so we must remove
+ %% any 'id'-annotations from the original fun-expression.
+ %% (This forces a later stage to invent new id:s.) This is
+ %% necessary as long as fun:s may still need to be
+ %% identified the old way. Function variables that are not
+ %% in application context also have such annotations, but
+ %% the inlining will currently lose all annotations on
+ %% variable references (I think), so that's not a problem.
+ {set_ann(c_fun(Vs1, B), kill_id_anns(get_ann(E))), S3};
+ #app{} ->
+ %% An application of a fun-expression (in the source code)
+ %% is handled by going directly to `inline'; this is never
+ %% residualised, and we don't set up new counters here. Note
+ %% that inlining of copy-propagated fun-expressions is done
+ %% in `copy'; not here.
+ inline(E, Ctxt, Ren, Env, S)
+ end.
+
+%% A `letrec' requires a circular environment, but is otherwise like a
+%% `let', i.e. like a direct lambda application. Note that only
+%% fun-expressions (lambda abstractions) may occur in the right-hand
+%% side of each definition.
+
+i_letrec(E, Ctxt, Ren, Env, S) ->
+ %% Note that we pass an empty list for the auto-referenced
+ %% (exported) functions here.
+ {Es, B, _, S1} = i_letrec(letrec_defs(E), letrec_body(E), [], Ctxt,
+ Ren, Env, S),
+
+ %% If no bindings remain, only the body is returned.
+ case Es of
+ [] ->
+ {B, S1}; % drop annotations on E
+ _ ->
+ S2 = count_size(weight(letrec), S1),
+ {update_c_letrec(E, Es, B), S2}
+ end.
+
+%% The major part of this is shared by letrec-expressions and module
+%% definitions alike.
+
+i_letrec(Es, B, Xs, Ctxt, Ren, Env, S) ->
+ %% First, we create operands with dummy renamings and environments,
+ %% and with fresh store locations for cached expressions and operand
+ %% info.
+ {Opnds, S1} = mapfoldl(fun ({_, E}, S) ->
+ make_opnd(E, undefined, undefined, S)
+ end,
+ S, Es),
+
+ %% Then we make recursive bindings for the definitions.
+ {Rs, Ren1, Env1, S2} = bind_recursive([F || {F, _} <- Es],
+ Opnds, Ren, Env, S1),
+
+ %% For the function variables listed in Xs (none for a
+ %% letrec-expression), we must make sure that the corresponding
+ %% operand expressions are visited and that the definitions are
+ %% marked as referenced; we also need to return the possibly renamed
+ %% function variables.
+ {Xs1, S3} =
+ mapfoldl(
+ fun (X, S) ->
+ Name = ren__map(var_name(X), Ren1),
+ case env__lookup(Name, Env1) of
+ {ok, R} ->
+ S_1 = i_letrec_export(R, S),
+ {ref_to_var(R), S_1};
+ error ->
+ %% We just skip any exports that are not
+ %% actually defined here, and generate a
+ %% warning message.
+ {N, A} = var_name(X),
+ report_warning("export `~w'/~w "
+ "not defined.\n", [N, A]),
+ {X, S}
+ end
+ end,
+ S2, Xs),
+
+ %% At last, we can then visit the body.
+ {B1, S4} = i(B, Ctxt, Ren1, Env1, S3),
+
+ %% Finally, we create new letrec-bindings for any and all
+ %% residualised definitions. All referenced functions should have
+ %% been visited; the call to `visit' below is expected to retreive a
+ %% cached expression.
+ Rs1 = keep_referenced(Rs, S4),
+ {Es1, S5} = mapfoldl(fun (R, S) ->
+ {E_1, S_1} = visit(R#ref.opnd, S),
+ {{ref_to_var(R), E_1}, S_1}
+ end,
+ S4, Rs1),
+ {Es1, B1, Xs1, S5}.
+
+%% This visits the operand for a function definition exported by a
+%% `letrec' (which is really a `module' module definition, since normal
+%% letrecs have no export declarations). Only the updated state is
+%% returned. We must handle the "inner-pending" flag when doing this;
+%% cf. `i_var'.
+
+i_letrec_export(R, S) ->
+ Opnd = R#ref.opnd,
+ S1 = st__mark_inner_pending(Opnd#opnd.loc, S),
+ {_, S2} = visit(Opnd, S1),
+ {_, S3} = residualize_var(R, st__clear_inner_pending(Opnd#opnd.loc,
+ S2)),
+ S3.
+
+%% This is the `(call e1 e2)' case of the original algorithm. The only
+%% difference is that we must handle multiple (or no) operand
+%% expressions.
+
+i_apply(E, Ctxt, Ren, Env, S) ->
+ {Opnds, S1} = mapfoldl(fun (E, S) ->
+ make_opnd(E, Ren, Env, S)
+ end,
+ S, apply_args(E)),
+
+ %% Allocate a new app-context location and set up an application
+ %% context structure containing the surrounding context.
+ {L, S2} = st__new_app_loc(S1),
+ Ctxt1 = #app{opnds = Opnds, ctxt = Ctxt, loc = L},
+
+ %% Visit the operator expression in the new call context.
+ {E1, S3} = i(apply_op(E), Ctxt1, Ren, Env, S2),
+
+ %% Check the "inlined" flag to find out what to do next. (The store
+ %% location could be recycled after the flag has been tested, but
+ %% there is no real advantage to that, because in practice, only
+ %% 4-5% of all created store locations will ever be reused, while
+ %% there will be a noticable overhead for managing the free list.)
+ case st__get_app_inlined(L, S3) of
+ true ->
+ %% The application was inlined, so we have the final
+ %% expression in `E1'. We just have to handle any operands
+ %% that need to be residualized for effect only (i.e., those
+ %% the values of which are not used).
+ residualize_operands(Opnds, E1, S3);
+ false ->
+ %% Otherwise, `E1' is the residual operator expression. We
+ %% make sure all operands are visited, and rebuild the
+ %% application.
+ {Es, S4} = mapfoldl(fun (Opnd, S) ->
+ visit_and_count_size(Opnd, S)
+ end,
+ S3, Opnds),
+ N = apply_size(length(Es)),
+ {update_c_apply(E, E1, Es), count_size(N, S4)}
+ end.
+
+apply_size(A) ->
+ weight(apply) + weight(argument) * A.
+
+%% Since it is not the task of this transformation to handle
+%% cross-module inlining, all inter-module calls are handled by visiting
+%% the components (the module and function name, and the arguments of
+%% the call) for value. In `effect' context, if the function itself is
+%% known to be completely effect free, the call can be discarded and the
+%% arguments evaluated for effect. Otherwise, if all the visited
+%% arguments are to constants, and the function is known to be safe to
+%% execute at compile time, then we try to evaluate the call. If
+%% evaluation completes normally, the call is replaced by the result;
+%% otherwise the call is residualised.
+
+i_call(E, Ctxt, Ren, Env, S) ->
+ {M, S1} = i(call_module(E), value, Ren, Env, S),
+ {F, S2} = i(call_name(E), value, Ren, Env, S1),
+ As = call_args(E),
+ Arity = length(As),
+
+ %% Check if the name of the called function is static. If so,
+ %% discard the size counts performed above, since the values will
+ %% not cause any runtime cost.
+ Static = is_c_atom(M) and is_c_atom(F),
+ S3 = case Static of
+ true ->
+ revert_size(S, S2);
+ false ->
+ S2
+ end,
+ case Ctxt of
+ effect when Static == true ->
+ case is_safe_call(atom_val(M), atom_val(F), Arity) of
+ true ->
+ %% The result will not be used, and the call is
+ %% effect free, so we create a multiple-value
+ %% aggregate containing the (not yet visited)
+ %% arguments and process that instead.
+ i(c_values(As), effect, Ren, Env, S3);
+ false ->
+ %% We are not allowed to simply discard the call,
+ %% but we can try to evaluate it.
+ i_call_1(Static, M, F, Arity, As, E, Ctxt, Ren, Env,
+ S3)
+ end;
+ _ ->
+ i_call_1(Static, M, F, Arity, As, E, Ctxt, Ren, Env, S3)
+ end.
+
+i_call_1(Static, M, F, Arity, As, E, Ctxt, Ren, Env, S) ->
+ %% Visit the arguments for value.
+ {As1, S1} = mapfoldl(fun (X, A) -> i(X, value, Ren, Env, A) end,
+ S, As),
+ case Static of
+ true ->
+ case erl_bifs:is_pure(atom_val(M), atom_val(F), Arity) of
+ true ->
+ %% It is allowed to evaluate this at compile time.
+ case all_static(As1) of
+ true ->
+ i_call_3(M, F, As1, E, Ctxt, Env, S1);
+ false ->
+ %% See if the call can be rewritten instead.
+ i_call_4(M, F, As1, E, Ctxt, Env, S1)
+ end;
+ false ->
+ i_call_2(M, F, As1, E, S1)
+ end;
+ false ->
+ i_call_2(M, F, As1, E, S1)
+ end.
+
+%% Residualise the call.
+
+i_call_2(M, F, As, E, S) ->
+ N = weight(call) + weight(argument) * length(As),
+ {update_c_call(E, M, F, As), count_size(N, S)}.
+
+%% Attempt to evaluate the call to yield a literal; if that fails, try
+%% to rewrite the expression.
+
+i_call_3(M, F, As, E, Ctxt, Env, S) ->
+ %% Note that we extract the results of argument expessions here; the
+ %% expressions could still be sequences with side effects.
+ Vs = [concrete(result(A)) || A <- As],
+ case catch {ok, apply(atom_val(M), atom_val(F), Vs)} of
+ {ok, V} ->
+ %% Evaluation completed normally - try to turn the result
+ %% back into a syntax tree (representing a literal).
+ case is_literal_term(V) of
+ true ->
+ %% Make a sequence of the arguments (as a
+ %% multiple-value aggregate) and the final value.
+ S1 = count_size(weight(values), S),
+ S2 = count_size(weight(literal), S1),
+ {make_seq(c_values(As), abstract(V)), S2};
+ false ->
+ %% The result could not be represented as a literal.
+ i_call_4(M, F, As, E, Ctxt, Env, S)
+ end;
+ _ ->
+ %% The evaluation attempt did not complete normally.
+ i_call_4(M, F, As, E, Ctxt, Env, S)
+ end.
+
+%% Rewrite the expression, if possible, otherwise residualise it.
+
+i_call_4(M, F, As, E, Ctxt, Env, S) ->
+ case reduce_bif_call(atom_val(M), atom_val(F), As, Env) of
+ false ->
+ %% Nothing more to be done - residualise the call.
+ i_call_2(M, F, As, E, S);
+ {true, E1} ->
+ %% We revisit the result, because the rewriting might have
+ %% opened possibilities for further inlining. Since the
+ %% parts have already been visited once, we use the identity
+ %% renaming here.
+ i(E1, Ctxt, ren__identity(), Env, S)
+ end.
+
+%% For now, we assume that primops cannot be evaluated at compile time,
+%% probably being too special. Also, we have no knowledge about their
+%% side effects.
+
+i_primop(E, Ren, Env, S) ->
+ %% Visit the arguments for value.
+ {As, S1} = mapfoldl(fun (E, S) ->
+ i(E, value, Ren, Env, S)
+ end,
+ S, primop_args(E)),
+ N = weight(primop) + weight(argument) * length(As),
+ {update_c_primop(E, primop_name(E), As), count_size(N, S1)}.
+
+%% This is like having an expression with an extra fun-expression
+%% attached for "exceptional cases"; actually, there are exactly two
+%% parameter variables for the body, but they are easiest handled as if
+%% their number might vary, just as for a `fun'.
+
+i_try(E, Ctxt, Ren, Env, S) ->
+ %% The argument expression is evaluated in `value' context, and the
+ %% surrounding context is propagated into both branches. We do not
+ %% try to recognize cases when the protected expression will
+ %% actually raise an exception. Note that the variables are visited
+ %% as patterns.
+ {A, S1} = i(try_arg(E), value, Ren, Env, S),
+ Vs = try_vars(E),
+ {_, Ren1, Env1, S2} = bind_locals(Vs, Ren, Env, S1),
+ Vs1 = i_params(Vs, Ren1, Env1),
+ {B, S3} = i(try_body(E), Ctxt, Ren1, Env1, S2),
+ case is_safe(A) of
+ true ->
+ %% The `try' wrapper can be dropped in this case. Since the
+ %% expressions have been visited already, the identity
+ %% renaming is used when we revisit the new let-expression.
+ i(c_let(Vs1, A, B), Ctxt, ren__identity(), Env, S3);
+ false ->
+ Evs = try_evars(E),
+ {_, Ren2, Env2, S4} = bind_locals(Evs, Ren, Env, S3),
+ Evs1 = i_params(Evs, Ren2, Env2),
+ {H, S5} = i(try_handler(E), Ctxt, Ren2, Env2, S4),
+ S6 = count_size(weight('try'), S5),
+ {update_c_try(E, A, Vs1, B, Evs1, H), S6}
+ end.
+
+%% A special case of try-expressions:
+
+i_catch(E, Ctxt, Ren, Env, S) ->
+ %% We cannot propagate application contexts into the catch.
+ {E1, S1} = i(catch_body(E), safe_context(Ctxt), Ren, Env, S),
+ case is_safe(E1) of
+ true ->
+ %% The `catch' wrapper can be dropped in this case.
+ {E1, S1};
+ false ->
+ S2 = count_size(weight('catch'), S1),
+ {update_c_catch(E, E1), S2}
+ end.
+
+%% A receive-expression is very much like a case-expression, with the
+%% difference that we do not have access to a switch expression, since
+%% the value being switched on is taken from the mailbox. The fact that
+%% the receive-expression may iterate over an arbitrary number of
+%% messages is not of interest to us. All we can do here is to visit its
+%% subexpressions, and possibly eliminate definitely unselectable
+%% clauses.
+
+i_receive(E, Ctxt, Ren, Env, S) ->
+ %% We first visit the expiry expression (for value) and the expiry
+ %% body (in the surrounding context).
+ {T, S1} = i(receive_timeout(E), value, Ren, Env, S),
+ {B, S2} = i(receive_action(E), Ctxt, Ren, Env, S1),
+
+ %% Then we visit the clauses. Note that application contexts may not
+ %% in general be propagated into the branches (and the expiry body),
+ %% because the execution of the `receive' may remove a message from
+ %% the mailbox as a side effect; the situation is thus analogous to
+ %% that in a `case' expression.
+ Ctxt1 = safe_context(Ctxt),
+ case i_clauses(receive_clauses(E), Ctxt1, Ren, Env, S2) of
+ {false, {[], _, _, Cs}, S3} ->
+ %% We still have a list of clauses. If the list is empty,
+ %% and the expiry expression is the integer zero, the
+ %% expression reduces to the expiry body.
+ if Cs == [] ->
+ case is_c_int(T) andalso (int_val(T) == 0) of
+ true ->
+ {B, S3};
+ false ->
+ i_receive_1(E, Cs, T, B, S3)
+ end;
+ true ->
+ i_receive_1(E, Cs, T, B, S3)
+ end;
+ {true, {_, _, _, Cs}, S3} ->
+ %% Cs is a single clause that will always be matched (if a
+ %% message exists), but we must keep the `receive' statement
+ %% in order to fetch the message from the mailbox.
+ i_receive_1(E, Cs, T, B, S3)
+ end.
+
+i_receive_1(E, Cs, T, B, S) ->
+ %% Here, we just add the base sizes for the receive-expression
+ %% itself and for each remaining clause; cf. `case'.
+ N = weight('receive') + weight(clause) * length(Cs),
+ {update_c_receive(E, Cs, T, B), count_size(N, S)}.
+
+%% A module definition is like a `letrec', with some add-ons (export and
+%% attribute declarations) but without an explicit body. Actually, the
+%% exporting of function names has the same effect as if there was a
+%% body consisting of the list of references to the exported functions.
+%% Thus, the exported functions are exactly those which can be
+%% referenced from outside the module.
+
+i_module(E, Ctxt, Ren, Env, S) ->
+ %% Cf. `i_letrec'. Note that we pass a dummy constant value for the
+ %% "body" parameter.
+ {Es, _, Xs1, S1} = i_letrec(module_defs(E), void(),
+ module_exports(E), Ctxt, Ren, Env, S),
+ %% Sanity check:
+ case Es of
+ [] ->
+ report_warning("no function definitions remaining "
+ "in module `~s'.\n",
+ [atom_name(module_name(E))]);
+ _ ->
+ ok
+ end,
+ E1 = update_c_module(E, module_name(E), Xs1, module_attrs(E), Es),
+ {E1, count_size(weight(module), S1)}.
+
+%% Binary-syntax expressions are too complicated to do anything
+%% interesting with here - that is beyond the scope of this program;
+%% also, their construction could have side effects, so even in effect
+%% context we can't remove them. (We don't bother to identify cases of
+%% "safe" unused binaries which could be removed.)
+
+i_binary(E, Ren, Env, S) ->
+ %% Visit the segments for value.
+ {Es, S1} = mapfoldl(fun (E, S) ->
+ i_bitstr(E, Ren, Env, S)
+ end,
+ S, binary_segments(E)),
+ S2 = count_size(weight(binary), S1),
+ {update_c_binary(E, Es), S2}.
+
+i_bitstr(E, Ren, Env, S) ->
+ %% It is not necessary to visit the Unit, Type and Flags fields,
+ %% since these are always literals.
+ {Val, S1} = i(bitstr_val(E), value, Ren, Env, S),
+ {Size, S2} = i(bitstr_size(E), value, Ren, Env, S1),
+ Unit = bitstr_unit(E),
+ Type = bitstr_type(E),
+ Flags = bitstr_flags(E),
+ S3 = count_size(weight(bitstr), S2),
+ {update_c_bitstr(E, Val, Size, Unit, Type, Flags), S3}.
+
+%% This is a simplified version of `i_pattern', for lists of parameter
+%% variables only. It does not modify the state.
+
+i_params([V | Vs], Ren, Env) ->
+ Name = ren__map(var_name(V), Ren),
+ case env__lookup(Name, Env) of
+ {ok, R} ->
+ [ref_to_var(R) | i_params(Vs, Ren, Env)];
+ error ->
+ report_internal_error("variable `~w' not bound "
+ "in pattern.\n", [Name]),
+ exit(error)
+ end;
+i_params([], _, _) ->
+ [].
+
+%% For ordinary patterns, we just visit to rename variables and count
+%% the size/cost. All occurring binding instances of variables should
+%% already have been added to the renaming and environment; however, to
+%% handle the size expressions of binary-syntax patterns, we must pass
+%% the renaming and environment of the containing expression
+
+i_pattern(E, Ren, Env, Ren0, Env0, S) ->
+ case type(E) of
+ var ->
+ %% Count no size.
+ Name = ren__map(var_name(E), Ren),
+ case env__lookup(Name, Env) of
+ {ok, R} ->
+ {ref_to_var(R), S};
+ error ->
+ report_internal_error("variable `~w' not bound "
+ "in pattern.\n", [Name]),
+ exit(error)
+ end;
+ alias ->
+ %% Count no size.
+ V = alias_var(E),
+ Name = ren__map(var_name(V), Ren),
+ case env__lookup(Name, Env) of
+ {ok, R} ->
+ %% Visit the subpattern and recompose.
+ V1 = ref_to_var(R),
+ {P, S1} = i_pattern(alias_pat(E), Ren, Env, Ren0,
+ Env0, S),
+ {update_c_alias(E, V1, P), S1};
+ error ->
+ report_internal_error("variable `~w' not bound "
+ "in pattern.\n", [Name]),
+ exit(error)
+ end;
+ binary ->
+ {Es, S1} = mapfoldl(fun (E, S) ->
+ i_bitstr_pattern(E, Ren, Env,
+ Ren0, Env0, S)
+ end,
+ S, binary_segments(E)),
+ S2 = count_size(weight(binary), S1),
+ {update_c_binary(E, Es), S2};
+ _ ->
+ case is_literal(E) of
+ true ->
+ {E, count_size(weight(literal), S)};
+ false ->
+ {Es1, S1} = mapfoldl(fun (E, S) ->
+ i_pattern(E, Ren, Env,
+ Ren0, Env0,
+ S)
+ end,
+ S, data_es(E)),
+ %% We assume that in general, the elements of the
+ %% constructor will all be fetched.
+ N = weight(data) + length(Es1) * weight(element),
+ S2 = count_size(N, S1),
+ {update_data(E, data_type(E), Es1), S2}
+ end
+ end.
+
+i_bitstr_pattern(E, Ren, Env, Ren0, Env0, S) ->
+ %% It is not necessary to visit the Unit, Type and Flags fields,
+ %% since these are always literals. The Value field is a limited
+ %% pattern - either a literal or an unbound variable. The Size field
+ %% is a limited expression - either a literal or a variable bound in
+ %% the environment of the containing expression.
+ {Val, S1} = i_pattern(bitstr_val(E), Ren, Env, Ren0, Env0, S),
+ {Size, S2} = i(bitstr_size(E), value, Ren0, Env0, S1),
+ Unit = bitstr_unit(E),
+ Type = bitstr_type(E),
+ Flags = bitstr_flags(E),
+ S3 = count_size(weight(bitstr), S2),
+ {update_c_bitstr(E, Val, Size, Unit, Type, Flags), S3}.
+
+
+%% ---------------------------------------------------------------------
+%% Other central inlining functions
+
+%% It is assumed here that `E' is a fun-expression and the context is an
+%% app-structure. If the inlining might be aborted for some reason, a
+%% corresponding catch should have been set up before entering `inline'.
+%%
+%% Note: if the inlined body is a lambda abstraction, and the
+%% surrounding context of the app-context is also an app-context, the
+%% `inlined' flag of the outermost context will be set before that of
+%% the inner context is set. E.g.: `let F = fun (X) -> fun (Y) -> E in
+%% apply apply F(A)(B)' will propagate the body of F, which is a lambda
+%% abstraction, into the outer application context, which will be
+%% inlined to produce expression `E', and the flag of the outer context
+%% will be set. Upon return, the flag of the inner context will also be
+%% set. However, the flags are then tested in innermost-first order.
+%% Thus, if some inlining attempt is aborted, the `inlined' flags of any
+%% nested app-contexts must be cleared.
+%%
+%% This implementation does nothing to handle inlining of calls to
+%% recursive functions in a smart way. This means that as long as the
+%% size and effort counters do not prevent it, the function body will be
+%% inlined (i.e., the first iteration will be unrolled), and the
+%% recursive calls will be residualized.
+
+inline(E, #app{opnds = Opnds, ctxt = Ctxt, loc = L}, Ren, Env, S) ->
+ %% Check that the arities match:
+ Vs = fun_vars(E),
+ if length(Opnds) /= length(Vs) ->
+ report_error("function called with wrong number "
+ "of arguments!\n"),
+ %% TODO: should really just residualise the call...
+ exit(error);
+ true ->
+ ok
+ end,
+ %% Create local bindings for the parameters to their respective
+ %% operand structures from the app-structure, and visit the body in
+ %% the context saved in the structure.
+ {Rs, Ren1, Env1, S1} = bind_locals(Vs, Opnds, Ren, Env, S),
+ {E1, S2} = i(fun_body(E), Ctxt, Ren1, Env1, S1),
+
+ %% Create necessary bindings and/or set flags.
+ {E2, S3} = make_let_bindings(Rs, E1, S2),
+
+ %% Lastly, flag the application as inlined, since the inlining
+ %% attempt was not aborted before we reached this point.
+ {E2, st__set_app_inlined(L, S3)}.
+
+%% For the (possibly renamed) argument variables to an inlined call,
+%% either create `let' bindings for them, if they are still referenced
+%% in the residual expression (in C/Lisp, also if they are assigned to),
+%% or otherwise (if they are not referenced or assigned) mark them for
+%% evaluation for side effects.
+
+make_let_bindings([R | Rs], E, S) ->
+ {E1, S1} = make_let_bindings(Rs, E, S),
+ make_let_binding(R, E1, S1);
+make_let_bindings([], E, S) ->
+ {E, S}.
+
+make_let_binding(R, E, S) ->
+ %% The `referenced' flag is conservatively computed. We therefore
+ %% first check some simple cases where parameter R is definitely not
+ %% referenced in the resulting body E.
+ case is_literal(E) of
+ true ->
+ %% A constant contains no variable references.
+ make_let_binding_1(R, E, S);
+ false ->
+ case is_c_var(E) of
+ true ->
+ case var_name(E) =:= R#ref.name of
+ true ->
+ %% The body is simply the parameter variable
+ %% itself. Visit the operand for value and
+ %% substitute the result for the body.
+ visit_and_count_size(R#ref.opnd, S);
+ false ->
+ %% Not the same variable, so the parameter
+ %% is not referenced at all.
+ make_let_binding_1(R, E, S)
+ end;
+ false ->
+ %% Proceed to check the `referenced' flag.
+ case st__get_var_referenced(R#ref.loc, S) of
+ true ->
+ %% The parameter is probably referenced in
+ %% the residual code (although it might not
+ %% be). Visit the operand for value and
+ %% create a let-binding.
+ {E1, S1} = visit_and_count_size(R#ref.opnd,
+ S),
+ S2 = count_size(weight('let'), S1),
+ {c_let([ref_to_var(R)], E1, E), S2};
+ false ->
+ %% The parameter is definitely not
+ %% referenced.
+ make_let_binding_1(R, E, S)
+ end
+ end
+ end.
+
+%% This marks the operand for evaluation for effect.
+
+make_let_binding_1(R, E, S) ->
+ Opnd = R#ref.opnd,
+ {E, st__set_opnd_effect(Opnd#opnd.loc, S)}.
+
+%% Here, `R' is the ref-structure which is the target of the copy
+%% propagation, and `Opnd' is a visited operand structure, to be
+%% propagated through `R' if possible - if not, `R' is residualised.
+%% `Opnd' is normally the operand that `R' is bound to, and `E' is the
+%% result of visiting `Opnd' for value; we pass this as an argument so
+%% we don't have to fetch it multiple times (because we don't have
+%% constant time access).
+%%
+%% We also pass the environment of the site of the variable reference,
+%% for use when inlining a propagated fun-expression. In the original
+%% algorithm by Waddell, the environment used for inlining such cases is
+%% the identity mapping, because the fun-expression body has already
+%% been visited for value, and their algorithm combines renaming of
+%% source-code variables with the looking up of information about
+%% residual-code variables. We, however, need to check the environment
+%% of the call site when creating new non-shadowed variables, but we
+%% must avoid repeated renaming. We therefore separate the renaming and
+%% the environment (as in the renaming algorithm of Peyton-Jones and
+%% Marlow). This also makes our implementation more general, compared to
+%% the original algorithm, because we do not give up on propagating
+%% variables that were free in the fun-body.
+%%
+%% Example:
+%%
+%% let F = fun (X) -> {'foo', X} in
+%% let G = fun (H) -> apply H(F) % F is free in the fun G
+%% in apply G(fun (F) -> apply F(42))
+%% =>
+%% let F = fun (X) -> {'foo', X} in
+%% apply (fun (H) -> apply H(F))(fun (F) -> apply F(42))
+%% =>
+%% let F = fun (X) -> {'foo', X} in
+%% apply (fun (F) -> apply F(42))(F)
+%% =>
+%% let F = fun (X) -> {'foo', X} in
+%% apply F(42)
+%% =>
+%% apply (fun (X) -> {'foo', X})(2)
+%% =>
+%% {'foo', 42}
+%%
+%% The original algorithm would give up at stage 4, because F was free
+%% in the propagated fun-expression. Our version inlines this example
+%% completely.
+
+copy(R, Opnd, E, Ctxt, Env, S) ->
+ case is_c_var(E) of
+ true ->
+ %% The operand reduces to another variable - get its
+ %% ref-structure and attempt to propagate further.
+ copy_var(env__get(var_name(E), Opnd#opnd.env), Ctxt, Env,
+ S);
+ false ->
+ %% Apart from variables and functional values (the latter
+ %% are handled by `copy_1' below), only constant literals
+ %% are copyable in general; other things, including e.g.
+ %% tuples `{foo, X}', could cause duplication of work, and
+ %% are not copy propagated.
+ case is_literal(E) of
+ true ->
+ {E, count_size(weight(literal), S)};
+ false ->
+ copy_1(R, Opnd, E, Ctxt, Env, S)
+ end
+ end.
+
+copy_var(R, Ctxt, Env, S) ->
+ %% (In Lisp or C, if this other variable might be assigned to, we
+ %% should residualize the "parent" instead, so we don't bypass any
+ %% destructive updates.)
+ case R#ref.opnd of
+ undefined ->
+ %% This variable is not bound to an expression, so just
+ %% residualize it.
+ residualize_var(R, S);
+ Opnd ->
+ %% Note that because operands are always visited before
+ %% copied, all copyable operand expressions will be
+ %% propagated through any number of bindings. If `R' was
+ %% bound to a constant literal, we would never have reached
+ %% this point.
+ case st__lookup_opnd_cache(Opnd#opnd.loc, S) of
+ error ->
+ %% The result for this operand is not yet ready
+ %% (which should mean that it is a recursive
+ %% reference). Thus, we must residualise the
+ %% variable.
+ residualize_var(R, S);
+ {ok, #cache{expr = E1}} ->
+ %% The result for the operand is ready, so we can
+ %% proceed to propagate it.
+ copy_1(R, Opnd, E1, Ctxt, Env, S)
+ end
+ end.
+
+copy_1(R, Opnd, E, Ctxt, Env, S) ->
+ %% Fun-expression (lambdas) are a bit special; they are copyable,
+ %% but should preferably not be duplicated, so they should not be
+ %% copy propagated except into application contexts, where they can
+ %% be inlined.
+ case is_c_fun(E) of
+ true ->
+ case Ctxt of
+ #app{} ->
+ %% First test if the operand is "outer-pending"; if
+ %% so, don't inline.
+ case st__test_outer_pending(Opnd#opnd.loc, S) of
+ false ->
+ copy_inline(R, Opnd, E, Ctxt, Env, S);
+ true ->
+ %% Cyclic reference forced inlining to stop
+ %% (avoiding infinite unfolding).
+ residualize_var(R, S)
+ end;
+ _ ->
+ residualize_var(R, S)
+ end;
+ false ->
+ %% We have no other cases to handle here
+ residualize_var(R, S)
+ end.
+
+%% This inlines a function value that was propagated to an application
+%% context. The inlining is done with an identity renaming (since the
+%% expression is already visited) but in the environment of the call
+%% site (which is OK because of the no-shadowing strategy for renaming,
+%% and because the domain of our environments are the residual-program
+%% variables instead of the source-program variables). Note that we must
+%% first set the "outer-pending" flag, and clear it afterwards.
+
+copy_inline(R, Opnd, E, Ctxt, Env, S) ->
+ S1 = st__mark_outer_pending(Opnd#opnd.loc, S),
+ case catch {ok, copy_inline_1(R, E, Ctxt, Env, S1)} of
+ {ok, {E1, S2}} ->
+ {E1, st__clear_outer_pending(Opnd#opnd.loc, S2)};
+ {'EXIT', X} ->
+ exit(X);
+ X ->
+ %% If we use destructive update for the `outer-pending'
+ %% flag, we must make sure to clear it upon a nonlocal
+ %% return.
+ st__clear_outer_pending(Opnd#opnd.loc, S1),
+ throw(X)
+ end.
+
+%% If the current effort counter was passive, we use a new active effort
+%% counter with the inherited limit for this particular inlining.
+
+copy_inline_1(R, E, Ctxt, Env, S) ->
+ case effort_is_active(S) of
+ true ->
+ copy_inline_2(R, E, Ctxt, Env, S);
+ false ->
+ S1 = new_active_effort(get_effort_limit(S), S),
+ case catch {ok, copy_inline_2(R, E, Ctxt, Env, S1)} of
+ {ok, {E1, S2}} ->
+ %% Revert to the old effort counter.
+ {E1, revert_effort(S, S2)};
+ {counter_exceeded, effort, _} ->
+ %% Aborted this inlining attempt because too much
+ %% effort was spent. Residualize the variable and
+ %% revert to the previous state.
+ residualize_var(R, S);
+ {'EXIT', X} ->
+ exit(X);
+ X ->
+ throw(X)
+ end
+ end.
+
+%% Regardless of whether the current size counter is active or not, we
+%% use a new active size counter for each inlining. If the current
+%% counter was passive, the new counter gets the inherited size limit;
+%% if it was active, the size limit of the new counter will be equal to
+%% the remaining budget of the current counter (which itself is not
+%% affected by the inlining). This distributes the size budget more
+%% evenly over "inlinings within inlinings", so that the whole size
+%% budget is not spent on the first few call sites (in an inlined
+%% function body) forcing the remaining call sites to be residualised.
+
+copy_inline_2(R, E, Ctxt, Env, S) ->
+ Limit = case size_is_active(S) of
+ true ->
+ get_size_limit(S) - get_size_value(S);
+ false ->
+ get_size_limit(S)
+ end,
+ %% Add the cost of the application to the new size limit, so we
+ %% always inline functions that are small enough, even if `Limit' is
+ %% close to zero at this point. (This is an extension to the
+ %% original algorithm.)
+ S1 = new_active_size(Limit + apply_size(length(Ctxt#app.opnds)), S),
+ case catch {ok, inline(E, Ctxt, ren__identity(), Env, S1)} of
+ {ok, {E1, S2}} ->
+ %% Revert to the old size counter.
+ {E1, revert_size(S, S2)};
+ {counter_exceeded, size, S2} ->
+ %% Aborted this inlining attempt because it got too big.
+ %% Residualize the variable and revert to the old size
+ %% counter. (It is important that we do not also revert the
+ %% effort counter here. Because the effort and size counters
+ %% are always set up together, we know that the effort
+ %% counter returned in S2 is the same that was passed to
+ %% `inline'.)
+ S3 = revert_size(S, S2),
+ %% If we use destructive update for the `inlined' flag, we
+ %% must make sure to clear the flags of any nested
+ %% app-contexts upon aborting; see `inline' for details.
+ reset_nested_apps(Ctxt, S3), % for effect
+ residualize_var(R, S3);
+ {'EXIT', X} ->
+ exit(X);
+ X ->
+ throw(X)
+ end.
+
+reset_nested_apps(#app{ctxt = Ctxt, loc = L}, S) ->
+ reset_nested_apps(Ctxt, st__clear_app_inlined(L, S));
+reset_nested_apps(_, S) ->
+ S.
+
+
+%% ---------------------------------------------------------------------
+%% Support functions
+
+new_var(Env) ->
+ Name = env__new_vname(Env),
+ c_var(Name).
+
+residualize_var(R, S) ->
+ S1 = count_size(weight(var), S),
+ {ref_to_var(R), st__set_var_referenced(R#ref.loc, S1)}.
+
+%% This function returns the value-producing subexpression of any
+%% expression. (Except for sequencing expressions, this is the
+%% expression itself.)
+
+result(E) ->
+ case is_c_seq(E) of
+ true ->
+ %% Also see `make_seq', which is used in all places to build
+ %% sequences so that they are always nested in the first
+ %% position.
+ seq_body(E);
+ false ->
+ E
+ end.
+
+%% This function rewrites E to `do A1 E' if A is `do A1 A2', and
+%% otherwise returns E unchanged.
+
+hoist_effects(A, E) ->
+ case type(A) of
+ seq -> make_seq(seq_arg(A), E);
+ _ -> E
+ end.
+
+%% This "build sequencing expression" operation assures that sequences
+%% are always nested in the first position, which makes it easy to find
+%% the actual value-producing expression of a sequence (cf. `result').
+
+make_seq(E1, E2) ->
+ case is_safe(E1) of
+ true ->
+ %% The first expression can safely be dropped.
+ E2;
+ false ->
+ %% If `E1' is a sequence whose final expression has no side
+ %% effects, then we can lose *that* expression when we
+ %% compose the new sequence, since its value will not be
+ %% used.
+ E3 = case is_c_seq(E1) of
+ true ->
+ case is_safe(seq_body(E1)) of
+ true ->
+ %% Drop the final expression.
+ seq_arg(E1);
+ false ->
+ E1
+ end;
+ false ->
+ E1
+ end,
+ case is_c_seq(E2) of
+ true ->
+ %% `E2' is a sequence (E2' E2''), so we must
+ %% rearrange the nesting to ((E1, E2') E2''), to
+ %% preserve the invariant. Annotations on `E2' are
+ %% lost.
+ c_seq(c_seq(E3, seq_arg(E2)), seq_body(E2));
+ false ->
+ c_seq(E3, E2)
+ end
+ end.
+
+%% Currently, safe expressions include variables, lambda expressions,
+%% constructors with safe subexpressions (this includes atoms, integers,
+%% empty lists, etc.), seq-, let- and letrec-expressions with safe
+%% subexpressions, try- and catch-expressions with safe subexpressions
+%% and calls to safe functions with safe argument subexpressions.
+%% Binaries seem too tricky to be considered.
+
+is_safe(E) ->
+ case is_data(E) of
+ true ->
+ is_safe_list(data_es(E));
+ false ->
+ case type(E) of
+ var ->
+ true;
+ 'fun' ->
+ true;
+ values ->
+ is_safe_list(values_es(E));
+ 'seq' ->
+ case is_safe(seq_arg(E)) of
+ true ->
+ is_safe(seq_body(E));
+ false ->
+ false
+ end;
+ 'let' ->
+ case is_safe(let_arg(E)) of
+ true ->
+ is_safe(let_body(E));
+ false ->
+ false
+ end;
+ letrec ->
+ is_safe(letrec_body(E));
+ 'try' ->
+ %% If the argument expression is not safe, it could
+ %% be modifying the state; thus, even if the body is
+ %% safe, the try-expression as a whole would not be.
+ %% If the argument is safe, the handler is not used.
+ case is_safe(try_arg(E)) of
+ true ->
+ is_safe(try_body(E));
+ false ->
+ false
+ end;
+ 'catch' ->
+ is_safe(catch_body(E));
+ call ->
+ M = call_module(E),
+ F = call_name(E),
+ case is_c_atom(M) and is_c_atom(F) of
+ true ->
+ As = call_args(E),
+ case is_safe_list(As) of
+ true ->
+ is_safe_call(atom_val(M),
+ atom_val(F),
+ length(As));
+ false ->
+ false
+ end;
+ false ->
+ false
+ end;
+ _ ->
+ false
+ end
+ end.
+
+is_safe_list([E | Es]) ->
+ case is_safe(E) of
+ true ->
+ is_safe_list(Es);
+ false ->
+ false
+ end;
+is_safe_list([]) ->
+ true.
+
+is_safe_call(M, F, A) ->
+ erl_bifs:is_safe(M, F, A).
+
+%% When setting up local variables, we only create new names if we have
+%% to, according to the "no-shadowing" strategy.
+
+make_locals(Vs, Ren, Env) ->
+ make_locals(Vs, [], Ren, Env).
+
+make_locals([V | Vs], As, Ren, Env) ->
+ Name = var_name(V),
+ case env__is_defined(Name, Env) of
+ false ->
+ %% The variable need not be renamed. Just make sure that the
+ %% renaming will map it to itself.
+ Name1 = Name,
+ Ren1 = ren__add_identity(Name, Ren);
+ true ->
+ %% The variable must be renamed to maintain the no-shadowing
+ %% invariant. Do the right thing for function variables.
+ Name1 = case Name of
+ {A, N} ->
+ env__new_fname(A, N, Env);
+ _ ->
+ env__new_vname(Env)
+ end,
+ Ren1 = ren__add(Name, Name1, Ren)
+ end,
+ %% This temporary binding is added for correct new-key generation.
+ Env1 = env__bind(Name1, dummy, Env),
+ make_locals(Vs, [Name1 | As], Ren1, Env1);
+make_locals([], As, Ren, Env) ->
+ {reverse(As), Ren, Env}.
+
+%% This adds let-bindings for the source code variables in `Es' to the
+%% environment `Env'.
+%%
+%% Note that we always assign a new state location for the
+%% residual-program variable, since we cannot know when a location for a
+%% particular variable in the source code can be reused.
+
+bind_locals(Vs, Ren, Env, S) ->
+ Opnds = lists:duplicate(length(Vs), undefined),
+ bind_locals(Vs, Opnds, Ren, Env, S).
+
+bind_locals(Vs, Opnds, Ren, Env, S) ->
+ {Ns, Ren1, Env1} = make_locals(Vs, Ren, Env),
+ {Rs, Env2, S1} = bind_locals_1(Ns, Opnds, [], Env1, S),
+ {Rs, Ren1, Env2, S1}.
+
+%% Note that the `Vs' are currently not used for anything except the
+%% number of variables. If we were maintaining "source-referenced"
+%% flags, then the flag in the new variable should be initialized to the
+%% current value of the (residual-) referenced-flag of the "parent".
+
+bind_locals_1([N | Ns], [Opnd | Opnds], Rs, Env, S) ->
+ {R, S1} = new_ref(N, Opnd, S),
+ Env1 = env__bind(N, R, Env),
+ bind_locals_1(Ns, Opnds, [R | Rs], Env1, S1);
+bind_locals_1([], [], Rs, Env, S) ->
+ {lists:reverse(Rs), Env, S}.
+
+new_refs(Ns, Opnds, S) ->
+ new_refs(Ns, Opnds, [], S).
+
+new_refs([N | Ns], [Opnd | Opnds], Rs, S) ->
+ {R, S1} = new_ref(N, Opnd, S),
+ new_refs(Ns, Opnds, [R | Rs], S1);
+new_refs([], [], Rs, S) ->
+ {lists:reverse(Rs), S}.
+
+new_ref(N, Opnd, S) ->
+ {L, S1} = st__new_ref_loc(S),
+ {#ref{name = N, opnd = Opnd, loc = L}, S1}.
+
+%% This adds recursive bindings for the source code variables in `Es' to
+%% the environment `Env'. Note that recursive binding of a set of
+%% variables is an atomic operation on the environment - they cannot be
+%% added one at a time.
+
+bind_recursive(Vs, Opnds, Ren, Env, S) ->
+ {Ns, Ren1, Env1} = make_locals(Vs, Ren, Env),
+ {Rs, S1} = new_refs(Ns, Opnds, S),
+
+ %% When this fun-expression is evaluated, it updates the operand
+ %% structure in the ref-structure to contain the recursively defined
+ %% environment and the correct renaming.
+ Fun = fun (R, Env) ->
+ Opnd = R#ref.opnd,
+ R#ref{opnd = Opnd#opnd{ren = Ren1, env = Env}}
+ end,
+ {Rs, Ren1, env__bind_recursive(Ns, Rs, Fun, Env1), S1}.
+
+safe_context(Ctxt) ->
+ case Ctxt of
+ #app{} ->
+ value;
+ _ ->
+ Ctxt
+ end.
+
+%% Note that the name of a variable encodes its type: a "plain" variable
+%% or a function variable. The latter kind also contains an arity number
+%% which should be preserved upon renaming.
+
+ref_to_var(#ref{name = Name}) ->
+ %% If we were maintaining "source-referenced" flags, the annotation
+ %% `add_ann([#source_ref{loc = L}], E)' should also be done here, to
+ %% make the algorithm reapplicable. This is however not necessary
+ %% since there are no destructive variable assignments in Erlang.
+ c_var(Name).
+
+%% Including the effort counter of the call site assures that the cost
+%% of processing an operand via `visit' is charged to the correct
+%% counter. In particular, if the effort counter of the call site was
+%% passive, the operands will also be processed with a passive counter.
+
+make_opnd(E, Ren, Env, S) ->
+ {L, S1} = st__new_opnd_loc(S),
+ C = st__get_effort(S1),
+ Opnd = #opnd{expr = E, ren = Ren, env = Env, loc = L, effort = C},
+ {Opnd, S1}.
+
+keep_referenced(Rs, S) ->
+ [R || R <- Rs, st__get_var_referenced(R#ref.loc, S)].
+
+residualize_operands(Opnds, E, S) ->
+ foldr(fun (Opnd, {E, S}) -> residualize_operand(Opnd, E, S) end,
+ {E, S}, Opnds).
+
+%% This is the only case where an operand expression can be visited in
+%% `effect' context instead of `value' context.
+
+residualize_operand(Opnd, E, S) ->
+ case st__get_opnd_effect(Opnd#opnd.loc, S) of
+ true ->
+ %% The operand has not been visited, so we do that now, but
+ %% in `effect' context. (Waddell's algoritm does some stuff
+ %% here to account specially for the operand size, which
+ %% appears unnecessary.)
+ {E1, S1} = i(Opnd#opnd.expr, effect, Opnd#opnd.ren,
+ Opnd#opnd.env, S),
+ {make_seq(E1, E), S1};
+ false ->
+ {E, S}
+ end.
+
+%% The `visit' function always visits the operand expression in `value'
+%% context (`residualize_operand' visits an unreferenced operand
+%% expression in `effect' context when necessary). A new passive size
+%% counter is used for visiting the operand, the final value of which is
+%% then cached along with the resulting expression.
+%%
+%% Note that the effort counter of the call site, included in the
+%% operand structure, is not a shared object. Thus, the effort budget is
+%% actually reused over all occurrences of the operands of a single
+%% application. This does not appear to be a problem; just a
+%% modification of the algorithm.
+
+visit(Opnd, S) ->
+ {C, S1} = visit_1(Opnd, S),
+ {C#cache.expr, S1}.
+
+visit_and_count_size(Opnd, S) ->
+ {C, S1} = visit_1(Opnd, S),
+ {C#cache.expr, count_size(C#cache.size, S1)}.
+
+visit_1(Opnd, S) ->
+ case st__lookup_opnd_cache(Opnd#opnd.loc, S) of
+ error ->
+ %% Use a new, passive, size counter for visiting operands,
+ %% and use the effort counter of the context of the operand.
+ %% It turns out that if the latter is active, it must be the
+ %% same object as the one currently used, and if it is
+ %% passive, it does not matter if it is the same object as
+ %% any other counter.
+ Effort = Opnd#opnd.effort,
+ Active = counter__is_active(Effort),
+ S1 = case Active of
+ true ->
+ S; % don't change effort counter
+ false ->
+ st__set_effort(Effort, S)
+ end,
+ S2 = new_passive_size(get_size_limit(S1), S1),
+
+ %% Visit the expression and cache the result, along with the
+ %% final value of the size counter.
+ {E, S3} = i(Opnd#opnd.expr, value, Opnd#opnd.ren,
+ Opnd#opnd.env, S2),
+ Size = get_size_value(S3),
+ C = #cache{expr = E, size = Size},
+ S4 = revert_size(S, st__set_opnd_cache(Opnd#opnd.loc, C,
+ S3)),
+ case Active of
+ true ->
+ {C, S4}; % keep using the same effort counter
+ false ->
+ {C, revert_effort(S, S4)}
+ end;
+ {ok, C} ->
+ {C, S}
+ end.
+
+%% Create a pattern matching template for an expression. A template
+%% contains only data constructors (including atomic ones) and
+%% variables, and compound literals are not folded into a single node.
+%% Each node in the template is annotated with the variable which holds
+%% the corresponding subexpression; these are new, unique variables not
+%% existing in the given `Env'. Returns `{Template, Variables, NewEnv}',
+%% where `Variables' is the list of all variables corresponding to nodes
+%% in the template *listed in reverse dependency order*, and `NewEnv' is
+%% `Env' augmented with mappings from the variable names to
+%% subexpressions of `E' (not #ref{} structures!) rewritten so that no
+%% computations are duplicated. `Variables' is guaranteed to be nonempty
+%% - at least the root node will always be bound to a new variable.
+
+make_template(E, Env) ->
+ make_template(E, [], Env).
+
+make_template(E, Vs0, Env0) ->
+ case is_data(E) of
+ true ->
+ {Ts, {Vs1, Env1}} = mapfoldl(
+ fun (E, {Vs0, Env0}) ->
+ {T, Vs1, Env1} =
+ make_template(E, Vs0,
+ Env0),
+ {T, {Vs1, Env1}}
+ end,
+ {Vs0, Env0}, data_es(E)),
+ T = make_data_skel(data_type(E), Ts),
+ E1 = update_data(E, data_type(E),
+ [hd(get_ann(T)) || T <- Ts]),
+ V = new_var(Env1),
+ Env2 = env__bind(var_name(V), E1, Env1),
+ {set_ann(T, [V]), [V | Vs1], Env2};
+ false ->
+ case type(E) of
+ seq ->
+ %% For a sequencing, we can rebind the variable used
+ %% for the body, and pass on the template as it is.
+ {T, Vs1, Env1} = make_template(seq_body(E), Vs0,
+ Env0),
+ V = var_name(hd(get_ann(T))),
+ E1 = update_c_seq(E, seq_arg(E), env__get(V, Env1)),
+ Env2 = env__bind(V, E1, Env1),
+ {T, Vs1, Env2};
+ _ ->
+ V = new_var(Env0),
+ Env1 = env__bind(var_name(V), E, Env0),
+ {set_ann(V, [V]), [V | Vs0], Env1}
+ end
+ end.
+
+%% Two clauses are equivalent if their bodies are equivalent expressions
+%% given that the respective pattern variables are local.
+
+equivalent_clauses([]) ->
+ true;
+equivalent_clauses([C | Cs]) ->
+ Env = cerl_trees:variables(c_values(clause_pats(C))),
+ equivalent_clauses_1(clause_body(C), Cs, Env).
+
+equivalent_clauses_1(E, [C | Cs], Env) ->
+ Env1 = cerl_trees:variables(c_values(clause_pats(C))),
+ case equivalent(E, clause_body(C), ordsets:union(Env, Env1)) of
+ true ->
+ equivalent_clauses_1(E, Cs, Env);
+ false ->
+ false
+ end;
+equivalent_clauses_1(_, [], _Env) ->
+ true.
+
+%% Two expressions are equivalent if and only if they yield the same
+%% value and has the same side effects in the same order. Currently, we
+%% only accept equality between constructors (constants) and nonlocal
+%% variables, since this should cover most cases of interest. If a
+%% variable is locally bound in one expression, it cannot be equivalent
+%% to one with the same name in the other expression, so we need not
+%% keep track of two environments.
+
+equivalent(E1, E2, Env) ->
+ case is_data(E1) of
+ true ->
+ case is_data(E2) of
+ true ->
+ T1 = {data_type(E1), data_arity(E1)},
+ T2 = {data_type(E2), data_arity(E2)},
+ %% Note that we must test for exact equality.
+ if T1 =:= T2 ->
+ equivalent_lists(data_es(E1), data_es(E2),
+ Env);
+ true ->
+ false
+ end;
+ false ->
+ false
+ end;
+ false ->
+ case type(E1) of
+ var ->
+ case is_c_var(E2) of
+ true ->
+ N1 = var_name(E1),
+ N2 = var_name(E2),
+ if N1 =:= N2 ->
+ not ordsets:is_element(N1, Env);
+ true ->
+ false
+ end;
+ false ->
+ false
+ end;
+ _ ->
+ %% Other constructs are not being considered.
+ false
+ end
+ end.
+
+equivalent_lists([E1 | Es1], [E2 | Es2], Env) ->
+ equivalent(E1, E2, Env) and equivalent_lists(Es1, Es2, Env);
+equivalent_lists([], [], _) ->
+ true;
+equivalent_lists(_, _, _) ->
+ false.
+
+%% Return `false' or `{true, EffectExpr, ValueExpr}'. The environment is
+%% passed for new-variable generation.
+
+reduce_bif_call(M, F, As, Env) ->
+ reduce_bif_call_1(M, F, length(As), As, Env).
+
+reduce_bif_call_1(erlang, element, 2, [X, Y], _Env) ->
+ case is_c_int(X) and is_c_tuple(Y) of
+ true ->
+ %% We are free to change the relative evaluation order of
+ %% the elements, so lifting out a particular element is OK.
+ T = list_to_tuple(tuple_es(Y)),
+ N = int_val(X),
+ if integer(N), N > 0, N =< size(T) ->
+ E = element(N, T),
+ Es = tuple_to_list(setelement(N, T, void())),
+ {true, make_seq(c_tuple(Es), E)};
+ true ->
+ false
+ end;
+ false ->
+ false
+ end;
+reduce_bif_call_1(erlang, hd, 1, [X], _Env) ->
+ case is_c_cons(X) of
+ true ->
+ %% Cf. `element/2' above.
+ {true, make_seq(cons_tl(X), cons_hd(X))};
+ false ->
+ false
+ end;
+reduce_bif_call_1(erlang, length, 1, [X], _Env) ->
+ case is_c_list(X) of
+ true ->
+ %% Cf. `erlang:size/1' below.
+ {true, make_seq(X, c_int(list_length(X)))};
+ false ->
+ false
+ end;
+reduce_bif_call_1(erlang, list_to_tuple, 1, [X], _Env) ->
+ case is_c_list(X) of
+ true ->
+ %% This does not actually preserve all the evaluation order
+ %% constraints of the list, but I don't imagine that it will
+ %% be a problem.
+ {true, c_tuple(list_elements(X))};
+ false ->
+ false
+ end;
+reduce_bif_call_1(erlang, setelement, 3, [X, Y, Z], Env) ->
+ case is_c_int(X) and is_c_tuple(Y) of
+ true ->
+ %% Here, unless `Z' is a simple expression, we must bind it
+ %% to a new variable, because in that case, `Z' must be
+ %% evaluated before any part of `Y'.
+ T = list_to_tuple(tuple_es(Y)),
+ N = int_val(X),
+ if integer(N), N > 0, N =< size(T) ->
+ E = element(N, T),
+ case is_simple(Z) of
+ true ->
+ Es = tuple_to_list(setelement(N, T, Z)),
+ {true, make_seq(E, c_tuple(Es))};
+ false ->
+ V = new_var(Env),
+ Es = tuple_to_list(setelement(N, T, V)),
+ E1 = make_seq(E, c_tuple(Es)),
+ {true, c_let([V], Z, E1)}
+ end;
+ true ->
+ false
+ end;
+ false ->
+ false
+ end;
+reduce_bif_call_1(erlang, size, 1, [X], _Env) ->
+ case is_c_tuple(X) of
+ true ->
+ %% Just evaluate the tuple for effect and use the size (the
+ %% arity) as the result.
+ {true, make_seq(X, c_int(tuple_arity(X)))};
+ false ->
+ false
+ end;
+reduce_bif_call_1(erlang, tl, 1, [X], _Env) ->
+ case is_c_cons(X) of
+ true ->
+ %% Cf. `element/2' above.
+ {true, make_seq(cons_hd(X), cons_tl(X))};
+ false ->
+ false
+ end;
+reduce_bif_call_1(erlang, tuple_to_list, 1, [X], _Env) ->
+ case is_c_tuple(X) of
+ true ->
+ %% This actually introduces slightly stronger constraints on
+ %% the evaluation order of the subexpressions.
+ {true, make_list(tuple_es(X))};
+ false ->
+ false
+ end;
+reduce_bif_call_1(_M, _F, _A, _As, _Env) ->
+ false.
+
+effort_is_active(S) ->
+ counter__is_active(st__get_effort(S)).
+
+size_is_active(S) ->
+ counter__is_active(st__get_size(S)).
+
+get_effort_limit(S) ->
+ counter__limit(st__get_effort(S)).
+
+new_active_effort(Limit, S) ->
+ st__set_effort(counter__new_active(Limit), S).
+
+revert_effort(S1, S2) ->
+ st__set_effort(st__get_effort(S1), S2).
+
+new_active_size(Limit, S) ->
+ st__set_size(counter__new_active(Limit), S).
+
+new_passive_size(Limit, S) ->
+ st__set_size(counter__new_passive(Limit), S).
+
+revert_size(S1, S2) ->
+ st__set_size(st__get_size(S1), S2).
+
+count_effort(N, S) ->
+ C = st__get_effort(S),
+ C1 = counter__add(N, C, effort, S),
+ case debug_counters() of
+ true ->
+ case counter__is_active(C1) of
+ true ->
+ V = counter__value(C1),
+ case V > get(counter_effort_max) of
+ true ->
+ put(counter_effort_max, V);
+ false ->
+ ok
+ end;
+ false ->
+ ok
+ end;
+ _ ->
+ ok
+ end,
+ st__set_effort(C1, S).
+
+count_size(N, S) ->
+ C = st__get_size(S),
+ C1 = counter__add(N, C, size, S),
+ case debug_counters() of
+ true ->
+ case counter__is_active(C1) of
+ true ->
+ V = counter__value(C1),
+ case V > get(counter_size_max) of
+ true ->
+ put(counter_size_max, V);
+ false ->
+ ok
+ end;
+ false ->
+ ok
+ end;
+ _ ->
+ ok
+ end,
+ st__set_size(C1, S).
+
+get_size_value(S) ->
+ counter__value(st__get_size(S)).
+
+get_size_limit(S) ->
+ counter__limit(st__get_size(S)).
+
+kill_id_anns([{'id',_} | As]) ->
+ kill_id_anns(As);
+kill_id_anns([A | As]) ->
+ [A | kill_id_anns(As)];
+kill_id_anns([]) ->
+ [].
+
+
+%% =====================================================================
+%% General utilities
+
+max(X, Y) when X > Y -> X;
+max(_, Y) -> Y.
+
+%% The atom `ok', is widely used in Erlang for "void" values.
+
+void() -> abstract(ok).
+
+is_simple(E) ->
+ case type(E) of
+ literal -> true;
+ var -> true;
+ 'fun' -> true;
+ _ -> false
+ end.
+
+get_components(N, E) ->
+ case type(E) of
+ values ->
+ Es = values_es(E),
+ if length(Es) == N ->
+ {true, Es};
+ true ->
+ false
+ end;
+ _ when N == 1 ->
+ {true, [E]};
+ _ ->
+ false
+ end.
+
+all_static([E | Es]) ->
+ case is_literal(result(E)) of
+ true ->
+ all_static(Es);
+ false ->
+ false
+ end;
+all_static([]) ->
+ true.
+
+set_clause_bodies([C | Cs], B) ->
+ [update_c_clause(C, clause_pats(C), clause_guard(C), B)
+ | set_clause_bodies(Cs, B)];
+set_clause_bodies([], _) ->
+ [].
+
+filename([C | T]) when integer(C), C > 0, C =< 255 ->
+ [C | filename(T)];
+filename([H|T]) ->
+ filename(H) ++ filename(T);
+filename([]) ->
+ [];
+filename(N) when atom(N) ->
+ atom_to_list(N);
+filename(N) ->
+ report_error("bad filename: `~P'.", [N, 25]),
+ exit(error).
+
+
+%% =====================================================================
+%% Abstract datatype: renaming()
+
+ren__identity() ->
+ dict:new().
+
+ren__add(X, Y, Ren) ->
+ dict:store(X, Y, Ren).
+
+ren__map(X, Ren) ->
+ case dict:find(X, Ren) of
+ {ok, Y} ->
+ Y;
+ error ->
+ X
+ end.
+
+ren__add_identity(X, Ren) ->
+ dict:erase(X, Ren).
+
+
+%% =====================================================================
+%% Abstract datatype: environment()
+
+env__empty() ->
+ rec_env:empty().
+
+env__bind(Key, Val, Env) ->
+ rec_env:bind(Key, Val, Env).
+
+%% `Es' should have type `[{Key, Val}]', and `Fun' should have type
+%% `(Val, Env) -> T', mapping a value together with the recursive
+%% environment itself to some term `T' to be returned when the entry is
+%% looked up.
+
+env__bind_recursive(Ks, Vs, F, Env) ->
+ rec_env:bind_recursive(Ks, Vs, F, Env).
+
+env__lookup(Key, Env) ->
+ rec_env:lookup(Key, Env).
+
+env__get(Key, Env) ->
+ rec_env:get(Key, Env).
+
+env__is_defined(Key, Env) ->
+ rec_env:is_defined(Key, Env).
+
+env__new_vname(Env) ->
+ rec_env:new_key(Env).
+
+env__new_fname(A, N, Env) ->
+ rec_env:new_key(fun (X) ->
+ S = integer_to_list(X),
+ {list_to_atom(atom_to_list(A) ++ "_" ++ S),
+ N}
+ end, Env).
+
+
+%% =====================================================================
+%% Abstract datatype: state()
+
+-record(state, {free, % next free location
+ size, % size counter
+ effort, % effort counter
+ cache, % operand expression cache
+ var_flags, % flags for variables (#ref-structures)
+ opnd_flags, % flags for operands
+ app_flags}). % flags for #app-structures
+
+%% Note that we do not have a `var_assigned' flag, since there is no
+%% destructive assignment in Erlang. In the original algorithm, the
+%% "residual-referenced"-flags of the previous inlining pass (or
+%% initialization pass) are used as the "source-referenced"-flags for
+%% the subsequent pass. The latter may then be used as a safe
+%% approximation whenever we need to base a decision on whether or not a
+%% particular variable or function variable could be referenced in the
+%% program being generated, and computation of the new
+%% "residual-referenced" flag for that variable is not yet finished. In
+%% the present algorithm, this can only happen in the presence of
+%% variable assignments, which do not exist in Erlang. Therefore, we do
+%% not keep "source-referenced" flags for residual-code references in
+%% our implementation.
+%%
+%% The "inner-pending" flag tells us whether we are already in the
+%% process of visiting a particular operand, and the "outer-pending"
+%% flag whether we are in the process of inlining a propagated
+%% functional value. The "pending flags" are really counters limiting
+%% the number of times an operand may be inlined recursively, causing
+%% loop unrolling; however, unrolling more than one iteration does not
+%% work offhand in the present implementation. (TODO: find out why.)
+%% Note that the initial value must be greater than zero in order for
+%% any inlining at all to be done.
+
+%% Flags are stored in ETS-tables, one table for each class. The second
+%% element in each stored tuple is the key (the "label").
+
+-record(var_flags, {lab, referenced = false}).
+-record(opnd_flags, {lab, inner_pending = 1, outer_pending = 1,
+ effect = false}).
+-record(app_flags, {lab, inlined = false}).
+
+st__new(Effort, Size) ->
+ #state{free = 0,
+ size = counter__new_passive(Size),
+ effort = counter__new_passive(Effort),
+ cache = dict:new(),
+ var_flags = ets:new(var, [set, private, {keypos, 2}]),
+ opnd_flags = ets:new(opnd, [set, private, {keypos, 2}]),
+ app_flags = ets:new(app, [set, private, {keypos, 2}])}.
+
+st__new_loc(S) ->
+ N = S#state.free,
+ {N, S#state{free = N + 1}}.
+
+st__get_effort(S) ->
+ S#state.effort.
+
+st__set_effort(C, S) ->
+ S#state{effort = C}.
+
+st__get_size(S) ->
+ S#state.size.
+
+st__set_size(C, S) ->
+ S#state{size = C}.
+
+st__set_var_referenced(L, S) ->
+ T = S#state.var_flags,
+ [F] = ets:lookup(T, L),
+ ets:insert(T, F#var_flags{referenced = true}),
+ S.
+
+st__get_var_referenced(L, S) ->
+ ets:lookup_element(S#state.var_flags, L, #var_flags.referenced).
+
+st__lookup_opnd_cache(L, S) ->
+ dict:find(L, S#state.cache).
+
+%% Note that setting the cache should only be done once.
+
+st__set_opnd_cache(L, C, S) ->
+ S#state{cache = dict:store(L, C, S#state.cache)}.
+
+st__set_opnd_effect(L, S) ->
+ T = S#state.opnd_flags,
+ [F] = ets:lookup(T, L),
+ ets:insert(T, F#opnd_flags{effect = true}),
+ S.
+
+st__get_opnd_effect(L, S) ->
+ ets:lookup_element(S#state.opnd_flags, L, #opnd_flags.effect).
+
+st__set_app_inlined(L, S) ->
+ T = S#state.app_flags,
+ [F] = ets:lookup(T, L),
+ ets:insert(T, F#app_flags{inlined = true}),
+ S.
+
+st__clear_app_inlined(L, S) ->
+ T = S#state.app_flags,
+ [F] = ets:lookup(T, L),
+ ets:insert(T, F#app_flags{inlined = false}),
+ S.
+
+st__get_app_inlined(L, S) ->
+ ets:lookup_element(S#state.app_flags, L, #app_flags.inlined).
+
+%% The pending-flags are initialized by `st__new_opnd_loc' below.
+
+st__test_inner_pending(L, S) ->
+ T = S#state.opnd_flags,
+ P = ets:lookup_element(T, L, #opnd_flags.inner_pending),
+ P =< 0.
+
+st__mark_inner_pending(L, S) ->
+ ets:update_counter(S#state.opnd_flags, L,
+ {#opnd_flags.inner_pending, -1}),
+ S.
+
+st__clear_inner_pending(L, S) ->
+ ets:update_counter(S#state.opnd_flags, L,
+ {#opnd_flags.inner_pending, 1}),
+ S.
+
+st__test_outer_pending(L, S) ->
+ T = S#state.opnd_flags,
+ P = ets:lookup_element(T, L, #opnd_flags.outer_pending),
+ P =< 0.
+
+st__mark_outer_pending(L, S) ->
+ ets:update_counter(S#state.opnd_flags, L,
+ {#opnd_flags.outer_pending, -1}),
+ S.
+
+st__clear_outer_pending(L, S) ->
+ ets:update_counter(S#state.opnd_flags, L,
+ {#opnd_flags.outer_pending, 1}),
+ S.
+
+st__new_app_loc(S) ->
+ V = {L, _S1} = st__new_loc(S),
+ ets:insert(S#state.app_flags, #app_flags{lab = L}),
+ V.
+
+st__new_ref_loc(S) ->
+ V = {L, _S1} = st__new_loc(S),
+ ets:insert(S#state.var_flags, #var_flags{lab = L}),
+ V.
+
+st__new_opnd_loc(S) ->
+ V = {L, _S1} = st__new_loc(S),
+ ets:insert(S#state.opnd_flags, #opnd_flags{lab = L}),
+ V.
+
+
+%% =====================================================================
+%% Abstract datatype: counter()
+%%
+%% `counter__add' throws `{counter_exceeded, Type, Data}' if the
+%% resulting counter value would exceed the limit for the counter in
+%% question (`Type' and `Data' are given by the user).
+
+-record(counter, {active, value, limit}).
+
+counter__new_passive(Limit) when Limit > 0 ->
+ {0, Limit}.
+
+counter__new_active(Limit) when Limit > 0 ->
+ {Limit, Limit}.
+
+%% Active counters have values > 0 internally; passive counters start at
+%% zero. The 'limit' field is only accessed by the 'counter__limit'
+%% function.
+
+counter__is_active({C, _}) ->
+ C > 0.
+
+counter__limit({_, L}) ->
+ L.
+
+counter__value({N, L}) ->
+ if N > 0 ->
+ L - N;
+ true ->
+ -N
+ end.
+
+counter__add(N, {V, L}, Type, Data) ->
+ N1 = V - N,
+ if V > 0, N1 =< 0 ->
+ case debug_counters() of
+ true ->
+ case Type of
+ effort ->
+ put(counter_effort_triggers,
+ get(counter_effort_triggers) + 1);
+ size ->
+ put(counter_size_triggers,
+ get(counter_size_triggers) + 1)
+ end;
+ _ ->
+ ok
+ end,
+ throw({counter_exceeded, Type, Data});
+ true ->
+ {N1, L}
+ end.
+
+
+%% =====================================================================
+%% Reporting
+
+% report_internal_error(S) ->
+% report_internal_error(S, []).
+
+report_internal_error(S, Vs) ->
+ report_error("internal error: " ++ S, Vs).
+
+report_error(D) ->
+ report_error(D, []).
+
+report_error({F, L, D}, Vs) ->
+ report({F, L, {error, D}}, Vs);
+report_error(D, Vs) ->
+ report({error, D}, Vs).
+
+report_warning(D) ->
+ report_warning(D, []).
+
+report_warning({F, L, D}, Vs) ->
+ report({F, L, {warning, D}}, Vs);
+report_warning(D, Vs) ->
+ report({warning, D}, Vs).
+
+report(D, Vs) ->
+ io:put_chars(format(D, Vs)).
+
+format({error, D}, Vs) ->
+ ["error: ", format(D, Vs)];
+format({warning, D}, Vs) ->
+ ["warning: ", format(D, Vs)];
+format({"", L, D}, Vs) when integer(L), L > 0 ->
+ [io_lib:fwrite("~w: ", [L]), format(D, Vs)];
+format({"", _L, D}, Vs) ->
+ format(D, Vs);
+format({F, L, D}, Vs) when integer(L), L > 0 ->
+ [io_lib:fwrite("~s:~w: ", [filename(F), L]), format(D, Vs)];
+format({F, _L, D}, Vs) ->
+ [io_lib:fwrite("~s: ", [filename(F)]), format(D, Vs)];
+format(S, Vs) when list(S) ->
+ [io_lib:fwrite(S, Vs), $\n].
+
+
+%% =====================================================================