diff options
author | Erlang/OTP <[email protected]> | 2009-11-20 14:54:40 +0000 |
---|---|---|
committer | Erlang/OTP <[email protected]> | 2009-11-20 14:54:40 +0000 |
commit | 84adefa331c4159d432d22840663c38f155cd4c1 (patch) | |
tree | bff9a9c66adda4df2106dfd0e5c053ab182a12bd /lib/compiler/src/cerl_inline.erl | |
download | otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.gz otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.bz2 otp-84adefa331c4159d432d22840663c38f155cd4c1.zip |
The R13B03 release.OTP_R13B03
Diffstat (limited to 'lib/compiler/src/cerl_inline.erl')
-rw-r--r-- | lib/compiler/src/cerl_inline.erl | 2717 |
1 files changed, 2717 insertions, 0 deletions
diff --git a/lib/compiler/src/cerl_inline.erl b/lib/compiler/src/cerl_inline.erl new file mode 100644 index 0000000000..191efa3032 --- /dev/null +++ b/lib/compiler/src/cerl_inline.erl @@ -0,0 +1,2717 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% Core Erlang inliner. + +%% ===================================================================== +%% +%% This is an implementation of the algorithm by Waddell and Dybvig +%% ("Fast and Effective Procedure Inlining", International Static +%% Analysis Symposium 1997), adapted to the Core Erlang language. +%% +%% Instead of always renaming variables and function variables, this +%% implementation uses the "no-shadowing strategy" of Peyton Jones and +%% Marlow ("Secrets of the Glasgow Haskell Compiler Inliner", 1999). +%% +%% ===================================================================== + +%% TODO: inline single-source-reference operands without size limit. + +-module(cerl_inline). + +-export([core_transform/2, transform/1, transform/2]). + +-import(cerl, [abstract/1, alias_pat/1, alias_var/1, apply_args/1, + apply_op/1, atom_name/1, atom_val/1, bitstr_val/1, + bitstr_size/1, bitstr_unit/1, bitstr_type/1, + bitstr_flags/1, binary_segments/1, update_c_alias/3, + update_c_apply/3, update_c_binary/2, update_c_bitstr/6, + update_c_call/4, update_c_case/3, update_c_catch/2, + update_c_clause/4, c_fun/2, c_int/1, c_let/3, + update_c_let/4, update_c_letrec/3, update_c_module/5, + update_c_primop/3, update_c_receive/4, update_c_seq/3, + c_seq/2, update_c_try/6, c_tuple/1, update_c_values/2, + c_values/1, c_var/1, call_args/1, call_module/1, + call_name/1, case_arity/1, case_arg/1, case_clauses/1, + catch_body/1, clause_body/1, clause_guard/1, + clause_pats/1, clause_vars/1, concrete/1, cons_hd/1, + cons_tl/1, data_arity/1, data_es/1, data_type/1, + fun_body/1, fun_vars/1, get_ann/1, int_val/1, + is_c_atom/1, is_c_cons/1, is_c_fun/1, is_c_int/1, + is_c_list/1, is_c_seq/1, is_c_tuple/1, is_c_var/1, + is_data/1, is_literal/1, is_literal_term/1, let_arg/1, + let_body/1, let_vars/1, letrec_body/1, letrec_defs/1, + list_length/1, list_elements/1, update_data/3, + make_list/1, make_data_skel/2, module_attrs/1, + module_defs/1, module_exports/1, module_name/1, + primop_args/1, primop_name/1, receive_action/1, + receive_clauses/1, receive_timeout/1, seq_arg/1, + seq_body/1, set_ann/2, try_arg/1, try_body/1, try_vars/1, + try_evars/1, try_handler/1, tuple_es/1, tuple_arity/1, + type/1, values_es/1, var_name/1]). + +-import(erlang, [max/2]). +-import(lists, [foldl/3, foldr/3, mapfoldl/3, reverse/1]). + +%% +%% Constants +%% + +debug_runtime() -> false. +debug_counters() -> false. + +%% Normal execution times for inlining are between 0.1 and 0.3 seconds +%% (on the author's current equipment). The default effort limit of 150 +%% is high enough that most normal programs never hit the limit even +%% once, and for difficult programs, it generally keeps the execution +%% times below 2-5 seconds. Using an effort counter of 1000 will thus +%% have no further effect on most programs, but some programs may take +%% as much as 10 seconds or more. Effort counts larger than 2500 have +%% never been observed even on very ill-conditioned programs. +%% +%% Size limits between 6 and 18 tend to actually shrink the code, +%% because of the simplifications made possible by inlining. A limit of +%% 16 seems to be optimal for this purpose, often shrinking the +%% executable code by up to 10%. Size limits between 18 and 30 generally +%% give the same code size as if no inlining was done (i.e., code +%% duplication balances out the simplifications at these levels). A size +%% limit between 1 and 5 tends to inline small functions and propagate +%% constants, but does not cause much simplifications do be done, so the +%% net effect will be a slight increase in code size. For size limits +%% above 30, the executable code size tends to increase with about 10% +%% per 100 units, with some variations depending on the sizes of +%% functions in the source code. +%% +%% Typically, about 90% of the maximum speedup achievable is already +%% reached using a size limit of 30, and 98% is reached at limits around +%% 100-150; there is rarely any point in letting the code size increase +%% by more than 10-15%. If too large functions are inlined, cache +%% effects will slow the program down. + +default_effort() -> 150. +default_size() -> 24. +default_unroll() -> 1. + +%% Base costs/weights for different kinds of expressions. If these are +%% modified, the size limits above may have to be adjusted. + +weight(var) -> 0; % We count no cost for variable accesses. +weight(values) -> 0; % Value aggregates have no cost in themselves. +weight(literal) -> 1; % We assume efficient handling of constants. +weight(data) -> 1; % Base cost; add 1 per element. +weight(element) -> 1; % Cost of storing/fetching an element. +weight(argument) -> 1; % Cost of passing a function argument. +weight('fun') -> 6; % Base cost + average number of free vars. +weight('let') -> 0; % Count no cost for let-bindings. +weight(letrec) -> 0; % Like a let-binding. +weight('case') -> 0; % Case switches have no base cost. +weight(clause) -> 1; % Count one jump at the end of each clause body. +weight('receive') -> 9; % Initialization/cleanup cost. +weight('try') -> 1; % Assume efficient implementation. +weight('catch') -> 1; % See `try'. +weight(apply) -> 3; % Average base cost: call/return. +weight(call) -> 3; % Assume remote-calls as efficient as `apply'. +weight(primop) -> 2; % Assume more efficient than `apply'. +weight(binary) -> 4; % Initialisation base cost. +weight(bitstr) -> 3; % Coding/decoding a value; like a primop. +weight(module) -> 1. % Like a letrec with a constant body + +%% These "reference" structures are used for variables and function +%% variables. They keep track of the variable name, any bound operand, +%% and the associated store location. + +-record(ref, {name, opnd, loc}). + +%% Operand structures contain the operand expression, the renaming and +%% environment, the state location, and the effort counter at the call +%% site (cf. `visit'). + +-record(opnd, {expr, ren, env, loc, effort}). + +%% Since expressions are only visited in `effect' context when they are +%% not bound to a referenced variable, only expressions visited in +%% 'value' context are cached. + +-record(cache, {expr, size}). + +%% The context flags for an application structure are kept separate from +%% the structure itself. Note that the original algorithm had exactly +%% one operand in each application context structure, while we can have +%% several, or none. + +-record(app, {opnds, ctxt, loc}). + + +%% +%% Interface functions +%% + +%% Use compile option `{core_transform, inline}' to insert this as a +%% compilation pass. + +-spec core_transform(cerl:cerl(), [compile:option()]) -> cerl:cerl(). + +core_transform(Code, Opts) -> + cerl:to_records(transform(cerl:from_records(Code), Opts)). + +-spec transform(cerl:cerl()) -> cerl:cerl(). + +transform(Tree) -> + transform(Tree, []). + +-spec transform(cerl:cerl(), [compile:option()]) -> cerl:cerl(). + +transform(Tree, Opts) -> + main(Tree, value, Opts). + +main(Tree, Ctxt, Opts) -> + %% We spawn a new process to do the work, so we don't have to worry + %% about cluttering the process dictionary with debugging info, or + %% proper deallocation of ets-tables. + Opts1 = Opts ++ [{inline_size, default_size()}, + {inline_effort, default_effort()}, + {inline_unroll, default_unroll()}], + Reply = self(), + Pid = spawn_link(fun () -> start(Reply, Tree, Ctxt, Opts1) end), + receive + {Pid, Tree1} -> Tree1 + end. + +start(Reply, Tree, Ctxt, Opts) -> + init_debug(), + case debug_runtime() of + %% true -> + %% put(inline_start_time, + %% element(1, erlang:statistics(runtime))); + false -> + ok + end, + Size = max(1, proplists:get_value(inline_size, Opts)), + Effort = max(1, proplists:get_value(inline_effort, Opts)), + Unroll = max(1, proplists:get_value(inline_unroll, Opts)), + case proplists:get_bool(verbose, Opts) of + true -> + io:fwrite("Inlining: inline_size=~w inline_effort=~w\n", + [Size, Effort]); + false -> + ok + end, + + %% Note that the counters of the new state are passive. + S = st__new(Effort, Size, Unroll), + +%%% Initialization is not needed at present. Note that the code in +%%% `inline_init' is not up-to-date with this module. +%%% {Tree1, S1} = inline_init:init(Tree, S), +%%% {Tree2, _S2} = i(Tree1, Ctxt, S1), + {Tree2, _S2} = i(Tree, Ctxt, S), + report_debug(), + Reply ! {self(), Tree2}. + +init_debug() -> + case debug_counters() of + %% true -> + %% put(counter_effort_triggers, 0), + %% put(counter_effort_max, 0), + %% put(counter_size_triggers, 0), + %% put(counter_size_max, 0); + false -> + ok + end. + +report_debug() -> + case debug_runtime() of + %% true -> + %% {Time, _} = erlang:statistics(runtime), + %% report("Total run time for inlining: ~.2.0f s.\n", + %% [(Time - get(inline_start_time))/1000]); + false -> + ok + end, + case debug_counters() of + %% true -> + %% counter_stats(); + false -> + ok + end. + +%% counter_stats() -> +%% T1 = get(counter_effort_triggers), +%% T2 = get(counter_size_triggers), +%% E = get(counter_effort_max), +%% S = get(counter_size_max), +%% M1 = io_lib:fwrite("\tNumber of triggered " +%% "effort counters: ~p.\n", [T1]), +%% M2 = io_lib:fwrite("\tNumber of triggered " +%% "size counters: ~p.\n", [T2]), +%% M3 = io_lib:fwrite("\tLargest active effort counter: ~p.\n", +%% [E]), +%% M4 = io_lib:fwrite("\tLargest active size counter: ~p.\n", +%% [S]), +%% report("Counter statistics:\n~s", [[M1, M2, M3, M4]]). + + +%% ===================================================================== +%% The main inlining function +%% +%% i(E :: coreErlang(), +%% Ctxt :: value | effect | #app{} +%% Ren :: renaming(), +%% Env :: environment(), +%% S :: state()) +%% -> {E', S'} +%% +%% Note: It is expected that the input source code ('E') does not +%% contain free variables. If it does, there is a risk of accidental +%% name capture, in case a generated "new" variable name happens to be +%% the same as the name of a variable that is free further below in the +%% tree; the algorithm only consults the current environment to check if +%% a name already exists. +%% +%% The renaming maps names of source-code variable and function +%% variables to new names as necessary to avoid clashes, according to +%% the "no-shadowing" strategy. The environment maps *residual-code* +%% variables and function variables to operands and global information. +%% Separating the renaming from the environment, and using the +%% residual-code variables instead of the source-code variables as its +%% domain, improves the behaviour of the algorithm when code needs to be +%% traversed more than once. +%% +%% Note that there is no such thing as a `test' context for expressions +%% in (Core) Erlang (see `i_case' below for details). + +i(E, Ctxt, S) -> + i(E, Ctxt, ren__identity(), env__empty(), S). + +i(E, Ctxt, Ren, Env, S0) -> + %% Count one unit of effort on each pass. + S = count_effort(1, S0), + case is_data(E) of + true -> + i_data(E, Ctxt, Ren, Env, S); + false -> + case type(E) of + var -> + i_var(E, Ctxt, Ren, Env, S); + values -> + i_values(E, Ctxt, Ren, Env, S); + 'fun' -> + i_fun(E, Ctxt, Ren, Env, S); + seq -> + i_seq(E, Ctxt, Ren, Env, S); + 'let' -> + i_let(E, Ctxt, Ren, Env, S); + letrec -> + i_letrec(E, Ctxt, Ren, Env, S); + 'case' -> + i_case(E, Ctxt, Ren, Env, S); + 'receive' -> + i_receive(E, Ctxt, Ren, Env, S); + apply -> + i_apply(E, Ctxt, Ren, Env, S); + call -> + i_call(E, Ctxt, Ren, Env, S); + primop -> + i_primop(E, Ren, Env, S); + 'try' -> + i_try(E, Ctxt, Ren, Env, S); + 'catch' -> + i_catch(E, Ctxt, Ren, Env, S); + binary -> + i_binary(E, Ren, Env, S); + module -> + i_module(E, Ctxt, Ren, Env, S) + end + end. + +i_data(E, Ctxt, Ren, Env, S) -> + case is_literal(E) of + true -> + %% This is the `(const c)' case of the original algorithm: + %% literal terms which (regardless of size) do not need to + %% be constructed dynamically at runtime - boldly assuming + %% that the compiler/runtime system can handle this. + case Ctxt of + effect -> + %% Reduce useless constants to a simple value. + {void(), count_size(weight(literal), S)}; + _ -> + %% (In Erlang, we cannot set all non-`false' + %% constants to `true' in a `test' context, like we + %% could do in Lisp or C, so the above is the only + %% special case to be handled here.) + {E, count_size(weight(literal), S)} + end; + false -> + %% Data constructors are like to calls to safe built-in + %% functions, for which we can "decide to inline" + %% immediately; there is no need to create operand + %% structures. In `effect' context, we can simply make a + %% sequence of the argument expressions, also visited in + %% `effect' context. In all other cases, the arguments are + %% visited for value. + case Ctxt of + effect -> + %% Note that this will count the sizes of the + %% subexpressions, even though some or all of them + %% might be discarded by the sequencing afterwards. + {Es1, S1} = mapfoldl(fun (E, S) -> + i(E, effect, Ren, Env, + S) + end, + S, data_es(E)), + E1 = foldl(fun (E1, E2) -> make_seq(E1, E2) end, + void(), Es1), + {E1, S1}; + _ -> + {Es1, S1} = mapfoldl(fun (E, S) -> + i(E, value, Ren, Env, + S) + end, + S, data_es(E)), + %% The total size/cost is the base cost for a data + %% constructor plus the cost for storing each + %% element. + N = weight(data) + length(Es1) * weight(element), + S2 = count_size(N, S1), + {update_data(E, data_type(E), Es1), S2} + end + end. + +%% This is the `(ref x)' (variable use) case of the original algorithm. +%% Note that binding occurrences are always handled in the respective +%% cases of the binding constructs. + +i_var(E, Ctxt, Ren, Env, S) -> + case Ctxt of + effect -> + %% Reduce useless variable references to a simple constant. + %% This also avoids useless visiting of bound operands. + {void(), count_size(weight(literal), S)}; + _ -> + Name = var_name(E), + case env__lookup(ren__map(Name, Ren), Env) of + {ok, R} -> + case R#ref.opnd of + undefined -> + %% The variable is not associated with an + %% argument expression; just residualize it. + residualize_var(R, S); + Opnd -> + i_var_1(R, Opnd, Ctxt, Env, S) + end; + error -> + %% The variable is unbound. (It has not been + %% accidentally captured, however, or it would have + %% been in the environment.) We leave it as it is, + %% without any warning. + {E, count_size(weight(var), S)} + end + end. + +%% This first visits the bound operand and then does copy propagation. +%% Note that we must first set the "inner-pending" flag, and clear the +%% flag afterwards. + +i_var_1(R, Opnd, Ctxt, Env, S) -> + %% If the operand is already "inner-pending", it is residualised. + %% (In Lisp/C, if the variable might be assigned to, it should also + %% be residualised.) + L = Opnd#opnd.loc, + case st__test_inner_pending(L, S) of + true -> + residualize_var(R, S); + false -> + S1 = st__mark_inner_pending(L, S), + case catch {ok, visit(Opnd, S1)} of + {ok, {E, S2}} -> + %% Note that we pass the current environment and + %% context to `copy', but not the current renaming. + S3 = st__clear_inner_pending(L, S2), + copy(R, Opnd, E, Ctxt, Env, S3); + {'EXIT', X} -> + exit(X); + X -> + %% If we use destructive update for the + %% `inner-pending' flag, we must make sure to clear + %% it also if we make a nonlocal return. + _S2 = st__clear_inner_pending(Opnd#opnd.loc, S1), + throw(X) + end + end. + +%% A multiple-value aggregate `<e1, ..., en>'. This is very much like a +%% tuple data constructor `{e1, ..., en}'; cf. `i_data' for details. + +i_values(E, Ctxt, Ren, Env, S) -> + case values_es(E) of + [E1] -> + %% Single-value aggregates can be dropped; they are simply + %% notation. + i(E1, Ctxt, Ren, Env, S); + Es -> + %% In `effect' context, we can simply make a sequence of the + %% argument expressions, also visited in `effect' context. + %% In all other cases, the arguments are visited for value. + case Ctxt of + effect -> + {Es1, S1} = + mapfoldl(fun (E, S) -> + i(E, effect, Ren, Env, S) + end, + S, Es), + E1 = foldl(fun (E1, E2) -> + make_seq(E1, E2) + end, + void(), Es1), + {E1, S1}; % drop annotations on E + _ -> + {Es1, S1} = mapfoldl(fun (E, S) -> + i(E, value, Ren, Env, + S) + end, + S, Es), + %% Aggregating values does not write them to memory, + %% so we count no extra cost per element. + S2 = count_size(weight(values), S1), + {update_c_values(E, Es1), S2} + end + end. + +%% A let-expression `let <v1,...,vn> = e0 in e1' is semantically +%% equivalent to a case-expression `case e0 of <v1,...,vn> when 'true' +%% -> e1 end'. As a special case, `let <v> = e0 in e1' is also +%% equivalent to `apply fun (v) -> e0 (e1)'. However, for efficiency, +%% and in order to allow the handling of `case' clauses to introduce new +%% let-expressions without entering an infinite rewrite loop, we handle +%% these directly. + +%%% %% Rewriting a `let' to an equivalent expression. +%%% i_let(E, Ctxt, Ren, Env, S) -> +%%% case let_vars(E) of +%%% [V] -> +%%% E1 = update_c_apply(E, c_fun([V], let_body(E)), [let_arg(E)]), +%%% i(E1, Ctxt, Ren, Env, S); +%%% Vs -> +%%% C = c_clause(Vs, abstract(true), let_body(E)), +%%% E1 = update_c_case(E, let_arg(E), [C]), +%%% i(E1, Ctxt, Ren, Env, S) +%%% end. + +i_let(E, Ctxt, Ren, Env, S) -> + case let_vars(E) of + [V] -> + i_let_1(V, E, Ctxt, Ren, Env, S); + Vs -> + %% Visit the argument expression in `value' context, to + %% simplify it as far as possible. + {A, S1} = i(let_arg(E), value, Ren, Env, S), + case get_components(length(Vs), result(A)) of + {true, As} -> + %% Note that only the components of the result of + %% `A' are passed on; any effects are hoisted. + {E1, S2} = i_let_2(Vs, As, E, Ctxt, Ren, Env, S1), + {hoist_effects(A, E1), S2}; + false -> + %% We cannot do anything with this `let', since the + %% variables cannot be matched against the argument + %% components. Just visit the variables for renaming + %% and visit the body for value (cf. `i_fun'). + {_, Ren1, Env1, S2} = bind_locals(Vs, Ren, Env, S1), + Vs1 = i_params(Vs, Ren1, Env1), + %% The body is always visited for value here. + {B, S3} = i(let_body(E), value, Ren1, Env1, S2), + S4 = count_size(weight('let'), S3), + {update_c_let(E, Vs1, A, B), S4} + end + end. + +%% Single-variable `let' binding. + +i_let_1(V, E, Ctxt, Ren, Env, S) -> + %% Make an operand structure for the argument expression, create a + %% local binding from the parameter to the operand structure, and + %% visit the body. Finally create necessary bindings and/or set + %% flags. + {Opnd, S1} = make_opnd(let_arg(E), Ren, Env, S), + {[R], Ren1, Env1, S2} = bind_locals([V], [Opnd], Ren, Env, S1), + {E1, S3} = i(let_body(E), Ctxt, Ren1, Env1, S2), + i_let_3([R], [Opnd], E1, S3). + +%% Multi-variable `let' binding. + +i_let_2(Vs, As, E, Ctxt, Ren, Env, S) -> + %% Make operand structures for the argument components. Note that + %% since the argument has already been visited at this point, we use + %% the identity renaming for the operands. + {Opnds, S1} = mapfoldl(fun (E, S) -> + make_opnd(E, ren__identity(), Env, S) + end, + S, As), + %% Create local bindings from the parameters to their respective + %% operand structures, and visit the body. + {Rs, Ren1, Env1, S2} = bind_locals(Vs, Opnds, Ren, Env, S1), + {E1, S3} = i(let_body(E), Ctxt, Ren1, Env1, S2), + i_let_3(Rs, Opnds, E1, S3). + +i_let_3(Rs, Opnds, E, S) -> + %% Create necessary bindings and/or set flags. + {E1, S1} = make_let_bindings(Rs, E, S), + + %% We must also create evaluation for effect, for any unused + %% operands, as after an application expression. + residualize_operands(Opnds, E1, S1). + +%% A sequence `do e1 e2', written `(seq e1 e2)' in the original +%% algorithm, where `e1' is evaluated for effect only (since its value +%% is not used), and `e2' yields the final value. Note that we use +%% `make_seq' to recompose the sequence after visiting the parts. + +i_seq(E, Ctxt, Ren, Env, S) -> + {E1, S1} = i(seq_arg(E), effect, Ren, Env, S), + {E2, S2} = i(seq_body(E), Ctxt, Ren, Env, S1), + %% A sequence has no cost in itself. + {make_seq(E1, E2), S2}. + + +%% The `case' switch of Core Erlang is rather different from the boolean +%% `(if e1 e2 e3)' case of the original algorithm, but the central idea +%% is the same: if, given the simplified switch expression (which is +%% visited in `value' context - a boolean `test' context would not be +%% generally useful), there is a clause which could definitely be +%% selected, such that no clause before it can possibly be selected, +%% then we can eliminate all other clauses. (And even if this is not the +%% case, some clauses can often be eliminated.) Furthermore, if a clause +%% can be selected, we can replace the case-expression (including the +%% switch expression) with the body of the clause and a set of zero or +%% more let-bindings of subexpressions of the switch expression. (In the +%% simplest case, the switch expression is evaluated only for effect.) + +i_case(E, Ctxt, Ren, Env, S) -> + %% First visit the switch expression in `value' context, to simplify + %% it as far as possible. Note that only the result part is passed + %% on to the clause matching below; any effects are hoisted. + {A, S1} = i(case_arg(E), value, Ren, Env, S), + A1 = result(A), + + %% Propagating an application context into the branches could cause + %% the arguments of the application to be evaluated *after* the + %% switch expression, but *before* the body of the selected clause. + %% Such interleaving is not allowed in general, and it does not seem + %% worthwile to make a more powerful transformation here. Therefore, + %% the clause bodies are conservatively visited for value if the + %% context is `application'. + Ctxt1 = safe_context(Ctxt), + {E1, S2} = case get_components(case_arity(E), A1) of + {true, As} -> + i_case_1(As, E, Ctxt1, Ren, Env, S1); + false -> + i_case_1([], E, Ctxt1, Ren, Env, S1) + end, + {hoist_effects(A, E1), S2}. + +i_case_1(As, E, Ctxt, Ren, Env, S) -> + case i_clauses(As, case_clauses(E), Ctxt, Ren, Env, S) of + {false, {As1, Vs, Env1, Cs}, S1} -> + %% We still have a list of clauses. Sanity check: + if Cs =:= [] -> + report_warning("empty list of clauses " + "in residual program!.\n"); + true -> + ok + end, + {A, S2} = i(c_values(As1), value, ren__identity(), Env1, + S1), + {E1, S3} = i_case_2(Cs, A, E, S2), + i_case_3(Vs, Env1, E1, S3); + {true, {_, Vs, Env1, [C]}, S1} -> + %% A single clause was selected; we just take the body. + i_case_3(Vs, Env1, clause_body(C), S1) + end. + +%% Check if all clause bodies are actually equivalent expressions that +%% do not depent on pattern variables (this sometimes occurs as a +%% consequence of inlining, e.g., all branches might yield 'true'), and +%% if so, replace the `case' with a sequence, first evaluating the +%% clause selection for effect, then evaluating one of the clause bodies +%% for its value. (Unless the switch contains a catch-all clause, the +%% clause selection must be evaluated for effect, since there is no +%% guarantee that any of the clauses will actually match. Assuming that +%% some clause always matches could make an undefined program produce a +%% value.) This makes the final size less than what was accounted for +%% when visiting the clauses, but currently we don't try to adjust for +%% this. + +i_case_2(Cs, A, E, S) -> + case equivalent_clauses(Cs) of + false -> + %% Count the base sizes for the remaining clauses; pattern + %% and guard sizes are already counted. + N = weight('case') + weight(clause) * length(Cs), + S1 = count_size(N, S), + {update_c_case(E, A, Cs), S1}; + true -> + case cerl_clauses:any_catchall(Cs) of + true -> + %% We know that some clause must be selected, so we + %% can drop all the testing as well. + E1 = make_seq(A, clause_body(hd(Cs))), + {E1, S}; + false -> + %% The clause selection must be performed for + %% effect. + E1 = update_c_case(E, A, + set_clause_bodies(Cs, void())), + {make_seq(E1, clause_body(hd(Cs))), S} + end + end. + +i_case_3(Vs, Env, E, S) -> + %% For the variables bound to the switch expression subexpressions, + %% make let bindings or create evaluation for effect. + Rs = [env__get(var_name(V), Env) || V <- Vs], + {E1, S1} = make_let_bindings(Rs, E, S), + Opnds = [R#ref.opnd || R <- Rs], + residualize_operands(Opnds, E1, S1). + +%% This function takes a sequence of switch expressions `Es' (which can +%% be the empty list if these are unknown) and a list `Cs' of clauses, +%% and returns `{Match, {As, Vs, Env1, Cs1}, S1}' where `As' is a list +%% of residual switch expressions, `Vs' the list of variables used in +%% the templates, `Env1' the environment for the templates, and `Cs1' +%% the list of residual clauses. `Match' is `true' if some clause could +%% be shown to definitely match (in this case, `Cs1' contains exactly +%% one element), and `false' otherwise. `S1' is the new state. The given +%% `Ctxt' is the context to be used for visiting the body of clauses. +%% +%% Visiting a clause basically amounts to extending the environment for +%% all variables in the pattern, as for a `fun' (cf. `i_fun'), +%% propagating match information if possible, and visiting the guard and +%% body in the new environment. +%% +%% To make it cheaper to do handle a set of clauses, and to avoid +%% unnecessarily exceeding the size limit, we avoid visiting the bodies +%% of clauses which are subsequently removed, by dividing the visiting +%% of a clause into two stages: first construct the environment(s) and +%% visit the pattern (for renaming) and the guard (for value), then +%% reduce the switch as much as possible, and lastly visit the body. + +i_clauses(Cs, Ctxt, Ren, Env, S) -> + i_clauses([], Cs, Ctxt, Ren, Env, S). + +i_clauses(Es, Cs, Ctxt, Ren, Env, S) -> + %% Create templates for the switch expressions. + {Ts, {Vs, Env0}} = mapfoldl(fun (E, {Vs, Env}) -> + {T, Vs1, Env1} = + make_template(E, Env), + {T, {Vs1 ++ Vs, Env1}} + end, + {[], Env}, Es), + + %% Make operand structures for the switch subexpression templates + %% (found in `Env0') and add proper ref-structure bindings to the + %% environment. Since the subexpressions in general can be + %% interdependent (Vs is in reverse-dependency order), the + %% environment (and renaming) must be created incrementally. Note + %% that since the switch expressions have been visited already, the + %% identity renaming is used for the operands. + Vs1 = lists:reverse(Vs), + {Ren1, Env1, S1} = + foldl(fun (V, {Ren, Env, S}) -> + E = env__get(var_name(V), Env0), + {Opnd, S_1} = make_opnd(E, ren__identity(), Env, + S), + {_, Ren1, Env1, S_2} = bind_locals([V], [Opnd], + Ren, Env, S_1), + {Ren1, Env1, S_2} + end, + {Ren, Env, S}, Vs1), + + %% First we visit the head of each individual clause, renaming + %% pattern variables, inserting let-bindings in the guard and body, + %% and visiting the guard. The information used for visiting the + %% clause body will be prefixed to the clause annotations. + {Cs1, S2} = mapfoldl(fun (C, S) -> + i_clause_head(C, Ts, Ren1, Env1, S) + end, + S1, Cs), + + %% Now that the clause guards have been reduced as far as possible, + %% we can attempt to reduce the clauses. + As = [hd(get_ann(T)) || T <- Ts], + case cerl_clauses:reduce(Cs1, Ts) of + {false, Cs2} -> + %% We still have one or more clauses (with associated + %% extended environments). Their bodies have not yet been + %% visited, so we do that (in the respective safe + %% environments, adding the sizes of the visited heads to + %% the current size counter) and return the final list of + %% clauses. + {Cs3, S3} = mapfoldl( + fun (C, S) -> + i_clause_body(C, Ctxt, S) + end, + S2, Cs2), + {false, {As, Vs1, Env1, Cs3}, S3}; + {true, {C, _}} -> + %% A clause C could be selected (the bindings have already + %% been added to the guard/body). Note that since the clause + %% head will probably be discarded, its size is not counted. + {C1, Ren2, Env2, _} = get_clause_extras(C), + {B, S3} = i(clause_body(C), Ctxt, Ren2, Env2, S2), + C2 = update_c_clause(C1, clause_pats(C1), clause_guard(C1), B), + {true, {As, Vs1, Env1, [C2]}, S3} + end. + +%% This visits the head of a clause, renames pattern variables, inserts +%% let-bindings in the guard and body, and does inlining on the guard +%% expression. Returns a list of pairs `{NewClause, Data}', where `Data' +%% is `{Renaming, Environment, Size}' used for visiting the body of the +%% new clause. + +i_clause_head(C, Ts, Ren, Env, S) -> + %% Match the templates against the (non-renamed) patterns to get the + %% available information about matching subexpressions. We don't + %% care at this point whether an exact match/nomatch is detected. + Ps = clause_pats(C), + Bs = case cerl_clauses:match_list(Ps, Ts) of + {_, Bs1} -> Bs1; + none -> [] + end, + + %% The patterns must be visited for renaming; cf. `i_pattern'. We + %% use a passive size counter for visiting the patterns and the + %% guard (cf. `visit'), because we do not know at this stage whether + %% the clause will be kept or not; the final value of the counter is + %% included in the returned value below. + {_, Ren1, Env1, S1} = bind_locals(clause_vars(C), Ren, Env, S), + S2 = new_passive_size(get_size_limit(S1), S1), + {Ps1, S3} = mapfoldl(fun (P, S) -> + i_pattern(P, Ren1, Env1, Ren, Env, S) + end, + S2, Ps), + + %% Rewrite guard and body and visit the guard for value. Discard the + %% latter size count if the guard turns out to be a constant. + G = add_match_bindings(Bs, clause_guard(C)), + B = add_match_bindings(Bs, clause_body(C)), + {G1, S4} = i(G, value, Ren1, Env1, S3), + S5 = case is_literal(G1) of + true -> + revert_size(S3, S4); + false -> + S4 + end, + + %% Revert to the size counter we had on entry to this function. The + %% environment and renaming, together with the size of the clause + %% head, are prefixed to the annotations for later use. + Size = get_size_value(S5), + C1 = update_c_clause(C, Ps1, G1, B), + {set_clause_extras(C1, Ren1, Env1, Size), revert_size(S, S5)}. + +add_match_bindings(Bs, E) -> + %% Don't waste time if the variables definitely cannot be used. + %% (Most guards are simply `true'.) + case is_literal(E) of + true -> + E; + false -> + Vs = [V || {V, E} <- Bs, E =/= any], + Es = [hd(get_ann(E)) || {_V, E} <- Bs, E =/= any], + c_let(Vs, c_values(Es), E) + end. + +i_clause_body(C0, Ctxt, S) -> + {C, Ren, Env, Size} = get_clause_extras(C0), + S1 = count_size(Size, S), + {B, S2} = i(clause_body(C), Ctxt, Ren, Env, S1), + C1 = update_c_clause(C, clause_pats(C), clause_guard(C), B), + {C1, S2}. + +get_clause_extras(C) -> + [{Ren, Env, Size} | As] = get_ann(C), + {set_ann(C, As), Ren, Env, Size}. + +set_clause_extras(C, Ren, Env, Size) -> + As = [{Ren, Env, Size} | get_ann(C)], + set_ann(C, As). + +%% This is the `(lambda x e)' case of the original algorithm. A +%% `fun' is like a lambda expression, but with a varying number of +%% parameters; possibly zero. + +i_fun(E, Ctxt, Ren, Env, S) -> + case Ctxt of + effect -> + %% Reduce useless `fun' expressions to a simple constant; + %% visiting the body would be a waste of time, and could + %% needlessly mark variables as referenced. + {void(), count_size(weight(literal), S)}; + value -> + %% Note that the variables are visited as patterns. + Vs = fun_vars(E), + {_, Ren1, Env1, S1} = bind_locals(Vs, Ren, Env, S), + Vs1 = i_params(Vs, Ren1, Env1), + + %% The body is always visited for value. + {B, S2} = i(fun_body(E), value, Ren1, Env1, S1), + + %% We don't bother to include the exact number of free + %% variables in the cost for creating a fun-value. + S3 = count_size(weight('fun'), S2), + + %% Inlining might have duplicated code, so we must remove + %% any 'id'-annotations from the original fun-expression. + %% (This forces a later stage to invent new id:s.) This is + %% necessary as long as fun:s may still need to be + %% identified the old way. Function variables that are not + %% in application context also have such annotations, but + %% the inlining will currently lose all annotations on + %% variable references (I think), so that's not a problem. + {set_ann(c_fun(Vs1, B), kill_id_anns(get_ann(E))), S3}; + #app{} -> + %% An application of a fun-expression (in the original + %% source code) is handled by going directly to `inline'. + %% This is never residualised unless there is an arity + %% mismatch, so we don't set up new counters here. Note that + %% inlining of copy-propagated fun-expressions is done in + %% `copy'; not here! + inline(E, Ctxt, Ren, Env, S) + end. + +%% A `letrec' requires a circular environment, but is otherwise like a +%% `let', i.e. like a direct lambda application. Note that only +%% fun-expressions (lambda abstractions) may occur in the right-hand +%% side of each definition. + +i_letrec(E, Ctxt, Ren, Env, S) -> + %% Note that we pass an empty list for the auto-referenced + %% (exported) functions here. + {Es, B, _, S1} = i_letrec(letrec_defs(E), letrec_body(E), [], Ctxt, + Ren, Env, S), + + %% If no bindings remain, only the body is returned. + case Es of + [] -> + {B, S1}; % drop annotations on E + _ -> + S2 = count_size(weight(letrec), S1), + {update_c_letrec(E, Es, B), S2} + end. + +%% The major part of this is shared by letrec-expressions and module +%% definitions alike. + +i_letrec(Es, B, Xs, Ctxt, Ren, Env, S) -> + %% First, we create operands with dummy renamings and environments, + %% and with fresh store locations for cached expressions and operand + %% info. + {Opnds, S1} = mapfoldl(fun ({_, E}, S) -> + make_opnd(E, undefined, undefined, S) + end, + S, Es), + + %% Then we make recursive bindings for the definitions. + {Rs, Ren1, Env1, S2} = bind_recursive([F || {F, _} <- Es], + Opnds, Ren, Env, S1), + + %% For the function variables listed in Xs (none for a + %% letrec-expression), we must make sure that the corresponding + %% operand expressions are visited and that the definitions are + %% marked as referenced; we also need to return the possibly renamed + %% function variables. + {Xs1, S3} = + mapfoldl( + fun (X, S) -> + Name = ren__map(var_name(X), Ren1), + case env__lookup(Name, Env1) of + {ok, R} -> + S_1 = i_letrec_export(R, S), + {ref_to_var(R), S_1}; + error -> + %% We just skip any exports that are not + %% actually defined here, and generate a + %% warning message. + {N, A} = var_name(X), + report_warning("export `~w'/~w " + "not defined.\n", [N, A]), + {X, S} + end + end, + S2, Xs), + + %% At last, we can then visit the body. + {B1, S4} = i(B, Ctxt, Ren1, Env1, S3), + + %% Finally, we create new letrec-bindings for any and all + %% residualised definitions. All referenced functions should have + %% been visited; the call to `visit' below is expected to retreive a + %% cached expression. + Rs1 = keep_referenced(Rs, S4), + {Es1, S5} = mapfoldl(fun (R, S) -> + {E_1, S_1} = visit(R#ref.opnd, S), + {{ref_to_var(R), E_1}, S_1} + end, + S4, Rs1), + {Es1, B1, Xs1, S5}. + +%% This visits the operand for a function definition exported by a +%% `letrec' (which is really a `module' module definition, since normal +%% letrecs have no export declarations). Only the updated state is +%% returned. We must handle the "inner-pending" flag when doing this; +%% cf. `i_var'. + +i_letrec_export(R, S) -> + Opnd = R#ref.opnd, + S1 = st__mark_inner_pending(Opnd#opnd.loc, S), + {_, S2} = visit(Opnd, S1), + {_, S3} = residualize_var(R, st__clear_inner_pending(Opnd#opnd.loc, + S2)), + S3. + +%% This is the `(call e1 e2)' case of the original algorithm. The only +%% difference is that we must handle multiple (or no) operand +%% expressions. + +i_apply(E, Ctxt, Ren, Env, S) -> + {Opnds, S1} = mapfoldl(fun (E, S) -> + make_opnd(E, Ren, Env, S) + end, + S, apply_args(E)), + + %% Allocate a new app-context location and set up an application + %% context structure containing the surrounding context. + {L, S2} = st__new_app_loc(S1), + Ctxt1 = #app{opnds = Opnds, ctxt = Ctxt, loc = L}, + + %% Visit the operator expression in the new call context. + {E1, S3} = i(apply_op(E), Ctxt1, Ren, Env, S2), + + %% Check the "inlined" flag to find out what to do next. (The store + %% location could be recycled after the flag has been tested, but + %% there is no real advantage to that, because in practice, only + %% 4-5% of all created store locations will ever be reused, while + %% there will be a noticable overhead for managing the free list.) + case st__get_app_inlined(L, S3) of + true -> + %% The application was inlined, so we have the final + %% expression in `E1'. We just have to handle any operands + %% that need to be residualized for effect only (i.e., those + %% the values of which are not used). + residualize_operands(Opnds, E1, S3); + false -> + %% Otherwise, `E1' is the residual operator expression. We + %% make sure all operands are visited, and rebuild the + %% application. + {Es, S4} = mapfoldl(fun (Opnd, S) -> + visit_and_count_size(Opnd, S) + end, + S3, Opnds), + N = apply_size(length(Es)), + {update_c_apply(E, E1, Es), count_size(N, S4)} + end. + +apply_size(A) -> + weight(apply) + weight(argument) * A. + +%% Since it is not the task of this transformation to handle +%% cross-module inlining, all inter-module calls are handled by visiting +%% the components (the module and function name, and the arguments of +%% the call) for value. In `effect' context, if the function itself is +%% known to be completely effect free, the call can be discarded and the +%% arguments evaluated for effect. Otherwise, if all the visited +%% arguments are to constants, and the function is known to be safe to +%% execute at compile time, then we try to evaluate the call. If +%% evaluation completes normally, the call is replaced by the result; +%% otherwise the call is residualised. + +i_call(E, Ctxt, Ren, Env, S) -> + {M, S1} = i(call_module(E), value, Ren, Env, S), + {F, S2} = i(call_name(E), value, Ren, Env, S1), + As = call_args(E), + Arity = length(As), + + %% Check if the name of the called function is static. If so, + %% discard the size counts performed above, since the values will + %% not cause any runtime cost. + Static = is_c_atom(M) and is_c_atom(F), + S3 = case Static of + true -> + revert_size(S, S2); + false -> + S2 + end, + case Ctxt of + effect when Static =:= true -> + case is_safe_call(atom_val(M), atom_val(F), Arity) of + true -> + %% The result will not be used, and the call is + %% effect free, so we create a multiple-value + %% aggregate containing the (not yet visited) + %% arguments and process that instead. + i(c_values(As), effect, Ren, Env, S3); + false -> + %% We are not allowed to simply discard the call, + %% but we can try to evaluate it. + i_call_1(Static, M, F, Arity, As, E, Ctxt, Ren, Env, + S3) + end; + _ -> + i_call_1(Static, M, F, Arity, As, E, Ctxt, Ren, Env, S3) + end. + +i_call_1(Static, M, F, Arity, As, E, Ctxt, Ren, Env, S) -> + %% Visit the arguments for value. + {As1, S1} = mapfoldl(fun (X, A) -> i(X, value, Ren, Env, A) end, + S, As), + case Static of + true -> + case erl_bifs:is_pure(atom_val(M), atom_val(F), Arity) of + true -> + %% It is allowed to evaluate this at compile time. + case all_static(As1) of + true -> + i_call_3(M, F, As1, E, Ctxt, Env, S1); + false -> + %% See if the call can be rewritten instead. + i_call_4(M, F, As1, E, Ctxt, Env, S1) + end; + false -> + i_call_2(M, F, As1, E, S1) + end; + false -> + i_call_2(M, F, As1, E, S1) + end. + +%% Residualise the call. + +i_call_2(M, F, As, E, S) -> + N = weight(call) + weight(argument) * length(As), + {update_c_call(E, M, F, As), count_size(N, S)}. + +%% Attempt to evaluate the call to yield a literal; if that fails, try +%% to rewrite the expression. + +i_call_3(M, F, As, E, Ctxt, Env, S) -> + %% Note that we extract the results of argument expessions here; the + %% expressions could still be sequences with side effects. + Vs = [concrete(result(A)) || A <- As], + case catch {ok, apply(atom_val(M), atom_val(F), Vs)} of + {ok, V} -> + %% Evaluation completed normally - try to turn the result + %% back into a syntax tree (representing a literal). + case is_literal_term(V) of + true -> + %% Make a sequence of the arguments (as a + %% multiple-value aggregate) and the final value. + S1 = count_size(weight(values), S), + S2 = count_size(weight(literal), S1), + {make_seq(c_values(As), abstract(V)), S2}; + false -> + %% The result could not be represented as a literal. + i_call_4(M, F, As, E, Ctxt, Env, S) + end; + _ -> + %% The evaluation attempt did not complete normally. + i_call_4(M, F, As, E, Ctxt, Env, S) + end. + +%% Rewrite the expression, if possible, otherwise residualise it. + +i_call_4(M, F, As, E, Ctxt, Env, S) -> + case reduce_bif_call(atom_val(M), atom_val(F), As, Env) of + false -> + %% Nothing more to be done - residualise the call. + i_call_2(M, F, As, E, S); + {true, E1} -> + %% We revisit the result, because the rewriting might have + %% opened possibilities for further inlining. Since the + %% parts have already been visited once, we use the identity + %% renaming here. + i(E1, Ctxt, ren__identity(), Env, S) + end. + +%% For now, we assume that primops cannot be evaluated at compile time, +%% probably being too special. Also, we have no knowledge about their +%% side effects. + +i_primop(E, Ren, Env, S) -> + %% Visit the arguments for value. + {As, S1} = mapfoldl(fun (E, S) -> + i(E, value, Ren, Env, S) + end, + S, primop_args(E)), + N = weight(primop) + weight(argument) * length(As), + {update_c_primop(E, primop_name(E), As), count_size(N, S1)}. + +%% This is like having an expression with an extra fun-expression +%% attached for "exceptional cases"; actually, there are exactly two +%% parameter variables for the body, but they are easiest handled as if +%% their number might vary, just as for a `fun'. + +i_try(E, Ctxt, Ren, Env, S) -> + %% The argument expression is evaluated in `value' context, and the + %% surrounding context is propagated into both branches. We do not + %% try to recognize cases when the protected expression will + %% actually raise an exception. Note that the variables are visited + %% as patterns. + {A, S1} = i(try_arg(E), value, Ren, Env, S), + Vs = try_vars(E), + {_, Ren1, Env1, S2} = bind_locals(Vs, Ren, Env, S1), + Vs1 = i_params(Vs, Ren1, Env1), + {B, S3} = i(try_body(E), Ctxt, Ren1, Env1, S2), + case is_safe(A) of + true -> + %% The `try' wrapper can be dropped in this case. Since the + %% expressions have been visited already, the identity + %% renaming is used when we revisit the new let-expression. + i(c_let(Vs1, A, B), Ctxt, ren__identity(), Env, S3); + false -> + Evs = try_evars(E), + {_, Ren2, Env2, S4} = bind_locals(Evs, Ren, Env, S3), + Evs1 = i_params(Evs, Ren2, Env2), + {H, S5} = i(try_handler(E), Ctxt, Ren2, Env2, S4), + S6 = count_size(weight('try'), S5), + {update_c_try(E, A, Vs1, B, Evs1, H), S6} + end. + +%% A special case of try-expressions: + +i_catch(E, Ctxt, Ren, Env, S) -> + %% We cannot propagate application contexts into the catch. + {E1, S1} = ES1 = i(catch_body(E), safe_context(Ctxt), Ren, Env, S), + case is_safe(E1) of + true -> + %% The `catch' wrapper can be dropped in this case. + ES1; + false -> + S2 = count_size(weight('catch'), S1), + {update_c_catch(E, E1), S2} + end. + +%% A receive-expression is very much like a case-expression, with the +%% difference that we do not have access to a switch expression, since +%% the value being switched on is taken from the mailbox. The fact that +%% the receive-expression may iterate over an arbitrary number of +%% messages is not of interest to us. All we can do here is to visit its +%% subexpressions, and possibly eliminate definitely unselectable +%% clauses. + +i_receive(E, Ctxt, Ren, Env, S) -> + %% We first visit the expiry expression (for value) and the expiry + %% body (in the surrounding context). + {T, S1} = i(receive_timeout(E), value, Ren, Env, S), + {B, S2} = i(receive_action(E), Ctxt, Ren, Env, S1), + + %% Then we visit the clauses. Note that application contexts may not + %% in general be propagated into the branches (and the expiry body), + %% because the execution of the `receive' may remove a message from + %% the mailbox as a side effect; the situation is thus analogous to + %% that in a `case' expression. + Ctxt1 = safe_context(Ctxt), + case i_clauses(receive_clauses(E), Ctxt1, Ren, Env, S2) of + {false, {[], _, _, Cs}, S3} -> + %% We still have a list of clauses. If the list is empty, + %% and the expiry expression is the integer zero, the + %% expression reduces to the expiry body. + if Cs =:= [] -> + case is_c_int(T) andalso (int_val(T) =:= 0) of + true -> + {B, S3}; + false -> + i_receive_1(E, Cs, T, B, S3) + end; + true -> + i_receive_1(E, Cs, T, B, S3) + end; + {true, {_, _, _, Cs}, S3} -> + %% Cs is a single clause that will always be matched (if a + %% message exists), but we must keep the `receive' statement + %% in order to fetch the message from the mailbox. + i_receive_1(E, Cs, T, B, S3) + end. + +i_receive_1(E, Cs, T, B, S) -> + %% Here, we just add the base sizes for the receive-expression + %% itself and for each remaining clause; cf. `case'. + N = weight('receive') + weight(clause) * length(Cs), + {update_c_receive(E, Cs, T, B), count_size(N, S)}. + +%% A module definition is like a `letrec', with some add-ons (export and +%% attribute declarations) but without an explicit body. Actually, the +%% exporting of function names has the same effect as if there was a +%% body consisting of the list of references to the exported functions. +%% Thus, the exported functions are exactly those which can be +%% referenced from outside the module. + +i_module(E, Ctxt, Ren, Env, S) -> + %% Cf. `i_letrec'. Note that we pass a dummy constant value for the + %% "body" parameter. + {Es, _, Xs1, S1} = i_letrec(module_defs(E), void(), + module_exports(E), Ctxt, Ren, Env, S), + %% Sanity check: + case Es of + [] -> + report_warning("no function definitions remaining " + "in module `~s'.\n", + [atom_name(module_name(E))]); + _ -> + ok + end, + E1 = update_c_module(E, module_name(E), Xs1, module_attrs(E), Es), + {E1, count_size(weight(module), S1)}. + +%% Binary-syntax expressions are too complicated to do anything +%% interesting with here - that is beyond the scope of this program; +%% also, their construction could have side effects, so even in effect +%% context we can't remove them. (We don't bother to identify cases of +%% "safe" unused binaries which could be removed.) + +i_binary(E, Ren, Env, S) -> + %% Visit the segments for value. + {Es, S1} = mapfoldl(fun (E, S) -> + i_bitstr(E, Ren, Env, S) + end, + S, binary_segments(E)), + S2 = count_size(weight(binary), S1), + {update_c_binary(E, Es), S2}. + +i_bitstr(E, Ren, Env, S) -> + %% It is not necessary to visit the Unit, Type and Flags fields, + %% since these are always literals. + {Val, S1} = i(bitstr_val(E), value, Ren, Env, S), + {Size, S2} = i(bitstr_size(E), value, Ren, Env, S1), + Unit = bitstr_unit(E), + Type = bitstr_type(E), + Flags = bitstr_flags(E), + S3 = count_size(weight(bitstr), S2), + {update_c_bitstr(E, Val, Size, Unit, Type, Flags), S3}. + +%% This is a simplified version of `i_pattern', for lists of parameter +%% variables only. It does not modify the state. + +i_params([V | Vs], Ren, Env) -> + Name = ren__map(var_name(V), Ren), + case env__lookup(Name, Env) of + {ok, R} -> + [ref_to_var(R) | i_params(Vs, Ren, Env)]; + error -> + report_internal_error("variable `~w' not bound " + "in pattern.\n", [Name]), + exit(error) + end; +i_params([], _, _) -> + []. + +%% For ordinary patterns, we just visit to rename variables and count +%% the size/cost. All occurring binding instances of variables should +%% already have been added to the renaming and environment; however, to +%% handle the size expressions of binary-syntax patterns, we must pass +%% the renaming and environment of the containing expression + +i_pattern(E, Ren, Env, Ren0, Env0, S) -> + case type(E) of + var -> + %% Count no size. + Name = ren__map(var_name(E), Ren), + case env__lookup(Name, Env) of + {ok, R} -> + {ref_to_var(R), S}; + error -> + report_internal_error("variable `~w' not bound " + "in pattern.\n", [Name]), + exit(error) + end; + alias -> + %% Count no size. + V = alias_var(E), + Name = ren__map(var_name(V), Ren), + case env__lookup(Name, Env) of + {ok, R} -> + %% Visit the subpattern and recompose. + V1 = ref_to_var(R), + {P, S1} = i_pattern(alias_pat(E), Ren, Env, Ren0, + Env0, S), + {update_c_alias(E, V1, P), S1}; + error -> + report_internal_error("variable `~w' not bound " + "in pattern.\n", [Name]), + exit(error) + end; + binary -> + {Es, S1} = mapfoldl(fun (E, S) -> + i_bitstr_pattern(E, Ren, Env, + Ren0, Env0, S) + end, + S, binary_segments(E)), + S2 = count_size(weight(binary), S1), + {update_c_binary(E, Es), S2}; + _ -> + case is_literal(E) of + true -> + {E, count_size(weight(literal), S)}; + false -> + {Es1, S1} = mapfoldl(fun (E, S) -> + i_pattern(E, Ren, Env, + Ren0, Env0, + S) + end, + S, data_es(E)), + %% We assume that in general, the elements of the + %% constructor will all be fetched. + N = weight(data) + length(Es1) * weight(element), + S2 = count_size(N, S1), + {update_data(E, data_type(E), Es1), S2} + end + end. + +i_bitstr_pattern(E, Ren, Env, Ren0, Env0, S) -> + %% It is not necessary to visit the Unit, Type and Flags fields, + %% since these are always literals. The Value field is a limited + %% pattern - either a literal or an unbound variable. The Size field + %% is a limited expression - either a literal or a variable bound in + %% the environment of the containing expression. + {Val, S1} = i_pattern(bitstr_val(E), Ren, Env, Ren0, Env0, S), + {Size, S2} = i(bitstr_size(E), value, Ren0, Env0, S1), + Unit = bitstr_unit(E), + Type = bitstr_type(E), + Flags = bitstr_flags(E), + S3 = count_size(weight(bitstr), S2), + {update_c_bitstr(E, Val, Size, Unit, Type, Flags), S3}. + + +%% --------------------------------------------------------------------- +%% Other central inlining functions + +%% The following function assumes that `E' is a fun-expression and the +%% context is an app-structure. If the inlining could be aborted, a +%% corresponding catch should be set up before entering the function. +%% +%% Note: if the inlined body is some lambda abstraction, and the +%% surrounding context of the app-context is also an app-context, the +%% `inlined' flag of the outermost context will be set before that of +%% the inner context is set. E.g.: `let F = fun (X) -> fun (Y) -> E in +%% apply apply F(A)(B)' will propagate the body of F, which is a lambda +%% abstraction, into the outer application context, which will be +%% inlined to produce expression `E', and the flag of the outer context +%% will be set. Upon return, the flag of the inner context will also be +%% set. However, the flags are then tested in innermost-first order. +%% Thus, if some inlining attempt is aborted, the `inlined' flags of any +%% nested app-contexts must be cleared. +%% +%% This implementation does nothing to handle inlining of calls to +%% recursive functions in a smart way. This means that as long as the +%% size and effort counters do not prevent it, the function body will be +%% inlined (i.e., the first iteration will be unrolled), and the +%% recursive calls will be residualized. + +inline(E, #app{opnds = Opnds, ctxt = Ctxt, loc = L}, Ren, Env, S) -> + %% Check that the arities match: + Vs = fun_vars(E), + if length(Opnds) =/= length(Vs) -> + %% Arity mismatch: the call will be residualized + {E, S}; + true -> + %% Create local bindings for the parameters to their + %% respective operand structures from the app-structure, and + %% visit the body in the context saved in the structure. + {Rs, Ren1, Env1, S1} = bind_locals(Vs, Opnds, Ren, Env, S), + {E1, S2} = i(fun_body(E), Ctxt, Ren1, Env1, S1), + + %% Create necessary bindings and/or set flags. + {E2, S3} = make_let_bindings(Rs, E1, S2), + + %% Lastly, flag the application as inlined, since the inlining + %% attempt was not aborted before we reached this point. + {E2, st__set_app_inlined(L, S3)} + end. + +%% For the (possibly renamed) argument variables to an inlined call, +%% either create `let' bindings for them, if they are still referenced +%% in the residual expression (in C/Lisp, also if they are assigned to), +%% or otherwise (if they are not referenced or assigned) mark them for +%% evaluation for side effects. + +make_let_bindings([R | Rs], E, S) -> + {E1, S1} = make_let_bindings(Rs, E, S), + make_let_binding(R, E1, S1); +make_let_bindings([], E, S) -> + {E, S}. + +make_let_binding(R, E, S) -> + %% The `referenced' flag is conservatively computed. We therefore + %% first check some simple cases where parameter R is definitely not + %% referenced in the resulting body E. + case is_literal(E) of + true -> + %% A constant contains no variable references. + make_let_binding_1(R, E, S); + false -> + case is_c_var(E) of + true -> + case var_name(E) =:= R#ref.name of + true -> + %% The body is simply the parameter variable + %% itself. Visit the operand for value and + %% substitute the result for the body. + visit_and_count_size(R#ref.opnd, S); + false -> + %% Not the same variable, so the parameter + %% is not referenced at all. + make_let_binding_1(R, E, S) + end; + false -> + %% Proceed to check the `referenced' flag. + case st__get_var_referenced(R#ref.loc, S) of + true -> + %% The parameter is probably referenced in + %% the residual code (although it might not + %% be). Visit the operand for value and + %% create a let-binding. + {E1, S1} = visit_and_count_size(R#ref.opnd, + S), + S2 = count_size(weight('let'), S1), + {c_let([ref_to_var(R)], E1, E), S2}; + false -> + %% The parameter is definitely not + %% referenced. + make_let_binding_1(R, E, S) + end + end + end. + +%% This marks the operand for evaluation for effect. + +make_let_binding_1(R, E, S) -> + Opnd = R#ref.opnd, + {E, st__set_opnd_effect(Opnd#opnd.loc, S)}. + +%% Here, `R' is the ref-structure which is the target of the copy +%% propagation, and `Opnd' is a visited operand structure, to be +%% propagated through `R' if possible - if not, `R' is residualised. +%% `Opnd' is normally the operand that `R' is bound to, and `E' is the +%% result of visiting `Opnd' for value; we pass this as an argument so +%% we don't have to fetch it multiple times (because we don't have +%% constant time access). +%% +%% We also pass the environment of the site of the variable reference, +%% for use when inlining a propagated fun-expression. In the original +%% algorithm by Waddell, the environment used for inlining such cases is +%% the identity mapping, because the fun-expression body has already +%% been visited for value, and their algorithm combines renaming of +%% source-code variables with the looking up of information about +%% residual-code variables. We, however, need to check the environment +%% of the call site when creating new non-shadowed variables, but we +%% must avoid repeated renaming. We therefore separate the renaming and +%% the environment (as in the renaming algorithm of Peyton-Jones and +%% Marlow). This also makes our implementation more general, compared to +%% the original algorithm, because we do not give up on propagating +%% variables that were free in the fun-body. +%% +%% Example: +%% +%% let F = fun (X) -> {'foo', X} in +%% let G = fun (H) -> apply H(F) % F is free in the fun G +%% in apply G(fun (F) -> apply F(42)) +%% => +%% let F = fun (X) -> {'foo', X} in +%% apply (fun (H) -> apply H(F))(fun (F) -> apply F(42)) +%% => +%% let F = fun (X) -> {'foo', X} in +%% apply (fun (F) -> apply F(42))(F) +%% => +%% let F = fun (X) -> {'foo', X} in +%% apply F(42) +%% => +%% apply (fun (X) -> {'foo', X})(2) +%% => +%% {'foo', 42} +%% +%% The original algorithm would give up at stage 4, because F was free +%% in the propagated fun-expression. Our version inlines this example +%% completely. + +copy(R, Opnd, E, Ctxt, Env, S) -> + case is_c_var(E) of + true -> + %% The operand reduces to another variable - get its + %% ref-structure and attempt to propagate further. + copy_var(env__get(var_name(E), Opnd#opnd.env), Ctxt, Env, + S); + false -> + %% Apart from variables and functional values (the latter + %% are handled by `copy_1' below), only constant literals + %% are copyable in general; other things, including e.g. + %% tuples `{foo, X}', could cause duplication of work, and + %% are not copy propagated. + case is_literal(E) of + true -> + {E, count_size(weight(literal), S)}; + false -> + copy_1(R, Opnd, E, Ctxt, Env, S) + end + end. + +copy_var(R, Ctxt, Env, S) -> + %% (In Lisp or C, if this other variable might be assigned to, we + %% should residualize the "parent" instead, so we don't bypass any + %% destructive updates.) + case R#ref.opnd of + undefined -> + %% This variable is not bound to an expression, so just + %% residualize it. + residualize_var(R, S); + Opnd -> + %% Note that because operands are always visited before + %% copied, all copyable operand expressions will be + %% propagated through any number of bindings. If `R' was + %% bound to a constant literal, we would never have reached + %% this point. + case st__lookup_opnd_cache(Opnd#opnd.loc, S) of + error -> + %% The result for this operand is not yet ready + %% (which should mean that it is a recursive + %% reference). Thus, we must residualise the + %% variable. + residualize_var(R, S); + {ok, #cache{expr = E1}} -> + %% The result for the operand is ready, so we can + %% proceed to propagate it. + copy_1(R, Opnd, E1, Ctxt, Env, S) + end + end. + +copy_1(R, Opnd, E, Ctxt, Env, S) -> + %% Fun-expression (lambdas) are a bit special; they are copyable, + %% but should preferably not be duplicated, so they should not be + %% copy propagated except into application contexts, where they can + %% be inlined. + case is_c_fun(E) of + true -> + case Ctxt of + #app{} -> + %% First test if the operand is "outer-pending"; if + %% so, don't inline. + case st__test_outer_pending(Opnd#opnd.loc, S) of + false -> + copy_inline(R, Opnd, E, Ctxt, Env, S); + true -> + %% Cyclic reference forced inlining to stop + %% (avoiding infinite unfolding). + residualize_var(R, S) + end; + _ -> + residualize_var(R, S) + end; + false -> + %% We have no other cases to handle here + residualize_var(R, S) + end. + +%% This inlines a function value that was propagated to an application +%% context. The inlining is done with an identity renaming (since the +%% expression is already visited) but in the environment of the call +%% site (which is OK because of the no-shadowing strategy for renaming, +%% and because the domain of our environments are the residual-program +%% variables instead of the source-program variables). Note that we must +%% first set the "outer-pending" flag, and clear it afterwards. + +copy_inline(R, Opnd, E, Ctxt, Env, S) -> + S1 = st__mark_outer_pending(Opnd#opnd.loc, S), + case catch {ok, copy_inline_1(R, E, Ctxt, Env, S1)} of + {ok, {E1, S2}} -> + {E1, st__clear_outer_pending(Opnd#opnd.loc, S2)}; + {'EXIT', X} -> + exit(X); + X -> + %% If we use destructive update for the `outer-pending' + %% flag, we must make sure to clear it upon a nonlocal + %% return. + _S2 = st__clear_outer_pending(Opnd#opnd.loc, S1), + throw(X) + end. + +%% If the current effort counter was passive, we use a new active effort +%% counter with the inherited limit for this particular inlining. + +copy_inline_1(R, E, Ctxt, Env, S) -> + case effort_is_active(S) of + true -> + copy_inline_2(R, E, Ctxt, Env, S); + false -> + S1 = new_active_effort(get_effort_limit(S), S), + case catch {ok, copy_inline_2(R, E, Ctxt, Env, S1)} of + {ok, {E1, S2}} -> + %% Revert to the old effort counter. + {E1, revert_effort(S, S2)}; + {counter_exceeded, effort, _} -> + %% Aborted this inlining attempt because too much + %% effort was spent. Residualize the variable and + %% revert to the previous state. + residualize_var(R, S); + {'EXIT', X} -> + exit(X); + X -> + throw(X) + end + end. + +%% Regardless of whether the current size counter is active or not, we +%% use a new active size counter for each inlining. If the current +%% counter was passive, the new counter gets the inherited size limit; +%% if it was active, the size limit of the new counter will be equal to +%% the remaining budget of the current counter (which itself is not +%% affected by the inlining). This distributes the size budget more +%% evenly over "inlinings within inlinings", so that the whole size +%% budget is not spent on the first few call sites (in an inlined +%% function body) forcing the remaining call sites to be residualised. + +copy_inline_2(R, E, Ctxt, Env, S) -> + Limit = case size_is_active(S) of + true -> + get_size_limit(S) - get_size_value(S); + false -> + get_size_limit(S) + end, + %% Add the cost of the application to the new size limit, so we + %% always inline functions that are small enough, even if `Limit' is + %% close to zero at this point. (This is an extension to the + %% original algorithm.) + S1 = new_active_size(Limit + apply_size(length(Ctxt#app.opnds)), S), + case catch {ok, inline(E, Ctxt, ren__identity(), Env, S1)} of + {ok, {E1, S2}} -> + %% Revert to the old size counter. + {E1, revert_size(S, S2)}; + {counter_exceeded, size, S2} -> + %% Aborted this inlining attempt because it got too big. + %% Residualize the variable and revert to the old size + %% counter. (It is important that we do not also revert the + %% effort counter here. Because the effort and size counters + %% are always set up together, we know that the effort + %% counter returned in S2 is the same that was passed to + %% `inline'.) + S3 = revert_size(S, S2), + %% If we use destructive update for the `inlined' flag, we + %% must make sure to clear the flags of any nested + %% app-contexts upon aborting; see `inline' for details. + S4 = reset_nested_apps(Ctxt, S3), % for effect + residualize_var(R, S4); + {'EXIT', X} -> + exit(X); + X -> + throw(X) + end. + +reset_nested_apps(#app{ctxt = Ctxt, loc = L}, S) -> + reset_nested_apps(Ctxt, st__clear_app_inlined(L, S)); +reset_nested_apps(_, S) -> + S. + + +%% --------------------------------------------------------------------- +%% Support functions + +new_var(Env) -> + Name = env__new_vname(Env), + c_var(Name). + +residualize_var(R, S) -> + S1 = count_size(weight(var), S), + {ref_to_var(R), st__set_var_referenced(R#ref.loc, S1)}. + +%% This function returns the value-producing subexpression of any +%% expression. (Except for sequencing expressions, this is the +%% expression itself.) + +result(E) -> + case is_c_seq(E) of + true -> + %% Also see `make_seq', which is used in all places to build + %% sequences so that they are always nested in the first + %% position. + seq_body(E); + false -> + E + end. + +%% This function rewrites E to `do A1 E' if A is `do A1 A2', and +%% otherwise returns E unchanged. + +hoist_effects(A, E) -> + case type(A) of + seq -> make_seq(seq_arg(A), E); + _ -> E + end. + +%% This "build sequencing expression" operation assures that sequences +%% are always nested in the first position, which makes it easy to find +%% the actual value-producing expression of a sequence (cf. `result'). + +make_seq(E1, E2) -> + case is_safe(E1) of + true -> + %% The first expression can safely be dropped. + E2; + false -> + %% If `E1' is a sequence whose final expression has no side + %% effects, then we can lose *that* expression when we + %% compose the new sequence, since its value will not be + %% used. + E3 = case is_c_seq(E1) of + true -> + case is_safe(seq_body(E1)) of + true -> + %% Drop the final expression. + seq_arg(E1); + false -> + E1 + end; + false -> + E1 + end, + case is_c_seq(E2) of + true -> + %% `E2' is a sequence (E2' E2''), so we must + %% rearrange the nesting to ((E1, E2') E2''), to + %% preserve the invariant. Annotations on `E2' are + %% lost. + c_seq(c_seq(E3, seq_arg(E2)), seq_body(E2)); + false -> + c_seq(E3, E2) + end + end. + +%% Currently, safe expressions include variables, lambda expressions, +%% constructors with safe subexpressions (this includes atoms, integers, +%% empty lists, etc.), seq-, let- and letrec-expressions with safe +%% subexpressions, try- and catch-expressions with safe subexpressions +%% and calls to safe functions with safe argument subexpressions. +%% Binaries seem too tricky to be considered. + +is_safe(E) -> + case is_data(E) of + true -> + is_safe_list(data_es(E)); + false -> + case type(E) of + var -> + true; + 'fun' -> + true; + values -> + is_safe_list(values_es(E)); + 'seq' -> + is_safe(seq_arg(E)) andalso is_safe(seq_body(E)); + 'let' -> + is_safe(let_arg(E)) andalso is_safe(let_body(E)); + letrec -> + is_safe(letrec_body(E)); + 'try' -> + %% If the argument expression is not safe, it could + %% be modifying the state; thus, even if the body is + %% safe, the try-expression as a whole would not be. + %% If the argument is safe, the handler is not used. + is_safe(try_arg(E)) andalso is_safe(try_body(E)); + 'catch' -> + is_safe(catch_body(E)); + call -> + M = call_module(E), + F = call_name(E), + case is_c_atom(M) andalso is_c_atom(F) of + true -> + As = call_args(E), + is_safe_list(As) andalso + is_safe_call(atom_val(M), + atom_val(F), + length(As)); + false -> + false + end; + _ -> + false + end + end. + +is_safe_list([E | Es]) -> + case is_safe(E) of + true -> + is_safe_list(Es); + false -> + false + end; +is_safe_list([]) -> + true. + +is_safe_call(M, F, A) -> + erl_bifs:is_safe(M, F, A). + +%% When setting up local variables, we only create new names if we have +%% to, according to the "no-shadowing" strategy. + +make_locals(Vs, Ren, Env) -> + make_locals(Vs, [], Ren, Env). + +make_locals([V | Vs], As, Ren, Env) -> + Name = var_name(V), + case env__is_defined(Name, Env) of + false -> + %% The variable need not be renamed. Just make sure that the + %% renaming will map it to itself. + Name1 = Name, + Ren1 = ren__add_identity(Name, Ren); + true -> + %% The variable must be renamed to maintain the no-shadowing + %% invariant. Do the right thing for function variables. + Name1 = case Name of + {A, N} -> + env__new_fname(A, N, Env); + _ -> + env__new_vname(Env) + end, + Ren1 = ren__add(Name, Name1, Ren) + end, + %% This temporary binding is added for correct new-key generation. + Env1 = env__bind(Name1, dummy, Env), + make_locals(Vs, [Name1 | As], Ren1, Env1); +make_locals([], As, Ren, Env) -> + {reverse(As), Ren, Env}. + +%% This adds let-bindings for the source code variables in `Es' to the +%% environment `Env'. +%% +%% Note that we always assign a new state location for the +%% residual-program variable, since we cannot know when a location for a +%% particular variable in the source code can be reused. + +bind_locals(Vs, Ren, Env, S) -> + Opnds = [undefined || _ <- Vs], + bind_locals(Vs, Opnds, Ren, Env, S). + +bind_locals(Vs, Opnds, Ren, Env, S) -> + {Ns, Ren1, Env1} = make_locals(Vs, Ren, Env), + {Rs, Env2, S1} = bind_locals_1(Ns, Opnds, [], Env1, S), + {Rs, Ren1, Env2, S1}. + +%% Note that the `Vs' are currently not used for anything except the +%% number of variables. If we were maintaining "source-referenced" +%% flags, then the flag in the new variable should be initialized to the +%% current value of the (residual-) referenced-flag of the "parent". + +bind_locals_1([N | Ns], [Opnd | Opnds], Rs, Env, S) -> + {R, S1} = new_ref(N, Opnd, S), + Env1 = env__bind(N, R, Env), + bind_locals_1(Ns, Opnds, [R | Rs], Env1, S1); +bind_locals_1([], [], Rs, Env, S) -> + {lists:reverse(Rs), Env, S}. + +new_refs(Ns, Opnds, S) -> + new_refs(Ns, Opnds, [], S). + +new_refs([N | Ns], [Opnd | Opnds], Rs, S) -> + {R, S1} = new_ref(N, Opnd, S), + new_refs(Ns, Opnds, [R | Rs], S1); +new_refs([], [], Rs, S) -> + {lists:reverse(Rs), S}. + +new_ref(N, Opnd, S) -> + {L, S1} = st__new_ref_loc(S), + {#ref{name = N, opnd = Opnd, loc = L}, S1}. + +%% This adds recursive bindings for the source code variables in `Es' to +%% the environment `Env'. Note that recursive binding of a set of +%% variables is an atomic operation on the environment - they cannot be +%% added one at a time. + +bind_recursive(Vs, Opnds, Ren, Env, S) -> + {Ns, Ren1, Env1} = make_locals(Vs, Ren, Env), + {Rs, S1} = new_refs(Ns, Opnds, S), + + %% When this fun-expression is evaluated, it updates the operand + %% structure in the ref-structure to contain the recursively defined + %% environment and the correct renaming. + Fun = fun (R, Env) -> + Opnd = R#ref.opnd, + R#ref{opnd = Opnd#opnd{ren = Ren1, env = Env}} + end, + {Rs, Ren1, env__bind_recursive(Ns, Rs, Fun, Env1), S1}. + +safe_context(Ctxt) -> + case Ctxt of + #app{} -> + value; + _ -> + Ctxt + end. + +%% Note that the name of a variable encodes its type: a "plain" variable +%% or a function variable. The latter kind also contains an arity number +%% which should be preserved upon renaming. + +ref_to_var(#ref{name = Name}) -> + %% If we were maintaining "source-referenced" flags, the annotation + %% `add_ann([#source_ref{loc = L}], E)' should also be done here, to + %% make the algorithm reapplicable. This is however not necessary + %% since there are no destructive variable assignments in Erlang. + c_var(Name). + +%% Including the effort counter of the call site assures that the cost +%% of processing an operand via `visit' is charged to the correct +%% counter. In particular, if the effort counter of the call site was +%% passive, the operands will also be processed with a passive counter. + +make_opnd(E, Ren, Env, S) -> + {L, S1} = st__new_opnd_loc(S), + C = st__get_effort(S1), + Opnd = #opnd{expr = E, ren = Ren, env = Env, loc = L, effort = C}, + {Opnd, S1}. + +keep_referenced(Rs, S) -> + [R || R <- Rs, st__get_var_referenced(R#ref.loc, S)]. + +residualize_operands(Opnds, E, S) -> + foldr(fun (Opnd, {E, S}) -> residualize_operand(Opnd, E, S) end, + {E, S}, Opnds). + +%% This is the only case where an operand expression can be visited in +%% `effect' context instead of `value' context. + +residualize_operand(Opnd, E, S) -> + case st__get_opnd_effect(Opnd#opnd.loc, S) of + true -> + %% The operand has not been visited, so we do that now, but + %% in `effect' context. (Waddell's algoritm does some stuff + %% here to account specially for the operand size, which + %% appears unnecessary.) + {E1, S1} = i(Opnd#opnd.expr, effect, Opnd#opnd.ren, + Opnd#opnd.env, S), + {make_seq(E1, E), S1}; + false -> + {E, S} + end. + +%% The `visit' function always visits the operand expression in `value' +%% context (`residualize_operand' visits an unreferenced operand +%% expression in `effect' context when necessary). A new passive size +%% counter is used for visiting the operand, the final value of which is +%% then cached along with the resulting expression. +%% +%% Note that the effort counter of the call site, included in the +%% operand structure, is not a shared object. Thus, the effort budget is +%% actually reused over all occurrences of the operands of a single +%% application. This does not appear to be a problem; just a +%% modification of the algorithm. + +visit(Opnd, S) -> + {C, S1} = visit_1(Opnd, S), + {C#cache.expr, S1}. + +visit_and_count_size(Opnd, S) -> + {C, S1} = visit_1(Opnd, S), + {C#cache.expr, count_size(C#cache.size, S1)}. + +visit_1(Opnd, S) -> + case st__lookup_opnd_cache(Opnd#opnd.loc, S) of + error -> + %% Use a new, passive, size counter for visiting operands, + %% and use the effort counter of the context of the operand. + %% It turns out that if the latter is active, it must be the + %% same object as the one currently used, and if it is + %% passive, it does not matter if it is the same object as + %% any other counter. + Effort = Opnd#opnd.effort, + Active = counter__is_active(Effort), + S1 = case Active of + true -> + S; % don't change effort counter + false -> + st__set_effort(Effort, S) + end, + S2 = new_passive_size(get_size_limit(S1), S1), + + %% Visit the expression and cache the result, along with the + %% final value of the size counter. + {E, S3} = i(Opnd#opnd.expr, value, Opnd#opnd.ren, + Opnd#opnd.env, S2), + Size = get_size_value(S3), + C = #cache{expr = E, size = Size}, + S4 = revert_size(S, st__set_opnd_cache(Opnd#opnd.loc, C, + S3)), + case Active of + true -> + {C, S4}; % keep using the same effort counter + false -> + {C, revert_effort(S, S4)} + end; + {ok, C} -> + {C, S} + end. + +%% Create a pattern matching template for an expression. A template +%% contains only data constructors (including atomic ones) and +%% variables, and compound literals are not folded into a single node. +%% Each node in the template is annotated with the variable which holds +%% the corresponding subexpression; these are new, unique variables not +%% existing in the given `Env'. Returns `{Template, Variables, NewEnv}', +%% where `Variables' is the list of all variables corresponding to nodes +%% in the template *listed in reverse dependency order*, and `NewEnv' is +%% `Env' augmented with mappings from the variable names to +%% subexpressions of `E' (not #ref{} structures!) rewritten so that no +%% computations are duplicated. `Variables' is guaranteed to be nonempty +%% - at least the root node will always be bound to a new variable. + +make_template(E, Env) -> + make_template(E, [], Env). + +make_template(E, Vs0, Env0) -> + case is_data(E) of + true -> + {Ts, {Vs1, Env1}} = mapfoldl( + fun (E, {Vs0, Env0}) -> + {T, Vs1, Env1} = + make_template(E, Vs0, + Env0), + {T, {Vs1, Env1}} + end, + {Vs0, Env0}, data_es(E)), + T = make_data_skel(data_type(E), Ts), + E1 = update_data(E, data_type(E), + [hd(get_ann(T)) || T <- Ts]), + V = new_var(Env1), + Env2 = env__bind(var_name(V), E1, Env1), + {set_ann(T, [V]), [V | Vs1], Env2}; + false -> + case type(E) of + seq -> + %% For a sequencing, we can rebind the variable used + %% for the body, and pass on the template as it is. + {T, Vs1, Env1} = make_template(seq_body(E), Vs0, + Env0), + V = var_name(hd(get_ann(T))), + E1 = update_c_seq(E, seq_arg(E), env__get(V, Env1)), + Env2 = env__bind(V, E1, Env1), + {T, Vs1, Env2}; + _ -> + V = new_var(Env0), + Env1 = env__bind(var_name(V), E, Env0), + {set_ann(V, [V]), [V | Vs0], Env1} + end + end. + +%% Two clauses are equivalent if their bodies are equivalent expressions +%% given that the respective pattern variables are local. + +equivalent_clauses([]) -> + true; +equivalent_clauses([C | Cs]) -> + Env = cerl_trees:variables(c_values(clause_pats(C))), + equivalent_clauses_1(clause_body(C), Cs, Env). + +equivalent_clauses_1(E, [C | Cs], Env) -> + Env1 = cerl_trees:variables(c_values(clause_pats(C))), + case equivalent(E, clause_body(C), ordsets:union(Env, Env1)) of + true -> + equivalent_clauses_1(E, Cs, Env); + false -> + false + end; +equivalent_clauses_1(_, [], _Env) -> + true. + +%% Two expressions are equivalent if and only if they yield the same +%% value and has the same side effects in the same order. Currently, we +%% only accept equality between constructors (constants) and nonlocal +%% variables, since this should cover most cases of interest. If a +%% variable is locally bound in one expression, it cannot be equivalent +%% to one with the same name in the other expression, so we need not +%% keep track of two environments. + +equivalent(E1, E2, Env) -> + case is_data(E1) of + true -> + case is_data(E2) of + true -> + T1 = {data_type(E1), data_arity(E1)}, + T2 = {data_type(E2), data_arity(E2)}, + %% Note that we must test for exact equality. + T1 =:= T2 andalso + equivalent_lists(data_es(E1), data_es(E2), Env); + false -> + false + end; + false -> + case type(E1) of + var -> + case is_c_var(E2) of + true -> + N1 = var_name(E1), + N2 = var_name(E2), + N1 =:= N2 andalso not ordsets:is_element(N1, Env); + false -> + false + end; + _ -> + %% Other constructs are not being considered. + false + end + end. + +equivalent_lists([E1 | Es1], [E2 | Es2], Env) -> + equivalent(E1, E2, Env) and equivalent_lists(Es1, Es2, Env); +equivalent_lists([], [], _) -> + true; +equivalent_lists(_, _, _) -> + false. + +%% Return `false' or `{true, EffectExpr, ValueExpr}'. The environment is +%% passed for new-variable generation. + +reduce_bif_call(M, F, As, Env) -> + reduce_bif_call_1(M, F, length(As), As, Env). + +reduce_bif_call_1(erlang, element, 2, [X, Y], _Env) -> + case is_c_int(X) and is_c_tuple(Y) of + true -> + %% We are free to change the relative evaluation order of + %% the elements, so lifting out a particular element is OK. + T = list_to_tuple(tuple_es(Y)), + N = int_val(X), + if is_integer(N), N > 0, N =< tuple_size(T) -> + E = element(N, T), + Es = tuple_to_list(setelement(N, T, void())), + {true, make_seq(c_tuple(Es), E)}; + true -> + false + end; + false -> + false + end; +reduce_bif_call_1(erlang, hd, 1, [X], _Env) -> + case is_c_cons(X) of + true -> + %% Cf. `element/2' above. + {true, make_seq(cons_tl(X), cons_hd(X))}; + false -> + false + end; +reduce_bif_call_1(erlang, length, 1, [X], _Env) -> + case is_c_list(X) of + true -> + %% Cf. `erlang:size/1' below. + {true, make_seq(X, c_int(list_length(X)))}; + false -> + false + end; +reduce_bif_call_1(erlang, list_to_tuple, 1, [X], _Env) -> + case is_c_list(X) of + true -> + %% This does not actually preserve all the evaluation order + %% constraints of the list, but I don't imagine that it will + %% be a problem. + {true, c_tuple(list_elements(X))}; + false -> + false + end; +reduce_bif_call_1(erlang, setelement, 3, [X, Y, Z], Env) -> + case is_c_int(X) and is_c_tuple(Y) of + true -> + %% Here, unless `Z' is a simple expression, we must bind it + %% to a new variable, because in that case, `Z' must be + %% evaluated before any part of `Y'. + T = list_to_tuple(tuple_es(Y)), + N = int_val(X), + if is_integer(N), N > 0, N =< tuple_size(T) -> + E = element(N, T), + case is_simple(Z) of + true -> + Es = tuple_to_list(setelement(N, T, Z)), + {true, make_seq(E, c_tuple(Es))}; + false -> + V = new_var(Env), + Es = tuple_to_list(setelement(N, T, V)), + E1 = make_seq(E, c_tuple(Es)), + {true, c_let([V], Z, E1)} + end; + true -> + false + end; + false -> + false + end; +reduce_bif_call_1(erlang, size, 1, [X], Env) -> + case is_c_tuple(X) of + true -> + reduce_bif_call_1(erlang, tuple_size, 1, [X], Env); + false -> + false + end; +reduce_bif_call_1(erlang, tl, 1, [X], _Env) -> + case is_c_cons(X) of + true -> + %% Cf. `element/2' above. + {true, make_seq(cons_hd(X), cons_tl(X))}; + false -> + false + end; +reduce_bif_call_1(erlang, tuple_size, 1, [X], _Env) -> + case is_c_tuple(X) of + true -> + %% Just evaluate the tuple for effect and use the size (the + %% arity) as the result. + {true, make_seq(X, c_int(tuple_arity(X)))}; + false -> + false + end; +reduce_bif_call_1(erlang, tuple_to_list, 1, [X], _Env) -> + case is_c_tuple(X) of + true -> + %% This actually introduces slightly stronger constraints on + %% the evaluation order of the subexpressions. + {true, make_list(tuple_es(X))}; + false -> + false + end; +reduce_bif_call_1(_M, _F, _A, _As, _Env) -> + false. + +effort_is_active(S) -> + counter__is_active(st__get_effort(S)). + +size_is_active(S) -> + counter__is_active(st__get_size(S)). + +get_effort_limit(S) -> + counter__limit(st__get_effort(S)). + +new_active_effort(Limit, S) -> + st__set_effort(counter__new_active(Limit), S). + +revert_effort(S1, S2) -> + st__set_effort(st__get_effort(S1), S2). + +new_active_size(Limit, S) -> + st__set_size(counter__new_active(Limit), S). + +new_passive_size(Limit, S) -> + st__set_size(counter__new_passive(Limit), S). + +revert_size(S1, S2) -> + st__set_size(st__get_size(S1), S2). + +count_effort(N, S) -> + C = st__get_effort(S), + C1 = counter__add(N, C, effort, S), + case debug_counters() of + %% true -> + %% case counter__is_active(C1) of + %% true -> + %% V = counter__value(C1), + %% case V > get(counter_effort_max) of + %% true -> + %% put(counter_effort_max, V); + %% false -> + %% ok + %% end; + %% false -> + %% ok + %% end; + false -> + ok + end, + st__set_effort(C1, S). + +count_size(N, S) -> + C = st__get_size(S), + C1 = counter__add(N, C, size, S), + case debug_counters() of + %% true -> + %% case counter__is_active(C1) of + %% true -> + %% V = counter__value(C1), + %% case V > get(counter_size_max) of + %% true -> + %% put(counter_size_max, V); + %% false -> + %% ok + %% end; + %% false -> + %% ok + %% end; + false -> + ok + end, + st__set_size(C1, S). + +get_size_value(S) -> + counter__value(st__get_size(S)). + +get_size_limit(S) -> + counter__limit(st__get_size(S)). + +kill_id_anns([{'id',_} | As]) -> + kill_id_anns(As); +kill_id_anns([A | As]) -> + [A | kill_id_anns(As)]; +kill_id_anns([]) -> + []. + + +%% ===================================================================== +%% General utilities + +%% The atom `ok', is widely used in Erlang for "void" values. + +void() -> abstract(ok). + +is_simple(E) -> + case type(E) of + literal -> true; + var -> true; + 'fun' -> true; + _ -> false + end. + +get_components(N, E) -> + case type(E) of + values -> + Es = values_es(E), + if length(Es) =:= N -> + {true, Es}; + true -> + false + end; + _ when N =:= 1 -> + {true, [E]}; + _ -> + false + end. + +all_static(Es) -> + lists:all(fun (E) -> is_literal(result(E)) end, Es). + +set_clause_bodies([C | Cs], B) -> + [update_c_clause(C, clause_pats(C), clause_guard(C), B) + | set_clause_bodies(Cs, B)]; +set_clause_bodies([], _) -> + []. + +%% ===================================================================== +%% Abstract datatype: renaming() + +ren__identity() -> + dict:new(). + +ren__add(X, Y, Ren) -> + dict:store(X, Y, Ren). + +ren__map(X, Ren) -> + case dict:find(X, Ren) of + {ok, Y} -> + Y; + error -> + X + end. + +ren__add_identity(X, Ren) -> + dict:erase(X, Ren). + + +%% ===================================================================== +%% Abstract datatype: environment() + +env__empty() -> + rec_env:empty(). + +env__bind(Key, Val, Env) -> + rec_env:bind(Key, Val, Env). + +%% `Es' should have type `[{Key, Val}]', and `Fun' should have type +%% `(Val, Env) -> T', mapping a value together with the recursive +%% environment itself to some term `T' to be returned when the entry is +%% looked up. + +env__bind_recursive(Ks, Vs, F, Env) -> + rec_env:bind_recursive(Ks, Vs, F, Env). + +env__lookup(Key, Env) -> + rec_env:lookup(Key, Env). + +env__get(Key, Env) -> + rec_env:get(Key, Env). + +env__is_defined(Key, Env) -> + rec_env:is_defined(Key, Env). + +env__new_vname(Env) -> + rec_env:new_key(Env). + +env__new_fname(A, N, Env) -> + rec_env:new_key(fun (X) -> + S = integer_to_list(X), + {list_to_atom(atom_to_list(A) ++ "_" ++ S), + N} + end, Env). + + +%% ===================================================================== +%% Abstract datatype: state() + +-record(state, {free, % next free location + size, % size counter + effort, % effort counter + unroll, % inner/outer-pending initial value + cache, % operand expression cache + var_flags, % flags for variables (#ref-structures) + opnd_flags, % flags for operands + app_flags}). % flags for #app-structures + +%% Note that we do not have a `var_assigned' flag, since there is no +%% destructive assignment in Erlang. In the original algorithm, the +%% "residual-referenced"-flags of the previous inlining pass (or +%% initialization pass) are used as the "source-referenced"-flags for +%% the subsequent pass. The latter may then be used as a safe +%% approximation whenever we need to base a decision on whether or not a +%% particular variable or function variable could be referenced in the +%% program being generated, and computation of the new +%% "residual-referenced" flag for that variable is not yet finished. In +%% the present algorithm, this can only happen in the presence of +%% variable assignments, which do not exist in Erlang. Therefore, we do +%% not keep "source-referenced" flags for residual-code references in +%% our implementation. +%% +%% The "inner-pending" flag tells us whether we are already in the +%% process of visiting a particular operand, and the "outer-pending" +%% flag whether we are in the process of inlining a propagated +%% functional value. The "pending flags" are really counters limiting +%% the number of times an operand may be inlined recursively, causing +%% loop unrolling. Note that the initial value must be greater than zero +%% in order for any inlining at all to be done. + +%% Flags are stored in ETS-tables, one table for each class. The second +%% element in each stored tuple is the key (the "label"). + +-record(var_flags, {lab, referenced = false}). +-record(opnd_flags, {lab, inner_pending = 1, outer_pending = 1, + effect = false}). +-record(app_flags, {lab, inlined = false}). + +st__new(Effort, Size, Unroll) -> + EtsOpts = [set, private, {keypos, 2}], + #state{free = 0, + size = counter__new_passive(Size), + effort = counter__new_passive(Effort), + unroll = Unroll, + cache = dict:new(), + var_flags = ets:new(var, EtsOpts), + opnd_flags = ets:new(opnd, EtsOpts), + app_flags = ets:new(app, EtsOpts)}. + +st__new_loc(S) -> + N = S#state.free, + {N, S#state{free = N + 1}}. + +st__get_effort(S) -> + S#state.effort. + +st__set_effort(C, S) -> + S#state{effort = C}. + +st__get_size(S) -> + S#state.size. + +st__set_size(C, S) -> + S#state{size = C}. + +st__set_var_referenced(L, S) -> + T = S#state.var_flags, + [F] = ets:lookup(T, L), + ets:insert(T, F#var_flags{referenced = true}), + S. + +st__get_var_referenced(L, S) -> + ets:lookup_element(S#state.var_flags, L, #var_flags.referenced). + +st__lookup_opnd_cache(L, S) -> + dict:find(L, S#state.cache). + +%% Note that setting the cache should only be done once. + +st__set_opnd_cache(L, C, S) -> + S#state{cache = dict:store(L, C, S#state.cache)}. + +st__set_opnd_effect(L, S) -> + T = S#state.opnd_flags, + [F] = ets:lookup(T, L), + ets:insert(T, F#opnd_flags{effect = true}), + S. + +st__get_opnd_effect(L, S) -> + ets:lookup_element(S#state.opnd_flags, L, #opnd_flags.effect). + +st__set_app_inlined(L, S) -> + T = S#state.app_flags, + [F] = ets:lookup(T, L), + ets:insert(T, F#app_flags{inlined = true}), + S. + +st__clear_app_inlined(L, S) -> + T = S#state.app_flags, + [F] = ets:lookup(T, L), + ets:insert(T, F#app_flags{inlined = false}), + S. + +st__get_app_inlined(L, S) -> + ets:lookup_element(S#state.app_flags, L, #app_flags.inlined). + +%% The pending-flags are initialized by `st__new_opnd_loc' below. + +st__test_inner_pending(L, S) -> + T = S#state.opnd_flags, + P = ets:lookup_element(T, L, #opnd_flags.inner_pending), + P =< 0. + +st__mark_inner_pending(L, S) -> + ets:update_counter(S#state.opnd_flags, L, + {#opnd_flags.inner_pending, -1}), + S. + +st__clear_inner_pending(L, S) -> + ets:update_counter(S#state.opnd_flags, L, + {#opnd_flags.inner_pending, 1}), + S. + +st__test_outer_pending(L, S) -> + T = S#state.opnd_flags, + P = ets:lookup_element(T, L, #opnd_flags.outer_pending), + P =< 0. + +st__mark_outer_pending(L, S) -> + ets:update_counter(S#state.opnd_flags, L, + {#opnd_flags.outer_pending, -1}), + S. + +st__clear_outer_pending(L, S) -> + ets:update_counter(S#state.opnd_flags, L, + {#opnd_flags.outer_pending, 1}), + S. + +st__new_app_loc(S) -> + V = {L, _S1} = st__new_loc(S), + ets:insert(S#state.app_flags, #app_flags{lab = L}), + V. + +st__new_ref_loc(S) -> + V = {L, _S1} = st__new_loc(S), + ets:insert(S#state.var_flags, #var_flags{lab = L}), + V. + +st__new_opnd_loc(S) -> + V = {L, _S1} = st__new_loc(S), + N = S#state.unroll, + ets:insert(S#state.opnd_flags, + #opnd_flags{lab = L, + inner_pending = N, + outer_pending = N}), + V. + + +%% ===================================================================== +%% Abstract datatype: counter() +%% +%% `counter__add' throws `{counter_exceeded, Type, Data}' if the +%% resulting counter value would exceed the limit for the counter in +%% question (`Type' and `Data' are given by the user). + +counter__new_passive(Limit) when Limit > 0 -> + {0, Limit}. + +counter__new_active(Limit) when Limit > 0 -> + {Limit, Limit}. + +%% Active counters have values > 0 internally; passive counters start at +%% zero. The 'limit' field is only accessed by the 'counter__limit' +%% function. + +counter__is_active({C, _}) -> + C > 0. + +counter__limit({_, L}) -> + L. + +counter__value({N, L}) -> + if N > 0 -> + L - N; + true -> + -N + end. + +counter__add(N, {V, L}, Type, Data) -> + N1 = V - N, + if V > 0, N1 =< 0 -> + case debug_counters() of + %% true -> + %% case Type of + %% effort -> + %% put(counter_effort_triggers, + %% get(counter_effort_triggers) + 1); + %% size -> + %% put(counter_size_triggers, + %% get(counter_size_triggers) + 1) + %% end; + false -> + ok + end, + throw({counter_exceeded, Type, Data}); + true -> + {N1, L} + end. + + +%% ===================================================================== +%% Reporting + +% report_internal_error(S) -> +% report_internal_error(S, []). + +report_internal_error(S, Vs) -> + report_error("internal error: " ++ S, Vs). + +%% report_error(D) -> +%% report_error(D, []). + +report_error(D, Vs) -> + report({error, D}, Vs). + +report_warning(D) -> + report_warning(D, []). + +report_warning(D, Vs) -> + report({warning, D}, Vs). + +report(D, Vs) -> + io:put_chars(format(D, Vs)). + +format({error, D}, Vs) -> + ["error: ", format(D, Vs)]; +format({warning, D}, Vs) -> + ["warning: ", format(D, Vs)]; +format(S, Vs) when is_list(S) -> + [io_lib:fwrite(S, Vs), $\n]. + + +%% ===================================================================== |