%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2003-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%
%% @author Richard Carlsson <[email protected]>
%% @copyright 2000-2006 Richard Carlsson
%%
%% @doc Core Erlang pattern matching compiler.
%%
%% <p>For reference, see Simon L. Peyton Jones "The Implementation of
%% Functional Programming Languages", chapter 5 (by Phil Wadler).</p>
%%
%% @type cerl() = cerl:cerl().
%% Abstract Core Erlang syntax trees.
%% @type cerl_records() = cerl:cerl_records().
%% An explicit record representation of Core Erlang syntax trees.
-module(cerl_pmatch).
%%-define(NO_UNUSED, true).
-export([clauses/2]).
-ifndef(NO_UNUSED).
-export([transform/2, core_transform/2, expr/2]).
-endif.
-import(lists, [all/2, splitwith/2, foldr/3, keysort/2, foldl/3,
mapfoldl/3]).
-define(binary_id, {binary}).
-define(cons_id, {cons}).
-define(tuple_id, {tuple}).
-define(literal_id(V), V).
%% @spec core_transform(Module::cerl_records(), Options::[term()]) ->
%% cerl_records()
%%
%% @doc Transforms a module represented by records. See
%% <code>transform/2</code> for details.
%%
%% <p>Use the compiler option <code>{core_transform, cerl_pmatch}</code>
%% to insert this function as a compilation pass.</p>
%%
%% @see transform/2
-ifndef(NO_UNUSED).
-spec core_transform(cerl:c_module(), [_]) -> cerl:c_module().
core_transform(M, Opts) ->
cerl:to_records(transform(cerl:from_records(M), Opts)).
-endif. % NO_UNUSED
%% @clear
%% @spec transform(Module::cerl(), Options::[term()]) -> cerl()
%%
%% @doc Rewrites all <code>case</code>-clauses in <code>Module</code>.
%% <code>receive</code>-clauses are not affected. Currently, no options
%% are available.
%%
%% @see clauses/2
%% @see expr/2
%% @see core_transform/2
-ifndef(NO_UNUSED).
-spec transform(cerl:cerl(), [_]) -> cerl:cerl().
transform(M, _Opts) ->
expr(M, env__empty()).
-endif. % NO_UNUSED
%% @clear
%% @spec clauses(Clauses::[Clause], Env) -> {Expr, Vars}
%% Clause = cerl()
%% Expr = cerl()
%% Vars = [cerl()]
%% Env = rec_env:environment()
%%
%% @doc Rewrites a sequence of clauses to an equivalent expression,
%% removing as much repeated testing as possible. Returns a pair
%% <code>{Expr, Vars}</code>, where <code>Expr</code> is the resulting
%% expression, and <code>Vars</code> is a list of new variables (i.e.,
%% not already in the given environment) to be bound to the arguments to
%% the switch. The following is a typical example (assuming
%% <code>E</code> is a Core Erlang case expression):
%% <pre>
%% handle_case(E, Env) ->
%% Cs = case_clauses(E),
%% {E1, Vs} = cerl_pmatch(Cs, Env),
%% c_let(Vs, case_arg(E), E1).
%% </pre>
%%
%% <p>The environment is used for generating new variables which do not
%% shadow existing bindings.</p>
%%
%% @see rec_env
%% @see expr/2
%% @see transform/2
-spec clauses([cerl:cerl(),...], rec_env:environment()) ->
{cerl:cerl(), [cerl:cerl()]}.
clauses(Cs, Env) ->
clauses(Cs, none, Env).
clauses([C | _] = Cs, Else, Env) ->
Vs = new_vars(cerl:clause_arity(C), Env),
E = match(Vs, Cs, Else, add_vars(Vs, Env)),
{E, Vs}.
%% The implementation very closely follows that described in the book.
match([], Cs, Else, _Env) ->
%% If the "default action" is the atom 'none', it is simply not
%% added; otherwise it is put in the body of a final catch-all
%% clause (which is often removed by the below optimization).
Cs1 = if Else =:= none -> Cs;
true -> Cs ++ [cerl:c_clause([], Else)]
end,
%% This clause reduction is an important optimization. It selects a
%% clause body if possible, and otherwise just removes dead clauses.
case cerl_clauses:reduce(Cs1) of
{true, {C, []}} -> % if we get bindings, something is wrong!
cerl:clause_body(C);
{false, Cs2} ->
%% This happens when guards are nontrivial.
cerl:c_case(cerl:c_values([]), Cs2)
end;
match([V | _] = Vs, Cs, Else, Env) ->
foldr(fun (CsF, ElseF) ->
match_var_con(Vs, CsF, ElseF, Env)
end,
Else,
group([unalias(C, V) || C <- Cs], fun is_var_clause/1)).
group([], _F) ->
[];
group([X | _] = Xs, F) ->
group(Xs, F, F(X)).
group(Xs, F, P) ->
{First, Rest} = splitwith(fun (X) -> F(X) =:= P end, Xs),
[First | group(Rest, F)].
is_var_clause(C) ->
cerl:is_c_var(hd(cerl:clause_pats(C))).
%% To avoid code duplication, if the 'Else' expression is too big, we
%% put it in a local function definition instead, and replace it with a
%% call. (Note that it is important that 'is_lightweight' does not yield
%% 'true' for a simple function application, or we will create a lot of
%% unnecessary extra functions.)
match_var_con(Vs, Cs, none = Else, Env) ->
match_var_con_1(Vs, Cs, Else, Env);
match_var_con(Vs, Cs, Else, Env) ->
case is_lightweight(Else) of
true ->
match_var_con_1(Vs, Cs, Else, Env);
false ->
F = new_fvar("match_", 0, Env),
Else1 = cerl:c_apply(F, []),
Env1 = add_vars([F], Env),
cerl:c_letrec([{F, cerl:c_fun([], Else)}],
match_var_con_1(Vs, Cs, Else1, Env1))
end.
match_var_con_1(Vs, Cs, Else, Env) ->
case is_var_clause(hd(Cs)) of
true ->
match_var(Vs, Cs, Else, Env);
false ->
match_con(Vs, Cs, Else, Env)
end.
match_var([V | Vs], Cs, Else, Env) ->
Cs1 = [begin
[P | Ps] = cerl:clause_pats(C),
G = make_let([P], V, cerl:clause_guard(C)),
B = make_let([P], V, cerl:clause_body(C)),
cerl:update_c_clause(C, Ps, G, B)
end
|| C <- Cs],
match(Vs, Cs1, Else, Env).
%% Since Erlang is dynamically typed, we must include the possibility
%% that none of the constructors in the group will match, and in that
%% case the "Else" code will be executed (unless it is 'none'), in the
%% body of a final catch-all clause.
match_con([V | Vs], Cs, Else, Env) ->
case group_con(Cs) of
[{_, _, Gs}] ->
%% Don't create a group type switch if there is only one
%% such group
make_switch(V, [match_congroup(DG, Vs, CsG, Else, Env)
|| {DG, _, CsG} <- Gs],
Else, Env);
Ts ->
Cs1 = [match_typegroup(T, V, Vs, Gs, Else, Env)
|| {T, _, Gs} <- Ts],
make_switch(V, Cs1, Else, Env)
end.
match_typegroup(_T, _V, Vs, [{D, _, Cs}], Else, Env) when element(1, D) /= ?binary_id ->
%% Don't create a group type switch if there is only one constructor
%% in the group. (Note that this always happens for '[]'.)
%% Special case for binaries which always get a group switch
match_congroup(D, Vs, Cs, Else, Env);
match_typegroup(T, V, Vs, Gs, Else, Env) ->
Body = make_switch(V, [match_congroup(D, Vs, Cs, Else, Env)
|| {D, _, Cs} <- Gs],
Else, Env),
typetest_clause(T, V, Body, Env).
match_congroup({?binary_id, Segs}, Vs, Cs, _Else, Env) ->
Ref = get_unique(),
Guard = cerl:c_primop(cerl:c_atom(set_label), [cerl:c_int(Ref)]),
NewElse = cerl:c_primop(cerl:c_atom(goto_label), [cerl:c_int(Ref)]),
Body = match(Vs, Cs, NewElse, Env),
cerl:c_clause([make_pat(?binary_id, Segs)], Guard, Body);
match_congroup({D, A}, Vs, Cs, Else, Env) ->
Vs1 = new_vars(A, Env),
Body = match(Vs1 ++ Vs, Cs, Else, add_vars(Vs1, Env)),
cerl:c_clause([make_pat(D, Vs1)], Body).
make_switch(V, Cs, Else, Env) ->
cerl:c_case(V, if Else =:= none -> Cs;
true -> Cs ++ [cerl:c_clause([new_var(Env)],
Else)]
end).
%% We preserve the relative order of different-type constructors as they
%% were originally listed. This is done by tracking the clause numbers.
group_con(Cs) ->
{Cs1, _} = mapfoldl(fun (C, N) ->
[P | Ps] = cerl:clause_pats(C),
Ps1 = sub_pats(P) ++ Ps,
G = cerl:clause_guard(C),
B = cerl:clause_body(C),
C1 = cerl:update_c_clause(C, Ps1, G, B),
D = con_desc(P),
{{D, N, C1}, N + 1}
end,
0, Cs),
%% Sort and group constructors.
Css = group(keysort(1, Cs1), fun ({D,_,_}) -> D end),
%% Sort each group "back" by line number, and move the descriptor
%% and line number to the wrapper for the group.
Gs = [finalize_congroup(C) || C <- Css],
%% Group by type only (put e.g. different-arity tuples together).
Gss = group(Gs, fun ({D,_,_}) -> con_desc_type(D) end),
%% Sort and wrap the type groups.
Ts = [finalize_typegroup(G) || G <- Gss],
%% Sort type-groups by first clause order
keysort(2, Ts).
finalize_congroup(Cs) ->
[{D,N,_}|_] = Cs1 = keysort(2, Cs),
{D, N, [C || {_,_,C} <- Cs1]}.
finalize_typegroup(Gs) ->
[{D,N,_}|_] = Gs1 = keysort(2, Gs),
{con_desc_type(D), N, Gs1}.
%% Since Erlang clause patterns can contain "alias patterns", we must
%% eliminate these, by turning them into let-definitions in the guards
%% and bodies of the clauses.
unalias(C, V) ->
[P | Ps] = cerl:clause_pats(C),
B = cerl:clause_body(C),
G = cerl:clause_guard(C),
unalias(P, V, Ps, B, G, C).
unalias(P, V, Ps, B, G, C) ->
case cerl:type(P) of
alias ->
V1 = cerl:alias_var(P),
B1 = make_let([V1], V, B),
G1 = make_let([V1], V, G),
unalias(cerl:alias_pat(P), V, Ps, B1, G1, C);
_ ->
cerl:update_c_clause(C, [P | Ps], G, B)
end.
%% Generating a type-switch clause
typetest_clause([], _V, E, _Env) ->
cerl:c_clause([cerl:c_nil()], E);
typetest_clause(atom, V, E, _Env) ->
typetest_clause_1(is_atom, V, E);
typetest_clause(integer, V, E, _Env) ->
typetest_clause_1(is_integer, V, E);
typetest_clause(float, V, E, _Env) ->
typetest_clause_1(is_float, V, E);
typetest_clause(cons, _V, E, Env) ->
[V1, V2] = new_vars(2, Env),
cerl:c_clause([cerl:c_cons(V1, V2)], E); % there is no 'is cons'
typetest_clause(tuple, V, E, _Env) ->
typetest_clause_1(is_tuple, V, E);
typetest_clause(binary, V, E, _Env) ->
typetest_clause_1(is_binary, V, E).
typetest_clause_1(T, V, E) ->
cerl:c_clause([V], cerl:c_call(cerl:c_atom('erlang'),
cerl:c_atom(T), [V]), E).
%% This returns a constructor descriptor, to be used for grouping and
%% pattern generation. It consists of an identifier term and the arity.
con_desc(E) ->
case cerl:type(E) of
cons -> {?cons_id, 2};
tuple -> {?tuple_id, cerl:tuple_arity(E)};
binary -> {?binary_id, cerl:binary_segments(E)};
literal ->
case cerl:concrete(E) of
[_|_] -> {?cons_id, 2};
T when is_tuple(T) -> {?tuple_id, tuple_size(T)};
V -> {?literal_id(V), 0}
end;
_ ->
throw({bad_constructor, E})
end.
%% This returns the type class for a constructor descriptor, for
%% grouping of clauses. It does not distinguish between tuples of
%% different arity, nor between different values of atoms, integers and
%% floats.
con_desc_type({?literal_id([]), _}) -> [];
con_desc_type({?literal_id(V), _}) when is_atom(V) -> atom;
con_desc_type({?literal_id(V), _}) when is_integer(V) -> integer;
con_desc_type({?literal_id(V), _}) when is_float(V) -> float;
con_desc_type({?cons_id, 2}) -> cons;
con_desc_type({?tuple_id, _}) -> tuple;
con_desc_type({?binary_id, _}) -> binary.
%% This creates a new constructor pattern from a type descriptor and a
%% list of variables.
make_pat(?cons_id, [V1, V2]) -> cerl:c_cons(V1, V2);
make_pat(?tuple_id, Vs) -> cerl:c_tuple(Vs);
make_pat(?binary_id, Segs) -> cerl:c_binary(Segs);
make_pat(?literal_id(Val), []) -> cerl:abstract(Val).
%% This returns the list of subpatterns of a constructor pattern.
sub_pats(E) ->
case cerl:type(E) of
cons ->
[cerl:cons_hd(E), cerl:cons_tl(E)];
tuple ->
cerl:tuple_es(E);
binary ->
[];
literal ->
case cerl:concrete(E) of
[H|T] -> [cerl:abstract(H), cerl:abstract(T)];
T when is_tuple(T) -> [cerl:abstract(X)
|| X <- tuple_to_list(T)];
_ -> []
end;
_ ->
throw({bad_constructor_pattern, E})
end.
%% This avoids generating stupid things like "let X = ... in 'true'",
%% and "let X = Y in X", keeping the generated code cleaner. It also
%% prevents expressions from being considered "non-lightweight" when
%% code duplication is disallowed (see is_lightweight for details).
make_let(Vs, A, B) ->
cerl_lib:reduce_expr(cerl:c_let(Vs, A, B)).
%% ---------------------------------------------------------------------
%% Rewriting a module or other expression:
%% @spec expr(Expression::cerl(), Env) -> cerl()
%% Env = rec_env:environment()
%%
%% @doc Rewrites all <code>case</code>-clauses in
%% <code>Expression</code>. <code>receive</code>-clauses are not
%% affected.
%%
%% <p>The environment is used for generating new variables which do not
%% shadow existing bindings.</p>
%%
%% @see clauses/2
%% @see rec_env
-ifndef(NO_UNUSED).
-spec expr(cerl:cerl(), rec_env:environment()) -> cerl:cerl().
expr(E, Env) ->
case cerl:type(E) of
literal ->
E;
var ->
E;
values ->
Es = expr_list(cerl:values_es(E), Env),
cerl:update_c_values(E, Es);
cons ->
H = expr(cerl:cons_hd(E), Env),
T = expr(cerl:cons_tl(E), Env),
cerl:update_c_cons(E, H, T);
tuple ->
Es = expr_list(cerl:tuple_es(E), Env),
cerl:update_c_tuple(E, Es);
'let' ->
A = expr(cerl:let_arg(E), Env),
Vs = cerl:let_vars(E),
Env1 = add_vars(Vs, Env),
B = expr(cerl:let_body(E), Env1),
cerl:update_c_let(E, Vs, A, B);
seq ->
A = expr(cerl:seq_arg(E), Env),
B = expr(cerl:seq_body(E), Env),
cerl:update_c_seq(E, A, B);
apply ->
Op = expr(cerl:apply_op(E), Env),
As = expr_list(cerl:apply_args(E), Env),
cerl:update_c_apply(E, Op, As);
call ->
M = expr(cerl:call_module(E), Env),
N = expr(cerl:call_name(E), Env),
As = expr_list(cerl:call_args(E), Env),
cerl:update_c_call(E, M, N, As);
primop ->
As = expr_list(cerl:primop_args(E), Env),
cerl:update_c_primop(E, cerl:primop_name(E), As);
'case' ->
A = expr(cerl:case_arg(E), Env),
Cs = expr_list(cerl:case_clauses(E), Env),
{E1, Vs} = clauses(Cs, Env),
make_let(Vs, A, E1);
clause ->
Vs = cerl:clause_vars(E),
Env1 = add_vars(Vs, Env),
G = expr(cerl:clause_guard(E), Env1),
B = expr(cerl:clause_body(E), Env1),
cerl:update_c_clause(E, cerl:clause_pats(E), G, B);
'fun' ->
Vs = cerl:fun_vars(E),
Env1 = add_vars(Vs, Env),
B = expr(cerl:fun_body(E), Env1),
cerl:update_c_fun(E, Vs, B);
'receive' ->
%% NOTE: No pattern matching compilation is done here! The
%% receive-clauses and patterns cannot be staged as long as
%% we are working with "normal" Core Erlang.
Cs = expr_list(cerl:receive_clauses(E), Env),
T = expr(cerl:receive_timeout(E), Env),
A = expr(cerl:receive_action(E), Env),
cerl:update_c_receive(E, Cs, T, A);
'try' ->
A = expr(cerl:try_arg(E), Env),
Vs = cerl:try_vars(E),
B = expr(cerl:try_body(E), add_vars(Vs, Env)),
Evs = cerl:try_evars(E),
H = expr(cerl:try_handler(E), add_vars(Evs, Env)),
cerl:update_c_try(E, A, Vs, B, Evs, H);
'catch' ->
B = expr(cerl:catch_body(E), Env),
cerl:update_c_catch(E, B);
letrec ->
Ds = cerl:letrec_defs(E),
Env1 = add_defs(Ds, Env),
Ds1 = defs(Ds, Env1),
B = expr(cerl:letrec_body(E), Env1),
cerl:update_c_letrec(E, Ds1, B);
module ->
Ds = cerl:module_defs(E),
Env1 = add_defs(Ds, Env),
Ds1 = defs(Ds, Env1),
cerl:update_c_module(E, cerl:module_name(E),
cerl:module_exports(E),
cerl:module_attrs(E), Ds1)
end.
expr_list(Es, Env) ->
[expr(E, Env) || E <- Es].
defs(Ds, Env) ->
[{V, expr(F, Env)} || {V, F} <- Ds].
-endif. % NO_UNUSED
%% @clear
%% ---------------------------------------------------------------------
%% Support functions
new_var(Env) ->
Name = env__new_vname(Env),
cerl:c_var(Name).
new_vars(N, Env) ->
[cerl:c_var(V) || V <- env__new_vnames(N, Env)].
new_fvar(A, N, Env) ->
Name = env__new_fname(A, N, Env),
cerl:c_var(Name).
add_vars(Vs, Env) ->
foldl(fun (V, E) -> env__bind(cerl:var_name(V), [], E) end, Env, Vs).
-ifndef(NO_UNUSED).
add_defs(Ds, Env) ->
foldl(fun ({V, _F}, E) ->
env__bind(cerl:var_name(V), [], E)
end, Env, Ds).
-endif. % NO_UNUSED
%% This decides whether an expression is worth lifting out to a separate
%% function instead of duplicating the code. In other words, whether its
%% cost is about the same or smaller than that of a local function call.
%% Note that variables must always be "lightweight"; otherwise, they may
%% get lifted out of the case switch that introduces them.
is_lightweight(E) ->
case get('cerl_pmatch_duplicate_code') of
never -> cerl:type(E) =:= var; % Avoids all code duplication
always -> true; % Does not lift code to new functions
_ -> is_lightweight_1(E)
end.
is_lightweight_1(E) ->
case cerl:type(E) of
var -> true;
literal -> true;
'fun' -> true;
values -> all(fun is_simple/1, cerl:values_es(E));
cons -> is_simple(cerl:cons_hd(E))
andalso is_simple(cerl:cons_tl(E));
tuple -> all(fun is_simple/1, cerl:tuple_es(E));
'let' -> (is_simple(cerl:let_arg(E)) andalso
is_lightweight_1(cerl:let_body(E)));
seq -> (is_simple(cerl:seq_arg(E)) andalso
is_lightweight_1(cerl:seq_body(E)));
primop ->
all(fun is_simple/1, cerl:primop_args(E));
apply ->
is_simple(cerl:apply_op(E))
andalso all(fun is_simple/1, cerl:apply_args(E));
call ->
is_simple(cerl:call_module(E))
andalso is_simple(cerl:call_name(E))
andalso all(fun is_simple/1, cerl:call_args(E));
_ ->
%% The default is to lift the code to a new function.
false
end.
%% "Simple" things have no (or negligible) runtime cost and are free
%% from side effects.
is_simple(E) ->
case cerl:type(E) of
var -> true;
literal -> true;
values -> all(fun is_simple/1, cerl:values_es(E));
_ -> false
end.
get_unique() ->
case get(unique_label) of
undefined ->
put(unique_label, 1),
0;
N ->
put(unique_label, N+1),
N
end.
%% ---------------------------------------------------------------------
%% Abstract datatype: environment()
env__bind(Key, Val, Env) ->
rec_env:bind(Key, Val, Env).
-ifndef(NO_UNUSED).
%% env__bind_recursive(Ks, Vs, F, Env) ->
%% rec_env:bind_recursive(Ks, Vs, F, Env).
%% env__lookup(Key, Env) ->
%% rec_env:lookup(Key, Env).
%% env__get(Key, Env) ->
%% rec_env:get(Key, Env).
%% env__is_defined(Key, Env) ->
%% rec_env:is_defined(Key, Env).
env__empty() ->
rec_env:empty().
-endif. % NO_UNUSED
env__new_vname(Env) ->
rec_env:new_key(Env).
env__new_vnames(N, Env) ->
rec_env:new_keys(N, Env).
env__new_fname(F, A, Env) ->
rec_env:new_key(fun (X) ->
S = integer_to_list(X),
{list_to_atom(F ++ S), A}
end,
Env).