diff options
Diffstat (limited to 'lib/hipe/cerl/cerl_messagean.erl')
-rw-r--r-- | lib/hipe/cerl/cerl_messagean.erl | 1105 |
1 files changed, 1105 insertions, 0 deletions
diff --git a/lib/hipe/cerl/cerl_messagean.erl b/lib/hipe/cerl/cerl_messagean.erl new file mode 100644 index 0000000000..0753376e7d --- /dev/null +++ b/lib/hipe/cerl/cerl_messagean.erl @@ -0,0 +1,1105 @@ +%% ===================================================================== +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% Message analysis of Core Erlang programs. +%% +%% Copyright (C) 2002 Richard Carlsson +%% +%% Author contact: [email protected] +%% ===================================================================== + +%% TODO: might need a "top" (`any') element for any-length value lists. + +-module(cerl_messagean). + +-export([annotate/1]). + +-import(cerl, [alias_pat/1, alias_var/1, ann_c_var/2, ann_c_fun/3, + apply_args/1, apply_op/1, atom_val/1, bitstr_size/1, + bitstr_val/1, binary_segments/1, c_letrec/2, + ann_c_tuple/2, c_nil/0, call_args/1, call_module/1, + call_name/1, case_arg/1, case_clauses/1, catch_body/1, + clause_body/1, clause_guard/1, clause_pats/1, cons_hd/1, + cons_tl/1, fun_body/1, fun_vars/1, get_ann/1, int_val/1, + is_c_atom/1, is_c_int/1, let_arg/1, let_body/1, + let_vars/1, letrec_body/1, letrec_defs/1, module_defs/1, + module_defs/1, module_exports/1, pat_vars/1, + primop_args/1, primop_name/1, receive_action/1, + receive_clauses/1, receive_timeout/1, seq_arg/1, + seq_body/1, set_ann/2, try_arg/1, try_body/1, try_vars/1, + try_evars/1, try_handler/1, tuple_es/1, type/1, + values_es/1]). + +-import(cerl_trees, [get_label/1]). + +-define(DEF_LIMIT, 4). + +%% -export([test/1, test1/1, ttest/1]). + +%% ttest(F) -> +%% {T, _} = cerl_trees:label(user_default:read(F)), +%% {Time0, _} = erlang:statistics(runtime), +%% analyze(T), +%% {Time1, _} = erlang:statistics(runtime), +%% Time1 - Time0. + +%% test(F) -> +%% {T, _} = cerl_trees:label(user_default:read(F)), +%% {Time0, _} = erlang:statistics(runtime), +%% {Esc, _Vars} = analyze(T), +%% {Time1, _} = erlang:statistics(runtime), +%% io:fwrite("messages: ~p.\n", [Esc]), +%% Set = sets:from_list(Esc), +%% H = fun (Node, Ctxt, Cont) -> +%% Doc = case get_ann(Node) of +%% [{label, L} | _] -> +%% B = sets:is_element(L, Set), +%% bf(Node, Ctxt, Cont, B); +%% _ -> +%% bf(Node, Ctxt, Cont, false) +%% end, +%% case type(Node) of +%% cons -> color(Doc); +%% tuple -> color(Doc); +%% _ -> Doc +%% end +%% end, +%% {ok, FD} = file:open("out.html",[write]), +%% Txt = cerl_prettypr:format(T, [{hook, H},{user,false}]), +%% io:put_chars(FD, "<pre>\n"), +%% io:put_chars(FD, html(Txt)), +%% io:put_chars(FD, "</pre>\n"), +%% file:close(FD), +%% {ok, Time1 - Time0}. + +%% test1(F) -> +%% {T, _} = cerl_trees:label(user_default:read(F)), +%% {Time0, _} = erlang:statistics(runtime), +%% {T1, Esc, Vars} = annotate(T), +%% {Time1, _} = erlang:statistics(runtime), +%% io:fwrite("messages: ~p.\n", [Esc]), +%% %%% io:fwrite("vars: ~p.\n", [[X || X <- dict:to_list(Vars)]]), +%% T2 = hhl_transform:transform(T1, Vars), +%% Set = sets:from_list(Esc), +%% H = fun (Node, Ctxt, Cont) -> +%% case get_ann(Node) of +%% [{label, L} | _] -> +%% B = sets:is_element(L, Set), +%% bf(Node, Ctxt, Cont, B); +%% _ -> +%% bf(Node, Ctxt, Cont, false) +%% end +%% end, +%% {ok, FD} = file:open("out.html",[write]), +%% Txt = cerl_prettypr:format(T2, [{hook, H},{user,false}]), +%% io:put_chars(FD, "<pre>\n"), +%% io:put_chars(FD, html(Txt)), +%% io:put_chars(FD, "</pre>\n"), +%% file:close(FD), +%% {ok, Time1 - Time0}. + +%% html(Cs) -> +%% html(Cs, []). + +%% html([$#, $< | Cs], As) -> +%% html_1(Cs, [$< | As]); +%% html([$< | Cs], As) -> +%% html(Cs, ";tl&" ++ As); +%% html([$> | Cs], As) -> +%% html(Cs, ";tg&" ++ As); +%% html([$& | Cs], As) -> +%% html(Cs, ";pma&" ++ As); +%% html([C | Cs], As) -> +%% html(Cs, [C | As]); +%% html([], As) -> +%% lists:reverse(As). + +%% html_1([$> | Cs], As) -> +%% html(Cs, [$> | As]); +%% html_1([C | Cs], As) -> +%% html_1(Cs, [C | As]). + +%% bf(Node, Ctxt, Cont, B) -> +%% B0 = cerl_prettypr:get_ctxt_user(Ctxt), +%% if B /= B0 -> +%% Ctxt1 = cerl_prettypr:set_ctxt_user(Ctxt, B), +%% Doc = Cont(Node, Ctxt1), +%% case B of +%% true -> +%% Start = "<b>", +%% End = "</b>"; +%% false -> +%% Start = "</b>", +%% End = "<b>" +%% end, +%% markup(Doc, Start, End); +%% true -> +%% Cont(Node, Ctxt) +%% end. + +%% color(Doc) -> +%% % Doc. +%% markup(Doc, "<font color=blue>", "</font>"). + +%% markup(Doc, Start, End) -> +%% prettypr:beside( +%% prettypr:null_text([$# | Start]), +%% prettypr:beside(Doc, +%% prettypr:null_text([$# | End]))). + + +%% ===================================================================== +%% annotate(Tree) -> {Tree1, Escapes, Vars} +%% +%% Tree = cerl:cerl() +%% +%% Analyzes `Tree' (see `analyze') and appends a term 'escapes', to +%% the annotation list of each constructor expression node and of +%% `Tree', corresponding to the escape information derived by the +%% analysis. Any previous such annotations are removed from `Tree'. +%% `Tree1' is the modified tree; for details on `OutList', +%% `Outputs' , `Dependencies', `Escapes' and `Parents', see +%% `analyze'. +%% +%% Note: `Tree' must be annotated with labels in order to use this +%% function; see `analyze' for details. + +-type label() :: integer() | 'external' | 'top'. +-type ordset(X) :: [X]. % XXX: TAKE ME OUT + +-spec annotate(cerl:cerl()) -> {cerl:cerl(), ordset(label()), dict()}. + +annotate(Tree) -> + {Esc0, Vars} = analyze(Tree), + Esc = sets:from_list(Esc0), + F = fun (T) -> + case type(T) of + literal -> T; +%%% var -> +%%% L = get_label(T), +%%% T1 = ann_escape(T, L, Esc), +%%% X = dict:fetch(L, Vars), +%%% set_ann(T1, append_ann({s,X}, get_ann(T1))); + _ -> + L = get_label(T), + ann_escape(T, L, Esc) + end + end, + {cerl_trees:map(F, Tree), Esc0, Vars}. + +ann_escape(T, L, Esc) -> + case sets:is_element(L, Esc) of + true -> + set_ann(T, append_ann(escapes, get_ann(T))); + false -> + T + end. + +append_ann(Tag, [X | Xs]) -> + if tuple_size(X) >= 1, element(1, X) =:= Tag -> + append_ann(Tag, Xs); + true -> + [X | append_ann(Tag, Xs)] + end; +append_ann(Tag, []) -> + [Tag]. + + +%% ===================================================================== +%% analyze(Tree) -> Escapes +%% +%% Tree = cerl:cerl() +%% Escapes = ordset(Label) +%% Label = integer() | external | top +%% +%% Analyzes a module or an expression represented by `Tree'. +%% +%% `Escapes' is the set of labels of constructor expressions in +%% `Tree' such that the created values may be accessed from outside +%% `Tree'. +%% +%% Note: `Tree' must be annotated with labels (as done by the +%% function `cerl_trees:label/1') in order to use this function. +%% The label annotation `{label, L}' (where L should be an integer) +%% must be the first element of the annotation list of each node in +%% the tree. Instances of variables bound in `Tree' which denote +%% the same variable must have the same label; apart from this, +%% labels should be unique. Constant literals do not need to be +%% labeled. + +-record(state, {vars, out, dep, work, funs, k}). + +%% Note: We assume that all remote calls and primops return a single +%% value. + +%% The analysis determines which objects (identified by the +%% corresponding "cons-point" labels in the code) are likely to be +%% passed in a message. (If so, we say that they "escape".) It is always +%% safe to assume either case, because the send operation will assure +%% that things are copied if necessary. This analysis tries to +%% anticipate that copying will be done. +%% +%% Rules: +%% 1) An object passed as message argument (or part of such an +%% argument) to a known send-operation, will probably be a message. +%% 2) A received value is always a message (safe). +%% 3) The external function can return any object (unsafe). +%% 4) A function called from the external function can receive any +%% object (unsafe) as argument. +%% 5) Unknown functions/operations can return any object (unsafe). + +%% We wrap the given syntax tree T in a fun-expression labeled `top', +%% which is initially in the set of escaped labels. `top' will be +%% visited at least once. +%% +%% We create a separate function labeled `external', defined as: +%% "'external'/1 = fun () -> Any", which will represent any and all +%% functions outside T, and which returns the 'unsafe' value. + +analyze(Tree) -> + analyze(Tree, ?DEF_LIMIT). + +analyze(Tree, Limit) -> + {_, _, Esc, Dep, _Par} = cerl_closurean:analyze(Tree), +%%% io:fwrite("dependencies: ~w.\n", [dict:to_list(Dep)]), + analyze(Tree, Limit, Dep, Esc). + +analyze(Tree, Limit, Dep0, Esc0) -> + %% Note that we use different name spaces for variable labels and + %% function/call site labels, so we can reuse some names here. We + %% assume that the labeling of Tree only uses integers, not atoms. + Any = ann_c_var([{label, any}], 'Any'), + External = ann_c_var([{label, external}], {external, 1}), + ExtFun = ann_c_fun([{label, external}], [], Any), +%%% io:fwrite("external fun:\n~s.\n", +%%% [cerl_prettypr:format(ExtFun, [noann, {paper, 80}])]), + Top = ann_c_var([{label, top}], {top, 0}), + TopFun = ann_c_fun([{label, top}], [], Tree), + + %% The "start fun" just makes the initialisation easier. It is not + %% itself in the call graph. + StartFun = ann_c_fun([{label, start}], [], + c_letrec([{External, ExtFun}, {Top, TopFun}], + c_nil())), +%%% io:fwrite("start fun:\n~s.\n", +%%% [cerl_prettypr:format(StartFun, [{paper, 80}])]), + + %% Initialise the Any and Escape variables. Gather a database of all + %% fun-expressions in Tree and initialise their outputs and parameter + %% variables. All escaping functions can receive any values as + %% inputs. Bind all module- and letrec-defined variables to their + %% corresponding labels. + Esc = sets:from_list(Esc0), + Unsafe = unsafe(), + Empty = empty(), + Funs0 = dict:new(), + Vars0 = dict:store(escape, empty(), + dict:store(any, Unsafe, dict:new())), + Out0 = dict:new(), + F = fun (T, S = {Fs, Vs, Os}) -> + case type(T) of + 'fun' -> + L = get_label(T), + As = fun_vars(T), + X = case sets:is_element(L, Esc) of + true -> Unsafe; + false -> Empty + end, + {dict:store(L, T, Fs), + bind_vars_single(As, X, Vs), + dict:store(L, none, Os)}; + letrec -> + {Fs, bind_defs(letrec_defs(T), Vs), Os}; + module -> + {Fs, bind_defs(module_defs(T), Vs), Os}; + _ -> + S + end + end, + {Funs, Vars, Out} = cerl_trees:fold(F, {Funs0, Vars0, Out0}, StartFun), + + %% Add the dependency for the loop in 'external': + Dep = add_dep(loop, external, Dep0), + + %% Enter the fixpoint iteration at the StartFun. + St = loop(StartFun, start, #state{vars = Vars, + out = Out, + dep = Dep, + work = init_work(), + funs = Funs, + k = Limit}), + Ms = labels(dict:fetch(escape, St#state.vars)), + {Ms, St#state.vars}. + +loop(T, L, St0) -> +%%% io:fwrite("analyzing: ~w.\n",[L]), +%%% io:fwrite("work: ~w.\n", [St0#state.work]), + Xs0 = dict:fetch(L, St0#state.out), + {Xs1, St1} = visit(fun_body(T), L, St0), + Xs = limit(Xs1, St1#state.k), + {W, M} = case equal(Xs0, Xs) of + true -> + {St1#state.work, St1#state.out}; + false -> +%%% io:fwrite("out (~w) changed: ~w <- ~w.\n", +%%% [L, Xs, Xs0]), + M1 = dict:store(L, Xs, St1#state.out), + case dict:find(L, St1#state.dep) of + {ok, S} -> + {add_work(set__to_list(S), St1#state.work), + M1}; + error -> + {St1#state.work, M1} + end + end, + St2 = St1#state{out = M}, + case take_work(W) of + {ok, L1, W1} -> + T1 = dict:fetch(L1, St2#state.funs), + loop(T1, L1, St2#state{work = W1}); + none -> + St2 + end. + +visit(T, L, St) -> +%%% io:fwrite("visiting: ~w.\n",[type(T)]), + case type(T) of + literal -> + %% This is (or should be) a constant, even if it's compound, + %% so it's bugger all whether it is sent or not. + case cerl:concrete(T) of + [] -> {[empty()], St}; + X when is_atom(X) -> {[empty()], St}; + X when is_integer(X) -> {[empty()], St}; + X when is_float(X) -> {[empty()], St}; + _ -> + exit({not_literal, T}) + end; + var -> + %% If a variable is not already in the store here, it must + %% be free in the program. + L1 = get_label(T), + Vars = St#state.vars, + case dict:find(L1, Vars) of + {ok, X} -> + {[X], St}; + error -> +%%% io:fwrite("free var: ~w.\n",[L1]), + X = unsafe(), + St1 = St#state{vars = dict:store(L1, X, Vars)}, + {[X], St1} + end; + 'fun' -> + %% Must revisit the fun also, because its environment might + %% have changed. (We don't keep track of such dependencies.) + L1 = get_label(T), + St1 = St#state{work = add_work([L1], St#state.work)}, + %% Currently, lambda expressions can only be locally + %% allocated, and therefore we have to force copying by + %% treating them as "unsafe" for now. + {[unsafe()], St1}; + %% {[singleton(L1)], St1}; + values -> + visit_list(values_es(T), L, St); + cons -> + {[X1, X2], St1} = visit_list([cons_hd(T), cons_tl(T)], L, St), + L1 = get_label(T), + X = make_cons(L1, X1, X2), + %% Also store the values of the elements. + Hd = get_hd(X), + Tl = get_tl(X), + St2 = St1#state{vars = dict:store(L1, [Hd, Tl], St1#state.vars)}, + {[X], St2}; + tuple -> + {Xs, St1} = visit_list(tuple_es(T), L, St), + L1 = get_label(T), + %% Also store the values of the elements. + St2 = St1#state{vars = dict:store(L1, Xs, St1#state.vars)}, + {[struct(L1, Xs)], St2}; + 'let' -> + {Xs, St1} = visit(let_arg(T), L, St), + Vars = bind_vars(let_vars(T), Xs, St1#state.vars), + visit(let_body(T), L, St1#state{vars = Vars}); + seq -> + {_, St1} = visit(seq_arg(T), L, St), + visit(seq_body(T), L, St1); + apply -> + {_F, St1} = visit(apply_op(T), L, St), + {As, St2} = visit_list(apply_args(T), L, St1), + L1 = get_label(T), + Ls = get_deps(L1, St#state.dep), + Out = St2#state.out, + Xs1 = join_list([dict:fetch(X, Out) || X <- Ls]), + {Xs1, call_site(Ls, As, St2)}; + call -> + M = call_module(T), + F = call_name(T), + As = call_args(T), + {_, St1} = visit(M, L, St), + {_, St2} = visit(F, L, St1), + {Xs, St3} = visit_list(As, L, St2), + L1 = get_label(T), + remote_call(M, F, Xs, As, L1, St3); + primop -> + As = primop_args(T), + {Xs, St1} = visit_list(As, L, St), + F = atom_val(primop_name(T)), + primop_call(F, length(Xs), Xs, As, St1); + 'case' -> + {Xs, St1} = visit(case_arg(T), L, St), + visit_clauses(Xs, case_clauses(T), L, St1); + 'receive' -> + %% The received value is of course a message, so it + %% is 'empty()', not 'unsafe()'. + X = empty(), + {Xs1, St1} = visit_clauses([X], receive_clauses(T), L, St), + {_, St2} = visit(receive_timeout(T), L, St1), + {Xs2, St3} = visit(receive_action(T), L, St2), + {join(Xs1, Xs2), St3}; + 'try' -> + {Xs1, St1} = visit(try_arg(T), L, St), + X = unsafe(), + Vars = bind_vars(try_vars(T), Xs1, St1#state.vars), + {Xs2, St2} = visit(try_body(T), L, St1#state{vars = Vars}), + EVars = bind_vars(try_evars(T), [X, X, X], St2#state.vars), + {Xs3, St3} = visit(try_handler(T), L, St2#state{vars = EVars}), + {join(Xs2, Xs3), St3}; + 'catch' -> + %% If we catch an exception, we can get unsafe data. + {Xs, St1} = visit(catch_body(T), L, St), + {join([unsafe()], Xs), St1}; + binary -> + %% Binaries are heap objects, but we don't have special + %% shared-heap allocation operators for them at the moment. + %% They must therefore be treated as unsafe. + {_, St1} = visit_list(binary_segments(T), L, St), + {[unsafe()], St1}; + bitstr -> + %% The other fields are constant literals. + {_, St1} = visit(bitstr_val(T), L, St), + {_, St2} = visit(bitstr_size(T), L, St1), + {none, St2}; + letrec -> + %% All the bound funs should be revisited, because the + %% environment might have changed. + Ls = [get_label(F) || {_, F} <- letrec_defs(T)], + St1 = St#state{work = add_work(Ls, St#state.work)}, + visit(letrec_body(T), L, St1); + module -> + %% We regard a module as a tuple of function variables in + %% the body of a `letrec'. + visit(c_letrec(module_defs(T), + ann_c_tuple([{label, get_label(T)}], + module_exports(T))), + L, St) + end. + +visit_clause(T, Xs, L, St) -> + Vars = bind_pats(clause_pats(T), Xs, St#state.vars), + {_, St1} = visit(clause_guard(T), L, St#state{vars = Vars}), + visit(clause_body(T), L, St1). + +%% We assume correct value-list typing. + +visit_list([T | Ts], L, St) -> + {Xs, St1} = visit(T, L, St), + {Xs1, St2} = visit_list(Ts, L, St1), + X = case Xs of + [X1] -> X1; + _ -> empty() + end, + {[X | Xs1], St2}; +visit_list([], _L, St) -> + {[], St}. + +visit_clauses(Xs, [T | Ts], L, St) -> + {Xs1, St1} = visit_clause(T, Xs, L, St), + {Xs2, St2} = visit_clauses(Xs, Ts, L, St1), + {join(Xs1, Xs2), St2}; +visit_clauses(_, [], _L, St) -> + {none, St}. + +bind_defs([{V, F} | Ds], Vars) -> + bind_defs(Ds, dict:store(get_label(V), singleton(get_label(F)), Vars)); +bind_defs([], Vars) -> + Vars. + +bind_pats(Ps, none, Vars) -> + bind_pats_single(Ps, empty(), Vars); +bind_pats(Ps, Xs, Vars) -> + if length(Xs) =:= length(Ps) -> + bind_pats_list(Ps, Xs, Vars); + true -> + bind_pats_single(Ps, empty(), Vars) + end. + +%% The lists might not be of the same length. + +bind_pats_list([P | Ps], [X | Xs], Vars) -> + bind_pats_list(Ps, Xs, bind_pat_vars(P, X, Vars)); +bind_pats_list(Ps, [], Vars) -> + bind_pats_single(Ps, empty(), Vars); +bind_pats_list([], _, Vars) -> + Vars. + +bind_pats_single([P | Ps], X, Vars) -> + bind_pats_single(Ps, X, bind_pat_vars(P, X, Vars)); +bind_pats_single([], _X, Vars) -> + Vars. + +bind_pat_vars(P, X, Vars) -> + case type(P) of + var -> + dict:store(get_label(P), X, Vars); + literal -> + Vars; + cons -> + bind_pats_list([cons_hd(P), cons_tl(P)], + [get_hd(X), get_tl(X)], Vars); + tuple -> + case elements(X) of + none -> + bind_vars_single(pat_vars(P), X, Vars); + Xs -> + bind_pats_list(tuple_es(P), Xs, Vars) + end; + binary -> + %% See the handling of binary-expressions. + bind_pats_single(binary_segments(P), unsafe(), Vars); + bitstr -> + %% See the handling of binary-expressions. + bind_pats_single([bitstr_val(P), bitstr_size(P)], + unsafe(), Vars); + alias -> + P1 = alias_pat(P), + Vars1 = bind_pat_vars(P1, X, Vars), + dict:store(get_label(alias_var(P)), X, Vars1) + end. + +%%% %% This is the "exact" version of list representation, which simply +%%% %% mimics the actual cons, head and tail operations. +%%% make_cons(L, X1, X2) -> +%%% struct(L1, [X1, X2]). +%%% get_hd(X) -> +%%% case elements(X) of +%%% none -> X; +%%% [X1 | _] -> X1; +%%% _ -> empty() +%%% end. +%%% get_tl(X) -> +%%% case elements(X) of +%%% none -> X; +%%% [_, X2 | _] -> X2; +%%% _ -> empty() +%%% end. + +%% This version does not unnecessarily confuse spine labels with element +%% labels, and is safe. However, it loses precision if cons cells are +%% used for other things than proper lists. + +make_cons(L, X1, X2) -> + %% join subtypes and cons locations + join_single(struct(L, [X1]), X2). + +get_hd(X) -> + case elements(X) of + none -> X; + [X1 | _] -> X1; % First element represents list subtype. + _ -> empty() + end. + +get_tl(X) -> X. % Tail of X has same type as X. + +bind_vars(Vs, none, Vars) -> + bind_vars_single(Vs, empty(), Vars); +bind_vars(Vs, Xs, Vars) -> + if length(Vs) =:= length(Xs) -> + bind_vars_list(Vs, Xs, Vars); + true -> + bind_vars_single(Vs, empty(), Vars) + end. + +bind_vars_list([V | Vs], [X | Xs], Vars) -> + bind_vars_list(Vs, Xs, dict:store(get_label(V), X, Vars)); +bind_vars_list([], [], Vars) -> + Vars. + +bind_vars_single([V | Vs], X, Vars) -> + bind_vars_single(Vs, X, dict:store(get_label(V), X, Vars)); +bind_vars_single([], _X, Vars) -> + Vars. + +%% This handles a call site, updating parameter variables with respect +%% to the actual parameters. The 'external' function is handled +%% specially, since it can get an arbitrary number of arguments. For our +%% purposes here, calls to the external function can be ignored. + +call_site(Ls, Xs, St) -> +%%% io:fwrite("call site: ~w -> ~w (~w).\n", [L, Ls, Xs]), + {W, V} = call_site(Ls, Xs, St#state.work, St#state.vars, + St#state.funs, St#state.k), + St#state{work = W, vars = V}. + +call_site([external | Ls], Xs, W, V, Fs, Limit) -> + call_site(Ls, Xs, W, V, Fs, Limit); +call_site([L | Ls], Xs, W, V, Fs, Limit) -> + Vs = fun_vars(dict:fetch(L, Fs)), + case bind_args(Vs, Xs, V, Limit) of + {V1, true} -> + call_site(Ls, Xs, add_work([L], W), V1, Fs, Limit); + {V1, false} -> + call_site(Ls, Xs, W, V1, Fs, Limit) + end; +call_site([], _, W, V, _, _) -> + {W, V}. + +add_dep(Source, Target, Deps) -> + case dict:find(Source, Deps) of + {ok, X} -> + case set__is_member(Target, X) of + true -> + Deps; + false -> +%%% io:fwrite("new dep: ~w <- ~w.\n", [Target, Source]), + dict:store(Source, set__add(Target, X), Deps) + end; + error -> +%%% io:fwrite("new dep: ~w <- ~w.\n", [Target, Source]), + dict:store(Source, set__singleton(Target), Deps) + end. + +%% If the arity does not match the call, nothing is done here. + +bind_args(Vs, Xs, Vars, Limit) -> + if length(Vs) =:= length(Xs) -> + bind_args(Vs, Xs, Vars, Limit, false); + true -> + {Vars, false} + end. + +bind_args([V | Vs], [X | Xs], Vars, Limit, Ch) -> + L = get_label(V), + {Vars1, Ch1} = bind_arg(L, X, Vars, Limit, Ch), + bind_args(Vs, Xs, Vars1, Limit, Ch1); +bind_args([], [], Vars, _Limit, Ch) -> + {Vars, Ch}. + +%% bind_arg(L, X, Vars, Limit) -> +%% bind_arg(L, X, Vars, Limit, false). + +bind_arg(L, X, Vars, Limit, Ch) -> + X0 = dict:fetch(L, Vars), + X1 = limit_single(join_single(X, X0), Limit), + case equal_single(X0, X1) of + true -> + {Vars, Ch}; + false -> +%%% io:fwrite("arg (~w) changed: ~w <- ~w + ~w.\n", +%%% [L, X1, X0, X]), + {dict:store(L, X1, Vars), true} + end. + +%% This handles escapes from things like primops and remote calls. + +escape(Xs, Ns, St) -> + escape(Xs, Ns, 1, St). + +escape([_ | Xs], Ns=[N1 | _], N, St) when is_integer(N1), N1 > N -> + escape(Xs, Ns, N + 1, St); +escape([X | Xs], [N | Ns], N, St) -> + Vars = St#state.vars, + X0 = dict:fetch(escape, Vars), + X1 = join_single(X, X0), + case equal_single(X0, X1) of + true -> + escape(Xs, Ns, N + 1, St); + false -> +%%% io:fwrite("escape changed: ~w <- ~w + ~w.\n", [X1, X0, X]), + Vars1 = dict:store(escape, X1, Vars), + escape(Xs, Ns, N + 1, St#state{vars = Vars1}) + end; +escape(Xs, [_ | Ns], N, St) -> + escape(Xs, Ns, N + 1, St); +escape(_, _, _, St) -> + St. + +%% Handle primop calls: (At present, we assume that all unknown calls +%% yield exactly one value. This might have to be changed.) + +primop_call(F, A, Xs, _As, St0) -> + %% St1 = case is_escape_op(F, A) of + %% [] -> St0; + %% Ns -> escape(Xs, Ns, St0) + %% end, + St1 = St0, + case is_imm_op(F, A) of + true -> + {[empty()], St1}; + false -> + call_unknown(Xs, St1) + end. + +%% Handle remote-calls: (At present, we assume that all unknown calls +%% yield exactly one value. This might have to be changed.) + +remote_call(M, F, Xs, As, L, St) -> + case is_c_atom(M) andalso is_c_atom(F) of + true -> + remote_call_1(atom_val(M), atom_val(F), length(Xs), + Xs, As, L, St); + false -> + %% Unknown function + call_unknown(Xs, St) + end. + +%% When calling an unknown function, we assume that the result does +%% *not* contain any of the constructors in its arguments (but it could +%% return locally allocated data that we don't know about). Note that +%% even a "pure" function can still cons up new data. + +call_unknown(_Xs, St) -> + {[unsafe()], St}. + +%% We need to handle some important standard functions in order to get +%% decent precision. +%% TODO: foldl, map, mapfoldl + +remote_call_1(erlang, hd, 1, [X], _As, _L, St) -> + {[get_hd(X)], St}; +remote_call_1(erlang, tl, 1, [X], _As, _L, St) -> + {[get_tl(X)], St}; +remote_call_1(erlang, element, 2, [_,X], [N|_], _L, St) -> + case elements(X) of + none -> {[X], St}; + Xs -> + case is_c_int(N) of + true -> + N1 = int_val(N), + if is_integer(N1), 1 =< N1, N1 =< length(Xs) -> + {[nth(N1, Xs)], St}; + true -> + {none, St} + end; + false -> + %% Even if we don't know which element is selected, + %% we know that the top level is never part of the + %% returned value. + {[join_single_list(Xs)], St} + end + end; +remote_call_1(erlang, setelement, 3, [_,X, Y], [N|_], L, St) -> + %% The constructor gets the label of the call operation. + case elements(X) of + none -> {[join_single(singleton(L), join_single(X, Y))], St}; + Xs -> + case is_c_int(N) of + true -> + N1 = int_val(N), + if is_integer(N1), 1 =< N1, N1 =< length(Xs) -> + Xs1 = set_nth(N1, Y, Xs), + {[struct(L, Xs1)], St}; + true -> + {none, St} + end; + false -> + %% Even if we don't know which element is selected, + %% we know that the top level is never part of the + %% returned value (a new tuple is always created). + Xs1 = [join_single(Y, X1) || X1 <- Xs], + {[struct(L, Xs1)], St} + end + end; +remote_call_1(erlang, '++', 2, [X1,X2], _As, _L, St) -> + %% Note: this is unsafe for non-proper lists! (See make_cons/3). + %% No safe version is implemented. + {[join_single(X1, X2)], St}; +remote_call_1(erlang, '--', 2, [X1,_X2], _As, _L, St) -> + {[X1], St}; +remote_call_1(lists, append, 2, Xs, As, L, St) -> + remote_call_1(erlang, '++', 2, Xs, As, L, St); +remote_call_1(lists, subtract, 2, Xs, As, L, St) -> + remote_call_1(erlang, '--', 2, Xs, As, L, St); +remote_call_1(M, F, A, Xs, _As, _L, St0) -> + St1 = case is_escape_op(M, F, A) of + [] -> St0; + Ns -> escape(Xs, Ns, St0) + end, + case is_imm_op(M, F, A) of + true -> + {[empty()], St1}; + false -> + call_unknown(Xs, St1) + end. + +%% 1-based n:th-element list selector and update function. + +nth(1, [X | _Xs]) -> X; +nth(N, [_X | Xs]) when N > 1 -> nth(N - 1, Xs). + +set_nth(1, Y, [_X | Xs]) -> [Y | Xs]; +set_nth(N, Y, [X | Xs]) when N > 1 -> [X | set_nth(N - 1, Y, Xs)]. + +%% Domain: none | [V], where V = {S, none} | {S, [V]}, S = set(integer()). + +join(none, Xs2) -> Xs2; +join(Xs1, none) -> Xs1; +join(Xs1, Xs2) -> + if length(Xs1) =:= length(Xs2) -> + join_1(Xs1, Xs2); + true -> + none + end. + +join_1([X1 | Xs1], [X2 | Xs2]) -> + [join_single(X1, X2) | join_1(Xs1, Xs2)]; +join_1([], []) -> + []. + +join_list([Xs | Xss]) -> + join(Xs, join_list(Xss)); +join_list([]) -> + none. + +empty() -> {set__new(), []}. + +singleton(X) -> {set__singleton(X), []}. + +struct(X, Xs) -> {set__singleton(X), Xs}. + +elements({_, Xs}) -> Xs. + +unsafe() -> {set__singleton(unsafe), none}. + +equal(none, none) -> true; +equal(none, _) -> false; +equal(_, none) -> false; +equal(X1, X2) -> equal_1(X1, X2). + +equal_1([X1 | Xs1], [X2 | Xs2]) -> + equal_single(X1, X2) andalso equal_1(Xs1, Xs2); +equal_1([], []) -> true; +equal_1(_, _) -> false. + +equal_single({S1, none}, {S2, none}) -> + set__equal(S1, S2); +equal_single({_, none}, _) -> + false; +equal_single(_, {_, none}) -> + false; +equal_single({S1, Vs1}, {S2, Vs2}) -> + set__equal(S1, S2) andalso equal_single_lists(Vs1, Vs2). + +equal_single_lists([X1 | Xs1], [X2 | Xs2]) -> + equal_single(X1, X2) andalso equal_single_lists(Xs1, Xs2); +equal_single_lists([], []) -> + true; +equal_single_lists(_, _) -> + false. + +join_single({S, none}, V) -> + {set__union(S, labels(V)), none}; +join_single(V, {S, none}) -> + {set__union(S, labels(V)), none}; +join_single({S1, Vs1}, {S2, Vs2}) -> + {set__union(S1, S2), join_single_lists(Vs1, Vs2)}. + +join_single_list([V | Vs]) -> + join_single(V, join_single_list(Vs)); +join_single_list([]) -> + empty(). + +%% If one list has more elements that the other, and N is the length of +%% the longer list, then the result has N elements. + +join_single_lists([V1], [V2]) -> + [join_single(V1, V2)]; +join_single_lists([V1 | Vs1], [V2 | Vs2]) -> + [join_single(V1, V2) | join_single_lists(Vs1, Vs2)]; +join_single_lists([], Vs) -> Vs; +join_single_lists(Vs, []) -> Vs. + +collapse(V) -> + {labels(V), none}. + +%% collapse_list([]) -> +%% empty(); +%% collapse_list(Vs) -> +%% {labels_list(Vs), none}. + +labels({S, none}) -> S; +labels({S, []}) -> S; +labels({S, Vs}) -> set__union(S, labels_list(Vs)). + +labels_list([V]) -> + labels(V); +labels_list([V | Vs]) -> + set__union(labels(V), labels_list(Vs)). + +limit(none, _K) -> none; +limit(X, K) -> limit_list(X, K). + +limit_list([X | Xs], K) -> + [limit_single(X, K) | limit_list(Xs, K)]; +limit_list([], _) -> + []. + +limit_single({_, none} = V, _K) -> + V; +limit_single({_, []} = V, _K) -> + V; +limit_single({S, Vs}, K) when K > 0 -> + {S, limit_list(Vs, K - 1)}; +limit_single(V, _K) -> + collapse(V). + +%% Set abstraction for label sets in the domain. + +%% set__is_empty([]) -> true; +%% set__is_empty(_) -> false. + +set__new() -> []. + +set__singleton(X) -> [X]. + +set__to_list(S) -> S. + +%% set__from_list(S) -> ordsets:from_list(S). + +set__union(X, Y) -> ordsets:union(X, Y). + +set__add(X, S) -> ordsets:add_element(X, S). + +set__is_member(X, S) -> ordsets:is_element(X, S). + +%% set__subtract(X, Y) -> ordsets:subtract(X, Y). + +set__equal(X, Y) -> X =:= Y. + +%% A simple but efficient functional queue. + +queue__new() -> {[], []}. + +queue__put(X, {In, Out}) -> {[X | In], Out}. + +queue__get({In, [X | Out]}) -> {ok, X, {In, Out}}; +queue__get({[], _}) -> empty; +queue__get({In, _}) -> + [X | In1] = lists:reverse(In), + {ok, X, {[], In1}}. + +%% The work list - a queue without repeated elements. + +init_work() -> + {queue__new(), sets:new()}. + +add_work(Ls, {Q, Set}) -> + add_work(Ls, Q, Set). + +%% Note that the elements are enqueued in order. + +add_work([L | Ls], Q, Set) -> + case sets:is_element(L, Set) of + true -> + add_work(Ls, Q, Set); + false -> + add_work(Ls, queue__put(L, Q), sets:add_element(L, Set)) + end; +add_work([], Q, Set) -> + {Q, Set}. + +take_work({Queue0, Set0}) -> + case queue__get(Queue0) of + {ok, L, Queue1} -> + Set1 = sets:del_element(L, Set0), + {ok, L, {Queue1, Set1}}; + empty -> + none + end. + +get_deps(L, Dep) -> + case dict:find(L, Dep) of + {ok, Ls} -> Ls; + error -> [] + end. + +%% Escape operators may let their arguments escape. For this analysis, +%% only send-operations are considered as causing escapement, and only +%% in specific arguments. + +%% is_escape_op(_F, _A) -> []. + +-spec is_escape_op(module(), atom(), arity()) -> [arity()]. + +is_escape_op(erlang, '!', 2) -> [2]; +is_escape_op(erlang, send, 2) -> [2]; +is_escape_op(erlang, spawn, 1) -> [1]; +is_escape_op(erlang, spawn, 3) -> [3]; +is_escape_op(erlang, spawn, 4) -> [4]; +is_escape_op(erlang, spawn_link, 3) -> [3]; +is_escape_op(erlang, spawn_link, 4) -> [4]; +is_escape_op(_M, _F, _A) -> []. + +%% "Immediate" operators will never return heap allocated data. This is +%% of course true for operators that never return, like 'exit/1'. (Note +%% that floats are always heap allocated objects, and that most integer +%% arithmetic can return a bignum on the heap.) + +-spec is_imm_op(atom(), arity()) -> boolean(). + +is_imm_op(match_fail, 1) -> true; +is_imm_op(_, _) -> false. + +-spec is_imm_op(module(), atom(), arity()) -> boolean(). + +is_imm_op(erlang, self, 0) -> true; +is_imm_op(erlang, '=:=', 2) -> true; +is_imm_op(erlang, '==', 2) -> true; +is_imm_op(erlang, '=/=', 2) -> true; +is_imm_op(erlang, '/=', 2) -> true; +is_imm_op(erlang, '<', 2) -> true; +is_imm_op(erlang, '=<', 2) -> true; +is_imm_op(erlang, '>', 2) -> true; +is_imm_op(erlang, '>=', 2) -> true; +is_imm_op(erlang, 'and', 2) -> true; +is_imm_op(erlang, 'or', 2) -> true; +is_imm_op(erlang, 'xor', 2) -> true; +is_imm_op(erlang, 'not', 1) -> true; +is_imm_op(erlang, is_alive, 0) -> true; +is_imm_op(erlang, is_atom, 1) -> true; +is_imm_op(erlang, is_binary, 1) -> true; +is_imm_op(erlang, is_builtin, 3) -> true; +is_imm_op(erlang, is_constant, 1) -> true; +is_imm_op(erlang, is_float, 1) -> true; +is_imm_op(erlang, is_function, 1) -> true; +is_imm_op(erlang, is_integer, 1) -> true; +is_imm_op(erlang, is_list, 1) -> true; +is_imm_op(erlang, is_number, 1) -> true; +is_imm_op(erlang, is_pid, 1) -> true; +is_imm_op(erlang, is_port, 1) -> true; +is_imm_op(erlang, is_process_alive, 1) -> true; +is_imm_op(erlang, is_reference, 1) -> true; +is_imm_op(erlang, is_tuple, 1) -> true; +is_imm_op(erlang, length, 1) -> true; % never a bignum +is_imm_op(erlang, list_to_atom, 1) -> true; +is_imm_op(erlang, node, 0) -> true; +is_imm_op(erlang, node, 1) -> true; +is_imm_op(erlang, throw, 1) -> true; +is_imm_op(erlang, exit, 1) -> true; +is_imm_op(erlang, error, 1) -> true; +is_imm_op(erlang, error, 2) -> true; +is_imm_op(_, _, _) -> false. |