author | Erlang/OTP <[email protected]> | 2009-11-20 14:54:40 +0000
committer | Erlang/OTP <[email protected]> | 2009-11-20 14:54:40 +0000
commit | 84adefa331c4159d432d22840663c38f155cd4c1 (patch)
tree | bff9a9c66adda4df2106dfd0e5c053ab182a12bd /lib/syntax_tools/src
The R13B03 release. (tag: OTP_R13B03)
Diffstat (limited to 'lib/syntax_tools/src')
-rw-r--r-- | lib/syntax_tools/src/Makefile | 84
-rw-r--r-- | lib/syntax_tools/src/epp_dodger.erl | 791
-rw-r--r-- | lib/syntax_tools/src/erl_comment_scan.erl | 280
-rw-r--r-- | lib/syntax_tools/src/erl_prettypr.erl | 1153
-rw-r--r-- | lib/syntax_tools/src/erl_recomment.erl | 757
-rw-r--r-- | lib/syntax_tools/src/erl_syntax.erl | 6938
-rw-r--r-- | lib/syntax_tools/src/erl_syntax_lib.erl | 2168
-rw-r--r-- | lib/syntax_tools/src/erl_tidy.erl | 1898
-rw-r--r-- | lib/syntax_tools/src/igor.erl | 3023
-rw-r--r-- | lib/syntax_tools/src/prettypr.erl | 1301
-rw-r--r-- | lib/syntax_tools/src/syntax_tools.app.src | 17
-rw-r--r-- | lib/syntax_tools/src/syntax_tools.appup.src | 1
12 files changed, 18411 insertions, 0 deletions
diff --git a/lib/syntax_tools/src/Makefile b/lib/syntax_tools/src/Makefile new file mode 100644 index 0000000000..5ffe85c975 --- /dev/null +++ b/lib/syntax_tools/src/Makefile @@ -0,0 +1,84 @@ +# +# Copyright (C) 2004, Ericsson Telecommunications +# Authors: Richard Carlsson, Bertil Karlsson +# +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include ../vsn.mk +VSN=$(SYNTAX_TOOLS_VSN) + +# ---------------------------------------------------- +# Release directory specification +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/lib/syntax_tools-$(VSN) + + +# +# Common Macros +# + +EBIN = ../ebin + +ERL_COMPILE_FLAGS += +warn_unused_vars +nowarn_shadow_vars +warn_unused_import +warn_obsolete_guard + +SOURCES=erl_syntax.erl erl_prettypr.erl erl_syntax_lib.erl \ + erl_comment_scan.erl erl_recomment.erl erl_tidy.erl \ + epp_dodger.erl prettypr.erl igor.erl + +OBJECTS=$(SOURCES:%.erl=$(EBIN)/%.$(EMULATOR)) $(APP_TARGET) $(APPUP_TARGET) + +APP_FILE= syntax_tools.app +APP_SRC= $(APP_FILE).src +APP_TARGET= $(EBIN)/$(APP_FILE) + +APPUP_FILE= syntax_tools.appup +APPUP_SRC= $(APPUP_FILE).src +APPUP_TARGET= $(EBIN)/$(APPUP_FILE) + +# ---------------------------------------------------- +# Targets +# ---------------------------------------------------- + +debug opt: $(OBJECTS) + +all: $(OBJECTS) + + +clean: + rm -f $(OBJECTS) + rm -f core *~ + +distclean: clean + +realclean: clean + +$(EBIN)/%.$(EMULATOR):%.erl + erlc -W $(ERL_COMPILE_FLAGS) -o$(EBIN) $< + +# ---------------------------------------------------- +# Special Build Targets +# ---------------------------------------------------- + +$(APP_TARGET): $(APP_SRC) ../vsn.mk + sed -e 's;%VSN%;$(VSN);' $< > $@ + +$(APPUP_TARGET): $(APPUP_SRC) ../vsn.mk + sed -e 's;%VSN%;$(VSN);' $< > $@ + +# ---------------------------------------------------- +# Release Target +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +release_spec: opt + $(INSTALL_DIR) $(RELSYSDIR)/ebin + $(INSTALL_DATA) $(OBJECTS) $(RELSYSDIR)/ebin + $(INSTALL_DIR) $(RELSYSDIR)/src + $(INSTALL_DATA) $(SOURCES) $(RELSYSDIR)/src + +release_docs_spec: + diff --git a/lib/syntax_tools/src/epp_dodger.erl b/lib/syntax_tools/src/epp_dodger.erl new file mode 100644 index 0000000000..7aef549574 --- /dev/null +++ b/lib/syntax_tools/src/epp_dodger.erl @@ -0,0 +1,791 @@ +%% ===================================================================== +%% This library is free software; you can redistribute it and/or modify +%% it under the terms of the GNU Lesser General Public License as +%% published by the Free Software Foundation; either version 2 of the +%% License, or (at your option) any later version. +%% +%% This library is distributed in the hope that it will be useful, but +%% WITHOUT ANY WARRANTY; without even the implied warranty of +%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +%% Lesser General Public License for more details. 
+%% +%% You should have received a copy of the GNU Lesser General Public +%% License along with this library; if not, write to the Free Software +%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +%% USA +%% +%% $Id$ +%% +%% @copyright 2001-2006 Richard Carlsson +%% @author Richard Carlsson <[email protected]> +%% @end +%% ===================================================================== + +%% @doc `epp_dodger' - bypasses the Erlang preprocessor. +%% +%% <p>This module tokenises and parses most Erlang source code without +%% expanding preprocessor directives and macro applications, as long as +%% these are syntactically "well-behaved". Because the normal parse +%% trees of the `erl_parse' module cannot represent these things +%% (normally, they are expanded by the Erlang preprocessor {@link +%% //stdlib/epp} before the parser sees them), an extended syntax tree +%% is created, using the {@link erl_syntax} module.</p> + + +%% NOTES: +%% +%% * It's OK if the result does not parse - then at least nothing +%% strange happens, and the user can resort to full preprocessing. +%% However, we must avoid generating a token stream that is accepted by +%% the parser, but has a different meaning than the intended. A typical +%% example is when someone uses token-level string concatenation with +%% macros, as in `"foo" ?bar' (where `?bar' expands to a string). If we +%% replace the tokens `? bar' with `( ... )', to preserve precedence, +%% the result will be parsed as an application `"foo" ( ... )' and cause +%% trouble later on. We must detect such cases and report an error. +%% +%% * It is pointless to add a mechanism for tracking which macros are +%% known to take arguments, and which are known to take no arguments, +%% since a lot of the time we will not have seen the macro definition +%% anyway (it's usually in a header file). Hence, we try to use +%% heuristics instead. In most cases, the token sequence `? foo (' +%% indicates that it is a call of a macro that is supposed to take +%% arguments, but e.g., in the context `: ? foo (', the argument list +%% typically belongs to a remote function call, as in `m:?f(...)' and +%% should be parsed as `m:(?f)(...)' unless it is actually a try-clause +%% pattern such as `throw:?f(...) ->'. +%% +%% * We do our best to make macros without arguments pass the parsing +%% stage transparently. Atoms are accepted in most contexts, but +%% variables are not, so we use only atoms to encode these macros. +%% Sadly, the parsing sometimes discards even the line number info from +%% atom tokens, so we can only use the actual characters for this. +%% +%% * We recognize `?m(...' at the start of a form and prevent this from +%% being interpreted as a macro with arguments, since it is probably a +%% function definition. Likewise with attributes `-?m(...'. + +-module(epp_dodger). + +-export([parse_file/1, quick_parse_file/1, parse_file/2, + quick_parse_file/2, parse/1, quick_parse/1, parse/2, + quick_parse/2, parse/3, quick_parse/3, parse_form/2, + parse_form/3, quick_parse_form/2, quick_parse_form/3, + format_error/1, tokens_to_string/1]). + + +%% The following should be: 1) pseudo-uniquely identifiable, and 2) +%% cause nice looking error messages when the parser has to give up. + +-define(macro_call, '? <macro> ('). +-define(atom_prefix, "? "). +-define(var_prefix, "?,"). +-define(pp_form, '?preprocessor declaration?'). + + +%% @type errorinfo() = {ErrorLine::integer(), +%% Module::atom(), +%% Descriptor::term()}. 
+%% +%% This is a so-called Erlang I/O ErrorInfo structure; see the {@link +%% //stdlib/io} module for details. + + +%% ===================================================================== +%% @spec parse_file(File) -> {ok, Forms} | {error, errorinfo()} +%% File = file:filename() +%% Forms = [erl_syntax:syntaxTree()] +%% +%% @equiv parse_file(File, []) + +parse_file(File) -> + parse_file(File, []). + +%% @spec parse_file(File, Options) -> {ok, Forms} | {error, errorinfo()} +%% File = file:filename() +%% Options = [term()] +%% Forms = [erl_syntax:syntaxTree()] +%% +%% @doc Reads and parses a file. If successful, `{ok, Forms}' +%% is returned, where `Forms' is a list of abstract syntax +%% trees representing the "program forms" of the file (cf. +%% `erl_syntax:is_form/1'). Otherwise, `{error, +%% errorinfo()}' is returned, typically if the file could not be +%% opened. Note that parse errors show up as error markers in the +%% returned list of forms; they do not cause this function to fail or +%% return `{error,errorinfo()}'. +%% +%% Options: +%% <dl> +%% <dt>{@type {no_fail, boolean()@}}</dt> +%% <dd>If `true', this makes `epp_dodger' replace any program forms +%% that could not be parsed with nodes of type `text' (see {@link +%% erl_syntax:text/1}), representing the raw token sequence of the +%% form, instead of reporting a parse error. The default value is +%% `false'.</dd> +%% <dt>{@type {clever, boolean()@}}</dt> +%% <dd>If set to `true', this makes `epp_dodger' try to repair the +%% source code as it seems fit, in certain cases where parsing would +%% otherwise fail. Currently, it inserts `++'-operators between string +%% literals and macros where it looks like concatenation was intended. +%% The default value is `false'.</dd> +%% </dl> +%% +%% @see parse/2 +%% @see quick_parse_file/1 +%% @see erl_syntax:is_form/1 + +parse_file(File, Options) -> + parse_file(File, fun parse/3, Options). + +%% @spec quick_parse_file(File) -> {ok, Forms} | {error, errorinfo()} +%% File = file:filename() +%% Forms = [erl_syntax:syntaxTree()] +%% +%% @equiv quick_parse_file(File, []) + +quick_parse_file(File) -> + quick_parse_file(File, []). + +%% @spec quick_parse_file(File, Options) -> +%% {ok, Forms} | {error, errorinfo()} +%% File = file:filename() +%% Options = [term()] +%% Forms = [erl_syntax:syntaxTree()] +%% +%% @doc Similar to {@link parse_file/2}, but does a more quick-and-dirty +%% processing of the code. Macro definitions and other preprocessor +%% directives are discarded, and all macro calls are replaced with +%% atoms. This is useful when only the main structure of the code is of +%% interest, and not the details. Furthermore, the quick-parse method +%% can usually handle more strange cases than the normal, more exact +%% parsing. +%% +%% Options: see {@link parse_file/2}. Note however that for +%% `quick_parse_file/2', the option `no_fail' is `true' by default. +%% +%% @see quick_parse/2 +%% @see parse_file/2 + +quick_parse_file(File, Options) -> + parse_file(File, fun quick_parse/3, Options ++ [no_fail]). + +parse_file(File, Parser, Options) -> + case file:open(File, [read]) of + {ok, Dev} -> + try Parser(Dev, 1, Options) + after ok = file:close(Dev) + end; + {error, _} = Error -> + Error + end. + + +%% ===================================================================== +%% @spec parse(IODevice) -> {ok, Forms} | {error, errorinfo()} +%% @equiv parse(IODevice, 1) + +parse(Dev) -> + parse(Dev, 1). 
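%% Sketch (illustration only, not part of the diffed source above): a
%% typical call of parse_file/2 with the `no_fail' option; the function
%% name `robust_forms/1' is hypothetical.

robust_forms(FileName) ->
    case epp_dodger:parse_file(FileName, [{no_fail, true}]) of
        {ok, Forms} ->
            %% Parse errors appear as error markers in the form list;
            %% with `no_fail', unparsable forms come back as `text' nodes.
            {ok, [F || F <- Forms, erl_syntax:type(F) =/= error_marker]};
        {error, _Reason} = Error ->
            Error
    end.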
+ +%% @spec parse(IODevice, StartLine) -> {ok, Forms} | {error, errorinfo()} +%% IODevice = pid() +%% StartLine = integer() +%% Forms = [erl_syntax:syntaxTree()] +%% +%% @equiv parse(IODevice, StartLine, []) +%% @see parse/1 + +parse(Dev, L) -> + parse(Dev, L, []). + +%% @spec parse(IODevice, StartLine, Options) -> +%% {ok, Forms} | {error, errorinfo()} +%% IODevice = pid() +%% StartLine = integer() +%% Options = [term()] +%% Forms = [erl_syntax:syntaxTree()] +%% +%% @doc Reads and parses program text from an I/O stream. Characters are +%% read from `IODevice' until end-of-file; apart from this, the +%% behaviour is the same as for {@link parse_file/2}. `StartLine' is the +%% initial line number, which should be a positive integer. +%% +%% @see parse/2 +%% @see parse_file/2 +%% @see parse_form/2 +%% @see quick_parse/3 + +parse(Dev, L0, Options) -> + parse(Dev, L0, fun parse_form/3, Options). + +%% @spec quick_parse(IODevice) -> {ok, Forms} | {error, errorinfo()} +%% @equiv quick_parse(IODevice, 1) + +quick_parse(Dev) -> + quick_parse(Dev, 1). + +%% @spec quick_parse(IODevice, StartLine) -> +%% {ok, Forms} | {error, errorinfo()} +%% IODevice = pid() +%% StartLine = integer() +%% Forms = [erl_syntax:syntaxTree()] +%% +%% @equiv quick_parse(IODevice, StartLine, []) +%% @see quick_parse/1 + +quick_parse(Dev, L) -> + quick_parse(Dev, L, []). + +%% @spec (IODevice, StartLine, Options) -> +%% {ok, Forms} | {error, errorinfo()} +%% IODevice = pid() +%% StartLine = integer() +%% Options = [term()] +%% Forms = [erl_syntax:syntaxTree()] +%% +%% @doc Similar to {@link parse/3}, but does a more quick-and-dirty +%% processing of the code. See {@link quick_parse_file/2} for details. +%% +%% @see quick_parse/2 +%% @see quick_parse_file/2 +%% @see quick_parse_form/2 +%% @see parse/3 + +quick_parse(Dev, L0, Options) -> + parse(Dev, L0, fun quick_parse_form/3, Options). + +parse(Dev, L0, Parser, Options) -> + parse(Dev, L0, [], Parser, Options). + +parse(Dev, L0, Fs, Parser, Options) -> + case Parser(Dev, L0, Options) of + {ok, none, L1} -> + parse(Dev, L1, Fs, Parser, Options); + {ok, F, L1} -> + parse(Dev, L1, [F | Fs], Parser, Options); + {error, IoErr, L1} -> + parse(Dev, L1, [{error, IoErr} | Fs], Parser, Options); + {eof, _L1} -> + {ok, lists:reverse(Fs)} + end. + + +%% ===================================================================== +%% @spec parse_form(IODevice, StartLine) -> {ok, Form, LineNo} +%% | {eof, LineNo} +%% | {error, errorinfo(), LineNo} +%% IODevice = pid() +%% StartLine = integer() +%% Form = erl_syntax:syntaxTree() +%% LineNo = integer() +%% +%% @equiv parse_form(IODevice, StartLine, []) +%% +%% @see quick_parse_form/2 + +parse_form(Dev, L0) -> + parse_form(Dev, L0, []). + +%% @spec parse_form(IODevice, StartLine, Options) -> +%% {ok, Form, LineNo} +%% | {eof, LineNo} +%% | {error, errorinfo(), LineNo} +%% +%% IODevice = pid() +%% StartLine = integer() +%% Options = [term()] +%% Form = erl_syntax:syntaxTree() +%% LineNo = integer() +%% +%% @doc Reads and parses a single program form from an I/O stream. +%% Characters are read from `IODevice' until an end-of-form +%% marker is found (a period character followed by whitespace), or until +%% end-of-file; apart from this, the behaviour is similar to that of +%% `parse/3', except that the return values also contain the +%% final line number given that `StartLine' is the initial +%% line number, and that `{eof, LineNo}' may be returned. 
+%% +%% @see parse/3 +%% @see parse_form/2 +%% @see quick_parse_form/3 + +parse_form(Dev, L0, Options) -> + parse_form(Dev, L0, fun normal_parser/2, Options). + +%% @spec quick_parse_form(IODevice, StartLine) -> +%% {ok, Form, LineNo} +%% | {eof, LineNo} +%% | {error, errorinfo(), LineNo} +%% IODevice = pid() +%% StartLine = integer() +%% Form = erl_syntax:syntaxTree() | none +%% LineNo = integer() +%% +%% @equiv quick_parse_form(IODevice, StartLine, []) +%% +%% @see parse_form/2 + +quick_parse_form(Dev, L0) -> + quick_parse_form(Dev, L0, []). + +%% @spec quick_parse_form(IODevice, StartLine, Options) -> +%% {ok, Form, LineNo} +%% | {eof, LineNo} +%% | {error, errorinfo(), LineNo} +%% +%% IODevice = pid() +%% StartLine = integer() +%% Options = [term()] +%% Form = erl_syntax:syntaxTree() +%% LineNo = integer() +%% +%% @doc Similar to {@link parse_form/3}, but does a more quick-and-dirty +%% processing of the code. See {@link quick_parse_file/2} for details. +%% +%% @see parse/3 +%% @see quick_parse_form/2 +%% @see parse_form/3 + +quick_parse_form(Dev, L0, Options) -> + parse_form(Dev, L0, fun quick_parser/2, Options). + +-record(opt, {clever = false :: boolean()}). + +parse_form(Dev, L0, Parser, Options) -> + NoFail = proplists:get_bool(no_fail, Options), + Opt = #opt{clever = proplists:get_bool(clever, Options)}, + case io:scan_erl_form(Dev, "", L0) of + {ok, Ts, L1} -> + case catch {ok, Parser(Ts, Opt)} of + {'EXIT', Term} -> + {error, io_error(L1, {unknown, Term}), L1}; + {error, Term} -> + IoErr = io_error(L1, Term), + {error, IoErr, L1}; + {parse_error, _IoErr} when NoFail -> + {ok, erl_syntax:set_pos( + erl_syntax:text(tokens_to_string(Ts)), + start_pos(Ts, L1)), + L1}; + {parse_error, IoErr} -> + {error, IoErr, L1}; + {ok, F} -> + {ok, F, L1} + end; + {error, _IoErr, _L1} = Err -> Err; + {eof, _L1} = Eof -> Eof + end. + +io_error(L, Desc) -> + {L, ?MODULE, Desc}. + +start_pos([T | _Ts], _L) -> + element(2, T); +start_pos([], L) -> + L. + +%% Exception-throwing wrapper for the standard Erlang parser stage + +parse_tokens(Ts) -> + parse_tokens(Ts, fun fix_form/1). + +parse_tokens(Ts, Fix) -> + case erl_parse:parse_form(Ts) of + {ok, Form} -> + Form; + {error, IoErr} -> + case Fix(Ts) of + {form, Form} -> + Form; + {retry, Ts1, Fix1} -> + parse_tokens(Ts1, Fix1); + error -> + throw({parse_error, IoErr}) + end + end. + +%% --------------------------------------------------------------------- +%% Quick scanning/parsing - deletes macro definitions and other +%% preprocessor directives, and replaces all macro calls with atoms. + +quick_parser(Ts, _Opt) -> + filter_form(parse_tokens(quickscan_form(Ts))). + +quickscan_form([{'-', _L}, {atom, La, define} | _Ts]) -> + kill_form(La); +quickscan_form([{'-', _L}, {atom, La, undef} | _Ts]) -> + kill_form(La); +quickscan_form([{'-', _L}, {atom, La, include} | _Ts]) -> + kill_form(La); +quickscan_form([{'-', _L}, {atom, La, include_lib} | _Ts]) -> + kill_form(La); +quickscan_form([{'-', _L}, {atom, La, ifdef} | _Ts]) -> + kill_form(La); +quickscan_form([{'-', _L}, {atom, La, ifndef} | _Ts]) -> + kill_form(La); +quickscan_form([{'-', _L}, {atom, La, else} | _Ts]) -> + kill_form(La); +quickscan_form([{'-', _L}, {atom, La, endif} | _Ts]) -> + kill_form(La); +quickscan_form([{'-', L}, {'?', _}, {Type, _, _}=N | [{'(', _} | _]=Ts]) + when Type =:= atom; Type =:= var -> + %% minus, macro and open parenthesis at start of form - assume that + %% the macro takes no arguments; e.g. `-?foo(...).' 
+ quickscan_macros_1(N, Ts, [{'-', L}]); +quickscan_form([{'?', _L}, {Type, _, _}=N | [{'(', _} | _]=Ts]) + when Type =:= atom; Type =:= var -> + %% macro and open parenthesis at start of form - assume that the + %% macro takes no arguments (see scan_macros for details) + quickscan_macros_1(N, Ts, []); +quickscan_form(Ts) -> + quickscan_macros(Ts). + +kill_form(L) -> + [{atom, L, ?pp_form}, {'(', L}, {')', L}, {'->', L}, {atom, L, kill}, + {dot, L}]. + +quickscan_macros(Ts) -> + quickscan_macros(Ts, []). + +quickscan_macros([{'?',_}, {Type, _, A} | Ts], [{string, L, S} | As]) + when Type =:= atom; Type =:= var -> + %% macro after a string literal: change to a single string + {_, Ts1} = skip_macro_args(Ts), + S1 = S ++ quick_macro_string(A), + quickscan_macros(Ts1, [{string, L, S1} | As]); +quickscan_macros([{'?',_}, {Type, _, _}=N | [{'(',_}|_]=Ts], + [{':',_}|_]=As) + when Type =:= atom; Type =:= var -> + %% macro and open parenthesis after colon - check the token + %% following the arguments (see scan_macros for details) + Ts1 = case skip_macro_args(Ts) of + {_, [{'->',_} | _] = Ts2} -> Ts2; + {_, [{'when',_} | _] = Ts2} -> Ts2; + _ -> Ts %% assume macro without arguments + end, + quickscan_macros_1(N, Ts1, As); +quickscan_macros([{'?',_}, {Type, _, _}=N | Ts], As) + when Type =:= atom; Type =:= var -> + %% macro with or without arguments + {_, Ts1} = skip_macro_args(Ts), + quickscan_macros_1(N, Ts1, As); +quickscan_macros([T | Ts], As) -> + quickscan_macros(Ts, [T | As]); +quickscan_macros([], As) -> + lists:reverse(As). + +%% (after a macro has been found and the arglist skipped, if any) +quickscan_macros_1({_Type, _, A}, [{string, L, S} | Ts], As) -> + %% string literal following macro: change to single string + S1 = quick_macro_string(A) ++ S, + quickscan_macros(Ts, [{string, L, S1} | As]); +quickscan_macros_1({_Type, L, A}, Ts, As) -> + %% normal case - just replace the macro with an atom + quickscan_macros(Ts, [{atom, L, quick_macro_atom(A)} | As]). + +quick_macro_atom(A) -> + list_to_atom("?" ++ atom_to_list(A)). + +quick_macro_string(A) -> + "(?" ++ atom_to_list(A) ++ ")". + +%% Skipping to the end of a macro call, tracking open/close constructs. +%% @spec (Tokens) -> {Skipped, Rest} + +skip_macro_args([{'(',_}=T | Ts]) -> + skip_macro_args(Ts, [')'], [T]); +skip_macro_args(Ts) -> + {[], Ts}. 
+ +skip_macro_args([{'(',_}=T | Ts], Es, As) -> + skip_macro_args(Ts, [')' | Es], [T | As]); +skip_macro_args([{'{',_}=T | Ts], Es, As) -> + skip_macro_args(Ts, ['}' | Es], [T | As]); +skip_macro_args([{'[',_}=T | Ts], Es, As) -> + skip_macro_args(Ts, [']' | Es], [T | As]); +skip_macro_args([{'<<',_}=T | Ts], Es, As) -> + skip_macro_args(Ts, ['>>' | Es], [T | As]); +skip_macro_args([{'begin',_}=T | Ts], Es, As) -> + skip_macro_args(Ts, ['end' | Es], [T | As]); +skip_macro_args([{'if',_}=T | Ts], Es, As) -> + skip_macro_args(Ts, ['end' | Es], [T | As]); +skip_macro_args([{'case',_}=T | Ts], Es, As) -> + skip_macro_args(Ts, ['end' | Es], [T | As]); +skip_macro_args([{'receive',_}=T | Ts], Es, As) -> + skip_macro_args(Ts, ['end' | Es], [T | As]); +skip_macro_args([{'try',_}=T | Ts], Es, As) -> + skip_macro_args(Ts, ['end' | Es], [T | As]); +skip_macro_args([{'cond',_}=T | Ts], Es, As) -> + skip_macro_args(Ts, ['end' | Es], [T | As]); +skip_macro_args([{E,_}=T | Ts], [E], As) -> %final close + {lists:reverse([T | As]), Ts}; +skip_macro_args([{E,_}=T | Ts], [E | Es], As) -> %matching close + skip_macro_args(Ts, Es, [T | As]); +skip_macro_args([T | Ts], Es, As) -> + skip_macro_args(Ts, Es, [T | As]); +skip_macro_args([], _Es, _As) -> + throw({error, macro_args}). + +filter_form({function, _, ?pp_form, _, + [{clause, _, [], [], [{atom, _, kill}]}]}) -> + none; +filter_form(T) -> + T. + + +%% --------------------------------------------------------------------- +%% Normal parsing - try to preserve all information + +normal_parser(Ts, Opt) -> + rewrite_form(parse_tokens(scan_form(Ts, Opt))). + +scan_form([{'-', _L}, {atom, La, define} | Ts], Opt) -> + [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La}, + {atom, La, define} | scan_macros(Ts, Opt)]; +scan_form([{'-', _L}, {atom, La, undef} | Ts], Opt) -> + [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La}, + {atom, La, undef} | scan_macros(Ts, Opt)]; +scan_form([{'-', _L}, {atom, La, include} | Ts], Opt) -> + [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La}, + {atom, La, include} | scan_macros(Ts, Opt)]; +scan_form([{'-', _L}, {atom, La, include_lib} | Ts], Opt) -> + [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La}, + {atom, La, include_lib} | scan_macros(Ts, Opt)]; +scan_form([{'-', _L}, {atom, La, ifdef} | Ts], Opt) -> + [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La}, + {atom, La, ifdef} | scan_macros(Ts, Opt)]; +scan_form([{'-', _L}, {atom, La, ifndef} | Ts], Opt) -> + [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La}, + {atom, La, ifndef} | scan_macros(Ts, Opt)]; +scan_form([{'-', _L}, {atom, La, else} | Ts], Opt) -> + [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La}, + {atom, La, else} | scan_macros(Ts, Opt)]; +scan_form([{'-', _L}, {atom, La, endif} | Ts], Opt) -> + [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La}, + {atom, La, endif} | scan_macros(Ts, Opt)]; +scan_form([{'-', L}, {'?', L1}, {Type, _, _}=N | [{'(', _} | _]=Ts], Opt) + when Type =:= atom; Type =:= var -> + %% minus, macro and open parenthesis at start of form - assume that + %% the macro takes no arguments; e.g. `-?foo(...).' + macro(L1, N, Ts, [{'-', L}], Opt); +scan_form([{'?', L}, {Type, _, _}=N | [{'(', _} | _]=Ts], Opt) + when Type =:= atom; Type =:= var -> + %% macro and open parenthesis at start of form - assume that the + %% macro takes no arguments; probably a function declaration on the + %% form `?m(...) -> ...', which will not parse if it is rewritten as + %% `(?m(...)) -> ...', so it must be handled as `(?m)(...) 
-> ...' + macro(L, N, Ts, [], Opt); +scan_form(Ts, Opt) -> + scan_macros(Ts, Opt). + +scan_macros(Ts, Opt) -> + scan_macros(Ts, [], Opt). + +scan_macros([{'?', _}=M, {Type, _, _}=N | Ts], [{string, L, _}=S | As], + #opt{clever = true}=Opt) + when Type =:= atom; Type =:= var -> + %% macro after a string literal: be clever and insert ++ + scan_macros([M, N | Ts], [{'++', L}, S | As], Opt); +scan_macros([{'?', L}, {Type, _, _}=N | [{'(',_}|_]=Ts], + [{':',_}|_]=As, Opt) + when Type =:= atom; Type =:= var -> + %% macro and open parentheses after colon - probably a call + %% `m:?F(...)' so the argument list might belong to the call, not + %% the macro - but it could also be a try-clause pattern + %% `...:?T(...) ->' - we need to check the token following the + %% arguments to decide + {Args, Rest} = skip_macro_args(Ts), + case Rest of + [{'->',_} | _] -> + macro_call(Args, L, N, Rest, As, Opt); + [{'when',_} | _] -> + macro_call(Args, L, N, Rest, As, Opt); + _ -> + macro(L, N, Ts, As, Opt) + end; +scan_macros([{'?', L}, {Type, _, _}=N | [{'(',_}|_]=Ts], As, Opt) + when Type =:= atom; Type =:= var -> + %% macro with arguments + {Args, Rest} = skip_macro_args(Ts), + macro_call(Args, L, N, Rest, As, Opt); +scan_macros([{'?', L }, {Type, _, _}=N | Ts], As, Opt) + when Type =:= atom; Type =:= var -> + %% macro without arguments + macro(L, N, Ts, As, Opt); +scan_macros([T | Ts], As, Opt) -> + scan_macros(Ts, [T | As], Opt); +scan_macros([], As, _Opt) -> + lists:reverse(As). + +%% Rewriting to a call which will be recognized by the post-parse pass +%% (we insert parentheses to preserve the precedences when parsing). + +macro(L, {Type, _, A}, Rest, As, Opt) -> + scan_macros_1([], Rest, [{atom,L,macro_atom(Type,A)} | As], Opt). + +macro_call([{'(',_}, {')',_}], L, {_, Ln, _}=N, Rest, As, Opt) -> + {Open, Close} = parentheses(As), + scan_macros_1([], Rest, + lists:reverse(Open ++ [{atom,L,?macro_call}, + {'(',L}, N, {')',Ln}] ++ Close, + As), Opt); +macro_call([{'(',_} | Args], L, {_, Ln, _}=N, Rest, As, Opt) -> + {Open, Close} = parentheses(As), + %% note that we must scan the argument list; it may not be skipped + scan_macros_1(Args ++ Close, + Rest, + lists:reverse(Open ++ [{atom,L,?macro_call}, + {'(',L}, N, {',',Ln}], + As), Opt). + +macro_atom(atom, A) -> + list_to_atom(?atom_prefix ++ atom_to_list(A)); +macro_atom(var, A) -> + list_to_atom(?var_prefix ++ atom_to_list(A)). + +%% don't insert parentheses after a string token, to avoid turning +%% `"string" ?macro' into a "function application" `"string"(...)' +%% (see note at top of file) +parentheses([{string, _, _} | _]) -> + {[], []}; +parentheses(_) -> + {[{'(',0}], [{')',0}]}. + +%% (after a macro has been found and the arglist skipped, if any) +scan_macros_1(Args, [{string, L, _} | _]=Rest, As, + #opt{clever = true}=Opt) -> + %% string literal following macro: be clever and insert ++ + scan_macros(Args ++ [{'++', L} | Rest], As, Opt); +scan_macros_1(Args, Rest, As, Opt) -> + %% normal case - continue scanning + scan_macros(Args ++ Rest, As, Opt). + +rewrite_form({function, L, ?pp_form, _, + [{clause, _, [], [], [{call, _, A, As}]}]}) -> + erl_syntax:set_pos(erl_syntax:attribute(A, rewrite_list(As)), L); +rewrite_form({function, L, ?pp_form, _, [{clause, _, [], [], [A]}]}) -> + erl_syntax:set_pos(erl_syntax:attribute(A), L); +rewrite_form(T) -> + rewrite(T). + +rewrite_list([T | Ts]) -> + [rewrite(T) | rewrite_list(Ts)]; +rewrite_list([]) -> + []. 
+ +%% Note: as soon as we start using erl_syntax:subtrees/1 and similar +%% functions, we cannot assume that we know the exact representation of +%% the syntax tree anymore - we must use erl_syntax functions to analyze +%% and decompose the data. + +rewrite(Node) -> + case erl_syntax:type(Node) of + atom -> + case atom_to_list(erl_syntax:atom_value(Node)) of + ?atom_prefix ++ As -> + A1 = list_to_atom(As), + N = erl_syntax:copy_pos(Node, erl_syntax:atom(A1)), + erl_syntax:copy_pos(Node, erl_syntax:macro(N)); + ?var_prefix ++ As -> + A1 = list_to_atom(As), + N = erl_syntax:copy_pos(Node, erl_syntax:variable(A1)), + erl_syntax:copy_pos(Node, erl_syntax:macro(N)); + _ -> + Node + end; + application -> + F = erl_syntax:application_operator(Node), + case erl_syntax:type(F) of + atom -> + case erl_syntax:atom_value(F) of + ?macro_call -> + [A | As] = erl_syntax:application_arguments(Node), + M = erl_syntax:macro(A, rewrite_list(As)), + erl_syntax:copy_pos(Node, M); + _ -> + rewrite_1(Node) + end; + _ -> + rewrite_1(Node) + end; + _ -> + rewrite_1(Node) + end. + +rewrite_1(Node) -> + case erl_syntax:subtrees(Node) of + [] -> + Node; + Gs -> + Node1 = erl_syntax:make_tree(erl_syntax:type(Node), + [[rewrite(T) || T <- Ts] + || Ts <- Gs]), + erl_syntax:copy_pos(Node, Node1) + end. + +%% attempting a rescue operation on a token sequence for a single form +%% if it could not be parsed after the normal treatment + +fix_form([{atom, _, ?pp_form}, {'(', _}, {')', _}, {'->', _}, + {atom, _, define}, {'(', _} | _]=Ts) -> + case lists:reverse(Ts) of + [{dot, _}, {')', _} | _] -> + {retry, Ts, fun fix_define/1}; + [{dot, L} | Ts1] -> + Ts2 = lists:reverse([{dot, L}, {')', L} | Ts1]), + {retry, Ts2, fun fix_define/1}; + _ -> + error + end; +fix_form(_Ts) -> + error. + +fix_define([{atom, L, ?pp_form}, {'(', _}, {')', _}, {'->', _}, + {atom, La, define}, {'(', _}, N, {',', _} | Ts]) -> + [{dot, _}, {')', _} | Ts1] = lists:reverse(Ts), + S = tokens_to_string(lists:reverse(Ts1)), + A = erl_syntax:set_pos(erl_syntax:atom(define), La), + Txt = erl_syntax:set_pos(erl_syntax:text(S), La), + {form, erl_syntax:set_pos(erl_syntax:attribute(A, [N, Txt]), L)}; +fix_define(_Ts) -> + error. + +%% @spec (Tokens::[term()]) -> string() +%% +%% @doc Generates a string corresponding to the given token sequence. +%% The string can be re-tokenized to yield the same token list again. + +tokens_to_string([{atom,_,A} | Ts]) -> + io_lib:write_atom(A) ++ " " ++ tokens_to_string(Ts); +tokens_to_string([{string, _, S} | Ts]) -> + io_lib:write_string(S) ++ " " ++ tokens_to_string(Ts); +tokens_to_string([{float, _, F} | Ts]) -> + float_to_list(F) ++ " " ++ tokens_to_string(Ts); +tokens_to_string([{integer, _, N} | Ts]) -> + integer_to_list(N) ++ " " ++ tokens_to_string(Ts); +tokens_to_string([{var,_,A} | Ts]) -> + atom_to_list(A) ++ " " ++ tokens_to_string(Ts); +tokens_to_string([{dot,_} | Ts]) -> + ".\n" ++ tokens_to_string(Ts); +tokens_to_string([{A,_} | Ts]) -> + atom_to_list(A) ++ " " ++ tokens_to_string(Ts); +tokens_to_string([]) -> + "". + + +%% @spec (Descriptor::term()) -> string() +%% @hidden +%% @doc Callback function for formatting error descriptors. Not for +%% normal use. + +format_error(macro_args) -> + errormsg("macro call missing end parenthesis"); +format_error({unknown, Reason}) -> + errormsg(io_lib:format("unknown error: ~P", [Reason, 15])). + +errormsg(String) -> + io_lib:format("~s: ~s", [?MODULE, String]). 
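%% Sketch (illustration only, not part of the diffed source above):
%% reading an already opened device one form at a time with
%% parse_form/3, much like parse/5 does; `collect_forms/2' is a
%% hypothetical helper.

collect_forms(Dev, L0) ->
    case epp_dodger:parse_form(Dev, L0, []) of
        {ok, Form, L1} ->
            [Form | collect_forms(Dev, L1)];
        {error, IoErr, L1} ->
            %% Keep the error marker and continue with the next form.
            [{error, IoErr} | collect_forms(Dev, L1)];
        {eof, _L1} ->
            []
    end.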
+ + +%% ===================================================================== diff --git a/lib/syntax_tools/src/erl_comment_scan.erl b/lib/syntax_tools/src/erl_comment_scan.erl new file mode 100644 index 0000000000..df1449da4e --- /dev/null +++ b/lib/syntax_tools/src/erl_comment_scan.erl @@ -0,0 +1,280 @@ +%% ===================================================================== +%% This library is free software; you can redistribute it and/or modify +%% it under the terms of the GNU Lesser General Public License as +%% published by the Free Software Foundation; either version 2 of the +%% License, or (at your option) any later version. +%% +%% This library is distributed in the hope that it will be useful, but +%% WITHOUT ANY WARRANTY; without even the implied warranty of +%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +%% Lesser General Public License for more details. +%% +%% You should have received a copy of the GNU Lesser General Public +%% License along with this library; if not, write to the Free Software +%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +%% USA +%% +%% $Id$ +%% +%% @copyright 1997-2006 Richard Carlsson +%% @author Richard Carlsson <[email protected]> +%% @end +%% ===================================================================== + +%% @doc Functions for reading comment lines from Erlang source code. + +-module(erl_comment_scan). + +-export([file/1, join_lines/1, scan_lines/1, string/1]). + + +%% ===================================================================== +%% @spec file(FileName::file:filename()) -> [Comment] +%% +%% Comment = {Line, Column, Indentation, Text} +%% Line = integer() +%% Column = integer() +%% Indentation = integer() +%% Text = [string()] +%% +%% @doc Extracts comments from an Erlang source code file. Returns a +%% list of entries representing <em>multi-line</em> comments, listed in +%% order of increasing line-numbers. For each entry, `Text' +%% is a list of strings representing the consecutive comment lines in +%% top-down order; the strings contain <em>all</em> characters following +%% (but not including) the first comment-introducing `%' +%% character on the line, up to (but not including) the line-terminating +%% newline. +%% +%% Furthermore, `Line' is the line number and +%% `Column' the left column of the comment (i.e., the column +%% of the comment-introducing `%' character). +%% `Indent' is the indentation (or padding), measured in +%% character positions between the last non-whitespace character before +%% the comment (or the left margin), and the left column of the comment. +%% `Line' and `Column' are always positive +%% integers, and `Indentation' is a nonnegative integer. +%% +%% Evaluation exits with reason `{read, Reason}' if a read +%% error occurred, where `Reason' is an atom corresponding to +%% a Posix error code; see the module {@link //kernel/file} for details. + +file(Name) -> + Name1 = filename(Name), + case catch {ok, file:read_file(Name1)} of + {ok, V} -> + case V of + {ok, B} -> + string(binary_to_list(B)); + {error, E} -> + error_read_file(Name1), + exit({read, E}) + end; + {'EXIT', E} -> + error_read_file(Name1), + exit(E); + R -> + error_read_file(Name1), + throw(R) + end. 
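%% Sketch (illustration only, not part of the diffed source above):
%% listing the starting line of every multi-line comment found by
%% file/1; the function name `comment_lines/1' is hypothetical.

comment_lines(FileName) ->
    [Line || {Line, _Col, _Indent, _Text} <- erl_comment_scan:file(FileName)].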
+ + +%% ===================================================================== +%% string(string()) -> [Comment] +%% +%% Comment = {Line, Column, Indentation, Text} +%% Line = integer() +%% Column = integer() +%% Indentation = integer() +%% Text = [string()] +%% +%% @doc Extracts comments from a string containing Erlang source code. +%% Except for reading directly from a string, the behaviour is the same +%% as for {@link file/1}. +%% +%% @see file/1 + +string(Text) -> + lists:reverse(join_lines(scan_lines(Text))). + + +%% ===================================================================== +%% @spec scan_lines(string()) -> [CommentLine] +%% +%% CommentLine = {Line, Column, Indent, Text} +%% Line = integer() +%% Column = integer() +%% Indent = integer() +%% Text = string() +%% +%% @doc Extracts individual comment lines from a source code string. +%% Returns a list of comment lines found in the text, listed in order of +%% <em>decreasing</em> line-numbers, i.e., the last comment line in the +%% input is first in the resulting list. `Text' is a single +%% string, containing all characters following (but not including) the +%% first comment-introducing `%' character on the line, up +%% to (but not including) the line-terminating newline. For details on +%% `Line', `Column' and `Indent', see {@link file/1}. + +scan_lines(Text) -> + scan_lines(Text, 1, 0, 0, []). + +scan_lines([$\040 | Cs], L, Col, M, Ack) -> + scan_lines(Cs, L, Col + 1, M, Ack); +scan_lines([$\t | Cs], L, Col, M, Ack) -> + scan_lines(Cs, L, tab(Col), M, Ack); +scan_lines([$\n | Cs], L, _Col, _M, Ack) -> + scan_lines(Cs, L + 1, 0, 0, Ack); +scan_lines([$\r, $\n | Cs], L, _Col, _M, Ack) -> + scan_lines(Cs, L + 1, 0, 0, Ack); +scan_lines([$\r | Cs], L, _Col, _M, Ack) -> + scan_lines(Cs, L + 1, 0, 0, Ack); +scan_lines([$% | Cs], L, Col, M, Ack) -> + scan_comment(Cs, "", L, Col, M, Ack); +scan_lines([$$ | Cs], L, Col, _M, Ack) -> + scan_char(Cs, L, Col + 1, Ack); +scan_lines([$" | Cs], L, Col, _M, Ack) -> + scan_string(Cs, $", L, Col + 1, Ack); +scan_lines([$' | Cs], L, Col, _M, Ack) -> + scan_string(Cs, $', L, Col + 1, Ack); +scan_lines([_C | Cs], L, Col, _M, Ack) -> + N = Col + 1, + scan_lines(Cs, L, N, N, Ack); +scan_lines([], _L, _Col, _M, Ack) -> + Ack. + +tab(Col) -> + Col - (Col rem 8) + 8. + +scan_comment([$\n | Cs], Cs1, L, Col, M, Ack) -> + seen_comment(Cs, Cs1, L, Col, M, Ack); +scan_comment([$\r, $\n | Cs], Cs1, L, Col, M, Ack) -> + seen_comment(Cs, Cs1, L, Col, M, Ack); +scan_comment([$\r | Cs], Cs1, L, Col, M, Ack) -> + seen_comment(Cs, Cs1, L, Col, M, Ack); +scan_comment([C | Cs], Cs1, L, Col, M, Ack) -> + scan_comment(Cs, [C | Cs1], L, Col, M, Ack); +scan_comment([], Cs1, L, Col, M, Ack) -> + seen_comment([], Cs1, L, Col, M, Ack). + +%% Add a comment line to the ackumulator and return to normal +%% scanning. Note that we compute column positions starting at 0 +%% internally, but the column values in the comment descriptors +%% should start at 1. + +seen_comment(Cs, Cs1, L, Col, M, Ack) -> + %% Compute indentation and strip trailing spaces + N = Col - M, + Text = lists:reverse(string:strip(Cs1, left)), + Ack1 = [{L, Col + 1, N, Text} | Ack], + scan_lines(Cs, L + 1, 0, 0, Ack1). 
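%% Sketch (illustration only, not part of the diffed source above):
%% what string/1 returns for a small snippet; the function name is
%% hypothetical and the result shown in the comment is approximate.

string_example() ->
    Src = "foo() ->\n    ok.  % done\n%% standalone\n",
    %% Roughly: [{2,10,2,[" done"]}, {3,1,0,["% standalone"]}]
    erl_comment_scan:string(Src).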
+ +scan_string([Quote | Cs], Quote, L, Col, Ack) -> + N = Col + 1, + scan_lines(Cs, L, N, N, Ack); +scan_string([$\t | Cs], Quote, L, Col, Ack) -> + scan_string(Cs, Quote, L, tab(Col), Ack); +scan_string([$\n | Cs], Quote, L, _Col, Ack) -> + %% Newlines should really not occur in strings/atoms, but we + %% want to be well behaved even if the input is not. + scan_string(Cs, Quote, L + 1, 0, Ack); +scan_string([$\r, $\n | Cs], Quote, L, _Col, Ack) -> + scan_string(Cs, Quote, L + 1, 0, Ack); +scan_string([$\r | Cs], Quote, L, _Col, Ack) -> + scan_string(Cs, Quote, L + 1, 0, Ack); +scan_string([$\\, _C | Cs], Quote, L, Col, Ack) -> + scan_string(Cs, Quote, L, Col + 2, Ack); % ignore character C +scan_string([_C | Cs], Quote, L, Col, Ack) -> + scan_string(Cs, Quote, L, Col + 1, Ack); +scan_string([], _Quote, _L, _Col, Ack) -> + %% Finish quietly. + Ack. + +scan_char([$\t | Cs], L, Col, Ack) -> + N = tab(Col), + scan_lines(Cs, L, N, N, Ack); % this is not just any whitespace +scan_char([$\n | Cs], L, _Col, Ack) -> + scan_lines(Cs, L + 1, 0, 0, Ack); % handle this, just in case +scan_char([$\r, $\n | Cs], L, _Col, Ack) -> + scan_lines(Cs, L + 1, 0, 0, Ack); +scan_char([$\r | Cs], L, _Col, Ack) -> + scan_lines(Cs, L + 1, 0, 0, Ack); +scan_char([$\\, _C | Cs], L, Col, Ack) -> + N = Col + 2, % character C must be ignored + scan_lines(Cs, L, N, N, Ack); +scan_char([_C | Cs], L, Col, Ack) -> + N = Col + 1, % character C must be ignored + scan_lines(Cs, L, N, N, Ack); +scan_char([], _L, _Col, Ack) -> + %% Finish quietly. + Ack. + + +%% ===================================================================== +%% @spec join_lines([CommentLine]) -> [Comment] +%% +%% CommentLine = {Line, Column, Indent, string()} +%% Line = integer() +%% Column = integer() +%% Indent = integer() +%% Comment = {Line, Column, Indent, Text} +%% Text = [string()] +%% +%% @doc Joins individual comment lines into multi-line comments. The +%% input is a list of entries representing individual comment lines, +%% <em>in order of decreasing line-numbers</em>; see +%% {@link scan_lines/1} for details. The result is a list of +%% entries representing <em>multi-line</em> comments, <em>still listed +%% in order of decreasing line-numbers</em>, but where for each entry, +%% `Text' is a list of consecutive comment lines in order of +%% <em>increasing</em> line-numbers (i.e., top-down). +%% +%% @see scan_lines/1 + +join_lines([{L, Col, Ind, Txt} | Lines]) -> + join_lines(Lines, [Txt], L, Col, Ind); +join_lines([]) -> + []. + +%% In the following, we assume that the current `Txt' is never empty. +%% Recall that the list is in reverse line-number order. + +join_lines([{L1, Col1, Ind1, Txt1} | Lines], Txt, L, Col, Ind) -> + if L1 =:= L - 1, Col1 =:= Col, Ind + 1 =:= Col -> + %% The last test above checks that the previous + %% comment was alone on its line; otherwise it won't + %% be joined with the current; this is not always what + %% one wants, but works well in general. + join_lines(Lines, [Txt1 | Txt], L1, Col1, Ind1); + true -> + %% Finish the current comment and let the new line + %% start the next one. + [{L, Col, Ind, Txt} + | join_lines(Lines, [Txt1], L1, Col1, Ind1)] + end; +join_lines([], Txt, L, Col, Ind) -> + [{L, Col, Ind, Txt}]. 
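%% Sketch (illustration only, not part of the diffed source above):
%% join_lines/1 expects single comment lines in decreasing line order,
%% as delivered by scan_lines/1; `join_example/0' is a hypothetical name.

join_example() ->
    Lines = [{2, 1, 0, " second"}, {1, 1, 0, " first"}],
    %% Both lines are alone in column 1 on consecutive lines, so they
    %% are joined into one comment: [{1, 1, 0, [" first", " second"]}]
    erl_comment_scan:join_lines(Lines).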
+ + +%% ===================================================================== +%% Utility functions for internal use + +filename([C|T]) when is_integer(C), C > 0, C =< 255 -> + [C | filename(T)]; +filename([H|T]) -> + filename(H) ++ filename(T); +filename([]) -> + []; +filename(N) when is_atom(N) -> + atom_to_list(N); +filename(N) -> + report_error("bad filename: `~P'.", [N, 25]), + exit(error). + +error_read_file(Name) -> + report_error("error reading file `~s'.", [Name]). + +report_error(S, Vs) -> + error_logger:error_msg(lists:concat([?MODULE, ": ", S, "\n"]), Vs). + +%% ===================================================================== diff --git a/lib/syntax_tools/src/erl_prettypr.erl b/lib/syntax_tools/src/erl_prettypr.erl new file mode 100644 index 0000000000..8d2f4facea --- /dev/null +++ b/lib/syntax_tools/src/erl_prettypr.erl @@ -0,0 +1,1153 @@ +%% ===================================================================== +%% This library is free software; you can redistribute it and/or modify +%% it under the terms of the GNU Lesser General Public License as +%% published by the Free Software Foundation; either version 2 of the +%% License, or (at your option) any later version. +%% +%% This library is distributed in the hope that it will be useful, but +%% WITHOUT ANY WARRANTY; without even the implied warranty of +%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +%% Lesser General Public License for more details. +%% +%% You should have received a copy of the GNU Lesser General Public +%% License along with this library; if not, write to the Free Software +%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +%% USA +%% +%% $Id$ +%% +%% @copyright 1997-2006 Richard Carlsson +%% @author Richard Carlsson <[email protected]> +%% @end +%% ===================================================================== + +%% @doc Pretty printing of abstract Erlang syntax trees. +%% +%% This module is a front end to the pretty-printing library module +%% `prettypr', for text formatting of abstract syntax trees defined by +%% the module `erl_syntax'. + +-module(erl_prettypr). + +-export([format/1, format/2, best/1, best/2, layout/1, layout/2, + get_ctxt_precedence/1, set_ctxt_precedence/2, + get_ctxt_paperwidth/1, set_ctxt_paperwidth/2, + get_ctxt_linewidth/1, set_ctxt_linewidth/2, get_ctxt_hook/1, + set_ctxt_hook/2, get_ctxt_user/1, set_ctxt_user/2]). + +-import(prettypr, [text/1, nest/2, above/2, beside/2, sep/1, par/1, + par/2, floating/3, floating/1, break/1, follow/2, + follow/3, empty/0]). + +-import(erl_parse, [preop_prec/1, inop_prec/1, func_prec/0, + max_prec/0]). + +-define(PADDING, 2). +-define(PAPER, 80). +-define(RIBBON, 56). +-define(NOUSER, undefined). +-define(NOHOOK, none). + +-record(ctxt, {prec = 0, + sub_indent = 2, + break_indent = 4, + clause = undefined, + hook = ?NOHOOK, + paper = ?PAPER, + ribbon = ?RIBBON, + user = ?NOUSER}). + + +%% ===================================================================== +%% The following functions examine and modify contexts: + +%% @spec (context()) -> context() +%% @doc Returns the operator precedence field of the prettyprinter +%% context. +%% +%% @see set_ctxt_precedence/2 + +get_ctxt_precedence(Ctxt) -> + Ctxt#ctxt.prec. + +%% @spec (context(), integer()) -> context() +%% +%% @doc Updates the operator precedence field of the prettyprinter +%% context. See the {@link //stdlib/erl_parse} module for operator precedences. 
+%% +%% @see //stdlib/erl_parse +%% @see get_ctxt_precedence/1 + +set_ctxt_precedence(Ctxt, Prec) -> + set_prec(Ctxt, Prec). + +set_prec(Ctxt, Prec) -> + Ctxt#ctxt{prec = Prec}. % used internally + +reset_prec(Ctxt) -> + set_prec(Ctxt, 0). % used internally + +%% @spec (context()) -> integer() +%% @doc Returns the paper widh field of the prettyprinter context. +%% @see set_ctxt_paperwidth/2 + +get_ctxt_paperwidth(Ctxt) -> + Ctxt#ctxt.paper. + +%% @spec (context(), integer()) -> context() +%% +%% @doc Updates the paper widh field of the prettyprinter context. +%% +%% Note: changing this value (and passing the resulting context to a +%% continuation function) does not affect the normal formatting, but may +%% affect user-defined behaviour in hook functions. +%% +%% @see get_ctxt_paperwidth/1 + +set_ctxt_paperwidth(Ctxt, W) -> + Ctxt#ctxt{paper = W}. + +%% @spec (context()) -> integer() +%% @doc Returns the line widh field of the prettyprinter context. +%% @see set_ctxt_linewidth/2 + +get_ctxt_linewidth(Ctxt) -> + Ctxt#ctxt.ribbon. + +%% @spec (context(), integer()) -> context() +%% +%% @doc Updates the line widh field of the prettyprinter context. +%% +%% Note: changing this value (and passing the resulting context to a +%% continuation function) does not affect the normal formatting, but may +%% affect user-defined behaviour in hook functions. +%% +%% @see get_ctxt_linewidth/1 + +set_ctxt_linewidth(Ctxt, W) -> + Ctxt#ctxt{ribbon = W}. + +%% @spec (context()) -> hook() +%% @doc Returns the hook function field of the prettyprinter context. +%% @see set_ctxt_hook/2 + +get_ctxt_hook(Ctxt) -> + Ctxt#ctxt.hook. + +%% @spec (context(), hook()) -> context() +%% @doc Updates the hook function field of the prettyprinter context. +%% @see get_ctxt_hook/1 + +set_ctxt_hook(Ctxt, Hook) -> + Ctxt#ctxt{hook = Hook}. + +%% @spec (context()) -> term() +%% @doc Returns the user data field of the prettyprinter context. +%% @see set_ctxt_user/2 + +get_ctxt_user(Ctxt) -> + Ctxt#ctxt.user. + +%% @spec (context(), term()) -> context() +%% @doc Updates the user data field of the prettyprinter context. +%% @see get_ctxt_user/1 + +set_ctxt_user(Ctxt, X) -> + Ctxt#ctxt{user = X}. + + +%% ===================================================================== +%% @spec format(Tree::syntaxTree()) -> string() +%% @equiv format(Tree, []) + +format(Node) -> + format(Node, []). + + +%% ===================================================================== +%% @spec format(Tree::syntaxTree(), Options::[term()]) -> string() +%% syntaxTree() = erl_syntax:syntaxTree() +%% +%% @type hook() = (syntaxTree(), context(), Continuation) -> document() +%% Continuation = (syntaxTree(), context()) -> document(). +%% +%% A call-back function for user-controlled formatting. See {@link +%% format/2}. +%% +%% @type context(). A representation of the current context of the +%% pretty-printer. Can be accessed in hook functions. +%% +%% @doc Prettyprint-formats an abstract Erlang syntax tree as text. 
For +%% example, if you have a `.beam' file that has been compiled with +%% `debug_info', the following should print the source code for the +%% module (as it looks in the debug info representation): +%% ```{ok,{_,[{abstract_code,{_,AC}}]}} = +%% beam_lib:chunks("myfile.beam",[abstract_code]), +%% io:put_chars(erl_prettypr:format(erl_syntax:form_list(AC))) +%% ''' +%% +%% Available options: +%% <dl> +%% <dt>{hook, none | {@link hook()}}</dt> +%% <dd>Unless the value is `none', the given function is called +%% for each node whose list of annotations is not empty; see below +%% for details. The default value is `none'.</dd> +%% +%% <dt>{paper, integer()}</dt> +%% <dd>Specifies the preferred maximum number of characters on any +%% line, including indentation. The default value is 80.</dd> +%% +%% <dt>{ribbon, integer()}</dt> +%% <dd>Specifies the preferred maximum number of characters on any +%% line, not counting indentation. The default value is 65.</dd> +%% +%% <dt>{user, term()}</dt> +%% <dd>User-specific data for use in hook functions. The default +%% value is `undefined'.</dd> +%% </dl> +%% +%% A hook function (cf. the {@link hook()} type) is passed the current +%% syntax tree node, the context, and a continuation. The context can be +%% examined and manipulated by functions such as `get_ctxt_user/1' and +%% `set_ctxt_user/2'. The hook must return a "document" data structure +%% (see {@link layout/2} and {@link best/2}); this may be constructed in +%% part or in whole by applying the continuation function. For example, +%% the following is a trivial hook: +%% ``` +%% fun (Node, Ctxt, Cont) -> Cont(Node, Ctxt) end +%% ''' +%% which yields the same result as if no hook was given. +%% The following, however: +%% ``` +%% fun (Node, Ctxt, Cont) -> +%% Doc = Cont(Node, Ctxt), +%% prettypr:beside(prettypr:text("<b>"), +%% prettypr:beside(Doc, +%% prettypr:text("</b>"))) +%% end +%% ''' +%% will place the text of any annotated node (regardless of the +%% annotation data) between HTML "boldface begin" and "boldface end" +%% tags. +%% +%% @see erl_syntax +%% @see format/1 +%% @see layout/2 +%% @see best/2 +%% @see get_ctxt_user/1 +%% @see set_ctxt_user/2 + +format(Node, Options) -> + W = proplists:get_value(paper, Options, ?PAPER), + L = proplists:get_value(ribbon, Options, ?RIBBON), + prettypr:format(layout(Node, Options), W, L). + + +%% ===================================================================== +%% @spec best(Tree::syntaxTree()) -> empty | document() +%% @equiv best(Tree, []) + +best(Node) -> + best(Node, []). + + +%% ===================================================================== +%% @spec best(Tree::syntaxTree(), Options::[term()]) -> +%% empty | document() +%% +%% @doc Creates a fixed "best" abstract layout for a syntax tree. This +%% is similar to the `layout/2' function, except that here, the final +%% layout has been selected with respect to the given options. The atom +%% `empty' is returned if no such layout could be produced. For +%% information on the options, see the `format/2' function. +%% +%% @see best/1 +%% @see layout/2 +%% @see format/2 +%% @see prettypr:best/3 + +best(Node, Options) -> + W = proplists:get_value(paper, Options, ?PAPER), + L = proplists:get_value(ribbon, Options, ?RIBBON), + prettypr:best(layout(Node, Options), W, L). + + +%% ===================================================================== +%% @spec layout(Tree::syntaxTree()) -> document() +%% @equiv layout(Tree, []) + +layout(Node) -> + layout(Node, []). 
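%% Sketch (illustration only, not part of the diffed source above):
%% formatting a syntax tree with the hook shown in the documentation
%% and a narrower paper width; `format_bold/1' is a hypothetical name.

format_bold(Tree) ->
    Hook = fun(Node, Ctxt, Cont) ->
                   Doc = Cont(Node, Ctxt),
                   prettypr:beside(prettypr:text("<b>"),
                                   prettypr:beside(Doc, prettypr:text("</b>")))
           end,
    erl_prettypr:format(Tree, [{hook, Hook}, {paper, 72}]).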
+ + +%% ===================================================================== +%% @spec layout(Tree::syntaxTree(), Options::[term()]) -> document() +%% document() = prettypr:document() +%% +%% @doc Creates an abstract document layout for a syntax tree. The +%% result represents a set of possible layouts (cf. module `prettypr'). +%% For information on the options, see {@link format/2}; note, however, +%% that the `paper' and `ribbon' options are ignored by this function. +%% +%% This function provides a low-level interface to the pretty printer, +%% returning a flexible representation of possible layouts, independent +%% of the paper width eventually to be used for formatting. This can be +%% included as part of another document and/or further processed +%% directly by the functions in the `prettypr' module, or used in a hook +%% function (see `format/2' for details). +%% +%% @see prettypr +%% @see format/2 +%% @see layout/1 + +layout(Node, Options) -> + lay(Node, + #ctxt{hook = proplists:get_value(hook, Options, ?NOHOOK), + paper = proplists:get_value(paper, Options, ?PAPER), + ribbon = proplists:get_value(ribbon, Options, ?RIBBON), + user = proplists:get_value(user, Options)}). + +lay(Node, Ctxt) -> + case erl_syntax:get_ann(Node) of + [] -> + %% Hooks are not called if there are no annotations. + lay_1(Node, Ctxt); + _As -> + case Ctxt#ctxt.hook of + ?NOHOOK -> + lay_1(Node, Ctxt); + Hook -> + Hook(Node, Ctxt, fun lay_1/2) + end + end. + +%% This handles attached comments: + +lay_1(Node, Ctxt) -> + case erl_syntax:has_comments(Node) of + true -> + D1 = lay_2(Node, Ctxt), + D2 = lay_postcomments(erl_syntax:get_postcomments(Node), + D1), + lay_precomments(erl_syntax:get_precomments(Node), D2); + false -> + lay_2(Node, Ctxt) + end. + +%% For pre-comments, all padding is ignored. + +lay_precomments([], D) -> + D; +lay_precomments(Cs, D) -> + above(floating(break(stack_comments(Cs, false)), -1, -1), D). + +%% For postcomments, individual padding is added. + +lay_postcomments([], D) -> + D; +lay_postcomments(Cs, D) -> + beside(D, floating(break(stack_comments(Cs, true)), 1, 0)). + +%% Format (including padding, if `Pad' is `true', otherwise not) +%% and stack the listed comments above each other, + +stack_comments([C | Cs], Pad) -> + D = stack_comment_lines(erl_syntax:comment_text(C)), + D1 = case Pad of + true -> + P = case erl_syntax:comment_padding(C) of + none -> + ?PADDING; + P1 -> + P1 + end, + beside(text(spaces(P)), D); + false -> + D + end, + case Cs of + [] -> + D1; % done + _ -> + above(D1, stack_comments(Cs, Pad)) + end; +stack_comments([], _) -> + empty(). + +%% Stack lines of text above each other and prefix each string in +%% the list with a single `%' character. + +stack_comment_lines([S | Ss]) -> + D = text(add_comment_prefix(S)), + case Ss of + [] -> + D; + _ -> + above(D, stack_comment_lines(Ss)) + end; +stack_comment_lines([]) -> + empty(). + +add_comment_prefix(S) -> + [$% | S]. + +%% This part ignores annotations and comments: + +lay_2(Node, Ctxt) -> + case erl_syntax:type(Node) of + %% We list literals and other common cases first. 
+ + variable -> + text(erl_syntax:variable_literal(Node)); + + atom -> + text(erl_syntax:atom_literal(Node)); + + integer -> + text(erl_syntax:integer_literal(Node)); + + float -> + text(tidy_float(erl_syntax:float_literal(Node))); + + char -> + text(erl_syntax:char_literal(Node)); + + string -> + lay_string(erl_syntax:string_literal(Node), Ctxt); + + nil -> + text("[]"); + + tuple -> + Es = seq(erl_syntax:tuple_elements(Node), + floating(text(",")), reset_prec(Ctxt), + fun lay/2), + beside(floating(text("{")), + beside(par(Es), + floating(text("}")))); + + list -> + Ctxt1 = reset_prec(Ctxt), + Node1 = erl_syntax:compact_list(Node), + D1 = par(seq(erl_syntax:list_prefix(Node1), + floating(text(",")), Ctxt1, + fun lay/2)), + D = case erl_syntax:list_suffix(Node1) of + none -> + beside(D1, floating(text("]"))); + S -> + follow(D1, + beside( + floating(text("| ")), + beside(lay(S, Ctxt1), + floating(text("]"))))) + end, + beside(floating(text("[")), D); + + operator -> + floating(text(erl_syntax:operator_literal(Node))); + + infix_expr -> + Operator = erl_syntax:infix_expr_operator(Node), + {PrecL, Prec, PrecR} = + case erl_syntax:type(Operator) of + operator -> + inop_prec( + erl_syntax:operator_name(Operator)); + _ -> + {0, 0, 0} + end, + D1 = lay(erl_syntax:infix_expr_left(Node), + set_prec(Ctxt, PrecL)), + D2 = lay(Operator, reset_prec(Ctxt)), + D3 = lay(erl_syntax:infix_expr_right(Node), + set_prec(Ctxt, PrecR)), + D4 = par([D1, D2, D3], Ctxt#ctxt.sub_indent), + maybe_parentheses(D4, Prec, Ctxt); + + prefix_expr -> + Operator = erl_syntax:prefix_expr_operator(Node), + {{Prec, PrecR}, Name} = + case erl_syntax:type(Operator) of + operator -> + N = erl_syntax:operator_name(Operator), + {preop_prec(N), N}; + _ -> + {{0, 0}, any} + end, + D1 = lay(Operator, reset_prec(Ctxt)), + D2 = lay(erl_syntax:prefix_expr_argument(Node), + set_prec(Ctxt, PrecR)), + D3 = case Name of + '+' -> + beside(D1, D2); + '-' -> + beside(D1, D2); + _ -> + par([D1, D2], Ctxt#ctxt.sub_indent) + end, + maybe_parentheses(D3, Prec, Ctxt); + + application -> + {PrecL, Prec} = func_prec(), + D = lay(erl_syntax:application_operator(Node), + set_prec(Ctxt, PrecL)), + As = seq(erl_syntax:application_arguments(Node), + floating(text(",")), reset_prec(Ctxt), + fun lay/2), +%% D1 = beside(D, beside(text("("), +%% beside(par(As), +%% floating(text(")"))))), + D1 = beside(D, beside(text("("), + beside(par(As), + floating(text(")"))))), + maybe_parentheses(D1, Prec, Ctxt); + + match_expr -> + {PrecL, Prec, PrecR} = inop_prec('='), + D1 = lay(erl_syntax:match_expr_pattern(Node), + set_prec(Ctxt, PrecL)), + D2 = lay(erl_syntax:match_expr_body(Node), + set_prec(Ctxt, PrecR)), + D3 = follow(beside(D1, floating(text(" ="))), D2, + Ctxt#ctxt.break_indent), + maybe_parentheses(D3, Prec, Ctxt); + + underscore -> + text("_"); + + clause -> + %% The style used for a clause depends on its context + Ctxt1 = (reset_prec(Ctxt))#ctxt{clause = undefined}, + D1 = par(seq(erl_syntax:clause_patterns(Node), + floating(text(",")), Ctxt1, + fun lay/2)), + D2 = case erl_syntax:clause_guard(Node) of + none -> + none; + G -> + lay(G, Ctxt1) + end, + D3 = sep(seq(erl_syntax:clause_body(Node), + floating(text(",")), Ctxt1, + fun lay/2)), + case Ctxt#ctxt.clause of + fun_expr -> + make_fun_clause(D1, D2, D3, Ctxt); + {function, N} -> + make_fun_clause(N, D1, D2, D3, Ctxt); + if_expr -> + make_if_clause(D1, D2, D3, Ctxt); + cond_expr -> + make_if_clause(D1, D2, D3, Ctxt); + case_expr -> + make_case_clause(D1, D2, D3, Ctxt); + receive_expr -> + 
make_case_clause(D1, D2, D3, Ctxt); + try_expr -> + make_case_clause(D1, D2, D3, Ctxt); + {rule, N} -> + make_rule_clause(N, D1, D2, D3, Ctxt); + undefined -> + %% If a clause is formatted out of context, we + %% use a "fun-expression" clause style. + make_fun_clause(D1, D2, D3, Ctxt) + end; + + function -> + %% Comments on the name itself will be repeated for each + %% clause, but that seems to be the best way to handle it. + Ctxt1 = reset_prec(Ctxt), + D1 = lay(erl_syntax:function_name(Node), Ctxt1), + D2 = lay_clauses(erl_syntax:function_clauses(Node), + {function, D1}, Ctxt1), + beside(D2, floating(text("."))); + + case_expr -> + Ctxt1 = reset_prec(Ctxt), + D1 = lay(erl_syntax:case_expr_argument(Node), Ctxt1), + D2 = lay_clauses( + erl_syntax:case_expr_clauses(Node), + case_expr, Ctxt1), + sep([par([follow(text("case"), D1, Ctxt1#ctxt.sub_indent), + text("of")], + Ctxt1#ctxt.break_indent), + nest(Ctxt1#ctxt.sub_indent, D2), + text("end")]); + + if_expr -> + Ctxt1 = reset_prec(Ctxt), + D = lay_clauses(erl_syntax:if_expr_clauses(Node), + if_expr, Ctxt1), + sep([follow(text("if"), D, Ctxt1#ctxt.sub_indent), + text("end")]); + + cond_expr -> + Ctxt1 = reset_prec(Ctxt), + D = lay_clauses(erl_syntax:cond_expr_clauses(Node), + cond_expr, Ctxt1), + sep([text("cond"), + nest(Ctxt1#ctxt.sub_indent, D), + text("end")]); + + fun_expr -> + Ctxt1 = reset_prec(Ctxt), + D = lay_clauses(erl_syntax:fun_expr_clauses(Node), + fun_expr, Ctxt1), + sep([follow(text("fun"), D, Ctxt1#ctxt.sub_indent), + text("end")]); + + module_qualifier -> + {PrecL, _Prec, PrecR} = inop_prec(':'), + D1 = lay(erl_syntax:module_qualifier_argument(Node), + set_prec(Ctxt, PrecL)), + D2 = lay(erl_syntax:module_qualifier_body(Node), + set_prec(Ctxt, PrecR)), + beside(D1, beside(text(":"), D2)); + + qualified_name -> + Ss = erl_syntax:qualified_name_segments(Node), + lay_qualified_name(Ss, Ctxt); + + %% + %% The rest is in alphabetical order + %% + + arity_qualifier -> + Ctxt1 = reset_prec(Ctxt), + D1 = lay(erl_syntax:arity_qualifier_body(Node), Ctxt1), + D2 = lay(erl_syntax:arity_qualifier_argument(Node), Ctxt1), + beside(D1, beside(text("/"), D2)); + + attribute -> + %% The attribute name and arguments are formatted similar to + %% a function call, but prefixed with a "-" and followed by + %% a period. If the arguments is `none', we only output the + %% attribute name, without following parentheses. 
+ Ctxt1 = reset_prec(Ctxt), + N = erl_syntax:attribute_name(Node), + D = case erl_syntax:attribute_arguments(Node) of + none -> + lay(N, Ctxt1); + Args -> + As = seq(Args, floating(text(",")), Ctxt1, + fun lay/2), + beside(lay(N, Ctxt1), + beside(text("("), + beside(par(As), + floating(text(")"))))) + end, + beside(floating(text("-")), beside(D, floating(text(".")))); + + binary -> + Ctxt1 = reset_prec(Ctxt), + Es = seq(erl_syntax:binary_fields(Node), + floating(text(",")), Ctxt1, fun lay/2), + beside(floating(text("<<")), + beside(par(Es), floating(text(">>")))); + + binary_field -> + Ctxt1 = set_prec(Ctxt, max_prec()), + D1 = lay(erl_syntax:binary_field_body(Node), Ctxt1), + D2 = case erl_syntax:binary_field_types(Node) of + [] -> + empty(); + Ts -> + beside(floating(text("/")), + lay_bit_types(Ts, Ctxt1)) + end, + beside(D1, D2); + + block_expr -> + Ctxt1 = reset_prec(Ctxt), + Es = seq(erl_syntax:block_expr_body(Node), + floating(text(",")), Ctxt1, fun lay/2), + sep([text("begin"), + nest(Ctxt1#ctxt.sub_indent, sep(Es)), + text("end")]); + + catch_expr -> + {Prec, PrecR} = preop_prec('catch'), + D = lay(erl_syntax:catch_expr_body(Node), + set_prec(Ctxt, PrecR)), + D1 = follow(text("catch"), D, Ctxt#ctxt.sub_indent), + maybe_parentheses(D1, Prec, Ctxt); + + class_qualifier -> + Ctxt1 = set_prec(Ctxt, max_prec()), + D1 = lay(erl_syntax:class_qualifier_argument(Node), Ctxt1), + D2 = lay(erl_syntax:class_qualifier_body(Node), Ctxt1), + beside(D1, beside(text(":"), D2)); + + comment -> + D = stack_comment_lines( + erl_syntax:comment_text(Node)), + %% Default padding for standalone comments is empty. + case erl_syntax:comment_padding(Node) of + none -> + floating(break(D)); + P -> + floating(break(beside(text(spaces(P)), D))) + end; + + conjunction -> + par(seq(erl_syntax:conjunction_body(Node), + floating(text(",")), reset_prec(Ctxt), + fun lay/2)); + + disjunction -> + %% For clarity, we don't paragraph-format + %% disjunctions; only conjunctions (see above). 
+ sep(seq(erl_syntax:disjunction_body(Node), + floating(text(";")), reset_prec(Ctxt), + fun lay/2)); + + error_marker -> + E = erl_syntax:error_marker_info(Node), + beside(text("** "), + beside(lay_error_info(E, reset_prec(Ctxt)), + text(" **"))); + + eof_marker -> + empty(); + + form_list -> + Es = seq(erl_syntax:form_list_elements(Node), none, + reset_prec(Ctxt), fun lay/2), + vertical_sep(text(""), Es); + + generator -> + Ctxt1 = reset_prec(Ctxt), + D1 = lay(erl_syntax:generator_pattern(Node), Ctxt1), + D2 = lay(erl_syntax:generator_body(Node), Ctxt1), + par([D1, beside(text("<- "), D2)], Ctxt1#ctxt.break_indent); + + binary_generator -> + Ctxt1 = reset_prec(Ctxt), + D1 = lay(erl_syntax:binary_generator_pattern(Node), Ctxt1), + D2 = lay(erl_syntax:binary_generator_body(Node), Ctxt1), + par([D1, beside(text("<= "), D2)], Ctxt1#ctxt.break_indent); + + implicit_fun -> + D = lay(erl_syntax:implicit_fun_name(Node), + reset_prec(Ctxt)), + beside(floating(text("fun ")), D); + + list_comp -> + Ctxt1 = reset_prec(Ctxt), + D1 = lay(erl_syntax:list_comp_template(Node), Ctxt1), + D2 = par(seq(erl_syntax:list_comp_body(Node), + floating(text(",")), Ctxt1, + fun lay/2)), + beside(floating(text("[")), + par([D1, beside(floating(text("|| ")), + beside(D2, floating(text("]"))))])); + + binary_comp -> + Ctxt1 = reset_prec(Ctxt), + D1 = lay(erl_syntax:binary_comp_template(Node), Ctxt1), + D2 = par(seq(erl_syntax:binary_comp_body(Node), + floating(text(",")), Ctxt1, + fun lay/2)), + beside(floating(text("<< ")), + par([D1, beside(floating(text(" || ")), + beside(D2, floating(text(" >>"))))])); + + macro -> + %% This is formatted similar to a normal function call, but + %% prefixed with a "?". + Ctxt1 = reset_prec(Ctxt), + N = erl_syntax:macro_name(Node), + D = case erl_syntax:macro_arguments(Node) of + none-> + lay(N, Ctxt1); + Args -> + As = seq(Args, floating(text(",")), + set_prec(Ctxt1, max_prec()), fun lay/2), + beside(lay(N, Ctxt1), + beside(text("("), + beside(par(As), + floating(text(")"))))) + end, + D1 = beside(floating(text("?")), D), + maybe_parentheses(D1, 0, Ctxt); % must be conservative! 
+ + parentheses -> + D = lay(erl_syntax:parentheses_body(Node), + reset_prec(Ctxt)), + lay_parentheses(D, Ctxt); + + query_expr -> + Ctxt1 = reset_prec(Ctxt), + D = lay(erl_syntax:query_expr_body(Node), Ctxt1), + sep([text("query"), + nest(Ctxt1#ctxt.sub_indent, D), + text("end")]); + + receive_expr -> + Ctxt1 = reset_prec(Ctxt), + D1 = lay_clauses( + erl_syntax:receive_expr_clauses(Node), + receive_expr, Ctxt1), + D2 = case erl_syntax:receive_expr_timeout(Node) of + none -> + D1; + T -> + D3 = lay(T, Ctxt1), + A = erl_syntax:receive_expr_action(Node), + D4 = sep(seq(A, floating(text(",")), + Ctxt1, fun lay/2)), + sep([D1, + follow(floating(text("after")), + append_clause_body(D4, D3, + Ctxt1), + Ctxt1#ctxt.sub_indent)]) + end, + sep([text("receive"), + nest(Ctxt1#ctxt.sub_indent, D2), + text("end")]); + + record_access -> + {PrecL, Prec, PrecR} = inop_prec('#'), + D1 = lay(erl_syntax:record_access_argument(Node), + set_prec(Ctxt, PrecL)), + D2 = beside( + floating(text(".")), + lay(erl_syntax:record_access_field(Node), + set_prec(Ctxt, PrecR))), + D3 = case erl_syntax:record_access_type(Node) of + none -> + D2; + T -> + beside(beside(floating(text("#")), + lay(T, reset_prec(Ctxt))), + D2) + end, + maybe_parentheses(beside(D1, D3), Prec, Ctxt); + + record_expr -> + {PrecL, Prec, _} = inop_prec('#'), + Ctxt1 = reset_prec(Ctxt), + D1 = lay(erl_syntax:record_expr_type(Node), Ctxt1), + D2 = par(seq(erl_syntax:record_expr_fields(Node), + floating(text(",")), Ctxt1, + fun lay/2)), + D3 = beside(beside(floating(text("#")), D1), + beside(text("{"), + beside(D2, floating(text("}"))))), + D4 = case erl_syntax:record_expr_argument(Node) of + none -> + D3; + A -> + beside(lay(A, set_prec(Ctxt, PrecL)), D3) + end, + maybe_parentheses(D4, Prec, Ctxt); + + record_field -> + Ctxt1 = reset_prec(Ctxt), + D1 = lay(erl_syntax:record_field_name(Node), Ctxt1), + case erl_syntax:record_field_value(Node) of + none -> + D1; + V -> + par([D1, floating(text("=")), lay(V, Ctxt1)], + Ctxt1#ctxt.break_indent) + end; + + record_index_expr -> + {Prec, PrecR} = preop_prec('#'), + D1 = lay(erl_syntax:record_index_expr_type(Node), + reset_prec(Ctxt)), + D2 = lay(erl_syntax:record_index_expr_field(Node), + set_prec(Ctxt, PrecR)), + D3 = beside(beside(floating(text("#")), D1), + beside(floating(text(".")), D2)), + maybe_parentheses(D3, Prec, Ctxt); + + rule -> + %% Comments on the name will be repeated; cf. + %% `function'. 
+ Ctxt1 = reset_prec(Ctxt), + D1 = lay(erl_syntax:rule_name(Node), Ctxt1), + D2 = lay_clauses(erl_syntax:rule_clauses(Node), + {rule, D1}, Ctxt1), + beside(D2, floating(text("."))); + + size_qualifier -> + Ctxt1 = set_prec(Ctxt, max_prec()), + D1 = lay(erl_syntax:size_qualifier_body(Node), Ctxt1), + D2 = lay(erl_syntax:size_qualifier_argument(Node), Ctxt1), + beside(D1, beside(text(":"), D2)); + + text -> + text(erl_syntax:text_string(Node)); + + try_expr -> + Ctxt1 = reset_prec(Ctxt), + D1 = sep(seq(erl_syntax:try_expr_body(Node), + floating(text(",")), Ctxt1, fun lay/2)), + Es0 = [text("end")], + Es1 = case erl_syntax:try_expr_after(Node) of + [] -> Es0; + As -> + D2 = sep(seq(As, floating(text(",")), Ctxt1, + fun lay/2)), + [text("after"), + nest(Ctxt1#ctxt.sub_indent, D2) + | Es0] + end, + Es2 = case erl_syntax:try_expr_handlers(Node) of + [] -> Es1; + Hs -> + D3 = lay_clauses(Hs, try_expr, Ctxt1), + [text("catch"), + nest(Ctxt1#ctxt.sub_indent, D3) + | Es1] + end, + Es3 = case erl_syntax:try_expr_clauses(Node) of + [] -> Es2; + Cs -> + D4 = lay_clauses(Cs, try_expr, Ctxt1), + [text("of"), + nest(Ctxt1#ctxt.sub_indent, D4) + | Es2] + end, + sep([par([follow(text("try"), D1, Ctxt1#ctxt.sub_indent), + hd(Es3)]) + | tl(Es3)]); + + warning_marker -> + E = erl_syntax:warning_marker_info(Node), + beside(text("%% WARNING: "), + lay_error_info(E, reset_prec(Ctxt))) + end. + +lay_parentheses(D, _Ctxt) -> + beside(floating(text("(")), beside(D, floating(text(")")))). + +maybe_parentheses(D, Prec, Ctxt) -> + case Ctxt#ctxt.prec of + P when P > Prec -> + lay_parentheses(D, Ctxt); + _ -> + D + end. + +lay_qualified_name([S | Ss1] = Ss, Ctxt) -> + case erl_syntax:type(S) of + atom -> + case erl_syntax:atom_value(S) of + '' -> + beside(text("."), + lay_qualified_name_1(Ss1, Ctxt)); + _ -> + lay_qualified_name_1(Ss, Ctxt) + end; + _ -> + lay_qualified_name_1(Ss, Ctxt) + end. + +lay_qualified_name_1([S], Ctxt) -> + lay(S, Ctxt); +lay_qualified_name_1([S | Ss], Ctxt) -> + beside(lay(S, Ctxt), beside(text("."), + lay_qualified_name_1(Ss, Ctxt))). + +lay_string(S, Ctxt) -> + %% S includes leading/trailing double-quote characters. The segment + %% width is 2/3 of the ribbon width - this seems to work well. + W = (Ctxt#ctxt.ribbon * 2) div 3, + lay_string_1(S, length(S), W). + +lay_string_1(S, L, W) when L > W, W > 0 -> + %% Note that L is the minimum, not the exact, printed length. + case split_string(S, W - 1, L) of + {_S1, ""} -> + text(S); + {S1, S2} -> + above(text(S1 ++ "\""), + lay_string_1([$" | S2], L - W + 1, W)) %" stupid emacs + end; +lay_string_1(S, _L, _W) -> + text(S). + +split_string(Xs, N, L) -> + split_string_1(Xs, N, L, []). + +%% We only split strings at whitespace, if possible. We must make sure +%% we do not split an escape sequence. + +split_string_1([$\s | Xs], N, L, As) when N =< 0, L >= 5 -> + {lists:reverse([$\s | As]), Xs}; +split_string_1([$\t | Xs], N, L, As) when N =< 0, L >= 5 -> + {lists:reverse([$t, $\\ | As]), Xs}; +split_string_1([$\n | Xs], N, L, As) when N =< 0, L >= 5 -> + {lists:reverse([$n, $\\ | As]), Xs}; +split_string_1([$\\ | Xs], N, L, As) -> + split_string_2(Xs, N - 1, L - 1, [$\\ | As]); +split_string_1(Xs, N, L, As) when N =< -10, L >= 5 -> + {lists:reverse(As), Xs}; +split_string_1([X | Xs], N, L, As) -> + split_string_1(Xs, N - 1, L - 1, [X | As]); +split_string_1([], _N, _L, As) -> + {lists:reverse(As), ""}. 
+ +split_string_2([$^, X | Xs], N, L, As) -> + split_string_1(Xs, N - 2, L - 2, [X, $^ | As]); +split_string_2([X1, X2, X3 | Xs], N, L, As) when + X1 >= $0, X1 =< $7, X2 >= $0, X2 =< $7, X3 >= $0, X3 =< $7 -> + split_string_1(Xs, N - 3, L - 3, [X3, X2, X1 | As]); +split_string_2([X1, X2 | Xs], N, L, As) when + X1 >= $0, X1 =< $7, X2 >= $0, X2 =< $7 -> + split_string_1(Xs, N - 2, L - 2, [X2, X1 | As]); +split_string_2([X | Xs], N, L, As) -> + split_string_1(Xs, N - 1, L - 1, [X | As]). + +%% Note that there is nothing in `lay_clauses' that actually requires +%% that the elements have type `clause'; it just sets up the proper +%% context and arranges the elements suitably for clauses. + +lay_clauses(Cs, Type, Ctxt) -> + vertical(seq(Cs, floating(text(";")), + Ctxt#ctxt{clause = Type}, + fun lay/2)). + +%% Note that for the clause-making functions, the guard argument +%% can be `none', which has different interpretations in different +%% contexts. + +make_fun_clause(P, G, B, Ctxt) -> + make_fun_clause(none, P, G, B, Ctxt). + +make_fun_clause(N, P, G, B, Ctxt) -> + D = make_fun_clause_head(N, P, Ctxt), + make_case_clause(D, G, B, Ctxt). + +make_fun_clause_head(N, P, Ctxt) -> + D = lay_parentheses(P, Ctxt), + if N =:= none -> + D; + true -> + beside(N, D) + end. + +make_rule_clause(N, P, G, B, Ctxt) -> + D = make_fun_clause_head(N, P, Ctxt), + append_rule_body(B, append_guard(G, D, Ctxt), Ctxt). + +make_case_clause(P, G, B, Ctxt) -> + append_clause_body(B, append_guard(G, P, Ctxt), Ctxt). + +make_if_clause(_P, G, B, Ctxt) -> + %% We ignore the patterns; they should be empty anyway. + G1 = if G =:= none -> + text("true"); + true -> + G + end, + append_clause_body(B, G1, Ctxt). + +append_clause_body(B, D, Ctxt) -> + append_clause_body(B, D, floating(text(" ->")), Ctxt). + +append_rule_body(B, D, Ctxt) -> + append_clause_body(B, D, floating(text(" :-")), Ctxt). + +append_clause_body(B, D, S, Ctxt) -> + sep([beside(D, S), nest(Ctxt#ctxt.break_indent, B)]). + +append_guard(none, D, _) -> + D; +append_guard(G, D, Ctxt) -> + par([D, follow(text("when"), G, Ctxt#ctxt.sub_indent)], + Ctxt#ctxt.break_indent). + +lay_bit_types([T], Ctxt) -> + lay(T, Ctxt); +lay_bit_types([T | Ts], Ctxt) -> + beside(lay(T, Ctxt), + beside(floating(text("-")), + lay_bit_types(Ts, Ctxt))). + +lay_error_info({L, M, T}=T0, Ctxt) when is_integer(L), is_atom(M) -> + case catch M:format_error(T) of + S when is_list(S) -> + if L > 0 -> + beside(text(io_lib:format("~w: ",[L])), text(S)); + true -> + text(S) + end; + _ -> + lay_concrete(T0, Ctxt) + end; +lay_error_info(T, Ctxt) -> + lay_concrete(T, Ctxt). + +lay_concrete(T, Ctxt) -> + lay(erl_syntax:abstract(T), Ctxt). + +seq([H | T], Separator, Ctxt, Fun) -> + case T of + [] -> + [Fun(H, Ctxt)]; + _ -> + [maybe_append(Separator, Fun(H, Ctxt)) + | seq(T, Separator, Ctxt, Fun)] + end; +seq([], _, _, _) -> + [empty()]. + +maybe_append(none, D) -> + D; +maybe_append(Suffix, D) -> + beside(D, Suffix). + +vertical([D]) -> + D; +vertical([D | Ds]) -> + above(D, vertical(Ds)); +vertical([]) -> + []. + +vertical_sep(_Sep, [D]) -> + D; +vertical_sep(Sep, [D | Ds]) -> + above(above(D, Sep), vertical_sep(Sep, Ds)); +vertical_sep(_Sep, []) -> + []. + +spaces(N) when N > 0 -> + [$\040 | spaces(N - 1)]; +spaces(_) -> + []. + +tidy_float([$., C | Cs]) -> + [$., C | tidy_float_1(Cs)]; % preserve first decimal digit +tidy_float([$e | _] = Cs) -> + tidy_float_2(Cs); +tidy_float([C | Cs]) -> + [C | tidy_float(Cs)]; +tidy_float([]) -> + []. 
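As a usage illustration of the layout/2 function documented near the top of this file: a minimal sketch, assuming only erl_scan, erl_parse and prettypr:format/1 from the standard libraries (the helper name layout_example/1 is made up for the example):

    %% Parse a single source form, build an abstract layout for it, and
    %% render that layout at prettypr's default paper width.
    layout_example(SourceText) ->
        {ok, Tokens, _} = erl_scan:string(SourceText),
        {ok, Form} = erl_parse:parse_form(Tokens),
        Doc = erl_prettypr:layout(Form, []),   %% paper/ribbon are ignored by layout/2
        prettypr:format(Doc).

For example, layout_example("f(X) -> {ok, X}.") should return the pretty-printed text of that function.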
+ +tidy_float_1([$0, $0, $0 | Cs]) -> + tidy_float_2(Cs); % cut mantissa at three consecutive zeros. +tidy_float_1([$e | _] = Cs) -> + tidy_float_2(Cs); +tidy_float_1([C | Cs]) -> + [C | tidy_float_1(Cs)]; +tidy_float_1([]) -> + []. + +tidy_float_2([$e, $+, $0]) -> []; +tidy_float_2([$e, $+, $0 | Cs]) -> tidy_float_2([$e, $+ | Cs]); +tidy_float_2([$e, $+ | _] = Cs) -> Cs; +tidy_float_2([$e, $-, $0]) -> []; +tidy_float_2([$e, $-, $0 | Cs]) -> tidy_float_2([$e, $- | Cs]); +tidy_float_2([$e, $- | _] = Cs) -> Cs; +tidy_float_2([$e | Cs]) -> tidy_float_2([$e, $+ | Cs]); +tidy_float_2([_C | Cs]) -> tidy_float_2(Cs); +tidy_float_2([]) -> []. + + +%% ===================================================================== diff --git a/lib/syntax_tools/src/erl_recomment.erl b/lib/syntax_tools/src/erl_recomment.erl new file mode 100644 index 0000000000..62ec7da200 --- /dev/null +++ b/lib/syntax_tools/src/erl_recomment.erl @@ -0,0 +1,757 @@ +%% ===================================================================== +%% This library is free software; you can redistribute it and/or modify +%% it under the terms of the GNU Lesser General Public License as +%% published by the Free Software Foundation; either version 2 of the +%% License, or (at your option) any later version. +%% +%% This library is distributed in the hope that it will be useful, but +%% WITHOUT ANY WARRANTY; without even the implied warranty of +%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +%% Lesser General Public License for more details. +%% +%% You should have received a copy of the GNU Lesser General Public +%% License along with this library; if not, write to the Free Software +%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +%% USA +%% +%% $Id$ +%% +%% @copyright 1997-2006 Richard Carlsson +%% @author Richard Carlsson <[email protected]> +%% @end +%% ===================================================================== + +%% @doc Inserting comments into abstract Erlang syntax trees +%% +%% <p>This module contains functions for inserting comments, described +%% by position, indentation and text, as attachments on an abstract +%% syntax tree, at the correct places.</p> + +-module(erl_recomment). + +-export([recomment_forms/2, quick_recomment_forms/2, recomment_tree/2]). + + +%% ===================================================================== +%% @spec quick_recomment_forms(Forms, Comments::[Comment]) -> +%% syntaxTree() +%% +%% Forms = syntaxTree() | [syntaxTree()] +%% Comment = {Line, Column, Indentation, Text} +%% Line = integer() +%% Column = integer() +%% Indentation = integer() +%% Text = [string()] +%% +%% @doc Like {@link recomment_forms/2}, but only inserts top-level +%% comments. Comments within function definitions or declarations +%% ("forms") are simply ignored. + +quick_recomment_forms(Tree, Cs) -> + recomment_forms(Tree, Cs, false). + + +%% ===================================================================== +%% @spec recomment_forms(Forms, Comments::[Comment]) -> syntaxTree() +%% +%% syntaxTree() = erl_syntax:syntaxTree() +%% Forms = syntaxTree() | [syntaxTree()] +%% Comment = {Line, Column, Indentation, Text} +%% Line = integer() +%% Column = integer() +%% Indentation = integer() +%% Text = [string()] +%% +%% @doc Attaches comments to the syntax tree/trees representing a +%% program. The given <code>Forms</code> should be a single syntax tree +%% of type <code>form_list</code>, or a list of syntax trees +%% representing "program forms". 
The syntax trees must contain valid +%% position information (for details, see +%% <code>recomment_tree/2</code>). The result is a corresponding syntax +%% tree of type <code>form_list</code> in which all comments in the list +%% <code>Comments</code> have been attached at the proper places. +%% +%% <p>Assuming <code>Forms</code> represents a program (or any sequence +%% of "program forms"), any comments whose first lines are not directly +%% associated with a specific program form will become standalone +%% comments inserted between the neighbouring program forms. +%% Furthermore, comments whose column position is less than or equal to +%% one will not be attached to a program form that begins at a +%% conflicting line number (this can happen with preprocessor-generated +%% <code>line</code>-attributes).</p> +%% +%% <p>If <code>Forms</code> is a syntax tree of some other type than +%% <code>form_list</code>, the comments will be inserted directly using +%% <code>recomment_tree/2</code>, and any comments left over from that +%% process are added as postcomments on the result.</p> +%% +%% <p>Entries in <code>Comments</code> represent multi-line comments. +%% For each entry, <code>Line</code> is the line number and +%% <code>Column</code> the left column of the comment (the column of the +%% first comment-introducing "<code>%</code>" character). +%% <code>Indentation</code> is the number of character positions between +%% the last non-whitespace character before the comment (or the left +%% margin) and the left column of the comment. <code>Text</code> is a +%% list of strings representing the consecutive comment lines in +%% top-down order, where each string contains all characters following +%% (but not including) the comment-introducing "<code>%</code>" and up +%% to (but not including) the terminating newline. (Cf. module +%% <code>erl_comment_scan</code>.)</p> +%% +%% <p>Evaluation exits with reason <code>{bad_position, Pos}</code> if +%% the associated position information <code>Pos</code> of some subtree +%% in the input does not have a recognizable format, or with reason +%% <code>{bad_tree, L, C}</code> if insertion of a comment at line +%% <code>L</code>, column <code>C</code>, fails because the tree +%% structure is ill-formed.</p> +%% +%% @see erl_comment_scan +%% @see recomment_tree/2 +%% @see quick_recomment_forms/2 + +recomment_forms(Tree, Cs) -> + recomment_forms(Tree, Cs, true). + +recomment_forms(Tree, Cs, Insert) when is_list(Tree) -> + recomment_forms(erl_syntax:form_list(Tree), Cs, Insert); +recomment_forms(Tree, Cs, Insert) -> + case erl_syntax:type(Tree) of + form_list -> + Tree1 = erl_syntax:flatten_form_list(Tree), + Node = build_tree(Tree1), + + %% Here we make a small assumption about the substructure of + %% a `form_list' tree: it has exactly one group of subtrees. + [Node1] = node_subtrees(Node), + List = filter_forms(node_subtrees(Node1)), + List1 = recomment_forms_1(Cs, List, Insert), + revert_tree(set_node_subtrees(Node, + [set_node_subtrees(Node1, + List1)])); + _ -> + %% Not a form list - just call `recomment_tree' and + %% append any leftover comments. + {Tree1, Cs1} = recomment_tree(Tree, Cs), + revert_tree(append_comments(Cs1, Tree1)) + end. + +append_comments([C | Cs], Tree) -> + append_comments(Cs, node_add_postcomment(C, Tree)); +append_comments([], Tree) -> + Tree. 
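The workflow described above can be put together with the companion modules epp_dodger and erl_comment_scan; a minimal sketch (the helper name recomment_file_example/1 is made up):

    %% Parse a file without preprocessing, scan its comments, and
    %% reattach them to the resulting form list.
    recomment_file_example(FileName) ->
        {ok, Forms} = epp_dodger:parse_file(FileName),
        Comments = erl_comment_scan:file(FileName),
        erl_recomment:recomment_forms(Forms, Comments).

The result is a single form_list tree with the comments attached, suitable for printing with erl_prettypr.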
+ +%% This part goes over each comment in turn and inserts it into the +%% proper place in the given list of program forms: + +recomment_forms_1([C | Cs], Ns, Insert) -> + Ns1 = recomment_forms_2(C, Ns, Insert), + recomment_forms_1(Cs, Ns1, Insert); +recomment_forms_1([], Ns, _Insert) -> + Ns. + +recomment_forms_2(C, [N | Ns] = Nodes, Insert) -> + {L, Col, Ind, Text} = C, + Min = node_min(N), + Max = node_max(N), + Delta = comment_delta(Text), + Trailing = + case Ns of + [] -> true; + [Next | _] -> L < node_min(Next) - 2 + end, + if L > Max + 1 ; L =:= Max + 1, not Trailing -> + [N | recomment_forms_2(C, Ns, Insert)]; + L + Delta < Min - 1 -> + %% At least one empty line between the current form + %% and the comment, so we make it a standalone. + [standalone_comment(C) | Nodes]; + L < Min -> + %% The comment line should be above this node. + %% (This duplicates what insert/5 would have done.) + [node_add_precomment(C, N) | Ns]; + Col =< 1, L =< Min, L + Delta >= Min -> + %% This is a conflict - the "first" token of the node + %% overlaps with some comment line, but the comment + %% started at column 1. + N1 = standalone_comment(C), + if L < Min -> + [N1 | Nodes]; + true -> + [N, N1 | Ns] + end; + Insert =:= true -> + [insert(N, L, Col, Ind, C) | Ns]; + true -> + Nodes % skipping non-toplevel comment + end; +recomment_forms_2(C, [], _Top) -> + [standalone_comment(C)]. + +%% Creating a leaf node for a standalone comment. Note that we try to +%% preserve the original starting column rather than the indentation. + +standalone_comment({L, Col, _Ind, Text}) -> + leaf_node(L, L + comment_delta(Text), + erl_syntax:set_pos(erl_syntax:comment(Col - 1, Text), L)). + +%% Compute delta between first and last line of a comment, given +%% the lines of text. + +comment_delta(Text) -> + case length(Text) of + N when N > 0 -> + N - 1; + _ -> + 0 % avoid negative delta + end. + +%% This kills line information for program forms that do not come from +%% the source file itself, but have been included by preprocessing. This +%% way, comments will not be inserted into such parts by mistake. + +-record(filter, {file = undefined, line = 0}). + +filter_forms(Fs) -> + filter_forms(Fs, false, #filter{}). + +filter_forms([F | Fs], Kill, S) -> + case check_file_attr(F) of + {true, A1, A2} -> + S1 = case S#filter.file of + undefined -> + S#filter{file = A1, line = A2}; + _ -> + S + end, + if S1#filter.file =:= A1, + S1#filter.line =< A2 -> + [F | filter_forms(Fs, false, + S1#filter{line = A2})]; + Kill =:= true -> + [node_kill_range(F) + | filter_forms(Fs, true, S1)]; + true -> + [F | filter_forms(Fs, true, S1)] + end; + false -> + case Kill of + true -> + [node_kill_range(F) + | filter_forms(Fs, Kill, S)]; + false -> + [F | filter_forms(Fs, Kill, S)] + end + end; +filter_forms([], _, _) -> + []. + +%% This structure matching gets a bit painful... + +check_file_attr(F) -> + case node_type(F) of + tree_node -> + case tree_node_type(F) of + attribute -> + case node_subtrees(F) of + [L1, L2 | _] -> + check_file_attr_1(L1, L2); + _ -> + false + end; + _ -> + false + end; + _ -> + false + end. + +check_file_attr_1(L1, L2) -> + case node_subtrees(L1) of + [N1 | _] -> + N2 = leaf_node_value(N1), + case erl_syntax:type(N2) of + atom -> + case erl_syntax:atom_value(N2) of + file -> + check_file_attr_2(L2); + _ -> + false + end; + _ -> + false + end; + _ -> + false + end. 
+ +check_file_attr_2(L) -> + case node_subtrees(L) of + [N1, N2 | _] -> + T1 = erl_syntax:concrete(revert_tree(N1)), + T2 = erl_syntax:concrete(revert_tree(N2)), + {true, T1, T2}; + _ -> + false + end. + + +%% ===================================================================== +%% @spec recomment_tree(Tree::syntaxTree(), Comments::[Comment]) -> +%% {syntaxTree(), [Comment]} +%% +%% Comment = {Line, Column, Indentation, Text} +%% Line = integer() +%% Column = integer() +%% Indentation = integer() +%% Text = [string()] +%% +%% @doc Attaches comments to a syntax tree. The result is a pair +%% <code>{NewTree, Remainder}</code> where <code>NewTree</code> is the +%% given <code>Tree</code> where comments from the list +%% <code>Comments</code> have been attached at the proper places. +%% <code>Remainder</code> is the list of entries in +%% <code>Comments</code> which have not been inserted, because their +%% line numbers are greater than those of any node in the tree. The +%% entries in <code>Comments</code> are inserted in order; if two +%% comments become attached to the same node, they will appear in the +%% same order in the program text. +%% +%% <p>The nodes of the syntax tree must contain valid position +%% information. This can be single integers, assumed to represent a line +%% number, or 2- or 3-tuples where the first or second element is an +%% integer, in which case the leftmost integer element is assumed to +%% represent the line number. Line numbers less than one are ignored +%% (usually, the default line number for newly created nodes is +%% zero).</p> +%% +%% <p>For details on the <code>Line</code>, <code>Column</code> and +%% <code>Indentation</code> fields, and the behaviour in case of errors, +%% see <code>recomment_forms/2</code>.</p> +%% +%% @see recomment_forms/2 + +recomment_tree(Tree, Cs) -> + {Tree1, Cs1} = insert_comments(Cs, build_tree(Tree)), + {revert_tree(Tree1), Cs1}. + +%% Comments are inserted in the tree one at a time. Note that this +%% part makes no assumptions about how tree nodes and list nodes +%% are nested; only `build_tree' and `revert_tree' knows about +%% such things. + +insert_comments(Cs, Node) -> + insert_comments(Cs, Node, []). + +insert_comments([C | Cs], Node, Cs1) -> + {L, Col, Ind, _Text} = C, + Max = node_max(Node), + if L =< Max -> + insert_comments(Cs, insert(Node, L, Col, Ind, C), + Cs1); + true -> + insert_comments(Cs, Node, [C | Cs1]) + end; +insert_comments([], Node, Cs) -> + {Node, lists:reverse(Cs)}. + +%% Here, we assume that the comment is located on some line not +%% below the last element of the given node. + +insert(Node, L, Col, Ind, C) -> + case node_type(Node) of + list_node -> + %% We cannot attach comments directly to a list node. + set_node_subtrees(Node, + insert_in_list(node_subtrees(Node), + L, Col, Ind, C)); + _ -> + %% We check if the comment belongs before, or inside + %% the range of the current node. + Min = node_min(Node), + Max = node_max(Node), + if L < Min -> + %% The comment line should be above this node. + node_add_precomment(C, Node); + Min =:= Max -> + %% The whole node is on a single line (this + %% should usually catch all leaf nodes), so we + %% postfix the comment. + node_add_postcomment(C, Node); + true -> + %% The comment should be inserted in the + %% subrange of the node, i.e., attached either + %% to the node itself, or to one of its + %% subtrees. + insert_1(Node, L, Col, Ind, C) + end + end. 
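A small sketch of the {Line, Column, Indentation, Text} comment format accepted by recomment_tree/2 (the literal comment data below is made up for illustration):

    %% Attach one comment tuple to a single parsed form; comments whose
    %% line numbers fall beyond the tree are returned as leftovers.
    recomment_tree_example() ->
        {ok, Tokens, _} = erl_scan:string("f() -> ok."),
        {ok, Form} = erl_parse:parse_form(Tokens),
        C = {1, 4, 1, [" attached to the form"]},
        {Tree, Leftover} = erl_recomment:recomment_tree(Form, [C]),
        {erl_prettypr:format(Tree), Leftover}.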
+ +insert_1(Node, L, Col, Ind, C) -> + case node_type(Node) of + tree_node -> + %% Insert in one of the subtrees. + set_node_subtrees(Node, + insert_in_list(node_subtrees(Node), + L, Col, Ind, C)); + leaf_node -> + %% Odd case: no components, but not on a single line. + %% (Never mind anyway - just postfix the comment.) + node_add_postcomment(C, Node) + end. + +%% We assume that there exists at least one tree node in some tree +%% in the list; since we have decided to insert here, we're +%% screwed if there isn't one. + +insert_in_list([Node | Ns], L, Col, Ind, C) -> + Max = node_max(Node), + + %% Get the `Min' of the next node that follows in the + %% flattened left-to-right order, or -1 (minus one) if no such + %% tree node exists. + NextMin = next_min_in_list(Ns), + + %% `NextMin' could be less than `Max', in inconsistent trees. + if NextMin < 0 -> + %% There is no following leaf/tree node, so we try + %% to insert at this node. + insert_here(Node, L, Col, Ind, C, Ns); + L >= NextMin, NextMin >= Max -> + %% Tend to select the later node, in case the next + %% node should also match. + insert_later(Node, L, Col, Ind, C, Ns); + L =< Max -> + insert_here(Node, L, Col, Ind, C, Ns); + true -> + insert_later(Node, L, Col, Ind, C, Ns) + end; +insert_in_list([], L, Col, _, _) -> + exit({bad_tree, L, Col}). + +%% The comment belongs to the current subrange + +insert_here(Node, L, Col, Ind, C, Ns) -> + [insert(Node, L, Col, Ind, C) | Ns]. + +%% The comment should be inserted later + +insert_later(Node, L, Col, Ind, C, Ns) -> + [Node | insert_in_list(Ns, L, Col, Ind, C)]. + +%% `next_min_in_list' returns the `Min' field of the leftmost tree +%% or leaf node in the given node list, or the integer -1 (minus +%% one) if no such element exists. + +next_min_in_list(Ts) -> + next_min_in_list(Ts, []). + +next_min_in_list([T | Ts], Ack) -> + next_min_in_node(T, [Ts | Ack]); +next_min_in_list([], [T | Ts]) -> + next_min_in_list(T, Ts); +next_min_in_list([], []) -> + -1. + +next_min_in_node(Node, Ack) -> + case node_type(Node) of + leaf_node -> + node_min(Node); + tree_node -> + node_min(Node); + list_node -> + next_min_in_list(node_subtrees(Node), Ack) + end. + +%% Building an extended syntax tree from an `erl_syntax' abstract +%% syntax tree. + +build_tree(Node) -> + L = get_line(Node), + case erl_syntax:subtrees(Node) of + [] -> + %% This guarantees that Min =< Max for the base case. + leaf_node(L, L, Node); + Ts -> + %% `Ts' is a list of lists of abstract terms. + {Subtrees, Min, Max} = build_list_list(Ts), + + %% Include L, while preserving Min =< Max. + tree_node(minpos(L, Min), + max(L, Max), + erl_syntax:type(Node), + erl_syntax:get_attrs(Node), + Subtrees) + end. + +%% Since `erl_syntax:subtrees' yields the components in +%% left-to-right textual order, the line numbers should grow +%% monotonically as the list is traversed, and the maximum line +%% number of the list should therefore be the dito of the last +%% component. However, we do not want to make such a strong +%% assumption about the consistency of the line numbering, so we +%% take the trouble to find the maximum line number in the subtree +%% taken over all its elements. + +build_list(Ts) -> + build_list(Ts, 0, 0, []). + +build_list([T | Ts], Min, Max, Ack) -> + Node = build_tree(T), + Min1 = minpos(node_min(Node), Min), + Max1 = max(node_max(Node), Max), + build_list(Ts, Min1, Max1, [Node | Ack]); +build_list([], Min, Max, Ack) -> + list_node(Min, Max, lists:reverse(Ack)). + +build_list_list(Ls) -> + build_list_list(Ls, 0, 0, []). 
+ +build_list_list([L | Ls], Min, Max, Ack) -> + Node = build_list(L), + Min1 = minpos(node_min(Node), Min), + Max1 = max(node_max(Node), Max), + build_list_list(Ls, Min1, Max1, [Node | Ack]); +build_list_list([], Min, Max, Ack) -> + {lists:reverse(Ack), Min, Max}. + +%% Reverting to an abstract syntax tree from the extended form. +%% Note that the new comments are inserted after the original +%% attributes are restored. + +revert_tree(Node) -> + case node_type(Node) of + leaf_node -> + add_comments(Node, leaf_node_value(Node)); + tree_node -> + add_comments(Node, + erl_syntax:set_attrs( + erl_syntax:make_tree( + tree_node_type(Node), + revert_list(node_subtrees(Node))), + tree_node_attrs(Node))); + list_node -> + revert_list(node_subtrees(Node)) + end. + +revert_list([T | Ts]) -> + [revert_tree(T) | revert_list(Ts)]; +revert_list([]) -> + []. + +add_comments(Node, Tree) -> + case node_precomments(Node) of + [] -> + add_comments_1(Node, Tree); + Cs -> + Cs1 = lists:reverse(expand_comments(Cs)), + add_comments_1(Node, + erl_syntax:add_precomments(Cs1, Tree)) + end. + +add_comments_1(Node, Tree) -> + case node_postcomments(Node) of + [] -> + Tree; + Cs -> + Cs1 = lists:reverse(expand_comments(Cs)), + erl_syntax:add_postcomments(Cs1, Tree) + end. + +expand_comments([C | Cs]) -> + [expand_comment(C) | expand_comments(Cs)]; +expand_comments([]) -> + []. + +expand_comment(C) -> + {L, _Col, Ind, Text} = C, + erl_syntax:set_pos(erl_syntax:comment(Ind, Text), L). + + +%% ===================================================================== +%% Abstract data type for extended syntax trees. +%% +%% These explicitly distinguish between leaf and tree nodes, both +%% corresponding to a single abstract syntax tree, and list nodes, +%% corresponding to a left-to-right ordered sequence of such trees. +%% +%% All nodes have `min' and `max' fields, containing the first and last +%% source lines, respectively, over which the tree extends. +%% +%% Tree nodes and list nodes have a `subtrees' field, containing the +%% (extended) subtrees of the node. Tree nodes also have a `type' field, +%% containing the atom returned by `erl_syntax:type' for the +%% corresponding abstract syntax tree, and an `attrs' field, containing +%% the value of `erl_syntax:get_attrs' for the abstract syntax tree. +%% +%% Leaf nodes and tree nodes also have `precomments' and `postcomments' +%% fields. The comment fields are lists of comment structures (in +%% top-down order); the representation of comments has no consequence to +%% the tree representation. +%% +%% Leaf nodes, lastly, have a `value' field containing the abstract +%% syntax tree for any such tree that can have no subtrees, i.e., such +%% that `erl_syntax:is_leaf' yields `true'. + +-record(leaf, {min = 0, + max = 0, + precomments = [], + postcomments = [], + value}). + +-record(tree, {min = 0, + max = 0, + type, + attrs, + precomments = [], + postcomments = [], + subtrees = []}). + +-record(list, {min = 0, + max = 0, + subtrees = []}). + +leaf_node(Min, Max, Value) -> + #leaf{min = Min, + max = Max, + value = Value}. + +tree_node(Min, Max, Type, Attrs, Subtrees) -> + #tree{min = Min, + max = Max, + type = Type, + attrs = Attrs, + subtrees = Subtrees}. + +list_node(Min, Max, Subtrees) -> + #list{min = Min, + max = Max, + subtrees = Subtrees}. + +node_type(#leaf{}) -> + leaf_node; +node_type(#tree{}) -> + tree_node; +node_type(#list{}) -> + list_node. + +node_min(#leaf{min = Min}) -> + Min; +node_min(#tree{min = Min}) -> + Min; +node_min(#list{min = Min}) -> + Min. 
+ +node_max(#leaf{max = Max}) -> + Max; +node_max(#tree{max = Max}) -> + Max; +node_max(#list{max = Max}) -> + Max. + +node_kill_range(Node) -> + case Node of + #leaf{} -> + Node#leaf{min = -1, max = -1}; + #tree{} -> + Node#tree{min = -1, max = -1}; + #list{} -> + Node#list{min = -1, max = -1} + end. + +node_precomments(#leaf{precomments = Cs}) -> + Cs; +node_precomments(#tree{precomments = Cs}) -> + Cs. + +node_add_precomment(C, Node) -> + case Node of + #leaf{} -> + Node#leaf{precomments = [C | Node#leaf.precomments]}; + #tree{} -> + Node#tree{precomments = [C | Node#tree.precomments]} + end. + +node_postcomments(#leaf{postcomments = Cs}) -> + Cs; +node_postcomments(#tree{postcomments = Cs}) -> + Cs. + +node_add_postcomment(C, Node) -> + case Node of + #leaf{} -> + Node#leaf{postcomments = + [C | Node#leaf.postcomments]}; + #tree{} -> + Node#tree{postcomments = + [C | Node#tree.postcomments]} + end. + +node_subtrees(#tree{subtrees = Subtrees}) -> + Subtrees; +node_subtrees(#list{subtrees = Subtrees}) -> + Subtrees. + +leaf_node_value(#leaf{value = Value}) -> + Value. + +tree_node_type(#tree{type = Type}) -> + Type. + +set_node_subtrees(Node, Subtrees) -> + case Node of + #tree{} -> + Node#tree{subtrees = Subtrees}; + #list{} -> + Node#list{subtrees = Subtrees} + end. + +tree_node_attrs(#tree{attrs = Attrs}) -> + Attrs. + + +%% ===================================================================== +%% General utility functions + +%% Just the generic "maximum" function + +max(X, Y) when X > Y -> X; +max(_, Y) -> Y. + +%% Return the least positive integer of X and Y, or zero if none of them +%% are positive. (This is necessary for computing minimum source line +%% numbers, since zero (or negative) numbers may occur, but they +%% represent the "undefined" line number.) + +minpos(X, Y) when X < Y -> + minpos1(X, Y); +minpos(X, Y) -> + minpos1(Y, X). + +minpos1(X, Y) when X < 1 -> + minpos2(Y); +minpos1(X, _) -> + X. + +minpos2(X) when X < 1 -> + 0; +minpos2(X) -> + X. + +get_line(Node) -> + case erl_syntax:get_pos(Node) of + L when is_integer(L) -> + L; + {L, _} when is_integer(L) -> + L; + {_, L} when is_integer(L) -> + L; + {L, _, _} when is_integer(L) -> + L; + {_, L, _} when is_integer(L) -> + L; + Pos -> + exit({bad_position, Pos}) + end. + + +%% ===================================================================== diff --git a/lib/syntax_tools/src/erl_syntax.erl b/lib/syntax_tools/src/erl_syntax.erl new file mode 100644 index 0000000000..6ceb3ddcaf --- /dev/null +++ b/lib/syntax_tools/src/erl_syntax.erl @@ -0,0 +1,6938 @@ +%% ===================================================================== +%% This library is free software; you can redistribute it and/or modify +%% it under the terms of the GNU Lesser General Public License as +%% published by the Free Software Foundation; either version 2 of the +%% License, or (at your option) any later version. +%% +%% This library is distributed in the hope that it will be useful, but +%% WITHOUT ANY WARRANTY; without even the implied warranty of +%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +%% Lesser General Public License for more details. 
+%% +%% You should have received a copy of the GNU Lesser General Public +%% License along with this library; if not, write to the Free Software +%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +%% USA +%% +%% $Id$ +%% +%% @copyright 1997-2006 Richard Carlsson +%% @author Richard Carlsson <[email protected]> +%% @end +%% ===================================================================== + +%% @doc Abstract Erlang syntax trees. +%% +%% This module defines an abstract data type for representing Erlang +%% source code as syntax trees, in a way that is backwards compatible +%% with the data structures created by the Erlang standard library +%% parser module <code>erl_parse</code> (often referred to as "parse +%% trees", which is a bit of a misnomer). This means that all +%% <code>erl_parse</code> trees are valid abstract syntax trees, but the +%% reverse is not true: abstract syntax trees can in general not be used +%% as input to functions expecting an <code>erl_parse</code> tree. +%% However, as long as an abstract syntax tree represents a correct +%% Erlang program, the function <a +%% href="#revert-1"><code>revert/1</code></a> should be able to +%% transform it to the corresponding <code>erl_parse</code> +%% representation. +%% +%% A recommended starting point for the first-time user is the +%% documentation of the <a +%% href="#type-syntaxTree"><code>syntaxTree()</code></a> data type, and +%% the function <a href="#type-1"><code>type/1</code></a>. +%% +%% <h3><b>NOTES:</b></h3> +%% +%% This module deals with the composition and decomposition of +%% <em>syntactic</em> entities (as opposed to semantic ones); its +%% purpose is to hide all direct references to the data structures used +%% to represent these entities. With few exceptions, the functions in +%% this module perform no semantic interpretation of their inputs, and +%% in general, the user is assumed to pass type-correct arguments - if +%% this is not done, the effects are not defined. +%% +%% With the exception of the <code>erl_parse</code> data structures, +%% the internal representations of abstract syntax trees are subject to +%% change without notice, and should not be documented outside this +%% module. Furthermore, we do not give any guarantees on how an abstract +%% syntax tree may or may not be represented, <em>with the following +%% exceptions</em>: no syntax tree is represented by a single atom, such +%% as <code>none</code>, by a list constructor <code>[X | Y]</code>, or +%% by the empty list <code>[]</code>. This can be relied on when writing +%% functions that operate on syntax trees. + +%% @type syntaxTree(). An abstract syntax tree. The +%% <code>erl_parse</code> "parse tree" representation is a subset of the +%% <code>syntaxTree()</code> representation. +%% +%% Every abstract syntax tree node has a <em>type</em>, given by the +%% function <a href="#type-1"><code>type/1</code></a>. Each node also +%% has associated <em>attributes</em>; see <a +%% href="#get_attrs-1"><code>get_attrs/1</code></a> for details. The +%% functions <a href="#make_tree-2"><code>make_tree/2</code></a> and <a +%% href="#subtrees-1"><code>subtrees/1</code></a> are generic +%% constructor/decomposition functions for abstract syntax trees. The +%% functions <a href="#abstract-1"><code>abstract/1</code></a> and <a +%% href="#concrete-1"><code>concrete/1</code></a> convert between +%% constant Erlang terms and their syntactic representations. 
The set of +%% syntax tree nodes is extensible through the <a +%% href="#tree-2"><code>tree/2</code></a> function. +%% +%% A syntax tree can be transformed to the <code>erl_parse</code> +%% representation with the <a href="#revert-1"><code>revert/1</code></a> +%% function. + +-module(erl_syntax). + +-export([type/1, + is_leaf/1, + is_form/1, + is_literal/1, + abstract/1, + concrete/1, + revert/1, + revert_forms/1, + subtrees/1, + make_tree/2, + update_tree/2, + meta/1, + + get_pos/1, + set_pos/2, + copy_pos/2, + get_precomments/1, + set_precomments/2, + add_precomments/2, + get_postcomments/1, + set_postcomments/2, + add_postcomments/2, + has_comments/1, + remove_comments/1, + copy_comments/2, + join_comments/2, + get_ann/1, + set_ann/2, + add_ann/2, + copy_ann/2, + get_attrs/1, + set_attrs/2, + copy_attrs/2, + + flatten_form_list/1, + cons/2, + list_head/1, + list_tail/1, + is_list_skeleton/1, + is_proper_list/1, + list_elements/1, + list_length/1, + normalize_list/1, + compact_list/1, + + application/2, + application/3, + application_arguments/1, + application_operator/1, + arity_qualifier/2, + arity_qualifier_argument/1, + arity_qualifier_body/1, + atom/1, + is_atom/2, + atom_value/1, + atom_literal/1, + atom_name/1, + attribute/1, + attribute/2, + attribute_arguments/1, + attribute_name/1, + binary/1, + binary_comp/2, + binary_comp_template/1, + binary_comp_body/1, + binary_field/1, + binary_field/2, + binary_field/3, + binary_field_body/1, + binary_field_types/1, + binary_field_size/1, + binary_fields/1, + binary_generator/2, + binary_generator_body/1, + binary_generator_pattern/1, + block_expr/1, + block_expr_body/1, + case_expr/2, + case_expr_argument/1, + case_expr_clauses/1, + catch_expr/1, + catch_expr_body/1, + char/1, + is_char/2, + char_value/1, + char_literal/1, + clause/2, + clause/3, + clause_body/1, + clause_guard/1, + clause_patterns/1, + comment/1, + comment/2, + comment_padding/1, + comment_text/1, + cond_expr/1, + cond_expr_clauses/1, + conjunction/1, + conjunction_body/1, + disjunction/1, + disjunction_body/1, + eof_marker/0, + error_marker/1, + error_marker_info/1, + float/1, + float_value/1, + float_literal/1, + form_list/1, + form_list_elements/1, + fun_expr/1, + fun_expr_arity/1, + fun_expr_clauses/1, + function/2, + function_arity/1, + function_clauses/1, + function_name/1, + generator/2, + generator_body/1, + generator_pattern/1, + if_expr/1, + if_expr_clauses/1, + implicit_fun/1, + implicit_fun/2, + implicit_fun/3, + implicit_fun_name/1, + infix_expr/3, + infix_expr_left/1, + infix_expr_operator/1, + infix_expr_right/1, + integer/1, + is_integer/2, + integer_value/1, + integer_literal/1, + list/1, + list/2, + list_comp/2, + list_comp_body/1, + list_comp_template/1, + list_prefix/1, + list_suffix/1, + macro/1, + macro/2, + macro_arguments/1, + macro_name/1, + match_expr/2, + match_expr_body/1, + match_expr_pattern/1, + module_qualifier/2, + module_qualifier_argument/1, + module_qualifier_body/1, + nil/0, + operator/1, + operator_literal/1, + operator_name/1, + parentheses/1, + parentheses_body/1, + prefix_expr/2, + prefix_expr_argument/1, + prefix_expr_operator/1, + qualified_name/1, + qualified_name_segments/1, + query_expr/1, + query_expr_body/1, + receive_expr/1, + receive_expr/3, + receive_expr_action/1, + receive_expr_clauses/1, + receive_expr_timeout/1, + record_access/2, + record_access/3, + record_access_argument/1, + record_access_field/1, + record_access_type/1, + record_expr/2, + record_expr/3, + record_expr_argument/1, + record_expr_fields/1, + 
record_expr_type/1, + record_field/1, + record_field/2, + record_field_name/1, + record_field_value/1, + record_index_expr/2, + record_index_expr_field/1, + record_index_expr_type/1, + rule/2, + rule_arity/1, + rule_clauses/1, + rule_name/1, + size_qualifier/2, + size_qualifier_argument/1, + size_qualifier_body/1, + string/1, + is_string/2, + string_value/1, + string_literal/1, + text/1, + text_string/1, + try_expr/2, + try_expr/3, + try_expr/4, + try_after_expr/2, + try_expr_body/1, + try_expr_clauses/1, + try_expr_handlers/1, + try_expr_after/1, + class_qualifier/2, + class_qualifier_argument/1, + class_qualifier_body/1, + tuple/1, + tuple_elements/1, + tuple_size/1, + underscore/0, + variable/1, + variable_name/1, + variable_literal/1, + warning_marker/1, + warning_marker_info/1, + + tree/1, + tree/2, + data/1, + is_tree/1]). + + +%% ===================================================================== +%% IMPLEMENTATION NOTES: +%% +%% All nodes are represented by tuples of arity 2 or greater, whose +%% first element is an atom which uniquely identifies the type of the +%% node. (In the backwards-compatible representation, the interpretation +%% is also often dependent on the context; the second element generally +%% holds the position information - with a couple of exceptions; see +%% `get_pos' and `set_pos' for details). In the documentation of this +%% module, `Pos' is the source code position information associated with +%% a node; usually, this is a positive integer indicating the original +%% source code line, but no assumptions are made in this module +%% regarding the format or interpretation of position information. When +%% a syntax tree node is constructed, its associated position is by +%% default set to the integer zero. +%% ===================================================================== + +-define(NO_UNUSED, true). + +%% ===================================================================== +%% Declarations of globally used internal data structures +%% ===================================================================== + +%% `com' records are used to hold comment information attached to a +%% syntax tree node or a wrapper structure. +%% +%% #com{pre :: Pre, post :: Post} +%% +%% Pre = Post = [Com] +%% Com = syntaxTree() +%% +%% type(Com) = comment + +-record(com, {pre = [], + post = []}). + +%% `attr' records store node attributes as an aggregate. +%% +%% #attr{pos :: Pos, ann :: Ann, com :: Comments} +%% +%% Pos = term() +%% Ann = [term()] +%% Comments = none | #com{} +%% +%% where `Pos' `Ann' and `Comments' are the corresponding values of a +%% `tree' or `wrapper' record. + +-record(attr, {pos = 0, + ann = [], + com = none}). + +%% `tree' records represent new-form syntax tree nodes. +%% +%% Tree = #tree{type :: Type, attr :: Attr, data :: Data} +%% +%% Type = atom() +%% Attr = #attr{} +%% Data = term() +%% +%% is_tree(Tree) = true + +-record(tree, {type, + attr = #attr{} :: #attr{}, + data}). + +%% `wrapper' records are used for attaching new-form node information to +%% `erl_parse' trees. +%% +%% Wrapper = #wrapper{type :: Type, attr :: Attr, tree :: ParseTree} +%% +%% Type = atom() +%% Attr = #attr{} +%% ParseTree = term() +%% +%% is_tree(Wrapper) = false + +-record(wrapper, {type, + attr = #attr{} :: #attr{}, + tree}). 
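As an illustration of the abstract/1, concrete/1 and revert/1 functions described in the module documentation above, a minimal sketch (the function name is made up):

    %% A constant term round-trips through its syntactic representation;
    %% revert/1 then yields an erl_parse-compatible tree for the same term.
    abstract_roundtrip() ->
        Node = erl_syntax:abstract({foo, "bar", [1, 2, 3]}),
        {foo, "bar", [1, 2, 3]} = erl_syntax:concrete(Node),
        erl_syntax:revert(Node).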
+ + +%% ===================================================================== +%% +%% Exported functions +%% +%% ===================================================================== + + +%% ===================================================================== +%% @spec type(Node::syntaxTree()) -> atom() +%% +%% @doc Returns the type tag of <code>Node</code>. If <code>Node</code> +%% does not represent a syntax tree, evaluation fails with reason +%% <code>badarg</code>. Node types currently defined by this module are: +%% <p><center><table border="1"> +%% <tr> +%% <td>application</td> +%% <td>arity_qualifier</td> +%% <td>atom</td> +%% <td>attribute</td> +%% </tr><tr> +%% <td>binary</td> +%% <td>binary_field</td> +%% <td>block_expr</td> +%% <td>case_expr</td> +%% </tr><tr> +%% <td>catch_expr</td> +%% <td>char</td> +%% <td>class_qualifier</td> +%% <td>clause</td> +%% </tr><tr> +%% <td>comment</td> +%% <td>cond_expr</td> +%% <td>conjunction</td> +%% <td>disjunction</td> +%% </tr><tr> +%% <td>eof_marker</td> +%% <td>error_marker</td> +%% <td>float</td> +%% <td>form_list</td> +%% </tr><tr> +%% <td>fun_expr</td> +%% <td>function</td> +%% <td>generator</td> +%% <td>if_expr</td> +%% </tr><tr> +%% <td>implicit_fun</td> +%% <td>infix_expr</td> +%% <td>integer</td> +%% <td>list</td> +%% </tr><tr> +%% <td>list_comp</td> +%% <td>macro</td> +%% <td>match_expr</td> +%% <td>module_qualifier</td> +%% </tr><tr> +%% <td>nil</td> +%% <td>operator</td> +%% <td>parentheses</td> +%% <td>prefix_expr</td> +%% </tr><tr> +%% <td>qualified_name</td> +%% <td>query_expr</td> +%% <td>receive_expr</td> +%% <td>record_access</td> +%% </tr><tr> +%% <td>record_expr</td> +%% <td>record_field</td> +%% <td>record_index_expr</td> +%% <td>rule</td> +%% </tr><tr> +%% <td>size_qualifier</td> +%% <td>string</td> +%% <td>text</td> +%% <td>try_expr</td> +%% </tr><tr> +%% <td>tuple</td> +%% <td>underscore</td> +%% <td>variable</td> +%% <td>warning_marker</td> +%% </tr> +%% </table></center></p> +%% <p>The user may (for special purposes) create additional nodes +%% with other type tags, using the <code>tree/2</code> function.</p> +%% +%% <p>Note: The primary constructor functions for a node type should +%% always have the same name as the node type itself.</p> +%% +%% @see tree/2 +%% @see application/3 +%% @see arity_qualifier/2 +%% @see atom/1 +%% @see attribute/2 +%% @see binary/1 +%% @see binary_field/2 +%% @see block_expr/1 +%% @see case_expr/2 +%% @see catch_expr/1 +%% @see char/1 +%% @see class_qualifier/2 +%% @see clause/3 +%% @see comment/2 +%% @see cond_expr/1 +%% @see conjunction/1 +%% @see disjunction/1 +%% @see eof_marker/0 +%% @see error_marker/1 +%% @see float/1 +%% @see form_list/1 +%% @see fun_expr/1 +%% @see function/2 +%% @see generator/2 +%% @see if_expr/1 +%% @see implicit_fun/2 +%% @see infix_expr/3 +%% @see integer/1 +%% @see list/2 +%% @see list_comp/2 +%% @see macro/2 +%% @see match_expr/2 +%% @see module_qualifier/2 +%% @see nil/0 +%% @see operator/1 +%% @see parentheses/1 +%% @see prefix_expr/2 +%% @see qualified_name/1 +%% @see query_expr/1 +%% @see receive_expr/3 +%% @see record_access/3 +%% @see record_expr/2 +%% @see record_field/2 +%% @see record_index_expr/2 +%% @see rule/2 +%% @see size_qualifier/2 +%% @see string/1 +%% @see text/1 +%% @see try_expr/3 +%% @see tuple/1 +%% @see underscore/0 +%% @see variable/1 +%% @see warning_marker/1 + +type(#tree{type = T}) -> + T; +type(#wrapper{type = T}) -> + T; +type(Node) -> + %% Check for `erl_parse'-compatible nodes, and otherwise fail. 
+ case Node of + %% Leaf types + {atom, _, _} -> atom; + {char, _, _} -> char; + {float, _, _} -> float; + {integer, _, _} -> integer; + {nil, _} -> nil; + {string, _, _} -> string; + {var, _, Name} -> + if Name =:= '_' -> underscore; + true -> variable + end; + {error, _} -> error_marker; + {warning, _} -> warning_marker; + {eof, _} -> eof_marker; + + %% Composite types + {'case', _, _, _} -> case_expr; + {'catch', _, _} -> catch_expr; + {'cond', _, _} -> cond_expr; + {'fun', _, {clauses, _}} -> fun_expr; + {'fun', _, {function, _, _}} -> implicit_fun; + {'fun', _, {function, _, _, _}} -> implicit_fun; + {'if', _, _} -> if_expr; + {'receive', _, _, _, _} -> receive_expr; + {'receive', _, _} -> receive_expr; + {attribute, _, _, _} -> attribute; + {bin, _, _} -> binary; + {bin_element, _, _, _, _} -> binary_field; + {block, _, _} -> block_expr; + {call, _, _, _} -> application; + {clause, _, _, _, _} -> clause; + {cons, _, _, _} -> list; + {function, _, _, _, _} -> function; + {b_generate, _, _, _} -> binary_generator; + {generate, _, _, _} -> generator; + {lc, _, _, _} -> list_comp; + {bc, _, _, _} -> binary_comp; + {match, _, _, _} -> match_expr; + {op, _, _, _, _} -> infix_expr; + {op, _, _, _} -> prefix_expr; + {'query', _, _} -> query_expr; + {record, _, _, _, _} -> record_expr; + {record, _, _, _} -> record_expr; + {record_field, _, _, _, _} -> record_access; + {record_field, _, _, _} -> + case is_qualified_name(Node) of + true -> qualified_name; + false -> record_access + end; + {record_index, _, _, _} -> record_index_expr; + {remote, _, _, _} -> module_qualifier; + {rule, _, _, _, _} -> rule; + {'try', _, _, _, _, _} -> try_expr; + {tuple, _, _} -> tuple; + _ -> + erlang:error({badarg, Node}) + end. + + +%% ===================================================================== +%% @spec is_leaf(Node::syntaxTree()) -> bool() +%% +%% @doc Returns <code>true</code> if <code>Node</code> is a leaf node, +%% otherwise <code>false</code>. The currently recognised leaf node +%% types are: +%% <p><center><table border="1"> +%% <tr> +%% <td><code>atom</code></td> +%% <td><code>char</code></td> +%% <td><code>comment</code></td> +%% <td><code>eof_marker</code></td> +%% <td><code>error_marker</code></td> +%% </tr><tr> +%% <td><code>float</code></td> +%% <td><code>integer</code></td> +%% <td><code>nil</code></td> +%% <td><code>operator</code></td> +%% <td><code>string</code></td> +%% </tr><tr> +%% <td><code>text</code></td> +%% <td><code>underscore</code></td> +%% <td><code>variable</code></td> +%% <td><code>warning_marker</code></td> +%% </tr> +%% </table></center></p> +%% <p>A node of type <code>tuple</code> is a leaf node if and only if +%% its arity is zero.</p> +%% +%% <p>Note: not all literals are leaf nodes, and vice versa. E.g., +%% tuples with nonzero arity and nonempty lists may be literals, but are +%% not leaf nodes. Variables, on the other hand, are leaf nodes but not +%% literals.</p> +%% +%% @see type/1 +%% @see is_literal/1 + +is_leaf(Node) -> + case type(Node) of + atom -> true; + char -> true; + comment -> true; % nonstandard type + eof_marker -> true; + error_marker -> true; + float -> true; + integer -> true; + nil -> true; + operator -> true; % nonstandard type + string -> true; + text -> true; % nonstandard type + tuple -> tuple_elements(Node) =:= []; + underscore -> true; + variable -> true; + warning_marker -> true; + _ -> false + end. 
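A brief sketch exercising type/1 and is_leaf/1 as documented above, on both an erl_parse form and a node built with this module's constructors (the function name is made up):

    %% Classify a parsed function form and a constructed atom node.
    type_example() ->
        {ok, Tokens, _} = erl_scan:string("f(X) -> {X, [X]}."),
        {ok, Form} = erl_parse:parse_form(Tokens),
        function = erl_syntax:type(Form),
        false = erl_syntax:is_leaf(Form),
        atom = erl_syntax:type(erl_syntax:atom(foo)),
        true = erl_syntax:is_leaf(erl_syntax:atom(foo)),
        ok.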
+ + +%% ===================================================================== +%% @spec is_form(Node::syntaxTree()) -> bool() +%% +%% @doc Returns <code>true</code> if <code>Node</code> is a syntax tree +%% representing a so-called "source code form", otherwise +%% <code>false</code>. Forms are the Erlang source code units which, +%% placed in sequence, constitute an Erlang program. Current form types +%% are: +%% <p><center><table border="1"> +%% <tr> +%% <td><code>attribute</code></td> +%% <td><code>comment</code></td> +%% <td><code>error_marker</code></td> +%% <td><code>eof_marker</code></td> +%% <td><code>form_list</code></td> +%% </tr><tr> +%% <td><code>function</code></td> +%% <td><code>rule</code></td> +%% <td><code>warning_marker</code></td> +%% <td><code>text</code></td> +%% </tr> +%% </table></center></p> +%% @see type/1 +%% @see attribute/2 +%% @see comment/2 +%% @see eof_marker/0 +%% @see error_marker/1 +%% @see form_list/1 +%% @see function/2 +%% @see rule/2 +%% @see warning_marker/1 + +is_form(Node) -> + case type(Node) of + attribute -> true; + comment -> true; + function -> true; + eof_marker -> true; + error_marker -> true; + form_list -> true; + rule -> true; + warning_marker -> true; + text -> true; + _ -> false + end. + + +%% ===================================================================== +%% @spec get_pos(Node::syntaxTree()) -> term() +%% +%% @doc Returns the position information associated with +%% <code>Node</code>. This is usually a nonnegative integer (indicating +%% the source code line number), but may be any term. By default, all +%% new tree nodes have their associated position information set to the +%% integer zero. +%% +%% @see set_pos/2 +%% @see get_attrs/1 + +%% All `erl_parse' tree nodes are represented by tuples whose second +%% field is the position information (usually an integer), *with the +%% exceptions of* `{error, ...}' (type `error_marker') and `{warning, +%% ...}' (type `warning_marker'), which only contain the associated line +%% number *of the error descriptor*; this is all handled transparently +%% by `get_pos' and `set_pos'. + +get_pos(#tree{attr = Attr}) -> + Attr#attr.pos; +get_pos(#wrapper{attr = Attr}) -> + Attr#attr.pos; +get_pos({error, {Pos, _, _}}) -> + Pos; +get_pos({warning, {Pos, _, _}}) -> + Pos; +get_pos(Node) -> + %% Here, we assume that we have an `erl_parse' node with position + %% information in element 2. + element(2, Node). + + +%% ===================================================================== +%% @spec set_pos(Node::syntaxTree(), Pos::term()) -> syntaxTree() +%% +%% @doc Sets the position information of <code>Node</code> to +%% <code>Pos</code>. +%% +%% @see get_pos/1 +%% @see copy_pos/2 + +set_pos(Node, Pos) -> + case Node of + #tree{attr = Attr} -> + Node#tree{attr = Attr#attr{pos = Pos}}; + #wrapper{attr = Attr} -> + Node#wrapper{attr = Attr#attr{pos = Pos}}; + _ -> + %% We then assume we have an `erl_parse' node, and create a + %% wrapper around it to make things more uniform. + set_pos(wrap(Node), Pos) + end. + + +%% ===================================================================== +%% @spec copy_pos(Source::syntaxTree(), Target::syntaxTree()) -> +%% syntaxTree() +%% +%% @doc Copies the position information from <code>Source</code> to +%% <code>Target</code>. +%% +%% <p>This is equivalent to <code>set_pos(Target, +%% get_pos(Source))</code>, but potentially more efficient.</p> +%% +%% @see get_pos/1 +%% @see set_pos/2 + +copy_pos(Source, Target) -> + set_pos(Target, get_pos(Source)). 
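+
+%% Usage sketch (illustrative only): position information can be read from
+%% and written to both kinds of nodes; `set_pos/2' on a plain `erl_parse'
+%% tuple wraps it first, which is transparent to the caller. Hypothetical
+%% shell session.
+%%
+%%   1> erl_syntax:get_pos({atom, 42, foo}).
+%%   42
+%%   2> erl_syntax:get_pos(erl_syntax:set_pos(erl_syntax:atom(foo), 17)).
+%%   17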
+ + +%% ===================================================================== +%% `get_com' and `set_com' are for internal use only. + +get_com(#tree{attr = Attr}) -> Attr#attr.com; +get_com(#wrapper{attr = Attr}) -> Attr#attr.com; +get_com(_) -> none. + +set_com(Node, Com) -> + case Node of + #tree{attr = Attr} -> + Node#tree{attr = Attr#attr{com = Com}}; + #wrapper{attr = Attr} -> + Node#wrapper{attr = Attr#attr{com = Com}}; + _ -> + set_com(wrap(Node), Com) + end. + + +%% ===================================================================== +%% @spec get_precomments(syntaxTree()) -> [syntaxTree()] +%% +%% @doc Returns the associated pre-comments of a node. This is a +%% possibly empty list of abstract comments, in top-down textual order. +%% When the code is formatted, pre-comments are typically displayed +%% directly above the node. For example: +%% <pre> +%% % Pre-comment of function +%% foo(X) -> {bar, X}.</pre> +%% +%% <p>If possible, the comment should be moved before any preceding +%% separator characters on the same line. E.g.: +%% <pre> +%% foo([X | Xs]) -> +%% % Pre-comment of 'bar(X)' node +%% [bar(X) | foo(Xs)]; +%% ...</pre> +%% (where the comment is moved before the "<code>[</code>").</p> +%% +%% @see comment/2 +%% @see set_precomments/2 +%% @see get_postcomments/1 +%% @see get_attrs/1 + +get_precomments(#tree{attr = Attr}) -> get_precomments_1(Attr); +get_precomments(#wrapper{attr = Attr}) -> get_precomments_1(Attr); +get_precomments(_) -> []. + +get_precomments_1(#attr{com = none}) -> []; +get_precomments_1(#attr{com = #com{pre = Cs}}) -> Cs. + + +%% ===================================================================== +%% @spec set_precomments(Node::syntaxTree(), +%% Comments::[syntaxTree()]) -> syntaxTree() +%% +%% @doc Sets the pre-comments of <code>Node</code> to +%% <code>Comments</code>. <code>Comments</code> should be a possibly +%% empty list of abstract comments, in top-down textual order. +%% +%% @see comment/2 +%% @see get_precomments/1 +%% @see add_precomments/2 +%% @see set_postcomments/2 +%% @see copy_comments/2 +%% @see remove_comments/1 +%% @see join_comments/2 + +set_precomments(Node, Cs) -> + case Node of + #tree{attr = Attr} -> + Node#tree{attr = set_precomments_1(Attr, Cs)}; + #wrapper{attr = Attr} -> + Node#wrapper{attr = set_precomments_1(Attr, Cs)}; + _ -> + set_precomments(wrap(Node), Cs) + end. + +set_precomments_1(#attr{com = none} = Attr, Cs) -> + Attr#attr{com = #com{pre = Cs}}; +set_precomments_1(#attr{com = Com} = Attr, Cs) -> + Attr#attr{com = Com#com{pre = Cs}}. + + +%% ===================================================================== +%% @spec add_precomments(Comments::[syntaxTree()], +%% Node::syntaxTree()) -> syntaxTree() +%% +%% @doc Appends <code>Comments</code> to the pre-comments of +%% <code>Node</code>. +%% +%% <p>Note: This is equivalent to <code>set_precomments(Node, +%% get_precomments(Node) ++ Comments)</code>, but potentially more +%% efficient.</p> +%% +%% @see comment/2 +%% @see get_precomments/1 +%% @see set_precomments/2 +%% @see add_postcomments/2 +%% @see join_comments/2 + +add_precomments(Cs, Node) -> + case Node of + #tree{attr = Attr} -> + Node#tree{attr = add_precomments_1(Cs, Attr)}; + #wrapper{attr = Attr} -> + Node#wrapper{attr = add_precomments_1(Cs, Attr)}; + _ -> + add_precomments(Cs, wrap(Node)) + end. + +add_precomments_1(Cs, #attr{com = none} = Attr) -> + Attr#attr{com = #com{pre = Cs}}; +add_precomments_1(Cs, #attr{com = Com} = Attr) -> + Attr#attr{com = Com#com{pre = Com#com.pre ++ Cs}}. 
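+
+%% Usage sketch (illustrative only): attaching an abstract comment as a
+%% pre-comment; when the tree is later formatted, the comment should end up
+%% on the line(s) above the node. Hypothetical shell session.
+%%
+%%   1> C = erl_syntax:comment([" a note"]).
+%%   2> N = erl_syntax:set_precomments(erl_syntax:atom(foo), [C]).
+%%   3> erl_syntax:get_precomments(N) =:= [C].
+%%   true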
+ + +%% ===================================================================== +%% @spec get_postcomments(syntaxTree()) -> [syntaxTree()] +%% +%% @doc Returns the associated post-comments of a node. This is a +%% possibly empty list of abstract comments, in top-down textual order. +%% When the code is formatted, post-comments are typically displayed to +%% the right of and/or below the node. For example: +%% <pre> +%% {foo, X, Y} % Post-comment of tuple</pre> +%% +%% <p>If possible, the comment should be moved past any following +%% separator characters on the same line, rather than placing the +%% separators on the following line. E.g.: +%% <pre> +%% foo([X | Xs], Y) -> +%% foo(Xs, bar(X)); % Post-comment of 'bar(X)' node +%% ...</pre> +%% (where the comment is moved past the rightmost "<code>)</code>" and +%% the "<code>;</code>").</p> +%% +%% @see comment/2 +%% @see set_postcomments/2 +%% @see get_precomments/1 +%% @see get_attrs/1 + +get_postcomments(#tree{attr = Attr}) -> get_postcomments_1(Attr); +get_postcomments(#wrapper{attr = Attr}) -> get_postcomments_1(Attr); +get_postcomments(_) -> []. + +get_postcomments_1(#attr{com = none}) -> []; +get_postcomments_1(#attr{com = #com{post = Cs}}) -> Cs. + + +%% ===================================================================== +%% @spec set_postcomments(Node::syntaxTree(), +%% Comments::[syntaxTree()]) -> syntaxTree() +%% +%% @doc Sets the post-comments of <code>Node</code> to +%% <code>Comments</code>. <code>Comments</code> should be a possibly +%% empty list of abstract comments, in top-down textual order +%% +%% @see comment/2 +%% @see get_postcomments/1 +%% @see add_postcomments/2 +%% @see set_precomments/2 +%% @see copy_comments/2 +%% @see remove_comments/1 +%% @see join_comments/2 + +set_postcomments(Node, Cs) -> + case Node of + #tree{attr = Attr} -> + Node#tree{attr = set_postcomments_1(Attr, Cs)}; + #wrapper{attr = Attr} -> + Node#wrapper{attr = set_postcomments_1(Attr, Cs)}; + _ -> + set_postcomments(wrap(Node), Cs) + end. + +set_postcomments_1(#attr{com = none} = Attr, Cs) -> + Attr#attr{com = #com{post = Cs}}; +set_postcomments_1(#attr{com = Com} = Attr, Cs) -> + Attr#attr{com = Com#com{post = Cs}}. + + +%% ===================================================================== +%% @spec add_postcomments(Comments::[syntaxTree()], +%% Node::syntaxTree()) -> syntaxTree() +%% +%% @doc Appends <code>Comments</code> to the post-comments of +%% <code>Node</code>. +%% +%% <p>Note: This is equivalent to <code>set_postcomments(Node, +%% get_postcomments(Node) ++ Comments)</code>, but potentially more +%% efficient.</p> +%% +%% @see comment/2 +%% @see get_postcomments/1 +%% @see set_postcomments/2 +%% @see add_precomments/2 +%% @see join_comments/2 + +add_postcomments(Cs, Node) -> + case Node of + #tree{attr = Attr} -> + Node#tree{attr = add_postcomments_1(Cs, Attr)}; + #wrapper{attr = Attr} -> + Node#wrapper{attr = add_postcomments_1(Cs, Attr)}; + _ -> + add_postcomments(Cs, wrap(Node)) + end. + +add_postcomments_1(Cs, #attr{com = none} = Attr) -> + Attr#attr{com = #com{post = Cs}}; +add_postcomments_1(Cs, #attr{com = Com} = Attr) -> + Attr#attr{com = Com#com{post = Com#com.post ++ Cs}}. + + +%% ===================================================================== +%% @spec has_comments(Node::syntaxTree()) -> bool() +%% +%% @doc Yields <code>false</code> if the node has no associated +%% comments, and <code>true</code> otherwise. 
+%% +%% <p>Note: This is equivalent to <code>(get_precomments(Node) == []) +%% and (get_postcomments(Node) == [])</code>, but potentially more +%% efficient.</p> +%% +%% @see get_precomments/1 +%% @see get_postcomments/1 +%% @see remove_comments/1 + +has_comments(#tree{attr = Attr}) -> + case Attr#attr.com of + none -> false; + #com{pre = [], post = []} -> false; + _ -> true + end; +has_comments(#wrapper{attr = Attr}) -> + case Attr#attr.com of + none -> false; + #com{pre = [], post = []} -> false; + _ -> true + end; +has_comments(_) -> false. + + +%% ===================================================================== +%% @spec remove_comments(Node::syntaxTree()) -> syntaxTree() +%% +%% @doc Clears the associated comments of <code>Node</code>. +%% +%% <p>Note: This is equivalent to +%% <code>set_precomments(set_postcomments(Node, []), [])</code>, but +%% potentially more efficient.</p> +%% +%% @see set_precomments/2 +%% @see set_postcomments/2 + +remove_comments(Node) -> + case Node of + #tree{attr = Attr} -> + Node#tree{attr = Attr#attr{com = none}}; + #wrapper{attr = Attr} -> + Node#wrapper{attr = Attr#attr{com = none}}; + _ -> + Node + end. + + +%% ===================================================================== +%% @spec copy_comments(Source::syntaxTree(), Target::syntaxTree()) -> +%% syntaxTree() +%% +%% @doc Copies the pre- and postcomments from <code>Source</code> to +%% <code>Target</code>. +%% +%% <p>Note: This is equivalent to +%% <code>set_postcomments(set_precomments(Target, +%% get_precomments(Source)), get_postcomments(Source))</code>, but +%% potentially more efficient.</p> +%% +%% @see comment/2 +%% @see get_precomments/1 +%% @see get_postcomments/1 +%% @see set_precomments/2 +%% @see set_postcomments/2 + +copy_comments(Source, Target) -> + set_com(Target, get_com(Source)). + + +%% ===================================================================== +%% @spec join_comments(Source::syntaxTree(), Target::syntaxTree()) -> +%% syntaxTree() +%% +%% @doc Appends the comments of <code>Source</code> to the current +%% comments of <code>Target</code>. +%% +%% <p>Note: This is equivalent to +%% <code>add_postcomments(get_postcomments(Source), +%% add_precomments(get_precomments(Source), Target))</code>, but +%% potentially more efficient.</p> +%% +%% @see comment/2 +%% @see get_precomments/1 +%% @see get_postcomments/1 +%% @see add_precomments/2 +%% @see add_postcomments/2 + +join_comments(Source, Target) -> + add_postcomments( + get_postcomments(Source), + add_precomments(get_precomments(Source), Target)). + + +%% ===================================================================== +%% @spec get_ann(syntaxTree()) -> [term()] +%% +%% @doc Returns the list of user annotations associated with a syntax +%% tree node. For a newly created node, this is the empty list. The +%% annotations may be any terms. +%% +%% @see set_ann/2 +%% @see get_attrs/1 + +get_ann(#tree{attr = Attr}) -> Attr#attr.ann; +get_ann(#wrapper{attr = Attr}) -> Attr#attr.ann; +get_ann(_) -> []. + + +%% ===================================================================== +%% @spec set_ann(Node::syntaxTree(), Annotations::[term()]) -> +%% syntaxTree() +%% +%% @doc Sets the list of user annotations of <code>Node</code> to +%% <code>Annotations</code>. 
+%% +%% @see get_ann/1 +%% @see add_ann/2 +%% @see copy_ann/2 + +set_ann(Node, As) -> + case Node of + #tree{attr = Attr} -> + Node#tree{attr = Attr#attr{ann = As}}; + #wrapper{attr = Attr} -> + Node#wrapper{attr = Attr#attr{ann = As}}; + _ -> + %% Assume we have an `erl_parse' node and create a wrapper + %% structure to carry the annotation. + set_ann(wrap(Node), As) + end. + + +%% ===================================================================== +%% @spec add_ann(Annotation::term(), Node::syntaxTree()) -> syntaxTree() +%% +%% @doc Appends the term <code>Annotation</code> to the list of user +%% annotations of <code>Node</code>. +%% +%% <p>Note: this is equivalent to <code>set_ann(Node, [Annotation | +%% get_ann(Node)])</code>, but potentially more efficient.</p> +%% +%% @see get_ann/1 +%% @see set_ann/2 + +add_ann(A, Node) -> + case Node of + #tree{attr = Attr} -> + Node#tree{attr = Attr#attr{ann = [A | Attr#attr.ann]}}; + #wrapper{attr = Attr} -> + Node#wrapper{attr = Attr#attr{ann = [A | Attr#attr.ann]}}; + _ -> + %% Assume we have an `erl_parse' node and create a wrapper + %% structure to carry the annotation. + add_ann(A, wrap(Node)) + end. + + +%% ===================================================================== +%% @spec copy_ann(Source::syntaxTree(), Target::syntaxTree()) -> +%% syntaxTree() +%% +%% @doc Copies the list of user annotations from <code>Source</code> to +%% <code>Target</code>. +%% +%% <p>Note: this is equivalent to <code>set_ann(Target, +%% get_ann(Source))</code>, but potentially more efficient.</p> +%% +%% @see get_ann/1 +%% @see set_ann/2 + +copy_ann(Source, Target) -> + set_ann(Target, get_ann(Source)). + + +%% ===================================================================== +%% @spec get_attrs(syntaxTree()) -> syntaxTreeAttributes() +%% +%% @doc Returns a representation of the attributes associated with a +%% syntax tree node. The attributes are all the extra information that +%% can be attached to a node. Currently, this includes position +%% information, source code comments, and user annotations. The result +%% of this function cannot be inspected directly; only attached to +%% another node (cf. <code>set_attrs/2</code>). +%% +%% <p>For accessing individual attributes, see <code>get_pos/1</code>, +%% <code>get_ann/1</code>, <code>get_precomments/1</code> and +%% <code>get_postcomments/1</code>.</p> +%% +%% @type syntaxTreeAttributes(). This is an abstract representation of +%% syntax tree node attributes; see the function <a +%% href="#get_attrs-1"><code>get_attrs/1</code></a>. +%% +%% @see set_attrs/2 +%% @see get_pos/1 +%% @see get_ann/1 +%% @see get_precomments/1 +%% @see get_postcomments/1 + +get_attrs(#tree{attr = Attr}) -> Attr; +get_attrs(#wrapper{attr = Attr}) -> Attr; +get_attrs(Node) -> #attr{pos = get_pos(Node), + ann = get_ann(Node), + com = get_com(Node)}. + + +%% ===================================================================== +%% @spec set_attrs(Node::syntaxTree(), +%% Attributes::syntaxTreeAttributes()) -> syntaxTree() +%% +%% @doc Sets the attributes of <code>Node</code> to +%% <code>Attributes</code>. +%% +%% @see get_attrs/1 +%% @see copy_attrs/2 + +set_attrs(Node, Attr) -> + case Node of + #tree{} -> + Node#tree{attr = Attr}; + #wrapper{} -> + Node#wrapper{attr = Attr}; + _ -> + set_attrs(wrap(Node), Attr) + end. 
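+
+%% Usage sketch (illustrative only): comments and user annotations are both
+%% part of a node's "attributes"; adding either to a plain `erl_parse'
+%% tuple transparently wraps the node. Hypothetical shell session.
+%%
+%%   1> N0 = erl_syntax:add_ann({bound, ['X']}, {var, 1, 'X'}).
+%%   2> erl_syntax:get_ann(N0).
+%%   [{bound,['X']}]
+%%   3> erl_syntax:has_comments(N0).
+%%   false
+%%   4> N1 = erl_syntax:add_postcomments([erl_syntax:comment([" x"])], N0).
+%%   5> erl_syntax:has_comments(N1).
+%%   true
+%%   6> erl_syntax:get_ann(erl_syntax:remove_comments(N1)).
+%%   [{bound,['X']}]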
+ + +%% ===================================================================== +%% @spec copy_attrs(Source::syntaxTree(), Target::syntaxTree()) -> +%% syntaxTree() +%% +%% @doc Copies the attributes from <code>Source</code> to +%% <code>Target</code>. +%% +%% <p>Note: this is equivalent to <code>set_attrs(Target, +%% get_attrs(Source))</code>, but potentially more efficient.</p> +%% +%% @see get_attrs/1 +%% @see set_attrs/2 + +copy_attrs(S, T) -> + set_attrs(T, get_attrs(S)). + + +%% ===================================================================== +%% @spec comment(Strings) -> syntaxTree() +%% @equiv comment(none, Strings) + +comment(Strings) -> + comment(none, Strings). + + +%% ===================================================================== +%% @spec comment(Padding, Strings::[string()]) -> syntaxTree() +%% Padding = none | integer() +%% +%% @doc Creates an abstract comment with the given padding and text. If +%% <code>Strings</code> is a (possibly empty) list +%% <code>["<em>Txt1</em>", ..., "<em>TxtN</em>"]</code>, the result +%% represents the source code text +%% <pre> +%% %<em>Txt1</em> +%% ... +%% %<em>TxtN</em></pre> +%% <code>Padding</code> states the number of empty character positions +%% to the left of the comment separating it horizontally from +%% source code on the same line (if any). If <code>Padding</code> is +%% <code>none</code>, a default positive number is used. If +%% <code>Padding</code> is an integer less than 1, there should be no +%% separating space. Comments are in themselves regarded as source +%% program forms. +%% +%% @see comment/1 +%% @see is_form/1 + +-record(comment, {pad, text}). + +%% type(Node) = comment +%% data(Node) = #comment{pad :: Padding, text :: Strings} +%% +%% Padding = none | integer() +%% Strings = [string()] + +comment(Pad, Strings) -> + tree(comment, #comment{pad = Pad, text = Strings}). + + +%% ===================================================================== +%% @spec comment_text(syntaxTree()) -> [string()] +%% +%% @doc Returns the lines of text of the abstract comment. +%% +%% @see comment/2 + +comment_text(Node) -> + (data(Node))#comment.text. + + +%% ===================================================================== +%% @spec comment_padding(syntaxTree()) -> none | integer() +%% +%% @doc Returns the amount of padding before the comment, or +%% <code>none</code>. The latter means that a default padding may be +%% used. +%% +%% @see comment/2 + +comment_padding(Node) -> + (data(Node))#comment.pad. + + +%% ===================================================================== +%% @spec form_list(Forms::[syntaxTree()]) -> syntaxTree() +%% +%% @doc Creates an abstract sequence of "source code forms". If +%% <code>Forms</code> is <code>[F1, ..., Fn]</code>, where each +%% <code>Fi</code> is a form (cf. <code>is_form/1</code>, the result +%% represents +%% <pre> +%% <em>F1</em> +%% ... +%% <em>Fn</em></pre> +%% where the <code>Fi</code> are separated by one or more line breaks. A +%% node of type <code>form_list</code> is itself regarded as a source +%% code form; cf. <code>flatten_form_list/1</code>. +%% +%% <p>Note: this is simply a way of grouping source code forms as a +%% single syntax tree, usually in order to form an Erlang module +%% definition.</p> +%% +%% @see form_list_elements/1 +%% @see is_form/1 +%% @see flatten_form_list/1 + +%% type(Node) = form_list +%% data(Node) = [Form] +%% +%% Form = syntaxTree() +%% is_form(Form) = true + +form_list(Forms) -> + tree(form_list, Forms). 
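+
+%% Usage sketch (illustrative only): a `form_list' groups forms into a
+%% single tree, e.g. in order to hand a whole small module to
+%% `erl_prettypr:format/1' from the accompanying erl_prettypr module;
+%% `function/2' and `clause/3' are defined elsewhere in this module. The
+%% second call below should print a "-module(m)." attribute followed by a
+%% definition of f/0; the exact layout is up to the pretty printer.
+%% Hypothetical shell session.
+%%
+%%   1> Forms = erl_syntax:form_list(
+%%          [erl_syntax:attribute(erl_syntax:atom(module),
+%%                                [erl_syntax:atom(m)]),
+%%           erl_syntax:function(erl_syntax:atom(f),
+%%                               [erl_syntax:clause([], none,
+%%                                                  [erl_syntax:atom(ok)])])]).
+%%   2> io:put_chars(erl_prettypr:format(Forms)).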
+ + +%% ===================================================================== +%% @spec form_list_elements(syntaxTree()) -> [syntaxTree()] +%% +%% @doc Returns the list of subnodes of a <code>form_list</code> node. +%% +%% @see form_list/1 + +form_list_elements(Node) -> + data(Node). + + +%% ===================================================================== +%% @spec flatten_form_list(Node::syntaxTree()) -> syntaxTree() +%% +%% @doc Flattens sublists of a <code>form_list</code> node. Returns +%% <code>Node</code> with all subtrees of type <code>form_list</code> +%% recursively expanded, yielding a single "flat" abstract form +%% sequence. +%% +%% @see form_list/1 + +flatten_form_list(Node) -> + Fs = form_list_elements(Node), + Fs1 = lists:reverse(flatten_form_list_1(Fs, [])), + copy_attrs(Node, form_list(Fs1)). + +flatten_form_list_1([F | Fs], As) -> + case type(F) of + form_list -> + As1 = flatten_form_list_1(form_list_elements(F), As), + flatten_form_list_1(Fs, As1); + _ -> + flatten_form_list_1(Fs, [F | As]) + end; +flatten_form_list_1([], As) -> + As. + + +%% ===================================================================== +%% @spec text(String::string()) -> syntaxTree() +%% +%% @doc Creates an abstract piece of source code text. The result +%% represents exactly the sequence of characters in <code>String</code>. +%% This is useful in cases when one wants full control of the resulting +%% output, e.g., for the appearance of floating-point numbers or macro +%% definitions. +%% +%% @see text_string/1 + +%% type(Node) = text +%% data(Node) = string() + +text(String) -> + tree(text, String). + + +%% ===================================================================== +%% @spec text_string(syntaxTree()) -> string() +%% +%% @doc Returns the character sequence represented by a +%% <code>text</code> node. +%% +%% @see text/1 + +text_string(Node) -> + data(Node). + + +%% ===================================================================== +%% @spec variable(Name) -> syntaxTree() +%% Name = atom() | string() +%% +%% @doc Creates an abstract variable with the given name. +%% <code>Name</code> may be any atom or string that represents a +%% lexically valid variable name, but <em>not</em> a single underscore +%% character; cf. <code>underscore/0</code>. +%% +%% <p>Note: no checking is done whether the character sequence +%% represents a proper variable name, i.e., whether or not its first +%% character is an uppercase Erlang character, or whether it does not +%% contain control characters, whitespace, etc.</p> +%% +%% @see variable_name/1 +%% @see variable_literal/1 +%% @see underscore/0 + +%% type(Node) = variable +%% data(Node) = atom() +%% +%% `erl_parse' representation: +%% +%% {var, Pos, Name} +%% +%% Name = atom() \ '_' + +variable(Name) when is_atom(Name) -> + tree(variable, Name); +variable(Name) -> + tree(variable, list_to_atom(Name)). + +revert_variable(Node) -> + Pos = get_pos(Node), + Name = variable_name(Node), + {var, Pos, Name}. + + +%% ===================================================================== +%% @spec variable_name(syntaxTree()) -> atom() +%% +%% @doc Returns the name of a <code>variable</code> node as an atom. +%% +%% @see variable/1 + +variable_name(Node) -> + case unwrap(Node) of + {var, _, Name} -> + Name; + Node1 -> + data(Node1) + end. + + +%% ===================================================================== +%% @spec variable_literal(syntaxTree()) -> string() +%% +%% @doc Returns the name of a <code>variable</code> node as a string. 
+%% +%% @see variable/1 + +variable_literal(Node) -> + case unwrap(Node) of + {var, _, Name} -> + atom_to_list(Name); + Node1 -> + atom_to_list(data(Node1)) + end. + + +%% ===================================================================== +%% @spec underscore() -> syntaxTree() +%% +%% @doc Creates an abstract universal pattern ("<code>_</code>"). The +%% lexical representation is a single underscore character. Note that +%% this is <em>not</em> a variable, lexically speaking. +%% +%% @see variable/1 + +%% type(Node) = underscore +%% data(Node) = [] +%% +%% `erl_parse' representation: +%% +%% {var, Pos, '_'} + +underscore() -> + tree(underscore, []). + +revert_underscore(Node) -> + Pos = get_pos(Node), + {var, Pos, '_'}. + + +%% ===================================================================== +%% @spec integer(Value::integer()) -> syntaxTree() +%% +%% @doc Creates an abstract integer literal. The lexical representation +%% is the canonical decimal numeral of <code>Value</code>. +%% +%% @see integer_value/1 +%% @see integer_literal/1 +%% @see is_integer/2 + +%% type(Node) = integer +%% data(Node) = integer() +%% +%% `erl_parse' representation: +%% +%% {integer, Pos, Value} +%% +%% Value = integer() + +integer(Value) -> + tree(integer, Value). + +revert_integer(Node) -> + Pos = get_pos(Node), + {integer, Pos, integer_value(Node)}. + + +%% ===================================================================== +%% @spec is_integer(Node::syntaxTree(), Value::integer()) -> bool() +%% +%% @doc Returns <code>true</code> if <code>Node</code> has type +%% <code>integer</code> and represents <code>Value</code>, otherwise +%% <code>false</code>. +%% +%% @see integer/1 + +is_integer(Node, Value) -> + case unwrap(Node) of + {integer, _, Value} -> + true; + #tree{type = integer, data = Value} -> + true; + _ -> + false + end. + + +%% ===================================================================== +%% @spec integer_value(syntaxTree()) -> integer() +%% +%% @doc Returns the value represented by an <code>integer</code> node. +%% +%% @see integer/1 + +integer_value(Node) -> + case unwrap(Node) of + {integer, _, Value} -> + Value; + Node1 -> + data(Node1) + end. + + +%% ===================================================================== +%% @spec integer_literal(syntaxTree()) -> string() +%% +%% @doc Returns the numeral string represented by an +%% <code>integer</code> node. +%% +%% @see integer/1 + +integer_literal(Node) -> + integer_to_list(integer_value(Node)). + + +%% ===================================================================== +%% @spec float(Value::float()) -> syntaxTree() +%% +%% @doc Creates an abstract floating-point literal. The lexical +%% representation is the decimal floating-point numeral of +%% <code>Value</code>. +%% +%% @see float_value/1 +%% @see float_literal/1 + +%% type(Node) = float +%% data(Node) = Value +%% +%% Value = float() +%% +%% `erl_parse' representation: +%% +%% {float, Pos, Value} +%% +%% Value = float() + +%% Note that under current versions of Erlang, the name `float/1' cannot +%% be used for local calls (i.e., within the module) - it will be +%% overridden by the type conversion BIF of the same name, so always use +%% `make_float/1' for local calls. + +float(Value) -> + make_float(Value). + +make_float(Value) -> + tree(float, Value). + +revert_float(Node) -> + Pos = get_pos(Node), + {float, Pos, float_value(Node)}. 
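+
+%% Usage sketch (illustrative only): literal nodes remember their value,
+%% and the corresponding `*_literal' functions give a printed form.
+%% Hypothetical shell session.
+%%
+%%   1> erl_syntax:integer_literal(erl_syntax:integer(42)).
+%%   "42"
+%%   2> erl_syntax:variable_name(erl_syntax:variable("Xs")).
+%%   'Xs'
+%%   3> erl_syntax:is_integer({integer, 0, 42}, 42).
+%%   true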
+ + +%% ===================================================================== +%% @spec float_value(syntaxTree()) -> float() +%% +%% @doc Returns the value represented by a <code>float</code> node. Note +%% that floating-point values should usually not be compared for +%% equality. +%% +%% @see float/1 + +float_value(Node) -> + case unwrap(Node) of + {float, _, Value} -> + Value; + Node1 -> + data(Node1) + end. + + +%% ===================================================================== +%% @spec float_literal(syntaxTree()) -> string() +%% +%% @doc Returns the numeral string represented by a <code>float</code> +%% node. +%% +%% @see float/1 + +float_literal(Node) -> + float_to_list(float_value(Node)). + + +%% ===================================================================== +%% @spec char(Value::char()) -> syntaxTree() +%% +%% @doc Creates an abstract character literal. The result represents +%% "<code>$<em>Name</em></code>", where <code>Name</code> corresponds to +%% <code>Value</code>. +%% +%% <p>Note: the literal corresponding to a particular character value is +%% not uniquely defined. E.g., the character "<code>a</code>" can be +%% written both as "<code>$a</code>" and "<code>$\141</code>", and a Tab +%% character can be written as "<code>$\11</code>", "<code>$\011</code>" +%% or "<code>$\t</code>".</p> +%% +%% @see char_value/1 +%% @see char_literal/1 +%% @see is_char/2 + +%% type(Node) = char +%% data(Node) = char() +%% +%% `erl_parse' representation: +%% +%% {char, Pos, Code} +%% +%% Code = integer() + +char(Char) -> + tree(char, Char). + +revert_char(Node) -> + Pos = get_pos(Node), + {char, Pos, char_value(Node)}. + + +%% ===================================================================== +%% @spec is_char(Node::syntaxTree(), Value::char()) -> bool() +%% +%% @doc Returns <code>true</code> if <code>Node</code> has type +%% <code>char</code> and represents <code>Value</code>, otherwise +%% <code>false</code>. +%% +%% @see char/1 + +is_char(Node, Value) -> + case unwrap(Node) of + {char, _, Value} -> + true; + #tree{type = char, data = Value} -> + true; + _ -> + false + end. + + +%% ===================================================================== +%% @spec char_value(syntaxTree()) -> char() +%% +%% @doc Returns the value represented by a <code>char</code> node. +%% +%% @see char/1 + +char_value(Node) -> + case unwrap(Node) of + {char, _, Char} -> + Char; + Node1 -> + data(Node1) + end. + + +%% ===================================================================== +%% @spec char_literal(syntaxTree()) -> string() +%% +%% @doc Returns the literal string represented by a <code>char</code> +%% node. This includes the leading "<code>$</code>" character. +%% +%% @see char/1 + +char_literal(Node) -> + io_lib:write_char(char_value(Node)). + + +%% ===================================================================== +%% @spec string(Value::string()) -> syntaxTree() +%% +%% @doc Creates an abstract string literal. The result represents +%% <code>"<em>Text</em>"</code> (including the surrounding +%% double-quotes), where <code>Text</code> corresponds to the sequence +%% of characters in <code>Value</code>, but not representing a +%% <em>specific</em> string literal. E.g., the result of +%% <code>string("x\ny")</code> represents any and all of +%% <code>"x\ny"</code>, <code>"x\12y"</code>, <code>"x\012y"</code> and +%% <code>"x\^Jy"</code>; cf. <code>char/1</code>. 
+%% +%% @see string_value/1 +%% @see string_literal/1 +%% @see is_string/2 +%% @see char/1 + +%% type(Node) = string +%% data(Node) = string() +%% +%% `erl_parse' representation: +%% +%% {string, Pos, Chars} +%% +%% Chars = string() + +string(String) -> + tree(string, String). + +revert_string(Node) -> + Pos = get_pos(Node), + {string, Pos, string_value(Node)}. + + +%% ===================================================================== +%% @spec is_string(Node::syntaxTree(), Value::string()) -> bool() +%% +%% @doc Returns <code>true</code> if <code>Node</code> has type +%% <code>string</code> and represents <code>Value</code>, otherwise +%% <code>false</code>. +%% +%% @see string/1 + +is_string(Node, Value) -> + case unwrap(Node) of + {string, _, Value} -> + true; + #tree{type = string, data = Value} -> + true; + _ -> + false + end. + + +%% ===================================================================== +%% @spec string_value(syntaxTree()) -> string() +%% +%% @doc Returns the value represented by a <code>string</code> node. +%% +%% @see string/1 + +string_value(Node) -> + case unwrap(Node) of + {string, _, List} -> + List; + Node1 -> + data(Node1) + end. + + +%% ===================================================================== +%% @spec string_literal(syntaxTree()) -> string() +%% +%% @doc Returns the literal string represented by a <code>string</code> +%% node. This includes surrounding double-quote characters. +%% +%% @see string/1 + +string_literal(Node) -> + io_lib:write_string(string_value(Node)). + + +%% ===================================================================== +%% @spec atom(Name) -> syntaxTree() +%% Name = atom() | string() +%% +%% @doc Creates an abstract atom literal. The print name of the atom is +%% the character sequence represented by <code>Name</code>. +%% +%% @see atom_value/1 +%% @see atom_name/1 +%% @see atom_literal/1 +%% @see is_atom/2 + +%% type(Node) = atom +%% data(Node) = atom() +%% +%% `erl_parse' representation: +%% +%% {atom, Pos, Value} +%% +%% Value = atom() + +atom(Name) when is_atom(Name) -> + tree(atom, Name); +atom(Name) -> + tree(atom, list_to_atom(Name)). + +revert_atom(Node) -> + Pos = get_pos(Node), + {atom, Pos, atom_value(Node)}. + + +%% ===================================================================== +%% @spec is_atom(Node::syntaxTree(), Value::atom()) -> bool() +%% +%% @doc Returns <code>true</code> if <code>Node</code> has type +%% <code>atom</code> and represents <code>Value</code>, otherwise +%% <code>false</code>. +%% +%% @see atom/1 + +is_atom(Node, Value) -> + case unwrap(Node) of + {atom, _, Value} -> + true; + #tree{type = atom, data = Value} -> + true; + _ -> + false + end. + + +%% ===================================================================== +%% @spec atom_value(syntaxTree()) -> atom() +%% +%% @doc Returns the value represented by an <code>atom</code> node. +%% +%% @see atom/1 + +atom_value(Node) -> + case unwrap(Node) of + {atom, _, Name} -> + Name; + Node1 -> + data(Node1) + end. + + +%% ===================================================================== +%% @spec atom_name(syntaxTree()) -> string() +%% +%% @doc Returns the printname of an <code>atom</code> node. +%% +%% @see atom/1 + +atom_name(Node) -> + atom_to_list(atom_value(Node)). + + +%% ===================================================================== +%% @spec atom_literal(syntaxTree()) -> string() +%% +%% @doc Returns the literal string represented by an <code>atom</code> +%% node. 
This includes surrounding single-quote characters if necessary. +%% +%% <p>Note that e.g. the result of <code>atom("x\ny")</code> represents +%% any and all of <code>'x\ny'</code>, <code>'x\12y'</code>, +%% <code>'x\012y'</code> and <code>'x\^Jy\'</code>; cf. +%% <code>string/1</code>.</p> +%% +%% @see atom/1 +%% @see string/1 + +atom_literal(Node) -> + io_lib:write_atom(atom_value(Node)). + + +%% ===================================================================== +%% @spec tuple(Elements::[syntaxTree()]) -> syntaxTree() +%% +%% @doc Creates an abstract tuple. If <code>Elements</code> is +%% <code>[X1, ..., Xn]</code>, the result represents +%% "<code>{<em>X1</em>, ..., <em>Xn</em>}</code>". +%% +%% <p>Note: The Erlang language has distinct 1-tuples, i.e., +%% <code>{X}</code> is always distinct from <code>X</code> itself.</p> +%% +%% @see tuple_elements/1 +%% @see tuple_size/1 + +%% type(Node) = tuple +%% data(Node) = Elements +%% +%% Elements = [syntaxTree()] +%% +%% `erl_parse' representation: +%% +%% {tuple, Pos, Elements} +%% +%% Elements = [erl_parse()] + +tuple(List) -> + tree(tuple, List). + +revert_tuple(Node) -> + Pos = get_pos(Node), + {tuple, Pos, tuple_elements(Node)}. + + +%% ===================================================================== +%% @spec tuple_elements(syntaxTree()) -> [syntaxTree()] +%% +%% @doc Returns the list of element subtrees of a <code>tuple</code> +%% node. +%% +%% @see tuple/1 + +tuple_elements(Node) -> + case unwrap(Node) of + {tuple, _, List} -> + List; + Node1 -> + data(Node1) + end. + + +%% ===================================================================== +%% @spec tuple_size(syntaxTree()) -> integer() +%% +%% @doc Returns the number of elements of a <code>tuple</code> node. +%% +%% <p>Note: this is equivalent to +%% <code>length(tuple_elements(Node))</code>, but potentially more +%% efficient.</p> +%% +%% @see tuple/1 +%% @see tuple_elements/1 + +tuple_size(Node) -> + length(tuple_elements(Node)). + + +%% ===================================================================== +%% @spec list(List) -> syntaxTree() +%% @equiv list(List, none) + +list(List) -> + list(List, none). + + +%% ===================================================================== +%% @spec list(List, Tail) -> syntaxTree() +%% List = [syntaxTree()] +%% Tail = none | syntaxTree() +%% +%% @doc Constructs an abstract list skeleton. The result has type +%% <code>list</code> or <code>nil</code>. If <code>List</code> is a +%% nonempty list <code>[E1, ..., En]</code>, the result has type +%% <code>list</code> and represents either "<code>[<em>E1</em>, ..., +%% <em>En</em>]</code>", if <code>Tail</code> is <code>none</code>, or +%% otherwise "<code>[<em>E1</em>, ..., <em>En</em> | +%% <em>Tail</em>]</code>". If <code>List</code> is the empty list, +%% <code>Tail</code> <em>must</em> be <code>none</code>, and in that +%% case the result has type <code>nil</code> and represents +%% "<code>[]</code>" (cf. <code>nil/0</code>). +%% +%% <p>The difference between lists as semantic objects (built up of +%% individual "cons" and "nil" terms) and the various syntactic forms +%% for denoting lists may be bewildering at first. This module provides +%% functions both for exact control of the syntactic representation as +%% well as for the simple composition and deconstruction in terms of +%% cons and head/tail operations.</p> +%% +%% <p>Note: in <code>list(Elements, none)</code>, the "nil" list +%% terminator is implicit and has no associated information (cf. 
+%% <code>get_attrs/1</code>), while in the seemingly equivalent +%% <code>list(Elements, Tail)</code> when <code>Tail</code> has type +%% <code>nil</code>, the list terminator subtree <code>Tail</code> may +%% have attached attributes such as position, comments, and annotations, +%% which will be preserved in the result.</p> +%% +%% @see nil/0 +%% @see list/1 +%% @see list_prefix/1 +%% @see list_suffix/1 +%% @see cons/2 +%% @see list_head/1 +%% @see list_tail/1 +%% @see is_list_skeleton/1 +%% @see is_proper_list/1 +%% @see list_elements/1 +%% @see list_length/1 +%% @see normalize_list/1 +%% @see compact_list/1 +%% @see get_attrs/1 + +-record(list, {prefix, suffix}). + +%% type(Node) = list +%% data(Node) = #list{prefix :: Elements, suffix :: Tail} +%% +%% Elements = [syntaxTree()] +%% Tail = none | syntaxTree() +%% +%% `erl_parse' representation: +%% +%% {cons, Pos, Head, Tail} +%% +%% Head = Tail = [erl_parse()] +%% +%% This represents `[<Head> | <Tail>]', or more generally `[<Head> +%% <Suffix>]' where the form of <Suffix> can depend on the +%% structure of <Tail>; there is no fixed printed form. + +list([], none) -> + nil(); +list(Elements, Tail) when Elements /= [] -> + tree(list, #list{prefix = Elements, suffix = Tail}). + +revert_list(Node) -> + Pos = get_pos(Node), + P = list_prefix(Node), + S = case list_suffix(Node) of + none -> + revert_nil(set_pos(nil(), Pos)); + S1 -> + S1 + end, + lists:foldr(fun (X, A) -> + {cons, Pos, X, A} + end, + S, P). + +%% ===================================================================== +%% @spec nil() -> syntaxTree() +%% +%% @doc Creates an abstract empty list. The result represents +%% "<code>[]</code>". The empty list is traditionally called "nil". +%% +%% @see list/2 +%% @see is_list_skeleton/1 + +%% type(Node) = nil +%% data(Node) = term() +%% +%% `erl_parse' representation: +%% +%% {nil, Pos} + +nil() -> + tree(nil). + +revert_nil(Node) -> + Pos = get_pos(Node), + {nil, Pos}. + + +%% ===================================================================== +%% @spec list_prefix(Node::syntaxTree()) -> [syntaxTree()] +%% +%% @doc Returns the prefix element subtrees of a <code>list</code> node. +%% If <code>Node</code> represents "<code>[<em>E1</em>, ..., +%% <em>En</em>]</code>" or "<code>[<em>E1</em>, ..., <em>En</em> | +%% <em>Tail</em>]</code>", the returned value is <code>[E1, ..., +%% En]</code>. +%% +%% @see list/2 + +list_prefix(Node) -> + case unwrap(Node) of + {cons, _, Head, _} -> + [Head]; + Node1 -> + (data(Node1))#list.prefix + end. + + +%% ===================================================================== +%% @spec list_suffix(Node::syntaxTree()) -> none | syntaxTree() +%% +%% @doc Returns the suffix subtree of a <code>list</code> node, if one +%% exists. If <code>Node</code> represents "<code>[<em>E1</em>, ..., +%% <em>En</em> | <em>Tail</em>]</code>", the returned value is +%% <code>Tail</code>, otherwise, i.e., if <code>Node</code> represents +%% "<code>[<em>E1</em>, ..., <em>En</em>]</code>", <code>none</code> is +%% returned. +%% +%% <p>Note that even if this function returns some <code>Tail</code> +%% that is not <code>none</code>, the type of <code>Tail</code> can be +%% <code>nil</code>, if the tail has been given explicitly, and the list +%% skeleton has not been compacted (cf. 
+%% <code>compact_list/1</code>).</p> +%% +%% @see list/2 +%% @see nil/0 +%% @see compact_list/1 + +list_suffix(Node) -> + case unwrap(Node) of + {cons, _, _, Tail} -> + %% If there could be comments/annotations on the tail node, + %% we should not return `none' even if it has type `nil'. + case Tail of + {nil, _} -> + none; % no interesting information is lost + _ -> + Tail + end; + Node1 -> + (data(Node1))#list.suffix + end. + + +%% ===================================================================== +%% @spec cons(Head::syntaxTree(), Tail::syntaxTree()) -> syntaxTree() +%% +%% @doc "Optimising" list skeleton cons operation. Creates an abstract +%% list skeleton whose first element is <code>Head</code> and whose tail +%% corresponds to <code>Tail</code>. This is similar to +%% <code>list([Head], Tail)</code>, except that <code>Tail</code> may +%% not be <code>none</code>, and that the result does not necessarily +%% represent exactly "<code>[<em>Head</em> | <em>Tail</em>]</code>", but +%% may depend on the <code>Tail</code> subtree. E.g., if +%% <code>Tail</code> represents <code>[X, Y]</code>, the result may +%% represent "<code>[<em>Head</em>, X, Y]</code>", rather than +%% "<code>[<em>Head</em> | [X, Y]]</code>". Annotations on +%% <code>Tail</code> itself may be lost if <code>Tail</code> represents +%% a list skeleton, but comments on <code>Tail</code> are propagated to +%% the result. +%% +%% @see list/2 +%% @see list_head/1 +%% @see list_tail/1 + +cons(Head, Tail) -> + case type(Tail) of + list -> + copy_comments(Tail, list([Head | list_prefix(Tail)], + list_suffix(Tail))); + nil -> + copy_comments(Tail, list([Head])); + _ -> + list([Head], Tail) + end. + + +%% ===================================================================== +%% @spec list_head(Node::syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the head element subtree of a <code>list</code> node. If +%% <code>Node</code> represents "<code>[<em>Head</em> ...]</code>", the +%% result will represent "<code><em>Head</em></code>". +%% +%% @see list/2 +%% @see list_tail/1 +%% @see cons/2 + +list_head(Node) -> + hd(list_prefix(Node)). + + +%% ===================================================================== +%% @spec list_tail(Node::syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the tail of a <code>list</code> node. If +%% <code>Node</code> represents a single-element list +%% "<code>[<em>E</em>]</code>", then the result has type +%% <code>nil</code>, representing "<code>[]</code>". If +%% <code>Node</code> represents "<code>[<em>E1</em>, <em>E2</em> +%% ...]</code>", the result will represent "<code>[<em>E2</em> +%% ...]</code>", and if <code>Node</code> represents +%% "<code>[<em>Head</em> | <em>Tail</em>]</code>", the result will +%% represent "<code><em>Tail</em></code>". +%% +%% @see list/2 +%% @see list_head/1 +%% @see cons/2 + +list_tail(Node) -> + Tail = list_suffix(Node), + case tl(list_prefix(Node)) of + [] -> + if Tail =:= none -> + nil(); % implicit list terminator. + true -> + Tail + end; + Es -> + list(Es, Tail) % `Es' is nonempty. + end. + + +%% ===================================================================== +%% @spec is_list_skeleton(syntaxTree()) -> bool() +%% +%% @doc Returns <code>true</code> if <code>Node</code> has type +%% <code>list</code> or <code>nil</code>, otherwise <code>false</code>. +%% +%% @see list/2 +%% @see nil/0 + +is_list_skeleton(Node) -> + case type(Node) of + list -> true; + nil -> true; + _ -> false + end. 
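+
+%% Usage sketch (illustrative only): building a list skeleton with `cons/2'
+%% and taking it apart again with the accessors above. Hypothetical shell
+%% session.
+%%
+%%   1> L = erl_syntax:cons(erl_syntax:atom(a),
+%%                          erl_syntax:list([erl_syntax:atom(b)])).
+%%   2> erl_syntax:type(L).
+%%   list
+%%   3> length(erl_syntax:list_prefix(L)).
+%%   2
+%%   4> erl_syntax:list_suffix(L).
+%%   none
+%%   5> erl_syntax:type(erl_syntax:list_tail(erl_syntax:list_tail(L))).
+%%   nil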
+ + +%% ===================================================================== +%% @spec is_proper_list(Node::syntaxTree()) -> bool() +%% +%% @doc Returns <code>true</code> if <code>Node</code> represents a +%% proper list, and <code>false</code> otherwise. A proper list is a +%% list skeleton either on the form "<code>[]</code>" or +%% "<code>[<em>E1</em>, ..., <em>En</em>]</code>", or "<code>[... | +%% <em>Tail</em>]</code>" where recursively <code>Tail</code> also +%% represents a proper list. +%% +%% <p>Note: Since <code>Node</code> is a syntax tree, the actual +%% run-time values corresponding to its subtrees may often be partially +%% or completely unknown. Thus, if <code>Node</code> represents e.g. +%% "<code>[... | Ns]</code>" (where <code>Ns</code> is a variable), then +%% the function will return <code>false</code>, because it is not known +%% whether <code>Ns</code> will be bound to a list at run-time. If +%% <code>Node</code> instead represents e.g. "<code>[1, 2, 3]</code>" or +%% "<code>[A | []]</code>", then the function will return +%% <code>true</code>.</p> +%% +%% @see list/2 + +is_proper_list(Node) -> + case type(Node) of + list -> + case list_suffix(Node) of + none -> + true; + Tail -> + is_proper_list(Tail) + end; + nil -> + true; + _ -> + false + end. + + +%% ===================================================================== +%% @spec list_elements(Node::syntaxTree()) -> [syntaxTree()] +%% +%% @doc Returns the list of element subtrees of a list skeleton. +%% <code>Node</code> must represent a proper list. E.g., if +%% <code>Node</code> represents "<code>[<em>X1</em>, <em>X2</em> | +%% [<em>X3</em>, <em>X4</em> | []]</code>", then +%% <code>list_elements(Node)</code> yields the list <code>[X1, X2, X3, +%% X4]</code>. +%% +%% @see list/2 +%% @see is_proper_list/1 + +list_elements(Node) -> + lists:reverse(list_elements(Node, [])). + +list_elements(Node, As) -> + case type(Node) of + list -> + As1 = lists:reverse(list_prefix(Node)) ++ As, + case list_suffix(Node) of + none -> + As1; + Tail -> + list_elements(Tail, As1) + end; + nil -> + As + end. + + +%% ===================================================================== +%% @spec list_length(Node::syntaxTree()) -> integer() +%% +%% @doc Returns the number of element subtrees of a list skeleton. +%% <code>Node</code> must represent a proper list. E.g., if +%% <code>Node</code> represents "<code>[X1 | [X2, X3 | [X4, X5, +%% X6]]]</code>", then <code>list_length(Node)</code> returns the +%% integer 6. +%% +%% <p>Note: this is equivalent to +%% <code>length(list_elements(Node))</code>, but potentially more +%% efficient.</p> +%% +%% @see list/2 +%% @see is_proper_list/1 +%% @see list_elements/1 + +list_length(Node) -> + list_length(Node, 0). + +list_length(Node, A) -> + case type(Node) of + list -> + A1 = length(list_prefix(Node)) + A, + case list_suffix(Node) of + none -> + A1; + Tail -> + list_length(Tail, A1) + end; + nil -> + A + end. + + +%% ===================================================================== +%% @spec normalize_list(Node::syntaxTree()) -> syntaxTree() +%% +%% @doc Expands an abstract list skeleton to its most explicit form. If +%% <code>Node</code> represents "<code>[<em>E1</em>, ..., <em>En</em> | +%% <em>Tail</em>]</code>", the result represents "<code>[<em>E1</em> | +%% ... [<em>En</em> | <em>Tail1</em>] ... ]</code>", where +%% <code>Tail1</code> is the result of +%% <code>normalize_list(Tail)</code>. 
If <code>Node</code> represents +%% "<code>[<em>E1</em>, ..., <em>En</em>]</code>", the result simply +%% represents "<code>[<em>E1</em> | ... [<em>En</em> | []] ... +%% ]</code>". If <code>Node</code> does not represent a list skeleton, +%% <code>Node</code> itself is returned. +%% +%% @see list/2 +%% @see compact_list/1 + +normalize_list(Node) -> + case type(Node) of + list -> + P = list_prefix(Node), + case list_suffix(Node) of + none -> + copy_attrs(Node, normalize_list_1(P, nil())); + Tail -> + Tail1 = normalize_list(Tail), + copy_attrs(Node, normalize_list_1(P, Tail1)) + end; + _ -> + Node + end. + +normalize_list_1(Es, Tail) -> + lists:foldr(fun (X, A) -> + list([X], A) % not `cons'! + end, + Tail, Es). + + +%% ===================================================================== +%% @spec compact_list(Node::syntaxTree()) -> syntaxTree() +%% +%% @doc Yields the most compact form for an abstract list skeleton. The +%% result either represents "<code>[<em>E1</em>, ..., <em>En</em> | +%% <em>Tail</em>]</code>", where <code>Tail</code> is not a list +%% skeleton, or otherwise simply "<code>[<em>E1</em>, ..., +%% <em>En</em>]</code>". Annotations on subtrees of <code>Node</code> +%% that represent list skeletons may be lost, but comments will be +%% propagated to the result. Returns <code>Node</code> itself if +%% <code>Node</code> does not represent a list skeleton. +%% +%% @see list/2 +%% @see normalize_list/1 + +compact_list(Node) -> + case type(Node) of + list -> + case list_suffix(Node) of + none -> + Node; + Tail -> + case type(Tail) of + list -> + Tail1 = compact_list(Tail), + Node1 = list(list_prefix(Node) ++ + list_prefix(Tail1), + list_suffix(Tail1)), + join_comments(Tail1, + copy_attrs(Node, + Node1)); + nil -> + Node1 = list(list_prefix(Node)), + join_comments(Tail, + copy_attrs(Node, + Node1)); + _ -> + Node + end + end; + _ -> + Node + end. + + +%% ===================================================================== +%% @spec binary(Fields::[syntaxTree()]) -> syntaxTree() +%% +%% @doc Creates an abstract binary-object template. If +%% <code>Fields</code> is <code>[F1, ..., Fn]</code>, the result +%% represents "<code><<<em>F1</em>, ..., +%% <em>Fn</em>>></code>". +%% +%% @see binary_fields/1 +%% @see binary_field/2 + +%% type(Node) = binary +%% data(Node) = Fields +%% +%% Fields = [syntaxTree()] +%% +%% `erl_parse' representation: +%% +%% {bin, Pos, Fields} +%% +%% Fields = [Field] +%% Field = {bin_element, ...} +%% +%% See `binary_field' for documentation on `erl_parse' binary +%% fields (or "elements"). + +binary(List) -> + tree(binary, List). + +revert_binary(Node) -> + Pos = get_pos(Node), + {bin, Pos, binary_fields(Node)}. + + +%% ===================================================================== +%% @spec binary_fields(syntaxTree()) -> [syntaxTree()] +%% +%% @doc Returns the list of field subtrees of a <code>binary</code> +%% node. +%% +%% @see binary/1 +%% @see binary_field/2 + +binary_fields(Node) -> + case unwrap(Node) of + {bin, _, List} -> + List; + Node1 -> + data(Node1) + end. + + +%% ===================================================================== +%% @spec binary_field(Body) -> syntaxTree() +%% @equiv binary_field(Body, []) + +binary_field(Body) -> + binary_field(Body, []). + + +%% ===================================================================== +%% @spec binary_field(Body::syntaxTree(), Size, +%% Types::[syntaxTree()]) -> syntaxTree() +%% Size = none | syntaxTree() +%% +%% @doc Creates an abstract binary template field. 
+%% If <code>Size</code> is <code>none</code>, this is equivalent to +%% "<code>binary_field(Body, Types)</code>", otherwise it is +%% equivalent to "<code>binary_field(size_qualifier(Body, Size), +%% Types)</code>". +%% +%% (This is a utility function.) +%% +%% @see binary/1 +%% @see binary_field/2 +%% @see size_qualifier/2 + +binary_field(Body, none, Types) -> + binary_field(Body, Types); +binary_field(Body, Size, Types) -> + binary_field(size_qualifier(Body, Size), Types). + + +%% ===================================================================== +%% @spec binary_field(Body::syntaxTree(), Types::[syntaxTree()]) -> +%% syntaxTree() +%% +%% @doc Creates an abstract binary template field. If +%% <code>Types</code> is the empty list, the result simply represents +%% "<code><em>Body</em></code>", otherwise, if <code>Types</code> is +%% <code>[T1, ..., Tn]</code>, the result represents +%% "<code><em>Body</em>/<em>T1</em>-...-<em>Tn</em></code>". +%% +%% @see binary/1 +%% @see binary_field/1 +%% @see binary_field/3 +%% @see binary_field_body/1 +%% @see binary_field_types/1 +%% @see binary_field_size/1 + +-record(binary_field, {body, types}). + +%% type(Node) = binary_field +%% data(Node) = #binary_field{body :: Body, types :: Types} +%% +%% Body = syntaxTree() +%% Types = [Type] +%% +%% `erl_parse' representation: +%% +%% {bin_element, Pos, Expr, Size, TypeList} +%% +%% Expr = erl_parse() +%% Size = default | erl_parse() +%% TypeList = default | [Type] \ [] +%% Type = atom() | {atom(), integer()} + +binary_field(Body, Types) -> + tree(binary_field, #binary_field{body = Body, types = Types}). + +revert_binary_field(Node) -> + Pos = get_pos(Node), + Body = binary_field_body(Node), + {Expr, Size} = case type(Body) of + size_qualifier -> + %% Note that size qualifiers are not + %% revertible out of context. + {size_qualifier_body(Body), + size_qualifier_argument(Body)}; + _ -> + {Body, default} + end, + Types = case binary_field_types(Node) of + [] -> + default; + Ts -> + fold_binary_field_types(Ts) + end, + {bin_element, Pos, Expr, Size, Types}. + + +%% ===================================================================== +%% @spec binary_field_body(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the body subtree of a <code>binary_field</code>. +%% +%% @see binary_field/2 + +binary_field_body(Node) -> + case unwrap(Node) of + {bin_element, _, Body, Size, _} -> + if Size =:= default -> + Body; + true -> + size_qualifier(Body, Size) + end; + Node1 -> + (data(Node1))#binary_field.body + end. + + +%% ===================================================================== +%% @spec binary_field_types(Node::syntaxTree()) -> [syntaxTree()] +%% +%% @doc Returns the list of type-specifier subtrees of a +%% <code>binary_field</code> node. If <code>Node</code> represents +%% "<code>.../<em>T1</em>, ..., <em>Tn</em></code>", the result is +%% <code>[T1, ..., Tn]</code>, otherwise the result is the empty list. +%% +%% @see binary_field/2 + +binary_field_types(Node) -> + case unwrap(Node) of + {bin_element, Pos, _, _, Types} -> + if Types =:= default -> + []; + true -> + unfold_binary_field_types(Types, Pos) + end; + Node1 -> + (data(Node1))#binary_field.types + end. + + +%% ===================================================================== +%% @spec binary_field_size(Node::syntaxTree()) -> none | syntaxTree() +%% +%% @doc Returns the size specifier subtree of a +%% <code>binary_field</code> node, if any. 
If <code>Node</code> +%% represents "<code><em>Body</em>:<em>Size</em></code>" or +%% "<code><em>Body</em>:<em>Size</em>/<em>T1</em>, ..., +%% <em>Tn</em></code>", the result is <code>Size</code>, otherwise +%% <code>none</code> is returned. +%% +%% (This is a utility function.) +%% +%% @see binary_field/2 +%% @see binary_field/3 + +binary_field_size(Node) -> + case unwrap(Node) of + {bin_element, _, _, Size, _} -> + if Size =:= default -> + none; + true -> + Size + end; + Node1 -> + Body = (data(Node1))#binary_field.body, + case type(Body) of + size_qualifier -> + size_qualifier_argument(Body); + _ -> + none + end + end. + + +%% ===================================================================== +%% @spec size_qualifier(Body::syntaxTree(), Size::syntaxTree()) -> +%% syntaxTree() +%% +%% @doc Creates an abstract size qualifier. The result represents +%% "<code><em>Body</em>:<em>Size</em></code>". +%% +%% @see size_qualifier_body/1 +%% @see size_qualifier_argument/1 + +-record(size_qualifier, {body, size}). + +%% type(Node) = size_qualifier +%% data(Node) = #size_qualifier{body :: Body, size :: Size} +%% +%% Body = Size = syntaxTree() + +size_qualifier(Body, Size) -> + tree(size_qualifier, + #size_qualifier{body = Body, size = Size}). + + +%% ===================================================================== +%% @spec size_qualifier_body(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the body subtree of a <code>size_qualifier</code> +%% node. +%% +%% @see size_qualifier/2 + +size_qualifier_body(Node) -> + (data(Node))#size_qualifier.body. + + +%% ===================================================================== +%% @spec size_qualifier_argument(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the argument subtree (the size) of a +%% <code>size_qualifier</code> node. +%% +%% @see size_qualifier/2 + +size_qualifier_argument(Node) -> + (data(Node))#size_qualifier.size. + + +%% ===================================================================== +%% @spec error_marker(Error::term()) -> syntaxTree() +%% +%% @doc Creates an abstract error marker. The result represents an +%% occurrence of an error in the source code, with an associated Erlang +%% I/O ErrorInfo structure given by <code>Error</code> (see module +%% {@link //stdlib/io} for details). Error markers are regarded as source +%% code forms, but have no defined lexical form. +%% +%% <p>Note: this is supported only for backwards compatibility with +%% existing parsers and tools.</p> +%% +%% @see error_marker_info/1 +%% @see warning_marker/1 +%% @see eof_marker/0 +%% @see is_form/1 + +%% type(Node) = error_marker +%% data(Node) = term() +%% +%% `erl_parse' representation: +%% +%% {error, Error} +%% +%% Error = term() +%% +%% Note that there is no position information for the node +%% itself: `get_pos' and `set_pos' handle this as a special case. + +error_marker(Error) -> + tree(error_marker, Error). + +revert_error_marker(Node) -> + %% Note that the position information of the node itself is not + %% preserved. + {error, error_marker_info(Node)}. + + +%% ===================================================================== +%% @spec error_marker_info(syntaxTree()) -> term() +%% +%% @doc Returns the ErrorInfo structure of an <code>error_marker</code> +%% node. +%% +%% @see error_marker/1 + +error_marker_info(Node) -> + case unwrap(Node) of + {error, Error} -> + Error; + T -> + data(T) + end. 
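+
+%% Usage sketch (illustrative only): a size-qualified binary field such as
+%% the one in "<<X:8/integer>>", built with `binary_field/3' and inspected
+%% with the accessors defined above. Hypothetical shell session.
+%%
+%%   1> F = erl_syntax:binary_field(erl_syntax:variable('X'),
+%%                                  erl_syntax:integer(8),
+%%                                  [erl_syntax:atom(integer)]).
+%%   2> erl_syntax:type(erl_syntax:binary([F])).
+%%   binary
+%%   3> erl_syntax:type(erl_syntax:binary_field_size(F)).
+%%   integer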
+ + +%% ===================================================================== +%% @spec warning_marker(Error::term()) -> syntaxTree() +%% +%% @doc Creates an abstract warning marker. The result represents an +%% occurrence of a possible problem in the source code, with an +%% associated Erlang I/O ErrorInfo structure given by <code>Error</code> +%% (see module {@link //stdlib/io} for details). Warning markers are +%% regarded as source code forms, but have no defined lexical form. +%% +%% <p>Note: this is supported only for backwards compatibility with +%% existing parsers and tools.</p> +%% +%% @see warning_marker_info/1 +%% @see error_marker/1 +%% @see eof_marker/0 +%% @see is_form/1 + +%% type(Node) = warning_marker +%% data(Node) = term() +%% +%% `erl_parse' representation: +%% +%% {warning, Error} +%% +%% Error = term() +%% +%% Note that there is no position information for the node +%% itself: `get_pos' and `set_pos' handle this as a special case. + +warning_marker(Warning) -> + tree(warning_marker, Warning). + +revert_warning_marker(Node) -> + %% Note that the position information of the node itself is not + %% preserved. + {warning, warning_marker_info(Node)}. + + +%% ===================================================================== +%% @spec warning_marker_info(syntaxTree()) -> term() +%% +%% @doc Returns the ErrorInfo structure of a <code>warning_marker</code> +%% node. +%% +%% @see warning_marker/1 + +warning_marker_info(Node) -> + case unwrap(Node) of + {warning, Error} -> + Error; + T -> + data(T) + end. + + +%% ===================================================================== +%% @spec eof_marker() -> syntaxTree() +%% +%% @doc Creates an abstract end-of-file marker. This represents the +%% end of input when reading a sequence of source code forms. An +%% end-of-file marker is itself regarded as a source code form +%% (namely, the last in any sequence in which it occurs). It has no +%% defined lexical form. +%% +%% <p>Note: this is retained only for backwards compatibility with +%% existing parsers and tools.</p> +%% +%% @see error_marker/1 +%% @see warning_marker/1 +%% @see is_form/1 + +%% type(Node) = eof_marker +%% data(Node) = term() +%% +%% `erl_parse' representation: +%% +%% {eof, Pos} + +eof_marker() -> + tree(eof_marker). + +revert_eof_marker(Node) -> + Pos = get_pos(Node), + {eof, Pos}. + + +%% ===================================================================== +%% @spec attribute(Name) -> syntaxTree() +%% @equiv attribute(Name, none) + +attribute(Name) -> + attribute(Name, none). + + +%% ===================================================================== +%% @spec attribute(Name::syntaxTree(), Arguments) -> syntaxTree() +%% Arguments = none | [syntaxTree()] +%% +%% @doc Creates an abstract program attribute. If +%% <code>Arguments</code> is <code>[A1, ..., An]</code>, the result +%% represents "<code>-<em>Name</em>(<em>A1</em>, ..., +%% <em>An</em>).</code>". Otherwise, if <code>Arguments</code> is +%% <code>none</code>, the result represents +%% "<code>-<em>Name</em>.</code>". The latter form makes it possible +%% to represent preprocessor directives such as +%% "<code>-endif.</code>". Attributes are source code forms. +%% +%% <p>Note: The preprocessor macro definition directive +%% "<code>-define(<em>Name</em>, <em>Body</em>).</code>" has relatively +%% few requirements on the syntactical form of <code>Body</code> (viewed +%% as a sequence of tokens). 
The <code>text</code> node type can be used +%% for a <code>Body</code> that is not a normal Erlang construct.</p> +%% +%% @see attribute/1 +%% @see attribute_name/1 +%% @see attribute_arguments/1 +%% @see text/1 +%% @see is_form/1 + +-record(attribute, {name, args}). + +%% type(Node) = attribute +%% data(Node) = #attribute{name :: Name, args :: Arguments} +%% +%% Name = syntaxTree() +%% Arguments = none | [syntaxTree()] +%% +%% `erl_parse' representation: +%% +%% {attribute, Pos, module, {Name,Vars}} +%% {attribute, Pos, module, Name} +%% +%% Name = atom() | [atom()] +%% Vars = [atom()] +%% +%% Representing `-module(M).', or `-module(M, Vs).', where M is +%% `A1.A2.....An' if Name is `[A1, A2, ..., An]', and Vs is `[V1, +%% ..., Vm]' if Vars is `[V1, ..., Vm]'. +%% +%% {attribute, Pos, export, Exports} +%% +%% Exports = [{atom(), integer()}] +%% +%% Representing `-export([A1/N1, ..., Ak/Nk]).', if `Exports' is +%% `[{A1, N1}, ..., {Ak, Nk}]'. +%% +%% {attribute, Pos, import, Imports} +%% +%% Imports = {atom(), Pairs} | [atom()] +%% Pairs = [{atom(), integer()] +%% +%% Representing `-import(Module, [A1/N1, ..., Ak/Nk]).', if +%% `Imports' is `{Module, [{A1, N1}, ..., {Ak, Nk}]}', or +%% `-import(A1.....An).', if `Imports' is `[A1, ..., An]'. +%% +%% {attribute, Pos, file, Position} +%% +%% Position = {filename(), integer()} +%% +%% Representing `-file(Name, Line).', if `Position' is `{Name, +%% Line}'. +%% +%% {attribute, Pos, record, Info} +%% +%% Info = {Name, [Entries]} +%% Name = atom() +%% Entries = {record_field, Pos, atom()} +%% | {record_field, Pos, atom(), erl_parse()} +%% +%% Representing `-record(Name, {<F1>, ..., <Fn>}).', if `Info' is +%% `{Name, [D1, ..., D1]}', where each `Fi' is either `Ai = <Ei>', +%% if the corresponding `Di' is `{record_field, Pos, Ai, Ei}', or +%% otherwise simply `Ai', if `Di' is `{record_field, Pos, Ai}'. +%% +%% {attribute, L, Name, Term} +%% +%% Name = atom() \ StandardName +%% StandardName = module | export | import | file | record +%% Term = term() +%% +%% Representing `-Name(Term).'. + +attribute(Name, Args) -> + tree(attribute, #attribute{name = Name, args = Args}). + +revert_attribute(Node) -> + Name = attribute_name(Node), + Args = attribute_arguments(Node), + Pos = get_pos(Node), + case type(Name) of + atom -> + revert_attribute_1(atom_value(Name), Args, Pos, Node); + _ -> + Node + end. 
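+
+%% Usage sketch (illustration only): constructing the form
+%% "-export([f/1])." with attribute/2, assuming list/1,
+%% arity_qualifier/2 and the usual leaf constructors from this module:
+%%
+%%     Attr = erl_syntax:attribute(
+%%                erl_syntax:atom(export),
+%%                [erl_syntax:list(
+%%                     [erl_syntax:arity_qualifier(
+%%                          erl_syntax:atom(f),
+%%                          erl_syntax:integer(1))])]),
+%%     %% erl_syntax:revert(Attr) should yield the `erl_parse' form
+%%     %% {attribute, Pos, export, [{f,1}]} described above.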
+ +%% All the checking makes this part a bit messy: + +revert_attribute_1(module, [M], Pos, Node) -> + case revert_module_name(M) of + {ok, A} -> + {attribute, Pos, module, A}; + error -> Node + end; +revert_attribute_1(module, [M, List], Pos, Node) -> + Vs = case is_list_skeleton(List) of + true -> + case is_proper_list(List) of + true -> + fold_variable_names(list_elements(List)); + false -> + Node + end; + false -> + Node + end, + case revert_module_name(M) of + {ok, A} -> + {attribute, Pos, module, {A, Vs}}; + error -> Node + end; +revert_attribute_1(export, [List], Pos, Node) -> + case is_list_skeleton(List) of + true -> + case is_proper_list(List) of + true -> + Fs = fold_function_names(list_elements(List)), + {attribute, Pos, export, Fs}; + false -> + Node + end; + false -> + Node + end; +revert_attribute_1(import, [M], Pos, Node) -> + case revert_module_name(M) of + {ok, A} -> {attribute, Pos, import, A}; + error -> Node + end; +revert_attribute_1(import, [M, List], Pos, Node) -> + case revert_module_name(M) of + {ok, A} -> + case is_list_skeleton(List) of + true -> + case is_proper_list(List) of + true -> + Fs = fold_function_names( + list_elements(List)), + {attribute, Pos, import, {A, Fs}}; + false -> + Node + end; + false -> + Node + end; + error -> + Node + end; +revert_attribute_1(file, [A, Line], Pos, Node) -> + case type(A) of + string -> + case type(Line) of + integer -> + {attribute, Pos, file, + {concrete(A), concrete(Line)}}; + _ -> + Node + end; + _ -> + Node + end; +revert_attribute_1(record, [A, Tuple], Pos, Node) -> + case type(A) of + atom -> + case type(Tuple) of + tuple -> + Fs = fold_record_fields( + tuple_elements(Tuple)), + {attribute, Pos, record, {concrete(A), Fs}}; + _ -> + Node + end; + _ -> + Node + end; +revert_attribute_1(N, [T], Pos, _) -> + {attribute, Pos, N, concrete(T)}; +revert_attribute_1(_, _, _, Node) -> + Node. + +revert_module_name(A) -> + case type(A) of + atom -> + {ok, concrete(A)}; + qualified_name -> + Ss = qualified_name_segments(A), + {ok, [concrete(S) || S <- Ss]}; + _ -> + error + end. + + +%% ===================================================================== +%% @spec attribute_name(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the name subtree of an <code>attribute</code> node. +%% +%% @see attribute/1 + +attribute_name(Node) -> + case unwrap(Node) of + {attribute, Pos, Name, _} -> + set_pos(atom(Name), Pos); + Node1 -> + (data(Node1))#attribute.name + end. + + +%% ===================================================================== +%% @spec attribute_arguments(Node::syntaxTree()) -> +%% none | [syntaxTree()] +%% +%% @doc Returns the list of argument subtrees of an +%% <code>attribute</code> node, if any. If <code>Node</code> +%% represents "<code>-<em>Name</em>.</code>", the result is +%% <code>none</code>. Otherwise, if <code>Node</code> represents +%% "<code>-<em>Name</em>(<em>E1</em>, ..., <em>En</em>).</code>", +%% <code>[E1, ..., E1]</code> is returned. 
+%% +%% @see attribute/1 + +attribute_arguments(Node) -> + case unwrap(Node) of + {attribute, Pos, Name, Data} -> + case Name of + module -> + {M1, Vs} = + case Data of + {M0, Vs0} -> + {M0, unfold_variable_names(Vs0, Pos)}; + M0 -> + {M0, none} + end, + M2 = if is_list(M1) -> + qualified_name([atom(A) || A <- M1]); + true -> + atom(M1) + end, + M = set_pos(M2, Pos), + if Vs == none -> [M]; + true -> [M, set_pos(list(Vs), Pos)] + end; + export -> + [set_pos( + list(unfold_function_names(Data, Pos)), + Pos)]; + import -> + case Data of + {Module, Imports} -> + [if is_list(Module) -> + qualified_name([atom(A) + || A <- Module]); + true -> + set_pos(atom(Module), Pos) + end, + set_pos( + list(unfold_function_names(Imports, Pos)), + Pos)]; + _ -> + [qualified_name([atom(A) || A <- Data])] + end; + file -> + {File, Line} = Data, + [set_pos(string(File), Pos), + set_pos(integer(Line), Pos)]; + record -> + %% Note that we create a tuple as container + %% for the second argument! + {Type, Entries} = Data, + [set_pos(atom(Type), Pos), + set_pos(tuple(unfold_record_fields(Entries)), + Pos)]; + _ -> + %% Standard single-term generic attribute. + [set_pos(abstract(Data), Pos)] + end; + Node1 -> + (data(Node1))#attribute.args + end. + + +%% ===================================================================== +%% @spec arity_qualifier(Body::syntaxTree(), Arity::syntaxTree()) -> +%% syntaxTree() +%% +%% @doc Creates an abstract arity qualifier. The result represents +%% "<code><em>Body</em>/<em>Arity</em></code>". +%% +%% @see arity_qualifier_body/1 +%% @see arity_qualifier_argument/1 + +-record(arity_qualifier, {body, arity}). + +%% type(Node) = arity_qualifier +%% data(Node) = #arity_qualifier{body :: Body, arity :: Arity} +%% +%% Body = Arity = syntaxTree() + +arity_qualifier(Body, Arity) -> + tree(arity_qualifier, + #arity_qualifier{body = Body, arity = Arity}). + + +%% ===================================================================== +%% @spec arity_qualifier_body(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the body subtree of an <code>arity_qualifier</code> +%% node. +%% +%% @see arity_qualifier/2 + +arity_qualifier_body(Node) -> + (data(Node))#arity_qualifier.body. + + +%% ===================================================================== +%% @spec arity_qualifier_argument(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the argument (the arity) subtree of an +%% <code>arity_qualifier</code> node. +%% +%% @see arity_qualifier/2 + +arity_qualifier_argument(Node) -> + (data(Node))#arity_qualifier.arity. + + +%% ===================================================================== +%% @spec module_qualifier(Module::syntaxTree(), Body::syntaxTree()) -> +%% syntaxTree() +%% +%% @doc Creates an abstract module qualifier. The result represents +%% "<code><em>Module</em>:<em>Body</em></code>". +%% +%% @see module_qualifier_argument/1 +%% @see module_qualifier_body/1 + +-record(module_qualifier, {module, body}). + +%% type(Node) = module_qualifier +%% data(Node) = #module_qualifier{module :: Module, body :: Body} +%% +%% Module = Body = syntaxTree() +%% +%% `erl_parse' representation: +%% +%% {remote, Pos, Module, Arg} +%% +%% Module = Arg = erl_parse() + +module_qualifier(Module, Body) -> + tree(module_qualifier, + #module_qualifier{module = Module, body = Body}). + +revert_module_qualifier(Node) -> + Pos = get_pos(Node), + Module = module_qualifier_argument(Node), + Body = module_qualifier_body(Node), + {remote, Pos, Module, Body}. 
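+
+%% Usage sketch (illustration only): both qualifiers are thin wrappers
+%% around their subtrees; assuming erl_prettypr:format/1 for rendering:
+%%
+%%     MQ = erl_syntax:module_qualifier(erl_syntax:atom(lists),
+%%                                      erl_syntax:atom(reverse)),
+%%     AQ = erl_syntax:arity_qualifier(erl_syntax:atom(reverse),
+%%                                     erl_syntax:integer(1)),
+%%     %% MQ should print roughly as "lists:reverse" and AQ as
+%%     %% "reverse/1"; arity_qualifier_body(AQ) returns the 'reverse'
+%%     %% atom subtree.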
+ + +%% ===================================================================== +%% @spec module_qualifier_argument(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the argument (the module) subtree of a +%% <code>module_qualifier</code> node. +%% +%% @see module_qualifier/2 + +module_qualifier_argument(Node) -> + case unwrap(Node) of + {remote, _, Module, _} -> + Module; + Node1 -> + (data(Node1))#module_qualifier.module + end. + + +%% ===================================================================== +%% @spec module_qualifier_body(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the body subtree of a <code>module_qualifier</code> +%% node. +%% +%% @see module_qualifier/2 + +module_qualifier_body(Node) -> + case unwrap(Node) of + {remote, _, _, Body} -> + Body; + Node1 -> + (data(Node1))#module_qualifier.body + end. + + +%% ===================================================================== +%% @spec qualified_name(Segments::[syntaxTree()]) -> syntaxTree() +%% +%% @doc Creates an abstract qualified name. The result represents +%% "<code><em>S1</em>.<em>S2</em>. ... .<em>Sn</em></code>", if +%% <code>Segments</code> is <code>[S1, S2, ..., Sn]</code>. +%% +%% @see qualified_name_segments/1 + +%% type(Node) = qualified_name +%% data(Node) = [syntaxTree()] +%% +%% `erl_parse' representation: +%% +%% {record_field, Pos, Node, Node} +%% +%% Node = {atom, Pos, Value} | {record_field, Pos, Node, Node} +%% +%% Note that if not all leaf subnodes are (abstract) atoms, then Node +%% represents a Mnemosyne query record field access ('record_access'); +%% see type/1 for details. + +qualified_name(Segments) -> + tree(qualified_name, Segments). + +revert_qualified_name(Node) -> + Pos = get_pos(Node), + fold_qualified_name(qualified_name_segments(Node), Pos). + + +%% ===================================================================== +%% @spec qualified_name_segments(syntaxTree()) -> [syntaxTree()] +%% +%% @doc Returns the list of name segments of a +%% <code>qualified_name</code> node. +%% +%% @see qualified_name/1 + +qualified_name_segments(Node) -> + case unwrap(Node) of + {record_field, _, _, _} = Node1 -> + unfold_qualified_name(Node1); + Node1 -> + data(Node1) + end. + + +%% ===================================================================== +%% @spec function(Name::syntaxTree(), Clauses::[syntaxTree()]) -> +%% syntaxTree() +%% +%% @doc Creates an abstract function definition. If <code>Clauses</code> +%% is <code>[C1, ..., Cn]</code>, the result represents +%% "<code><em>Name</em> <em>C1</em>; ...; <em>Name</em> +%% <em>Cn</em>.</code>". More exactly, if each <code>Ci</code> +%% represents "<code>(<em>Pi1</em>, ..., <em>Pim</em>) <em>Gi</em> -> +%% <em>Bi</em></code>", then the result represents +%% "<code><em>Name</em>(<em>P11</em>, ..., <em>P1m</em>) <em>G1</em> -> +%% <em>B1</em>; ...; <em>Name</em>(<em>Pn1</em>, ..., <em>Pnm</em>) +%% <em>Gn</em> -> <em>Bn</em>.</code>". Function definitions are source +%% code forms. +%% +%% @see function_name/1 +%% @see function_clauses/1 +%% @see function_arity/1 +%% @see is_form/1 +%% @see rule/2 + +-record(function, {name, clauses}). + +%% type(Node) = function +%% data(Node) = #function{name :: Name, clauses :: Clauses} +%% +%% Name = syntaxTree() +%% Clauses = [syntaxTree()] +%% +%% (There's no real point in precomputing and storing the arity, +%% and passing it as a constructor argument makes it possible to +%% end up with an inconsistent value. 
Besides, some people might +%% want to check all clauses, and not just the first, so the +%% computation is not obvious.) +%% +%% `erl_parse' representation: +%% +%% {function, Pos, Name, Arity, Clauses} +%% +%% Name = atom() +%% Arity = integer() +%% Clauses = [Clause] \ [] +%% Clause = {clause, ...} +%% +%% where the number of patterns in each clause should be equal to +%% the integer `Arity'; see `clause' for documentation on +%% `erl_parse' clauses. + +function(Name, Clauses) -> + tree(function, #function{name = Name, clauses = Clauses}). + +revert_function(Node) -> + Name = function_name(Node), + Clauses = [revert_clause(C) || C <- function_clauses(Node)], + Pos = get_pos(Node), + case type(Name) of + atom -> + A = function_arity(Node), + {function, Pos, concrete(Name), A, Clauses}; + _ -> + Node + end. + + +%% ===================================================================== +%% @spec function_name(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the name subtree of a <code>function</code> node. +%% +%% @see function/2 + +function_name(Node) -> + case unwrap(Node) of + {function, Pos, Name, _, _} -> + set_pos(atom(Name), Pos); + Node1 -> + (data(Node1))#function.name + end. + + +%% ===================================================================== +%% @spec function_clauses(syntaxTree()) -> [syntaxTree()] +%% +%% @doc Returns the list of clause subtrees of a <code>function</code> +%% node. +%% +%% @see function/2 + +function_clauses(Node) -> + case unwrap(Node) of + {function, _, _, _, Clauses} -> + Clauses; + Node1 -> + (data(Node1))#function.clauses + end. + + +%% ===================================================================== +%% @spec function_arity(Node::syntaxTree()) -> integer() +%% +%% @doc Returns the arity of a <code>function</code> node. The result +%% is the number of parameter patterns in the first clause of the +%% function; subsequent clauses are ignored. +%% +%% <p>An exception is thrown if <code>function_clauses(Node)</code> +%% returns an empty list, or if the first element of that list is not +%% a syntax tree <code>C</code> of type <code>clause</code> such that +%% <code>clause_patterns(C)</code> is a nonempty list.</p> +%% +%% @see function/2 +%% @see function_clauses/1 +%% @see clause/3 +%% @see clause_patterns/1 + +function_arity(Node) -> + %% Note that this never accesses the arity field of `erl_parse' + %% function nodes. + length(clause_patterns(hd(function_clauses(Node)))). + + +%% ===================================================================== +%% @spec clause(Guard, Body) -> syntaxTree() +%% @equiv clause([], Guard, Body) + +clause(Guard, Body) -> + clause([], Guard, Body). + + +%% ===================================================================== +%% @spec clause(Patterns::[syntaxTree()], Guard, +%% Body::[syntaxTree()]) -> syntaxTree() +%% Guard = none | syntaxTree() +%% | [syntaxTree()] | [[syntaxTree()]] +%% +%% @doc Creates an abstract clause. If <code>Patterns</code> is +%% <code>[P1, ..., Pn]</code> and <code>Body</code> is <code>[B1, ..., +%% Bm]</code>, then if <code>Guard</code> is <code>none</code>, the +%% result represents "<code>(<em>P1</em>, ..., <em>Pn</em>) -> +%% <em>B1</em>, ..., <em>Bm</em></code>", otherwise, unless +%% <code>Guard</code> is a list, the result represents +%% "<code>(<em>P1</em>, ..., <em>Pn</em>) when <em>Guard</em> -> +%% <em>B1</em>, ..., <em>Bm</em></code>". 
+%% +%% <p>For simplicity, the <code>Guard</code> argument may also be any +%% of the following: +%% <ul> +%% <li>An empty list <code>[]</code>. This is equivalent to passing +%% <code>none</code>.</li> +%% <li>A nonempty list <code>[E1, ..., Ej]</code> of syntax trees. +%% This is equivalent to passing <code>conjunction([E1, ..., +%% Ej])</code>.</li> +%% <li>A nonempty list of lists of syntax trees <code>[[E1_1, ..., +%% E1_k1], ..., [Ej_1, ..., Ej_kj]]</code>, which is equivalent +%% to passing <code>disjunction([conjunction([E1_1, ..., +%% E1_k1]), ..., conjunction([Ej_1, ..., Ej_kj])])</code>.</li> +%% </ul> +%% </p> +%% +%% @see clause/2 +%% @see clause_patterns/1 +%% @see clause_guard/1 +%% @see clause_body/1 + +-record(clause, {patterns, guard, body}). + +%% type(Node) = clause +%% data(Node) = #clause{patterns :: Patterns, guard :: Guard, +%% body :: Body} +%% +%% Patterns = [syntaxTree()] +%% Guard = syntaxTree() | none +%% Body = [syntaxTree()] +%% +%% `erl_parse' representation: +%% +%% {clause, Pos, Patterns, Guard, Body} +%% +%% Patterns = [erl_parse()] +%% Guard = [[erl_parse()]] | [erl_parse()] +%% Body = [erl_parse()] \ [] +%% +%% Taken out of context, if `Patterns' is `[P1, ..., Pn]' and +%% `Body' is `[B1, ..., Bm]', this represents `(<P1>, ..., <Pn>) -> +%% <B1>, ..., <Bm>' if `Guard' is `[]', or otherwise `(<P1>, ..., +%% <Pn>) when <G> -> <Body>', where `G' is `<E1_1>, ..., <E1_k1>; +%% ...; <Ej_1>, ..., <Ej_kj>', if `Guard' is a list of lists +%% `[[E1_1, ..., E1_k1], ..., [Ej_1, ..., Ej_kj]]'. In older +%% versions, `Guard' was simply a list `[E1, ..., En]' of parse +%% trees, which is equivalent to the new form `[[E1, ..., En]]'. + +clause(Patterns, Guard, Body) -> + Guard1 = case Guard of + [] -> + none; + [X | _] when is_list(X) -> + disjunction(conjunction_list(Guard)); + [_ | _] -> + %% Handle older forms also. + conjunction(Guard); + _ -> + %% This should be `none' or a syntax tree. + Guard + end, + tree(clause, #clause{patterns = Patterns, guard = Guard1, + body = Body}). + +conjunction_list([L | Ls]) -> + [conjunction(L) | conjunction_list(Ls)]; +conjunction_list([]) -> + []. + +revert_clause(Node) -> + Pos = get_pos(Node), + Guard = case clause_guard(Node) of + none -> + []; + E -> + case type(E) of + disjunction -> + revert_clause_disjunction(E); + conjunction -> + %% Only the top level expression is + %% unfolded here; no recursion. + [conjunction_body(E)]; + _ -> + [[E]] % a single expression + end + end, + {clause, Pos, clause_patterns(Node), Guard, + clause_body(Node)}. + +revert_clause_disjunction(D) -> + %% We handle conjunctions within a disjunction, but only at + %% the top level; no recursion. + [case type(E) of + conjunction -> + conjunction_body(E); + _ -> + [E] + end + || E <- disjunction_body(D)]. + +revert_try_clause(Node) -> + fold_try_clause(revert_clause(Node)). + +fold_try_clause({clause, Pos, [P], Guard, Body}) -> + P1 = case type(P) of + class_qualifier -> + {tuple, Pos, [class_qualifier_argument(P), + class_qualifier_body(P), + {var, Pos, '_'}]}; + _ -> + {tuple, Pos, [{atom, Pos, throw}, P, {var, Pos, '_'}]} + end, + {clause, Pos, [P1], Guard, Body}. + +unfold_try_clauses(Cs) -> + [unfold_try_clause(C) || C <- Cs]. + +unfold_try_clause({clause, Pos, [{tuple, _, [{atom,_,throw}, V, _]}], + Guard, Body}) -> + {clause, Pos, [V], Guard, Body}; +unfold_try_clause({clause, Pos, [{tuple, _, [C, V, _]}], + Guard, Body}) -> + {clause, Pos, [class_qualifier(C, V)], Guard, Body}. 
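+
+%% Usage sketch (illustration only): building the clause
+%% "(X) when is_integer(X) -> X" with clause/3, passing the guard as a
+%% plain list of tests as described above (application/2 is the call
+%% constructor defined further down in this module):
+%%
+%%     C = erl_syntax:clause(
+%%             [erl_syntax:variable('X')],
+%%             [erl_syntax:application(erl_syntax:atom(is_integer),
+%%                                     [erl_syntax:variable('X')])],
+%%             [erl_syntax:variable('X')]),
+%%     %% clause_guard(C) then returns a conjunction node wrapping the
+%%     %% single test.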
+ + +%% ===================================================================== +%% @spec clause_patterns(syntaxTree()) -> [syntaxTree()] +%% +%% @doc Returns the list of pattern subtrees of a <code>clause</code> +%% node. +%% +%% @see clause/3 + +clause_patterns(Node) -> + case unwrap(Node) of + {clause, _, Patterns, _, _} -> + Patterns; + Node1 -> + (data(Node1))#clause.patterns + end. + + +%% ===================================================================== +%% @spec clause_guard(Node::syntaxTree()) -> none | syntaxTree() +%% +%% @doc Returns the guard subtree of a <code>clause</code> node, if +%% any. If <code>Node</code> represents "<code>(<em>P1</em>, ..., +%% <em>Pn</em>) when <em>Guard</em> -> <em>B1</em>, ..., +%% <em>Bm</em></code>", <code>Guard</code> is returned. Otherwise, the +%% result is <code>none</code>. +%% +%% @see clause/3 + +clause_guard(Node) -> + case unwrap(Node) of + {clause, _, _, Guard, _} -> + case Guard of + [] -> none; + [L | _] when is_list(L) -> + disjunction(conjunction_list(Guard)); + [_ | _] -> + conjunction(Guard) + end; + Node1 -> + (data(Node1))#clause.guard + end. + + +%% ===================================================================== +%% @spec clause_body(syntaxTree()) -> [syntaxTree()] +%% +%% @doc Return the list of body subtrees of a <code>clause</code> +%% node. +%% +%% @see clause/3 + +clause_body(Node) -> + case unwrap(Node) of + {clause, _, _, _, Body} -> + Body; + Node1 -> + (data(Node1))#clause.body + end. + + +%% ===================================================================== +%% @spec disjunction(List::[syntaxTree()]) -> syntaxTree() +%% +%% @doc Creates an abstract disjunction. If <code>List</code> is +%% <code>[E1, ..., En]</code>, the result represents +%% "<code><em>E1</em>; ...; <em>En</em></code>". +%% +%% @see disjunction_body/1 +%% @see conjunction/1 + +%% type(Node) = disjunction +%% data(Node) = [syntaxTree()] + +disjunction(Tests) -> + tree(disjunction, Tests). + + +%% ===================================================================== +%% @spec disjunction_body(syntaxTree()) -> [syntaxTree()] +%% +%% @doc Returns the list of body subtrees of a +%% <code>disjunction</code> node. +%% +%% @see disjunction/1 + +disjunction_body(Node) -> + data(Node). + + +%% ===================================================================== +%% @spec conjunction(List::[syntaxTree()]) -> syntaxTree() +%% +%% @doc Creates an abstract conjunction. If <code>List</code> is +%% <code>[E1, ..., En]</code>, the result represents +%% "<code><em>E1</em>, ..., <em>En</em></code>". +%% +%% @see conjunction_body/1 +%% @see disjunction/1 + +%% type(Node) = conjunction +%% data(Node) = [syntaxTree()] + +conjunction(Tests) -> + tree(conjunction, Tests). + + +%% ===================================================================== +%% @spec conjunction_body(syntaxTree()) -> [syntaxTree()] +%% +%% @doc Returns the list of body subtrees of a +%% <code>conjunction</code> node. +%% +%% @see conjunction/1 + +conjunction_body(Node) -> + data(Node). + + +%% ===================================================================== +%% @spec catch_expr(Expr::syntaxTree()) -> syntaxTree() +%% +%% @doc Creates an abstract catch-expression. The result represents +%% "<code>catch <em>Expr</em></code>". +%% +%% @see catch_expr_body/1 + +%% type(Node) = catch_expr +%% data(Node) = syntaxTree() +%% +%% `erl_parse' representation: +%% +%% {'catch', Pos, Expr} +%% +%% Expr = erl_parse() + +catch_expr(Expr) -> + tree(catch_expr, Expr). 
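+
+%% Usage sketch (illustration only): a minimal catch-expression,
+%% assuming application/2 (defined further down in this module) and
+%% erl_prettypr:format/1 for rendering:
+%%
+%%     E = erl_syntax:catch_expr(
+%%             erl_syntax:application(erl_syntax:atom(foo), [])),
+%%     %% erl_prettypr:format(E) should print roughly "catch foo()".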
+ +revert_catch_expr(Node) -> + Pos = get_pos(Node), + Expr = catch_expr_body(Node), + {'catch', Pos, Expr}. + + +%% ===================================================================== +%% @spec catch_expr_body(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the body subtree of a <code>catch_expr</code> node. +%% +%% @see catch_expr/1 + +catch_expr_body(Node) -> + case unwrap(Node) of + {'catch', _, Expr} -> + Expr; + Node1 -> + data(Node1) + end. + + +%% ===================================================================== +%% @spec match_expr(Pattern::syntaxTree(), Body::syntaxTree()) -> +%% syntaxTree() +%% +%% @doc Creates an abstract match-expression. The result represents +%% "<code><em>Pattern</em> = <em>Body</em></code>". +%% +%% @see match_expr_pattern/1 +%% @see match_expr_body/1 + +-record(match_expr, {pattern, body}). + +%% type(Node) = match_expr +%% data(Node) = #match_expr{pattern :: Pattern, body :: Body} +%% +%% Pattern = Body = syntaxTree() +%% +%% `erl_parse' representation: +%% +%% {match, Pos, Pattern, Body} +%% +%% Pattern = Body = erl_parse() + +match_expr(Pattern, Body) -> + tree(match_expr, #match_expr{pattern = Pattern, body = Body}). + +revert_match_expr(Node) -> + Pos = get_pos(Node), + Pattern = match_expr_pattern(Node), + Body = match_expr_body(Node), + {match, Pos, Pattern, Body}. + + +%% ===================================================================== +%% @spec match_expr_pattern(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the pattern subtree of a <code>match_expr</code> node. +%% +%% @see match_expr/2 + +match_expr_pattern(Node) -> + case unwrap(Node) of + {match, _, Pattern, _} -> + Pattern; + Node1 -> + (data(Node1))#match_expr.pattern + end. + + +%% ===================================================================== +%% @spec match_expr_body(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the body subtree of a <code>match_expr</code> node. +%% +%% @see match_expr/2 + +match_expr_body(Node) -> + case unwrap(Node) of + {match, _, _, Body} -> + Body; + Node1 -> + (data(Node1))#match_expr.body + end. + + +%% ===================================================================== +%% @spec operator(Name) -> syntaxTree() +%% Name = atom() | string() +%% +%% @doc Creates an abstract operator. The name of the operator is the +%% character sequence represented by <code>Name</code>. This is +%% analogous to the print name of an atom, but an operator is never +%% written within single-quotes; e.g., the result of +%% <code>operator('++')</code> represents "<code>++</code>" rather +%% than "<code>'++'</code>". +%% +%% @see operator_name/1 +%% @see operator_literal/1 +%% @see atom/1 + +%% type(Node) = operator +%% data(Node) = atom() + +operator(Name) when is_atom(Name) -> + tree(operator, Name); +operator(Name) -> + tree(operator, list_to_atom(Name)). + + +%% ===================================================================== +%% @spec operator_name(syntaxTree()) -> atom() +%% +%% @doc Returns the name of an <code>operator</code> node. Note that +%% the name is returned as an atom. +%% +%% @see operator/1 + +operator_name(Node) -> + data(Node). + + +%% ===================================================================== +%% @spec operator_literal(syntaxTree()) -> string() +%% +%% @doc Returns the literal string represented by an +%% <code>operator</code> node. This is simply the operator name as a +%% string. +%% +%% @see operator/1 + +operator_literal(Node) -> + atom_to_list(operator_name(Node)). 
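+
+%% Usage sketch (illustration only): match expressions and operators
+%% combine with the other constructors in the obvious way; the rendered
+%% layout below is an assumption about the pretty printer:
+%%
+%%     M = erl_syntax:match_expr(erl_syntax:variable('X'),
+%%                               erl_syntax:integer(42)),
+%%     "++" = erl_syntax:operator_literal(erl_syntax:operator('++')),
+%%     %% erl_prettypr:format(M) should print roughly "X = 42"; the
+%%     %% operator nodes themselves are used by the infix and prefix
+%%     %% expression constructors below.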
+ + +%% ===================================================================== +%% @spec infix_expr(Left::syntaxTree(), Operator::syntaxTree(), +%% Right::syntaxTree()) -> syntaxTree() +%% +%% @doc Creates an abstract infix operator expression. The result +%% represents "<code><em>Left</em> <em>Operator</em> +%% <em>Right</em></code>". +%% +%% @see infix_expr_left/1 +%% @see infix_expr_right/1 +%% @see infix_expr_operator/1 +%% @see prefix_expr/2 + +-record(infix_expr, {operator, left, right}). + +%% type(Node) = infix_expr +%% data(Node) = #infix_expr{left :: Left, operator :: Operator, +%% right :: Right} +%% +%% Left = Operator = Right = syntaxTree() +%% +%% `erl_parse' representation: +%% +%% {op, Pos, Operator, Left, Right} +%% +%% Operator = atom() +%% Left = Right = erl_parse() + +infix_expr(Left, Operator, Right) -> + tree(infix_expr, #infix_expr{operator = Operator, left = Left, + right = Right}). + +revert_infix_expr(Node) -> + Pos = get_pos(Node), + Operator = infix_expr_operator(Node), + Left = infix_expr_left(Node), + Right = infix_expr_right(Node), + case type(Operator) of + operator -> + %% Note that the operator itself is not revertible out + %% of context. + {op, Pos, operator_name(Operator), Left, Right}; + _ -> + Node + end. + + +%% ===================================================================== +%% @spec infix_expr_left(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the left argument subtree of an +%% <code>infix_expr</code> node. +%% +%% @see infix_expr/3 + +infix_expr_left(Node) -> + case unwrap(Node) of + {op, _, _, Left, _} -> + Left; + Node1 -> + (data(Node1))#infix_expr.left + end. + + +%% ===================================================================== +%% @spec infix_expr_operator(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the operator subtree of an <code>infix_expr</code> +%% node. +%% +%% @see infix_expr/3 + +infix_expr_operator(Node) -> + case unwrap(Node) of + {op, Pos, Operator, _, _} -> + set_pos(operator(Operator), Pos); + Node1 -> + (data(Node1))#infix_expr.operator + end. + + +%% ===================================================================== +%% @spec infix_expr_right(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the right argument subtree of an +%% <code>infix_expr</code> node. +%% +%% @see infix_expr/3 + +infix_expr_right(Node) -> + case unwrap(Node) of + {op, _, _, _, Right} -> + Right; + Node1 -> + (data(Node1))#infix_expr.right + end. + + +%% ===================================================================== +%% @spec prefix_expr(Operator::syntaxTree(), Argument::syntaxTree()) -> +%% syntaxTree() +%% +%% @doc Creates an abstract prefix operator expression. The result +%% represents "<code><em>Operator</em> <em>Argument</em></code>". +%% +%% @see prefix_expr_argument/1 +%% @see prefix_expr_operator/1 +%% @see infix_expr/3 + +-record(prefix_expr, {operator, argument}). + +%% type(Node) = prefix_expr +%% data(Node) = #prefix_expr{operator :: Operator, +%% argument :: Argument} +%% +%% Operator = Argument = syntaxTree() +%% +%% `erl_parse' representation: +%% +%% {op, Pos, Operator, Arg} +%% +%% Operator = atom() +%% Argument = erl_parse() + +prefix_expr(Operator, Argument) -> + tree(prefix_expr, #prefix_expr{operator = Operator, + argument = Argument}). + +revert_prefix_expr(Node) -> + Pos = get_pos(Node), + Operator = prefix_expr_operator(Node), + Argument = prefix_expr_argument(Node), + case type(Operator) of + operator -> + %% Note that the operator itself is not revertible out + %% of context. 
+ {op, Pos, operator_name(Operator), Argument}; + _ -> + Node + end. + + +%% ===================================================================== +%% @spec prefix_expr_operator(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the operator subtree of a <code>prefix_expr</code> +%% node. +%% +%% @see prefix_expr/2 + +prefix_expr_operator(Node) -> + case unwrap(Node) of + {op, Pos, Operator, _} -> + set_pos(operator(Operator), Pos); + Node1 -> + (data(Node1))#prefix_expr.operator + end. + + +%% ===================================================================== +%% @spec prefix_expr_argument(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the argument subtree of a <code>prefix_expr</code> +%% node. +%% +%% @see prefix_expr/2 + +prefix_expr_argument(Node) -> + case unwrap(Node) of + {op, _, _, Argument} -> + Argument; + Node1 -> + (data(Node1))#prefix_expr.argument + end. + + +%% ===================================================================== +%% @spec record_field(Name) -> syntaxTree() +%% @equiv record_field(Name, none) + +record_field(Name) -> + record_field(Name, none). + + +%% ===================================================================== +%% @spec record_field(Name::syntaxTree(), Value) -> syntaxTree() +%% Value = none | syntaxTree() +%% +%% @doc Creates an abstract record field specification. If +%% <code>Value</code> is <code>none</code>, the result represents +%% simply "<code><em>Name</em></code>", otherwise it represents +%% "<code><em>Name</em> = <em>Value</em></code>". +%% +%% @see record_field_name/1 +%% @see record_field_value/1 +%% @see record_expr/3 + +-record(record_field, {name, value}). + +%% type(Node) = record_field +%% data(Node) = #record_field{name :: Name, value :: Value} +%% +%% Name = Value = syntaxTree() + +record_field(Name, Value) -> + tree(record_field, #record_field{name = Name, value = Value}). + + +%% ===================================================================== +%% @spec record_field_name(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the name subtree of a <code>record_field</code> node. +%% +%% @see record_field/2 + +record_field_name(Node) -> + (data(Node))#record_field.name. + + +%% ===================================================================== +%% @spec record_field_value(syntaxTree()) -> none | syntaxTree() +%% +%% @doc Returns the value subtree of a <code>record_field</code> node, +%% if any. If <code>Node</code> represents +%% "<code><em>Name</em></code>", <code>none</code> is +%% returned. Otherwise, if <code>Node</code> represents +%% "<code><em>Name</em> = <em>Value</em></code>", <code>Value</code> +%% is returned. +%% +%% @see record_field/2 + +record_field_value(Node) -> + (data(Node))#record_field.value. + + +%% ===================================================================== +%% @spec record_index_expr(Type::syntaxTree(), Field::syntaxTree()) -> +%% syntaxTree() +%% +%% @doc Creates an abstract record field index expression. The result +%% represents "<code>#<em>Type</em>.<em>Field</em></code>". +%% +%% <p>(Note: the function name <code>record_index/2</code> is reserved +%% by the Erlang compiler, which is why that name could not be used +%% for this constructor.)</p> +%% +%% @see record_index_expr_type/1 +%% @see record_index_expr_field/1 +%% @see record_expr/3 + +-record(record_index_expr, {type, field}). 
+ +%% type(Node) = record_index_expr +%% data(Node) = #record_index_expr{type :: Type, field :: Field} +%% +%% Type = Field = syntaxTree() +%% +%% `erl_parse' representation: +%% +%% {record_index, Pos, Type, Field} +%% +%% Type = atom() +%% Field = erl_parse() + +record_index_expr(Type, Field) -> + tree(record_index_expr, #record_index_expr{type = Type, + field = Field}). + +revert_record_index_expr(Node) -> + Pos = get_pos(Node), + Type = record_index_expr_type(Node), + Field = record_index_expr_field(Node), + case type(Type) of + atom -> + {record_index, Pos, concrete(Type), Field}; + _ -> + Node + end. + + +%% ===================================================================== +%% @spec record_index_expr_type(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the type subtree of a <code>record_index_expr</code> +%% node. +%% +%% @see record_index_expr/2 + +record_index_expr_type(Node) -> + case unwrap(Node) of + {record_index, Pos, Type, _} -> + set_pos(atom(Type), Pos); + Node1 -> + (data(Node1))#record_index_expr.type + end. + + +%% ===================================================================== +%% @spec record_index_expr_field(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the field subtree of a <code>record_index_expr</code> +%% node. +%% +%% @see record_index_expr/2 + +record_index_expr_field(Node) -> + case unwrap(Node) of + {record_index, _, _, Field} -> + Field; + Node1 -> + (data(Node1))#record_index_expr.field + end. + + +%% ===================================================================== +%% @spec record_access(Argument, Field) -> syntaxTree() +%% @equiv record_access(Argument, none, Field) + +record_access(Argument, Field) -> + record_access(Argument, none, Field). + + +%% ===================================================================== +%% @spec record_access(Argument::syntaxTree(), Type, +%% Field::syntaxTree()) -> syntaxTree() +%% Type = none | syntaxTree() +%% +%% @doc Creates an abstract record field access expression. If +%% <code>Type</code> is not <code>none</code>, the result represents +%% "<code><em>Argument</em>#<em>Type</em>.<em>Field</em></code>". +%% +%% <p>If <code>Type</code> is <code>none</code>, the result represents +%% "<code><em>Argument</em>.<em>Field</em></code>". This is a special +%% form only allowed within Mnemosyne queries.</p> +%% +%% @see record_access/2 +%% @see record_access_argument/1 +%% @see record_access_type/1 +%% @see record_access_field/1 +%% @see record_expr/3 +%% @see query_expr/1 + +-record(record_access, {argument, type, field}). + +%% type(Node) = record_access +%% data(Node) = #record_access{argument :: Argument, type :: Type, +%% field :: Field} +%% +%% Argument = Field = syntaxTree() +%% Type = none | syntaxTree() +%% +%% `erl_parse' representation: +%% +%% {record_field, Pos, Argument, Type, Field} +%% {record_field, Pos, Argument, Field} +%% +%% Argument = Field = erl_parse() +%% Type = atom() + +record_access(Argument, Type, Field) -> + tree(record_access,#record_access{argument = Argument, + type = Type, + field = Field}). + +revert_record_access(Node) -> + Pos = get_pos(Node), + Argument = record_access_argument(Node), + Type = record_access_type(Node), + Field = record_access_field(Node), + if Type =:= none -> + {record_field, Pos, Argument, Field}; + true -> + case type(Type) of + atom -> + {record_field, Pos, + Argument, concrete(Type), Field}; + _ -> + Node + end + end. 
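+
+%% Usage sketch (illustration only): the two record selector forms
+%% above, assuming erl_prettypr:format/1 for rendering:
+%%
+%%     A = erl_syntax:record_access(erl_syntax:variable('R'),
+%%                                  erl_syntax:atom(person),
+%%                                  erl_syntax:atom(name)),
+%%     I = erl_syntax:record_index_expr(erl_syntax:atom(person),
+%%                                      erl_syntax:atom(name)),
+%%     %% A should print roughly as "R#person.name" and I as
+%%     %% "#person.name".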
+ + +%% ===================================================================== +%% @spec record_access_argument(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the argument subtree of a <code>record_access</code> +%% node. +%% +%% @see record_access/3 + +record_access_argument(Node) -> + case unwrap(Node) of + {record_field, _, Argument, _} -> + Argument; + {record_field, _, Argument, _, _} -> + Argument; + Node1 -> + (data(Node1))#record_access.argument + end. + + +%% ===================================================================== +%% @spec record_access_type(syntaxTree()) -> none | syntaxTree() +%% +%% @doc Returns the type subtree of a <code>record_access</code> node, +%% if any. If <code>Node</code> represents +%% "<code><em>Argument</em>.<em>Field</em></code>", <code>none</code> +%% is returned, otherwise if <code>Node</code> represents +%% "<code><em>Argument</em>#<em>Type</em>.<em>Field</em></code>", +%% <code>Type</code> is returned. +%% +%% @see record_access/3 + +record_access_type(Node) -> + case unwrap(Node) of + {record_field, _, _, _} -> + none; + {record_field, Pos, _, Type, _} -> + set_pos(atom(Type), Pos); + Node1 -> + (data(Node1))#record_access.type + end. + + +%% ===================================================================== +%% @spec record_access_field(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the field subtree of a <code>record_access</code> +%% node. +%% +%% @see record_access/3 + +record_access_field(Node) -> + case unwrap(Node) of + {record_field, _, _, Field} -> + Field; + {record_field, _, _, _, Field} -> + Field; + Node1 -> + (data(Node1))#record_access.field + end. + + +%% ===================================================================== +%% @spec record_expr(Type, Fields) -> syntaxTree() +%% @equiv record_expr(none, Type, Fields) + +record_expr(Type, Fields) -> + record_expr(none, Type, Fields). + + +%% ===================================================================== +%% @spec record_expr(Argument, Type::syntaxTree(), +%% Fields::[syntaxTree()]) -> syntaxTree() +%% Argument = none | syntaxTree() +%% +%% @doc Creates an abstract record expression. If <code>Fields</code> is +%% <code>[F1, ..., Fn]</code>, then if <code>Argument</code> is +%% <code>none</code>, the result represents +%% "<code>#<em>Type</em>{<em>F1</em>, ..., <em>Fn</em>}</code>", +%% otherwise it represents +%% "<code><em>Argument</em>#<em>Type</em>{<em>F1</em>, ..., +%% <em>Fn</em>}</code>". +%% +%% @see record_expr/2 +%% @see record_expr_argument/1 +%% @see record_expr_fields/1 +%% @see record_expr_type/1 +%% @see record_field/2 +%% @see record_index_expr/2 +%% @see record_access/3 + +-record(record_expr, {argument, type, fields}). + +%% type(Node) = record_expr +%% data(Node) = #record_expr{argument :: Argument, type :: Type, +%% fields :: Fields} +%% +%% Argument = none | syntaxTree() +%% Type = syntaxTree +%% Fields = [syntaxTree()] +%% +%% `erl_parse' representation: +%% +%% {record, Pos, Type, Fields} +%% {record, Pos, Argument, Type, Fields} +%% +%% Argument = erl_parse() +%% Type = atom() +%% Fields = [Entry] +%% Entry = {record_field, Pos, Field, Value} +%% | {record_field, Pos, Field} +%% Field = Value = erl_parse() + +record_expr(Argument, Type, Fields) -> + tree(record_expr, #record_expr{argument = Argument, + type = Type, fields = Fields}). 
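+
+%% Usage sketch (illustration only): building the record update
+%% expression R#person{name = "Ann"} with record_expr/3 and
+%% record_field/2, assuming string/1 from this module:
+%%
+%%     E = erl_syntax:record_expr(
+%%             erl_syntax:variable('R'),
+%%             erl_syntax:atom(person),
+%%             [erl_syntax:record_field(erl_syntax:atom(name),
+%%                                      erl_syntax:string("Ann"))]),
+%%     %% erl_syntax:revert(E) should yield the `erl_parse' form
+%%     %% {record, Pos, Argument, person, Fields} described above.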
+ +revert_record_expr(Node) -> + Pos = get_pos(Node), + Argument = record_expr_argument(Node), + Type = record_expr_type(Node), + Fields = record_expr_fields(Node), + case type(Type) of + atom -> + T = concrete(Type), + Fs = fold_record_fields(Fields), + case Argument of + none -> + {record, Pos, T, Fs}; + _ -> + {record, Pos, Argument, T, Fs} + end; + _ -> + Node + end. + + +%% ===================================================================== +%% @spec record_expr_argument(syntaxTree()) -> none | syntaxTree() +%% +%% @doc Returns the argument subtree of a <code>record_expr</code> node, +%% if any. If <code>Node</code> represents +%% "<code>#<em>Type</em>{...}</code>", <code>none</code> is returned. +%% Otherwise, if <code>Node</code> represents +%% "<code><em>Argument</em>#<em>Type</em>{...}</code>", +%% <code>Argument</code> is returned. +%% +%% @see record_expr/3 + +record_expr_argument(Node) -> + case unwrap(Node) of + {record, _, _, _} -> + none; + {record, _, Argument, _, _} -> + Argument; + Node1 -> + (data(Node1))#record_expr.argument + end. + + +%% ===================================================================== +%% @spec record_expr_type(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the type subtree of a <code>record_expr</code> node. +%% +%% @see record_expr/3 + +record_expr_type(Node) -> + case unwrap(Node) of + {record, Pos, Type, _} -> + set_pos(atom(Type), Pos); + {record, Pos, _, Type, _} -> + set_pos(atom(Type), Pos); + Node1 -> + (data(Node1))#record_expr.type + end. + + +%% ===================================================================== +%% @spec record_expr_fields(syntaxTree()) -> [syntaxTree()] +%% +%% @doc Returns the list of field subtrees of a +%% <code>record_expr</code> node. +%% +%% @see record_expr/3 + +record_expr_fields(Node) -> + case unwrap(Node) of + {record, _, _, Fields} -> + unfold_record_fields(Fields); + {record, _, _, _, Fields} -> + unfold_record_fields(Fields); + Node1 -> + (data(Node1))#record_expr.fields + end. + + +%% ===================================================================== +%% @spec application(Module, Function::syntaxTree(), +%% Arguments::[syntaxTree()]) -> syntaxTree() +%% Module = none | syntaxTree() +%% +%% @doc Creates an abstract function application expression. If +%% <code>Module</code> is <code>none</code>, this is call is equivalent +%% to <code>application(Function, Arguments)</code>, otherwise it is +%% equivalent to <code>application(module_qualifier(Module, Function), +%% Arguments)</code>. +%% +%% (This is a utility function.) +%% +%% @see application/2 +%% @see module_qualifier/2 + +application(none, Name, Arguments) -> + application(Name, Arguments); +application(Module, Name, Arguments) -> + application(module_qualifier(Module, Name), Arguments). + + +%% ===================================================================== +%% @spec application(Operator::syntaxTree(), +%% Arguments::[syntaxTree()]) -> syntaxTree() +%% +%% @doc Creates an abstract function application expression. If +%% <code>Arguments</code> is <code>[A1, ..., An]</code>, the result +%% represents "<code><em>Operator</em>(<em>A1</em>, ..., +%% <em>An</em>)</code>". +%% +%% @see application_operator/1 +%% @see application_arguments/1 +%% @see application/3 + +-record(application, {operator, arguments}). 
+ +%% type(Node) = application +%% data(Node) = #application{operator :: Operator, +%% arguments :: Arguments} +%% +%% Operator = syntaxTree() +%% Arguments = [syntaxTree()] +%% +%% `erl_parse' representation: +%% +%% {call, Pos, Fun, Args} +%% +%% Operator = erl_parse() +%% Arguments = [erl_parse()] + +application(Operator, Arguments) -> + tree(application, #application{operator = Operator, + arguments = Arguments}). + +revert_application(Node) -> + Pos = get_pos(Node), + Operator = application_operator(Node), + Arguments = application_arguments(Node), + {call, Pos, Operator, Arguments}. + + +%% ===================================================================== +%% @spec application_operator(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the operator subtree of an <code>application</code> +%% node. +%% +%% <p>Note: if <code>Node</code> represents +%% "<code><em>M</em>:<em>F</em>(...)</code>", then the result is the +%% subtree representing "<code><em>M</em>:<em>F</em></code>".</p> +%% +%% @see application/2 +%% @see module_qualifier/2 + +application_operator(Node) -> + case unwrap(Node) of + {call, _, Operator, _} -> + Operator; + Node1 -> + (data(Node1))#application.operator + end. + + +%% ===================================================================== +%% @spec application_arguments(syntaxTree()) -> [syntaxTree()] +%% +%% @doc Returns the list of argument subtrees of an +%% <code>application</code> node. +%% +%% @see application/2 + +application_arguments(Node) -> + case unwrap(Node) of + {call, _, _, Arguments} -> + Arguments; + Node1 -> + (data(Node1))#application.arguments + end. + + +%% ===================================================================== +%% @spec list_comp(Template::syntaxTree(), Body::[syntaxTree()]) -> +%% syntaxTree() +%% +%% @doc Creates an abstract list comprehension. If <code>Body</code> is +%% <code>[E1, ..., En]</code>, the result represents +%% "<code>[<em>Template</em> || <em>E1</em>, ..., <em>En</em>]</code>". +%% +%% @see list_comp_template/1 +%% @see list_comp_body/1 +%% @see generator/2 + +-record(list_comp, {template, body}). + +%% type(Node) = list_comp +%% data(Node) = #list_comp{template :: Template, body :: Body} +%% +%% Template = Node = syntaxTree() +%% Body = [syntaxTree()] +%% +%% `erl_parse' representation: +%% +%% {lc, Pos, Template, Body} +%% +%% Template = erl_parse() +%% Body = [erl_parse()] \ [] + +list_comp(Template, Body) -> + tree(list_comp, #list_comp{template = Template, body = Body}). + +revert_list_comp(Node) -> + Pos = get_pos(Node), + Template = list_comp_template(Node), + Body = list_comp_body(Node), + {lc, Pos, Template, Body}. + + +%% ===================================================================== +%% @spec list_comp_template(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the template subtree of a <code>list_comp</code> node. +%% +%% @see list_comp/2 + +list_comp_template(Node) -> + case unwrap(Node) of + {lc, _, Template, _} -> + Template; + Node1 -> + (data(Node1))#list_comp.template + end. + + +%% ===================================================================== +%% @spec list_comp_body(syntaxTree()) -> [syntaxTree()] +%% +%% @doc Returns the list of body subtrees of a <code>list_comp</code> +%% node. +%% +%% @see list_comp/2 + +list_comp_body(Node) -> + case unwrap(Node) of + {lc, _, _, Body} -> + Body; + Node1 -> + (data(Node1))#list_comp.body + end. 
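+
+%% Usage sketch (illustration only): combining application/3 and
+%% list_comp/2 to build "[lists:reverse(X) || X <- Xs]" (generator/2 is
+%% defined further down in this module; the rendering is an assumption
+%% about erl_prettypr:format/1):
+%%
+%%     Call = erl_syntax:application(erl_syntax:atom(lists),
+%%                                   erl_syntax:atom(reverse),
+%%                                   [erl_syntax:variable('X')]),
+%%     LC = erl_syntax:list_comp(
+%%              Call,
+%%              [erl_syntax:generator(erl_syntax:variable('X'),
+%%                                    erl_syntax:variable('Xs'))]),
+%%     %% erl_prettypr:format(LC) should print roughly
+%%     %% "[lists:reverse(X) || X <- Xs]".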
+ +%% ===================================================================== +%% @spec binary_comp(Template::syntaxTree(), Body::[syntaxTree()]) -> +%% syntaxTree() +%% +%% @doc Creates an abstract binary comprehension. If <code>Body</code> is +%% <code>[E1, ..., En]</code>, the result represents +%% "<code><<<em>Template</em> || <em>E1</em>, ..., <em>En</em>>></code>". +%% +%% @see binary_comp_template/1 +%% @see binary_comp_body/1 +%% @see generator/2 + +-record(binary_comp, {template, body}). + +%% type(Node) = binary_comp +%% data(Node) = #binary_comp{template :: Template, body :: Body} +%% +%% Template = Node = syntaxTree() +%% Body = [syntaxTree()] +%% +%% `erl_parse' representation: +%% +%% {bc, Pos, Template, Body} +%% +%% Template = erl_parse() +%% Body = [erl_parse()] \ [] + +binary_comp(Template, Body) -> + tree(binary_comp, #binary_comp{template = Template, body = Body}). + +revert_binary_comp(Node) -> + Pos = get_pos(Node), + Template = binary_comp_template(Node), + Body = binary_comp_body(Node), + {bc, Pos, Template, Body}. + + +%% ===================================================================== +%% @spec binary_comp_template(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the template subtree of a <code>binary_comp</code> node. +%% +%% @see binary_comp/2 + +binary_comp_template(Node) -> + case unwrap(Node) of + {bc, _, Template, _} -> + Template; + Node1 -> + (data(Node1))#binary_comp.template + end. + + +%% ===================================================================== +%% @spec binary_comp_body(syntaxTree()) -> [syntaxTree()] +%% +%% @doc Returns the list of body subtrees of a <code>binary_comp</code> +%% node. +%% +%% @see binary_comp/2 + +binary_comp_body(Node) -> + case unwrap(Node) of + {bc, _, _, Body} -> + Body; + Node1 -> + (data(Node1))#binary_comp.body + end. + + +%% ===================================================================== +%% @spec query_expr(Body::syntaxTree()) -> syntaxTree() +%% +%% @doc Creates an abstract Mnemosyne query expression. The result +%% represents "<code>query <em>Body</em> end</code>". +%% +%% @see query_expr_body/1 +%% @see record_access/2 +%% @see rule/2 + +%% type(Node) = query_expr +%% data(Node) = syntaxTree() +%% +%% `erl_parse' representation: +%% +%% {'query', Pos, Body} +%% +%% Body = erl_parse() + +query_expr(Body) -> + tree(query_expr, Body). + +revert_query_expr(Node) -> + Pos = get_pos(Node), + Body = list_comp_body(Node), + {'query', Pos, Body}. + + +%% ===================================================================== +%% @spec query_expr_body(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the body subtree of a <code>query_expr</code> node. +%% +%% @see query_expr/1 + +query_expr_body(Node) -> + case unwrap(Node) of + {'query', _, Body} -> + Body; + Node1 -> + data(Node1) + end. + + +%% ===================================================================== +%% @spec rule(Name::syntaxTree(), Clauses::[syntaxTree()]) -> +%% syntaxTree() +%% +%% @doc Creates an abstract Mnemosyne rule. If <code>Clauses</code> is +%% <code>[C1, ..., Cn]</code>, the results represents +%% "<code><em>Name</em> <em>C1</em>; ...; <em>Name</em> +%% <em>Cn</em>.</code>". More exactly, if each <code>Ci</code> +%% represents "<code>(<em>Pi1</em>, ..., <em>Pim</em>) <em>Gi</em> -> +%% <em>Bi</em></code>", then the result represents +%% "<code><em>Name</em>(<em>P11</em>, ..., <em>P1m</em>) <em>G1</em> :- +%% <em>B1</em>; ...; <em>Name</em>(<em>Pn1</em>, ..., <em>Pnm</em>) +%% <em>Gn</em> :- <em>Bn</em>.</code>". 
Rules are source code forms. +%% +%% @see rule_name/1 +%% @see rule_clauses/1 +%% @see rule_arity/1 +%% @see is_form/1 +%% @see function/2 + +-record(rule, {name, clauses}). + +%% type(Node) = rule +%% data(Node) = #rule{name :: Name, clauses :: Clauses} +%% +%% Name = syntaxTree() +%% Clauses = [syntaxTree()] +%% +%% (See `function' for notes on why the arity is not stored.) +%% +%% `erl_parse' representation: +%% +%% {rule, Pos, Name, Arity, Clauses} +%% +%% Name = atom() +%% Arity = integer() +%% Clauses = [Clause] \ [] +%% Clause = {clause, ...} +%% +%% where the number of patterns in each clause should be equal to +%% the integer `Arity'; see `clause' for documentation on +%% `erl_parse' clauses. + +rule(Name, Clauses) -> + tree(rule, #rule{name = Name, clauses = Clauses}). + +revert_rule(Node) -> + Name = rule_name(Node), + Clauses = [revert_clause(C) || C <- rule_clauses(Node)], + Pos = get_pos(Node), + case type(Name) of + atom -> + A = rule_arity(Node), + {rule, Pos, concrete(Name), A, Clauses}; + _ -> + Node + end. + + +%% ===================================================================== +%% @spec rule_name(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the name subtree of a <code>rule</code> node. +%% +%% @see rule/2 + +rule_name(Node) -> + case unwrap(Node) of + {rule, Pos, Name, _, _} -> + set_pos(atom(Name), Pos); + Node1 -> + (data(Node1))#rule.name + end. + +%% ===================================================================== +%% @spec rule_clauses(syntaxTree()) -> [syntaxTree()] +%% +%% @doc Returns the list of clause subtrees of a <code>rule</code> node. +%% +%% @see rule/2 + +rule_clauses(Node) -> + case unwrap(Node) of + {rule, _, _, _, Clauses} -> + Clauses; + Node1 -> + (data(Node1))#rule.clauses + end. + +%% ===================================================================== +%% @spec rule_arity(Node::syntaxTree()) -> integer() +%% +%% @doc Returns the arity of a <code>rule</code> node. The result is the +%% number of parameter patterns in the first clause of the rule; +%% subsequent clauses are ignored. +%% +%% <p>An exception is thrown if <code>rule_clauses(Node)</code> returns +%% an empty list, or if the first element of that list is not a syntax +%% tree <code>C</code> of type <code>clause</code> such that +%% <code>clause_patterns(C)</code> is a nonempty list.</p> +%% +%% @see rule/2 +%% @see rule_clauses/1 +%% @see clause/3 +%% @see clause_patterns/1 + +rule_arity(Node) -> + %% Note that this never accesses the arity field of + %% `erl_parse' rule nodes. + length(clause_patterns(hd(rule_clauses(Node)))). + + +%% ===================================================================== +%% @spec generator(Pattern::syntaxTree(), Body::syntaxTree()) -> +%% syntaxTree() +%% +%% @doc Creates an abstract generator. The result represents +%% "<code><em>Pattern</em> <- <em>Body</em></code>". +%% +%% @see generator_pattern/1 +%% @see generator_body/1 +%% @see list_comp/2 +%% @see binary_comp/2 + +-record(generator, {pattern, body}). + +%% type(Node) = generator +%% data(Node) = #generator{pattern :: Pattern, body :: Body} +%% +%% Pattern = Argument = syntaxTree() +%% +%% `erl_parse' representation: +%% +%% {generate, Pos, Pattern, Body} +%% +%% Pattern = Body = erl_parse() + +generator(Pattern, Body) -> + tree(generator, #generator{pattern = Pattern, body = Body}). + +revert_generator(Node) -> + Pos = get_pos(Node), + Pattern = generator_pattern(Node), + Body = generator_body(Node), + {generate, Pos, Pattern, Body}. 
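+
+%% Usage sketch (illustration only): a generator reverts to the
+%% `erl_parse' form shown above; erl_syntax:revert/1 reverts the
+%% variable subtrees as well:
+%%
+%%     G = erl_syntax:generator(erl_syntax:variable('X'),
+%%                              erl_syntax:variable('Xs')),
+%%     {generate, _, {var, _, 'X'}, {var, _, 'Xs'}} =
+%%         erl_syntax:revert(G).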
+ + +%% ===================================================================== +%% @spec generator_pattern(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the pattern subtree of a <code>generator</code> node. +%% +%% @see generator/2 + +generator_pattern(Node) -> + case unwrap(Node) of + {generate, _, Pattern, _} -> + Pattern; + Node1 -> + (data(Node1))#generator.pattern + end. + + +%% ===================================================================== +%% @spec generator_body(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the body subtree of a <code>generator</code> node. +%% +%% @see generator/2 + +generator_body(Node) -> + case unwrap(Node) of + {generate, _, _, Body} -> + Body; + Node1 -> + (data(Node1))#generator.body + end. + + +%% ===================================================================== +%% @spec binary_generator(Pattern::syntaxTree(), Body::syntaxTree()) -> +%% syntaxTree() +%% +%% @doc Creates an abstract binary_generator. The result represents +%% "<code><em>Pattern</em> <- <em>Body</em></code>". +%% +%% @see binary_generator_pattern/1 +%% @see binary_generator_body/1 +%% @see list_comp/2 +%% @see binary_comp/2 + +-record(binary_generator, {pattern, body}). + +%% type(Node) = binary_generator +%% data(Node) = #binary_generator{pattern :: Pattern, body :: Body} +%% +%% Pattern = Argument = syntaxTree() +%% +%% `erl_parse' representation: +%% +%% {b_generate, Pos, Pattern, Body} +%% +%% Pattern = Body = erl_parse() + +binary_generator(Pattern, Body) -> + tree(binary_generator, #binary_generator{pattern = Pattern, body = Body}). + +revert_binary_generator(Node) -> + Pos = get_pos(Node), + Pattern = binary_generator_pattern(Node), + Body = binary_generator_body(Node), + {b_generate, Pos, Pattern, Body}. + + +%% ===================================================================== +%% @spec binary_generator_pattern(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the pattern subtree of a <code>generator</code> node. +%% +%% @see binary_generator/2 + +binary_generator_pattern(Node) -> + case unwrap(Node) of + {b_generate, _, Pattern, _} -> + Pattern; + Node1 -> + (data(Node1))#binary_generator.pattern + end. + + +%% ===================================================================== +%% @spec binary_generator_body(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the body subtree of a <code>generator</code> node. +%% +%% @see binary_generator/2 + +binary_generator_body(Node) -> + case unwrap(Node) of + {b_generate, _, _, Body} -> + Body; + Node1 -> + (data(Node1))#binary_generator.body + end. + +%% ===================================================================== +%% @spec block_expr(Body::[syntaxTree()]) -> syntaxTree() +%% +%% @doc Creates an abstract block expression. If <code>Body</code> is +%% <code>[B1, ..., Bn]</code>, the result represents "<code>begin +%% <em>B1</em>, ..., <em>Bn</em> end</code>". +%% +%% @see block_expr_body/1 + +%% type(Node) = block_expr +%% data(Node) = Body +%% +%% Body = [syntaxTree()] +%% +%% `erl_parse' representation: +%% +%% {block, Pos, Body} +%% +%% Body = [erl_parse()] \ [] + +block_expr(Body) -> + tree(block_expr, Body). + +revert_block_expr(Node) -> + Pos = get_pos(Node), + Body = block_expr_body(Node), + {block, Pos, Body}. + + +%% ===================================================================== +%% @spec block_expr_body(syntaxTree()) -> [syntaxTree()] +%% +%% @doc Returns the list of body subtrees of a <code>block_expr</code> +%% node. 
+%% +%% @see block_expr/1 + +block_expr_body(Node) -> + case unwrap(Node) of + {block, _, Body} -> + Body; + Node1 -> + data(Node1) + end. + + +%% ===================================================================== +%% @spec if_expr(Clauses::[syntaxTree()]) -> syntaxTree() +%% +%% @doc Creates an abstract if-expression. If <code>Clauses</code> is +%% <code>[C1, ..., Cn]</code>, the result represents "<code>if +%% <em>C1</em>; ...; <em>Cn</em> end</code>". More exactly, if each +%% <code>Ci</code> represents "<code>() <em>Gi</em> -> +%% <em>Bi</em></code>", then the result represents "<code>if +%% <em>G1</em> -> <em>B1</em>; ...; <em>Gn</em> -> <em>Bn</em> +%% end</code>". +%% +%% @see if_expr_clauses/1 +%% @see clause/3 +%% @see case_expr/2 + +%% type(Node) = if_expr +%% data(Node) = Clauses +%% +%% Clauses = [syntaxTree()] +%% +%% `erl_parse' representation: +%% +%% {'if', Pos, Clauses} +%% +%% Clauses = [Clause] \ [] +%% Clause = {clause, ...} +%% +%% See `clause' for documentation on `erl_parse' clauses. + +if_expr(Clauses) -> + tree(if_expr, Clauses). + +revert_if_expr(Node) -> + Pos = get_pos(Node), + Clauses = [revert_clause(C) || C <- if_expr_clauses(Node)], + {'if', Pos, Clauses}. + + +%% ===================================================================== +%% @spec if_expr_clauses(syntaxTree()) -> [syntaxTree()] +%% +%% @doc Returns the list of clause subtrees of an <code>if_expr</code> +%% node. +%% +%% @see if_expr/1 + +if_expr_clauses(Node) -> + case unwrap(Node) of + {'if', _, Clauses} -> + Clauses; + Node1 -> + data(Node1) + end. + + +%% ===================================================================== +%% @spec case_expr(Argument::syntaxTree(), Clauses::[syntaxTree()]) -> +%% syntaxTree() +%% +%% @doc Creates an abstract case-expression. If <code>Clauses</code> is +%% <code>[C1, ..., Cn]</code>, the result represents "<code>case +%% <em>Argument</em> of <em>C1</em>; ...; <em>Cn</em> end</code>". More +%% exactly, if each <code>Ci</code> represents "<code>(<em>Pi</em>) +%% <em>Gi</em> -> <em>Bi</em></code>", then the result represents +%% "<code>case <em>Argument</em> of <em>P1</em> <em>G1</em> -> +%% <em>B1</em>; ...; <em>Pn</em> <em>Gn</em> -> <em>Bn</em> end</code>". +%% +%% @see case_expr_clauses/1 +%% @see case_expr_argument/1 +%% @see clause/3 +%% @see if_expr/1 +%% @see cond_expr/1 + +-record(case_expr, {argument, clauses}). + +%% type(Node) = case_expr +%% data(Node) = #case_expr{argument :: Argument, +%% clauses :: Clauses} +%% +%% Argument = syntaxTree() +%% Clauses = [syntaxTree()] +%% +%% `erl_parse' representation: +%% +%% {'case', Pos, Argument, Clauses} +%% +%% Argument = erl_parse() +%% Clauses = [Clause] \ [] +%% Clause = {clause, ...} +%% +%% See `clause' for documentation on `erl_parse' clauses. + +case_expr(Argument, Clauses) -> + tree(case_expr, #case_expr{argument = Argument, + clauses = Clauses}). + +revert_case_expr(Node) -> + Pos = get_pos(Node), + Argument = case_expr_argument(Node), + Clauses = [revert_clause(C) || C <- case_expr_clauses(Node)], + {'case', Pos, Argument, Clauses}. + + +%% ===================================================================== +%% @spec case_expr_argument(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the argument subtree of a <code>case_expr</code> node. +%% +%% @see case_expr/2 + +case_expr_argument(Node) -> + case unwrap(Node) of + {'case', _, Argument, _} -> + Argument; + Node1 -> + (data(Node1))#case_expr.argument + end. 
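+
+%% For illustration, a minimal sketch of `case_expr/2' together with
+%% `clause/3' (rendering via `erl_prettypr:format/1' is assumed):
+%%
+%%     C1 = erl_syntax:clause([erl_syntax:integer(1)], none,
+%%                            [erl_syntax:atom(one)]),
+%%     C2 = erl_syntax:clause([erl_syntax:underscore()], none,
+%%                            [erl_syntax:atom(other)]),
+%%     E = erl_syntax:case_expr(erl_syntax:variable('X'), [C1, C2]),
+%%     io:put_chars(erl_prettypr:format(E))
+%%
+%% which should print roughly "case X of 1 -> one; _ -> other end".
+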
+ + +%% ===================================================================== +%% @spec case_expr_clauses(syntaxTree()) -> [syntaxTree()] +%% +%% @doc Returns the list of clause subtrees of a <code>case_expr</code> +%% node. +%% +%% @see case_expr/2 + +case_expr_clauses(Node) -> + case unwrap(Node) of + {'case', _, _, Clauses} -> + Clauses; + Node1 -> + (data(Node1))#case_expr.clauses + end. + + +%% ===================================================================== +%% @spec cond_expr(Clauses::[syntaxTree()]) -> syntaxTree() +%% +%% @doc Creates an abstract cond-expression. If <code>Clauses</code> is +%% <code>[C1, ..., Cn]</code>, the result represents "<code>cond +%% <em>C1</em>; ...; <em>Cn</em> end</code>". More exactly, if each +%% <code>Ci</code> represents "<code>() <em>Ei</em> -> +%% <em>Bi</em></code>", then the result represents "<code>cond +%% <em>E1</em> -> <em>B1</em>; ...; <em>En</em> -> <em>Bn</em> +%% end</code>". +%% +%% @see cond_expr_clauses/1 +%% @see clause/3 +%% @see case_expr/2 + +%% type(Node) = cond_expr +%% data(Node) = Clauses +%% +%% Clauses = [syntaxTree()] +%% +%% `erl_parse' representation: +%% +%% {'cond', Pos, Clauses} +%% +%% Clauses = [Clause] \ [] +%% Clause = {clause, ...} +%% +%% See `clause' for documentation on `erl_parse' clauses. + +cond_expr(Clauses) -> + tree(cond_expr, Clauses). + +revert_cond_expr(Node) -> + Pos = get_pos(Node), + Clauses = [revert_clause(C) || C <- cond_expr_clauses(Node)], + {'cond', Pos, Clauses}. + + +%% ===================================================================== +%% @spec cond_expr_clauses(syntaxTree()) -> [syntaxTree()] +%% +%% @doc Returns the list of clause subtrees of a <code>cond_expr</code> +%% node. +%% +%% @see cond_expr/1 + +cond_expr_clauses(Node) -> + case unwrap(Node) of + {'cond', _, Clauses} -> + Clauses; + Node1 -> + data(Node1) + end. + + +%% ===================================================================== +%% @spec receive_expr(Clauses) -> syntaxTree() +%% @equiv receive_expr(Clauses, none, []) + +receive_expr(Clauses) -> + receive_expr(Clauses, none, []). + + +%% ===================================================================== +%% @spec receive_expr(Clauses::[syntaxTree()], Timeout, +%% Action::[syntaxTree()]) -> syntaxTree() +%% Timeout = none | syntaxTree() +%% +%% @doc Creates an abstract receive-expression. If <code>Timeout</code> +%% is <code>none</code>, the result represents "<code>receive +%% <em>C1</em>; ...; <em>Cn</em> end</code>" (the <code>Action</code> +%% argument is ignored). Otherwise, if <code>Clauses</code> is +%% <code>[C1, ..., Cn]</code> and <code>Action</code> is <code>[A1, ..., +%% Am]</code>, the result represents "<code>receive <em>C1</em>; ...; +%% <em>Cn</em> after <em>Timeout</em> -> <em>A1</em>, ..., <em>Am</em> +%% end</code>". More exactly, if each <code>Ci</code> represents +%% "<code>(<em>Pi</em>) <em>Gi</em> -> <em>Bi</em></code>", then the +%% result represents "<code>receive <em>P1</em> <em>G1</em> -> +%% <em>B1</em>; ...; <em>Pn</em> <em>Gn</em> -> <em>Bn</em> ... +%% end</code>". +%% +%% <p>Note that in Erlang, a receive-expression must have at least one +%% clause if no timeout part is specified.</p> +%% +%% @see receive_expr_clauses/1 +%% @see receive_expr_timeout/1 +%% @see receive_expr_action/1 +%% @see receive_expr/1 +%% @see clause/3 +%% @see case_expr/2 + +-record(receive_expr, {clauses, timeout, action}). 
+ +%% type(Node) = receive_expr +%% data(Node) = #receive_expr{clauses :: Clauses, +%% timeout :: Timeout, +%% action :: Action} +%% +%% Clauses = [syntaxTree()] +%% Timeout = none | syntaxTree() +%% Action = [syntaxTree()] +%% +%% `erl_parse' representation: +%% +%% {'receive', Pos, Clauses} +%% {'receive', Pos, Clauses, Timeout, Action} +%% +%% Clauses = [Clause] \ [] +%% Clause = {clause, ...} +%% Timeout = erl_parse() +%% Action = [erl_parse()] \ [] +%% +%% See `clause' for documentation on `erl_parse' clauses. + +receive_expr(Clauses, Timeout, Action) -> + %% If `Timeout' is `none', we always replace the actual + %% `Action' argument with an empty list, since + %% `receive_expr_action' should in that case return the empty + %% list regardless. + Action1 = case Timeout of + none -> []; + _ -> Action + end, + tree(receive_expr, #receive_expr{clauses = Clauses, + timeout = Timeout, + action = Action1}). + +revert_receive_expr(Node) -> + Pos = get_pos(Node), + Clauses = [revert_clause(C) || C <- receive_expr_clauses(Node)], + Timeout = receive_expr_timeout(Node), + Action = receive_expr_action(Node), + case Timeout of + none -> + {'receive', Pos, Clauses}; + _ -> + {'receive', Pos, Clauses, Timeout, Action} + end. + + +%% ===================================================================== +%% @spec receive_expr_clauses(syntaxTree()) -> [syntaxTree()] +%% type(Node) = receive_expr +%% +%% @doc Returns the list of clause subtrees of a +%% <code>receive_expr</code> node. +%% +%% @see receive_expr/3 + +receive_expr_clauses(Node) -> + case unwrap(Node) of + {'receive', _, Clauses} -> + Clauses; + {'receive', _, Clauses, _, _} -> + Clauses; + Node1 -> + (data(Node1))#receive_expr.clauses + end. + + +%% ===================================================================== +%% @spec receive_expr_timeout(Node::syntaxTree()) -> Timeout +%% Timeout = none | syntaxTree() +%% +%% @doc Returns the timeout subtree of a <code>receive_expr</code> node, +%% if any. If <code>Node</code> represents "<code>receive <em>C1</em>; +%% ...; <em>Cn</em> end</code>", <code>none</code> is returned. +%% Otherwise, if <code>Node</code> represents "<code>receive +%% <em>C1</em>; ...; <em>Cn</em> after <em>Timeout</em> -> ... end</code>", +%% <code>Timeout</code> is returned. +%% +%% @see receive_expr/3 + +receive_expr_timeout(Node) -> + case unwrap(Node) of + {'receive', _, _} -> + none; + {'receive', _, _, Timeout, _} -> + Timeout; + Node1 -> + (data(Node1))#receive_expr.timeout + end. + + +%% ===================================================================== +%% @spec receive_expr_action(Node::syntaxTree()) -> [syntaxTree()] +%% +%% @doc Returns the list of action body subtrees of a +%% <code>receive_expr</code> node. If <code>Node</code> represents +%% "<code>receive <em>C1</em>; ...; <em>Cn</em> end</code>", this is the +%% empty list. +%% +%% @see receive_expr/3 + +receive_expr_action(Node) -> + case unwrap(Node) of + {'receive', _, _} -> + []; + {'receive', _, _, _, Action} -> + Action; + Node1 -> + (data(Node1))#receive_expr.action + end. + + +%% ===================================================================== +%% @spec try_expr(Body::syntaxTree(), Handlers::[syntaxTree()]) -> +%% syntaxTree() +%% @equiv try_expr(Body, [], Handlers) + +try_expr(Body, Handlers) -> + try_expr(Body, [], Handlers). 
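+
+%% By way of example, a minimal sketch of `receive_expr/3' and its
+%% accessors above (the variable and message names are arbitrary):
+%%
+%%     C = erl_syntax:clause([erl_syntax:variable('Msg')], none,
+%%                           [erl_syntax:variable('Msg')]),
+%%     R = erl_syntax:receive_expr([C], erl_syntax:integer(1000),
+%%                                 [erl_syntax:atom(timeout)]),
+%%     erl_syntax:receive_expr_timeout(R)
+%%
+%% Here `receive_expr_timeout(R)' returns the abstract integer rather
+%% than `none', and `receive_expr_action(R)' returns the one-element
+%% action list, as described above.
+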
+ + +%% ===================================================================== +%% @spec try_expr(Body::syntaxTree(), Clauses::[syntaxTree()], +%% Handlers::[syntaxTree()]) -> syntaxTree() +%% @equiv try_expr(Body, Clauses, Handlers, []) + +try_expr(Body, Clauses, Handlers) -> + try_expr(Body, Clauses, Handlers, []). + + +%% ===================================================================== +%% @spec try_after_expr(Body::syntaxTree(), After::[syntaxTree()]) -> +%% syntaxTree() +%% @equiv try_expr(Body, [], [], After) + +try_after_expr(Body, After) -> + try_expr(Body, [], [], After). + + +%% ===================================================================== +%% @spec try_expr(Body::[syntaxTree()], Clauses::[syntaxTree()], +%% Handlers::[syntaxTree()], After::[syntaxTree()]) -> +%% syntaxTree() +%% +%% @doc Creates an abstract try-expression. If <code>Body</code> is +%% <code>[B1, ..., Bn]</code>, <code>Clauses</code> is <code>[C1, ..., +%% Cj]</code>, <code>Handlers</code> is <code>[H1, ..., Hk]</code>, and +%% <code>After</code> is <code>[A1, ..., Am]</code>, the result +%% represents "<code>try <em>B1</em>, ..., <em>Bn</em> of <em>C1</em>; +%% ...; <em>Cj</em> catch <em>H1</em>; ...; <em>Hk</em> after +%% <em>A1</em>, ..., <em>Am</em> end</code>". More exactly, if each +%% <code>Ci</code> represents "<code>(<em>CPi</em>) <em>CGi</em> -> +%% <em>CBi</em></code>", and each <code>Hi</code> represents +%% "<code>(<em>HPi</em>) <em>HGi</em> -> <em>HBi</em></code>", then the +%% result represents "<code>try <em>B1</em>, ..., <em>Bn</em> of +%% <em>CP1</em> <em>CG1</em> -> <em>CB1</em>; ...; <em>CPj</em> +%% <em>CGj</em> -> <em>CBj</em> catch <em>HP1</em> <em>HG1</em> -> +%% <em>HB1</em>; ...; <em>HPk</em> <em>HGk</em> -> <em>HBk</em> after +%% <em>A1</em>, ..., <em>Am</em> end</code>"; cf. +%% <code>case_expr/2</code>. If <code>Clauses</code> is the empty list, +%% the <code>of ...</code> section is left out. If <code>After</code> is +%% the empty list, the <code>after ...</code> section is left out. If +%% <code>Handlers</code> is the empty list, and <code>After</code> is +%% nonempty, the <code>catch ...</code> section is left out. +%% +%% @see try_expr_body/1 +%% @see try_expr_clauses/1 +%% @see try_expr_handlers/1 +%% @see try_expr_after/1 +%% @see try_expr/2 +%% @see try_expr/3 +%% @see try_after_expr/2 +%% @see clause/3 +%% @see class_qualifier/2 +%% @see case_expr/2 + +-record(try_expr, {body, clauses, handlers, 'after'}). + +%% type(Node) = try_expr +%% data(Node) = #try_expr{body :: Body, +%% clauses :: Clauses, +%% handlers :: Clauses, +%% after :: Body} +%% +%% Body = syntaxTree() +%% Clauses = [syntaxTree()] +%% +%% `erl_parse' representation: +%% +%% {'try', Pos, Body, Clauses, Handlers, After} +%% +%% Body = [erl_parse()] +%% Clauses = [Clause] +%% Handlers = [Clause] \ [] +%% Clause = {clause, ...} +%% After = [erl_parse()] +%% +%% See `clause' for documentation on `erl_parse' clauses. + +try_expr(Body, Clauses, Handlers, After) -> + tree(try_expr, #try_expr{body = Body, + clauses = Clauses, + handlers = Handlers, + 'after' = After}). + +revert_try_expr(Node) -> + Pos = get_pos(Node), + Body = try_expr_body(Node), + Clauses = [revert_clause(C) || C <- try_expr_clauses(Node)], + Handlers = [revert_try_clause(C) || C <- try_expr_handlers(Node)], + After = try_expr_after(Node), + {'try', Pos, Body, Clauses, Handlers, After}. 
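+
+%% A minimal sketch of `try_expr/2', which is equivalent to
+%% `try_expr(Body, [], Handlers, [])'; rendering via
+%% `erl_prettypr:format/1' is assumed:
+%%
+%%     Body = [erl_syntax:application(erl_syntax:atom(foo), [])],
+%%     H = erl_syntax:clause([erl_syntax:underscore()], none,
+%%                           [erl_syntax:atom(error)]),
+%%     T = erl_syntax:try_expr(Body, [H]),
+%%     io:put_chars(erl_prettypr:format(T))
+%%
+%% which should print roughly "try foo() catch _ -> error end"; since
+%% both the case-clause list and the after-body are empty, the
+%% "of ..." and "after ..." sections are left out, as described above.
+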
+ + +%% ===================================================================== +%% @spec try_expr_body(syntaxTree()) -> [syntaxTree()] +%% +%% @doc Returns the list of body subtrees of a <code>try_expr</code> +%% node. +%% +%% @see try_expr/4 + +try_expr_body(Node) -> + case unwrap(Node) of + {'try', _, Body, _, _, _} -> + Body; + Node1 -> + (data(Node1))#try_expr.body + end. + + +%% ===================================================================== +%% @spec try_expr_clauses(Node::syntaxTree()) -> [syntaxTree()] +%% +%% @doc Returns the list of case-clause subtrees of a +%% <code>try_expr</code> node. If <code>Node</code> represents +%% "<code>try <em>Body</em> catch <em>H1</em>; ...; <em>Hn</em> +%% end</code>", the result is the empty list. +%% +%% @see try_expr/4 + +try_expr_clauses(Node) -> + case unwrap(Node) of + {'try', _, _, Clauses, _, _} -> + Clauses; + Node1 -> + (data(Node1))#try_expr.clauses + end. + + +%% ===================================================================== +%% @spec try_expr_handlers(syntaxTree()) -> [syntaxTree()] +%% +%% @doc Returns the list of handler-clause subtrees of a +%% <code>try_expr</code> node. +%% +%% @see try_expr/4 + +try_expr_handlers(Node) -> + case unwrap(Node) of + {'try', _, _, _, Handlers, _} -> + unfold_try_clauses(Handlers); + Node1 -> + (data(Node1))#try_expr.handlers + end. + + +%% ===================================================================== +%% @spec try_expr_after(syntaxTree()) -> [syntaxTree()] +%% +%% @doc Returns the list of "after" subtrees of a <code>try_expr</code> +%% node. +%% +%% @see try_expr/4 + +try_expr_after(Node) -> + case unwrap(Node) of + {'try', _, _, _, _, After} -> + After; + Node1 -> + (data(Node1))#try_expr.'after' + end. + + +%% ===================================================================== +%% @spec class_qualifier(Class::syntaxTree(), Body::syntaxTree()) -> +%% syntaxTree() +%% +%% @doc Creates an abstract class qualifier. The result represents +%% "<code><em>Class</em>:<em>Body</em></code>". +%% +%% @see class_qualifier_argument/1 +%% @see class_qualifier_body/1 +%% @see try_expr/4 + +-record(class_qualifier, {class, body}). + +%% type(Node) = class_qualifier +%% data(Node) = #class_qualifier{class :: Class, body :: Body} +%% +%% Class = Body = syntaxTree() + +class_qualifier(Class, Body) -> + tree(class_qualifier, + #class_qualifier{class = Class, body = Body}). + + +%% ===================================================================== +%% @spec class_qualifier_argument(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the argument (the class) subtree of a +%% <code>class_qualifier</code> node. +%% +%% @see class_qualifier/2 + +class_qualifier_argument(Node) -> + (data(Node))#class_qualifier.class. + + +%% ===================================================================== +%% @spec class_qualifier_body(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the body subtree of a <code>class_qualifier</code> node. +%% +%% @see class_qualifier/2 + +class_qualifier_body(Node) -> + (data(Node))#class_qualifier.body. + + +%% ===================================================================== +%% @spec implicit_fun(Name::syntaxTree(), Arity::syntaxTree()) -> +%% syntaxTree() +%% +%% @doc Creates an abstract "implicit fun" expression. If +%% <code>Arity</code> is <code>none</code>, this is equivalent to +%% <code>implicit_fun(Name)</code>, otherwise it is equivalent to +%% <code>implicit_fun(arity_qualifier(Name, Arity))</code>. +%% +%% (This is a utility function.) 
+%% +%% @see implicit_fun/1 +%% @see implicit_fun/3 + +implicit_fun(Name, none) -> + implicit_fun(Name); +implicit_fun(Name, Arity) -> + implicit_fun(arity_qualifier(Name, Arity)). + + +%% ===================================================================== +%% @spec implicit_fun(Module::syntaxTree(), Name::syntaxTree(), +%% Arity::syntaxTree()) -> syntaxTree() +%% +%% @doc Creates an abstract module-qualified "implicit fun" expression. +%% If <code>Module</code> is <code>none</code>, this is equivalent to +%% <code>implicit_fun(Name, Arity)</code>, otherwise it is equivalent to +%% <code>implicit_fun(module_qualifier(Module, arity_qualifier(Name, +%% Arity))</code>. +%% +%% (This is a utility function.) +%% +%% @see implicit_fun/1 +%% @see implicit_fun/2 + +implicit_fun(none, Name, Arity) -> + implicit_fun(Name, Arity); +implicit_fun(Module, Name, Arity) -> + implicit_fun(module_qualifier(Module, arity_qualifier(Name, Arity))). + + +%% ===================================================================== +%% @spec implicit_fun(Name::syntaxTree()) -> syntaxTree() +%% +%% @doc Creates an abstract "implicit fun" expression. The result +%% represents "<code>fun <em>Name</em></code>". <code>Name</code> should +%% represent either <code><em>F</em>/<em>A</em></code> or +%% <code><em>M</em>:<em>F</em>/<em>A</em></code> +%% +%% @see implicit_fun_name/1 +%% @see implicit_fun/2 +%% @see implicit_fun/3 +%% @see arity_qualifier/2 +%% @see module_qualifier/2 + +%% type(Node) = implicit_fun +%% data(Node) = syntaxTree() +%% +%% `erl_parse' representation: +%% +%% {'fun', Pos, {function, Name, Arity}} +%% {'fun', Pos, {function, Module, Name, Arity}} +%% +%% Module = atom() +%% Name = atom() +%% Arity = integer() + +implicit_fun(Name) -> + tree(implicit_fun, Name). + +revert_implicit_fun(Node) -> + Pos = get_pos(Node), + Name = implicit_fun_name(Node), + case type(Name) of + arity_qualifier -> + F = arity_qualifier_body(Name), + A = arity_qualifier_argument(Name), + case {type(F), type(A)} of + {atom, integer} -> + {'fun', Pos, + {function, concrete(F), concrete(A)}}; + _ -> + Node + end; + module_qualifier -> + M = module_qualifier_argument(Name), + Name1 = module_qualifier_body(Name), + F = arity_qualifier_body(Name1), + A = arity_qualifier_argument(Name1), + case {type(M), type(F), type(A)} of + {atom, atom, integer} -> + {'fun', Pos, + {function, concrete(M), concrete(F), concrete(A)}}; + _ -> + Node + end; + _ -> + Node + end. + + +%% ===================================================================== +%% @spec implicit_fun_name(Node::syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the name subtree of an <code>implicit_fun</code> node. +%% +%% <p>Note: if <code>Node</code> represents "<code>fun +%% <em>N</em>/<em>A</em></code>" or "<code>fun +%% <em>M</em>:<em>N</em>/<em>A</em></code>", then the result is the +%% subtree representing "<code><em>N</em>/<em>A</em></code>" or +%% "<code><em>M</em>:<em>N</em>/<em>A</em></code>", respectively.</p> +%% +%% @see implicit_fun/1 +%% @see arity_qualifier/2 +%% @see module_qualifier/2 + +implicit_fun_name(Node) -> + case unwrap(Node) of + {'fun', Pos, {function, Atom, Arity}} -> + arity_qualifier(set_pos(atom(Atom), Pos), + set_pos(integer(Arity), Pos)); + {'fun', Pos, {function, Module, Atom, Arity}} -> + module_qualifier(set_pos(atom(Module), Pos), + arity_qualifier( + set_pos(atom(Atom), Pos), + set_pos(integer(Arity), Pos))); + Node1 -> + data(Node1) + end. 
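+
+%% A minimal sketch of `implicit_fun/3' together with the `erl_parse'
+%% representation documented above:
+%%
+%%     F = erl_syntax:implicit_fun(erl_syntax:atom(lists),
+%%                                 erl_syntax:atom(map),
+%%                                 erl_syntax:integer(2)),
+%%     erl_syntax:revert(F)
+%%
+%% The reverted form is `{'fun', Pos, {function, lists, map, 2}}',
+%% i.e. an abstract "fun lists:map/2".
+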
+ + +%% ===================================================================== +%% @spec fun_expr(Clauses::[syntaxTree()]) -> syntaxTree() +%% +%% @doc Creates an abstract fun-expression. If <code>Clauses</code> is +%% <code>[C1, ..., Cn]</code>, the result represents "<code>fun +%% <em>C1</em>; ...; <em>Cn</em> end</code>". More exactly, if each +%% <code>Ci</code> represents "<code>(<em>Pi1</em>, ..., <em>Pim</em>) +%% <em>Gi</em> -> <em>Bi</em></code>", then the result represents +%% "<code>fun (<em>P11</em>, ..., <em>P1m</em>) <em>G1</em> -> +%% <em>B1</em>; ...; (<em>Pn1</em>, ..., <em>Pnm</em>) <em>Gn</em> -> +%% <em>Bn</em> end</code>". +%% +%% @see fun_expr_clauses/1 +%% @see fun_expr_arity/1 + +%% type(Node) = fun_expr +%% data(Node) = Clauses +%% +%% Clauses = [syntaxTree()] +%% +%% (See `function' for notes; e.g. why the arity is not stored.) +%% +%% `erl_parse' representation: +%% +%% {'fun', Pos, {clauses, Clauses}} +%% +%% Clauses = [Clause] \ [] +%% Clause = {clause, ...} +%% +%% See `clause' for documentation on `erl_parse' clauses. + +fun_expr(Clauses) -> + tree(fun_expr, Clauses). + +revert_fun_expr(Node) -> + Clauses = [revert_clause(C) || C <- fun_expr_clauses(Node)], + Pos = get_pos(Node), + {'fun', Pos, {clauses, Clauses}}. + + +%% ===================================================================== +%% @spec fun_expr_clauses(syntaxTree()) -> [syntaxTree()] +%% +%% @doc Returns the list of clause subtrees of a <code>fun_expr</code> +%% node. +%% +%% @see fun_expr/1 + +fun_expr_clauses(Node) -> + case unwrap(Node) of + {'fun', _, {clauses, Clauses}} -> + Clauses; + Node1 -> + data(Node1) + end. + + +%% ===================================================================== +%% @spec fun_expr_arity(syntaxTree()) -> integer() +%% +%% @doc Returns the arity of a <code>fun_expr</code> node. The result is +%% the number of parameter patterns in the first clause of the +%% fun-expression; subsequent clauses are ignored. +%% +%% <p>An exception is thrown if <code>fun_expr_clauses(Node)</code> +%% returns an empty list, or if the first element of that list is not a +%% syntax tree <code>C</code> of type <code>clause</code> such that +%% <code>clause_patterns(C)</code> is a nonempty list.</p> +%% +%% @see fun_expr/1 +%% @see fun_expr_clauses/1 +%% @see clause/3 +%% @see clause_patterns/1 + +fun_expr_arity(Node) -> + length(clause_patterns(hd(fun_expr_clauses(Node)))). + + +%% ===================================================================== +%% @spec parentheses(Body::syntaxTree()) -> syntaxTree() +%% +%% @doc Creates an abstract parenthesised expression. The result +%% represents "<code>(<em>Body</em>)</code>", independently of the +%% context. +%% +%% @see parentheses_body/1 + +%% type(Node) = parentheses +%% data(Node) = syntaxTree() + +parentheses(Expr) -> + tree(parentheses, Expr). + +revert_parentheses(Node) -> + parentheses_body(Node). + + +%% ===================================================================== +%% @spec parentheses_body(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the body subtree of a <code>parentheses</code> node. +%% +%% @see parentheses/1 + +parentheses_body(Node) -> + data(Node). + + +%% ===================================================================== +%% @spec macro(Name) -> syntaxTree() +%% @equiv macro(Name, none) + +macro(Name) -> + macro(Name, none). 
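+
+%% As an illustration of `fun_expr/1' and `fun_expr_arity/1' above
+%% (the clause body is built with `infix_expr/3' and `operator/1',
+%% defined elsewhere in this module):
+%%
+%%     C = erl_syntax:clause([erl_syntax:variable('X')], none,
+%%                           [erl_syntax:infix_expr(
+%%                                erl_syntax:variable('X'),
+%%                                erl_syntax:operator('+'),
+%%                                erl_syntax:integer(1))]),
+%%     F = erl_syntax:fun_expr([C]),
+%%     1 = erl_syntax:fun_expr_arity(F)
+%%
+%% representing "fun (X) -> X + 1 end".
+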
+ + +%% ===================================================================== +%% @spec macro(Name::syntaxTree(), Arguments) -> syntaxTree() +%% Arguments = none | [syntaxTree()] +%% +%% @doc Creates an abstract macro application. If <code>Arguments</code> +%% is <code>none</code>, the result represents +%% "<code>?<em>Name</em></code>", otherwise, if <code>Arguments</code> +%% is <code>[A1, ..., An]</code>, the result represents +%% "<code>?<em>Name</em>(<em>A1</em>, ..., <em>An</em>)</code>". +%% +%% <p>Notes: if <code>Arguments</code> is the empty list, the result +%% will thus represent "<code>?<em>Name</em>()</code>", including a pair +%% of matching parentheses.</p> +%% +%% <p>The only syntactical limitation imposed by the preprocessor on the +%% arguments to a macro application (viewed as sequences of tokens) is +%% that they must be balanced with respect to parentheses, brackets, +%% <code>begin ... end</code>, <code>case ... end</code>, etc. The +%% <code>text</code> node type can be used to represent arguments which +%% are not regular Erlang constructs.</p> +%% +%% @see macro_name/1 +%% @see macro_arguments/1 +%% @see macro/1 +%% @see text/1 + +-record(macro, {name, arguments}). + +%% type(Node) = macro +%% data(Node) = #macro{name :: Name, arguments :: Arguments} +%% +%% Name = syntaxTree() +%% Arguments = none | [syntaxTree()] + +macro(Name, Arguments) -> + tree(macro, #macro{name = Name, arguments = Arguments}). + + +%% ===================================================================== +%% @spec macro_name(syntaxTree()) -> syntaxTree() +%% +%% @doc Returns the name subtree of a <code>macro</code> node. +%% +%% @see macro/2 + +macro_name(Node) -> + (data(Node))#macro.name. + + +%% ===================================================================== +%% @spec macro_arguments(Node::syntaxTree()) -> none | [syntaxTree()] +%% +%% @doc Returns the list of argument subtrees of a <code>macro</code> +%% node, if any. If <code>Node</code> represents +%% "<code>?<em>Name</em></code>", <code>none</code> is returned. +%% Otherwise, if <code>Node</code> represents +%% "<code>?<em>Name</em>(<em>A1</em>, ..., <em>An</em>)</code>", +%% <code>[A1, ..., An]</code> is returned. +%% +%% @see macro/2 + +macro_arguments(Node) -> + (data(Node))#macro.arguments. + + +%% ===================================================================== +%% @spec abstract(Term::term()) -> syntaxTree() +%% +%% @doc Returns the syntax tree corresponding to an Erlang term. +%% <code>Term</code> must be a literal term, i.e., one that can be +%% represented as a source code literal. Thus, it may not contain a +%% process identifier, port, reference, binary or function value as a +%% subterm. The function recognises printable strings, in order to get a +%% compact and readable representation. Evaluation fails with reason +%% <code>badarg</code> if <code>Term</code> is not a literal term. 
+%% +%% @see concrete/1 +%% @see is_literal/1 + +abstract([H | T] = L) when is_integer(H) -> + case is_printable(L) of + true -> + string(L); + false -> + abstract_tail(H, T) + end; +abstract([H | T]) -> + abstract_tail(H, T); +abstract(T) when is_atom(T) -> + atom(T); +abstract(T) when is_integer(T) -> + integer(T); +abstract(T) when is_float(T) -> + make_float(T); % (not `float', which would call the BIF) +abstract([]) -> + nil(); +abstract(T) when is_tuple(T) -> + tuple(abstract_list(tuple_to_list(T))); +abstract(T) when is_binary(T) -> + binary([binary_field(integer(B)) || B <- binary_to_list(T)]); +abstract(T) -> + erlang:error({badarg, T}). + +abstract_list([T | Ts]) -> + [abstract(T) | abstract_list(Ts)]; +abstract_list([]) -> + []. + +%% This is entered when we might have a sequence of conses that might or +%% might not be a proper list, but which should not be considered as a +%% potential string, to avoid unnecessary checking. This also avoids +%% that a list like `[4711, 42, 10]' could be abstracted to represent +%% `[4711 | "*\n"]'. + +abstract_tail(H1, [H2 | T]) -> + %% Recall that `cons' does "intelligent" composition + cons(abstract(H1), abstract_tail(H2, T)); +abstract_tail(H, T) -> + cons(abstract(H), abstract(T)). + + +%% ===================================================================== +%% @spec concrete(Node::syntaxTree()) -> term() +%% +%% @doc Returns the Erlang term represented by a syntax tree. Evaluation +%% fails with reason <code>badarg</code> if <code>Node</code> does not +%% represent a literal term. +%% +%% <p>Note: Currently, the set of syntax trees which have a concrete +%% representation is larger than the set of trees which can be built +%% using the function <code>abstract/1</code>. An abstract character +%% will be concretised as an integer, while <code>abstract/1</code> does +%% not at present yield an abstract character for any input. (Use the +%% <code>char/1</code> function to explicitly create an abstract +%% character.)</p> +%% +%% @see abstract/1 +%% @see is_literal/1 +%% @see char/1 + +concrete(Node) -> + case type(Node) of + atom -> + atom_value(Node); + integer -> + integer_value(Node); + float -> + float_value(Node); + char -> + char_value(Node); + string -> + string_value(Node); + nil -> + []; + list -> + [concrete(list_head(Node)) + | concrete(list_tail(Node))]; + tuple -> + list_to_tuple(concrete_list(tuple_elements(Node))); + binary -> + Fs = [revert_binary_field( + binary_field(binary_field_body(F), + case binary_field_size(F) of + none -> none; + S -> + revert(S) + end, + binary_field_types(F))) + || F <- binary_fields(Node)], + {value, B, _} = + eval_bits:expr_grp(Fs, [], + fun(F, _) -> + {value, concrete(F), []} + end, [], true), + B; + _ -> + erlang:error({badarg, Node}) + end. + +concrete_list([E | Es]) -> + [concrete(E) | concrete_list(Es)]; +concrete_list([]) -> + []. + + +%% ===================================================================== +%% @spec is_literal(Node::syntaxTree()) -> bool() +%% +%% @doc Returns <code>true</code> if <code>Node</code> represents a +%% literal term, otherwise <code>false</code>. This function returns +%% <code>true</code> if and only if the value of +%% <code>concrete(Node)</code> is defined. 
+%% +%% @see abstract/1 +%% @see concrete/1 + +is_literal(T) -> + case type(T) of + atom -> + true; + integer -> + true; + float -> + true; + char-> + true; + string -> + true; + nil -> + true; + list -> + is_literal(list_head(T)) andalso is_literal(list_tail(T)); + tuple -> + lists:all(fun is_literal/1, tuple_elements(T)); + _ -> + false + end. + + +%% ===================================================================== +%% @spec revert(Tree::syntaxTree()) -> syntaxTree() +%% +%% @doc Returns an <code>erl_parse</code>-compatible representation of a +%% syntax tree, if possible. If <code>Tree</code> represents a +%% well-formed Erlang program or expression, the conversion should work +%% without problems. Typically, <code>is_tree/1</code> yields +%% <code>true</code> if conversion failed (i.e., the result is still an +%% abstract syntax tree), and <code>false</code> otherwise. +%% +%% <p>The <code>is_tree/1</code> test is not completely foolproof. For a +%% few special node types (e.g. <code>arity_qualifier</code>), if such a +%% node occurs in a context where it is not expected, it will be left +%% unchanged as a non-reverted subtree of the result. This can only +%% happen if <code>Tree</code> does not actually represent legal Erlang +%% code.</p> +%% +%% @see revert_forms/1 +%% @see //stdlib/erl_parse + +revert(Node) -> + case is_tree(Node) of + false -> + %% Just remove any wrapper. `erl_parse' nodes never contain + %% abstract syntax tree nodes as subtrees. + unwrap(Node); + true -> + case is_leaf(Node) of + true -> + revert_root(Node); + false -> + %% First revert the subtrees, where possible. + %% (Sometimes, subtrees cannot be reverted out of + %% context, and the real work will be done when the + %% parent node is reverted.) + Gs = [[revert(X) || X <- L] || L <- subtrees(Node)], + + %% Then reconstruct the node from the reverted + %% parts, and revert the node itself. + Node1 = update_tree(Node, Gs), + revert_root(Node1) + end + end. + +%% Note: The concept of "compatible root node" is not strictly defined. +%% At a minimum, if `make_tree' is used to compose a node `T' from +%% subtrees that are all completely backwards compatible, then the +%% result of `revert_root(T)' should also be completely backwards +%% compatible. 
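+
+%% A minimal sketch tying together `abstract/1', `is_literal/1',
+%% `concrete/1' and `revert/1':
+%%
+%%     T = erl_syntax:abstract({foo, [1, 2, 3], "bar"}),
+%%     true = erl_syntax:is_literal(T),
+%%     {foo, [1, 2, 3], "bar"} = erl_syntax:concrete(T),
+%%     erl_syntax:revert(T)
+%%
+%% The reverted result is an `erl_parse'-compatible term such as
+%% `{tuple, Pos, [{atom, Pos, foo}, ...]}'.
+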
+ +revert_root(Node) -> + case type(Node) of + application -> + revert_application(Node); + atom -> + revert_atom(Node); + attribute -> + revert_attribute(Node); + binary -> + revert_binary(Node); + binary_comp -> + revert_binary_comp(Node); + binary_field -> + revert_binary_field(Node); + binary_generator -> + revert_binary_generator(Node); + block_expr -> + revert_block_expr(Node); + case_expr -> + revert_case_expr(Node); + catch_expr -> + revert_catch_expr(Node); + char -> + revert_char(Node); + clause -> + revert_clause(Node); + cond_expr -> + revert_cond_expr(Node); + eof_marker -> + revert_eof_marker(Node); + error_marker -> + revert_error_marker(Node); + float -> + revert_float(Node); + fun_expr -> + revert_fun_expr(Node); + function -> + revert_function(Node); + generator -> + revert_generator(Node); + if_expr -> + revert_if_expr(Node); + implicit_fun -> + revert_implicit_fun(Node); + infix_expr -> + revert_infix_expr(Node); + integer -> + revert_integer(Node); + list -> + revert_list(Node); + list_comp -> + revert_list_comp(Node); + match_expr -> + revert_match_expr(Node); + module_qualifier -> + revert_module_qualifier(Node); + nil -> + revert_nil(Node); + parentheses -> + revert_parentheses(Node); + prefix_expr -> + revert_prefix_expr(Node); + qualified_name -> + revert_qualified_name(Node); + query_expr -> + revert_query_expr(Node); + receive_expr -> + revert_receive_expr(Node); + record_access -> + revert_record_access(Node); + record_expr -> + revert_record_expr(Node); + record_index_expr -> + revert_record_index_expr(Node); + rule -> + revert_rule(Node); + string -> + revert_string(Node); + try_expr -> + revert_try_expr(Node); + tuple -> + revert_tuple(Node); + underscore -> + revert_underscore(Node); + variable -> + revert_variable(Node); + warning_marker -> + revert_warning_marker(Node); + _ -> + %% Non-revertible new-form node + Node + end. + + +%% ===================================================================== +%% @spec revert_forms(Forms) -> [erl_parse()] +%% +%% Forms = syntaxTree() | [syntaxTree()] +%% +%% @doc Reverts a sequence of Erlang source code forms. The sequence can +%% be given either as a <code>form_list</code> syntax tree (possibly +%% nested), or as a list of "program form" syntax trees. If successful, +%% the corresponding flat list of <code>erl_parse</code>-compatible +%% syntax trees is returned (cf. <code>revert/1</code>). If some program +%% form could not be reverted, <code>{error, Form}</code> is thrown. +%% Standalone comments in the form sequence are discarded. +%% +%% @see revert/1 +%% @see form_list/1 +%% @see is_form/1 + +revert_forms(L) when is_list(L) -> + revert_forms(form_list(L)); +revert_forms(T) -> + case type(T) of + form_list -> + T1 = flatten_form_list(T), + case catch {ok, revert_forms_1(form_list_elements(T1))} of + {ok, Fs} -> + Fs; + {error, _} = Error -> + erlang:error(Error); + {'EXIT', R} -> + exit(R); + R -> + throw(R) + end; + _ -> + erlang:error({badarg, T}) + end. + +revert_forms_1([T | Ts]) -> + case type(T) of + comment -> + revert_forms_1(Ts); + _ -> + T1 = revert(T), + case is_tree(T1) of + true -> + throw({error, T1}); + false -> + [T1 | revert_forms_1(Ts)] + end + end; +revert_forms_1([]) -> + []. + + +%% ===================================================================== +%% @spec subtrees(Node::syntaxTree()) -> [[syntaxTree()]] +%% +%% @doc Returns the grouped list of all subtrees of a syntax tree. If +%% <code>Node</code> is a leaf node (cf. 
<code>is_leaf/1</code>), this +%% is the empty list, otherwise the result is always a nonempty list, +%% containing the lists of subtrees of <code>Node</code>, in +%% left-to-right order as they occur in the printed program text, and +%% grouped by category. Often, each group contains only a single +%% subtree. +%% +%% <p>Depending on the type of <code>Node</code>, the size of some +%% groups may be variable (e.g., the group consisting of all the +%% elements of a tuple), while others always contain the same number of +%% elements - usually exactly one (e.g., the group containing the +%% argument expression of a case-expression). Note, however, that the +%% exact structure of the returned list (for a given node type) should +%% in general not be depended upon, since it might be subject to change +%% without notice.</p> +%% +%% <p>The function <code>subtrees/1</code> and the constructor functions +%% <code>make_tree/2</code> and <code>update_tree/2</code> can be a +%% great help if one wants to traverse a syntax tree, visiting all its +%% subtrees, but treat nodes of the tree in a uniform way in most or all +%% cases. Using these functions makes this simple, and also assures that +%% your code is not overly sensitive to extensions of the syntax tree +%% data type, because any node types not explicitly handled by your code +%% can be left to a default case.</p> +%% +%% <p>For example: +%% <pre> +%% postorder(F, Tree) -> +%% F(case subtrees(Tree) of +%% [] -> Tree; +%% List -> update_tree(Tree, +%% [[postorder(F, Subtree) +%% || Subtree <- Group] +%% || Group <- List]) +%% end). +%% </pre> +%% maps the function <code>F</code> on <code>Tree</code> and all its +%% subtrees, doing a post-order traversal of the syntax tree. (Note the +%% use of <code>update_tree/2</code> to preserve node attributes.) For a +%% simple function like: +%% <pre> +%% f(Node) -> +%% case type(Node) of +%% atom -> atom("a_" ++ atom_name(Node)); +%% _ -> Node +%% end. 
+%% </pre> +%% the call <code>postorder(fun f/1, Tree)</code> will yield a new +%% representation of <code>Tree</code> in which all atom names have been +%% extended with the prefix "a_", but nothing else (including comments, +%% annotations and line numbers) has been changed.</p> +%% +%% @see make_tree/2 +%% @see type/1 +%% @see is_leaf/1 +%% @see copy_attrs/2 + +subtrees(T) -> + case is_leaf(T) of + true -> + []; + false -> + case type(T) of + application -> + [[application_operator(T)], + application_arguments(T)]; + arity_qualifier -> + [[arity_qualifier_body(T)], + [arity_qualifier_argument(T)]]; + attribute -> + case attribute_arguments(T) of + none -> + [[attribute_name(T)]]; + As -> + [[attribute_name(T)], As] + end; + binary -> + [binary_fields(T)]; + binary_comp -> + [[binary_comp_template(T)], binary_comp_body(T)]; + binary_field -> + case binary_field_types(T) of + [] -> + [[binary_field_body(T)]]; + Ts -> + [[binary_field_body(T)], + Ts] + end; + binary_generator -> + [[binary_generator_pattern(T)], + [binary_generator_body(T)]]; + block_expr -> + [block_expr_body(T)]; + case_expr -> + [[case_expr_argument(T)], + case_expr_clauses(T)]; + catch_expr -> + [[catch_expr_body(T)]]; + class_qualifier -> + [[class_qualifier_argument(T)], + [class_qualifier_body(T)]]; + clause -> + case clause_guard(T) of + none -> + [clause_patterns(T), clause_body(T)]; + G -> + [clause_patterns(T), [G], + clause_body(T)] + end; + cond_expr -> + [cond_expr_clauses(T)]; + conjunction -> + [conjunction_body(T)]; + disjunction -> + [disjunction_body(T)]; + form_list -> + [form_list_elements(T)]; + fun_expr -> + [fun_expr_clauses(T)]; + function -> + [[function_name(T)], function_clauses(T)]; + generator -> + [[generator_pattern(T)], [generator_body(T)]]; + if_expr -> + [if_expr_clauses(T)]; + implicit_fun -> + [[implicit_fun_name(T)]]; + infix_expr -> + [[infix_expr_left(T)], + [infix_expr_operator(T)], + [infix_expr_right(T)]]; + list -> + case list_suffix(T) of + none -> + [list_prefix(T)]; + S -> + [list_prefix(T), [S]] + end; + list_comp -> + [[list_comp_template(T)], list_comp_body(T)]; + macro -> + case macro_arguments(T) of + none -> + [[macro_name(T)]]; + As -> + [[macro_name(T)], As] + end; + match_expr -> + [[match_expr_pattern(T)], + [match_expr_body(T)]]; + module_qualifier -> + [[module_qualifier_argument(T)], + [module_qualifier_body(T)]]; + parentheses -> + [[parentheses_body(T)]]; + prefix_expr -> + [[prefix_expr_operator(T)], + [prefix_expr_argument(T)]]; + qualified_name -> + [qualified_name_segments(T)]; + query_expr -> + [[query_expr_body(T)]]; + receive_expr -> + case receive_expr_timeout(T) of + none -> + [receive_expr_clauses(T)]; + E -> + [receive_expr_clauses(T), + [E], + receive_expr_action(T)] + end; + record_access -> + case record_access_type(T) of + none -> + [[record_access_argument(T)], + [record_access_field(T)]]; + R -> + [[record_access_argument(T)], + [R], + [record_access_field(T)]] + end; + record_expr -> + case record_expr_argument(T) of + none -> + [[record_expr_type(T)], + record_expr_fields(T)]; + V -> + [[V], + [record_expr_type(T)], + record_expr_fields(T)] + end; + record_field -> + case record_field_value(T) of + none -> + [[record_field_name(T)]]; + V -> + [[record_field_name(T)], [V]] + end; + record_index_expr -> + [[record_index_expr_type(T)], + [record_index_expr_field(T)]]; + rule -> + [[rule_name(T)], rule_clauses(T)]; + size_qualifier -> + [[size_qualifier_body(T)], + [size_qualifier_argument(T)]]; + try_expr -> + [try_expr_body(T), + 
try_expr_clauses(T), + try_expr_handlers(T), + try_expr_after(T)]; + tuple -> + [tuple_elements(T)] + end + end. + + +%% ===================================================================== +%% @spec update_tree(Node::syntaxTree(), Groups::[[syntaxTree()]]) -> +%% syntaxTree() +%% +%% @doc Creates a syntax tree with the same type and attributes as the +%% given tree. This is equivalent to <code>copy_attrs(Node, +%% make_tree(type(Node), Groups))</code>. +%% +%% @see make_tree/2 +%% @see copy_attrs/2 +%% @see type/1 + +update_tree(Node, Groups) -> + copy_attrs(Node, make_tree(type(Node), Groups)). + + +%% ===================================================================== +%% @spec make_tree(Type::atom(), Groups::[[syntaxTree()]]) -> +%% syntaxTree() +%% +%% @doc Creates a syntax tree with the given type and subtrees. +%% <code>Type</code> must be a node type name (cf. <code>type/1</code>) +%% that does not denote a leaf node type (cf. <code>is_leaf/1</code>). +%% <code>Groups</code> must be a <em>nonempty</em> list of groups of +%% syntax trees, representing the subtrees of a node of the given type, +%% in left-to-right order as they would occur in the printed program +%% text, grouped by category as done by <code>subtrees/1</code>. +%% +%% <p>The result of <code>copy_attrs(Node, make_tree(type(Node), +%% subtrees(Node)))</code> (cf. <code>update_tree/2</code>) represents +%% the same source code text as the original <code>Node</code>, assuming +%% that <code>subtrees(Node)</code> yields a nonempty list. However, it +%% does not necessarily have the same data representation as +%% <code>Node</code>.</p> +%% +%% @see update_tree/2 +%% @see subtrees/1 +%% @see type/1 +%% @see is_leaf/1 +%% @see copy_attrs/2 + +make_tree(application, [[F], A]) -> application(F, A); +make_tree(arity_qualifier, [[N], [A]]) -> arity_qualifier(N, A); +make_tree(attribute, [[N]]) -> attribute(N); +make_tree(attribute, [[N], A]) -> attribute(N, A); +make_tree(binary, [Fs]) -> binary(Fs); +make_tree(binary_comp, [[T], B]) -> binary_comp(T, B); +make_tree(binary_field, [[B]]) -> binary_field(B); +make_tree(binary_field, [[B], Ts]) -> binary_field(B, Ts); +make_tree(binary_generator, [[P], [E]]) -> binary_generator(P, E); +make_tree(block_expr, [B]) -> block_expr(B); +make_tree(case_expr, [[A], C]) -> case_expr(A, C); +make_tree(catch_expr, [[B]]) -> catch_expr(B); +make_tree(class_qualifier, [[A], [B]]) -> class_qualifier(A, B); +make_tree(clause, [P, B]) -> clause(P, none, B); +make_tree(clause, [P, [G], B]) -> clause(P, G, B); +make_tree(cond_expr, [C]) -> cond_expr(C); +make_tree(conjunction, [E]) -> conjunction(E); +make_tree(disjunction, [E]) -> disjunction(E); +make_tree(form_list, [E]) -> form_list(E); +make_tree(fun_expr, [C]) -> fun_expr(C); +make_tree(function, [[N], C]) -> function(N, C); +make_tree(generator, [[P], [E]]) -> generator(P, E); +make_tree(if_expr, [C]) -> if_expr(C); +make_tree(implicit_fun, [[N]]) -> implicit_fun(N); +make_tree(infix_expr, [[L], [F], [R]]) -> infix_expr(L, F, R); +make_tree(list, [P]) -> list(P); +make_tree(list, [P, [S]]) -> list(P, S); +make_tree(list_comp, [[T], B]) -> list_comp(T, B); +make_tree(macro, [[N]]) -> macro(N); +make_tree(macro, [[N], A]) -> macro(N, A); +make_tree(match_expr, [[P], [E]]) -> match_expr(P, E); +make_tree(module_qualifier, [[M], [N]]) -> module_qualifier(M, N); +make_tree(parentheses, [[E]]) -> parentheses(E); +make_tree(prefix_expr, [[F], [A]]) -> prefix_expr(F, A); +make_tree(qualified_name, [S]) -> qualified_name(S); +make_tree(query_expr, 
[[B]]) -> query_expr(B); +make_tree(receive_expr, [C]) -> receive_expr(C); +make_tree(receive_expr, [C, [E], A]) -> receive_expr(C, E, A); +make_tree(record_access, [[E], [F]]) -> + record_access(E, F); +make_tree(record_access, [[E], [T], [F]]) -> + record_access(E, T, F); +make_tree(record_expr, [[T], F]) -> record_expr(T, F); +make_tree(record_expr, [[E], [T], F]) -> record_expr(E, T, F); +make_tree(record_field, [[N]]) -> record_field(N); +make_tree(record_field, [[N], [E]]) -> record_field(N, E); +make_tree(record_index_expr, [[T], [F]]) -> + record_index_expr(T, F); +make_tree(rule, [[N], C]) -> rule(N, C); +make_tree(size_qualifier, [[N], [A]]) -> size_qualifier(N, A); +make_tree(try_expr, [B, C, H, A]) -> try_expr(B, C, H, A); +make_tree(tuple, [E]) -> tuple(E). + + +%% ===================================================================== +%% @spec meta(Tree::syntaxTree()) -> syntaxTree() +%% +%% @doc Creates a meta-representation of a syntax tree. The result +%% represents an Erlang expression "<code><em>MetaTree</em></code>" +%% which, if evaluated, will yield a new syntax tree representing the +%% same source code text as <code>Tree</code> (although the actual data +%% representation may be different). The expression represented by +%% <code>MetaTree</code> is <em>implementation independent</em> with +%% regard to the data structures used by the abstract syntax tree +%% implementation. Comments attached to nodes of <code>Tree</code> will +%% be preserved, but other attributes are lost. +%% +%% <p>Any node in <code>Tree</code> whose node type is +%% <code>variable</code> (cf. <code>type/1</code>), and whose list of +%% annotations (cf. <code>get_ann/1</code>) contains the atom +%% <code>meta_var</code>, will remain unchanged in the resulting tree, +%% except that exactly one occurrence of <code>meta_var</code> is +%% removed from its annotation list.</p> +%% +%% <p>The main use of the function <code>meta/1</code> is to transform a +%% data structure <code>Tree</code>, which represents a piece of program +%% code, into a form that is <em>representation independent when +%% printed</em>. E.g., suppose <code>Tree</code> represents a variable +%% named "V". Then (assuming a function <code>print/1</code> for +%% printing syntax trees), evaluating <code>print(abstract(Tree))</code> +%% - simply using <code>abstract/1</code> to map the actual data +%% structure onto a syntax tree representation - would output a string +%% that might look something like "<code>{tree, variable, ..., "V", +%% ...}</code>", which is obviously dependent on the implementation of +%% the abstract syntax trees. This could e.g. be useful for caching a +%% syntax tree in a file. However, in some situations like in a program +%% generator generator (with two "generator"), it may be unacceptable. +%% Using <code>print(meta(Tree))</code> instead would output a +%% <em>representation independent</em> syntax tree generating +%% expression; in the above case, something like +%% "<code>erl_syntax:variable("V")</code>".</p> +%% +%% @see abstract/1 +%% @see type/1 +%% @see get_ann/1 + +meta(T) -> + %% First of all we check for metavariables: + case type(T) of + variable -> + case lists:member(meta_var, get_ann(T)) of + false -> + meta_precomment(T); + true -> + %% A meta-variable: remove the first found + %% `meta_var' annotation, but otherwise leave + %% the node unchanged. 
+ set_ann(T, lists:delete(meta_var, get_ann(T))) + end; + _ -> + case has_comments(T) of + true -> + meta_precomment(T); + false -> + meta_1(T) + end + end. + +meta_precomment(T) -> + case get_precomments(T) of + [] -> + meta_postcomment(T); + Cs -> + meta_call(set_precomments, + [meta_postcomment(T), list(meta_list(Cs))]) + end. + +meta_postcomment(T) -> + case get_postcomments(T) of + [] -> + meta_0(T); + Cs -> + meta_call(set_postcomments, + [meta_0(T), list(meta_list(Cs))]) + end. + +meta_0(T) -> + meta_1(remove_comments(T)). + +meta_1(T) -> + %% First handle leaf nodes and other common cases, in order to + %% generate compact code. + case type(T) of + atom -> + meta_call(atom, [T]); + char -> + meta_call(char, [T]); + comment -> + meta_call(comment, [list([string(S) + || S <- comment_text(T)])]); + eof_marker -> + meta_call(eof_marker, []); + error_marker -> + meta_call(error_marker, + [abstract(error_marker_info(T))]); + float -> + meta_call(float, [T]); + integer -> + meta_call(integer, [T]); + nil -> + meta_call(nil, []); + operator -> + meta_call(operator, [atom(operator_name(T))]); + string -> + meta_call(string, [T]); + text -> + meta_call(text, [string(text_string(T))]); + underscore -> + meta_call(underscore, []); + variable -> + meta_call(variable, [string(atom_to_list(variable_name(T)))]); + warning_marker -> + meta_call(warning_marker, + [abstract(warning_marker_info(T))]); + list -> + case list_suffix(T) of + none -> + meta_call(list, + [list(meta_list(list_prefix(T)))]); + S -> + meta_call(list, + [list(meta_list(list_prefix(T))), + meta(S)]) + end; + tuple -> + meta_call(tuple, + [list(meta_list(tuple_elements(T)))]); + Type -> + %% All remaining cases are handled using `subtrees' + %% and `make_tree' to decompose and reassemble the + %% nodes. More cases could of course be handled + %% directly to get a more compact output, but I can't + %% be bothered right now. + meta_call(make_tree, + [abstract(Type), + meta_subtrees(subtrees(T))]) + end. + +meta_list([T | Ts]) -> + [meta(T) | meta_list(Ts)]; +meta_list([]) -> + []. + +meta_subtrees(Gs) -> + list([list([meta(T) + || T <- G]) + || G <- Gs]). + +meta_call(F, As) -> + application(atom(?MODULE), atom(F), As). + + +%% ===================================================================== +%% Functions for abstraction of the syntax tree representation; may be +%% used externally, but not intended for the normal user. +%% ===================================================================== + + +%% ===================================================================== +%% @spec tree(Type) -> syntaxTree() +%% @equiv tree(Type, []) + +tree(Type) -> + tree(Type, []). + +%% ===================================================================== +%% @spec tree(Type::atom(), Data::term()) -> syntaxTree() +%% +%% @doc <em>For special purposes only</em>. Creates an abstract syntax +%% tree node with type tag <code>Type</code> and associated data +%% <code>Data</code>. +%% +%% <p>This function and the related <code>is_tree/1</code> and +%% <code>data/1</code> provide a uniform way to extend the set of +%% <code>erl_parse</code> node types. 
The associated data is any term, +%% whose format may depend on the type tag.</p> +%% +%% <h4>Notes:</h4> +%% <ul> +%% <li>Any nodes created outside of this module must have type tags +%% distinct from those currently defined by this module; see +%% <code>type/1</code> for a complete list.</li> +%% <li>The type tag of a syntax tree node may also be used +%% as a primary tag by the <code>erl_parse</code> representation; +%% in that case, the selector functions for that node type +%% <em>must</em> handle both the abstract syntax tree and the +%% <code>erl_parse</code> form. The function <code>type(T)</code> +%% should return the correct type tag regardless of the +%% representation of <code>T</code>, so that the user sees no +%% difference between <code>erl_syntax</code> and +%% <code>erl_parse</code> nodes.</li> +%% </ul> +%% @see is_tree/1 +%% @see data/1 +%% @see type/1 + +tree(Type, Data) -> + #tree{type = Type, data = Data}. + + +%% ===================================================================== +%% @spec is_tree(Tree::syntaxTree()) -> bool() +%% +%% @doc <em>For special purposes only</em>. Returns <code>true</code> if +%% <code>Tree</code> is an abstract syntax tree and <code>false</code> +%% otherwise. +%% +%% <p><em>Note</em>: this function yields <code>false</code> for all +%% "old-style" <code>erl_parse</code>-compatible "parse trees".</p> +%% +%% @see tree/2 + +is_tree(#tree{}) -> + true; +is_tree(_) -> + false. + + +%% ===================================================================== +%% @spec data(Tree::syntaxTree()) -> term() +%% +%% @doc <em>For special purposes only</em>. Returns the associated data +%% of a syntax tree node. Evaluation fails with reason +%% <code>badarg</code> if <code>is_tree(Node)</code> does not yield +%% <code>true</code>. +%% +%% @see tree/2 + +data(#tree{data = D}) -> D; +data(T) -> erlang:error({badarg, T}). + + +%% ===================================================================== +%% Primitives for backwards compatibility; for internal use only +%% ===================================================================== + + +%% ===================================================================== +%% @spec wrap(Node::erl_parse()) -> syntaxTree() +%% +%% @type erl_parse() = erl_parse:parse_tree(). The "parse tree" +%% representation built by the Erlang standard library parser +%% <code>erl_parse</code>. This is a subset of the +%% <a href="#type-syntaxTree"><code>syntaxTree</code></a> type. +%% +%% @doc Creates a wrapper structure around an <code>erl_parse</code> +%% "parse tree". +%% +%% <p>This function and the related <code>unwrap/1</code> and +%% <code>is_wrapper/1</code> provide a uniform way to attach arbitrary +%% information to an <code>erl_parse</code> tree. Some information about +%% the encapsuled tree may be cached in the wrapper, such as the node +%% type. All functions on syntax trees must behave so that the user sees +%% no difference between wrapped and non-wrapped <code>erl_parse</code> +%% trees. <em>Attaching a wrapper onto another wrapper structure is an +%% error</em>.</p> + +wrap(Node) -> + %% We assume that Node is an old-school `erl_parse' tree. + #wrapper{type = type(Node), attr = #attr{pos = get_pos(Node)}, + tree = Node}. + + +%% ===================================================================== +%% @spec unwrap(Node::syntaxTree()) -> syntaxTree() +%% +%% @doc Removes any wrapper structure, if present. 
If <code>Node</code> +%% is a wrapper structure, this function returns the wrapped +%% <code>erl_parse</code> tree; otherwise it returns <code>Node</code> +%% itself. + +unwrap(#wrapper{tree = Node}) -> Node; +unwrap(Node) -> Node. % This could also be a new-form node. + + +%% ===================================================================== +%% @spec is_wrapper(Term::term()) -> bool() +%% +%% @doc Returns <code>true</code> if the argument is a wrapper +%% structure, otherwise <code>false</code>. + +-ifndef(NO_UNUSED). +is_wrapper(#wrapper{}) -> + true; +is_wrapper(_) -> + false. +-endif. + + +%% ===================================================================== +%% General utility functions for internal use +%% ===================================================================== + +is_printable(S) -> + io_lib:printable_list(S). + +%% Support functions for transforming lists of function names +%% specified as `arity_qualifier' nodes. + +unfold_function_names(Ns, Pos) -> + F = fun ({Atom, Arity}) -> + N = arity_qualifier(atom(Atom), integer(Arity)), + set_pos(N, Pos) + end, + [F(N) || N <- Ns]. + +fold_function_names(Ns) -> + [fold_function_name(N) || N <- Ns]. + +fold_function_name(N) -> + Name = arity_qualifier_body(N), + Arity = arity_qualifier_argument(N), + true = ((type(Name) =:= atom) and (type(Arity) =:= integer)), + {concrete(Name), concrete(Arity)}. + +fold_variable_names(Vs) -> + [variable_name(V) || V <- Vs]. + +unfold_variable_names(Vs, Pos) -> + [set_pos(variable(V), Pos) || V <- Vs]. + +%% Support functions for qualified names ("foo.bar.baz", +%% "erl.lang.lists", etc.). The representation overlaps with the weird +%% "Mnesia query record access" operators. The '.' operator is left +%% associative, so folding should nest on the left. + +is_qualified_name({record_field, _, L, R}) -> + is_qualified_name(L) andalso is_qualified_name(R); +is_qualified_name({atom, _, _}) -> true; +is_qualified_name(_) -> false. + +unfold_qualified_name(Node) -> + lists:reverse(unfold_qualified_name(Node, [])). + +unfold_qualified_name({record_field, _, L, R}, Ss) -> + unfold_qualified_name(R, unfold_qualified_name(L, Ss)); +unfold_qualified_name(S, Ss) -> [S | Ss]. + +fold_qualified_name([S | Ss], Pos) -> + fold_qualified_name(Ss, Pos, {atom, Pos, atom_value(S)}). + +fold_qualified_name([S | Ss], Pos, Ack) -> + fold_qualified_name(Ss, Pos, {record_field, Pos, Ack, + {atom, Pos, atom_value(S)}}); +fold_qualified_name([], _Pos, Ack) -> + Ack. + +%% Support functions for transforming lists of record field definitions. +%% +%% There is no unique representation for field definitions in the +%% standard form. There, they may only occur in the "fields" part of a +%% record expression or declaration, and are represented as +%% `{record_field, Pos, Name, Value}', or as `{record_field, Pos, Name}' +%% if the value part is left out. However, these cannot be distinguished +%% out of context from the representation of record field access +%% expressions (see `record_access'). + +fold_record_fields(Fs) -> + [fold_record_field(F) || F <- Fs]. + +fold_record_field(F) -> + Pos = get_pos(F), + Name = record_field_name(F), + case record_field_value(F) of + none -> + {record_field, Pos, Name}; + Value -> + {record_field, Pos, Name, Value} + end. + +unfold_record_fields(Fs) -> + [unfold_record_field(F) || F <- Fs]. + +unfold_record_field({typed_record_field, Field, _Type}) -> + unfold_record_field_1(Field); +unfold_record_field(Field) -> + unfold_record_field_1(Field). 
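
%% A minimal illustration of the transparency promised by the wrapper
%% documentation above (not part of the original module;
%% demo_erl_parse_transparency/0 is a made-up helper that could live in any
%% scratch module): a plain `erl_parse' form can be passed directly to the
%% `erl_syntax' API, with or without a wrapper.

demo_erl_parse_transparency() ->
    {ok, Tokens, _} = erl_scan:string("foo(X) -> X + 1. "),
    {ok, Form} = erl_parse:parse_form(Tokens),    % old-style erl_parse form
    function = erl_syntax:type(Form),             % same answer as for a #tree{}
    atom = erl_syntax:type(erl_syntax:atom(foo)), % and for an abstract node
    ok.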
+ +unfold_record_field_1({record_field, Pos, Name}) -> + set_pos(record_field(Name), Pos); +unfold_record_field_1({record_field, Pos, Name, Value}) -> + set_pos(record_field(Name, Value), Pos). + +fold_binary_field_types(Ts) -> + [fold_binary_field_type(T) || T <- Ts]. + +fold_binary_field_type(Node) -> + case type(Node) of + size_qualifier -> + {concrete(size_qualifier_body(Node)), + concrete(size_qualifier_argument(Node))}; + _ -> + concrete(Node) + end. + +unfold_binary_field_types(Ts, Pos) -> + [unfold_binary_field_type(T, Pos) || T <- Ts]. + +unfold_binary_field_type({Type, Size}, Pos) -> + set_pos(size_qualifier(atom(Type), integer(Size)), Pos); +unfold_binary_field_type(Type, Pos) -> + set_pos(atom(Type), Pos). + + +%% ===================================================================== diff --git a/lib/syntax_tools/src/erl_syntax_lib.erl b/lib/syntax_tools/src/erl_syntax_lib.erl new file mode 100644 index 0000000000..ccbf864c2a --- /dev/null +++ b/lib/syntax_tools/src/erl_syntax_lib.erl @@ -0,0 +1,2168 @@ +%% ===================================================================== +%% This library is free software; you can redistribute it and/or modify +%% it under the terms of the GNU Lesser General Public License as +%% published by the Free Software Foundation; either version 2 of the +%% License, or (at your option) any later version. +%% +%% This library is distributed in the hope that it will be useful, but +%% WITHOUT ANY WARRANTY; without even the implied warranty of +%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +%% Lesser General Public License for more details. +%% +%% You should have received a copy of the GNU Lesser General Public +%% License along with this library; if not, write to the Free Software +%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +%% USA +%% +%% $Id$ +%% +%% @copyright 1997-2006 Richard Carlsson +%% @author Richard Carlsson <[email protected]> +%% @end +%% ===================================================================== + +%% @doc Support library for abstract Erlang syntax trees. +%% +%% This module contains utility functions for working with the +%% abstract data type defined in the module {@link erl_syntax}. +%% +%% @type syntaxTree() = erl_syntax:syntaxTree(). An abstract syntax +%% tree. See the {@link erl_syntax} module for details. + +-module(erl_syntax_lib). + +-export([analyze_application/1, analyze_attribute/1, + analyze_export_attribute/1, analyze_file_attribute/1, + analyze_form/1, analyze_forms/1, analyze_function/1, + analyze_function_name/1, analyze_implicit_fun/1, + analyze_import_attribute/1, analyze_module_attribute/1, + analyze_record_attribute/1, analyze_record_expr/1, + analyze_record_field/1, analyze_rule/1, + analyze_wild_attribute/1, annotate_bindings/1, + annotate_bindings/2, fold/3, fold_subtrees/3, foldl_listlist/3, + function_name_expansions/1, is_fail_expr/1, limit/2, limit/3, + map/2, map_subtrees/2, mapfold/3, mapfold_subtrees/3, + mapfoldl_listlist/3, new_variable_name/1, new_variable_name/2, + new_variable_names/2, new_variable_names/3, strip_comments/1, + to_comment/1, to_comment/2, to_comment/3, variables/1]). + + +%% ===================================================================== +%% @spec map(Function, Tree::syntaxTree()) -> syntaxTree() +%% +%% Function = (syntaxTree()) -> syntaxTree() +%% +%% @doc Applies a function to each node of a syntax tree. The result of +%% each application replaces the corresponding original node. 
The order +%% of traversal is bottom-up. +%% +%% @see map_subtrees/2 + +map(F, Tree) -> + case erl_syntax:subtrees(Tree) of + [] -> + F(Tree); + Gs -> + Tree1 = erl_syntax:make_tree(erl_syntax:type(Tree), + [[map(F, T) || T <- G] + || G <- Gs]), + F(erl_syntax:copy_attrs(Tree, Tree1)) + end. + + +%% ===================================================================== +%% @spec map_subtrees(Function, syntaxTree()) -> syntaxTree() +%% +%% Function = (Tree) -> Tree1 +%% +%% @doc Applies a function to each immediate subtree of a syntax tree. +%% The result of each application replaces the corresponding original +%% node. +%% +%% @see map/2 + +map_subtrees(F, Tree) -> + case erl_syntax:subtrees(Tree) of + [] -> + Tree; + Gs -> + Tree1 = erl_syntax:make_tree(erl_syntax:type(Tree), + [[F(T) || T <- G] || G <- Gs]), + erl_syntax:copy_attrs(Tree, Tree1) + end. + + +%% ===================================================================== +%% @spec fold(Function, Start::term(), Tree::syntaxTree()) -> term() +%% +%% Function = (syntaxTree(), term()) -> term() +%% +%% @doc Folds a function over all nodes of a syntax tree. The result is +%% the value of `Function(X1, Function(X2, ... Function(Xn, Start) +%% ... ))', where `[X1, X2, ..., Xn]' are the nodes of +%% `Tree' in a post-order traversal. +%% +%% @see fold_subtrees/3 +%% @see foldl_listlist/3 + +fold(F, S, Tree) -> + case erl_syntax:subtrees(Tree) of + [] -> + F(Tree, S); + Gs -> + F(Tree, fold_1(F, S, Gs)) + end. + +fold_1(F, S, [L | Ls]) -> + fold_1(F, fold_2(F, S, L), Ls); +fold_1(_, S, []) -> + S. + +fold_2(F, S, [T | Ts]) -> + fold_2(F, fold(F, S, T), Ts); +fold_2(_, S, []) -> + S. + + +%% ===================================================================== +%% @spec fold_subtrees(Function, Start::term(), Tree::syntaxTree()) -> +%% term() +%% +%% Function = (syntaxTree(), term()) -> term() +%% +%% @doc Folds a function over the immediate subtrees of a syntax tree. +%% This is similar to `fold/3', but only on the immediate +%% subtrees of `Tree', in left-to-right order; it does not +%% include the root node of `Tree'. +%% +%% @see fold/3 + +fold_subtrees(F, S, Tree) -> + foldl_listlist(F, S, erl_syntax:subtrees(Tree)). + + +%% ===================================================================== +%% @spec foldl_listlist(Function, Start::term(), [[term()]]) -> term() +%% +%% Function = (term(), term()) -> term() +%% +%% @doc Like `lists:foldl/3', but over a list of lists. +%% +%% @see fold/3 +%% @see //stdlib/lists:foldl/3 + +foldl_listlist(F, S, [L | Ls]) -> + foldl_listlist(F, foldl(F, S, L), Ls); +foldl_listlist(_, S, []) -> + S. + +foldl(F, S, [T | Ts]) -> + foldl(F, F(T, S), Ts); +foldl(_, S, []) -> + S. + + +%% ===================================================================== +%% @spec mapfold(Function, Start::term(), Tree::syntaxTree()) -> +%% {syntaxTree(), term()} +%% +%% Function = (syntaxTree(), term()) -> {syntaxTree(), term()} +%% +%% @doc Combines map and fold in a single operation. This is similar to +%% `map/2', but also propagates an extra value from each +%% application of the `Function' to the next, while doing a +%% post-order traversal of the tree like `fold/3'. The value +%% `Start' is passed to the first function application, and +%% the final result is the result of the last application. 
+%% +%% @see map/2 +%% @see fold/3 + +mapfold(F, S, Tree) -> + case erl_syntax:subtrees(Tree) of + [] -> + F(Tree, S); + Gs -> + {Gs1, S1} = mapfold_1(F, S, Gs), + Tree1 = erl_syntax:make_tree(erl_syntax:type(Tree), Gs1), + F(erl_syntax:copy_attrs(Tree, Tree1), S1) + end. + +mapfold_1(F, S, [L | Ls]) -> + {L1, S1} = mapfold_2(F, S, L), + {Ls1, S2} = mapfold_1(F, S1, Ls), + {[L1 | Ls1], S2}; +mapfold_1(_, S, []) -> + {[], S}. + +mapfold_2(F, S, [T | Ts]) -> + {T1, S1} = mapfold(F, S, T), + {Ts1, S2} = mapfold_2(F, S1, Ts), + {[T1 | Ts1], S2}; +mapfold_2(_, S, []) -> + {[], S}. + + +%% ===================================================================== +%% @spec mapfold_subtrees(Function, Start::term(), +%% Tree::syntaxTree()) -> {syntaxTree(), term()} +%% +%% Function = (syntaxTree(), term()) -> {syntaxTree(), term()} +%% +%% @doc Does a mapfold operation over the immediate subtrees of a syntax +%% tree. This is similar to `mapfold/3', but only on the +%% immediate subtrees of `Tree', in left-to-right order; it +%% does not include the root node of `Tree'. +%% +%% @see mapfold/3 + +mapfold_subtrees(F, S, Tree) -> + case erl_syntax:subtrees(Tree) of + [] -> + {Tree, S}; + Gs -> + {Gs1, S1} = mapfoldl_listlist(F, S, Gs), + Tree1 = erl_syntax:make_tree(erl_syntax:type(Tree), Gs1), + {erl_syntax:copy_attrs(Tree, Tree1), S1} + end. + + +%% ===================================================================== +%% @spec mapfoldl_listlist(Function, State, [[term()]]) -> +%% {[[term()]], term()} +%% +%% Function = (term(), term()) -> {term(), term()} +%% +%% @doc Like `lists:mapfoldl/3', but over a list of lists. +%% The list of lists in the result has the same structure as the given +%% list of lists. + +mapfoldl_listlist(F, S, [L | Ls]) -> + {L1, S1} = mapfoldl(F, S, L), + {Ls1, S2} = mapfoldl_listlist(F, S1, Ls), + {[L1 | Ls1], S2}; +mapfoldl_listlist(_, S, []) -> + {[], S}. + +mapfoldl(F, S, [L | Ls]) -> + {L1, S1} = F(L, S), + {Ls1, S2} = mapfoldl(F, S1, Ls), + {[L1 | Ls1], S2}; +mapfoldl(_, S, []) -> + {[], S}. + + +%% ===================================================================== +%% @spec variables(syntaxTree()) -> set(atom()) +%% +%% set(T) = //stdlib/sets:set(T) +%% +%% @doc Returns the names of variables occurring in a syntax tree, The +%% result is a set of variable names represented by atoms. Macro names +%% are not included. +%% +%% @see //stdlib/sets + +variables(Tree) -> + variables(Tree, sets:new()). + +variables(T, S) -> + case erl_syntax:type(T) of + variable -> + sets:add_element(erl_syntax:variable_name(T), S); + macro -> + %% macro names are ignored, even if represented by variables + case erl_syntax:macro_arguments(T) of + none -> S; + As -> + variables_2(As, S) + end; + _ -> + case erl_syntax:subtrees(T) of + [] -> + S; + Gs -> + variables_1(Gs, S) + end + end. + +variables_1([L | Ls], S) -> + variables_1(Ls, variables_2(L, S)); +variables_1([], S) -> + S. + +variables_2([T | Ts], S) -> + variables_2(Ts, variables(T, S)); +variables_2([], S) -> + S. + + +-define(MINIMUM_RANGE, 100). +-define(START_RANGE_FACTOR, 100). +-define(MAX_RETRIES, 3). % retries before enlarging range +-define(ENLARGE_ENUM, 8). % range enlargment enumerator +-define(ENLARGE_DENOM, 1). % range enlargment denominator + +default_variable_name(N) -> + list_to_atom("V" ++ integer_to_list(N)). + +%% ===================================================================== +%% @spec new_variable_name(Used::set(atom())) -> atom() +%% +%% @doc Returns an atom which is not already in the set `Used'. 
This is +%% equivalent to `new_variable_name(Function, Used)', where `Function' +%% maps a given integer `N' to the atom whose name consists of "`V'" +%% followed by the numeral for `N'. +%% +%% @see new_variable_name/2 + +new_variable_name(S) -> + new_variable_name(fun default_variable_name/1, S). + +%% ===================================================================== +%% @spec new_variable_name(Function, Used::set(atom())) -> atom() +%% +%% Function = (integer()) -> atom() +%% +%% @doc Returns a user-named atom which is not already in the set +%% `Used'. The atom is generated by applying the given +%% `Function' to a generated integer. Integers are generated +%% using an algorithm which tries to keep the names randomly distributed +%% within a reasonably small range relative to the number of elements in +%% the set. +%% +%% This function uses the module `random' to generate new +%% keys. The seed it uses may be initialized by calling +%% `random:seed/0' or `random:seed/3' before this +%% function is first called. +%% +%% @see new_variable_name/1 +%% @see //stdlib/sets +%% @see //stdlib/random + +new_variable_name(F, S) -> + R = start_range(S), + new_variable_name(R, F, S). + +new_variable_name(R, F, S) -> + new_variable_name(generate(R, R), R, 0, F, S). + +new_variable_name(N, R, T, F, S) when T < ?MAX_RETRIES -> + A = F(N), + case sets:is_element(A, S) of + true -> + new_variable_name(generate(N, R), R, T + 1, F, S); + false -> + A + end; +new_variable_name(N, R, _T, F, S) -> + %% Too many retries - enlarge the range and start over. + R1 = (R * ?ENLARGE_ENUM) div ?ENLARGE_DENOM, + new_variable_name(generate(N, R1), R1, 0, F, S). + +%% Note that we assume that it is very cheap to take the size of +%% the given set. This should be valid for the stdlib +%% implementation of `sets'. + +start_range(S) -> + max(sets:size(S) * ?START_RANGE_FACTOR, ?MINIMUM_RANGE). + +max(X, Y) when X > Y -> X; +max(_, Y) -> Y. + +%% The previous number might or might not be used to compute the +%% next number to be tried. It is currently not used. +%% +%% It is important that this function does not generate values in +%% order, but (pseudo-)randomly distributed over the range. + +generate(_Key, Range) -> + random:uniform(Range). % works well + + +%% ===================================================================== +%% @spec new_variable_names(N::integer(), Used::set(atom())) -> [atom()] +%% +%% @doc Like `new_variable_name/1', but generates a list of +%% `N' new names. +%% +%% @see new_variable_name/1 + +new_variable_names(N, S) -> + new_variable_names(N, fun default_variable_name/1, S). + +%% ===================================================================== +%% @spec new_variable_names(N::integer(), Function, +%% Used::set(atom())) -> [atom()] +%% +%% Function = (integer()) -> atom() +%% +%% @doc Like `new_variable_name/2', but generates a list of +%% `N' new names. +%% +%% @see new_variable_name/2 + +new_variable_names(N, F, S) when is_integer(N) -> + R = start_range(S), + new_variable_names(N, [], R, F, S). + +new_variable_names(N, Names, R, F, S) when N > 0 -> + Name = new_variable_name(R, F, S), + S1 = sets:add_element(Name, S), + new_variable_names(N - 1, [Name | Names], R, F, S1); +new_variable_names(0, Names, _, _, _) -> + Names. 
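
%% A usage sketch tying together `map/2', `variables/1' and
%% `new_variable_name/1' from above (illustrative only; rename_x_demo/0 is a
%% hypothetical helper in some scratch module, not part of this library):
%% every occurrence of the variable X is rewritten to a freshly generated
%% name that does not clash with the names already in use.

rename_x_demo() ->
    {ok, Tokens, _} = erl_scan:string("X + Y * X. "),
    {ok, [Expr]} = erl_parse:parse_exprs(Tokens),
    Used = erl_syntax_lib:variables(Expr),           % set of 'X' and 'Y'
    Fresh = erl_syntax_lib:new_variable_name(Used),  % e.g. 'V42', not in Used
    Rename = fun (Node) ->
                     case erl_syntax:type(Node) of
                         variable ->
                             case erl_syntax:variable_name(Node) of
                                 'X' -> erl_syntax:variable(Fresh);
                                 _ -> Node
                             end;
                         _ -> Node
                     end
             end,
    erl_prettypr:format(erl_syntax_lib:map(Rename, Expr)).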
+ + +%% ===================================================================== +%% @spec annotate_bindings(Tree::syntaxTree(), +%% Bindings::ordset(atom())) -> syntaxTree() +%% +%% @type ordset(T) = //stdlib/ordsets:ordset(T) +%% +%% @doc Adds or updates annotations on nodes in a syntax tree. +%% `Bindings' specifies the set of bound variables in the +%% environment of the top level node. The following annotations are +%% affected: +%% <ul> +%% <li>`{env, Vars}', representing the input environment +%% of the subtree.</li> +%% +%% <li>`{bound, Vars}', representing the variables that +%% are bound in the subtree.</li> +%% +%% <li>`{free, Vars}', representing the free variables in +%% the subtree.</li> +%% </ul> +%% `Bindings' and `Vars' are ordered-set lists +%% (cf. module `ordsets') of atoms representing variable +%% names. +%% +%% @see annotate_bindings/1 +%% @see //stdlib/ordsets + +annotate_bindings(Tree, Env) -> + {Tree1, _, _} = vann(Tree, Env), + Tree1. + +%% ===================================================================== +%% @spec annotate_bindings(Tree::syntaxTree()) -> syntaxTree() +%% +%% @doc Adds or updates annotations on nodes in a syntax tree. +%% Equivalent to `annotate_bindings(Tree, Bindings)' where +%% the top-level environment `Bindings' is taken from the +%% annotation `{env, Bindings}' on the root node of +%% `Tree'. An exception is thrown if no such annotation +%% should exist. +%% +%% @see annotate_bindings/2 + +annotate_bindings(Tree) -> + As = erl_syntax:get_ann(Tree), + case lists:keyfind(env, 1, As) of + {env, InVars} -> + annotate_bindings(Tree, InVars); + _ -> + erlang:error(badarg) + end. + +vann(Tree, Env) -> + case erl_syntax:type(Tree) of + variable -> + %% Variable use + Bound = [], + Free = [erl_syntax:variable_name(Tree)], + {ann_bindings(Tree, Env, Bound, Free), Bound, Free}; + match_expr -> + vann_match_expr(Tree, Env); + case_expr -> + vann_case_expr(Tree, Env); + if_expr -> + vann_if_expr(Tree, Env); + cond_expr -> + vann_cond_expr(Tree, Env); + receive_expr -> + vann_receive_expr(Tree, Env); + catch_expr -> + vann_catch_expr(Tree, Env); + try_expr -> + vann_try_expr(Tree, Env); + function -> + vann_function(Tree, Env); + rule -> + vann_rule(Tree, Env); + fun_expr -> + vann_fun_expr(Tree, Env); + list_comp -> + vann_list_comp(Tree, Env); + binary_comp -> + vann_binary_comp(Tree, Env); + generator -> + vann_generator(Tree, Env); + binary_generator -> + vann_binary_generator(Tree, Env); + block_expr -> + vann_block_expr(Tree, Env); + macro -> + vann_macro(Tree, Env); + _Type -> + F = vann_list_join(Env), + {Tree1, {Bound, Free}} = mapfold_subtrees(F, {[], []}, + Tree), + {ann_bindings(Tree1, Env, Bound, Free), Bound, Free} + end. + +vann_list_join(Env) -> + fun (T, {Bound, Free}) -> + {T1, Bound1, Free1} = vann(T, Env), + {T1, {ordsets:union(Bound, Bound1), + ordsets:union(Free, Free1)}} + end. + +vann_list(Ts, Env) -> + lists:mapfoldl(vann_list_join(Env), {[], []}, Ts). + +vann_function(Tree, Env) -> + Cs = erl_syntax:function_clauses(Tree), + {Cs1, {_, Free}} = vann_clauses(Cs, Env), + N = erl_syntax:function_name(Tree), + {N1, _, _} = vann(N, Env), + Tree1 = rewrite(Tree, erl_syntax:function(N1, Cs1)), + Bound = [], + {ann_bindings(Tree1, Env, Bound, Free), Bound, Free}. 
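
%% A small sketch of `annotate_bindings/2' as documented above
%% (illustrative only; bindings_demo/0 is a made-up helper): the expression
%% is annotated with an empty top-level environment, after which the
%% `bound' and `free' annotations can be read back from the root node.

bindings_demo() ->
    {ok, Tokens, _} = erl_scan:string("X = Y + 1. "),
    {ok, [Expr]} = erl_parse:parse_exprs(Tokens),
    Tree = erl_syntax_lib:annotate_bindings(Expr, ordsets:new()),
    Ann = erl_syntax:get_ann(Tree),
    {bound, ['X']} = lists:keyfind(bound, 1, Ann),  % the match binds X
    {free, ['Y']} = lists:keyfind(free, 1, Ann),    % Y comes from the outside
    Ann.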
+ +vann_rule(Tree, Env) -> + Cs = erl_syntax:rule_clauses(Tree), + {Cs1, {_, Free}} = vann_clauses(Cs, Env), + N = erl_syntax:rule_name(Tree), + {N1, _, _} = vann(N, Env), + Tree1 = rewrite(Tree, erl_syntax:rule(N1, Cs1)), + Bound = [], + {ann_bindings(Tree1, Env, Bound, Free), Bound, Free}. + +vann_fun_expr(Tree, Env) -> + Cs = erl_syntax:fun_expr_clauses(Tree), + {Cs1, {_, Free}} = vann_clauses(Cs, Env), + Tree1 = rewrite(Tree, erl_syntax:fun_expr(Cs1)), + Bound = [], + {ann_bindings(Tree1, Env, Bound, Free), Bound, Free}. + +vann_match_expr(Tree, Env) -> + E = erl_syntax:match_expr_body(Tree), + {E1, Bound1, Free1} = vann(E, Env), + Env1 = ordsets:union(Env, Bound1), + P = erl_syntax:match_expr_pattern(Tree), + {P1, Bound2, Free2} = vann_pattern(P, Env1), + Bound = ordsets:union(Bound1, Bound2), + Free = ordsets:union(Free1, Free2), + Tree1 = rewrite(Tree, erl_syntax:match_expr(P1, E1)), + {ann_bindings(Tree1, Env, Bound, Free), Bound, Free}. + +vann_case_expr(Tree, Env) -> + E = erl_syntax:case_expr_argument(Tree), + {E1, Bound1, Free1} = vann(E, Env), + Env1 = ordsets:union(Env, Bound1), + Cs = erl_syntax:case_expr_clauses(Tree), + {Cs1, {Bound2, Free2}} = vann_clauses(Cs, Env1), + Bound = ordsets:union(Bound1, Bound2), + Free = ordsets:union(Free1, Free2), + Tree1 = rewrite(Tree, erl_syntax:case_expr(E1, Cs1)), + {ann_bindings(Tree1, Env, Bound, Free), Bound, Free}. + +vann_if_expr(Tree, Env) -> + Cs = erl_syntax:if_expr_clauses(Tree), + {Cs1, {Bound, Free}} = vann_clauses(Cs, Env), + Tree1 = rewrite(Tree, erl_syntax:if_expr(Cs1)), + {ann_bindings(Tree1, Env, Bound, Free), Bound, Free}. + +vann_cond_expr(_Tree, _Env) -> + erlang:error({not_implemented,cond_expr}). + +vann_catch_expr(Tree, Env) -> + E = erl_syntax:catch_expr_body(Tree), + {E1, _, Free} = vann(E, Env), + Tree1 = rewrite(Tree, erl_syntax:catch_expr(E1)), + Bound = [], + {ann_bindings(Tree1, Env, Bound, Free), Bound, Free}. + +vann_try_expr(Tree, Env) -> + Es = erl_syntax:try_expr_body(Tree), + {Es1, {Bound1, Free1}} = vann_body(Es, Env), + Cs = erl_syntax:try_expr_clauses(Tree), + %% bindings in the body should be available in the success case, + {Cs1, {_, Free2}} = vann_clauses(Cs, ordsets:union(Env, Bound1)), + Hs = erl_syntax:try_expr_handlers(Tree), + {Hs1, {_, Free3}} = vann_clauses(Hs, Env), + %% the after part does not export anything, yet; this might change + As = erl_syntax:try_expr_after(Tree), + {As1, {_, Free4}} = vann_body(As, Env), + Tree1 = rewrite(Tree, erl_syntax:try_expr(Es1, Cs1, Hs1, As1)), + Bound = [], + Free = ordsets:union(Free1, ordsets:union(Free2, ordsets:union(Free3, Free4))), + {ann_bindings(Tree1, Env, Bound, Free), Bound, Free}. + +vann_receive_expr(Tree, Env) -> + %% The timeout action is treated as an extra clause. + %% Bindings in the expiry expression are local only. + Cs = erl_syntax:receive_expr_clauses(Tree), + Es = erl_syntax:receive_expr_action(Tree), + C = erl_syntax:clause([], Es), + {[C1 | Cs1], {Bound, Free1}} = vann_clauses([C | Cs], Env), + Es1 = erl_syntax:clause_body(C1), + {T1, _, Free2} = case erl_syntax:receive_expr_timeout(Tree) of + none -> + {none, [], []}; + T -> + vann(T, Env) + end, + Free = ordsets:union(Free1, Free2), + Tree1 = rewrite(Tree, erl_syntax:receive_expr(Cs1, T1, Es1)), + {ann_bindings(Tree1, Env, Bound, Free), Bound, Free}. 
+ +vann_list_comp(Tree, Env) -> + Es = erl_syntax:list_comp_body(Tree), + {Es1, {Bound1, Free1}} = vann_list_comp_body(Es, Env), + Env1 = ordsets:union(Env, Bound1), + T = erl_syntax:list_comp_template(Tree), + {T1, _, Free2} = vann(T, Env1), + Free = ordsets:union(Free1, ordsets:subtract(Free2, Bound1)), + Bound = [], + Tree1 = rewrite(Tree, erl_syntax:list_comp(T1, Es1)), + {ann_bindings(Tree1, Env, Bound, Free), Bound, Free}. + +vann_list_comp_body_join() -> + fun (T, {Env, Bound, Free}) -> + {T1, Bound1, Free1} = case erl_syntax:type(T) of + binary_generator -> + vann_binary_generator(T,Env); + generator -> + vann_generator(T, Env); + _ -> + %% Bindings in filters are not + %% exported to the rest of the + %% body. + {T2, _, Free2} = vann(T, Env), + {T2, [], Free2} + end, + Env1 = ordsets:union(Env, Bound1), + {T1, {Env1, ordsets:union(Bound, Bound1), + ordsets:union(Free, + ordsets:subtract(Free1, Bound))}} + end. + +vann_list_comp_body(Ts, Env) -> + F = vann_list_comp_body_join(), + {Ts1, {_, Bound, Free}} = lists:mapfoldl(F, {Env, [], []}, Ts), + {Ts1, {Bound, Free}}. + +vann_binary_comp(Tree, Env) -> + Es = erl_syntax:binary_comp_body(Tree), + {Es1, {Bound1, Free1}} = vann_binary_comp_body(Es, Env), + Env1 = ordsets:union(Env, Bound1), + T = erl_syntax:binary_comp_template(Tree), + {T1, _, Free2} = vann(T, Env1), + Free = ordsets:union(Free1, ordsets:subtract(Free2, Bound1)), + Bound = [], + Tree1 = rewrite(Tree, erl_syntax:binary_comp(T1, Es1)), + {ann_bindings(Tree1, Env, Bound, Free), Bound, Free}. + +vann_binary_comp_body_join() -> + fun (T, {Env, Bound, Free}) -> + {T1, Bound1, Free1} = case erl_syntax:type(T) of + binary_generator -> + vann_binary_generator(T, Env); + generator -> + vann_generator(T, Env); + _ -> + %% Bindings in filters are not + %% exported to the rest of the + %% body. + {T2, _, Free2} = vann(T, Env), + {T2, [], Free2} + end, + Env1 = ordsets:union(Env, Bound1), + {T1, {Env1, ordsets:union(Bound, Bound1), + ordsets:union(Free, + ordsets:subtract(Free1, Bound))}} + end. + +vann_binary_comp_body(Ts, Env) -> + F = vann_binary_comp_body_join(), + {Ts1, {_, Bound, Free}} = lists:mapfoldl(F, {Env, [], []}, Ts), + {Ts1, {Bound, Free}}. + +%% In list comprehension generators, the pattern variables are always +%% viewed as new occurrences, shadowing whatever is in the input +%% environment (thus, the pattern contains no variable uses, only +%% bindings). Bindings in the generator body are not exported. + +vann_generator(Tree, Env) -> + P = erl_syntax:generator_pattern(Tree), + {P1, Bound, _} = vann_pattern(P, []), + E = erl_syntax:generator_body(Tree), + {E1, _, Free} = vann(E, Env), + Tree1 = rewrite(Tree, erl_syntax:generator(P1, E1)), + {ann_bindings(Tree1, Env, Bound, Free), Bound, Free}. + +vann_binary_generator(Tree, Env) -> + P = erl_syntax:binary_generator_pattern(Tree), + {P1, Bound, _} = vann_pattern(P, Env), + E = erl_syntax:binary_generator_body(Tree), + {E1, _, Free} = vann(E, Env), + Tree1 = rewrite(Tree, erl_syntax:binary_generator(P1, E1)), + {ann_bindings(Tree1, Env, Bound, Free), Bound, Free}. + +vann_block_expr(Tree, Env) -> + Es = erl_syntax:block_expr_body(Tree), + {Es1, {Bound, Free}} = vann_body(Es, Env), + Tree1 = rewrite(Tree, erl_syntax:block_expr(Es1)), + {ann_bindings(Tree1, Env, Bound, Free), Bound, Free}. 
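
%% The comprehension scoping rules described above, shown on a concrete
%% example (illustrative only; comp_scope_demo/0 is a hypothetical helper):
%% the generator pattern X shadows any outer binding of X and is not
%% exported from the comprehension, so only Xs is reported as free.

comp_scope_demo() ->
    {ok, Tokens, _} = erl_scan:string("[X + 1 || X <- Xs]. "),
    {ok, [Comp]} = erl_parse:parse_exprs(Tokens),
    Tree = erl_syntax_lib:annotate_bindings(Comp, ['X']),  % X bound outside
    Ann = erl_syntax:get_ann(Tree),
    {free, ['Xs']} = lists:keyfind(free, 1, Ann),  % X is not free here
    {bound, []} = lists:keyfind(bound, 1, Ann),    % nothing escapes
    ok.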
+ +vann_body_join() -> + fun (T, {Env, Bound, Free}) -> + {T1, Bound1, Free1} = vann(T, Env), + Env1 = ordsets:union(Env, Bound1), + {T1, {Env1, ordsets:union(Bound, Bound1), + ordsets:union(Free, + ordsets:subtract(Free1, Bound))}} + end. + +vann_body(Ts, Env) -> + {Ts1, {_, Bound, Free}} = lists:mapfoldl(vann_body_join(), + {Env, [], []}, Ts), + {Ts1, {Bound, Free}}. + +%% Macro names must be ignored even if they happen to be variables, +%% lexically speaking. + +vann_macro(Tree, Env) -> + {As, {Bound, Free}} = case erl_syntax:macro_arguments(Tree) of + none -> + {none, {[], []}}; + As1 -> + vann_list(As1, Env) + end, + N = erl_syntax:macro_name(Tree), + Tree1 = rewrite(Tree, erl_syntax:macro(N, As)), + {ann_bindings(Tree1, Env, Bound, Free), Bound, Free}. + +vann_pattern(Tree, Env) -> + case erl_syntax:type(Tree) of + variable -> + V = erl_syntax:variable_name(Tree), + case ordsets:is_element(V, Env) of + true -> + %% Variable use + Bound = [], + Free = [V], + {ann_bindings(Tree, Env, Bound, Free), Bound, Free}; + false -> + %% Variable binding + Bound = [V], + Free = [], + {ann_bindings(Tree, Env, Bound, Free), Bound, Free} + end; + match_expr -> + %% Alias pattern + P = erl_syntax:match_expr_pattern(Tree), + {P1, Bound1, Free1} = vann_pattern(P, Env), + E = erl_syntax:match_expr_body(Tree), + {E1, Bound2, Free2} = vann_pattern(E, Env), + Bound = ordsets:union(Bound1, Bound2), + Free = ordsets:union(Free1, Free2), + Tree1 = rewrite(Tree, erl_syntax:match_expr(P1, E1)), + {ann_bindings(Tree1, Env, Bound, Free), Bound, Free}; + macro -> + %% The macro name must be ignored. The arguments are treated + %% as patterns. + {As, {Bound, Free}} = + case erl_syntax:macro_arguments(Tree) of + none -> + {none, {[], []}}; + As1 -> + vann_patterns(As1, Env) + end, + N = erl_syntax:macro_name(Tree), + Tree1 = rewrite(Tree, erl_syntax:macro(N, As)), + {ann_bindings(Tree1, Env, Bound, Free), Bound, Free}; + _Type -> + F = vann_patterns_join(Env), + {Tree1, {Bound, Free}} = mapfold_subtrees(F, {[], []}, + Tree), + {ann_bindings(Tree1, Env, Bound, Free), Bound, Free} + end. + +vann_patterns_join(Env) -> + fun (T, {Bound, Free}) -> + {T1, Bound1, Free1} = vann_pattern(T, Env), + {T1, {ordsets:union(Bound, Bound1), + ordsets:union(Free, Free1)}} + end. + +vann_patterns(Ps, Env) -> + lists:mapfoldl(vann_patterns_join(Env), {[], []}, Ps). + +vann_clause(C, Env) -> + {Ps, {Bound1, Free1}} = vann_patterns(erl_syntax:clause_patterns(C), + Env), + Env1 = ordsets:union(Env, Bound1), + %% Guards cannot add bindings + {G1, _, Free2} = case erl_syntax:clause_guard(C) of + none -> + {none, [], []}; + G -> + vann(G, Env1) + end, + {Es, {Bound2, Free3}} = vann_body(erl_syntax:clause_body(C), Env1), + Bound = ordsets:union(Bound1, Bound2), + Free = ordsets:union(Free1, + ordsets:subtract(ordsets:union(Free2, Free3), + Bound1)), + Tree1 = rewrite(C, erl_syntax:clause(Ps, G1, Es)), + {ann_bindings(Tree1, Env, Bound, Free), Bound, Free}. + +vann_clauses_join(Env) -> + fun (C, {Bound, Free}) -> + {C1, Bound1, Free1} = vann_clause(C, Env), + {C1, {ordsets:intersection(Bound, Bound1), + ordsets:union(Free, Free1)}} + end. + +vann_clauses([C | Cs], Env) -> + {C1, Bound, Free} = vann_clause(C, Env), + {Cs1, BF} = lists:mapfoldl(vann_clauses_join(Env), {Bound, Free}, Cs), + {[C1 | Cs1], BF}; +vann_clauses([], _Env) -> + {[], {[], []}}. + +ann_bindings(Tree, Env, Bound, Free) -> + As0 = erl_syntax:get_ann(Tree), + As1 = [{env, Env}, + {bound, Bound}, + {free, Free} + | delete_binding_anns(As0)], + erl_syntax:set_ann(Tree, As1). 
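
%% A sketch of the clause handling above (illustrative only; the helper
%% name is made up): clauses export only the variables bound in every
%% branch, so a variable bound in just one branch of a case does not count
%% as bound by the case expression as a whole.

case_scope_demo() ->
    {ok, Tokens, _} = erl_scan:string("case T of a -> X = 1; b -> 2 end. "),
    {ok, [Case]} = erl_parse:parse_exprs(Tokens),
    Ann = erl_syntax:get_ann(erl_syntax_lib:annotate_bindings(Case, [])),
    {bound, []} = lists:keyfind(bound, 1, Ann),   % X bound in one branch only
    {free, ['T']} = lists:keyfind(free, 1, Ann),
    ok.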
+ +delete_binding_anns([{env, _} | As]) -> + delete_binding_anns(As); +delete_binding_anns([{bound, _} | As]) -> + delete_binding_anns(As); +delete_binding_anns([{free, _} | As]) -> + delete_binding_anns(As); +delete_binding_anns([A | As]) -> + [A | delete_binding_anns(As)]; +delete_binding_anns([]) -> + []. + + +%% ===================================================================== +%% @spec is_fail_expr(Tree::syntaxTree()) -> bool() +%% +%% @doc Returns `true' if `Tree' represents an +%% expression which never terminates normally. Note that the reverse +%% does not apply. Currently, the detected cases are calls to +%% `exit/1', `throw/1', +%% `erlang:error/1' and `erlang:error/2'. +%% +%% @see //erts/erlang:exit/1 +%% @see //erts/erlang:throw/1 +%% @see //erts/erlang:error/1 +%% @see //erts/erlang:error/2 + +is_fail_expr(E) -> + case erl_syntax:type(E) of + application -> + N = length(erl_syntax:application_arguments(E)), + F = erl_syntax:application_operator(E), + case catch {ok, analyze_function_name(F)} of + syntax_error -> + false; + {ok, exit} when N =:= 1 -> + true; + {ok, throw} when N =:= 1 -> + true; + {ok, {erlang, exit}} when N =:= 1 -> + true; + {ok, {erlang, throw}} when N =:= 1 -> + true; + {ok, {erlang, error}} when N =:= 1 -> + true; + {ok, {erlang, error}} when N =:= 2 -> + true; + {ok, {erlang, fault}} when N =:= 1 -> + true; + {ok, {erlang, fault}} when N =:= 2 -> + true; + _ -> + false + end; + _ -> + false + end. + + +%% ===================================================================== +%% @spec analyze_forms(Forms) -> [{Key, term()}] +%% +%% Forms = syntaxTree() | [syntaxTree()] +%% Key = attributes | errors | exports | functions | imports +%% | module | records | rules | warnings +%% +%% @doc Analyzes a sequence of "program forms". The given +%% `Forms' may be a single syntax tree of type +%% `form_list', or a list of "program form" syntax trees. The +%% returned value is a list of pairs `{Key, Info}', where +%% each value of `Key' occurs at most once in the list; the +%% absence of a particular key indicates that there is no well-defined +%% value for that key. +%% +%% Each entry in the resulting list contains the following +%% corresponding information about the program forms: +%% <dl> +%% <dt>`{attributes, Attributes}'</dt> +%% <dd><ul> +%% <li>`Attributes = [{atom(), term()}]'</li> +%% </ul> +%% `Attributes' is a list of pairs representing the +%% names and corresponding values of all so-called "wild" +%% attributes (as e.g. "`-compile(...)'") occurring in +%% `Forms' (cf. `analyze_wild_attribute/1'). +%% We do not guarantee that each name occurs at most once in the +%% list. The order of listing is not defined.</dd> +%% +%% <dt>`{errors, Errors}'</dt> +%% <dd><ul> +%% <li>`Errors = [term()]'</li> +%% </ul> +%% `Errors' is the list of error descriptors of all +%% `error_marker' nodes that occur in +%% `Forms'. The order of listing is not defined.</dd> +%% +%% <dt>`{exports, Exports}'</dt> +%% <dd><ul> +%% <li>`Exports = [FunctionName]'</li> +%% <li>`FunctionName = atom() +%% | {atom(), integer()} +%% | {ModuleName, FunctionName}'</li> +%% <li>`ModuleName = atom()'</li> +%% </ul> +%% `Exports' is a list of representations of those +%% function names that are listed by export declaration attributes +%% in `Forms' (cf. +%% `analyze_export_attribute/1'). We do not guarantee +%% that each name occurs at most once in the list. 
The order of +%% listing is not defined.</dd> +%% +%% <dt>`{functions, Functions}'</dt> +%% <dd><ul> +%% <li>`Functions = [{atom(), integer()}]'</li> +%% </ul> +%% `Functions' is a list of the names of the functions +%% that are defined in `Forms' (cf. +%% `analyze_function/1'). We do not guarantee that each +%% name occurs at most once in the list. The order of listing is +%% not defined.</dd> +%% +%% <dt>`{imports, Imports}'</dt> +%% <dd><ul> +%% <li>`Imports = [{Module, Names}]'</li> +%% <li>`Module = atom()'</li> +%% <li>`Names = [FunctionName]'</li> +%% <li>`FunctionName = atom() +%% | {atom(), integer()} +%% | {ModuleName, FunctionName}'</li> +%% <li>`ModuleName = atom()'</li> +%% </ul> +%% `Imports' is a list of pairs representing those +%% module names and corresponding function names that are listed +%% by import declaration attributes in `Forms' (cf. +%% `analyze_import_attribute/1'), where each +%% `Module' occurs at most once in +%% `Imports'. We do not guarantee that each name occurs +%% at most once in the lists of function names. The order of +%% listing is not defined.</dd> +%% +%% <dt>`{module, ModuleName}'</dt> +%% <dd><ul> +%% <li>`ModuleName = atom()'</li> +%% </ul> +%% `ModuleName' is the name declared by a module +%% attribute in `Forms'. If no module name is defined +%% in `Forms', the result will contain no entry for the +%% `module' key. If multiple module name declarations +%% should occur, all but the first will be ignored.</dd> +%% +%% <dt>`{records, Records}'</dt> +%% <dd><ul> +%% <li>`Records = [{atom(), Fields}]'</li> +%% <li>`Fields = [{atom(), Default}]'</li> +%% <li>`Default = none | syntaxTree()'</li> +%% </ul> +%% `Records' is a list of pairs representing the names +%% and corresponding field declarations of all record declaration +%% attributes occurring in `Forms'. For fields declared +%% without a default value, the corresponding value for +%% `Default' is the atom `none' (cf. +%% `analyze_record_attribute/1'). We do not guarantee +%% that each record name occurs at most once in the list. The +%% order of listing is not defined.</dd> +%% +%% <dt>`{rules, Rules}'</dt> +%% <dd><ul> +%% <li>`Rules = [{atom(), integer()}]'</li> +%% </ul> +%% `Rules' is a list of the names of the rules that are +%% defined in `Forms' (cf. +%% `analyze_rule/1'). We do not guarantee that each +%% name occurs at most once in the list. The order of listing is +%% not defined.</dd> +%% +%% <dt>`{warnings, Warnings}'</dt> +%% <dd><ul> +%% <li>`Warnings = [term()]'</li> +%% </ul> +%% `Warnings' is the list of error descriptors of all +%% `warning_marker' nodes that occur in +%% `Forms'. The order of listing is not defined.</dd> +%% </dl> +%% +%% The evaluation throws `syntax_error' if an ill-formed +%% Erlang construct is encountered. +%% +%% @see analyze_wild_attribute/1 +%% @see analyze_export_attribute/1 +%% @see analyze_import_attribute/1 +%% @see analyze_record_attribute/1 +%% @see analyze_function/1 +%% @see analyze_rule/1 +%% @see erl_syntax:error_marker_info/1 +%% @see erl_syntax:warning_marker_info/1 + +analyze_forms(Forms) when is_list(Forms) -> + finfo_to_list(lists:foldl(fun collect_form/2, new_finfo(), Forms)); +analyze_forms(Forms) -> + analyze_forms( + erl_syntax:form_list_elements( + erl_syntax:flatten_form_list(Forms))). 
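
%% A usage sketch for `analyze_forms/1' as specified above (illustrative
%% only; analyze_forms_demo/0 and its local Parse fun are made up): a few
%% program forms are parsed and the resulting key/value list is inspected.

analyze_forms_demo() ->
    Parse = fun (S) ->
                    {ok, Tokens, _} = erl_scan:string(S),
                    {ok, Form} = erl_parse:parse_form(Tokens),
                    Form
            end,
    Forms = [Parse("-module(m). "),
             Parse("-export([f/1]). "),
             Parse("f(X) -> X. ")],
    Info = erl_syntax_lib:analyze_forms(Forms),
    {module, m} = lists:keyfind(module, 1, Info),
    {exports, [{f, 1}]} = lists:keyfind(exports, 1, Info),
    {functions, [{f, 1}]} = lists:keyfind(functions, 1, Info),
    Info.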
+ +collect_form(Node, Info) -> + case analyze_form(Node) of + {attribute, {Name, Data}} -> + collect_attribute(Name, Data, Info); + {attribute, preprocessor} -> + Info; + {function, Name} -> + finfo_add_function(Name, Info); + {rule, Name} -> + finfo_add_rule(Name, Info); + {error_marker, Data} -> + finfo_add_error(Data, Info); + {warning_marker, Data} -> + finfo_add_warning(Data, Info); + _ -> + Info + end. + +collect_attribute(module, M, Info) -> + finfo_set_module(M, Info); +collect_attribute(export, L, Info) -> + finfo_add_exports(L, Info); +collect_attribute(import, {M, L}, Info) -> + finfo_add_imports(M, L, Info); +collect_attribute(import, M, Info) -> + finfo_add_module_import(M, Info); +collect_attribute(file, _, Info) -> + Info; +collect_attribute(record, {R, L}, Info) -> + finfo_add_record(R, L, Info); +collect_attribute(spec, _, Info) -> + Info; +collect_attribute(_, {N, V}, Info) -> + finfo_add_attribute(N, V, Info). + +%% Abstract datatype for collecting module information. + +-record(forms, {module, exports, module_imports, imports, attributes, + records, errors, warnings, functions, rules}). + +new_finfo() -> + #forms{module = none, + exports = [], + module_imports = [], + imports = [], + attributes = [], + records = [], + errors = [], + warnings = [], + functions = [], + rules = [] + }. + +finfo_set_module(Name, Info) -> + case Info#forms.module of + none -> + Info#forms{module = {value, Name}}; + {value, _} -> + Info + end. + +finfo_add_exports(L, Info) -> + Info#forms{exports = L ++ Info#forms.exports}. + +finfo_add_module_import(M, Info) -> + Info#forms{module_imports = [M | Info#forms.module_imports]}. + +finfo_add_imports(M, L, Info) -> + Es = Info#forms.imports, + case lists:keyfind(M, 1, Es) of + {_, L1} -> + Es1 = lists:keyreplace(M, 1, Es, {M, L ++ L1}), + Info#forms{imports = Es1}; + false -> + Info#forms{imports = [{M, L} | Es]} + end. + +finfo_add_attribute(Name, Val, Info) -> + Info#forms{attributes = [{Name, Val} | Info#forms.attributes]}. + +finfo_add_record(R, L, Info) -> + Info#forms{records = [{R, L} | Info#forms.records]}. + +finfo_add_error(R, Info) -> + Info#forms{errors = [R | Info#forms.errors]}. + +finfo_add_warning(R, Info) -> + Info#forms{warnings = [R | Info#forms.warnings]}. + +finfo_add_function(F, Info) -> + Info#forms{functions = [F | Info#forms.functions]}. + +finfo_add_rule(F, Info) -> + Info#forms{rules = [F | Info#forms.rules]}. + +finfo_to_list(Info) -> + [{Key, Value} + || {Key, {value, Value}} <- + [{module, Info#forms.module}, + {exports, list_value(Info#forms.exports)}, + {imports, list_value(Info#forms.imports)}, + {module_imports, list_value(Info#forms.module_imports)}, + {attributes, list_value(Info#forms.attributes)}, + {records, list_value(Info#forms.records)}, + {errors, list_value(Info#forms.errors)}, + {warnings, list_value(Info#forms.warnings)}, + {functions, list_value(Info#forms.functions)}, + {rules, list_value(Info#forms.rules)} + ]]. + +list_value([]) -> + none; +list_value(List) -> + {value, List}. + + +%% ===================================================================== +%% @spec analyze_form(Node::syntaxTree()) -> {atom(), term()} | atom() +%% +%% @doc Analyzes a "source code form" node. If `Node' is a +%% "form" type (cf. 
`erl_syntax:is_form/1'), the returned +%% value is a tuple `{Type, Info}' where `Type' is +%% the node type and `Info' depends on `Type', as +%% follows: +%% <dl> +%% <dt>`{attribute, Info}'</dt> +%% +%% <dd>where `Info = analyze_attribute(Node)'.</dd> +%% +%% <dt>`{error_marker, Info}'</dt> +%% +%% <dd>where `Info = +%% erl_syntax:error_marker_info(Node)'.</dd> +%% +%% <dt>`{function, Info}'</dt> +%% +%% <dd>where `Info = analyze_function(Node)'.</dd> +%% +%% <dt>`{rule, Info}'</dt> +%% +%% <dd>where `Info = analyze_rule(Node)'.</dd> +%% +%% <dt>`{warning_marker, Info}'</dt> +%% +%% <dd>where `Info = +%% erl_syntax:warning_marker_info(Node)'.</dd> +%% </dl> +%% For other types of forms, only the node type is returned. +%% +%% The evaluation throws `syntax_error' if +%% `Node' is not well-formed. +%% +%% @see analyze_attribute/1 +%% @see analyze_function/1 +%% @see analyze_rule/1 +%% @see erl_syntax:is_form/1 +%% @see erl_syntax:error_marker_info/1 +%% @see erl_syntax:warning_marker_info/1 + +analyze_form(Node) -> + case erl_syntax:type(Node) of + attribute -> + {attribute, analyze_attribute(Node)}; + function -> + {function, analyze_function(Node)}; + rule -> + {rule, analyze_rule(Node)}; + error_marker -> + {error_marker, erl_syntax:error_marker_info(Node)}; + warning_marker -> + {warning_marker, erl_syntax:warning_marker_info(Node)}; + _ -> + case erl_syntax:is_form(Node) of + true -> + erl_syntax:type(Node); + false -> + throw(syntax_error) + end + end. + +%% ===================================================================== +%% @spec analyze_attribute(Node::syntaxTree()) -> +%% preprocessor | {atom(), atom()} +%% +%% @doc Analyzes an attribute node. If `Node' represents a +%% preprocessor directive, the atom `preprocessor' is +%% returned. Otherwise, if `Node' represents a module +%% attribute "`-<em>Name</em>...'", a tuple `{Name, +%% Info}' is returned, where `Info' depends on +%% `Name', as follows: +%% <dl> +%% <dt>`{module, Info}'</dt> +%% +%% <dd>where `Info = +%% analyze_module_attribute(Node)'.</dd> +%% +%% <dt>`{export, Info}'</dt> +%% +%% <dd>where `Info = +%% analyze_export_attribute(Node)'.</dd> +%% +%% <dt>`{import, Info}'</dt> +%% +%% <dd>where `Info = +%% analyze_import_attribute(Node)'.</dd> +%% +%% <dt>`{file, Info}'</dt> +%% +%% <dd>where `Info = +%% analyze_file_attribute(Node)'.</dd> +%% +%% <dt>`{record, Info}'</dt> +%% +%% <dd>where `Info = +%% analyze_record_attribute(Node)'.</dd> +%% +%% <dt>`{Name, Info}'</dt> +%% +%% <dd>where `{Name, Info} = +%% analyze_wild_attribute(Node)'.</dd> +%% </dl> +%% The evaluation throws `syntax_error' if `Node' +%% does not represent a well-formed module attribute. +%% +%% @see analyze_module_attribute/1 +%% @see analyze_export_attribute/1 +%% @see analyze_import_attribute/1 +%% @see analyze_file_attribute/1 +%% @see analyze_record_attribute/1 +%% @see analyze_wild_attribute/1 + +analyze_attribute(Node) -> + Name = erl_syntax:attribute_name(Node), + case erl_syntax:type(Name) of + atom -> + case erl_syntax:atom_value(Name) of + define -> preprocessor; + undef -> preprocessor; + include -> preprocessor; + include_lib -> preprocessor; + ifdef -> preprocessor; + ifndef -> preprocessor; + else -> preprocessor; + endif -> preprocessor; + A -> + {A, analyze_attribute(A, Node)} + end; + _ -> + throw(syntax_error) + end. 
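
%% A sketch of `analyze_form/1' and `analyze_attribute/1' from above on
%% single parsed forms (illustrative only; the helper and its local Parse
%% fun are made up for the example).

analyze_form_demo() ->
    Parse = fun (S) ->
                    {ok, Tokens, _} = erl_scan:string(S),
                    {ok, Form} = erl_parse:parse_form(Tokens),
                    Form
            end,
    {attribute, {module, m}} =
        erl_syntax_lib:analyze_form(Parse("-module(m). ")),
    {function, {f, 1}} =
        erl_syntax_lib:analyze_form(Parse("f(X) -> X. ")),
    {export, [{f, 1}]} =
        erl_syntax_lib:analyze_attribute(Parse("-export([f/1]). ")),
    ok.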
+ +analyze_attribute(module, Node) -> + analyze_module_attribute(Node); +analyze_attribute(export, Node) -> + analyze_export_attribute(Node); +analyze_attribute(import, Node) -> + analyze_import_attribute(Node); +analyze_attribute(file, Node) -> + analyze_file_attribute(Node); +analyze_attribute(record, Node) -> + analyze_record_attribute(Node); +analyze_attribute(define, _Node) -> + define; +analyze_attribute(spec, _Node) -> + spec; +analyze_attribute(_, Node) -> + %% A "wild" attribute (such as e.g. a `compile' directive). + analyze_wild_attribute(Node). + + +%% ===================================================================== +%% @spec analyze_module_attribute(Node::syntaxTree()) -> +%% Name::atom() | {Name::atom(), Variables::[atom()]} +%% +%% @doc Returns the module name and possible parameters declared by a +%% module attribute. If the attribute is a plain module declaration such +%% as `-module(name)', the result is the module name. If the attribute +%% is a parameterized module declaration, the result is a tuple +%% containing the module name and a list of the parameter variable +%% names. +%% +%% The evaluation throws `syntax_error' if +%% `Node' does not represent a well-formed module +%% attribute. +%% +%% @see analyze_attribute/1 + +analyze_module_attribute(Node) -> + case erl_syntax:type(Node) of + attribute -> + case erl_syntax:attribute_arguments(Node) of + [M] -> + module_name_to_atom(M); + [M, L] -> + M1 = module_name_to_atom(M), + L1 = analyze_variable_list(L), + {M1, L1}; + _ -> + throw(syntax_error) + end; + _ -> + throw(syntax_error) + end. + +analyze_variable_list(Node) -> + case erl_syntax:is_proper_list(Node) of + true -> + [erl_syntax:variable_name(V) + || V <- erl_syntax:list_elements(Node)]; + false -> + throw(syntax_error) + end. + + +%% ===================================================================== +%% @spec analyze_export_attribute(Node::syntaxTree()) -> [FunctionName] +%% +%% FunctionName = atom() | {atom(), integer()} +%% | {ModuleName, FunctionName} +%% ModuleName = atom() +%% +%% @doc Returns the list of function names declared by an export +%% attribute. We do not guarantee that each name occurs at most once in +%% the list. The order of listing is not defined. +%% +%% The evaluation throws `syntax_error' if +%% `Node' does not represent a well-formed export +%% attribute. +%% +%% @see analyze_attribute/1 + +analyze_export_attribute(Node) -> + case erl_syntax:type(Node) of + attribute -> + case erl_syntax:attribute_arguments(Node) of + [L] -> + analyze_function_name_list(L); + _ -> + throw(syntax_error) + end; + _ -> + throw(syntax_error) + end. + +analyze_function_name_list(Node) -> + case erl_syntax:is_proper_list(Node) of + true -> + [analyze_function_name(F) + || F <- erl_syntax:list_elements(Node)]; + false -> + throw(syntax_error) + end. + + +%% ===================================================================== +%% @spec analyze_function_name(Node::syntaxTree()) -> FunctionName +%% +%% FunctionName = atom() | {atom(), integer()} +%% | {ModuleName, FunctionName} +%% ModuleName = atom() +%% +%% @doc Returns the function name represented by a syntax tree. If +%% `Node' represents a function name, such as +%% "`foo/1'" or "`bloggs:fred/2'", a uniform +%% representation of that name is returned. Different nestings of arity +%% and module name qualifiers in the syntax tree does not affect the +%% result. +%% +%% The evaluation throws `syntax_error' if +%% `Node' does not represent a well-formed function name. 
+ +analyze_function_name(Node) -> + case erl_syntax:type(Node) of + atom -> + erl_syntax:atom_value(Node); + arity_qualifier -> + A = erl_syntax:arity_qualifier_argument(Node), + case erl_syntax:type(A) of + integer -> + F = erl_syntax:arity_qualifier_body(Node), + F1 = analyze_function_name(F), + append_arity(erl_syntax:integer_value(A), F1); + _ -> + throw(syntax_error) + end; + module_qualifier -> + M = erl_syntax:module_qualifier_argument(Node), + case erl_syntax:type(M) of + atom -> + F = erl_syntax:module_qualifier_body(Node), + F1 = analyze_function_name(F), + {erl_syntax:atom_value(M), F1}; + _ -> + throw(syntax_error) + end; + _ -> + throw(syntax_error) + end. + +append_arity(A, {Module, Name}) -> + {Module, append_arity(A, Name)}; +append_arity(A, Name) when is_atom(Name) -> + {Name, A}; +append_arity(A, A) -> + A; +append_arity(_A, Name) -> + Name. % quietly drop extra arity in case of conflict + + +%% ===================================================================== +%% @spec analyze_import_attribute(Node::syntaxTree()) -> +%% {atom(), [FunctionName]} | atom() +%% +%% FunctionName = atom() | {atom(), integer()} +%% | {ModuleName, FunctionName} +%% ModuleName = atom() +%% +%% @doc Returns the module name and (if present) list of function names +%% declared by an import attribute. The returned value is an atom +%% `Module' or a pair `{Module, Names}', where +%% `Names' is a list of function names declared as imported +%% from the module named by `Module'. We do not guarantee +%% that each name occurs at most once in `Names'. The order +%% of listing is not defined. +%% +%% The evaluation throws `syntax_error' if +%% `Node' does not represent a well-formed import +%% attribute. +%% +%% @see analyze_attribute/1 + +analyze_import_attribute(Node) -> + case erl_syntax:type(Node) of + attribute -> + case erl_syntax:attribute_arguments(Node) of + [M] -> + module_name_to_atom(M); + [M, L] -> + M1 = module_name_to_atom(M), + L1 = analyze_function_name_list(L), + {M1, L1}; + _ -> + throw(syntax_error) + end; + _ -> + throw(syntax_error) + end. + + +%% ===================================================================== +%% @spec analyze_wild_attribute(Node::syntaxTree()) -> {atom(), term()} +%% +%% @doc Returns the name and value of a "wild" attribute. The result is +%% the pair `{Name, Value}', if `Node' represents +%% "`-Name(Value)'". +%% +%% Note that no checking is done whether `Name' is a +%% reserved attribute name such as `module' or +%% `export': it is assumed that the attribute is "wild". +%% +%% The evaluation throws `syntax_error' if +%% `Node' does not represent a well-formed wild +%% attribute. +%% +%% @see analyze_attribute/1 + +analyze_wild_attribute(Node) -> + case erl_syntax:type(Node) of + attribute -> + N = erl_syntax:attribute_name(Node), + case erl_syntax:type(N) of + atom -> + case erl_syntax:attribute_arguments(Node) of + [V] -> + case catch {ok, erl_syntax:concrete(V)} of + {ok, Val} -> + {erl_syntax:atom_value(N), Val}; + _ -> + throw(syntax_error) + end; + _ -> + throw(syntax_error) + end; + _ -> + throw(syntax_error) + end; + _ -> + throw(syntax_error) + end. + + +%% ===================================================================== +%% @spec analyze_record_attribute(Node::syntaxTree()) -> +%% {atom(), Fields} +%% +%% Fields = [{atom(), none | syntaxTree()}] +%% +%% @doc Returns the name and the list of fields of a record declaration +%% attribute. 
The result is a pair `{Name, Fields}', if +%% `Node' represents "`-record(Name, {...}).'", +%% where `Fields' is a list of pairs `{Label, +%% Default}' for each field "`Label'" or "`Label = +%% <em>Default</em>'" in the declaration, listed in left-to-right +%% order. If the field has no default-value declaration, the value for +%% `Default' will be the atom `none'. We do not +%% guarantee that each label occurs at most one in the list. +%% +%% The evaluation throws `syntax_error' if +%% `Node' does not represent a well-formed record declaration +%% attribute. +%% +%% @see analyze_attribute/1 +%% @see analyze_record_field/1 + +analyze_record_attribute(Node) -> + case erl_syntax:type(Node) of + attribute -> + case erl_syntax:attribute_arguments(Node) of + [R, T] -> + case erl_syntax:type(R) of + atom -> + Es = analyze_record_attribute_tuple(T), + {erl_syntax:atom_value(R), Es}; + _ -> + throw(syntax_error) + end; + _ -> + throw(syntax_error) + end; + _ -> + throw(syntax_error) + end. + +analyze_record_attribute_tuple(Node) -> + case erl_syntax:type(Node) of + tuple -> + [analyze_record_field(F) + || F <- erl_syntax:tuple_elements(Node)]; + _ -> + throw(syntax_error) + end. + + +%% ===================================================================== +%% @spec analyze_record_expr(Node::syntaxTree()) -> +%% {atom(), Info} | atom() +%% +%% Info = {atom(), [{atom(), Value}]} | {atom(), atom()} | atom() +%% Value = none | syntaxTree() +%% +%% @doc Returns the record name and field name/names of a record +%% expression. If `Node' has type `record_expr', +%% `record_index_expr' or `record_access', a pair +%% `{Type, Info}' is returned, otherwise an atom +%% `Type' is returned. `Type' is the node type of +%% `Node', and `Info' depends on +%% `Type', as follows: +%% <dl> +%% <dt>`record_expr':</dt> +%% <dd>`{atom(), [{atom(), Value}]}'</dd> +%% <dt>`record_access':</dt> +%% <dd>`{atom(), atom()} | atom()'</dd> +%% <dt>`record_index_expr':</dt> +%% <dd>`{atom(), atom()}'</dd> +%% </dl> +%% +%% For a `record_expr' node, `Info' represents +%% the record name and the list of descriptors for the involved fields, +%% listed in the order they appear. (See +%% `analyze_record_field/1' for details on the field +%% descriptors). For a `record_access' node, +%% `Info' represents the record name and the field name (or +%% if the record name is not included, only the field name; this is +%% allowed only in Mnemosyne-query syntax). For a +%% `record_index_expr' node, `Info' represents the +%% record name and the name field name. +%% +%% The evaluation throws `syntax_error' if +%% `Node' represents a record expression that is not +%% well-formed. 
+%% +%% @see analyze_record_attribute/1 +%% @see analyze_record_field/1 + +analyze_record_expr(Node) -> + case erl_syntax:type(Node) of + record_expr -> + A = erl_syntax:record_expr_type(Node), + case erl_syntax:type(A) of + atom -> + Fs = [analyze_record_field(F) + || F <- erl_syntax:record_expr_fields(Node)], + {record_expr, {erl_syntax:atom_value(A), Fs}}; + _ -> + throw(syntax_error) + end; + record_access -> + F = erl_syntax:record_access_field(Node), + case erl_syntax:type(F) of + atom -> + case erl_syntax:record_access_type(Node) of + none -> + {record_access, erl_syntax:atom_value(F)}; + A -> + case erl_syntax:type(A) of + atom -> + {record_access, + {erl_syntax:atom_value(A), + erl_syntax:atom_value(F)}}; + _ -> + throw(syntax_error) + end + end; + _ -> + throw(syntax_error) + end; + record_index_expr -> + F = erl_syntax:record_index_expr_field(Node), + case erl_syntax:type(F) of + atom -> + A = erl_syntax:record_index_expr_type(Node), + case erl_syntax:type(A) of + atom -> + {record_index_expr, + {erl_syntax:atom_value(A), + erl_syntax:atom_value(F)}}; + _ -> + throw(syntax_error) + end; + _ -> + throw(syntax_error) + end; + Type -> + Type + end. + +%% ===================================================================== +%% @spec analyze_record_field(Node::syntaxTree()) -> {atom(), Value} +%% +%% Value = none | syntaxTree() +%% +%% @doc Returns the label and value-expression of a record field +%% specifier. The result is a pair `{Label, Value}', if +%% `Node' represents "`Label = <em>Value</em>'" or +%% "`Label'", where in the first case, `Value' is +%% a syntax tree, and in the second case `Value' is +%% `none'. +%% +%% The evaluation throws `syntax_error' if +%% `Node' does not represent a well-formed record field +%% specifier. +%% +%% @see analyze_record_attribute/1 +%% @see analyze_record_expr/1 + +analyze_record_field(Node) -> + case erl_syntax:type(Node) of + record_field -> + A = erl_syntax:record_field_name(Node), + case erl_syntax:type(A) of + atom -> + T = erl_syntax:record_field_value(Node), + {erl_syntax:atom_value(A), T}; + _ -> + throw(syntax_error) + end; + _ -> + throw(syntax_error) + end. + + +%% ===================================================================== +%% @spec analyze_file_attribute(Node::syntaxTree()) -> +%% {string(), integer()} +%% +%% @doc Returns the file name and line number of a `file' +%% attribute. The result is the pair `{File, Line}' if +%% `Node' represents "`-file(File, Line).'". +%% +%% The evaluation throws `syntax_error' if +%% `Node' does not represent a well-formed `file' +%% attribute. +%% +%% @see analyze_attribute/1 + +analyze_file_attribute(Node) -> + case erl_syntax:type(Node) of + attribute -> + case erl_syntax:attribute_arguments(Node) of + [F, N] -> + case (erl_syntax:type(F) =:= string) + and (erl_syntax:type(N) =:= integer) of + true -> + {erl_syntax:string_value(F), + erl_syntax:integer_value(N)}; + false -> + throw(syntax_error) + end; + _ -> + throw(syntax_error) + end; + _ -> + throw(syntax_error) + end. + + +%% ===================================================================== +%% @spec analyze_function(Node::syntaxTree()) -> {atom(), integer()} +%% +%% @doc Returns the name and arity of a function definition. The result +%% is a pair `{Name, A}' if `Node' represents a +%% function definition "`Name(<em>P_1</em>, ..., <em>P_A</em>) -> +%% ...'". +%% +%% The evaluation throws `syntax_error' if +%% `Node' does not represent a well-formed function +%% definition. 
+%% +%% @see analyze_rule/1 + +analyze_function(Node) -> + case erl_syntax:type(Node) of + function -> + N = erl_syntax:function_name(Node), + case erl_syntax:type(N) of + atom -> + {erl_syntax:atom_value(N), + erl_syntax:function_arity(Node)}; + _ -> + throw(syntax_error) + end; + _ -> + throw(syntax_error) + end. + + +%% ===================================================================== +%% @spec analyze_rule(Node::syntaxTree()) -> {atom(), integer()} +%% +%% @doc Returns the name and arity of a Mnemosyne rule. The result is a +%% pair `{Name, A}' if `Node' represents a rule +%% "`Name(<em>P_1</em>, ..., <em>P_A</em>) :- ...'". +%% +%% The evaluation throws `syntax_error' if +%% `Node' does not represent a well-formed Mnemosyne +%% rule. +%% +%% @see analyze_function/1 + +analyze_rule(Node) -> + case erl_syntax:type(Node) of + rule -> + N = erl_syntax:rule_name(Node), + case erl_syntax:type(N) of + atom -> + {erl_syntax:atom_value(N), + erl_syntax:rule_arity(Node)}; + _ -> + throw(syntax_error) + end; + _ -> + throw(syntax_error) + end. + + +%% ===================================================================== +%% @spec analyze_implicit_fun(Node::syntaxTree()) -> FunctionName +%% +%% FunctionName = atom() | {atom(), integer()} +%% | {ModuleName, FunctionName} +%% ModuleName = atom() +%% +%% @doc Returns the name of an implicit fun expression "`fun +%% <em>F</em>'". The result is a representation of the function +%% name `F'. (Cf. `analyze_function_name/1'.) +%% +%% The evaluation throws `syntax_error' if +%% `Node' does not represent a well-formed implicit fun. +%% +%% @see analyze_function_name/1 + +analyze_implicit_fun(Node) -> + case erl_syntax:type(Node) of + implicit_fun -> + analyze_function_name( + erl_syntax:implicit_fun_name(Node)); + _ -> + throw(syntax_error) + end. + + +%% ===================================================================== +%% @spec analyze_application(Node::syntaxTree()) -> FunctionName | Arity +%% +%% FunctionName = {atom(), Arity} +%% | {ModuleName, FunctionName} +%% Arity = integer() +%% ModuleName = atom() +%% +%% @doc Returns the name of a called function. The result is a +%% representation of the name of the applied function `F/A', +%% if `Node' represents a function application +%% "`<em>F</em>(<em>X_1</em>, ..., <em>X_A</em>)'". If the +%% function is not explicitly named (i.e., `F' is given by +%% some expression), only the arity `A' is returned. +%% +%% The evaluation throws `syntax_error' if +%% `Node' does not represent a well-formed application +%% expression. +%% +%% @see analyze_function_name/1 + +analyze_application(Node) -> + case erl_syntax:type(Node) of + application -> + A = length(erl_syntax:application_arguments(Node)), + F = erl_syntax:application_operator(Node), + case catch {ok, analyze_function_name(F)} of + syntax_error -> + A; + {ok, N} -> + append_arity(A, N); + _ -> + throw(syntax_error) + end; + _ -> + throw(syntax_error) + end. + + +%% ===================================================================== +%% @spec function_name_expansions(Names::[Name]) -> [{ShortName, Name}] +%% +%% Name = ShortName | {atom(), Name} +%% ShortName = atom() | {atom(), integer()} +%% +%% @doc Creates a mapping from corresponding short names to full +%% function names. Names are represented by nested tuples of atoms and +%% integers (cf. `analyze_function_name/1'). 
The result is a +%% list containing a pair `{ShortName, Name}' for each +%% element `Name' in the given list, where the corresponding +%% `ShortName' is the rightmost-innermost part of +%% `Name'. The list thus represents a finite mapping from +%% unqualified names to the corresponding qualified names. +%% +%% Note: the resulting list can contain more than one tuple +%% `{ShortName, Name}' for the same `ShortName', +%% possibly with different values for `Name', depending on +%% the given list. +%% +%% @see analyze_function_name/1 + +function_name_expansions(Fs) -> + function_name_expansions(Fs, []). + +function_name_expansions([F | Fs], Ack) -> + function_name_expansions(Fs, + function_name_expansions(F, F, Ack)); +function_name_expansions([], Ack) -> + Ack. + +function_name_expansions({A, N}, Name, Ack) when is_integer(N) -> + [{{A, N}, Name} | Ack]; +function_name_expansions({_, N}, Name, Ack) -> + function_name_expansions(N, Name, Ack); +function_name_expansions(A, Name, Ack) -> + [{A, Name} | Ack]. + + +%% ===================================================================== +%% @spec strip_comments(Tree::syntaxTree()) -> syntaxTree() +%% +%% @doc Removes all comments from all nodes of a syntax tree. All other +%% attributes (such as position information) remain unchanged. +%% Standalone comments in form lists are removed; any other standalone +%% comments are changed into null-comments (no text, no indentation). + +strip_comments(Tree) -> + map(fun strip_comments_1/1, Tree). + +strip_comments_1(T) -> + case erl_syntax:type(T) of + form_list -> + Es = erl_syntax:form_list_elements(T), + Es1 = [E || E <- Es, erl_syntax:type(E) /= comment], + T1 = erl_syntax:copy_attrs(T, erl_syntax:form_list(Es1)), + erl_syntax:remove_comments(T1); + comment -> + erl_syntax:comment([]); + _ -> + erl_syntax:remove_comments(T) + end. + +%% ===================================================================== +%% @spec to_comment(Tree) -> syntaxTree() +%% @equiv to_comment(Tree, "% ") + +to_comment(Tree) -> + to_comment(Tree, "% "). + +%% ===================================================================== +%% @spec to_comment(Tree::syntaxTree(), Prefix::string()) -> +%% syntaxTree() +%% +%% @doc Equivalent to `to_comment(Tree, Prefix, F)' for a +%% default formatting function `F'. The default +%% `F' simply calls `erl_prettypr:format/1'. +%% +%% @see to_comment/3 +%% @see erl_prettypr:format/1 + +to_comment(Tree, Prefix) -> + F = fun (T) -> erl_prettypr:format(T) end, + to_comment(Tree, Prefix, F). + +%% ===================================================================== +%% @spec to_comment(Tree::syntaxTree(), Prefix::string(), Printer) -> +%% syntaxTree() +%% +%% Printer = (syntaxTree()) -> string() +%% +%% @doc Transforms a syntax tree into an abstract comment. The lines of +%% the comment contain the text for `Node', as produced by +%% the given `Printer' function. Each line of the comment is +%% prefixed by the string `Prefix' (this does not include the +%% initial "`%'" character of the comment line). +%% +%% For example, the result of +%% `to_comment(erl_syntax:abstract([a,b,c]))' represents +%% <pre> +%% %% [a,b,c]</pre> +%% (cf. `to_comment/1'). +%% +%% Note: the text returned by the formatting function will be split +%% automatically into separate comment lines at each line break. No +%% extra work is needed. +%% +%% @see to_comment/1 +%% @see to_comment/2 + +to_comment(Tree, Prefix, F) -> + erl_syntax:comment(split_lines(F(Tree), Prefix)). 
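+
+%% Illustrative use of `to_comment/3' (a sketch; the variable names and
+%% the "! " prefix are only examples):
+%%
+%%   Tree = erl_syntax:abstract({error, badarg}),
+%%   C = to_comment(Tree, "! ", fun erl_prettypr:format/1)
+%%
+%% `C' is then a comment node whose single line is the prefix followed
+%% by the printed term (roughly "! {error, badarg}"), which is printed
+%% as "%! {error, badarg}".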
+ + +%% ===================================================================== +%% @spec limit(Tree, Depth) -> syntaxTree() +%% +%% @doc Equivalent to `limit(Tree, Depth, Text)' using the +%% text `"..."' as default replacement. +%% +%% @see limit/3 +%% @see erl_syntax:text/1 + +limit(Tree, Depth) -> + limit(Tree, Depth, erl_syntax:text("...")). + +%% ===================================================================== +%% @spec limit(Tree::syntaxTree(), Depth::integer(), +%% Node::syntaxTree()) -> syntaxTree() +%% +%% @doc Limits a syntax tree to a specified depth. Replaces all non-leaf +%% subtrees in `Tree' at the given `Depth' by +%% `Node'. If `Depth' is negative, the result is +%% always `Node', even if `Tree' has no subtrees. +%% +%% When a group of subtrees (as e.g., the argument list of an +%% `application' node) is at the specified depth, and there +%% are two or more subtrees in the group, these will be collectively +%% replaced by `Node' even if they are leaf nodes. Groups of +%% subtrees that are above the specified depth will be limited in size, +%% as if each subsequent tree in the group were one level deeper than +%% the previous. E.g., if `Tree' represents a list of +%% integers "`[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]'", the result +%% of `limit(Tree, 5)' will represent `[1, 2, 3, 4, +%% ...]'. +%% +%% The resulting syntax tree is typically only useful for +%% pretty-printing or similar visual formatting. +%% +%% @see limit/2 + +limit(_Tree, Depth, Node) when Depth < 0 -> + Node; +limit(Tree, Depth, Node) -> + limit_1(Tree, Depth, Node). + +limit_1(Tree, Depth, Node) -> + %% Depth is nonnegative here. + case erl_syntax:subtrees(Tree) of + [] -> + if Depth > 0 -> + Tree; + true -> + case is_simple_leaf(Tree) of + true -> + Tree; + false -> + Node + end + end; + Gs -> + if Depth > 1 -> + Gs1 = [[limit_1(T, Depth - 1, Node) + || T <- limit_list(G, Depth, Node)] + || G <- Gs], + rewrite(Tree, + erl_syntax:make_tree(erl_syntax:type(Tree), + Gs1)); + Depth =:= 0 -> + %% Depth is zero, and this is not a leaf node + %% so we always replace it. + Node; + true -> + %% Depth is 1, so all subtrees are to be cut. + %% This is done groupwise. + Gs1 = [cut_group(G, Node) || G <- Gs], + rewrite(Tree, + erl_syntax:make_tree(erl_syntax:type(Tree), + Gs1)) + end + end. + +cut_group([], _Node) -> + []; +cut_group([T], Node) -> + %% Only if the group contains a single subtree do we try to + %% preserve it if suitable. + [limit_1(T, 0, Node)]; +cut_group(_Ts, Node) -> + [Node]. + +is_simple_leaf(Tree) -> + case erl_syntax:type(Tree) of + atom -> true; + char -> true; + float -> true; + integer -> true; + nil -> true; + operator -> true; + tuple -> true; + underscore -> true; + variable -> true; + _ -> false + end. + +%% If list has more than N elements, take the N - 1 first and +%% append Node; otherwise return list as is. + +limit_list(Ts, N, Node) -> + if length(Ts) > N -> + limit_list_1(Ts, N - 1, Node); + true -> + Ts + end. + +limit_list_1([T | Ts], N, Node) -> + if N > 0 -> + [T | limit_list_1(Ts, N - 1, Node)]; + true -> + [Node] + end; +limit_list_1([], _N, _Node) -> + []. + + +%% ===================================================================== +%% Utility functions + +rewrite(Tree, Tree1) -> + erl_syntax:copy_attrs(Tree, Tree1). 
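+
+%% Illustrative use of `limit/2' above (a sketch; the list literal is
+%% the example from the documentation of limit/3):
+%%
+%%   Tree = erl_syntax:abstract([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
+%%   io:put_chars(erl_prettypr:format(limit(Tree, 5)))
+%%
+%% This prints a term of the form "[1, 2, 3, 4, ...]"; as noted above,
+%% the limited tree is mainly useful for such visual formatting.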
+ +module_name_to_atom(M) -> + case erl_syntax:type(M) of + atom -> + erl_syntax:atom_value(M); + qualified_name -> + list_to_atom(packages:concat( + [erl_syntax:atom_value(A) + || A <- erl_syntax:qualified_name_segments(M)]) + ); + _ -> + throw(syntax_error) + end. + +%% This splits lines at line terminators and expands tab characters to +%% spaces. The width of a tab is assumed to be 8. + +% split_lines(Cs) -> +% split_lines(Cs, ""). + +split_lines(Cs, Prefix) -> + split_lines(Cs, Prefix, 0). + +split_lines(Cs, Prefix, N) -> + lists:reverse(split_lines(Cs, N, [], [], Prefix)). + +split_lines([$\r, $\n | Cs], _N, Cs1, Ls, Prefix) -> + split_lines_1(Cs, Cs1, Ls, Prefix); +split_lines([$\r | Cs], _N, Cs1, Ls, Prefix) -> + split_lines_1(Cs, Cs1, Ls, Prefix); +split_lines([$\n | Cs], _N, Cs1, Ls, Prefix) -> + split_lines_1(Cs, Cs1, Ls, Prefix); +split_lines([$\t | Cs], N, Cs1, Ls, Prefix) -> + split_lines(Cs, 0, push(8 - (N rem 8), $\040, Cs1), Ls, + Prefix); +split_lines([C | Cs], N, Cs1, Ls, Prefix) -> + split_lines(Cs, N + 1, [C | Cs1], Ls, Prefix); +split_lines([], _, [], Ls, _) -> + Ls; +split_lines([], _N, Cs, Ls, Prefix) -> + [Prefix ++ lists:reverse(Cs) | Ls]. + +split_lines_1(Cs, Cs1, Ls, Prefix) -> + split_lines(Cs, 0, [], [Prefix ++ lists:reverse(Cs1) | Ls], + Prefix). + +push(N, C, Cs) when N > 0 -> + push(N - 1, C, [C | Cs]); +push(0, _, Cs) -> + Cs. + diff --git a/lib/syntax_tools/src/erl_tidy.erl b/lib/syntax_tools/src/erl_tidy.erl new file mode 100644 index 0000000000..e3b479008f --- /dev/null +++ b/lib/syntax_tools/src/erl_tidy.erl @@ -0,0 +1,1898 @@ +%% ===================================================================== +%% This library is free software; you can redistribute it and/or +%% modify it under the terms of the GNU Lesser General Public License +%% as published by the Free Software Foundation; either version 2 of +%% the License, or (at your option) any later version. +%% +%% This library is distributed in the hope that it will be useful, but +%% WITHOUT ANY WARRANTY; without even the implied warranty of +%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +%% Lesser General Public License for more details. +%% +%% You should have received a copy of the GNU Lesser General Public +%% License along with this library; if not, write to the Free Software +%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +%% USA +%% +%% $Id$ +%% +%% @copyright 1999-2006 Richard Carlsson +%% @author Richard Carlsson <[email protected]> +%% @end +%% ===================================================================== + +%% @doc Tidies and pretty-prints Erlang source code, removing unused +%% functions, updating obsolete constructs and function calls, etc. +%% +%% Caveats: It is possible that in some intricate uses of macros, +%% the automatic addition or removal of parentheses around uses or +%% arguments could cause the resulting program to be rejected by the +%% compiler; however, we have found no such case in existing +%% code. Programs defining strange macros can usually not be read by +%% this program, and in those cases, no changes will be made. +%% +%% If you really, really want to, you may call it "Inga". +%% +%% Disclaimer: The author accepts no responsibility for errors +%% introduced in code that has been processed by the program. It has +%% been reasonably well tested, but the possibility of errors remains. +%% Keep backups of your original code safely stored, until you feel +%% confident that the new, modified code can be trusted. 
+ +-module(erl_tidy). + +-export([dir/0, dir/1, dir/2, file/1, file/2, module/1, module/2]). + +-include_lib("kernel/include/file.hrl"). + +-define(DEFAULT_BACKUP_SUFFIX, ".bak"). +-define(DEFAULT_DIR, ""). +-define(DEFAULT_REGEXP, ".*\\.erl$"). + +%% ===================================================================== + +-type options() :: [atom() | {atom(), any()}]. + +%% ===================================================================== + +dir__defaults() -> + [{follow_links, false}, + recursive, + {regexp, ?DEFAULT_REGEXP}, + verbose]. + +%% ===================================================================== +%% @spec dir() -> ok +%% @equiv dir("") + +-spec dir() -> 'ok'. +dir() -> + dir(""). + +%% ===================================================================== +%% @spec dir(Dir) -> ok +%% @equiv dir(Dir, []) + +-spec dir(file:filename()) -> 'ok'. +dir(Dir) -> + dir(Dir, []). + +%% ===================================================================== +%% @spec dir(Directory::filename(), Options::[term()]) -> ok +%% filename() = file:filename() +%% +%% @doc Tidies Erlang source files in a directory and its +%% subdirectories. +%% +%% Available options: +%% <dl> +%% <dt>{follow_links, boolean()}</dt> +%% +%% <dd>If the value is `true', symbolic directory +%% links will be followed. The default value is +%% `false'.</dd> +%% +%% <dt>{recursive, boolean()}</dt> +%% +%% <dd>If the value is `true', subdirectories will be +%% visited recursively. The default value is +%% `true'.</dd> +%% +%% <dt>{regexp, string()}</dt> +%% +%% <dd>The value denotes a regular expression (see module +%% `regexp'). Tidying will only be applied to those +%% regular files whose names match this pattern. The default +%% value is `".*\\.erl$"', which matches normal +%% Erlang source file names.</dd> +%% +%% <dt>{test, boolean()}</dt> +%% +%% <dd>If the value is `true', no files will be +%% modified. The default value is `false'.</dd> +%% +%% <dt>{verbose, boolean()}</dt> +%% +%% <dd>If the value is `true', progress messages will +%% be output while the program is running, unless the +%% `quiet' option is `true'. The default +%% value when calling {@link dir/2} is `true'.</dd> +%% +%% </dl> +%% +%% See the function {@link file/2} for further options. +%% +%% @see //stdlib/regexp +%% @see file/2 + +-record(dir, {follow_links = false :: boolean(), + recursive = true :: boolean(), + options :: options()}). + +-spec dir(file:filename(), options()) -> 'ok'. +dir(Dir, Opts) -> + Opts1 = Opts ++ dir__defaults(), + Env = #dir{follow_links = proplists:get_bool(follow_links, Opts1), + recursive = proplists:get_bool(recursive, Opts1), + options = Opts1}, + Regexp = proplists:get_value(regexp, Opts1), + case filename(Dir) of + "" -> + Dir1 = "."; + Dir1 -> + ok + end, + dir_1(Dir1, Regexp, Env). + +dir_1(Dir, Regexp, Env) -> + case file:list_dir(Dir) of + {ok, Files} -> + lists:foreach(fun (X) -> dir_2(X, Regexp, Dir, Env) end, + Files); + {error, _} -> + report_error("error reading directory `~s'", + [filename(Dir)]), + exit(error) + end. + +dir_2(Name, Regexp, Dir, Env) -> + File = if Dir =:= "" -> + Name; + true -> + filename:join(Dir, Name) + end, + case file_type(File) of + {value, regular} -> + dir_4(File, Regexp, Env); + {value, directory} when Env#dir.recursive =:= true -> + case is_symlink(Name) of + false -> + dir_3(Name, Dir, Regexp, Env); + true when Env#dir.follow_links =:= true -> + dir_3(Name, Dir, Regexp, Env); + _ -> + ok + end; + _ -> + ok + end. 
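+
+%% An illustrative call of dir/2 with the options documented above (a
+%% sketch; "src" is only a placeholder directory name):
+%%
+%%   erl_tidy:dir("src", [{regexp, ".*\\.erl$"},
+%%                        {recursive, true},
+%%                        {test, true},
+%%                        {verbose, true}])
+%%
+%% With the test option set, the run only reports what would be
+%% changed and modifies no files.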
+ +dir_3(Name, Dir, Regexp, Env) -> + Dir1 = filename:join(Dir, Name), + verbose("tidying directory `~s'.", [Dir1], Env#dir.options), + dir_1(Dir1, Regexp, Env). + +dir_4(File, Regexp, Env) -> + case re:run(File, Regexp) of + {match, _} -> + Opts = [{outfile, File}, {dir, ""} | Env#dir.options], + case catch file(File, Opts) of + {'EXIT', Value} -> + warn("error tidying `~s'.~n~p", [File,Value], Opts); + _ -> + ok + end; + nomatch -> + ok + end. + +file__defaults() -> + [{backup_suffix, ?DEFAULT_BACKUP_SUFFIX}, + backups, + {dir, ?DEFAULT_DIR}, + {printer, default_printer()}, + {quiet, false}, + {verbose, false}]. + +default_printer() -> + fun (Tree, Options) -> erl_prettypr:format(Tree, Options) end. + +%% ===================================================================== +%% @spec file(Name) -> ok +%% @equiv file(Name, []) + +-spec file(file:filename()) -> 'ok'. +file(Name) -> + file(Name, []). + +%% ===================================================================== +%% @spec file(Name::filename(), Options::[term()]) -> ok +%% +%% @doc Tidies an Erlang source code file. +%% +%% Available options are: +%% <dl> +%% <dt>{backup_suffix, string()}</dt> +%% +%% <dd>Specifies the file name suffix to be used when a backup +%% file is created; the default value is `".bak"' +%% (cf. the `backups' option).</dd> +%% +%% <dt>{backups, boolean()}</dt> +%% +%% <dd>If the value is `true', existing files will be +%% renamed before new files are opened for writing. The new +%% names are formed by appending the string given by the +%% `backup_suffix' option to the original name. The +%% default value is `true'.</dd> +%% +%% <dt>{dir, filename()}</dt> +%% +%% <dd>Specifies the name of the directory in which the output +%% file is to be written. By default, the current directory is +%% used. If the value is an empty string, the current directory +%% is used. </dd> +%% +%% <dt>{outfile, filename()}</dt> +%% +%% <dd>Specifies the name of the file (without suffix) to which +%% the resulting source code is to be written. If this option is +%% not specified, the `Name' argument is used.</dd> +%% +%% <dt>{printer, Function}</dt> +%% <dd><ul> +%% <li>`Function = (syntaxTree()) -> string()'</li> +%% </ul> +%% +%% Specifies a function for prettyprinting Erlang syntax trees. +%% This is used for outputting the resulting module definition. +%% The function is assumed to return formatted text for the given +%% syntax tree, and should raise an exception if an error occurs. +%% The default formatting function calls +%% `erl_prettypr:format/2'.</dd> +%% +%% <dt>{test, boolean()}</dt> +%% +%% <dd>If the value is `true', no files will be modified; this +%% is typically most useful if the `verbose' flag is enabled, to +%% generate reports about the program files without affecting +%% them. The default value is `false'.</dd> +%% </dl> +%% +%% See the function `module/2' for further options. +%% +%% @see erl_prettypr:format/2 +%% @see module/2 + +-spec file(file:filename(), options()) -> 'ok'. +file(Name, Opts) -> + Parent = self(), + Child = spawn_link(fun () -> file_1(Parent, Name, Opts) end), + receive + {Child, ok} -> + ok; + {Child, {error, Reason}} -> + exit(Reason) + end. + +file_1(Parent, Name, Opts) -> + try file_2(Name, Opts) of + _ -> + Parent ! {self(), ok} + catch + throw:syntax_error -> % ignore syntax errors + Parent ! {self(), ok}; + error:Reason -> + Parent ! {self(), {error, Reason}} + end. 
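+
+%% An illustrative call of file/2 (a sketch; "foo.erl" and "tidy" are
+%% placeholder names):
+%%
+%%   erl_tidy:file("foo.erl",
+%%                 [{backup_suffix, ".orig"},
+%%                  {dir, "tidy"},
+%%                  {printer, fun (T, Opts) ->
+%%                                    erl_prettypr:format(T, Opts)
+%%                            end}])
+%%
+%% As in default_printer/0 above, the printer function is applied to
+%% the syntax tree and the option list.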
+ +file_2(Name, Opts) -> + Opts1 = Opts ++ file__defaults(), + Forms = read_module(Name, Opts1), + Comments = erl_comment_scan:file(Name), + Forms1 = erl_recomment:recomment_forms(Forms, Comments), + Tree = module(Forms1, [{file, Name} | Opts1]), + case proplists:get_bool(test, Opts1) of + true -> + ok; + false -> + write_module(Tree, Name, Opts1), + ok + end. + +read_module(Name, Opts) -> + verbose("reading module `~s'.", [filename(Name)], Opts), + case epp_dodger:parse_file(Name, [no_fail]) of + {ok, Forms} -> + check_forms(Forms, Name), + Forms; + {error, R} -> + error_read_file(Name), + exit({error, R}) + end. + +check_forms(Fs, Name) -> + Fun = fun (F) -> + case erl_syntax:type(F) of + error_marker -> + S = case erl_syntax:error_marker_info(F) of + {_, M, D} -> + M:format_error(D); + _ -> + "unknown error" + end, + report_error({Name, erl_syntax:get_pos(F), + "\n ~s"}, [S]), + exit(error); + _ -> + ok + end + end, + lists:foreach(Fun, Fs). + +%% Create the target directory and make a backup file if necessary, +%% then open the file, output the text and close the file +%% safely. Returns the file name. + +write_module(Tree, Name, Opts) -> + Name1 = proplists:get_value(outfile, Opts, filename(Name)), + Dir = filename(proplists:get_value(dir, Opts, "")), + File = if Dir =:= "" -> + Name1; + true -> + case file_type(Dir) of + {value, directory} -> + ok; + {value, _} -> + report_error("`~s' is not a directory.", + [filename(Dir)]), + exit(error); + none -> + case file:make_dir(Dir) of + ok -> + verbose("created directory `~s'.", + [filename(Dir)], Opts), + ok; + E -> + report_error("failed to create " + "directory `~s'.", + [filename(Dir)]), + exit({make_dir, E}) + end + end, + filename(filename:join(Dir, Name1)) + end, + case proplists:get_bool(backups, Opts) of + true -> + backup_file(File, Opts); + false -> + ok + end, + Printer = proplists:get_value(printer, Opts), + FD = open_output_file(File), + verbose("writing to file `~s'.", [File], Opts), + V = (catch {ok, output(FD, Printer, Tree, Opts)}), + ok = file:close(FD), + case V of + {ok, _} -> + File; + {'EXIT', R} -> + error_write_file(File), + exit(R); + R -> + error_write_file(File), + throw(R) + end. + +output(FD, Printer, Tree, Opts) -> + io:put_chars(FD, Printer(Tree, Opts)), + io:nl(FD). + +%% file_type(file:filename()) -> {value, Type} | none + +file_type(Name) -> + file_type(Name, false). + +is_symlink(Name) -> + file_type(Name, true) =:= {value, symlink}. + +file_type(Name, Links) -> + V = case Links of + true -> + catch file:read_link_info(Name); + false -> + catch file:read_file_info(Name) + end, + case V of + {ok, Env} -> + {value, Env#file_info.type}; + {error, enoent} -> + none; + {error, R} -> + error_read_file(Name), + exit({error, R}); + {'EXIT', R} -> + error_read_file(Name), + exit(R); + R -> + error_read_file(Name), + throw(R) + end. + +open_output_file(FName) -> + case catch file:open(FName, [write]) of + {ok, FD} -> + FD; + {error, R} -> + error_open_output(FName), + exit({error, R}); + {'EXIT', R} -> + error_open_output(FName), + exit(R); + R -> + error_open_output(FName), + exit(R) + end. + +%% If the file exists, rename it by appending the given suffix to the +%% file name. + +backup_file(Name, Opts) -> + case file_type(Name) of + {value, regular} -> + backup_file_1(Name, Opts); + {value, _} -> + error_backup_file(Name), + exit(error); + none -> + ok + end. + +%% The file should exist and be a regular file here. 
+ +backup_file_1(Name, Opts) -> + Suffix = proplists:get_value(backup_suffix, Opts, ""), + Dest = filename:join(filename:dirname(Name), + filename:basename(Name) ++ Suffix), + case catch file:rename(Name, Dest) of + ok -> + verbose("made backup of file `~s'.", [Name], Opts); + {error, R} -> + error_backup_file(Name), + exit({error, R}); + {'EXIT', R} -> + error_backup_file(Name), + exit(R); + R -> + error_backup_file(Name), + throw(R) + end. + + +%% ===================================================================== +%% @spec module(Forms) -> syntaxTree() +%% @equiv module(Forms, []) + +module(Forms) -> + module(Forms, []). + +%% ===================================================================== +%% @spec module(Forms, Options::[term()]) -> syntaxTree() +%% +%% Forms = syntaxTree() | [syntaxTree()] +%% syntaxTree() = erl_syntax:syntaxTree() +%% +%% @doc Tidies a syntax tree representation of a module +%% definition. The given `Forms' may be either a single +%% syntax tree of type `form_list', or a list of syntax +%% trees representing "program forms". In either case, +%% `Forms' must represent a single complete module +%% definition. The returned syntax tree has type +%% `form_list' and represents a tidied-up version of the +%% same source code. +%% +%% Available options are: +%% <dl> +%% <dt>{auto_export_vars, boolean()}</dt> +%% +%% <dd>If the value is `true', all matches +%% "`{V1, ..., Vn} = E'" where `E' is a +%% case-, if- or receive-expression whose branches all return +%% n-tuples (or explicitly throw exceptions) will be rewritten +%% to bind and export the variables `V1', ..., +%% `Vn' directly. The default value is `false'. +%% +%% For example: +%% <pre> +%% {X, Y} = case ... of +%% ... -> {17, foo()}; +%% ... -> {42, bar()} +%% end +%% </pre> +%% will be rewritten to: +%% <pre> +%% case ... of +%% ... -> X = 17, Y = foo(), {X, Y}; +%% ... -> X = 42, Y = bar(), {X, Y} +%% end +%% </pre></dd> +%% +%% <dt>{auto_list_comp, boolean()}</dt> +%% +%% <dd>If the value is `true', calls to `lists:map/2' and +%% `lists:filter/2' will be rewritten using list comprehensions. +%% The default value is `true'.</dd> +%% +%% <dt>{file, string()}</dt> +%% +%% <dd>Specifies the name of the file from which the source code +%% was taken. This is only used for generation of error +%% reports. The default value is the empty string.</dd> +%% +%% <dt>{idem, boolean()}</dt> +%% +%% <dd>If the value is `true', all options that affect how the +%% code is modified are set to "no changes". For example, to +%% only update guard tests, and nothing else, use the options +%% `[new_guard_tests, idem]'. (Recall that options closer to the +%% beginning of the list have higher precedence.)</dd> +%% +%% <dt>{keep_unused, boolean()}</dt> +%% +%% <dd>If the value is `true', unused functions will +%% not be removed from the code. The default value is +%% `false'.</dd> +%% +%% <dt>{new_guard_tests, boolean()}</dt> +%% +%% <dd>If the value is `true', guard tests will be updated to +%% use the new names, e.g. "`is_integer(X)'" instead of +%% "`integer(X)'". The default value is `true'. See also +%% `old_guard_tests'.</dd> +%% +%% <dt>{no_imports, boolean()}</dt> +%% +%% <dd>If the value is `true', all import statements will be +%% removed and calls to imported functions will be expanded to +%% explicit remote calls. The default value is `false'.</dd> +%% +%% <dt>{old_guard_tests, boolean()}</dt> +%% +%% <dd>If the value is `true', guard tests will be changed to +%% use the old names instead of the new ones, e.g. 
+%% "`integer(X)'" instead of "`is_integer(X)'". The default +%% value is `false'. This option overrides the `new_guard_tests' +%% option.</dd> +%% +%% <dt>{quiet, boolean()}</dt> +%% +%% <dd>If the value is `true', all information +%% messages and warning messages will be suppressed. The default +%% value is `false'.</dd> +%% +%% <dt>{rename, [{{atom(), atom(), integer()}, +%% {atom(), atom()}}]}</dt> +%% +%% <dd>The value is a list of pairs, associating tuples +%% `{Module, Name, Arity}' with tuples `{NewModule, NewName}', +%% specifying renamings of calls to remote functions. By +%% default, the value is the empty list. +%% +%% The renaming affects only remote calls (also when +%% disguised by import declarations); local calls within a +%% module are not affected, and no function definitions are +%% renamed. Since the arity cannot change, the new name is +%% represented by `{NewModule, NewName}' only. Only +%% calls matching the specified arity will match; multiple +%% entries are necessary for renaming calls to functions that +%% have the same module and function name, but different +%% arities. +%% +%% This option can also be used to override the default +%% renaming of calls which use obsolete function names.</dd> +%% +%% <dt>{verbose, boolean()}</dt> +%% +%% <dd>If the value is `true', progress messages will be output +%% while the program is running, unless the `quiet' option is +%% `true'. The default value is `false'.</dd> +%% +%% </dl> + +module(Forms, Opts) when is_list(Forms) -> + module(erl_syntax:form_list(Forms), Opts); +module(Forms, Opts) -> + Opts1 = proplists:expand(module__expansions(), Opts) + ++ module__defaults(), + File = proplists:get_value(file, Opts1, ""), + Forms1 = erl_syntax:flatten_form_list(Forms), + module_1(Forms1, File, Opts1). + +module__defaults() -> + [{auto_export_vars, false}, + {auto_list_comp, true}, + {keep_unused, false}, + {new_guard_tests, true}, + {no_imports, false}, + {old_guard_tests, false}, + {quiet, false}, + {verbose, false}]. + +module__expansions() -> + [{idem, [{auto_export_vars, false}, + {auto_list_comp, false}, + {keep_unused, true}, + {new_guard_tests, false}, + {no_imports, false}, + {old_guard_tests, false}]}]. + +module_1(Forms, File, Opts) -> + Info = analyze_forms(Forms, File), + Module = get_module_name(Info, File), + Attrs = get_module_attributes(Info), + Exports = get_module_exports(Info), + Imports = get_module_imports(Info), + Opts1 = check_imports(Imports, Opts, File), + Fs = erl_syntax:form_list_elements(Forms), + {Names, Defs} = collect_functions(Fs), + Exports1 = check_export_all(Attrs, Names, Exports), + Roots = ordsets:union(ordsets:from_list(Exports1), + hidden_uses(Fs, Imports)), + {Names1, Used, Imported, Defs1} = visit_used(Names, Defs, Roots, + Imports, Module, + Opts1), + Fs1 = update_forms(Fs, Defs1, Imported, Opts1), + Fs2 = filter_forms(Fs1, Names1, Used, Opts1), + rewrite(Forms, erl_syntax:form_list(Fs2)). + +analyze_forms(Forms, File) -> + case catch {ok, erl_syntax_lib:analyze_forms(Forms)} of + {ok, L1} -> + L1; + syntax_error -> + report_error({File, 0, "syntax error."}), + throw(syntax_error); + {'EXIT', R} -> + exit(R); + R -> + throw(R) + end. + +%% XXX: The following should be imported from erl_syntax_lib +-type key() :: atom(). +-type info_pair() :: {key(), any()}. + +-spec get_module_name([info_pair()], string()) -> atom(). 
+get_module_name(List, File) -> + case lists:keyfind(module, 1, List) of + {module, M} -> + M; + _ -> + report_error({File, 0, + "cannot determine module name."}), + exit(error) + end. + +get_module_attributes(List) -> + case lists:keyfind(attributes, 1, List) of + {attributes, As} -> + As; + _ -> + [] + end. + +-spec get_module_exports([info_pair()]) -> [{atom(), byte()}]. +get_module_exports(List) -> + case lists:keyfind(exports, 1, List) of + {exports, Es} -> + Es; + _ -> + [] + end. + +-spec get_module_imports([info_pair()]) -> [{atom(), atom()}]. +get_module_imports(List) -> + case lists:keyfind(imports, 1, List) of + {imports, Is} -> + flatten_imports(Is); + _ -> + [] + end. + +compile_attrs(As) -> + lists:append([if is_list(T) -> T; true -> [T] end + || {compile, T} <- As]). + +-spec flatten_imports([{atom(), [atom()]}]) -> [{atom(), atom()}]. +flatten_imports(Is) -> + [{F, M} || {M, Fs} <- Is, F <- Fs]. + +check_imports(Is, Opts, File) -> + case check_imports_1(lists:sort(Is)) of + true -> + Opts; + false -> + case proplists:get_bool(no_imports, Opts) of + true -> + warn({File, 0, + "conflicting import declarations - " + "will not expand imports."}, + [], Opts), + %% prevent expansion of imports + [{no_imports, false} | Opts]; + false -> + Opts + end + end. + +-spec check_imports_1([{atom(), atom()}]) -> boolean(). +check_imports_1([{F1, M1}, {F2, M2} | _Is]) when F1 =:= F2, M1 =/= M2 -> + false; +check_imports_1([_ | Is]) -> + check_imports_1(Is); +check_imports_1([]) -> + true. + +check_export_all(Attrs, Names, Exports) -> + case lists:member(export_all, compile_attrs(Attrs)) of + true -> + Exports ++ sets:to_list(Names); + false -> + Exports + end. + +filter_forms(Fs, Names, Used, Opts) -> + Keep = case proplists:get_bool(keep_unused, Opts) of + true -> + Names; + false -> + Used + end, + [F || F <- Fs, keep_form(F, Keep, Opts)]. + +keep_form(Form, Used, Opts) -> + case erl_syntax:type(Form) of + function -> + N = erl_syntax_lib:analyze_function(Form), + case sets:is_element(N, Used) of + false -> + report_removed_def("function", N, Form, Opts), + false; + true -> + true + end; + rule -> + N = erl_syntax_lib:analyze_rule(Form), + case sets:is_element(N, Used) of + false -> + report_removed_def("rule", N, Form, Opts), + false; + true -> + true + end; + attribute -> + case erl_syntax_lib:analyze_attribute(Form) of + {file, _} -> + false; + _ -> + true + end; + error_marker -> + false; + warning_marker -> + false; + eof_marker -> + false; + _ -> + true + end. + +report_removed_def(Type, {N, A}, Form, Opts) -> + File = proplists:get_value(file, Opts, ""), + report({File, erl_syntax:get_pos(Form), + "removing unused ~s `~w/~w'."}, + [Type, N, A], Opts). + +collect_functions(Forms) -> + lists:foldl( + fun (F, {Names, Defs}) -> + case erl_syntax:type(F) of + function -> + N = erl_syntax_lib:analyze_function(F), + {sets:add_element(N, Names), + dict:store(N, {F, []}, Defs)}; + rule -> + N = erl_syntax_lib:analyze_rule(F), + {sets:add_element(N, Names), + dict:store(N, {F, []}, Defs)}; + _ -> + {Names, Defs} + end + end, + {sets:new(), dict:new()}, + Forms). 
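+
+%% An illustrative use of module/2 on already-parsed forms (a sketch;
+%% "foo.erl" is a placeholder file name):
+%%
+%%   {ok, Forms} = epp_dodger:parse_file("foo.erl"),
+%%   Comments = erl_comment_scan:file("foo.erl"),
+%%   Forms1 = erl_recomment:recomment_forms(Forms, Comments),
+%%   Tree = module(Forms1, [{no_imports, true}, {verbose, true}]),
+%%   io:put_chars(erl_prettypr:format(Tree))
+%%
+%% This follows the same steps as file_2/2 above, but leaves writing
+%% the result to the caller.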
+ +update_forms([F | Fs], Defs, Imports, Opts) -> + case erl_syntax:type(F) of + function -> + N = erl_syntax_lib:analyze_function(F), + {F1, Fs1} = dict:fetch(N, Defs), + [F1 | lists:reverse(Fs1)] ++ update_forms(Fs, Defs, Imports, + Opts); + rule -> + N = erl_syntax_lib:analyze_rule(F), + {F1, Fs1} = dict:fetch(N, Defs), + [F1 | lists:reverse(Fs1)] ++ update_forms(Fs, Defs, Imports, + Opts); + attribute -> + [update_attribute(F, Imports, Opts) + | update_forms(Fs, Defs, Imports, Opts)]; + _ -> + [F | update_forms(Fs, Defs, Imports, Opts)] + end; +update_forms([], _, _, _) -> + []. + +update_attribute(F, Imports, Opts) -> + case erl_syntax_lib:analyze_attribute(F) of + {import, {M, Ns}} -> + Ns1 = ordsets:from_list([N || N <- Ns, + sets:is_element(N, Imports)]), + case ordsets:subtract(ordsets:from_list(Ns), Ns1) of + [] -> + ok; + Names -> + File = proplists:get_value(file, Opts, ""), + report({File, erl_syntax:get_pos(F), + "removing unused imports:~s"}, + [[io_lib:fwrite("\n\t`~w:~w/~w'", [M, N, A]) + || {N, A} <- Names]], Opts) + end, + Is = [make_fname(N) || N <- Ns1], + if Is =:= [] -> + %% This will be filtered out later. + erl_syntax:warning_marker(deleted); + true -> + F1 = erl_syntax:attribute(erl_syntax:atom(import), + [erl_syntax:atom(M), + erl_syntax:list(Is)]), + rewrite(F, F1) + end; + {export, Ns} -> + Es = [make_fname(N) || N <- ordsets:from_list(Ns)], + F1 = erl_syntax:attribute(erl_syntax:atom(export), + [erl_syntax:list(Es)]), + rewrite(F, F1); + _ -> + F + end. + +make_fname({F, A}) -> + erl_syntax:arity_qualifier(erl_syntax:atom(F), + erl_syntax:integer(A)). + +hidden_uses(Fs, Imports) -> + Used = lists:foldl(fun (F, S) -> + case erl_syntax:type(F) of + attribute -> + hidden_uses_1(F, S); + _ -> + S + end + end, + [], Fs), + ordsets:subtract(Used, ordsets:from_list([F || {F, _M} <- Imports])). + +hidden_uses_1(Tree, Used) -> + erl_syntax_lib:fold(fun hidden_uses_2/2, Used, Tree). + +hidden_uses_2(Tree, Used) -> + case erl_syntax:type(Tree) of + application -> + F = erl_syntax:application_operator(Tree), + case erl_syntax:type(F) of + atom -> + As = erl_syntax:application_arguments(Tree), + N = {erl_syntax:atom_value(F), length(As)}, + case is_auto_imported(N) of + true -> + Used; + false -> + ordsets:add_element(N, Used) + end; + _ -> + Used + end; + implicit_fun -> + F = erl_syntax:implicit_fun_name(Tree), + case catch {ok, erl_syntax_lib:analyze_function_name(F)} of + {ok, {Name, Arity} = N} + when is_atom(Name), is_integer(Arity) -> + ordsets:add_element(N, Used); + _ -> + Used + end; + _ -> + Used + end. + +-type context() :: 'guard_expr' | 'guard_test' | 'normal'. + +-record(env, {file :: file:filename(), + module, + current, + imports, + context = normal :: context(), + verbosity = 1 :: 0 | 1 | 2, + quiet = false :: boolean(), + no_imports = false :: boolean(), + spawn_funs = false :: boolean(), + auto_list_comp = true :: boolean(), + auto_export_vars = false :: boolean(), + new_guard_tests = true :: boolean(), + old_guard_tests = false :: boolean()}). + +-record(st, {varc, used, imported, vars, functions, new_forms, rename}). 
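+
+%% A short orientation for the traversal below: #env{} carries
+%% per-module context that stays mostly fixed during a visit (file
+%% name, module name, import table, guard context and the option
+%% flags), while #st{} carries the state threaded through the visit
+%% functions (the counter and known names used for generating fresh
+%% variables and function names, the sets of used and imported names,
+%% newly lifted forms, and the rename table).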
+ +visit_used(Names, Defs, Roots, Imports, Module, Opts) -> + File = proplists:get_value(file, Opts, ""), + NoImports = proplists:get_bool(no_imports, Opts), + Rename = proplists:append_values(rename, Opts), + loop(Roots, sets:new(), Defs, + #env{file = File, + module = Module, + imports = dict:from_list(Imports), + verbosity = verbosity(Opts), + no_imports = NoImports, + spawn_funs = proplists:get_bool(spawn_funs, Opts), + auto_list_comp = proplists:get_bool(auto_list_comp, Opts), + auto_export_vars = proplists:get_bool(auto_export_vars, + Opts), + new_guard_tests = proplists:get_bool(new_guard_tests, + Opts), + old_guard_tests = proplists:get_bool(old_guard_tests, + Opts)}, + #st{used = sets:from_list(Roots), + imported = sets:new(), + functions = Names, + rename = dict:from_list([X || {F1, F2} = X <- Rename, + is_remote_name(F1), + is_atom_pair(F2)])}). + +loop([F | Work], Seen0, Defs0, Env, St0) -> + case sets:is_element(F, Seen0) of + true -> + loop(Work, Seen0, Defs0, Env, St0); + false -> + Seen1 = sets:add_element(F, Seen0), + case dict:find(F, Defs0) of + {ok, {Form, Fs}} -> + Vars = erl_syntax_lib:variables(Form), + Form1 = erl_syntax_lib:annotate_bindings(Form, []), + {Form2, St1} = visit(Form1, Env#env{current = F}, + St0#st{varc = 1, + used = sets:new(), + vars = Vars, + new_forms = []}), + Fs1 = St1#st.new_forms ++ Fs, + Defs1 = dict:store(F, {Form2, Fs1}, Defs0), + Used = St1#st.used, + Work1 = sets:to_list(Used) ++ Work, + St2 = St1#st{used = sets:union(Used, St0#st.used)}, + loop(Work1, Seen1, Defs1, Env, St2); + error -> + %% Quietly ignore any names that have no definition. + loop(Work, Seen1, Defs0, Env, St0) + end + end; +loop([], _, Defs, _, St) -> + {St#st.functions, St#st.used, St#st.imported, Defs}. + +visit(Tree, Env, St0) -> + case erl_syntax:type(Tree) of + application -> + visit_application(Tree, Env, St0); + infix_expr -> + visit_infix_expr(Tree, Env, St0); + prefix_expr -> + visit_prefix_expr(Tree, Env, St0); + implicit_fun -> + visit_implicit_fun(Tree, Env, St0); + clause -> + visit_clause(Tree, Env, St0); + list_comp -> + visit_list_comp(Tree, Env, St0); + match_expr -> + visit_match_expr(Tree, Env, St0); + _ -> + visit_other(Tree, Env, St0) + end. + +visit_other(Tree, Env, St) -> + F = fun (T, S) -> visit(T, Env, S) end, + erl_syntax_lib:mapfold_subtrees(F, St, Tree). + +visit_list(Ts, Env, St0) -> + lists:mapfoldl(fun (T, S) -> visit(T, Env, S) end, St0, Ts). + +visit_implicit_fun(Tree, _Env, St0) -> + F = erl_syntax:implicit_fun_name(Tree), + case catch {ok, erl_syntax_lib:analyze_function_name(F)} of + {ok, {Name, Arity} = N} + when is_atom(Name), is_integer(Arity) -> + Used = sets:add_element(N, St0#st.used), + {Tree, St0#st{used = Used}}; + _ -> + %% symbolic funs do not count as uses of a function + {Tree, St0} + end. + +visit_clause(Tree, Env, St0) -> + %% We do not visit the patterns (for now, anyway). + Ps = erl_syntax:clause_patterns(Tree), + {G, St1} = case erl_syntax:clause_guard(Tree) of + none -> + {none, St0}; + G0 -> + visit(G0, Env#env{context = guard_test}, St0) + end, + {B, St2} = visit_list(erl_syntax:clause_body(Tree), Env, St1), + {rewrite(Tree, erl_syntax:clause(Ps, G, B)), St2}. + +visit_infix_expr(Tree, #env{context = guard_test}, St0) -> + %% Detect transition from guard test to guard expression. + visit_other(Tree, #env{context = guard_expr}, St0); +visit_infix_expr(Tree, Env, St0) -> + visit_other(Tree, Env, St0). 
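+
+%% In short, loop/5 above is a reachability walk over the local call
+%% graph: it starts from the root set passed to visit_used/6 (the
+%% exported names plus the names found by hidden_uses/2), visits each
+%% reachable definition once, adds every local name encountered to the
+%% "used" set, and pushes newly discovered names onto the work list.
+%% filter_forms/4 later drops definitions whose names were never used,
+%% unless the keep_unused option is set.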
+ +visit_prefix_expr(Tree, #env{context = guard_test}, St0) -> + %% Detect transition from guard test to guard expression. + visit_other(Tree, #env{context = guard_expr}, St0); +visit_prefix_expr(Tree, Env, St0) -> + visit_other(Tree, Env, St0). + +visit_application(Tree, Env, St0) -> + Env1 = case Env of + #env{context = guard_test} -> + Env#env{context = guard_expr}; + _ -> + Env + end, + {F, St1} = visit(erl_syntax:application_operator(Tree), Env1, St0), + {As, St2} = visit_list(erl_syntax:application_arguments(Tree), Env1, + St1), + case erl_syntax:type(F) of + atom -> + visit_atom_application(F, As, Tree, Env, St2); + implicit_fun -> + visit_named_fun_application(F, As, Tree, Env, St2); + fun_expr -> + visit_lambda_application(F, As, Tree, Env, St2); + _ -> + visit_nonlocal_application(F, As, Tree, Env, St2) + end. + +visit_application_final(F, As, Tree, St0) -> + {rewrite(Tree, erl_syntax:application(F, As)), St0}. + +revisit_application(F, As, Tree, Env, St0) -> + visit(rewrite(Tree, erl_syntax:application(F, As)), Env, St0). + +visit_atom_application(F, As, Tree, #env{context = guard_test} = Env, + St0) -> + N = erl_syntax:atom_value(F), + A = length(As), + N1 = case Env#env.old_guard_tests of + true -> + reverse_guard_test(N, A); + false -> + case Env#env.new_guard_tests of + true -> + rewrite_guard_test(N, A); + false -> + N + end + end, + if N1 =/= N -> + report({Env#env.file, erl_syntax:get_pos(F), + "changing guard test `~w' to `~w'."}, + [N, N1], Env#env.verbosity); + true -> + ok + end, + %% No need to revisit here. + F1 = rewrite(F, erl_syntax:atom(N1)), + visit_application_final(F1, As, Tree, St0); +visit_atom_application(F, As, Tree, #env{context = guard_expr}, St0) -> + %% Atom applications in guard expressions are never local calls. + visit_application_final(F, As, Tree, St0); +visit_atom_application(F, As, Tree, Env, St0) -> + N = {erl_syntax:atom_value(F), length(As)}, + case is_auto_imported(N) of + true -> + visit_bif_call(N, F, As, Tree, Env, St0); + false -> + case is_imported(N, Env) of + true -> + visit_import_application(N, F, As, Tree, Env, St0); + false -> + Used = sets:add_element(N, St0#st.used), + visit_application_final(F, As, Tree, + St0#st{used = Used}) + end + end. + +visit_import_application({N, A} = Name, F, As, Tree, Env, St0) -> + M = dict:fetch(Name, Env#env.imports), + Expand = case Env#env.no_imports of + true -> + true; + false -> + auto_expand_import({M, N, A}, St0) + end, + case Expand of + true -> + report({Env#env.file, erl_syntax:get_pos(F), + "expanding call to imported function `~w:~w/~w'."}, + [M, N, A], Env#env.verbosity), + F1 = erl_syntax:module_qualifier(erl_syntax:atom(M), + erl_syntax:atom(N)), + revisit_application(rewrite(F, F1), As, Tree, Env, St0); + false -> + Is = sets:add_element(Name, St0#st.imported), + visit_application_final(F, As, Tree, St0#st{imported = Is}) + end. 
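+
+%% A worked example of the import handling above (illustrative; the
+%% variable names are placeholders): given
+%%
+%%   -import(lists, [reverse/1]).
+%%
+%% a call reverse(Xs) is normally kept as it is and reverse/1 is
+%% recorded as a used import, but with the no_imports option (or when
+%% auto_expand_import/2 below says so) the call is rewritten to the
+%% explicit remote call lists:reverse(Xs), and the now-unused import
+%% entry is later removed by update_attribute/3.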
+ +visit_bif_call({apply, 2}, F, [E, Args] = As, Tree, Env, St0) -> + case erl_syntax:is_proper_list(Args) of + true -> + report({Env#env.file, erl_syntax:get_pos(F), + "changing use of `apply/2' " + "to direct function call."}, + [], Env#env.verbosity), + As1 = erl_syntax:list_elements(Args), + revisit_application(E, As1, Tree, Env, St0); + false -> + visit_application_final(F, As, Tree, St0) + end; +visit_bif_call({apply, 3}, F, [M, N, Args] = As, Tree, Env, St0) -> + case erl_syntax:is_proper_list(Args) of + true -> + report({Env#env.file, erl_syntax:get_pos(F), + "changing use of `apply/3' " + "to direct remote call."}, + [], Env#env.verbosity), + F1 = rewrite(F, erl_syntax:module_qualifier(M, N)), + As1 = erl_syntax:list_elements(Args), + visit_nonlocal_application(F1, As1, Tree, Env, St0); + false -> + visit_application_final(F, As, Tree, St0) + end; +visit_bif_call({spawn, 3} = N, F, [_, _, _] = As, Tree, Env, St0) -> + visit_spawn_call(N, F, [], As, Tree, Env, St0); +visit_bif_call({spawn_link, 3} = N, F, [_, _, _] = As, Tree, Env, + St0) -> + visit_spawn_call(N, F, [], As, Tree, Env, St0); +visit_bif_call({spawn, 4} = N, F, [A | [_, _, _] = As], Tree, Env, + St0) -> + visit_spawn_call(N, F, [A], As, Tree, Env, St0); +visit_bif_call({spawn_link, 4} = N, F, [A | [_, _, _] = As], Tree, Env, + St0) -> + visit_spawn_call(N, F, [A], As, Tree, Env, St0); +visit_bif_call(_, F, As, Tree, _Env, St0) -> + visit_application_final(F, As, Tree, St0). + +visit_spawn_call({N, A}, F, Ps, [A1, A2, A3] = As, Tree, + #env{spawn_funs = true} = Env, St0) -> + case erl_syntax:is_proper_list(A3) of + true -> + report({Env#env.file, erl_syntax:get_pos(F), + "changing use of `~w/~w' to `~w/~w' with a fun."}, + [N, A, N, 1 + length(Ps)], Env#env.verbosity), + F1 = case erl_syntax:is_atom(A1, Env#env.module) of + true -> + A2; % calling self + false -> + clone(A1, + erl_syntax:module_qualifier(A1, A2)) + end, + %% Need to do some scoping tricks here to make sure the + %% arguments are evaluated by the parent, not by the spawned + %% process. + As1 = erl_syntax:list_elements(A3), + {Vs, St1} = new_variables(length(As1), St0), + E1 = clone(F1, erl_syntax:application(F1, Vs)), + C1 = clone(E1, erl_syntax:clause([], [E1])), + E2 = clone(C1, erl_syntax:fun_expr([C1])), + C2 = clone(E2, erl_syntax:clause(Vs, [], [E2])), + E3 = clone(C2, erl_syntax:fun_expr([C2])), + E4 = clone(E3, erl_syntax:application(E3, As1)), + E5 = erl_syntax_lib:annotate_bindings(E4, get_env(A1)), + {E6, St2} = visit(E5, Env, St1), + F2 = rewrite(F, erl_syntax:atom(N)), + visit_nonlocal_application(F2, Ps ++ [E6], Tree, Env, St2); + false -> + visit_application_final(F, Ps ++ As, Tree, St0) + end; +visit_spawn_call(_, F, Ps, As, Tree, _Env, St0) -> + visit_application_final(F, Ps ++ As, Tree, St0). + +visit_named_fun_application(F, As, Tree, Env, St0) -> + Name = erl_syntax:implicit_fun_name(F), + case catch {ok, erl_syntax_lib:analyze_function_name(Name)} of + {ok, {A, N}} when is_atom(A), is_integer(N), N =:= length(As) -> + case is_nonlocal({A, N}, Env) of + true -> + %% Making this a direct call would be an error. + visit_application_final(F, As, Tree, St0); + false -> + report({Env#env.file, erl_syntax:get_pos(F), + "changing application of implicit fun " + "to direct local call."}, + [], Env#env.verbosity), + Used = sets:add_element({A, N}, St0#st.used), + F1 = rewrite(F, erl_syntax:atom(A)), + revisit_application(F1, As, Tree, Env, + St0#st{used = Used}) + end; + _ -> + visit_application_final(F, As, Tree, St0) + end. 
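+
+%% Worked examples of the rewrites above (illustrative; the names are
+%% placeholders):
+%%
+%%   apply(F, [X])            becomes   F(X)
+%%   apply(M, F, [X, Y])      becomes   M:F(X, Y)
+%%
+%% provided the argument list is a proper list term.  When the
+%% spawn_funs option is set, a call such as spawn(m, f, [X]) is
+%% likewise rewritten to spawn/1 applied to a fun, wrapped so that X
+%% is still evaluated in the parent process rather than in the spawned
+%% one.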
+ +visit_lambda_application(F, As, Tree, Env, St0) -> + A = erl_syntax:fun_expr_arity(F), + case A =:= length(As) of + true -> + report({Env#env.file, erl_syntax:get_pos(F), + "changing application of fun-expression " + "to local function call."}, + [], Env#env.verbosity), + {Base, _} = Env#env.current, + Free = [erl_syntax:variable(V) || V <- get_free_vars(F)], + N = length(Free), + A1 = A + N, + {Name, St1} = new_fname({Base, A1}, St0), + Cs = augment_clauses(erl_syntax:fun_expr_clauses(F), Free), + F1 = erl_syntax:atom(Name), + New = rewrite(F, erl_syntax:function(F1, Cs)), + Used = sets:add_element({Name, A1}, St1#st.used), + Forms = [New | St1#st.new_forms], + St2 = St1#st{new_forms = Forms, used = Used}, + visit_application_final(F1, As ++ Free, Tree, St2); + false -> + warn({Env#env.file, erl_syntax:get_pos(F), + "arity mismatch in fun-expression application."}, + [], Env#env.verbosity), + visit_application_final(F, As, Tree, St0) + end. + +augment_clauses(Cs, Vs) -> + [begin + Ps = erl_syntax:clause_patterns(C), + G = erl_syntax:clause_guard(C), + Es = erl_syntax:clause_body(C), + rewrite(C, erl_syntax:clause(Ps ++ Vs, G, Es)) + end + || C <- Cs]. + +visit_nonlocal_application(F, As, Tree, Env, St0) -> + case erl_syntax:type(F) of + tuple -> + case erl_syntax:tuple_elements(F) of + [X1, X2] -> + report({Env#env.file, erl_syntax:get_pos(F), + "changing application of 2-tuple " + "to direct remote call."}, + [], Env#env.verbosity), + F1 = erl_syntax:module_qualifier(X1, X2), + revisit_application(rewrite(F, F1), As, Tree, Env, + St0); + _ -> + visit_application_final(F, As, Tree, St0) + end; + module_qualifier -> + case catch {ok, erl_syntax_lib:analyze_function_name(F)} of + {ok, {M, N}} when is_atom(M), is_atom(N) -> + visit_remote_application({M, N, length(As)}, F, As, + Tree, Env, St0); + _ -> + visit_application_final(F, As, Tree, St0) + end; + _ -> + visit_application_final(F, As, Tree, St0) + end. 
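+
+%% Worked examples of the two rewrites above (illustrative; the names
+%% are placeholders):
+%%
+%%   - an application whose operator is a 2-tuple, e.g. {m, f}(X), is
+%%     rewritten to the direct remote call m:f(X);
+%%
+%%   - a directly applied fun-expression inside some function f/1,
+%%     e.g. (fun (A) -> A + N end)(X), is lifted to a new local
+%%     function, roughly f_1(A, N) -> A + N, and the call site becomes
+%%     f_1(X, N), with the free variable N passed as an extra argument.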
+ +%% --- lists:append/2 and lists:subtract/2 --- +visit_remote_application({lists, append, 2}, F, [A1, A2], Tree, Env, + St0) -> + report({Env#env.file, erl_syntax:get_pos(F), + "replacing call to `lists:append/2' " + "with the `++' operator."}, + [], Env#env.verbosity), + Tree1 = erl_syntax:infix_expr(A1, erl_syntax:operator('++'), A2), + visit(rewrite(Tree, Tree1), Env, St0); +visit_remote_application({lists, subtract, 2}, F, [A1, A2], Tree, Env, + St0) -> + report({Env#env.file, erl_syntax:get_pos(F), + "replacing call to `lists:subtract/2' " + "with the `--' operator."}, + [], Env#env.verbosity), + Tree1 = erl_syntax:infix_expr(A1, erl_syntax:operator('--'), A2), + visit(rewrite(Tree, Tree1), Env, St0); +%% --- lists:map/2 and lists:filter/2 --- +visit_remote_application({lists, filter, 2}, F, [A1, A2] = As, Tree, + Env, St0) -> + case Env#env.auto_list_comp + and (erl_syntax:type(A1) =/= variable) + and (get_var_exports(A1) =:= []) + and (get_var_exports(A2) =:= []) of + true -> + report({Env#env.file, erl_syntax:get_pos(F), + "replacing call to `lists:filter/2' " + "with a list comprehension."}, + [], Env#env.verbosity), + {V, St1} = new_variable(St0), + G = clone(A2, erl_syntax:generator(V, A2)), + T = clone(A1, erl_syntax:application(A1, [V])), + L = erl_syntax:list_comp(V, [G, T]), + L1 = erl_syntax_lib:annotate_bindings(L, get_env(Tree)), + visit(rewrite(Tree, L1), Env, St1); + false -> + visit_application_final(F, As, Tree, St0) + end; +visit_remote_application({lists, map, 2}, F, [A1, A2] = As, Tree, Env, + St0) -> + case Env#env.auto_list_comp + and (erl_syntax:type(A1) =/= variable) + and (get_var_exports(A1) =:= []) + and (get_var_exports(A2) =:= []) of + true -> + report({Env#env.file, erl_syntax:get_pos(F), + "replacing call to `lists:map/2' " + "with a list comprehension."}, + [], Env#env.verbosity), + {V, St1} = new_variable(St0), + T = clone(A1, erl_syntax:application(A1, [V])), + G = clone(A2, erl_syntax:generator(V, A2)), + L = erl_syntax:list_comp(T, [G]), + L1 = erl_syntax_lib:annotate_bindings(L, get_env(Tree)), + visit(rewrite(Tree, L1), Env, St1); + false -> + visit_application_final(F, As, Tree, St0) + end; +%% --- all other functions --- +visit_remote_application({M, N, A} = Name, F, As, Tree, Env, St) -> + case is_auto_imported(Name) of + true -> + %% We don't remove the qualifier - it might be there for the + %% sake of clarity. + visit_bif_call({N, A}, F, As, Tree, Env, St); + false -> + case rename_remote_call(Name, St) of + {M1, N1} -> + report({Env#env.file, erl_syntax:get_pos(F), + "updating obsolete call to `~w:~w/~w' " + "to use `~w:~w/~w' instead."}, + [M, N, A, M1, N1, A], Env#env.verbosity), + M2 = erl_syntax:atom(M1), + N2 = erl_syntax:atom(N1), + F1 = erl_syntax:module_qualifier(M2, N2), + revisit_application(rewrite(F, F1), As, Tree, Env, + St); + false -> + visit_application_final(F, As, Tree, St) + end + end. + +-spec auto_expand_import(mfa(), #st{}) -> boolean(). + +auto_expand_import({lists, append, 2}, _St) -> true; +auto_expand_import({lists, subtract, 2}, _St) -> true; +auto_expand_import({lists, filter, 2}, _St) -> true; +auto_expand_import({lists, map, 2}, _St) -> true; +auto_expand_import(Name, St) -> + case is_auto_imported(Name) of + true -> + true; + false -> + rename_remote_call(Name, St) =/= false + end. 
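+
+%% Worked examples of the rewrites above (illustrative; F, P, A, B and
+%% L are placeholders):
+%%
+%%   lists:append(A, B)     becomes   A ++ B
+%%   lists:subtract(A, B)   becomes   A -- B
+%%   lists:map(F, L)        becomes   [F(V1) || V1 <- L]
+%%   lists:filter(P, L)     becomes   [V1 || V1 <- L, P(V1)]
+%%
+%% The last two are only done when auto_list_comp is enabled, the fun
+%% argument is not a plain variable, and neither argument exports any
+%% variables; V1 stands for a fresh variable from new_variable/1.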
+ +visit_list_comp(Tree, Env, St0) -> + Es = erl_syntax:list_comp_body(Tree), + {Es1, St1} = visit_list_comp_body(Es, Env, St0), + {T, St2} = visit(erl_syntax:list_comp_template(Tree), Env, St1), + {rewrite(Tree, erl_syntax:list_comp(T, Es1)), St2}. + +visit_list_comp_body_join(Env) -> + fun (E, St0) -> + case is_generator(E) of + true -> + visit_generator(E, Env, St0); + false -> + visit_filter(E, Env, St0) + end + end. + +visit_list_comp_body(Es, Env, St0) -> + lists:mapfoldl(visit_list_comp_body_join(Env), St0, Es). + +%% 'visit_filter' also handles uninteresting generators. + +visit_filter(E, Env, St0) -> + visit(E, Env, St0). + +%% "interesting" generators have the form V <- [V || ...]; this can be +%% unfolded as long as no bindings become erroneously shadowed. + +visit_generator(G, Env, St0) -> + P = erl_syntax:generator_pattern(G), + case erl_syntax:type(P) of + variable -> + B = erl_syntax:generator_body(G), + case erl_syntax:type(B) of + list_comp -> + T = erl_syntax:list_comp_template(B), + case erl_syntax:type(T) of + variable -> + visit_generator_1(G, Env, St0); + _ -> + visit_filter(G, Env, St0) + end; + _ -> + visit_filter(G, Env, St0) + end; + _ -> + visit_filter(G, Env, St0) + end. + +visit_generator_1(G, Env, St0) -> + recommend({Env#env.file, erl_syntax:get_pos(G), + "unfold that this nested list comprehension can be unfolded " + "by hand to get better efficiency."}, + [], Env#env.verbosity), + visit_filter(G, Env, St0). + +visit_match_expr(Tree, Env, St0) -> + %% We do not visit the pattern (for now, anyway). + P = erl_syntax:match_expr_pattern(Tree), + {B, St1} = visit(erl_syntax:match_expr_body(Tree), Env, St0), + case erl_syntax:type(P) of + tuple -> + Ps = erl_syntax:tuple_elements(P), + case lists:all(fun is_variable/1, Ps) of + true -> + Vs = lists:sort([erl_syntax:variable_name(X) + || X <- Ps]), + case ordsets:is_set(Vs) of + true -> + Xs = get_var_exports(B), + case ordsets:intersection(Vs, Xs) of + [] -> + visit_match_body(Ps, P, B, Tree, + Env, St1); + _ -> + visit_match_expr_final(P, B, Tree, + Env, St1) + end; + false -> + visit_match_expr_final(P, B, Tree, Env, St1) + end; + false -> + visit_match_expr_final(P, B, Tree, Env, St1) + end; + _ -> + visit_match_expr_final(P, B, Tree, Env, St1) + end. + +visit_match_expr_final(P, B, Tree, _Env, St0) -> + {rewrite(Tree, erl_syntax:match_expr(P, B)), St0}. 
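+
+%% An example of an "interesting" generator as described above
+%% (illustrative; the names are placeholders): in
+%%
+%%   [g(X) || X <- [Y || Y <- L, p(Y)]]
+%%
+%% the generator pattern is a variable and its body is a list
+%% comprehension whose template is a variable, so visit_generator_1/3
+%% only issues a recommendation to unfold the nested comprehension by
+%% hand; no automatic rewrite is performed.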
+ +visit_match_body(_Ps, P, B, Tree, #env{auto_export_vars = false} = Env, + St0) -> + visit_match_expr_final(P, B, Tree, Env, St0); +visit_match_body(Ps, P, B, Tree, Env, St0) -> + case erl_syntax:type(B) of + case_expr -> + Cs = erl_syntax:case_expr_clauses(B), + case multival_clauses(Cs, length(Ps), Ps) of + {true, Cs1} -> + report_export_vars(Env#env.file, + erl_syntax:get_pos(B), + "case", Env#env.verbosity), + A = erl_syntax:case_expr_argument(B), + Tree1 = erl_syntax:case_expr(A, Cs1), + {rewrite(Tree, Tree1), St0}; + false -> + visit_match_expr_final(P, B, Tree, Env, St0) + end; + if_expr -> + Cs = erl_syntax:if_expr_clauses(B), + case multival_clauses(Cs, length(Ps), Ps) of + {true, Cs1} -> + report_export_vars(Env#env.file, + erl_syntax:get_pos(B), + "if", Env#env.verbosity), + Tree1 = erl_syntax:if_expr(Cs1), + {rewrite(Tree, Tree1), St0}; + false -> + visit_match_expr_final(P, B, Tree, Env, St0) + end; + cond_expr -> + Cs = erl_syntax:cond_expr_clauses(B), + case multival_clauses(Cs, length(Ps), Ps) of + {true, Cs1} -> + report_export_vars(Env#env.file, + erl_syntax:get_pos(B), + "cond", Env#env.verbosity), + Tree1 = erl_syntax:cond_expr(Cs1), + {rewrite(Tree, Tree1), St0}; + false -> + visit_match_expr_final(P, B, Tree, Env, St0) + end; + receive_expr -> + %% Handle the timeout case as an extra clause. + As = erl_syntax:receive_expr_action(B), + C = erl_syntax:clause([], As), + Cs = erl_syntax:receive_expr_clauses(B), + case multival_clauses([C | Cs], length(Ps), Ps) of + {true, [C1 | Cs1]} -> + report_export_vars(Env#env.file, + erl_syntax:get_pos(B), + "receive", Env#env.verbosity), + T = erl_syntax:receive_expr_timeout(B), + As1 = erl_syntax:clause_body(C1), + Tree1 = erl_syntax:receive_expr(Cs1, T, As1), + {rewrite(Tree, Tree1), St0}; + false -> + visit_match_expr_final(P, B, Tree, Env, St0) + end; + _ -> + visit_match_expr_final(P, B, Tree, Env, St0) + end. + +multival_clauses(Cs, N, Vs) -> + multival_clauses(Cs, N, Vs, []). + +multival_clauses([C | Cs], N, Vs, Cs1) -> + case erl_syntax:clause_body(C) of + [] -> + false; + Es -> + E = lists:last(Es), + case erl_syntax:type(E) of + tuple -> + Ts = erl_syntax:tuple_elements(E), + if length(Ts) =:= N -> + Bs = make_matches(E, Vs, Ts), + Es1 = replace_last(Es, Bs), + Ps = erl_syntax:clause_patterns(C), + G = erl_syntax:clause_guard(C), + C1 = erl_syntax:clause(Ps, G, Es1), + multival_clauses(Cs, N, Vs, + [rewrite(C, C1) | Cs1]); + true -> + false + end; + _ -> + case erl_syntax_lib:is_fail_expr(E) of + true -> + %% We must add dummy bindings here so we + %% don't introduce compilation errors due to + %% "unsafe" variable exports. + Bs = make_matches(Vs, + erl_syntax:atom(false)), + Es1 = replace_last(Es, Bs ++ [E]), + Ps = erl_syntax:clause_patterns(C), + G = erl_syntax:clause_guard(C), + C1 = erl_syntax:clause(Ps, G, Es1), + multival_clauses(Cs, N, Vs, + [rewrite(C, C1) | Cs1]); + false -> + false + end + end + end; +multival_clauses([], _N, _Vs, Cs) -> + {true, lists:reverse(Cs)}. + +make_matches(E, Vs, Ts) -> + case make_matches(Vs, Ts) of + [] -> + []; + [B | Bs] -> + [rewrite(E, B) | Bs] % preserve comments on E (but not B) + end. + +make_matches([V | Vs], [T | Ts]) -> + [erl_syntax:match_expr(V, T) | make_matches(Vs, Ts)]; +make_matches([V | Vs], T) when T =/= [] -> + [erl_syntax:match_expr(V, T) | make_matches(Vs, T)]; +make_matches([], _) -> + []. + +rename_remote_call(F, St) -> + case dict:find(F, St#st.rename) of + error -> + rename_remote_call_1(F); + {ok, F1} -> F1 + end. 
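+
+%% An example of the rename option handled above (illustrative; the
+%% module and function names are made up): with the option
+%%
+%%   {rename, [{{foo, bar, 2}, {newmod, bar}}]}
+%%
+%% every remote call foo:bar(A, B) is rewritten to newmod:bar(A, B).
+%% Entries given this way take precedence over the built-in renamings
+%% in rename_remote_call_1/1 below.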
+ +-spec rename_remote_call_1(mfa()) -> {atom(), atom()} | 'false'. +rename_remote_call_1({dict, dict_to_list, 1}) -> {dict, to_list}; +rename_remote_call_1({dict, list_to_dict, 1}) -> {dict, from_list}; +rename_remote_call_1({erl_eval, arg_list, 2}) -> {erl_eval, expr_list}; +rename_remote_call_1({erl_eval, arg_list, 3}) -> {erl_eval, expr_list}; +rename_remote_call_1({erl_eval, seq, 2}) -> {erl_eval, exprs}; +rename_remote_call_1({erl_eval, seq, 3}) -> {erl_eval, exprs}; +rename_remote_call_1({erl_pp, seq, 1}) -> {erl_eval, seq}; +rename_remote_call_1({erl_pp, seq, 2}) -> {erl_eval, seq}; +rename_remote_call_1({erlang, info, 1}) -> {erlang, system_info}; +rename_remote_call_1({io, parse_erl_seq, 1}) -> {io, parse_erl_exprs}; +rename_remote_call_1({io, parse_erl_seq, 2}) -> {io, parse_erl_exprs}; +rename_remote_call_1({io, parse_erl_seq, 3}) -> {io, parse_erl_exprs}; +rename_remote_call_1({io, scan_erl_seq, 1}) -> {io, scan_erl_exprs}; +rename_remote_call_1({io, scan_erl_seq, 2}) -> {io, scan_erl_exprs}; +rename_remote_call_1({io, scan_erl_seq, 3}) -> {io, scan_erl_exprs}; +rename_remote_call_1({io_lib, reserved_word, 1}) -> {erl_scan, reserved_word}; +rename_remote_call_1({io_lib, scan, 1}) -> {erl_scan, string}; +rename_remote_call_1({io_lib, scan, 2}) -> {erl_scan, string}; +rename_remote_call_1({io_lib, scan, 3}) -> {erl_scan, tokens}; +rename_remote_call_1({orddict, dict_to_list, 1}) -> {orddict, to_list}; +rename_remote_call_1({orddict, list_to_dict, 1}) -> {orddict, from_list}; +rename_remote_call_1({ordsets, list_to_set, 1}) -> {ordsets, from_list}; +rename_remote_call_1({ordsets, new_set, 0}) -> {ordsets, new}; +rename_remote_call_1({ordsets, set_to_list, 1}) -> {ordsets, to_list}; +rename_remote_call_1({ordsets, subset, 2}) -> {ordsets, is_subset}; +rename_remote_call_1({sets, list_to_set, 1}) -> {sets, from_list}; +rename_remote_call_1({sets, new_set, 0}) -> {sets, new}; +rename_remote_call_1({sets, set_to_list, 1}) -> {sets, to_list}; +rename_remote_call_1({sets, subset, 2}) -> {sets, is_subset}; +rename_remote_call_1({string, index, 2}) -> {string, str}; +rename_remote_call_1({unix, cmd, 1}) -> {os, cmd}; +rename_remote_call_1(_) -> false. + +-spec rewrite_guard_test(atom(), byte()) -> atom(). +rewrite_guard_test(atom, 1) -> is_atom; +rewrite_guard_test(binary, 1) -> is_binary; +rewrite_guard_test(constant, 1) -> is_constant; +rewrite_guard_test(float, 1) -> is_float; +rewrite_guard_test(function, 1) -> is_function; +rewrite_guard_test(function, 2) -> is_function; +rewrite_guard_test(integer, 1) -> is_integer; +rewrite_guard_test(list, 1) -> is_list; +rewrite_guard_test(number, 1) -> is_number; +rewrite_guard_test(pid, 1) -> is_pid; +rewrite_guard_test(port, 1) -> is_port; +rewrite_guard_test(reference, 1) -> is_reference; +rewrite_guard_test(tuple, 1) -> is_tuple; +rewrite_guard_test(record, 2) -> is_record; +rewrite_guard_test(record, 3) -> is_record; +rewrite_guard_test(N, _A) -> N. + +-spec reverse_guard_test(atom(), byte()) -> atom(). 
+reverse_guard_test(is_atom, 1) -> atom; +reverse_guard_test(is_binary, 1) -> binary; +reverse_guard_test(is_constant, 1) -> constant; +reverse_guard_test(is_float, 1) -> float; +reverse_guard_test(is_function, 1) -> function; +reverse_guard_test(is_function, 2) -> function; +reverse_guard_test(is_integer, 1) -> integer; +reverse_guard_test(is_list, 1) -> list; +reverse_guard_test(is_number, 1) -> number; +reverse_guard_test(is_pid, 1) -> pid; +reverse_guard_test(is_port, 1) -> port; +reverse_guard_test(is_reference, 1) -> reference; +reverse_guard_test(is_tuple, 1) -> tuple; +reverse_guard_test(is_record, 2) -> record; +reverse_guard_test(is_record, 3) -> record; +reverse_guard_test(N, _A) -> N. + + +%% ===================================================================== +%% Utility functions + +is_remote_name({M,F,A}) when is_atom(M), is_atom(F), is_integer(A) -> true; +is_remote_name(_) -> false. + +is_atom_pair({M,F}) when is_atom(M), is_atom(F) -> true; +is_atom_pair(_) -> false. + +replace_last([_E], Xs) -> + Xs; +replace_last([E | Es], Xs) -> + [E | replace_last(Es, Xs)]. + +is_generator(E) -> + erl_syntax:type(E) =:= generator. + +is_variable(E) -> + erl_syntax:type(E) =:= variable. + +new_variables(N, St0) when N > 0 -> + {V, St1} = new_variable(St0), + {Vs, St2} = new_variables(N - 1, St1), + {[V | Vs], St2}; +new_variables(0, St) -> + {[], St}. + +new_variable(St0) -> + Fun = fun (N) -> + list_to_atom("V" ++ integer_to_list(N)) + end, + Vs = St0#st.vars, + {Name, N} = new_name(St0#st.varc, Fun, Vs), + St1 = St0#st{varc = N + 1, vars = sets:add_element(Name, Vs)}, + {erl_syntax:variable(Name), St1}. + +new_fname({F, A}, St0) -> + Base = atom_to_list(F), + Fun = fun (N) -> + {list_to_atom(Base ++ "_" ++ integer_to_list(N)), A} + end, + Fs = St0#st.functions, + {{F1, _A} = Name, _N} = new_name(1, Fun, Fs), + {F1, St0#st{functions = sets:add_element(Name, Fs)}}. + +new_name(N, F, Set) -> + Name = F(N), + case sets:is_element(Name, Set) of + true -> + new_name(N + 1, F, Set); + false -> + {Name, N} + end. + +is_imported(F, Env) -> + dict:is_key(F, Env#env.imports). + +is_auto_imported({erlang, N, A}) -> + is_auto_imported({N, A}); +is_auto_imported({_, _N, _A}) -> + false; +is_auto_imported({N, A}) -> + erl_internal:bif(N, A). + +is_nonlocal(N, Env) -> + case is_imported(N, Env) of + true -> + true; + false -> + is_auto_imported(N) + end. + +get_var_exports(Node) -> + get_var_exports_1(erl_syntax:get_ann(Node)). + +get_var_exports_1([{bound, B} | _Bs]) -> B; +get_var_exports_1([_ | Bs]) -> get_var_exports_1(Bs); +get_var_exports_1([]) -> []. + +get_free_vars(Node) -> + get_free_vars_1(erl_syntax:get_ann(Node)). + +get_free_vars_1([{free, B} | _Bs]) -> B; +get_free_vars_1([_ | Bs]) -> get_free_vars_1(Bs); +get_free_vars_1([]) -> []. + +filename([C | T]) when is_integer(C), C > 0, C =< 255 -> + [C | filename(T)]; +filename([H|T]) -> + filename(H) ++ filename(T); +filename([]) -> + []; +filename(N) when is_atom(N) -> + atom_to_list(N); +filename(N) -> + report_error("bad filename: `~P'.", [N, 25]), + exit(error). + +get_env(Tree) -> + case lists:keyfind(env, 1, erl_syntax:get_ann(Tree)) of + {env, Env} -> + Env; + _ -> + [] + end. + +rewrite(Source, Target) -> + erl_syntax:copy_attrs(Source, Target). + +clone(Source, Target) -> + erl_syntax:copy_pos(Source, Target). + + +%% ===================================================================== +%% Reporting + +report_export_vars(F, L, Type, Opts) -> + report({F, L, "rewrote ~s-expression to export variables."}, + [Type], Opts). 
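+
+%% Summary of the reporting helpers below: the verbosity argument is
+%% either an integer level or an option list; `quiet' maps to level 0,
+%% `verbose' to level 2, an explicit integer value for `verbose' is
+%% used as-is, and the default is level 1. Errors are always printed;
+%% warnings and recommendations are printed at level 1 and above, and
+%% `verbose' progress messages only at level 2 and above.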
+ +error_read_file(Name) -> + report_error("error reading file `~s'.", [filename(Name)]). + +error_write_file(Name) -> + report_error("error writing to file `~s'.", [filename(Name)]). + +error_backup_file(Name) -> + report_error("could not create backup of file `~s'.", + [filename(Name)]). + +error_open_output(Name) -> + report_error("cannot open file `~s' for output.", [filename(Name)]). + +verbosity(Opts) -> + case proplists:get_bool(quiet, Opts) of + true -> 0; + false -> + case proplists:get_value(verbose, Opts) of + true -> 2; + N when is_integer(N) -> N; + _ -> 1 + end + end. + +report_error(D) -> + report_error(D, []). + +report_error({F, L, D}, Vs) -> + report({F, L, {error, D}}, Vs); +report_error(D, Vs) -> + report({error, D}, Vs). + +%% warn(D, N) -> +%% warn(D, [], N). + +warn({F, L, D}, Vs, N) -> + report({F, L, {warning, D}}, Vs, N); +warn(D, Vs, N) -> + report({warning, D}, Vs, N). + +recommend(D, Vs, N) -> + report({recommend, D}, Vs, N). + +verbose(D, Vs, N) -> + report(2, D, Vs, N). + +report(D, Vs) -> + report(D, Vs, 1). + +report(D, Vs, N) -> + report(1, D, Vs, N). + +report(Level, _D, _Vs, N) when is_integer(N), N < Level -> + ok; +report(_Level, D, Vs, N) when is_integer(N) -> + io:put_chars(format(D, Vs)); +report(Level, D, Vs, Options) when is_list(Options) -> + report(Level, D, Vs, verbosity(Options)). + +format({error, D}, Vs) -> + ["error: ", format(D, Vs)]; +format({warning, D}, Vs) -> + ["warning: ", format(D, Vs)]; +format({recommend, D}, Vs) -> + ["recommendation: ", format(D, Vs)]; +format({"", L, D}, Vs) when is_integer(L), L > 0 -> + [io_lib:fwrite("~w: ", [L]), format(D, Vs)]; +format({"", _L, D}, Vs) -> + format(D, Vs); +format({F, L, D}, Vs) when is_integer(L), L > 0 -> + [io_lib:fwrite("~s:~w: ", [filename(F), L]), format(D, Vs)]; +format({F, _L, D}, Vs) -> + [io_lib:fwrite("~s: ", [filename(F)]), format(D, Vs)]; +format(S, Vs) when is_list(S) -> + [io_lib:fwrite(S, Vs), $\n]. + +%% ===================================================================== diff --git a/lib/syntax_tools/src/igor.erl b/lib/syntax_tools/src/igor.erl new file mode 100644 index 0000000000..9e7b784170 --- /dev/null +++ b/lib/syntax_tools/src/igor.erl @@ -0,0 +1,3023 @@ +%% ===================================================================== +%% This library is free software; you can redistribute it and/or modify +%% it under the terms of the GNU Lesser General Public License as +%% published by the Free Software Foundation; either version 2 of the +%% License, or (at your option) any later version. +%% +%% This library is distributed in the hope that it will be useful, but +%% WITHOUT ANY WARRANTY; without even the implied warranty of +%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +%% Lesser General Public License for more details. +%% +%% You should have received a copy of the GNU Lesser General Public +%% License along with this library; if not, write to the Free Software +%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +%% USA +%% +%% $Id$ +%% +%% @copyright 1998-2006 Richard Carlsson +%% @author Richard Carlsson <[email protected]> +%% @end +%% ===================================================================== + +%% @doc Igor: the Module Merger and Renamer. +%% +%% The program Igor merges the source code of one or more Erlang +%% modules into a single module, which can then replace the original set +%% of modules. 
Igor is also able to rename a set of (possibly +%% interdependent) modules, without joining them into a single +%% module. +%% +%% The main user interface consists of the functions {@link merge/3} and +%% {@link rename/3}. See also the function {@link parse_transform/2}. +%% +%% A note of warning: Igor cannot do anything about the case when the +%% name of a remote function is passed to the built-in functions +%% `apply' and `spawn' <em>unless</em> the module +%% and function names are explicitly stated in the call, as in e.g. +%% `apply(lists, reverse, [Xs])'. In all other cases, Igor +%% leaves such calls unchanged, and warns the user that manual editing +%% might be necessary. +%% +%% Also note that Erlang records will be renamed as necessary to +%% avoid non-equivalent definitions using the same record name. This +%% does not work if the source code accesses the name field of such +%% record tuples by `element/2' or similar methods. Always +%% use the record syntax to handle record tuples, if possible. +%% +%% Disclaimer: the author of this program takes no responsibility for +%% the correctness of the produced output, or for any effects of its +%% execution. In particular, the author may not be held responsible +%% should Igor include the code of a deceased madman in the result. +%% +%% For further information on Igors in general, see e.g. "Young +%% Frankenstein", Mel Brooks, 1974, and "The Fifth Elephant", Terry +%% Pratchett, 1999. +%% @end +%% ===================================================================== + + +%% This program is named after the character Igor, assistant to Dr. +%% Frankenstein, in the 1939 film "Son of Frankenstein" (with Boris +%% Karloff playing The Monster for the last time; Igor was played by +%% Bela Lugosi). Igor's job (in the film) was mainly to bring reasonably +%% fresh parts of various human corpses to the good Doctor, for his +%% purpose of reanimating them in the shape of a new formidable, living +%% creature. +%% +%% Merging code is done by joining the sources, possibly changing the +%% order of declarations as necessary, renaming functions and records to +%% avoid name clashes, and changing remote calls to local calls where +%% possible. Stub modules may be automatically generated to redirect any +%% calls that still use the old names. Indirectly, code merging can be +%% used to simply rename a set of modules. +%% +%% What Igor does not do is to optimise the resulting code, which +%% typically can benefit from techniques such as inlining, constant +%% folding, specialisation, etc. This task is left to the Doctor. +%% (Luckily, Igor can call on Inga to do some cleanup; cf. 'erl_tidy'.) + +%% TODO: FIXME: don't remove module qualifier if name is (auto-)imported! +%% TODO: handle merging of parameterized modules (somehow). +%% TODO: check for redefinition of macros; check equivalence; comment out. +%% TODO: {export, [E]}, E = atom() | {atom(), atom(), integer()}. +%% TODO: improve documentation. +%% TODO: optionally rename all functions from specified (or all) modules. + +-module(igor). + +-export([create_stubs/2, merge/2, merge/3, merge_files/3, merge_files/4, + merge_sources/3, parse_transform/2, rename/2, rename/3]). + +-include_lib("kernel/include/file.hrl"). + + +%% ===================================================================== +%% Global Constants + +-define(NOTE_HEADER, "Note from Igor: "). +-define(COMMENT_PREFIX, "% "). +-define(COMMENT_BAR, + "=======================" + "=======================" + "======================="). 
+-define(NOTE_PREFIX, "%! "). +-define(KILL_PREFIX, "%<<< "). +-define(DEFAULT_INCLUDES, ["."]). +-define(DEFAULT_MACROS, []). +-define(DEFAULT_SUFFIX, ".erl"). +-define(DEFAULT_BACKUP_SUFFIX, ".bak"). +-define(DEFAULT_DIR, ""). +-define(DEFAULT_STUB_DIR, "stubs"). +-define(TIDY_OPTS, [quiet]). + +%% This may also be used in patterns. R must not be an integer, i.e., +%% the structure must be distinct from function names. + +-define(record_name(R), {record, R}). + + +%% Data structure for module information + +-record(module, {name, % = atom() + vars = none, % = [atom()] | none + functions, % = ordset({atom(), int()}) + exports, % = ordset({atom(), int()}) + % | ordset({{atom(), int()}, + % term()}) + aliases, % = ordset({{atom(), int()}, + % {atom(), + % {atom(), int()}}}) + attributes, % = ordset({atom(), term()}) + records % = [{atom(), [{atom(), term()}]}] + }). + +%% The default pretty-printing function. + +default_printer(Tree, Options) -> + erl_prettypr:format(Tree, Options). + + +%% ===================================================================== +%% @spec parse_transform(Forms::[syntaxTree()], Options::[term()]) -> +%% [syntaxTree()] +%% +%% syntaxTree() = erl_syntax:syntaxTree() +%% +%% @doc Allows Igor to work as a component of the Erlang compiler. +%% Including the term `{parse_transform, igor}' in the +%% compile options when compiling an Erlang module (cf. +%% `compile:file/2'), will call upon Igor to process the +%% source code, allowing automatic inclusion of other source files. No +%% files are created or overwritten when this function is used. +%% +%% Igor will look for terms `{igor, List}' in the compile +%% options, where `List' is a list of Igor-specific options, +%% as follows: +%% <dl> +%% <dt>`{files, [filename()]}'</dt> +%% <dd>The value specifies a list of source files to be merged with +%% the file being compiled; cf. `merge_files/4'.</dd> +%% </dl> +%% +%% See `merge_files/4' for further options. Note, however, +%% that some options are preset by this function and cannot be +%% overridden by the user; in particular, all cosmetic features are +%% turned off, for efficiency. Preprocessing is turned on. +%% +%% @see merge_files/4 +%% @see //compiler/compile:file/2 + +parse_transform(Forms, Options) -> + M = get_module_info(Forms), + Name = M#module.name, + Opts = proplists:append_values(igor, Options), + Files = proplists:append_values(files, Opts), + %% We turn off all features that are only cosmetic, and make sure to + %% turn on preservation of `file' attributes. + Opts1 = [{comments, false}, + {notes, no}, + {no_imports, true}, + {file_attributes, yes}, + {preprocess, true}, + {export, [Name]} + | Opts], + {T, _} = merge_files(Name, [Forms], Files, Opts1), + verbose("done.", Opts1), + erl_syntax:revert_forms(T). + + +%% ===================================================================== +%% @spec merge(Name::atom(), Files::[filename()]) -> [filename()] +%% @equiv merge(Name, Files, []) + +merge(Name, Files) -> + merge(Name, Files, []). + +%% ===================================================================== +%% @spec merge(Name::atom(), Files::[filename()], Options::[term()]) -> +%% [filename()] +%% +%% filename() = file:filename() +%% +%% @doc Merges source code files to a single file. `Name' +%% specifies the name of the resulting module - not the name of the +%% output file. `Files' is a list of file names and/or module +%% names of source modules to be read and merged (see +%% `merge_files/4' for details). 
All the input modules must +%% be distinctly named. +%% +%% The resulting source code is written to a file named +%% "`<em>Name</em>.erl'" in the current directory, unless +%% otherwise specified by the options `dir' and +%% `outfile' described below. +%% +%% Examples: +%% <ul> +%% <li>given a module `m' in file "`m.erl'" +%% which uses the standard library module `lists', calling +%% `igor:merge(m, [m, lists])' will create a new file +%% "`m.erl' which contains the code from `m' and +%% exports the same functions, and which includes the referenced code +%% from the `lists' module. The original file will be +%% renamed to "`m.erl.bak'".</li> +%% +%% <li>given modules `m1' and `m2', in +%% corresponding files, calling `igor:merge(m, [m1, m2])' +%% will create a file "`m.erl'" which contains the code +%% from `m1' and `m2' and exports the functions +%% of `m1'.</li> +%% </ul> +%% +%% Stub module files are created for those modules that are to be +%% exported by the target module (see options `export', +%% `stubs' and `stub_dir'). +%% +%% The function returns the list of file names of all created +%% modules, including any automatically created stub modules. The file +%% name of the target module is always first in the list. +%% +%% Note: If you get a "syntax error" message when trying to merge +%% files (and you know those files to be correct), then try the +%% `preprocess' option. It typically means that your code +%% contains too strange macros to be handled without actually performing +%% the preprocessor expansions. +%% +%% Options: +%% <dl> +%% <dt>`{backup_suffix, string()}'</dt> +%% +%% <dd>Specifies the file name suffix to be used when a backup file +%% is created; the default value is `".bak"'.</dd> +%% +%% <dt>`{backups, bool()}'</dt> +%% +%% <dd>If the value is `true', existing files will be +%% renamed before new files are opened for writing. The new names +%% are formed by appending the string given by the +%% `backup_suffix' option to the original name. The +%% default value is `true'.</dd> +%% +%% <dt>`{dir, filename()}'</dt> +%% +%% <dd>Specifies the name of the directory in which the output file +%% is to be written. An empty string is interpreted as the current +%% directory. By default, the current directory is used.</dd> +%% +%% <dt>`{outfile, filename()}'</dt> +%% +%% <dd>Specifies the name of the file (without suffix) to which the +%% resulting source code is to be written. By default, this is the +%% same as the `Name' argument.</dd> +%% +%% <dt>`{preprocess, bool()}'</dt> +%% +%% <dd>If the value is `true', preprocessing will be done +%% when reading the source code. See `merge_files/4' for +%% details.</dd> +%% +%% <dt>`{printer, Function}'</dt> +%% <dd><ul> +%% <li>`Function = (syntaxTree()) -> string()'</li> +%% </ul> +%% Specifies a function for prettyprinting Erlang syntax trees. +%% This is used for outputting the resulting module definition, as +%% well as for creating stub files. The function is assumed to +%% return formatted text for the given syntax tree, and should raise +%% an exception if an error occurs. The default formatting function +%% calls `erl_prettypr:format/2'.</dd> +%% +%% <dt>`{stub_dir, filename()}'</dt> +%% +%% <dd>Specifies the name of the directory to which any generated +%% stub module files are written. 
The default value is +%% `"stubs"'.</dd> +%% +%% <dt>`{stubs, bool()}'</dt> +%% +%% <dd>If the value is `true', stub module files will be +%% automatically generated for all exported modules that do not have +%% the same name as the target module. The default value is +%% `true'.</dd> +%% +%% <dt>`{suffix, string()}'</dt> +%% +%% <dd>Specifies the suffix to be used for the output file names; +%% the default value is `".erl"'.</dd> +%% </dl> +%% +%% See `merge_files/4' for further options. +%% +%% @see merge/2 +%% @see merge_files/4 + +%% The defaults for 'merge' are also used for 'create_stubs'. + +-define(DEFAULT_MERGE_OPTS, + [{backup_suffix, ?DEFAULT_BACKUP_SUFFIX}, + backups, + {dir, ?DEFAULT_DIR}, + {printer, fun default_printer/2}, + {stub_dir, ?DEFAULT_STUB_DIR}, + stubs, + {suffix, ?DEFAULT_SUFFIX}, + {verbose, false}]). + +merge(Name, Files, Opts) -> + Opts1 = Opts ++ ?DEFAULT_MERGE_OPTS, + {Tree, Stubs} = merge_files(Name, Files, Opts1), + Dir = proplists:get_value(dir, Opts1, ""), + Filename = proplists:get_value(outfile, Opts1, Name), + File = write_module(Tree, Filename, Dir, Opts1), + [File | maybe_create_stubs(Stubs, Opts1)]. + + +%% ===================================================================== +%% @spec merge_files(Name::atom(), Files::[filename()], +%% Options::[term()]) -> +%% {syntaxTree(), [stubDescriptor()]} +%% @equiv merge_files(Name, [], Files, Options) + +merge_files(Name, Files, Options) -> + merge_files(Name, [], Files, Options). + + +%% ===================================================================== +%% @spec merge_files(Name::atom(), Sources::[Forms], +%% Files::[filename()], Options::[term()]) -> +%% {syntaxTree(), [stubDescriptor()]} +%% Forms = syntaxTree() | [syntaxTree()] +%% +%% @doc Merges source code files and syntax trees to a single syntax +%% tree. This is a file-reading front end to +%% `merge_sources/3'. `Name' specifies the name of +%% the resulting module - not the name of the output file. +%% `Sources' is a list of syntax trees and/or lists of +%% "source code form" syntax trees, each entry representing a module +%% definition. `Files' is a list of file names and/or module +%% names of source modules to be read and included. All the input +%% modules must be distinctly named. +%% +%% If a name in `Files' is not the name of an existing +%% file, Igor assumes it represents a module name, and tries to locate +%% and read the corresponding source file. The parsed files are appended +%% to `Sources' and passed on to +%% `merge_sources/3', i.e., entries in `Sources' +%% are listed before entries read from files. +%% +%% If no exports are listed by an `export' option (see +%% `merge_sources/3' for details), then if `Name' +%% is also the name of one of the input modules, that module will be +%% exported; otherwise, the first listed module will be exported. Cf. +%% the examples under `merge/3'. +%% +%% The result is a pair `{Tree, Stubs}', where +%% `Tree' represents the source code that is the result of +%% merging all the code in `Sources' and `Files', +%% and `Stubs' is a list of stub module descriptors (see +%% `merge_sources/3' for details). +%% +%% Options: +%% <dl> +%% <dt>`{comments, bool()}'</dt> +%% +%% <dd>If the value is `true', source code comments in +%% the original files will be preserved in the output. 
The default +%% value is `true'.</dd> +%% +%% <dt>`{find_src_rules, [{string(), string()}]}'</dt> +%% +%% <dd>Specifies a list of rules for associating object files with +%% source files, to be passed to the function +%% `filename:find_src/2'. This can be used to change the +%% way Igor looks for source files. If this option is not specified, +%% the default system rules are used. The first occurrence of this +%% option completely overrides any later in the option list.</dd> +%% +%% <dt>`{includes, [filename()]}'</dt> +%% +%% <dd>Specifies a list of directory names for the Erlang +%% preprocessor, if used, to search for include files (cf. the +%% `preprocess' option). The default value is the empty +%% list. The directory of the source file and the current directory +%% are automatically appended to the list.</dd> +%% +%% <dt>`{macros, [{atom(), term()}]}'</dt> +%% +%% <dd>Specifies a list of "pre-defined" macro definitions for the +%% Erlang preprocessor, if used (cf. the `preprocess' +%% option). The default value is the empty list.</dd> +%% +%% <dt>`{preprocess, bool()}'</dt> +%% +%% <dd>If the value is `false', Igor will read source +%% files without passing them through the Erlang preprocessor +%% (`epp'), in order to avoid expansion of preprocessor +%% directives such as `-include(...).', +%% `-define(...).' and `-ifdef(...)', and +%% macro calls such as `?LINE' and `?MY_MACRO(x, +%% y)'. The default value is `false', i.e., +%% preprocessing is not done. (See the module +%% `epp_dodger' for details.) +%% +%% Notes: If a file contains too exotic definitions or uses of +%% macros, it will not be possible to read it without preprocessing. +%% Furthermore, Igor does not currently try to sort out multiple +%% inclusions of the same file, or redefinitions of the same macro +%% name. Therefore, when preprocessing is turned off, it may become +%% necessary to edit the resulting source code, removing such +%% re-inclusions and redefinitions.</dd> +%% </dl> +%% +%% See `merge_sources/3' for further options. +%% +%% @see merge/3 +%% @see merge_files/3 +%% @see merge_sources/3 +%% @see //stdlib/filename:find_src/2 +%% @see epp_dodger + +merge_files(_, _Trees, [], _) -> + report_error("no files to merge."), + exit(badarg); +merge_files(Name, Trees, Files, Opts) -> + Opts1 = Opts ++ [{includes, ?DEFAULT_INCLUDES}, + {macros, ?DEFAULT_MACROS}, + {preprocess, false}, + comments], + Sources = [read_module(F, Opts1) || F <- Files], + merge_sources(Name, Trees ++ Sources, Opts1). + + +%% ===================================================================== +%% @spec merge_sources(Name::atom(), Sources::[Forms], +%% Options::[term()]) -> +%% {syntaxTree(), [stubDescriptor()]} +%% +%% Forms = syntaxTree() | [syntaxTree()] +%% +%% @type stubDescriptor() = [{ModuleName, Functions, [Attribute]}] +%% ModuleName = atom() +%% Functions = [{FunctionName, {ModuleName, FunctionName}}] +%% FunctionName = {atom(), integer()} +%% Attribute = {atom(), term()}. +%% +%% A stub module descriptor contains the module name, a list of +%% exported functions, and a list of module attributes. Each +%% function is described by its name (which includes its arity), +%% and the corresponding module and function that it calls. (The +%% arities should always match.) The attributes are simply +%% described by key-value pairs. +%% +%% @doc Merges syntax trees to a single syntax tree. This is the main +%% code merging "engine". `Name' specifies the name of the +%% resulting module. 
`Sources' is a list of syntax trees of +%% type `form_list' and/or lists of "source code form" syntax +%% trees, each entry representing a module definition. All the input +%% modules must be distinctly named. +%% +%% Unless otherwise specified by the options, all modules are assumed +%% to be at least "static", and all except the target module are assumed +%% to be "safe". See the `static' and `safe' +%% options for details. +%% +%% If `Name' is also the name of one of the input modules, +%% the code from that module will occur at the top of the resulting +%% code, and no extra "header" comments will be added. In other words, +%% the look of that module will be preserved. +%% +%% The result is a pair `{Tree, Stubs}', where +%% `Tree' represents the source code that is the result of +%% merging all the code in `Sources', and `Stubs' +%% is a list of stub module descriptors (see below). +%% +%% `Stubs' contains one entry for each exported input +%% module (cf. the `export' option), each entry describing a +%% stub module that redirects calls of functions in the original module +%% to the corresponding (possibly renamed) functions in the new module. +%% The stub descriptors can be used to automatically generate stub +%% modules; see `create_stubs/2'. +%% +%% Options: +%% <dl> +%% <dt>`{export, [atom()]}'</dt> +%% +%% <dd>Specifies a list of names of input modules whose interfaces +%% should be exported by the output module. A stub descriptor is +%% generated for each specified module, unless its name is +%% `Name'. If no modules are specified, then if +%% `Name' is also the name of an input module, that +%% module will be exported; otherwise the first listed module in +%% `Sources' will be exported. The default value is the +%% empty list.</dd> +%% +%% <dt>`{export_all, bool()}'</dt> +%% +%% <dd>If the value is `true', this is equivalent to +%% listing all of the input modules in the `export' +%% option. The default value is `false'.</dd> +%% +%% <dt>`{file_attributes, Preserve}'</dt> +%% <dd><ul> +%% <li>`Preserve = yes | comment | no'</li> +%% </ul> +%% If the value is `yes', all file attributes +%% `-file(...)' in the input sources will be preserved in +%% the resulting code. If the value is `comment', they +%% will be turned into comments, but remain in their original +%% positions in the code relative to the other source code forms. If +%% the value is `no', all file attributes will be removed +%% from the code, unless they have attached comments, in which case +%% they will be handled as in the `comment' case. The +%% default value is `no'.</dd> +%% +%% <dt>`{no_banner, bool()}'</dt> +%% +%% <dd>If the value is `true', no banner comment will be +%% added at the top of the resulting module, even if the target +%% module does not have the same name as any of the input modules. +%% Instead, Igor will try to preserve the look of the module whose +%% code is at the top of the output. The default value is +%% `false'.</dd> +%% +%% <dt>`{no_headers, bool()}'</dt> +%% +%% <dd>If the value is `true', no header comments will be +%% added to the resulting module at the beginning of each section of +%% code that originates from a particular input module. 
The default +%% value is `false', which means that section headers are +%% normally added whenever more than two or more modules are +%% merged.</dd> +%% +%% <dt>`{no_imports, bool()}'</dt> +%% +%% <dd>If the value is `true', all +%% `-import(...)' declarations in the original code will +%% be expanded in the result; otherwise, as much as possible of the +%% original import declarations will be preserved. The default value +%% is `false'.</dd> +%% +%% <dt>`{notes, Notes}'</dt> +%% <dd><ul> +%% <li>`Notes = always | yes | no'</li> +%% </ul> +%% If the value is `yes', comments will be inserted where +%% important changes have been made in the code. If the value is +%% `always', <em>all</em> changes to the code will be +%% commented. If the value is `no', changes will be made +%% without comments. The default value is `yes'.</dd> +%% +%% <dt>`{redirect, [{atom(), atom()}]}'</dt> +%% +%% <dd>Specifies a list of pairs of module names, representing a +%% mapping from old names to new. <em>The set of old names may not +%% include any of the names of the input modules.</em> All calls to +%% the listed old modules will be rewritten to refer to the +%% corresponding new modules. <em>The redirected calls will not be +%% further processed, even if the new destination is in one of the +%% input modules.</em> This option mainly exists to support module +%% renaming; cf. `rename/3'. The default value is the +%% empty list.</dd> +%% +%% <dt>`{safe, [atom()]}'</dt> +%% +%% <dd>Specifies a list of names of input modules such that calls to +%% these "safe" modules may be turned into direct local calls, that +%% do not test for code replacement. Typically, this can be done for +%% e.g. standard library modules. If a module is "safe", it is per +%% definition also "static" (cf. below). The list may be empty. By +%% default, all involved modules <em>except the target module</em> +%% are considered "safe".</dd> +%% +%% <dt>`{static, [atom()]}'</dt> +%% +%% <dd>Specifies a list of names of input modules which will be +%% assumed never to be replaced (reloaded) unless the target module +%% is also first replaced. The list may be empty. The target module +%% itself (which may also be one of the input modules) is always +%% regarded as "static", regardless of the value of this option. By +%% default, all involved modules are assumed to be static.</dd> +%% +%% <dt>`{tidy, bool()}'</dt> +%% +%% <dd>If the value is `true', the resulting code will be +%% processed using the `erl_tidy' module, which removes +%% unused functions and does general code cleanup. (See +%% `erl_tidy:module/2' for additional options.) The +%% default value is `true'.</dd> +%% +%% <dt>`{verbose, bool()}'</dt> +%% +%% <dd>If the value is `true', progress messages will be +%% output while the program is running; the default value is +%% `false'.</dd> +%% </dl> +%% +%% Note: The distinction between "static" and "safe" modules is +%% necessary in order not to break the semantics of dynamic code +%% replacement. A "static" source module will not be replaced unless the +%% target module also is. Now imagine a state machine implemented by +%% placing the code for each state in a separate module, and suppose +%% that we want to merge this into a single target module, marking all +%% source modules as static. At each point in the original code where a +%% call is made from one of the modules to another (i.e., the state +%% transitions), code replacement is expected to be detected. 
Then, if +%% we in the merged code do not check at these points if the +%% <em>target</em> module (the result of the merge) has been replaced, +%% we can not be sure in general that we will be able to do code +%% replacement of the merged state machine - it could run forever +%% without detecting the code change. Therefore, all such calls must +%% remain remote-calls (detecting code changes), but may call the target +%% module directly. +%% +%% If we are sure that this kind of situation cannot ensue, we may +%% specify the involved modules as "safe", and all calls between them +%% will become local. Note that if the target module itself is specified +%% as safe, "remote" calls to itself will be turned into local calls. +%% This would destroy the code replacement properties of e.g. a typical +%% server loop. +%% +%% @see create_stubs/2 +%% @see rename/3 +%% @see erl_tidy:module/2 + +%% Currently, there is no run-time support in Erlang for detecting +%% whether some module has been changed since the current module was +%% loaded. Therefore, if a source module is specified as non-static, not +%% much will be gained from merging: a call to a non-static module will +%% remain a remote call using the old module name, even when it is +%% performed from within the merged code. If that module is specified as +%% exported, the old name could then refer to an auto-generated stub, +%% redirecting the call back to the corresponding function in the target +%% module. This could possibly be useful in some cases, but efficiency +%% is not improved by such a transformation. If support for efficient +%% testing for module updates is added to Erlang in future versions, +%% code merging will be able to use local calls even for non-static +%% source modules, opening the way for compiler optimisations over the +%% module boundaries. + +%% Data structure for merging environment. + +-record(merge, {target, % = atom() + sources, % = ordset(atom()) + export, % = ordset(atom()) + static, % = ordset(atom()) + safe, % = ordset(atom()) + preserved, % = bool() + no_headers, % = bool() + notes, % = bool() + redirect, % = dict(atom(), atom()) + no_imports, % = ordset(atom()) + options % = [term()] + }). + +merge_sources(Name, Sources, Opts) -> + %% Prepare the options and the inputs. + Opts1 = Opts ++ [{export_all, false}, + {file_attributes, no}, + {no_imports, false}, + {notes, yes}, + tidy, + {verbose, false}], + Trees = case Sources of + [] -> + report_error("no sources to merge."), + exit(badarg); + _ -> + [if is_list(M) -> erl_syntax:form_list(M); + true -> M + end + || M <- Sources] + end, + %% There must be at least one module to work with. + Modules = [get_module_info(T) || T <- Trees], + merge_sources_1(Name, Modules, Trees, Opts1). + +%% Data structure for keeping state during transformation. + +-record(state, {export}). + +state__add_export(Name, Arity, S) -> + S#state{export = sets:add_element({Name, Arity}, + S#state.export)}. + +merge_sources_1(Name, Modules, Trees, Opts) -> + %% Get the (nonempty) list of source module names, in the given + %% order. Multiple occurrences of the same source module name are + %% not accepted. + Ns = [M#module.name || M <- Modules], + case duplicates(Ns) of + [] -> + ok; + Ns1 -> + report_error("same module names repeated in input: ~p.", + [Ns1]), + exit(error) + end, + Sources = ordsets:from_list(Ns), + All = ordsets:add_element(Name, Sources), + + %% Initialise the merging environment from the given options. 
+ %% + %% If the `export' option is the empty list, then if the target + %% module is the same as one of the sources, that module will be + %% exported; otherwise the first listed source module is exported. + %% This simplifies use in most cases, and guarantees that the + %% generated module has a well-defined interface. If `export_all' is + %% `true', we expand it here by including the set of source module + %% names. + Es = case proplists:append_values(export, Opts) of + [] -> + case ordsets:is_element(Name, Sources) of + true -> + [Name]; + false -> + [hd(Ns)] + end; + Es1 when is_list(Es1) -> + ordsets:from_list(Es1) + end, + Export = case proplists:get_bool(export_all, Opts) of + false -> + Es; + true -> + ordsets:union(Sources, Es) + end, + check_module_names(Export, Sources, "declared as exported"), + verbose("modules exported from `~w': ~p.", [Name, Export], Opts), + + %% The target module is always "static". (Particularly useful when + %% the target is the same as one of the source modules). It is + %% however not "safe" by default. If no modules are explicitly + %% specified as static, it is assumed that *all* are static. + Static0 = ordsets:from_list(proplists:append_values(static, Opts)), + case proplists:is_defined(static, Opts) of + false -> + Static = All; + true -> + Static = ordsets:add_element(Name, Static0) + end, + check_module_names(Static, All, "declared 'static'"), + verbose("static modules: ~p.", [Static], Opts), + + %% If no modules are explicitly specified as "safe", it is assumed + %% that *all* source modules are "safe" except the target module and + %% those explicitly specified as "static". + Safe = case proplists:is_defined(safe, Opts) of + false -> + ordsets:subtract(Sources, + ordsets:add_element(Name, Static0)); + true -> + ordsets:from_list( + proplists:append_values(safe, Opts)) + end, + check_module_names(Safe, All, "declared 'safe'"), + verbose("safe modules: ~p.", [Safe], Opts), + + Preserved = (ordsets:is_element(Name, Sources) + and ordsets:is_element(Name, Export)) + or proplists:get_bool(no_banner, Opts), + NoHeaders = proplists:get_bool(no_headers, Opts), + Notes = proplists:get_value(notes, Opts, always), + Rs = proplists:append_values(redirect, Opts), + Redirect = case is_atom_map(Rs) of + true -> + Ms = ordsets:from_list([M || {M, _} <- Rs]), + case ordsets:intersection(Sources, Ms) of + [] -> + ok; + Ms1 -> + report_error("cannot redirect calls to " + "modules in input set: ~p.", + [Ms1]), + exit(error) + end, + dict:from_list(Rs); + false -> + report_error("bad value for `redirect' option: " + "~P.", + [Rs, 10]), + exit(error) + end, + NoImports = case proplists:get_bool(no_imports, Opts) of + true -> + ordsets:from_list(Sources ++ + dict:fetch_keys(Redirect)); + false -> + ordsets:from_list(dict:fetch_keys(Redirect)) + end, + Env = #merge{target = Name, + sources = Sources, + export = Export, + safe = Safe, + static = Static, + preserved = Preserved, + no_headers = NoHeaders, + notes = Notes, + redirect = Redirect, + no_imports = NoImports, + options = Opts}, + merge_sources_2(Env, Modules, Trees, Opts). + +is_atom_map([{A1, A2} | As]) when is_atom(A1), is_atom(A2) -> + is_atom_map(As); +is_atom_map([]) -> + true; +is_atom_map(_) -> + false. + +check_module_names(Names, Sources, Txt) -> + case Names -- Sources of + [] -> + ok; + Xs -> + report_error("unknown modules ~s: ~p.", [Txt, Xs]), + exit(error) + end. 
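+
+%% Schematic example of the option handling above (hypothetical module
+%% names): given parsed forms `Forms1' and `Forms2' for modules `m1'
+%% and `m2',
+%%
+%%     {Tree, Stubs} =
+%%         igor:merge_sources(m, [Forms1, Forms2], [{export, [m1]}]),
+%%
+%% exports the interface of `m1' from the new module `m'; since `m1' is
+%% not the target module, `Stubs' will contain a stub descriptor for it
+%% (see make_stubs/3). With no `static' or `safe' options given, all
+%% source modules are treated as static, and all except the target
+%% module as safe.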
+ +%% This function performs all the stages of the actual merge: + +merge_sources_2(Env, Modules, Trees, Opts) -> + %% Compute the merged name space and the list of renamings. + {Names, Renaming} = merge_namespaces(Modules, Env), + + %% Merge the source module descriptions, computing a structure + %% describing the resulting module, and a table of aliases which + %% must be expanded. + {Module, Expansions} = merge_info(Modules, Names, Renaming, + Env), + + %% Merge the actual source code, also returning the "original + %% header" (for the first code section in the output). + St = #state{export = sets:new()}, + {Tree, Header, St1} = merge_code(Trees, Modules, Expansions, + Renaming, Env, St), + + %% Filter out unwanted program forms and add a preamble to the code, + %% making a complete module. + Tree1 = erl_syntax:form_list([make_preamble(Module, Header, + Env, St1), + filter_forms(Tree, Env)]), + + %% Tidy the final syntax tree (removing unused functions) and return + %% it together with the list of stub descriptors. + {tidy(Tree1, Opts), make_stubs(Modules, Renaming, Env)}. + +make_preamble(Module, Header, Env, St) -> + Name = Module#module.name, + Vars = Module#module.vars, + Extras = ordsets:from_list(sets:to_list(St#state.export)), + Exports = make_exports(Module#module.exports, Extras), + Imports = make_imports(Module#module.aliases), + Attributes = make_attributes(Module#module.attributes), + erl_syntax:form_list(module_header(Header, Name, Vars, Env) + ++ Exports + ++ Imports + ++ Attributes). + +%% If the target preserves one of the source modules, we do not generate +%% a new header, but use the original. + +module_header(Forms, Name, Vars, Env) -> + case Env#merge.preserved of + true -> + update_header(Forms, Name, Vars); + false -> + [comment([?COMMENT_BAR, + "This module was formed by merging " + "the following modules:", + ""] + ++ [lists:flatten(io_lib:fwrite("\t\t`~w'", + [M])) + || M <- Env#merge.sources] + ++ ["", + timestamp(), + ""]), + erl_syntax:attribute(erl_syntax:atom('module'), + [erl_syntax:atom(Name)])] + end. + +update_header(Fs, Name, Vars) -> + [M | Fs1] = lists:reverse(Fs), + Ps = if Vars =:= none -> []; + true -> [erl_syntax:list([erl_syntax:variable(V) + || V <- Vars])] + end, + M1 = rewrite(M, erl_syntax:attribute(erl_syntax:atom('module'), + [erl_syntax:atom(Name) | Ps])), + lists:reverse([M1 | Fs1]). + +%% Some functions may have been noted as necessary to export (because of +%% how they are called) even though the user did not specify that the +%% modules in which these functions originated should be part of the +%% interface of the resulting module. + +make_exports(Exports, Extras) -> + case ordsets:subtract(Extras, Exports) of + [] -> + [make_export(Exports)]; + Es -> + [make_export(Exports), + comment(["** The following exports " + "are not official: **"]), + make_export(Es)] + end. + +make_export(Names) -> + Es = [erl_syntax:arity_qualifier(erl_syntax:atom(F), + erl_syntax:integer(A)) + || {F, A} <- Names], + if Es =:= [] -> + comment(["** Nothing is officially exported " + "from this module! **"]); + true -> + erl_syntax:attribute(erl_syntax:atom('export'), + [erl_syntax:list(Es)]) + end. + +%% Any aliases that cannot be expressed using `import' (i.e. those not +%% on the form `{F, {M, F}}') are ignored. + +make_imports(As) -> + %% First remove any auto-imports and "non-proper" imports from + %% the list. + As1 = [A || {F, {_M, F}} = A <- As, not is_auto_import(F)], + [make_import(M, Fs) || {M, Fs} <- group_imports(As1)]. 
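+
+%% For example (illustrative), an alias entry of the form
+%%
+%%     {{map, 2}, {lists, {map, 2}}}
+%%
+%% (i.e., `map/2' imported from `lists') is grouped under the module
+%% name `lists' and emitted below as
+%%
+%%     -import(lists, [map/2]).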
+ +make_import(Module, Names) -> + Is = [erl_syntax:arity_qualifier(erl_syntax:atom(F), + erl_syntax:integer(A)) + || {F, A} <- Names], + erl_syntax:attribute(erl_syntax:atom('import'), + [erl_syntax:atom(Module), + erl_syntax:list(Is)]). + +%% Group aliases by module. + +group_imports(Imports) -> + dict:to_list( + lists:foldl( + fun ({F, {M, F}}, D) -> + case dict:find(M, D) of + {ok, V} -> + V1 = ordsets:add_element(F, V), + dict:store(M, V1, D); + error -> + dict:store(M, [F], D) + end + end, + dict:new(), Imports)). + + +%% --------------------------------------------------------------------- +%% Making stub descriptors +%% +%% These are generated for all exported modules that are not the target +%% module. + +make_stubs(Modules, Renaming, Env) -> + make_stubs_1(Modules, Renaming, Env). + +make_stubs_1([M | Ms], Renaming, Env) -> + Name = M#module.name, + if Name /= Env#merge.target -> + case ordsets:is_element(Name, Env#merge.export) of + true -> + [make_stub(M, Renaming(Name), Env) + | make_stubs_1(Ms, Renaming, Env)]; + false -> + make_stubs_1(Ms, Renaming, Env) + end; + true -> + make_stubs_1(Ms, Renaming, Env) + end; +make_stubs_1([], _, _) -> + []. + +make_stub(M, Map, Env) -> + Target = Env#merge.target, + Es = [{F, {Target, Map(F)}} || F <- M#module.exports], + {M#module.name, Es, M#module.attributes}. + + +%% --------------------------------------------------------------------- +%% Removing and/or out-commenting program forms. The returned form +%% sequence tree is not necessarily flat. + +-record(filter, {records, file_attributes, attributes}). + +filter_forms(Tree, Env) -> + Forms = erl_syntax:form_list_elements( + erl_syntax:flatten_form_list(Tree)), + erl_syntax:form_list(filter_forms_1(Forms, Env)). + +filter_forms_1(Forms, Env) -> + {Fs, _} = filter_forms_2(Forms, Env), + lists:reverse(Fs). + +filter_forms_2(Forms, Env) -> + FileAttrsOpt = proplists:get_value(file_attributes, + Env#merge.options, comment), + %% Sanity check and translation of option value: + FileAttrs = case FileAttrsOpt of + yes -> keep; + no -> delete; + comment -> kill; + _ -> + report_error("invalid value for option " + "`file_attributes': ~w.", + [FileAttrsOpt]), + exit(error) + end, + Attrs = if length(Env#merge.sources) =:= 1 -> + delete; %% keeping the originals looks weird + true -> + kill + end, + S = #filter{records = sets:new(), + file_attributes = FileAttrs, + attributes = Attrs}, + lists:foldl( + fun (F, {Fs, S0}) -> + case filter_form(F, S0) of + {keep, S1} -> + {[F | Fs], S1}; % keep + {kill, S1} -> + {[kill_form(F) | Fs], S1}; % kill + {delete, S1} -> + %% Remove, or kill if it has comments (only + %% top-level comments are examined). + case erl_syntax:has_comments(F) of + false -> + {Fs, S1}; + true -> + {[kill_form(F) | Fs], S1} + end + end + end, + {[], S}, Forms). 
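+
+%% For example, with the default options a `-module(...)' or
+%% `-export(...)' attribute from a source module is normally deleted
+%% (it is replaced by the generated preamble), while a second
+%% `-record(...)' definition whose name has already been seen is
+%% "killed", i.e. kept but turned into a comment prefixed with
+%% `%<<< ' (see kill_form/1 below).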
+ +filter_form(F, S) -> + case erl_syntax_lib:analyze_form(F) of + {attribute, {'file', _}} -> + {S#filter.file_attributes, S}; + {attribute, {'module', _}} -> + {delete, S}; + {attribute, {'export', _}} -> + {delete, S}; + {attribute, {'import', _}} -> + {delete, S}; + {attribute, {'record', {R, _}}} -> + Records = S#filter.records, + case sets:is_element(R, Records) of + true -> + {kill, S}; % already defined above + false -> + S1 = S#filter{records = + sets:add_element(R, Records)}, + {keep, S1} + end; + {attribute, preprocessor} -> + {keep, S}; %% keep all preprocessor attributes + {attribute, _} -> + {S#filter.attributes, S}; %% handle all other attributes + {error_marker, _} -> + {delete, S}; + {warning_marker, _} -> + {delete, S}; + eof_marker -> + {delete, S}; % these must be deleted! + _ -> + {keep, S} % keep all other Erlang forms + end. + +%% This out-comments (kills) a program form. Any top-level pre-comments +%% are moved out, to avoid "nested" comments. + +kill_form(F) -> + F1 = erl_syntax:set_precomments(F, []), + F2 = erl_syntax_lib:to_comment(F1, ?KILL_PREFIX), + erl_syntax:set_precomments(F2, + erl_syntax:get_precomments(F)). + + +%% --------------------------------------------------------------------- +%% Merging the name spaces of a set of modules. Returns the final set +%% (see module `sets') of names and a total renaming function (atom()) +%% -> ({atom(), integer()}) -> {atom(), integer()}. +%% +%% Names are added in two passes, in order to avoid renaming the +%% interface functions whenever possible: all exported functions are +%% added to the name space before any nonexported are added, and +%% "exported" modules are taken before any other. Thus, the order is: +%% +%% - exported functions of exported modules +%% - exported functions of nonexported modules +%% - internal functions of exported modules +%% - internal functions of nonexported modules +%% +%% In fact, only the first group is important, but there might be some +%% point in establishing the above order, for better readability of the +%% final code. + +merge_namespaces(Modules, Env) -> + Export = Env#merge.export, + Split = fun (M) -> + ordsets:is_element(M#module.name, Export) + end, + {M1, M2} = split_list(Split, Modules), + R = dict:new(), + Acc = {sets:new(), R}, + {M3, Acc1} = merge_namespaces_1(M1, Acc), + + %% Detect and warn about renamed interface functions + {_, Maps0} = Acc1, + case [{M, dict:to_list(Map)} + || {M, Map} <- dict:to_list(Maps0), dict:size(Map) =/= 0] of + [] -> + ok; + Fs -> + report_warning("interface functions renamed:\n\t~p.", + [Fs]) + end, + {M4, Acc2} = merge_namespaces_1(M2, Acc1), + Ms = M3 ++ M4, + Acc3 = merge_namespaces_2(Ms, Acc2), + {{Names, Maps}, _} = merge_namespaces_3(Ms, Acc3), + {Names, make_renaming_function(Maps)}. + +%% Adding exported names. (Note that the list gets a new temporary +%% format also containing the exports.) This first step initialises the +%% Maps "dict-of-dicts" structure. + +merge_namespaces_1(Modules, Acc) -> + lists:mapfoldl( + fun (Module, {Names, Maps}) -> + Exports = sets:from_list(Module#module.exports), + M = Module#module.name, + {Names1, Map} = add_function_renamings(M, Exports, Names, + dict:new()), + Maps1 = dict:store(M, Map, Maps), + {{Module, Exports}, {Names1, Maps1}} + end, + Acc, Modules). + +%% Adding nonexported names. 
+ +merge_namespaces_2(Modules, Acc) -> + lists:foldl( + fun ({Module, Exports}, {Names, Maps}) -> + Other = sets:subtract( + sets:from_list(Module#module.functions), + Exports), + M = Module#module.name, + Map = dict:fetch(M, Maps), + {Names1, Map1} = add_function_renamings(M, Other, Names, + Map), + Maps1 = dict:store(M, Map1, Maps), + {Names1, Maps1} + end, + Acc, Modules). + +%% Adding record names. We need to keep a global +%% "record-definition-to-new-record-name" mapping RMap while doing this. + +merge_namespaces_3(Modules, Acc) -> + lists:foldl( + fun ({Module, _Exports}, {{Names, Maps}, RMap}) -> + Records = Module#module.records, + M = Module#module.name, + Map = dict:fetch(M, Maps), + {Names1, Map1, RMap1} = add_record_renamings(M, Records, + Names, Map, + RMap), + Maps1 = dict:store(M, Map1, Maps), + {{Names1, Maps1}, RMap1} + end, + {Acc, dict:new()}, Modules). + +%% This takes the set of added function names together with the existing +%% name set, creates new function names where necessary, and returns the +%% final name set together with the list of renamings. + +add_function_renamings(Module, New, Names, Map) -> + Clashes = sets:to_list(sets:intersection(New, Names)), + lists:foldl( + fun (F = {_, A}, {Names, Map}) when is_integer(A) -> + F1 = new_function_name(Module, F, Names), + {sets:add_element(F1, Names), dict:store(F, F1, Map)} + end, + {sets:union(New, Names), Map}, Clashes). + +%% This is similar to the above, but for record names. Note that we add +%% both the record name and the whole definition to the namespace. + +add_record_renamings(Module, Records, Names, Map, RMap) -> + lists:foldl( + fun (N = {R, Fs}, {Names, Map, RMap}) -> + case sets:is_element(?record_name(R), Names) of + true -> + %% The name is already in use. + case sets:is_element(?record_name(N), Names) of + true -> + %% We have seen this definition before; + %% make sure we use the same name. + {R1, _} = remap_record_name(N, RMap), + Map1 = dict:store(?record_name(R), + ?record_name(R1), Map), + {Names, Map1, RMap}; + false -> + %% Redefinition of existing name. Create + %% new name and set up renamings. + N1 = {R1, _} = new_record_name(Module, R, + Fs, Names), + Map1 = dict:store(?record_name(R), + ?record_name(R1), Map), + RMap1 = dict:store(N, N1, RMap), + Names1 = sets:add_element(?record_name(N1), + Names), + {Names1, Map1, RMap1} + end; + false -> + %% A previously unused record name. + Names1 = sets:add_element(?record_name(R), Names), + Names2 = sets:add_element(?record_name(N), Names1), + {Names2, Map, RMap} + end + end, + {Names, Map, RMap}, Records). + +remap_record_name(N, Map) -> + case dict:find(N, Map) of + {ok, N1} -> N1; + error -> N + end. + +%% This hides the implementation of the record namespace. Since Map +%% yields identity for non-remapped names, the remapped names must be +%% stored in wrapped form. + +map_record_name(R, Map) -> + ?record_name(R1) = Map(?record_name(R)), + R1. + +%% When we rename a function, we want the new name to be as close as +%% possible to the old, and as informative as possible. Therefore, we +%% first prefix it with the name of the originating module, followed by +%% two underscore characters, and then if there still is a name clash, +%% we suffix the name by "_N", where N is the smallest possible positive +%% integer that does not cause a clash. 
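+%% For example, if a function `f/1' from module `m' clashes with a name
+%% already in the merged name space, it is first renamed to `m__f/1';
+%% if that name is also taken, `m__f_1/1', `m__f_2/1', and so on are
+%% tried in turn.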
+ +new_function_name(M, {F, A}, Names) -> + Base = atom_to_list(M) ++ "__" ++ atom_to_list(F), + Name = {list_to_atom(Base), A}, + case sets:is_element(Name, Names) of + false -> + Name; + true -> + new_function_name(1, A, Base, Names) + end. + +new_function_name(N, Arity, Base, Names) -> + Name = {list_to_atom(Base ++ "_" ++ integer_to_list(N)), + Arity}, + case sets:is_element(Name, Names) of + false -> + Name; + true -> + %% Increment counter and try again. + new_function_name(N + 1, Arity, Base, Names) + end. + +%% This is pretty much the same as new_function_name, for now. + +new_record_name(M, R, Fs, Names) -> + Base = atom_to_list(M) ++ "__" ++ atom_to_list(R), + Name = {list_to_atom(Base), Fs}, + case sets:is_element(?record_name(Name), Names) of + false -> + Name; + true -> + new_record_name_1(1, Base, Fs, Names) + end. + +new_record_name_1(N, Base, Fs, Names) -> + Name = {list_to_atom(Base ++ "_" ++ integer_to_list(N)), Fs}, + case sets:is_element(?record_name(Name), Names) of + false -> + Name; + true -> + %% Increment counter and try again. + new_record_name_1(N + 1, Base, Fs, Names) + end. + +%% This returns a *total* function from the set of module names to the +%% set of *total* operators on function names, yielding identity for all +%% function names that are not specified in the given partial map +%% (ModuleName -> (Name -> Name)). + +make_renaming_function(Maps) -> + fun (Module) -> + case dict:find(Module, Maps) of + {ok, Map} -> + fun (Name) -> + case dict:find(Name, Map) of + {ok, Name1} -> + Name1; % renamed + error -> + Name % identity + end + end; + error -> + %% Other module - yield identity map. + fun (Name) -> Name end + end + end. + + +%% --------------------------------------------------------------------- +%% Merging module info records into a target module record, and finding +%% necessary alias expansions. Returns `{Module, Expansions}' where +%% `Expansions' has type `dict(ModuleName, dict(Alias, FullName))' + +merge_info(Modules, Names, Renaming, Env) -> + Forbid = sets:from_list(Env#merge.no_imports), + Expansions = alias_expansions(Modules, Names, Forbid), + Module = merge_info_1(Modules, Renaming, Expansions, Env), + {Module, Expansions}. + +merge_info_1(Modules, Renaming, Expansions, Env) -> + lists:foldl( + fun (M, A) -> + Name = M#module.name, + Map = Renaming(Name), + Functions = join_functions(Map, + M#module.functions, + A#module.functions), + Exports = join_exports(Env, Name, Map, + M#module.exports, + A#module.exports), + Aliases = join_aliases(Name, Expansions, + M#module.aliases, + A#module.aliases), + Attributes = join_attributes(Env, Name, + M#module.attributes, + A#module.attributes), + Records = join_records(Map, + M#module.records, + A#module.records), + A#module{functions = Functions, + exports = Exports, + aliases = Aliases, + attributes = Attributes, + records = Records} + end, + #module{name = Env#merge.target, + functions = ordsets:new(), + exports = ordsets:new(), + aliases = ordsets:new(), + attributes = ordsets:new(), + records = ordsets:new()}, + Modules). + +%% Functions must be renamed before including. + +join_functions(Map, Source, Target) -> + ordsets:union(ordsets:from_list([Map(A) || A <- Source]), + Target). + +%% Exports also need renaming, and are kept only if their originating +%% modules are exported. + +join_exports(Env, Name, Map, Source, Target) -> + case ordsets:is_element(Name, Env#merge.export) of + true -> + ordsets:union(ordsets:from_list([Map(F) + || F <- Source]), + Target); + false -> + Target + end. 
+ +%% Aliases never need renaming; instead we always expand uses which +%% could cause name clashes. We must then remove the expanded names from +%% the imports of the target. + +join_aliases(Name, Expansions, Source, Target) -> + As = case dict:find(Name, Expansions) of + {ok, As1} -> + ordsets:from_list(dict:to_list(As1)); + error -> + [] + end, + ordsets:union(ordsets:subtract(Source, As), Target). + +%% We only propagate attributes if the number of source modules is 1 or +%% the source module has the same name as the resulting module. + +join_attributes(Env, Name, Source, Target) -> + if Env#merge.target =:= Name -> + ordsets:union(Source, Target); + true -> + if length(Env#merge.sources) =:= 1 -> + ordsets:union(Source, Target); + true -> + Target + end + end. + +%% The final record info in itself is not used at present, but we +%% compute the join anyway. We apply renaming to records much like we do +%% to functions, but records have a separate namespace. + +join_records(Map, Source, Target) -> + Renamed = [{map_record_name(R, Map), Fs} || {R, Fs} <- Source], + ordsets:union(ordsets:from_list(Renamed), Target). + +%% This finds aliases that are in conflict or are for other reasons +%% necessary to expand while transforming the code later. It is assumed +%% that each module is in itself correct, and thus does not contain +%% conflicting definitions of the same alias. +%% +%% We could of course simply say that *all* aliases, without exception, +%% should be expanded, but such a big change in the style of the code +%% should not be done unless the user explicitly specifies it. +%% +%% The returned `Expansions' is a dictionary (module `dict') mapping +%% each module name in `Modules' to a dictionary which maps those +%% aliases to be expanded for that module to their corresponding full +%% names. +%% +%% Aliases are filtered according to the following rules: +%% +%% 1. If a name is defined (in some source module) as an alias of a +%% name `M:...', where `M' is any of the source modules(*), then +%% the definition of that alias should be removed, and all its uses +%% (in the same module as the definition) be expanded. +%% +%% 2. Then, if a name is defined (in some source module) as an +%% alias, but the name occurs in the name space of the resulting +%% module, then the definition should be removed and all uses (in +%% the same module) expanded. +%% +%% 3. Finally, if a name has two or more distinct alias definitions +%% in the source modules, then all definitions of that alias should +%% be removed and all uses (in all modules) expanded. (We remove +%% all definitions mainly for symmetry.) +%% +%% (*) It is actually possible for an alias to refer to the module +%% in which it is itself defined. However, since we also in this +%% case want to expand all uses, we don't have to do any extra work +%% to handle it. + +%% The filtering is done in two stages. + +alias_expansions(Modules, Names, Forbid) -> + Table = alias_expansions_1(Modules, Forbid, Names), + alias_expansions_2(Modules, Table). + +%% First consider each alias in isolation. + +alias_expansions_1(Modules, Forbid, Names) -> + lists:foldl( + fun (M, T) -> + Map = lists:foldl( + fun ({A, F}, T1) -> + case keep_alias(A, F, Forbid, Names) + of + true -> + T1; + false -> + dict:store(A, F, T1) + end + end, + dict:new(), M#module.aliases), + dict:store(M#module.name, Map, T) + end, + dict:new(), Modules). 
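+
+%% A concrete example of rule 1 above (module names made up): if a source
+%% module `m1' contains `-import(m2, [f/1])' and `m2' is itself one of the
+%% source modules, then the import is dropped from the merged module, and
+%% every use `f(X)' in the code from `m1' is expanded to `m2:f(X)' (which
+%% is then localised like any other explicitly qualified call).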
+ +keep_alias(A, {M, _}, Forbid, Names) -> + case sets:is_element(M, Forbid) of + true -> + false; + false -> + not sets:is_element(A, Names) + end. + +%% In this second stage, we resolve any conflicts that may remain +%% because of distinct source modules still containing distinct alias +%% definitions of the same name - in that case we remove *all* of them +%% (mainly for symmetry). + +alias_expansions_2(Modules, Table) -> + %% Get the set of all alias definitions in all modules (collapsing + %% duplicated but equivalent definitions). + Aliases = lists:foldl( + fun (M, A) -> + ordsets:union(A, M#module.aliases) + end, + ordsets:new(), Modules), + + %% Get the set of names with multiple (distinct) definitions. + Names = duplicates([F || {F, _} <- Aliases]), + + %% Go through all aliases in all source modules and add necessary + %% entries to the expansion-table. We expect that there is an entry + %% in the table here for each module. + lists:foldl( + fun (M, T) -> + N = M#module.name, + lists:foldl( + fun ({A, F}, T1) -> + case ordsets:is_element(A, Names) of + true -> + T2 = dict:fetch(N, T1), + dict:store(N, + dict:store(A, F, T2), + T1); + false -> + T1 + end + end, + T, M#module.aliases) + end, + Table, Modules). + + +%% --------------------------------------------------------------------- +%% Merging the source code. + +%% Data structure for code transformation environment. + +-record(code, {module, % = atom() + target, % = atom() + sources, % = ordset(atom()) + static, % = ordset(atom()) + safe, % = ordset(atom()) + preserved, % = bool() + no_headers, % = bool() + notes, % = bool() + map, % = ({atom(), int()}) -> {atom(), int()} + renaming, % = (atom()) -> ({atom(), int()}) -> + % {atom(), int()} + expand, % = dict({atom(), int()}, + % {atom(), {atom(), int()}}) + redirect % = dict(atom(), atom()) + }). + +%% `Trees' must be a list of syntax trees of type `form_list'. The +%% result is a pair `{Result, Header}' where `Result' is a `form_list' +%% tree representing the merged code, and if the `preserved' flag is +%% set, `Header' is the list of forms up to and including the first +%% `-module(...)' declaration, but stripped of any `-file(...)' +%% attributes - otherwise `Header' is an empty list. + +merge_code(Trees, Modules, Expansions, Renaming, Env, St) -> + Env1 = #code{target = Env#merge.target, + sources = sets:from_list(Env#merge.sources), + static = sets:from_list(Env#merge.static), + safe = sets:from_list(Env#merge.safe), + preserved = Env#merge.preserved, + no_headers = Env#merge.no_headers, + notes = Env#merge.notes, + redirect = Env#merge.redirect, + renaming = Renaming}, + Code = order_code(Modules, Trees, Env1), + {Code1, Header} = case Env1#code.preserved of + true -> + take_header(Code); + false -> + {Code, erl_syntax:form_list([])} + end, + {Forms, St1} = merge_code_1(Code1, Expansions, Env1, St), + Tree = erl_syntax:form_list(Forms), + {Tree, Header, St1}. + +merge_code_1(Code, Expansions, Env, St) -> + lists:foldr( + fun ({Module, T}, {Acc, St0}) -> + M = Module#module.name, + Expand = case dict:find(M, Expansions) of + {ok, Dict} -> Dict; + error -> dict:new() + end, + Env1 = Env#code{module = M, + map = (Env#code.renaming)(M), + expand = Expand}, + {T1, St1} = transform(T, Env1, St0), + {[section_header(M, T1, Env1) | Acc], St1} + end, + {[], St}, Code). + +%% Pair module info and source code, in the order we want, and flatten +%% the form lists. 
If the name of the target is the same as one of the +%% source modules, and the result should preserve the original module, +%% the code for that module should be first in the output. + +order_code(Modules, Trees, Env) -> + order_code(Modules, Trees, {}, [], Env). + +order_code([M | Ms], [T | Ts], First, Rest, Env) -> + T1 = erl_syntax:flatten_form_list(T), + case (M#module.name =:= Env#code.target) and + Env#code.preserved of + true -> + order_code(Ms, Ts, {M, T1}, Rest, Env); + false -> + order_code(Ms, Ts, First, [{M, T1} | Rest], Env) + end; +order_code([], [], First, Rest, _Env) -> + Rest1 = lists:reverse(Rest), + case First of + {} -> + Rest1; + M -> + [M | Rest1] + end. + +%% Extracting the "original" header (the `-module(...)' declaration is +%% sure to exist). + +take_header([{M, T} | Ms]) -> + Fs = erl_syntax:form_list_elements(T), + {Header, Fs1} = take_header_1(Fs, []), + T1 = erl_syntax:form_list(Fs1), + {[{M, T1} | Ms], Header}. + +take_header_1([F | Fs], As) -> + case erl_syntax_lib:analyze_form(F) of + {'attribute', {'module', _}} -> + {lists:reverse([F | As]), Fs}; % done + {'attribute', {'file', _}} -> + take_header_1(Fs, As); % discard + _ -> + take_header_1(Fs, [F | As]) % keep + end. + +section_header(Name, Tree, Env) -> + N = sets:size(Env#code.sources), + if N > 1, Name /= Env#code.target, Env#code.notes /= no, + Env#code.no_headers /= true -> + Text = io_lib:fwrite("The following code stems " + "from module `~w'.", [Name]), + Header = comment([?COMMENT_BAR, "", + lists:flatten(Text), ""]), + erl_syntax:form_list([Header, Tree]); + true -> + Tree + end. + +transform(Tree, Env, St) -> + case erl_syntax:type(Tree) of + application -> + transform_application(Tree, Env, St); + attribute -> + transform_attribute(Tree, Env, St); + function -> + transform_function(Tree, Env, St); + implicit_fun -> + transform_implicit_fun(Tree, Env, St); + rule -> + transform_rule(Tree, Env, St); + record_expr -> + transform_record(Tree, Env, St); + record_index_expr -> + transform_record(Tree, Env, St); + record_access -> + transform_record(Tree, Env, St); + _ -> + default_transform(Tree, Env, St) + end. + +default_transform(Tree, Env, St) -> + case erl_syntax:subtrees(Tree) of + [] -> + {Tree, St}; + Gs -> + {Gs1, St1} = transform_1(Gs, Env, St), + Tree1 = rewrite(Tree, erl_syntax:make_tree( + erl_syntax:type(Tree), + Gs1)), + {Tree1, St1} + end. + +transform_1([G | Gs], Env, St) -> + {G1, St1} = transform_list(G, Env, St), + {Gs1, St2} = transform_1(Gs, Env, St1), + {[G1 | Gs1], St2}; +transform_1([], _Env, St) -> + {[], St}. + +transform_list([T | Ts], Env, St) -> + {T1, St1} = transform(T, Env, St), + {Ts1, St2} = transform_list(Ts, Env, St1), + {[T1 | Ts1], St2}; +transform_list([], _Env, St) -> + {[], St}. + +%% Renaming function definitions + +transform_function(T, Env, St) -> + {T1, St1} = default_transform(T, Env, St), + F = erl_syntax_lib:analyze_function(T1), + {V, Text} = case (Env#code.map)(F) of + F -> + %% Not renamed + {none, []}; + {Atom, _Arity} -> + %% Renamed + Cs = erl_syntax:function_clauses(T1), + N = rename_atom( + erl_syntax:function_name(T1), + Atom), + T2 = erl_syntax:function(N, Cs), + {{value, T2}, renaming_note(Atom)} + end, + {maybe_modified(V, T1, 2, Text, Env), St1}. + +renaming_note(Name) -> + [lists:flatten(io_lib:fwrite("renamed function to `~w'", + [Name]))]. + +rename_atom(Node, Atom) -> + rewrite(Node, erl_syntax:atom(Atom)). 
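+
+%% For example (names made up): if the renaming map sends `{init, 1}' to
+%% `{m1__init, 1}', then the definition
+%%
+%%     init(Opts) -> ...
+%%
+%% originating from module `m1' is rewritten to
+%%
+%%     m1__init(Opts) -> ...
+%%
+%% and a "renamed function to `m1__init'" note is attached as a comment,
+%% unless notes have been disabled.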
+ +%% Renaming Mnemosyne rules (just like function definitions) + +transform_rule(T, Env, St) -> + {T1, St1} = default_transform(T, Env, St), + F = erl_syntax_lib:analyze_rule(T1), + {V, Text} = case (Env#code.map)(F) of + F -> + %% Not renamed + {none, []}; + {Atom, _Arity} -> + %% Renamed + Cs = erl_syntax:rule_clauses(T1), + N = rename_atom( + erl_syntax:rule_name(T1), + Atom), + T2 = rewrite(T1, + erl_syntax:rule(N, Cs)), + {{value, T2}, renaming_note(Atom)} + end, + {maybe_modified(V, T1, 2, Text, Env), St1}. + +%% Renaming "implicit fun" expressions (done quietly). + +transform_implicit_fun(T, Env, St) -> + {T1, St1} = default_transform(T, Env, St), + F = erl_syntax_lib:analyze_implicit_fun(T1), + {V, Text} = case (Env#code.map)(F) of + F -> + %% Not renamed + {none, []}; + {Atom, Arity} -> + %% Renamed + N = rewrite( + erl_syntax:implicit_fun_name(T1), + erl_syntax:arity_qualifier( + erl_syntax:atom(Atom), + erl_syntax:integer(Arity))), + T2 = erl_syntax:implicit_fun(N), + {{value, T2}, ["function was renamed"]} + end, + {maybe_modified_quiet(V, T1, 2, Text, Env), St1}. + +%% Transforming function applications + +transform_application(T, Env, St) -> + %% We transform the arguments first, so we can concentrate on the + %% application itself after that point. + {As, St1} = transform_list( + erl_syntax:application_arguments(T), + Env, St), + F = erl_syntax:application_operator(T), + + %% See if the operator is an explicit function name. + %% (Usually, this will be the case.) + case catch {ok, erl_syntax_lib:analyze_function_name(F)} of + {ok, Name} -> + transform_application_1(Name, F, As, T, Env, St1); + syntax_error -> + %% Oper is not a function name, but might be any other + %% expression - we just visit it and reassemble the + %% application. + %% We do not handle applications of tuples `{M, F}'. + {F1, St2} = transform(F, Env, St1), + {rewrite(T, erl_syntax:application(F1, As)), St2}; + {'EXIT', R} -> + exit(R); + R -> + throw(R) + end. + +%% At this point we should have an explicit function name, which might +%% or might not be qualified by a module name. + +transform_application_1(Name, F, As, T, Env, St) -> + %% Before doing anything else, we must unfold any uses of aliases + %% whose definitions have been killed. + Arity = length(As), + {Name1, F1} = expand_operator(Name, Arity, F, Env), + F2 = maybe_modified_quiet(F1, F, 7, ["unfolded alias"], Env), + {V, St1} = transform_application_2(Name1, Arity, F2, As, Env, + St), + T1 = rewrite(T, erl_syntax:application(F2, As)), + T3 = case V of + none -> + T1; + {value, {T2, Depth, Message}} -> + maybe_modified_quiet({value, T2}, T1, Depth, + Message, Env) + end, + {T3, St1}. + +%% Here, Name has been expanded if necessary (if so, this is also +%% reflected by F), and As have been transformed. We should return +%% `{none, State}' if no further rewriting is necessary, and otherwise +%% `{{value, {Tree, Depth, Message}}, State}', where `Depth' and +%% `Message' are to be passed to `maybe_modified'. + +transform_application_2(Name, Arity, F, As, Env, St) + when is_atom(Name) -> + transform_atom_application(Name, Arity, F, As, Env, St); +transform_application_2({M, N}, Arity, F, As, Env, St) + when is_atom(M), is_atom(N) -> + transform_qualified_application(M, N, Arity, F, As, Env, St); +transform_application_2(_Name, _Arity, _F, _As, _Env, St) -> + {none, St}. % strange name - do nothing. + +expand_operator(Name, Arity, _F, Env) when is_atom(Name) -> + %% An unqualified function name - must be looked up. 
However, we + %% must first check that it is not an auto-imported name - these + %% have precedence over normal imports. We do a sanity check on the + %% found arity. + case is_auto_import({Name, Arity}) of + true -> + {Name, none}; % auto-import - never expand. + false -> + case dict:find({Name, Arity}, Env#code.expand) of + {ok, {M, {N, A}}} when A =:= Arity -> + %% Expand to a qualified name. + F1 = erl_syntax:module_qualifier( + erl_syntax:atom(M), + erl_syntax:atom(N)), + {{M, N}, {value, F1}}; + error -> + %% Not in the table - leave it unchanged + {Name, none} + end + end; +expand_operator(Name, _Arity, _F, _Env) -> + %% Qualified function name - leave it unchanged + {Name, none}. + +%% Transforming an application of a named function without module +%% qualifier (often misleadingly called "local" applications). Note that +%% since the `apply', `spawn' and `spawn_link' functions are implicitly +%% imported (from module `erlang'), applications of these names cannot +%% refer to functions defined in the source code. + +transform_atom_application(Name, Arity, F, As, Env, St) -> + %% Catch applications of `apply' and `spawn'. + case {Name, Arity} of + {'apply', 2} -> + warning_apply_2(Env#code.module, Env#code.target), + {none, St}; + {'apply', 3} -> + transform_apply_call(F, As, Env, St); + {'spawn', 3} -> + transform_spawn_call(F, As, Env, St); + {'spawn', 4} -> + transform_spawn_call(F, As, Env, St); + {'spawn_link', 3} -> + transform_spawn_call(F, As, Env, St); + {'spawn_link', 4} -> + transform_spawn_call(F, As, Env, St); + _ -> + %% A simple call of an unqualified function name - just + %% remap the name as necessary. Auto-imported names may not + %% be changed - the call never refers to a local function. + %% We do a sanity check on the arity. + case is_auto_import({Name, Arity}) of + true -> + {none, St}; % auto-import - do not change. + false -> + case (Env#code.map)({Name, Arity}) of + {N, A} when N =:= Name, A =:= Arity -> + %% Not changed. + {none, St}; + {N, A} when A =:= Arity -> + %% The callee (in the current module) + %% was renamed. + F1 = rewrite(F, erl_syntax:atom(N)), + T = erl_syntax:application(F1, As), + V = {T, 2, ["callee was renamed"]}, + {{value, V}, St} + end + end + end. + +%% Transforming an application of an explicitly named function qualified +%% with an (also explicit) module name. (Often called "remote" +%% applications.) + +transform_qualified_application(Module, Name, Arity, F, As, Env, St) -> + %% Catch applications of `apply' and `spawn'. + case {Module, Name, Arity} of + {'erlang', 'apply', 2} -> + warning_apply_2(Env#code.module, Env#code.target), + {none, St}; + {'erlang', 'apply', 3} -> + transform_apply_call(F, As, Env, St); + {'erlang', 'spawn', 3} -> + transform_spawn_call(F, As, Env, St); + {'erlang', 'spawn', 4} -> + transform_spawn_call(F, As, Env, St); + {'erlang', 'spawn_link', 3} -> + transform_spawn_call(F, As, Env, St); + {'erlang', 'spawn_link', 4} -> + transform_spawn_call(F, As, Env, St); + _ -> + case erlang:is_builtin(Module, Name, Arity) of + false -> + transform_qualified_application_1( + Module, Name, Arity, F, As, Env, St); + true -> + {none, St} + end + end. 
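+
+%% For example (names made up): a call `erlang:apply(m2, f, [X])' is
+%% treated exactly like `apply(m2, f, [X])' above, while an ordinary
+%% qualified call `m2:g(X)' (not a built-in) is passed on to the
+%% localisation step below, where it may be turned into a local,
+%% target-qualified or redirected call.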
+ +transform_qualified_application_1(Module, Name, Arity, F, As, Env, + St) -> + MakeLocal = fun (N) -> + F1 = rewrite(F, erl_syntax:atom(N)), + erl_syntax:application(F1, As) + end, + MakeRemote = fun () -> + erl_syntax:application(F, As) + end, + MakeDynamic = fun(M, N) -> + F1 = erl_syntax:module_qualifier( + erl_syntax:atom(M), + erl_syntax:atom(N)), + F2 = rewrite(F, F1), + erl_syntax:application(F2, As) + end, + localise(Module, Name, Arity, MakeLocal, MakeRemote, + MakeDynamic, 3, Env, St). + +%% For an `apply/3' call, if we know the called module and function +%% names, and the number of arguments, then we can rewrite it to a +%% direct remote call - and if we do not, there is nothing we can +%% change. + +transform_apply_call(F, As, Env, St) -> + [Module, Name, List] = As, + case (erl_syntax:type(Module) =:= atom) + and (erl_syntax:type(Name) =:= atom) + and erl_syntax:is_proper_list(List) of + true -> + transform_apply_call_1(Module, Name, List, F, As, Env, + St); + false -> + %% We can't get enough information about the + %% arguments to the `apply' call, so we do nothing + %% but warn. + warning_unsafe_call(apply, Env#code.module, + Env#code.target), + {none, St} + end. + +%% Rewrite the apply-call to a static qualified call and handle that +%% instead. + +transform_apply_call_1(Module, Name, List, F, _As, Env, St) -> + F1 = rewrite(F, erl_syntax:module_qualifier( Module, Name)), + As1 = erl_syntax:list_elements(List), + M = erl_syntax:atom_value(Module), + N = erl_syntax:atom_value(Name), + A = length(As1), + transform_qualified_application_1(M, N, A, F1, As1, Env, St). + +%% `spawn' and `spawn_link' (with arity 3 or 4) are very much like +%% `apply/3', but there could be an extra `Node' argument. Note that `F' +%% below can represent both `spawn' and `spawn_link'. + +transform_spawn_call(F, As, Env, St) -> + case As of + [Module, Name, List] -> + MakeSpawn = fun (As1) -> + erl_syntax:application(F, As1) + end, + transform_spawn_call_1(Module, Name, List, MakeSpawn, + Env, St); + [Node, Module, Name, List] -> + MakeSpawn = fun (As1) -> + erl_syntax:application( + F, [Node | As1]) + end, + transform_spawn_call_1(Module, Name, List, MakeSpawn, + Env, St) + end. + +%% Here, we can treat all dynamic-lookup spawns like `spawn/3'. + +transform_spawn_call_1(Module, Name, List, MakeSpawn, Env, St) -> + case (erl_syntax:type(Module) =:= atom) + and (erl_syntax:type(Name) =:= atom) + and erl_syntax:is_proper_list(List) + of + true -> + transform_spawn_call_2(Module, Name, List, MakeSpawn, + Env, St); + _ -> + %% We can't get enough information about the arguments to + %% the `spawn' call, so we do nothing but warn. + warning_unsafe_call(spawn, Env#code.module, + Env#code.target), + {none, St} + end. + +transform_spawn_call_2(Module, Name, List, MakeSpawn, Env, St) -> + As = erl_syntax:list_elements(List), + Arity = length(As), + MakeLocal = fun (N) -> + %% By using `spawn-a-fun', we do not have to + %% force the callee to be exported. + A = rewrite(Name, erl_syntax:atom(N)), + B = erl_syntax:application(A, As), + C = erl_syntax:clause([], [B]), + F = erl_syntax:fun_expr([C]), + MakeSpawn([F]) + end, + MakeRemote = fun () -> + MakeSpawn([Module, Name, List]) + end, + MakeDynamic = fun (M, N) -> + F = rewrite(Name, erl_syntax:atom(N)), + MakeSpawn([erl_syntax:atom(M), F, List]) + end, + localise(erl_syntax:atom_value(Module), + erl_syntax:atom_value(Name), + Arity, MakeLocal, MakeRemote, MakeDynamic, + 4, Env, St). 
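+
+%% For example (names made up): if `m1' is one of the source modules and
+%% is regarded as safe, a call `spawn(m1, f, [X])' in the merged code is
+%% rewritten to
+%%
+%%     spawn(fun () -> f(X) end)
+%%
+%% (using the renamed local name for `f/1' if it was renamed), so the
+%% callee does not have to be exported from the target module.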
+ +%% MakeLocal = (atom()) -> syntaxTree() +%% MakeRemote = () -> syntaxTree() +%% MakeDynamic = (atom(), atom()) -> syntaxTree() +%% localise(...) -> {none, state()} | {{value, V}, State} + +localise(Module, Name, Arity, MakeLocal, MakeRemote, MakeDynamic, + Depth, Env, St) -> + %% Is the callee in one of the source modules? + case sets:is_element(Module, Env#code.sources) of + false -> + case dict:find(Module, Env#code.redirect) of + {ok, Module1} -> + T = MakeDynamic(Module1, Name), + V = {T, Depth, ["redirected call"]}, + {{value, V}, St}; + error -> + {none, St} % Nothing needs doing. + end; + true -> + %% Remap the name of the callee, as necessary. Do a sanity + %% check on the arity. + Map = (Env#code.renaming)(Module), + Name1 = case Map({Name, Arity}) of + {N, A} when A =:= Arity -> + N + end, + + %% See if the callee module is "safe" and/or "static". + Safe = sets:is_element(Module, Env#code.safe), + Static = (sets:is_element(Module, Env#code.static) + or Safe), + + %% Select what kind of code to generate for the call: + case Static of + false -> + %% (This also implies that the called module is not + %% the target module - which is always "static" - + %% and that it is not "safe".) The called module + %% could be replaced dynamically, independent of the + %% target module, so we must protect the localised + %% call. We strip all comments from the localised + %% code, to avoid getting the same comments twice. + L = MakeLocal(Name1), + L1 = erl_syntax_lib:strip_comments(L), + R = MakeRemote(), + {T, Text} = protect_call(Module, L1, R), + V = {T, Depth, Text}, + {{value, V}, St}; + true -> + %% In this case, the called module is never replaced + %% unless the target module also is. (N.B.: These + %% might be the same module.) + case Safe of + false -> + %% The normal code replacement semantics + %% must be preserved here, so the generated + %% call must be qualified with the name of + %% the target module. (We assume this is + %% efficiently compiled even if we do not + %% insert an explicit "latest version" + %% test.) + Target = Env#code.target, + case Module =:= Target of + true -> + %% Already calling the target module + %% - do not insert irritating notes. + {none, St}; + false -> + %% We must ensure that the function + %% is exported. + St1 = state__add_export(Name1, + Arity, St), + T = MakeDynamic(Target, Name1), + Text = ["localised call"], + V = {T, Depth, Text}, + {{value, V}, St1} + end; + true -> + %% The call is regarded as safe to localise + %% completely. Code replacement will in + %% general not be detected (except for + %% spawn/apply). + T = MakeLocal(Name1), + Text = ["localised safe call"], + V = {T, Depth, Text}, + {{value, V}, St} + end + end + end. + +%%% %% This creates a test on whether there is a later loaded version of +%%% %% Module: if not, select the `Local' expression, otherwise the `Remote' +%%% %% expression. We knowingly duplicate code here, to allow better +%%% %% optimisations, but we never duplicate work. +%%% +%%% protect_call(Module, Local, Remote) -> +%%% T = erl_syntax:if_expr( +%%% [erl_syntax:clause([erl_syntax:application( +%%% erl_syntax:atom('not_replaced'), +%%% [erl_syntax:atom(Module)])], +%%% [Local]), +%%% erl_syntax:clause([erl_syntax:atom('true')], +%%% [Remote])]), +%%% {T, ["localised dynamic call"]}. + +%% This "protects" a localised call by letting it remain a remote call. + +protect_call(_Module, _Local, Remote) -> + {Remote, ["dynamic call"]}. 
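+
+%% To summarise the cases above for a qualified call `m1:f(X)', where `m1'
+%% is one of the source modules (names made up) and `f/1' has been renamed
+%% to `f1/1' in the merged module:
+%%
+%%     m1 neither static nor safe  ->  m1:f(X)       (call kept remote)
+%%     m1 static but not safe      ->  target:f1(X)  (f1/1 is exported; the
+%%                                                    call is left alone if
+%%                                                    m1 is the target)
+%%     m1 safe                     ->  f1(X)         (plain local call)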
+ +%% Renaming record declarations + +transform_attribute(T, Env, St) -> + {T1, St1} = TSt1 = default_transform(T, Env, St), + case erl_syntax_lib:analyze_attribute(T1) of + {record, {R, _}} -> + F = fun(R) -> + [_ | As] = erl_syntax:attribute_arguments(T1), + erl_syntax:attribute( + erl_syntax:attribute_name(T1), + [erl_syntax:atom(R) | As]) + end, + {V, Text} = rename_record(R, F, Env), + {maybe_modified(V, T1, 2, Text, Env), St1}; + _ -> + TSt1 + end. + +%% This handles renaming of records. + +transform_record(T, Env, St) -> + {T1, St1} = TSt1 = default_transform(T, Env, St), + X = case catch erl_syntax_lib:analyze_record_expr(T1) of + {record_expr, {R, _}} -> + F = fun (R) -> + erl_syntax:record_expr( + erl_syntax:record_expr_argument(T1), + erl_syntax:atom(R), + erl_syntax:record_expr_fields(T1)) + end, + {R, F}; + {record_index_expr, {R, _}} -> + F = fun (R) -> + erl_syntax:record_index_expr( + erl_syntax:atom(R), + erl_syntax:record_index_expr_field(T1)) + end, + {R, F}; + {record_access, {R, _}} -> + F = fun (R) -> + erl_syntax:record_access( + erl_syntax:record_access_argument(T1), + erl_syntax:atom(R), + erl_syntax:record_access_field(T1)) + end, + {R, F}; + _Type -> + false + end, + case X of + {R1, F1} -> + {V, Text} = rename_record(R1, F1, Env), + {maybe_modified(V, T1, 1, Text, Env), St1}; + false -> + TSt1 + end. + +rename_record(R, F, Env) -> + case map_record_name(R, Env#code.map) of + R -> + %% Not renamed + {none, []}; + R1 -> + %% Renamed + {{value, F(R1)}, ["record was renamed"]} + end. + +%% Maybe-rewriting Node, adding modification notes. + +%% This is for non-primary modifications; they are not commented unless +%% the `notes' option is set to `always'. + +maybe_modified_quiet(V, Node, Depth, Message, Env) -> + case Env#code.notes of + always -> + maybe_modified_1(V, Node, Depth, Message, yes); + _ -> + maybe_modified_1(V, Node, Depth, Message, no) + end. + +%% This is for important notes; they are only disabled if the `notes' +%% option is set to `no'. + +maybe_modified(V, Node, Depth, Message, Env) -> + maybe_modified_1(V, Node, Depth, Message, Env#code.notes). + +maybe_modified_1(none, Node, _Depth, _Message, _Notes) -> + Node; +maybe_modified_1({value, Node1}, Node, Depth, Message, Notes) -> + case Notes of + no -> + rewrite(Node, Node1); + _ -> + Code = erl_syntax:comment_text( + erl_syntax_lib:to_comment( + erl_syntax_lib:strip_comments( + erl_syntax_lib:limit(Node, Depth)), + "\040\040")), + erl_syntax:add_precomments( + [comment_note(Message ++ + ["Original code:" | Code])], + rewrite(Node, Node1)) + end. + + +%% ===================================================================== +%% @spec create_stubs(Stubs::[stubDescriptor()], Options::[term()]) -> +%% [string()] +%% +%% @doc Creates stub module source files corresponding to the given stub +%% descriptors. The returned value is the list of names of the created +%% files. See `merge_sources/3' for more information about +%% stub descriptors. +%% +%% Options: +%% <dl> +%% <dt>`{backup_suffix, string()}'</dt> +%% <dt>`{backups, bool()}'</dt> +%% <dt>`{printer, Function}'</dt> +%% <dt>`{stub_dir, filename()}'</dt> +%% <dt>`{suffix, string()}'</dt> +%% <dt>`{verbose, bool()}'</dt> +%% </dl> +%% +%% See `merge/3' for details on these options. +%% +%% @see merge/3 +%% @see merge_sources/3 + +create_stubs(Stubs, Opts) -> + Opts1 = Opts ++ ?DEFAULT_MERGE_OPTS, + lists:foldl(fun (S, Fs) -> + F = create_stub(S, Opts1), + [F | Fs] + end, + [], Stubs). 
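+
+%% Usage sketch (all names made up): given a stub descriptor saying that
+%% module `m1' should forward `f/1' to `m1_new:f/1', the call
+%%
+%%     igor:create_stubs([{m1, [{{f, 1}, {m1_new, {f, 1}}}], []}],
+%%                       [{stub_dir, "stubs"}])
+%%
+%% should write a stub source file for `m1' under the "stubs" directory,
+%% containing a module that exports `f/1' and simply calls `m1_new:f/1',
+%% and return the name of the created file in a list. Normally, stub
+%% descriptors are taken directly from the result of `merge_sources/3'.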
+ +maybe_create_stubs(Stubs, Opts) -> + case proplists:get_bool(stubs, Opts) of + true -> + create_stubs(Stubs, Opts); + false -> + [] + end. + +create_stub({Name, Fs, Attrs}, Opts) -> + Defs = [stub_function(F) || F <- Fs], + Exports = [F || {F, _} <- Fs], + Forms = stub_header(Name, Exports, Attrs) ++ Defs, + Dir = proplists:get_value(stub_dir, Opts, ""), + verbose("creating stub file for module `~w'.", [Name], Opts), + write_module(erl_syntax:form_list(Forms), Name, Dir, Opts). + +%% We just follow the arity specifications naively when we create the +%% stub funcion - it is not our responsibility to check them. + +stub_function({{F, A}, {M, {F1, A1}}}) -> + Vs = var_list(A), + Vs1 = var_list(A1), + R = erl_syntax:module_qualifier(erl_syntax:atom(M), + erl_syntax:atom(F1)), + Call = erl_syntax:application(R, Vs1), + erl_syntax:function(erl_syntax:atom(F), + [erl_syntax:clause(Vs, [], [Call])]). + +var_list(N) -> + var_list(N, 1). + +var_list(N, I) when N > 0 -> + [erl_syntax:variable("X" ++ integer_to_list(I)) + | var_list(N - 1, I + 1)]; +var_list(0, _) -> + []. + +stub_header(Name, Exports, Attrs) -> + [comment([?COMMENT_BAR, + io_lib:fwrite("This is an automatically " + "generated stub interface\n" + "for the module `~w'.", + [Name]), + "", + timestamp(), + ""]), + erl_syntax:attribute(erl_syntax:atom('module'), + [erl_syntax:atom(Name)]), + make_export(Exports)] + ++ make_attributes(Attrs). + + +%% ===================================================================== +%% @spec rename(Files::[filename()], Renamings) -> [string()] +%% @equiv rename(Files, Renamings, []) + +rename(Files, Renamings) -> + rename(Files, Renamings, []). + +%% ===================================================================== +%% @spec rename(Files::[filename()], Renamings, Options::[term()]) -> +%% [string()] +%% +%% Renamings = [{atom(), atom()}] +%% +%% @doc Renames a set of possibly interdependent source code modules. +%% `Files' is a list of file names of source modules to be +%% processed. `Renamings' is a list of pairs of <em>module +%% names</em>, representing a mapping from old names to new. The +%% returned value is the list of output file names. +%% +%% Each file in the list will be read and processed separately. For +%% every file, each reference to some module M, such that there is an +%% entry `{<em>M</em>, <em>M1</em>}' in +%% `Renamings', will be changed to the corresponding M1. +%% Furthermore, if a file F defines module M, and there is an entry +%% `{<em>M</em>, <em>M1</em>}' in `Renamings', a +%% new file named `<em>M1</em>.erl' will be created in the +%% same directory as F, containing the source code for module M, renamed +%% to M1. If M does not have an entry in `Renamings', the +%% module is not renamed, only updated, and the resulting source code is +%% written to `<em>M</em>.erl' (typically, this overwrites +%% the original file). The `suffix' option (see below) can be +%% used to change the default "`.erl'" suffix for the +%% generated files. +%% +%% Stub modules will automatically be created (see the +%% `stubs' and `stub_dir' options below) for each +%% module that is renamed. These can be used to redirect any calls still +%% using the old module names. The stub files are created in the same +%% directory as the source file (typically overwriting the original +%% file). 
+%% +%% Options: +%% <dl> +%% <dt>`{backup_suffix, string()}'</dt> +%% <dt>`{backups, bool()}'</dt> +%% <dt>`{printer, Function}'</dt> +%% <dt>`{stubs, bool()}'</dt> +%% <dt>`{suffix, string()}'</dt> +%% </dl> +%% See `merge/3' for details on these options. +%% +%% <dl> +%% <dt>`{comments, bool()}'</dt> +%% <dt>`{preprocess, bool()}'</dt> +%% </dl> +%% See `merge_files/4' for details on these options. +%% +%% <dl> +%% <dt>`{no_banner, bool()}'</dt> +%% </dl> +%% For the `rename' function, this option is +%% `true' by default. See `merge_sources/3' for +%% details. +%% +%% <dl> +%% <dt>`{tidy, bool()}'</dt> +%% </dl> +%% For the `rename' function, this option is +%% `false' by default. See `merge_sources/3' for +%% details. +%% +%% <dl> +%% <dt>`{no_headers, bool()}'</dt> +%% <dt>`{stub_dir, filename()}'</dt> +%% </dl> +%% These options are preset by the `rename' function and +%% cannot be overridden by the user. +%% +%% See `merge_sources/3' for further options. +%% +%% @see merge/3 +%% @see merge_sources/3 +%% @see merge_files/4 + +rename(Files, Renamings, Opts) -> + Dict = case is_atom_map(Renamings) of + true -> + dict:from_list(Renamings); + false -> + report_error("bad module renaming: ~P.", + [Renamings, 10]), + exit(error) + end, + %% We disable *all* automatic source code lookup, for safety: you + %% are only allowed to do renaming on a module if you give its path. + Opts1 = [{find_src_rules, []}] + ++ Opts ++ [{backup_suffix, ?DEFAULT_BACKUP_SUFFIX}, + backups, + {printer, fun default_printer/2}, + stubs, + {suffix, ?DEFAULT_SUFFIX}, + comments, + {preprocess, false}, + {tidy, false}, + no_banner, + {notes, no}, + {verbose, false}], + lists:flatmap(fun (F) -> rename_file(F, Dict, Opts1) end, Files). + +rename_file(File, Dict, Opts) -> + S = read_module(File, Opts), + M = get_module_info(S), + Name = M#module.name, + Name1 = case dict:find(Name, Dict) of + {ok, N} -> N; + error -> Name + end, + %% We convert the dictionary to a new list to ensure that we use the + %% exact same renaming for redirections. We must remove the current + %% module from the redirection set. + Dict1 = dict:erase(Name, Dict), + Opts1 = [no_headers, + {export, [Name]}, + {static, [Name]}, + {redirect, dict:to_list(Dict1)}] ++ Opts, + {Tree, Stubs} = merge_sources(Name1, [S], Opts1), + Dir = filename:dirname(filename(File)), + File1 = write_module(Tree, Name1, Dir, Opts), + + %% We create the stub file in the same directory as the source file + %% and the target file. + [File1 | maybe_create_stubs(Stubs, [{stub_dir, Dir} | Opts1])]. + + +%% --------------------------------------------------------------------- +%% Initialise a module-info record with data about the module +%% represented by the syntax tree (or list of "forms"). Listed exports +%% are guaranteed to be in the set of function names. 
+ +get_module_info(Forms) -> + L = case catch {ok, erl_syntax_lib:analyze_forms(Forms)} of + {ok, L1} -> + L1; + syntax_error -> + report_error("syntax error in input."), + erlang:error(badarg); + {'EXIT', R} -> + exit(R); + R -> + throw(R) + end, + {Name, Vars} = + case lists:keyfind(module, 1, L) of + {module, {_N, _Vs} = NVs} -> + NVs; + {module, N} -> + {N, none}; + false -> + report_error("in source code: module name missing."), + exit(error) + end, + case lists:keyfind(errors, 1, L) of + {errors, Ds} when Ds =/= [] -> + report_errors(Ds, Name), + exit(error); + _ -> + ok + end, + case lists:keyfind(warnings, 1, L) of + {warnings, Ds1} when Ds1 =/= [] -> + report_warnings(Ds1, Name); + _ -> + ok + end, + Functions = case lists:keyfind(functions, 1, L) of + {functions, Fs} -> + ordsets:from_list(Fs); + _ -> + [] + end, + Exports = case lists:keyfind(exports, 1, L) of + {exports, Es} -> + ordsets:from_list(Es); + _ -> + [] + end, + Imports = case lists:keyfind(imports, 1, L) of + {imports, Is} -> + expand_imports(Is, Name); + _ -> + [] + end, + Attributes = case lists:keyfind(attributes, 1, L) of + {attributes, As} -> + ordsets:from_list(As); + _ -> + [] + end, + Records = case lists:keyfind(records, 1, L) of + {records, Rs} -> + fold_record_fields(Rs); + _ -> + [] + end, + check_records(Records, Name), + #module{name = Name, + vars = Vars, + functions = Functions, + exports = ordsets:intersection(Exports, Functions), + aliases = Imports, + attributes = Attributes, + records = Records}. + +fold_record_fields(Rs) -> + [{N, [fold_record_field(F) || F <- Fs]} || {N, Fs} <- Rs]. + +fold_record_field({_Name, none} = None) -> + None; +fold_record_field({Name, F}) -> + case erl_syntax:is_literal(F) of + true -> + {Name, {value, erl_syntax:concrete(F)}}; + false -> + %% The default value for the field is not a constant, so we + %% represent it by a hash value instead. (We don't want to + %% do this in the general case.) + {Name, {hash, erlang:phash(F, 16#ffffff)}} + end. + +report_errors([D | Ds], Name) -> + report_error("error: " ++ error_text(D, Name)), + report_errors(Ds, Name); +report_errors([], _) -> + ok. + +report_warnings([D | Ds], Name) -> + report_warning(error_text(D, Name)), + report_errors(Ds, Name); +report_warnings([], _) -> + ok. + +error_text(D, Name) -> + case D of + {L, M, E} when is_integer(L), is_atom(M) -> + case catch M:format_error(E) of + S when is_list(S) -> + io_lib:fwrite("`~w', line ~w: ~s.", + [Name, L, S]); + _ -> + error_text_1(D, Name) + end; + _E -> + error_text_1(D, Name) + end. + +error_text_1(D, Name) -> + io_lib:fwrite("error: `~w', ~P.", [Name, D, 15]). + +check_records(Rs, Name) -> + case duplicates([N || {N, _} <- Rs]) of + [] -> + ok; + Ns -> + report_error("in module `~w': " + "multiply defined records: ~p.", + [Name, Ns]), + exit(error) + end. + +expand_imports(Is, Name) -> + Fs = ordsets:from_list(lists:append([[{M, F} || F <- Fs] + || {M, Fs} <- Is])), + As = erl_syntax_lib:function_name_expansions(Fs), + case duplicates([N || {N, _} <- As]) of + [] -> + ordsets:from_list(As); + Ns -> + report_error("in module `~w': " + "multiply imported functions: ~p.", + [Name, Ns]), + exit(error) + end. 
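+
+%% For example (a made-up module): analysing the forms of
+%%
+%%     -module(m1).
+%%     -export([f/1]).
+%%     -import(lists, [map/2]).
+%%     -record(r, {a, b = 0}).
+%%     f(X) -> #r{a = X}.
+%%
+%% yields a module record along the lines of
+%%
+%%     #module{name = m1,
+%%             vars = none,
+%%             functions = [{f, 1}],
+%%             exports = [{f, 1}],
+%%             aliases = [...expansions of the imported map/2...],
+%%             records = [{r, [{a, none}, {b, {value, 0}}]}],
+%%             ...}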
+ + +%% --------------------------------------------------------------------- +%% File handling + +%% open_output_file(filename()) -> filedescriptor() + +open_output_file(FName) -> + case catch file:open(FName, [write]) of + {ok, FD} -> + FD; + {error, _} = Error -> + error_open_output(FName), + exit(Error); + {'EXIT', R} -> + error_open_output(FName), + exit(R); + R -> + error_open_output(FName), + exit(R) + end. + +%% read_module(Name, Options) -> syntaxTree() +%% +%% This also tries to locate the real source file, if "Name" does not +%% point directly to a particular file. + +read_module(Name, Options) -> + case file_type(Name) of + {value, _} -> + read_module_1(Name, Options); + none -> + Rules = proplists:get_value(find_src_rules, Options), + case find_src(Name, Rules) of + {error, _} -> + %% It seems that we have no file - go on anyway, + %% just to get a decent error message. + read_module_1(Name, Options); + {Name1, _} -> + read_module_1(Name1 ++ ".erl", Options) + end + end. + +read_module_1(Name, Options) -> + verbose("reading module `~s'.", [filename(Name)], Options), + Forms = read_module_2(Name, Options), + case proplists:get_bool(comments, Options) of + false -> + Forms; + true -> + Comments = erl_comment_scan:file(Name), + erl_recomment:recomment_forms(Forms, Comments) + end. + +read_module_2(Name, Options) -> + case read_module_3(Name, Options) of + {ok, Forms} -> + check_forms(Forms, Name), + Forms; + {error, _} = Error -> + error_read_file(Name), + exit(Error) + end. + +read_module_3(Name, Options) -> + case proplists:get_bool(preprocess, Options) of + false -> + epp_dodger:parse_file(Name); + true -> + read_module_4(Name, Options) + end. + +read_module_4(Name, Options) -> + Includes = proplists:append_values(includes, Options) + ++ [filename:dirname(Name) | ?DEFAULT_INCLUDES], + Macros = proplists:append_values(macros, Options) + ++ ?DEFAULT_MACROS, + epp:parse_file(Name, Includes, Macros). + +check_forms([F | Fs], File) -> + case erl_syntax:type(F) of + error_marker -> + S = case erl_syntax:error_marker_info(F) of + {_, M, D} -> + M:format_error(D); + _ -> + "unknown error" + end, + report_error("in file `~s' at line ~w:\n ~s", + [filename(File), erl_syntax:get_pos(F), S]), + exit(error); + _ -> + check_forms(Fs, File) + end; +check_forms([], _) -> + ok. + +find_src(Name, undefined) -> + filename:find_src(filename(Name)); +find_src(Name, Rules) -> + filename:find_src(filename(Name), Rules). + +%% file_type(filename()) -> {value, Type} | none + +file_type(Name) -> + case catch file:read_file_info(Name) of + {ok, Env} -> + {value, Env#file_info.type}; + {error, enoent} -> + none; + {error, _} = Error -> + error_read_file_info(Name), + exit(Error); + {'EXIT', R} -> + error_read_file_info(Name), + exit(R); + R -> + error_read_file_info(Name), + throw(R) + end. + +%% Create the target directory and make a backup file if necessary, then +%% open the file, output the text and close the file safely. Returns the +%% file name. 
+ +write_module(Tree, Name, Dir, Opts) -> + Name1 = filename(Name), + Dir1 = filename(Dir), + Base = if Dir1 =:= "" -> + Name1; + true -> + case file_type(Dir1) of + {value, directory} -> + ok; + {value, _} -> + report_error("`~s' is not a directory.", + [Dir1]), + exit(error); + none -> + case file:make_dir(Dir1) of + ok -> + verbose("created directory `~s'.", + [Dir1], Opts), + ok; + E -> + report_error("failed to create " + "directory `~s'.", + [Dir1]), + exit({make_dir, E}) + end + end, + filename:join(Dir1, Name1) + end, + Suffix = proplists:get_value(suffix, Opts, ""), + File = Base ++ Suffix, + case proplists:get_bool(backups, Opts) of + true -> + backup_file(File, Opts); + false -> + ok + end, + Printer = proplists:get_value(printer, Opts), + FD = open_output_file(File), + verbose("writing to file `~s'.", [File], Opts), + V = (catch {ok, output(FD, Printer, Tree, Opts)}), + ok = file:close(FD), + case V of + {ok, _} -> + File; + {'EXIT', R} -> + error_write_file(File), + exit(R); + R -> + error_write_file(File), + throw(R) + end. + +output(FD, Printer, Tree, Opts) -> + io:put_chars(FD, Printer(Tree, Opts)), + io:nl(FD). + +%% If the file exists, rename it by appending the given suffix to the +%% file name. + +backup_file(Name, Opts) -> + case file_type(Name) of + {value, regular} -> + backup_file_1(Name, Opts); + {value, _} -> + error_backup_file(Name), + exit(error); + none -> + ok + end. + +%% The file should exist and be a regular file here. + +backup_file_1(Name, Opts) -> + Name1 = filename(Name), + Suffix = proplists:get_value(backup_suffix, Opts, ""), + Dest = filename:join(filename:dirname(Name1), + filename:basename(Name1) ++ Suffix), + case catch file:rename(Name1, Dest) of + ok -> + verbose("made backup of file `~s'.", [Name1], Opts); + {error, R} -> + error_backup_file(Name1), + exit({error, R}); + {'EXIT', R} -> + error_backup_file(Name1), + exit(R); + R -> + error_backup_file(Name1), + throw(R) + end. + + +%% ===================================================================== +%% Utility functions + +%% The form sequence returned by 'erl_tidy:module' is flat, even if the +%% given tree is not. + +tidy(Tree, Opts) -> + case proplists:get_bool(tidy, Opts) of + true -> + verbose("tidying final module.", Opts), + erl_tidy:module(Tree, ?TIDY_OPTS); + false -> + Tree + end. + +make_attributes(As) -> + [make_attribute(A) || A <- As]. + +make_attribute({Name, Term}) -> + erl_syntax:attribute(erl_syntax:atom(Name), + [erl_syntax:abstract(Term)]). + +is_auto_import({F, A}) -> + erl_internal:bif(F, A); +is_auto_import(_) -> + false. + +timestamp() -> + {{Yr, Mth, Dy}, {Hr, Mt, Sc}} = erlang:localtime(), + lists:flatten(io_lib:fwrite("Created by Igor " + "~w-~2.2.0w-~2.2.0w, " + "~2.2.0w:~2.2.0w:~2.2.0w.", + [Yr, Mth, Dy, Hr, Mt, Sc])). + +filename([C | T]) when is_integer(C), C > 0, C =< 255 -> + [C | filename(T)]; +filename([H|T]) -> + filename(H) ++ filename(T); +filename([]) -> + []; +filename(N) when is_atom(N) -> + atom_to_list(N); +filename(N) -> + report_error("bad filename: `~P'.", [N, 25]), + exit(error). + +duplicates(Xs) -> + ordsets:from_list(Xs -- ordsets:from_list(Xs)). + +split_list(F, L) -> + split_list(L, F, [], []). + +split_list([H | T], F, A1, A2) -> + case F(H) of + true -> + split_list(T, F, [H | A1], A2); + false -> + split_list(T, F, A1, [H | A2]) + end; +split_list([], _, A1, A2) -> + {lists:reverse(A1), lists:reverse(A2)}. + +rewrite(Source, Target) -> + erl_syntax:copy_attrs(Source, Target). 
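+
+%% Examples of `duplicates/1' and `split_list/2' above (illustration only):
+%%
+%%     duplicates([a, b, a, c, a, b])                  yields  [a, b]
+%%     split_list(fun (X) -> X > 2 end, [1, 3, 2, 4])  yields  {[3, 4], [1, 2]}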
+ +comment_note([L | Ls]) -> + comment([?NOTE_HEADER ++ L | Ls], ?NOTE_PREFIX). + +comment(Txt) -> + comment(Txt, ?COMMENT_PREFIX). + +comment(Txt, Prefix) -> + erl_syntax:comment(prefix_lines(split_lines(Txt), Prefix)). + +prefix_lines([L | Ls], Prefix) -> + [Prefix ++ L | prefix_lines(Ls, Prefix)]; +prefix_lines([], _) -> + []. + +split_lines(Ls) -> + split_lines(Ls, []). + +split_lines([L | Ls], Ls1) -> + split_lines(Ls, split_lines(L, [], Ls1)); +split_lines([], Ls1) -> + lists:reverse(Ls1). + +split_lines([$\r, $\n | Cs], Cs1, Ls) -> + split_lines_1(Cs, Cs1, Ls); +split_lines([$\r | Cs], Cs1, Ls) -> + split_lines_1(Cs, Cs1, Ls); +split_lines([$\n | Cs], Cs1, Ls) -> + split_lines_1(Cs, Cs1, Ls); +split_lines([C | Cs], Cs1, Ls) -> + split_lines(Cs, [C | Cs1], Ls); +split_lines([], Cs, Ls) -> + [lists:reverse(Cs) | Ls]. + +split_lines_1(Cs, Cs1, Ls) -> + split_lines(Cs, [], [lists:reverse(Cs1) | Ls]). + + +%% ===================================================================== +%% Reporting + +warning_unsafe_call(Name, Module, Target) -> + report_warning("call to `~w' in module `~w' " + "possibly unsafe in `~s'.", [Name, Module, Target]). + +warning_apply_2(Module, Target) -> + report_warning("call to `apply/2' in module `~w' " + "possibly unsafe in `~s'.", [Module, Target]). + +error_open_output(Name) -> + report_error("cannot open file `~s' for output.", [filename(Name)]). + +error_read_file(Name) -> + report_error("error reading file `~s'.", [filename(Name)]). + +error_read_file_info(Name) -> + report_error("error getting file info: `~s'.", [filename(Name)]). + +error_write_file(Name) -> + report_error("error writing to file `~s'.", [filename(Name)]). + +error_backup_file(Name) -> + report_error("could not create backup of file `~s'.", + [filename(Name)]). + +verbose(S, Opts) -> + verbose(S, [], Opts). + +verbose(S, Vs, Opts) -> + case proplists:get_bool(verbose, Opts) of + true -> + report(S, Vs); + false -> + ok + end. + +report_error(S) -> + report_error(S, []). + +report_error(S, Vs) -> + report(S, Vs). + +report_warning(S) -> + report_warning(S, []). + +report_warning(S, Vs) -> + report("warning: " ++ S, Vs). + +% report(S) -> +% report(S, []). + +report(S, Vs) -> + io:fwrite(lists:concat([?MODULE, ": ", S, "\n"]), Vs). + diff --git a/lib/syntax_tools/src/prettypr.erl b/lib/syntax_tools/src/prettypr.erl new file mode 100644 index 0000000000..4dd95a2b08 --- /dev/null +++ b/lib/syntax_tools/src/prettypr.erl @@ -0,0 +1,1301 @@ +%% ===================================================================== +%% This library is free software; you can redistribute it and/or modify +%% it under the terms of the GNU Lesser General Public License as +%% published by the Free Software Foundation; either version 2 of the +%% License, or (at your option) any later version. +%% +%% This library is distributed in the hope that it will be useful, but +%% WITHOUT ANY WARRANTY; without even the implied warranty of +%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +%% Lesser General Public License for more details. +%% +%% You should have received a copy of the GNU Lesser General Public +%% License along with this library; if not, write to the Free Software +%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +%% USA +%% +%% $Id$ +%% +%% @copyright 2000-2006 Richard Carlsson +%% @author Richard Carlsson <[email protected]> +%% @end +%% ===================================================================== + +%% @doc A generic pretty printer library. 
This module uses a +%% strict-style context passing implementation of John Hughes algorithm, +%% described in "The design of a Pretty-printing Library". The +%% paragraph-style formatting, empty documents, floating documents, and +%% null strings are my own additions to the algorithm. +%% +%% To get started, you should read about the {@link document()} data +%% type; the main constructor functions: {@link text/1}, {@link +%% above/2}, {@link beside/2}, {@link nest/2}, {@link sep/1}, and {@link +%% par/2}; and the main layout function {@link format/3}. +%% +%% If you simply want to format a paragraph of plain text, you probably +%% want to use the {@link text_par/2} function, as in the following +%% example: +%% ``` +%% prettypr:format(prettypr:text_par("Lorem ipsum dolor sit amet"), 20) +%% ''' + +%% @TODO can floats be moved in/out of sep:s without too much pain? + +-module(prettypr). + +-export([above/2, beside/2, best/3, break/1, empty/0, floating/1, + floating/3, follow/2, follow/3, format/1, format/2, format/3, + nest/2, par/1, par/2, sep/1, text/1, null_text/1, text_par/1, + text_par/2]). + +%% --------------------------------------------------------------------- + +%% XXX: just an approximation +-type deep_string() :: [char() | [_]]. + +%% XXX: poor man's document() until recursive data types are supported +-type doc() :: 'null' + | {'text' | 'fit', _} + | {'nest' | 'beside' | 'above' | 'union', _, _} + | {'sep' | 'float', _, _, _}. + +%% Document structures fully implemented and available to the user: +-record(text, {s :: deep_string()}). +-record(nest, {n :: integer(), d :: doc()}). +-record(beside, {d1 :: doc(), d2 :: doc()}). +-record(above, {d1 :: doc(), d2 :: doc()}). +-record(sep, {ds :: [doc()], i = 0 :: integer(), p = false :: boolean()}). + +%% Document structure which is not clear whether it is fully implemented: +-record(float, {d :: doc(), h :: integer(), v :: integer()}). + +%% Document structures not available to the user: +-record(union, {d1 :: doc(), d2 :: doc()}). +-record(fit, {d :: doc()}). + + +%% --------------------------------------------------------------------- +%% A small warning for hackers: it's fairly easy to break this +%% thing (in particular, to muck up the complexity) if you don't +%% understand how it works. +%% --------------------------------------------------------------------- + + +%% ===================================================================== +%% @type document(). An abstract character-based "document" representing +%% a number of possible layouts, which can be processed to produce a +%% single concrete layout. A concrete layout can then be rendered as a +%% sequence of characters containing linebreaks, which can be passed to +%% a printer or terminal that uses a fixed-width font. +%% +%% For example, a document `sep([text("foo"), text("bar")])' +%% represents the two layouts +%% ```foo bar''' +%% and +%% ```foo +%% bar''' +%% +%% Which layout is chosen depends on the available horizontal space. +%% When processing a document, the main parameters are the <em>paper +%% width</em> and the <em>line width</em> (also known as the "ribbon +%% width"). In the resulting layout, no text should be printed beyond +%% the paper width (which by default is 80 characters) as long as it can +%% be avoided, and each single line of text (its indentation not +%% counted, hence "ribbon") should preferably be no wider than the +%% specified line width (which by default is 65). 
+%% +%% Documents can be joined into a single new document using the +%% constructor functions of this module. Note that the new document +%% often represents a larger number of possible layouts than just the +%% sum of the components. + +-type document() :: 'null' | #text{} | #nest{} | #beside{} + | #above{} | #sep{} | #float{} | #union{} | #fit{}. + +%% ===================================================================== +%% @spec text(Characters::string()) -> document() +%% +%% @doc Yields a document representing a fixed, unbreakable sequence of +%% characters. The string should contain only <em>printable</em> +%% characters (tabs allowed but not recommended), and <em>not</em> +%% newline, line feed, vertical tab, etc. A tab character (`\t') is +%% interpreted as padding of 1-8 space characters to the next column of +%% 8 characters <em>within the string</em>. +%% +%% @see empty/0 +%% @see null_text/1 +%% @see text_par/2 + +-spec text(string()) -> #text{}. + +text(S) -> + mktext(string(S)). % convert to internal representation + +%% This function is used internally only, and expects a string on +%% the internal representation: + +mktext(S) -> + #text{s = S}. + + +%% ===================================================================== +%% @spec null_text(Characters::string()) -> document() +%% +%% @doc Similar to {@link text/1}, but the result is treated as having +%% zero width. This is regardless of the actual length of the string. +%% Null text is typically used for markup, which is supposed to have no +%% effect on the actual layout. +%% +%% The standard example is when formatting source code as HTML to be +%% placed within `<pre>...</pre>' markup, and using e.g. `<i>' and `<b>' +%% to make parts of the source code stand out. In this case, the markup +%% does not add to the width of the text when viewed in an HTML browser, +%% so the layout engine should simply pretend that the markup has zero +%% width. +%% +%% @see text/1 +%% @see empty/0 + +-spec null_text(string()) -> #text{}. + +null_text(S) -> + mktext(null_string(S)). % convert to internal representation + + +%% ===================================================================== +%% @spec text_par(Text::string()) -> document() +%% @equiv text_par(Text, 0) + +-spec text_par(string()) -> document(). + +text_par(S) -> + text_par(S, 0). + + +%% ===================================================================== +%% @spec text_par(Text::string(), Indentation::integer()) -> document() +%% +%% @doc Yields a document representing paragraph-formatted plain text. +%% The optional `Indentation' parameter specifies the extra indentation +%% of the first line of the paragraph. For example, `text_par("Lorem +%% ipsum dolor sit amet", N)' could represent +%% ```Lorem ipsum dolor +%% sit amet''' +%% if `N' = 0, or +%% ``` Lorem ipsum +%% dolor sit amet''' +%% if `N' = 2, or +%% ```Lorem ipsum dolor +%% sit amet''' +%% if `N' = -2. +%% +%% (The sign of the indentation is thus reversed compared to the {@link +%% par/2} function, and the behaviour varies slightly depending on the +%% sign in order to match the expected layout of a paragraph of text.) +%% +%% Note that this is just a utility function, which does all the work of +%% splitting the given string into words separated by whitespace and +%% setting up a {@link par/2. `par'} with the proper indentation, +%% containing a list of {@link text/1. `text'} elements. +%% +%% @see text_par/1 +%% @see text/1 +%% @see par/2 + +-spec text_par(string(), integer()) -> document(). 
+ +text_par(S, 0) -> + par(words(S)); +text_par(S, N) when N > 0 -> + nest(N, par(words(S), -N)); +text_par(S, N) when N < 0 -> + par(words(S), -N). + +words(S) -> + words(S, [], []). + +words([$\s | Cs], As, Ws) -> words_1(Cs, As, Ws); +words([$\t | Cs], As, Ws) -> words_1(Cs, As, Ws); +words([$\n | Cs], As, Ws) -> words_1(Cs, As, Ws); +words([C | Cs], As, Ws) -> words(Cs, [C | As], Ws); +words([], [], Ws) -> lists:reverse(Ws); +words([], As, Ws) -> words_1([], As, Ws). + +words_1(Cs, [], Ws) -> + words(Cs, [], Ws); +words_1(Cs, As, Ws) -> + words(Cs, [], [text(lists:reverse(As)) | Ws]). + + +%% ===================================================================== +%% @spec empty() -> document() +%% +%% @doc Yields the empty document, which has neither height nor width. +%% (`empty' is thus different from an empty {@link text/1. `text'} +%% string, which has zero width but height 1.) +%% +%% Empty documents are occasionally useful; in particular, they have the +%% property that `above(X, empty())' will force a new line after `X' +%% without leaving an empty line below it; since this is a common idiom, +%% the utility function {@link break/1} will place a given document in +%% such a context. +%% +%% @see text/1 + +-spec empty() -> 'null'. + +empty() -> + null. + + +%% ===================================================================== +%% @spec break(document()) -> document() +%% +%% @doc Forces a line break at the end of the given document. This is a +%% utility function; see {@link empty/0} for details. + +-spec break(document()) -> #above{}. + +break(D) -> + above(D, empty()). + + +%% ===================================================================== +%% @spec nest(N::integer(), D::document()) -> document() +%% +%% @doc Indents a document a number of character positions to the right. +%% Note that `N' may be negative, shifting the text to the left, or +%% zero, in which case `D' is returned unchanged. + +-spec nest(integer(), document()) -> document(). + +nest(N, D) -> + if N =:= 0 -> + D; + true -> + #nest{n = N, d = D} + end. + + +%% ===================================================================== +%% @spec beside(D1::document(), D2::document()) -> document() +%% +%% @doc Concatenates documents horizontally. Returns a document +%% representing the concatenation of the documents `D1' and `D2' such +%% that the last character of `D1' is horizontally adjacent to the first +%% character of `D2', in all possible layouts. (Note: any indentation of +%% `D2' is lost.) +%% +%% Examples: +%% ```ab cd => abcd +%% +%% ab ef ab +%% cd gh => cdef +%% gh''' + +-spec beside(document(), document()) -> #beside{}. + +beside(D1, D2) -> + #beside{d1 = D1, d2 = D2}. + + +%% ===================================================================== +%% @spec above(D1::document(), D2::document()) -> document() +%% +%% @doc Concatenates documents vertically. Returns a document +%% representing the concatenation of the documents `D1' and `D2' such +%% that the first line of `D2' follows directly below the last line of +%% `D1', and the first character of `D2' is in the same horizontal +%% column as the first character of `D1', in all possible layouts. +%% +%% Examples: +%% ```ab cd => ab +%% cd +%% +%% abc +%% abc fgh => de +%% de ij fgh +%% ij''' + +-spec above(document(), document()) -> #above{}. + +above(D1, D2) -> + #above{d1 = D1, d2 = D2}. 
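+
+%% A small usage sketch of the constructors above (not part of the
+%% documented examples):
+%%
+%%     Doc = above(beside(text("foo"), text("bar")), text("baz")),
+%%     io:put_chars(format(Doc))
+%%
+%% prints
+%%
+%%     foobar
+%%     baz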
+ + +%% ===================================================================== +%% @spec sep(Docs::[document()]) -> document() +%% +%% @doc Arranges documents horizontally or vertically, separated by +%% whitespace. Returns a document representing two alternative layouts +%% of the (nonempty) sequence `Docs' of documents, such that either all +%% elements in `Docs' are concatenated horizontally, and separated by a +%% space character, or all elements are concatenated vertically (without +%% extra separation). +%% +%% Note: If some document in `Docs' contains a line break, the vertical +%% layout will always be selected. +%% +%% Examples: +%% ``` ab +%% ab cd ef => ab cd ef | cd +%% ef +%% +%% ab ab +%% cd ef => cd +%% ef''' +%% +%% @see par/2 + +-spec sep([document()]) -> #sep{}. + +sep(Ds) -> + #sep{ds = Ds}. + + +%% ===================================================================== +%% @spec par(Docs::[document()]) -> document() +%% @equiv par(Ds, 0) + +-spec par([document()]) -> #sep{}. + +par(Ds) -> + par(Ds, 0). + + +%% ===================================================================== +%% @spec par(Docs::[document()], Offset::integer()) -> document() +%% +%% @doc Arranges documents in a paragraph-like layout. Returns a +%% document representing all possible left-aligned paragraph-like +%% layouts of the (nonempty) sequence `Docs' of documents. Elements in +%% `Docs' are separated horizontally by a single space character and +%% vertically with a single line break. All lines following the first +%% (if any) are indented to the same left column, whose indentation is +%% specified by the optional `Offset' parameter relative to the position +%% of the first element in `Docs'. For example, with an offset of -4, +%% the following layout can be produced, for a list of documents +%% representing the numbers 0 to 15: +%% +%% ``` 0 1 2 3 +%% 4 5 6 7 8 9 +%% 10 11 12 13 +%% 14 15''' +%% or with an offset of +2: +%% ```0 1 2 3 4 5 6 +%% 7 8 9 10 11 +%% 12 13 14 15''' +%% +%% The utility function {@link text_par/2} can be used to easily +%% transform a string of text into a `par' representation by splitting +%% it into words. +%% +%% Note that whenever a document in `Docs' contains a line break, it +%% will be placed on a separate line. Thus, neither a layout such as +%% ```ab cd +%% ef''' +%% nor +%% ```ab +%% cd ef''' +%% will be generated. However, a useful idiom for making the former +%% variant possible (when wanted) is `beside(par([D1, text("")], N), +%% D2)' for two documents `D1' and `D2'. This will break the line +%% between `D1' and `D2' if `D1' contains a line break (or if otherwise +%% necessary), and optionally further indent `D2' by `N' character +%% positions. The utility function {@link follow/3} creates this context +%% for two documents `D1' and `D2', and an optional integer `N'. +%% +%% @see par/1 +%% @see text_par/2 + +-spec par([document()], integer()) -> #sep{}. + +par(Ds, N) -> + mksep(Ds, N, true). + +%% Used internally only: + +mksep(Ds, N, P) when is_integer(N) -> + #sep{ds = Ds, i = N, p = P}. + + +%% ===================================================================== +%% @spec follow(D1::document(), D2::document()) -> document() +%% @equiv follow(D1, D2, 0) + +-spec follow(document(), document()) -> #beside{}. + +follow(D1, D2) -> + follow(D1, D2, 0). 
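A sketch of the two alternative layouts that `sep' represents (the documents and widths here are arbitrary): formatting the same document at different paper widths should select different alternatives.

    Ds = [prettypr:text("aaa"), prettypr:text("bbb"), prettypr:text("ccc")],
    Wide   = prettypr:format(prettypr:sep(Ds), 80),  %% "aaa bbb ccc"
    Narrow = prettypr:format(prettypr:sep(Ds), 8).   %% one element per line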
+
+
+%% =====================================================================
+%% @spec follow(D1::document(), D2::document(), Offset::integer()) ->
+%%           document()
+%%
+%% @doc Separates two documents by either a single space, or a line
+%% break and indentation. In other words, one of the layouts
+%% ```abc def'''
+%% or
+%% ```abc
+%%    def'''
+%% will be generated, using the optional offset in the latter case. This
+%% is often useful for typesetting programming language constructs.
+%%
+%% This is a utility function; see {@link par/2} for further details.
+%%
+%% @see follow/2
+
+-spec follow(document(), document(), integer()) -> #beside{}.
+
+follow(D1, D2, N) when is_integer(N) ->
+    beside(par([D1, nil()], N), D2).
+
+
+%% =====================================================================
+%% @spec floating(document()) -> document()
+%% @equiv floating(D, 0, 0)
+
+-spec floating(document()) -> #float{}.
+
+floating(D) ->
+    floating(D, 0, 0).
+
+
+%% =====================================================================
+%% @spec floating(D::document(), Hp::integer(), Vp::integer()) ->
+%%           document()
+%%
+%% @doc Creates a "floating" document. The result represents the same
+%% set of layouts as `D'; however, a floating document may be moved
+%% relative to other floating documents immediately beside or above it,
+%% according to their relative horizontal and vertical priorities. These
+%% priorities are set with the `Hp' and `Vp' parameters; if omitted,
+%% both default to zero.
+%%
+%% Notes: Floating documents appear to work well, but are currently less
+%% general than you might wish, losing effect when embedded in certain
+%% contexts. It is possible to nest floating operators (even with
+%% different priorities), but the effects may be difficult to predict.
+%% In any case, note that the way the algorithm reorders floating
+%% documents amounts to a "bubblesort", so don't expect it to be able to
+%% sort large sequences of floating documents quickly.
+
+-spec floating(document(), integer(), integer()) -> #float{}.
+
+floating(D, H, V) when is_integer(H), is_integer(V) ->
+    #float{d = D, h = H, v = V}.
+
+
+%% =====================================================================
+%% @spec format(D::document()) -> string()
+%% @equiv format(D, 80)
+
+-spec format(document()) -> string().
+
+format(D) ->
+    format(D, 80).
+
+
+%% =====================================================================
+%% @spec format(D::document(), PaperWidth::integer()) -> string()
+%% @equiv format(D, PaperWidth, 65)
+
+-spec format(document(), integer()) -> string().
+
+format(D, W) ->
+    format(D, W, 65).
+
+
+%% =====================================================================
+%% @spec format(D::document(), PaperWidth::integer(),
+%%           LineWidth::integer()) -> string()
+%% @throws no_layout
+%%
+%% @doc Computes a layout for a document and returns the corresponding
+%% text. See {@link document()} for further information. Throws
+%% `no_layout' if no layout could be selected.
+%%
+%% `PaperWidth' specifies the total width (in character positions) of
+%% the field for which the text is to be laid out. `LineWidth' specifies
+%% the desired maximum width (in number of characters) of the text
+%% printed on any single line, disregarding leading and trailing white
+%% space. These parameters need to be properly balanced in order to
+%% produce good layouts. By default, `PaperWidth' is 80 and `LineWidth'
+%% is 65.
+%%
+%% @see best/3
+
+-spec format(document(), integer(), integer()) -> string(). 
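A sketch of how the two width parameters interact (the document and the numbers below are made up for illustration): `LineWidth' limits the amount of text on a line even when the indentation would still keep the line inside the paper width.

    D = prettypr:nest(20, prettypr:sep([prettypr:text("foo"),
                                        prettypr:text("bar"),
                                        prettypr:text("baz")])),
    A = prettypr:format(D, 40, 30),  %% one indented line: "foo bar baz"
    B = prettypr:format(D, 40, 8).   %% ribbon too narrow; vertical layout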
+ +format(D, W, R) -> + case best(D, W, R) of + empty -> + throw(no_layout); + L -> layout(L) + end. + + +%% ===================================================================== +%% Representation: +%% +%% document() = #text{s = string()} +%% | #nest{n = integer(), d = document()} +%% | #beside{d1 = document(), d2 = document()} +%% | #above{d1 = document(), d2 = document()} +%% | #sep{ds = [document()], i = integer(), p = boolean()} +%% | null +%% +%% A `text' node simply represents a string (which should not contain +%% linefeed or carriage return characters). A `nest' node specifies a +%% relative indentation (in number of character positions) of a +%% document. The indentation could be a negative number. A `beside' node +%% specifies a horizontal composition of two documents, and an `above' +%% node a vertical composition. A `sep' node specifies a list of +%% alternative documents; the `i' field holds the extra indentation of +%% all documents but the first in `ds', and if the `p' field is `true' +%% then the list is typeset in paragraph mode. +%% +%% The function `best/3' yields a representation of a "best layout", +%% suitable for direct conversion to text, having the following +%% restricted form: +%% +%% layout() = #text{s = string()} +%% | #above{d1 = #text{s = string()}, d2 = layout()} +%% | #nest{n = integer(), d = layout()} +%% | null +%% +%% The function `layout/1' performs the final transformation to a single +%% flat string from the restricted document form. + +layout(L) -> + lists:reverse(layout(0, L, [])). + +layout(N, #above{d1 = #text{s = S}, d2 = L}, Cs) -> + layout(N, L, [$\n | flatrev(string_chars(S), indent(N, Cs))]); +layout(N, #nest{n = N1, d = L}, Cs) -> + layout(N + N1, L, Cs); +layout(N, #text{s = S}, Cs) -> + flatrev(string_chars(S), indent(N, Cs)); +layout(_N, null, Cs) -> + Cs. + +indent(N, Cs) when N >= 8 -> + indent(N - 8, [$\t | Cs]); +indent(N, Cs) when N > 0 -> + indent(N - 1, [$\s | Cs]); +indent(_N, Cs) -> + Cs. + +flatrev(Cs, As) -> + flatrev(Cs, As, []). + +flatrev([C = [_|_] | Cs], As, Ss) -> + flatrev(C, As, [Cs | Ss]); +flatrev([[] | Cs], As, Ss) -> + flatrev(Cs, As, Ss); +flatrev([C | Cs], As, Ss) -> + flatrev(Cs, [C | As], Ss); +flatrev([], As, [S | Ss]) -> + flatrev(S, As, Ss); +flatrev([], As, []) -> + As. + + +%% ===================================================================== +%% @spec best(document(), PaperWidth::integer(), +%% LineWidth::integer()) -> empty | document() +%% +%% @doc Selects a "best" layout for a document, creating a corresponding +%% fixed-layout document. If no layout could be produced, the atom +%% `empty' is returned instead. For details about `PaperWidth' and +%% `LineWidth', see {@link format/3}. The function is idempotent. +%% +%% One possible use of this function is to compute a fixed layout for a +%% document, which can then be included as part of a larger document. +%% For example: +%% ```above(text("Example:"), nest(8, best(D, W - 12, L - 6)))''' +%% will format `D' as a displayed-text example indented by 8, whose +%% right margin is indented by 4 relative to the paper width `W' of the +%% surrounding document, and whose maximum individual line length is +%% shorter by 6 than the line length `L' of the surrounding document. +%% +% This function is used by the {@link format/3} function to prepare a +%% document before being laid out as text. + +%% Recall that a document represents a set of possible layouts. 
`best' +%% selects the "best" layout of a document, returning a simplified +%% representation that can be given directly to `layout', unless the +%% returned value is `empty', signaling that no layout could be +%% produced. In addition, documents on the form `#union{d1 = D1, d2 = +%% D2}' and `#fit{d = D}' are used internally. +%% +%% Note: It is vital for this algorithm to maintain the invariant on +%% unions that the left argument has a longer first line than the right +%% argument! + +%% Contexts: +%% +%% #c_best_nest{w = integer(), r = integer(), i = integer()} +%% #c_above_nest{d = doc(), i = integer(), c = ctxt()} +%% #c_beside{d = doc(), c = ctxt()} +%% #c_text_beside{s = string(), c = ctxt()} +%% #c_sep_nest{ds = [doc()], i = integer(), p = boolean(), +%% c = ctxt()} +%% #c_best_nest_or{w = integer(), r = integer(), i = integer(), +%% d = doc()} +%% #c_fit{c = ctxt()} + +-record(c_best_nest, {w, r, i}). %% best(w, r, nest(i, *)) + +-record(c_above_nest, {d, i = 0, c}). %% above(*, nest(i, d)) + +-record(c_beside, {d, c}). %% beside(*, d) + +-record(c_text_beside, {s, c}). %% beside(text(s), *) + +%% p = false => sep([* | map(nest i, ds)]) +%% p = true => par([* | map(nest i, ds)]) + +-record(c_sep_nest, {ds, i, p, c}). + +-record(c_best_nest_or, {w, r, i, d}). %% nicest( + %% best(w, r, + %% nest(i, *)), + %% best(w, r, d)) + +-record(c_fit, {c}). %% fit(*) + +-record(c_float_beside, {d, h, v, c}). %% beside( + %% float(d, h, + %% v), + %% *) +-record(c_float_above_nest, {d, h, v, i, c}). %% above( + %% float(d, h, + %% v), + %% nest(i, *)) + +%% Contexts introduced: In case: +%% +%% c_best_nest top-level call +%% c_above_nest above (c_best_nest) +%% c_beside beside (c_best_nest) +%% c_text_beside text (c_beside) +%% c_sep_nest sep (c_best_nest) +%% c_best_nest_or union (c_best_nest) +%% c_fit fit +%% c_float_beside float (c_beside) +%% c_float_above_nest float (c_above_nest) + +%% Entry point for the layout algorithm: + +-spec best(document(), integer(), integer()) -> 'empty' | document(). + +best(D, W, R) -> + rewrite(D, #c_best_nest{w = W, r = R, i = 0}). + +rewrite(#text{s = S}, C) -> + case C of + #c_best_nest{i = N} -> + nest(N, mktext(S)); % finish + #c_above_nest{d = D1, i = N1, c = C1} -> + case C1 of + #c_best_nest{w = W, r = R, i = N} -> + %% Move out completed line. + %% (Note new indentation N1.) + nest(N, + above(mktext(S), + rewrite(D1, + #c_best_nest{w = W - N, + r = R, + i = N1}))); + #c_beside{d = D2, c = C2} -> + %% Associativity (not symmetric) + rewrite(above(mktext(S), + nest(N1, beside(D1, D2))), C2); + #c_text_beside{s = S1, c = C2} -> + %% Join segments (note the indentation!) + rewrite(above(mktext(concat(S1, S)), + nest(N1 + width(S1), D1)), + C2); + #c_sep_nest{ds = Ds, i = N, c = C2} -> + case is_empty_string(S) of + false -> + %% Move out the prefix (note the + %% indentation!) + W = width(S), + rewrite(beside( + mktext(S), + mksep([above(nil(), + nest(N1 - W, + D1)) + | Ds], + N - W, + C1#c_sep_nest.p)), + C2); + true -> + %% Like when we have just an empty + %% string and nothing else, this + %% forces us to expand the `sep'. The + %% line break will then force a normal + %% `sep' to select the vertical + %% alternative, but for a `par', we + %% need to force a line break before + %% the remaining elements are laid + %% out. (Note the indentation!) 
+ case C1#c_sep_nest.p of + false -> + rewrite(expand_sep( + above(nil(), + nest(N1, D1)), + Ds, N), + C2); + true -> + rewrite(expand_par( + above(nil(), + nest(N1, D1)), + Ds, N), + C2) + end + end; + #c_best_nest_or{w = W, r = R, i = N, d = D} -> + L = width(S), + case ((L + N) > W) or (L > R) of + true -> + %% The first line of the LHS layout is + %% not nice, so select the RHS. + rewrite(D, #c_best_nest{w = W, r = R, + i = N}); + false -> + %% Select the LHS. (Note the + %% indentation!) + rewrite(above(mktext(S), + nest(N1, D1)), + #c_best_nest{w = W, r = R, + i = N}) + end; + #c_float_beside{d = D2, c = C2} -> + rewrite(beside(D2, above(mktext(S), + nest(N1, D1))), + C2); + #c_float_above_nest{d = D2, i = N2, c = C2} -> + rewrite(above(D2, + nest(N2, above(mktext(S), + nest(N1, D1)))), + C2); + #c_above_nest{} -> + exit(badarg); % this can't happen + #c_fit{} -> + exit(badarg) % this can't happen + end; + #c_beside{d = D1, c = C1} -> + case C1 of + #c_above_nest{d = D2, i = N, c = C2} -> + case is_empty_string(S) of + false -> + %% Move out the prefix (note the + %% indentation!) + W = width(S), + rewrite(beside(mktext(S), + above( + beside(nil(), D1), + nest(N - W, D2))), + C2); + true -> + %% Pass on + rewrite(D1, #c_text_beside{s = S, + c = C1}) + end; + #c_text_beside{s = S1, c = C2} -> + %% Associativity (we simplify early) + rewrite(beside(mktext(concat(S1, S)), D1), + C2); + #c_sep_nest{ds = Ds, i = N, c = C2} -> + case is_empty_string(S) of + false -> + %% Move out the prefix (note the + %% indentation!) + W = width(S), + rewrite(beside(mktext(S), + mksep( + [beside(nil(), D1) + | Ds], + N - W, + C1#c_sep_nest.p)), + C2); + true -> + %% Pass on + rewrite(D1, #c_text_beside{s = S, + c = C1}) + end; + #c_best_nest_or{w = W, r = R, i = N, d = D} -> + L = width(S), + case ((L + N) > W) or (L > R) of + true -> + %% The first line of the LHS layout is + %% not nice, so select the RHS. + rewrite(D, #c_best_nest{w = W, r = R, + i = N}); + false -> + %% Pass on + rewrite(D1, #c_text_beside{s = S, + c = C1}) + end; + #c_float_beside{d = D2, c = C2} -> + rewrite(beside(D2, beside(mktext(S), D1)), + C2); + #c_float_above_nest{d = D2, i = N, c = C2} -> + rewrite(above(D2, + nest(N, beside(mktext(S), D1))), + C2); + _ -> + %% Pass on + rewrite(D1, #c_text_beside{s = S, c = C1}) + end; + #c_text_beside{s = S1, c = C1} -> + rewrite(mktext(concat(S1, S)), C1); % join segments + #c_sep_nest{ds = Ds, i = N, c = C1} -> + case is_empty_string(S) of + false -> + %% Move out the prefix (note the indentation!) + rewrite(beside(mktext(S), + mksep([nil() | Ds], + N - width(S), + C#c_sep_nest.p)), + C1); + true -> + %% This is the only place where we are forced to + %% introduce a union. Recall the invariant that the + %% left argument must have a longer first line than + %% the right argument; also recall that `Ds' is + %% always nonempty here. Now, since [D | Ds] + %% contains at least two elements, the first line of + %% the horizontal layout will always contain at + %% least one space character more than the first + %% line of the vertical layout. + case C#c_sep_nest.p of + false -> + rewrite(expand_sep(nil(), Ds, N), C1); + true -> + rewrite(expand_par(nil(), Ds, N), C1) + end + end; + #c_best_nest_or{w = W, r = R, i = N, d = D} -> + L = width(S), + case ((L + N) > W) or (L > R) of + true -> + %% The first line of the LHS layout is not + %% nice, so select the RHS (which contains + %% at least two lines). 
+ rewrite(D, #c_best_nest{w = W, r = R, i = N}); + false -> + nest(N, mktext(S)) % finish + end; + #c_fit{c = C1} -> + %% Identity: + rewrite(mktext(S), C1); + #c_float_beside{d = D1, c = C1} -> + rewrite(beside(D1, mktext(S)), C1); + #c_float_above_nest{d = D1, i = N, c = C1} -> + rewrite(above(D1, nest(N, mktext(S))), C1) + end; +rewrite(#nest{n = N, d = D}, C) -> + case C of + #c_best_nest{w = W, r = R, i = N1} -> + %% Note that we simplify by not creating an actual `nest' + %% node, but instead just modifying the context: + %% rewrite(nest(N1, nest(N, D))) = rewrite(nest(N1 + N, D)). + rewrite(D, #c_best_nest{w = W, r = R, i = N + N1}); + #c_above_nest{d = D1, i = N1, c = C1} -> + %% Distributivity + %% (Note the indentation!) + rewrite(nest(N, above(D, nest(N1 - N, D1))), C1); + #c_beside{d = D1, c = C1} -> + %% Associativity (not symmetric): + rewrite(nest(N, beside(D, D1)), C1); + #c_text_beside{} -> + rewrite(D, C); % (`beside' kills RHS indentation) + #c_sep_nest{ds = Ds, i = N1, c = C1} -> + %% Distributivity (in the vertical form, the RHS + %% indentation is killed) + rewrite(nest(N, mksep([D | Ds], + N1 - N, + C#c_sep_nest.p)), + C1); + #c_fit{c = C1} -> + %% Distributivity: + rewrite(nest(N, fit(D)), C1); + #c_float_beside{} -> + rewrite(D, C); % (`beside' kills RHS indentation) + #c_float_above_nest{d = D1, h = H, v = V, i = N1, + c = C1} -> + rewrite(D, #c_float_above_nest{d = D1, h = H, v = V, + i = N + N1, c = C1}); + #c_best_nest_or{} -> + exit(badarg) % this can't happen + end; +rewrite(#above{d1 = D1, d2 = D2}, C) -> + case C of + #c_above_nest{d = D3, i = N, c = C1} -> + %% Associativity: + %% (Note the indentation!) + rewrite(D1, #c_above_nest{d = above(D2, nest(N, D3)), + c = C1}); + #c_beside{d = D3, c = C1} -> + %% Associativity (not symmetric): + rewrite(above(D1, beside(D2, D3)), C1); + #c_fit{c = C1} -> + rewrite(empty, C1); % this is the whole point of `fit' + _ -> + rewrite(D1, #c_above_nest{d = D2, c = C}) % pass on + end; +rewrite(#beside{d1 = D1, d2 = D2}, C) -> + case C of + #c_beside{d = D3, c = C1} -> + %% Associativity: + rewrite(D1, #c_beside{d = beside(D2, D3), c = C1}); + #c_fit{c = C1} -> + %% Distributivity: + rewrite(beside(fit(D1), fit(D2)), C1); + _ -> + rewrite(D1, #c_beside{d = D2, c = C}) % pass on + end; +rewrite(#sep{ds = Ds, i = N, p = P}, C) -> + case C of + #c_fit{c = C1} -> + %% The vertical layout is thus impossible, and the + %% extra indentation has no effect. + rewrite(fit(horizontal(Ds)), C1); + #c_float_beside{d = D1, c = C1} -> + %% Floats are not moved in or out of sep's + rewrite(beside(D1, mksep(Ds, N, P)), C1); + #c_float_above_nest{d = D1, i = N1, c = C1} -> + %% Floats are not moved in or out of sep's + rewrite(above(D1, nest(N1, mksep(Ds, N, P))), C1); + _ -> + enter_sep(Ds, N, P, C) % pass on + end; +rewrite(#union{d1 = D1, d2 = D2}, C) -> + %% Introduced by the occurrence of an empty `text' string in a + %% `sep' context. See the note above about the invariant for + %% unions! + case C of + #c_best_nest{w = W, r = R, i = N} -> + %% Pass on + rewrite(D1, #c_best_nest_or{w = W, r = R, i = N, + d = D2}); + #c_above_nest{d = D3, i = N, c = C1} -> + %% Distributivity: + %% (Note the indentation!) 
+ rewrite(union(above(D1, nest(N, D3)), + above(D2, nest(N, D3))), + C1); + #c_beside{d = D3, c = C1} -> + %% Distributivity: + rewrite(union(beside(D1, D3), beside(D2, D3)), C1); + #c_text_beside{s = S, c = C1} -> + %% Distributivity: + rewrite(union(beside(mktext(S), D1), + beside(mktext(S), D2)), + C1); + #c_sep_nest{ds = Ds, i = N, c = C1} -> + %% Distributivity: + rewrite(union(mksep([D1 | Ds], N, C#c_sep_nest.p), + mksep([D2 | Ds], N, C#c_sep_nest.p)), + C1); + #c_best_nest_or{w = W, r = R, i = N, d = D3} -> + %% Associativity: + rewrite(D1, #c_best_nest_or{w = W, r = R, i = N, + d = union(D2, D3)}); + #c_fit{c = C1} -> + %% Distributivity: + rewrite(union(fit(D1), fit(D2)), C1); + #c_float_beside{d = D3, h = H, v = V, c = C1} -> + %% Distributivity: + rewrite(union(beside(floating(D3, H, V), D1), + beside(floating(D3, H, V), D2)), + C1); + #c_float_above_nest{d = D3, h = H, v = V, i = N, c = C1} -> + %% Distributivity: + rewrite(union(above(floating(D3, H, V), nest(N, D1)), + above(floating(D3, H, V), nest(N, D2))), + C1) + end; +rewrite(empty, C) -> + %% Introduced by `sep'. + case C of + #c_best_nest{} -> + empty; % preserve `empty' + #c_above_nest{c = C1} -> + rewrite(empty, C1); % preserve `empty' + #c_beside{c = C1} -> + rewrite(empty, C1); % preserve `empty' + #c_text_beside{c = C1} -> + rewrite(empty, C1); % preserve `empty' + #c_sep_nest{c = C1} -> + rewrite(empty, C1); % preserve `empty' + #c_best_nest_or{w = W, r = R, i = N, d = D} -> + %% Try the other layout + rewrite(D, #c_best_nest{w = W, r = R, i = N}); + #c_fit{c = C1} -> + rewrite(empty, C1); % preserve `empty' + #c_float_beside{c = C1} -> + rewrite(empty, C1); % preserve `empty' + #c_float_above_nest{c = C1} -> + rewrite(empty, C1) % preserve `empty' + end; +rewrite(#fit{d = D}, C) -> + %% Introduced by the occurrence of an empty `text' string in a + %% `sep' context. 
+ case C of + #c_fit{} -> + %% Idempotency: + rewrite(D, C); + _ -> + rewrite(D, #c_fit{c = C}) % pass on + end; +rewrite(#float{d = D, h = H, v = V}, C) -> + case C of + #c_beside{d = D1, c = C1} -> + case C1 of + #c_float_beside{d = D2, h = H1, v = V1, c = C2} + when H1 > H -> + %% Move left + rewrite(beside(floating(D, H, V), + beside(floating(D2, H1, V1), + D1)), + C2); + #c_float_beside{d = D2, h = H1, v = V1, c = C2} + when V1 /= V -> + %% Align vertically + rewrite(above(floating(D2, H1, V1), + beside(floating(D, H, V), D1)), + C2); + #c_float_above_nest{d = D2, h = H1, v = V1, + i = N1, c = C2} + when V1 > V -> + %% Move up (note the indentation, and note + %% that all three become aligned vertically) + rewrite(above(nest(N1, floating(D, H, V)), + above(floating(D2, H1, V1), + D1)), + C2); + #c_float_above_nest{d = D2, h = H1, v = V1, + i = _N1, c = C2} + when V1 == V, H1 /= H -> + %% Align horizontally + rewrite(beside(floating(D2, H1, V1), + beside(floating(D, H, V), + D1)), + C2); + _ -> + rewrite(D1, #c_float_beside{d = D, h = H, + v = V, c = C1}) + end; + #c_above_nest{d = D1, i = N, c = C1} -> + case C1 of + #c_float_beside{d = D2, h = H1, v = V1, c = C2} + when H1 > H -> + %% Move left (indentation is lost; note that + %% all three become aligned horizontally) + rewrite(beside(floating(D, H, V), + beside(floating(D2, H1, V1), + D1)), + C2); + #c_float_beside{d = D2, h = H1, v = V1, c = C2} + when V1 /= V -> + %% Align vertically + rewrite(above(floating(D2, H1, V1), + above(floating(D, H, V), + nest(N, D1))), + C2); + #c_float_above_nest{d = D2, h = H1, v = V1, + i = N1, c = C2} + when V1 > V -> + %% Move up (note the indentation) + rewrite(above(nest(N1, floating(D, H, V)), + above(floating(D2, H1, V1), + nest(N + N1, D1))), + C2); + #c_float_above_nest{d = D2, h = H1, v = V1, + i = _N1, c = C2} + when V1 == V, H1 /= H -> + %% Align horizontally + rewrite(beside( + floating(D2, H1, V1), + above(floating(D, H, V), + nest(N, D1))), + C2); + _ -> + rewrite(D1, #c_float_above_nest{d = D, h = H, + v = V, i = N, + c = C1}) + end; + #c_fit{c = C1} -> + rewrite(floating(fit(D), H, V), C1); + #c_float_beside{d = D1, h = H1, v = V1, c = C1} -> + if H1 > H -> + %% Swap + rewrite(beside(floating(D, H, V), + floating(D1, H1, V1)), + C1); + V1 /= V -> + %% Align vertically + rewrite(above(floating(D, H, V), + floating(D1, H1, V1)), + C1); + true -> + %% Drop the 'float' wrapper of the rightmost. + rewrite(beside(floating(D1, H1, V1), D), C1) + end; + #c_float_above_nest{d = D1, h = H1, v = V1, i = N, + c = C1} -> + if V1 > V -> + %% Swap (note the indentation) + rewrite(above(nest(N, floating(D, H, V)), + floating(D1, H1, V1)), + C1); + V1 == V, H1 /= H -> + %% Align horizontally + rewrite(beside(floating(D, H, V), + floating(D1, H1, V1)), + C1); + true -> + %% Drop the 'float' wrapper of the lower. + rewrite(above(floating(D1, H1, V1), + nest(N, D)), + C1) + end; + _ -> + %% All other cases simply drop the `float' wrapper. + rewrite(D, C) + end; +rewrite(null, C) -> + case C of + #c_best_nest{} -> + null; % done + #c_above_nest{d = D, i = N, c = C1} -> + rewrite(nest(N, D), C1); + #c_beside{d = D, c = C1} -> + rewrite(D, C1); + #c_text_beside{s = S, c = C1} -> + rewrite(mktext(S), C1); + #c_sep_nest{} -> + %% In a `nest' context, an empty document behaves like + %% the empty string. + rewrite(nil(), C); + #c_best_nest_or{w = W, r = R, i = N} -> + %% An empty document as "nice" as it can be, so we + %% discard the alternative. 
+ rewrite(null, #c_best_nest{w = W, r = R, i = N}); + #c_fit{c = C1} -> + rewrite(null, C1); % identity + #c_float_beside{d = D, h = _H, v = _V, c = C1} -> + %% We just remove the float wrapper; cf. below. + rewrite(beside(D, null), C1); + #c_float_above_nest{d = D, h = _H, v = _V, i = N, c = C1} -> + %% It is important that this case just removes the + %% float wrapper; the empty document must be preserved + %% until later, or it will not be useful for forcing + %% line breaks. + rewrite(above(D, nest(N, null)), C1) + end. + +%% Both `null' and `empty' are already in use, so what do you do? + +nil() -> + text(""). + +hspace() -> + text([$\s]). + +union(D1, D2) -> + #union{d1 = D1, d2 = D2}. + +fit(D) -> + #fit{d = D}. + +enter_sep(Ds, N, P, C) -> + case Ds of + [D] -> + rewrite(D, C); % Handle this case separately + [D | Ds1] -> + %% Note that we never build a `sep'-context with an + %% empty "tail" list! `Ds1' is nonempty here! + rewrite(D, #c_sep_nest{ds = Ds1, i = N, p = P, c = C}) + end. + +%% When we expand a `sep', the extra indentation appears as `nest' +%% operators, but not until then. + +expand_sep(D, Ds, N) -> + union(fit(horizontal([D | Ds])), + vertical([D | [nest(N, D1) || D1 <- Ds]])). + +expand_par(D, [D1 | Ds] = DL, N) -> + union(beside(fit(D), + beside(hspace(), + mksep([fit(D1) | Ds], N - 1, true))), + above(D, nest(N, par(DL)))). + +horizontal(Ds) -> + foldr1(fun (D1, D2) -> + beside(D1, beside(hspace(), D2)) + end, Ds). + +vertical(Ds) -> + foldr1(fun above/2, Ds). + +foldr1(_F, [H]) -> + H; +foldr1(F, [H | T]) -> + F(H, foldr1(F, T)). + +%% Internal representation of strings; stores the field width and does +%% not perform list concatenation until the text is requested. Strings +%% are thus deep lists whose first element is the length of the string. +%% Null strings are strings whose "official width" is zero, typically +%% used for markup that is not supposed to affect the indentation. + +string(S) -> + [strwidth(S) | S]. + +null_string(S) -> + [0 | S]. + +concat([_ | []], [_ | _] = S) -> + S; +concat([_ | _] = S, [_ | []]) -> + S; +concat([L1 | S1], [L2 | S2]) -> + [L1 + L2 | [S1 | S2]]. + +string_chars([_ | S]) -> + S. + +width(S) -> + hd(S). + +is_empty_string([_ | []]) -> + true; +is_empty_string([_ | _]) -> + false. + +%% We need to use `strwidth' instead of list `length', to properly +%% handle Tab characters in the text segments. Note that the width of +%% tabs is hard-coded as 8 character positions, and that strings are +%% individually considered to be aligned at column 0; Tab characters are +%% not really nice to give to a prettyprinter, and this seems to be the +%% best interpretation. + +strwidth(S) -> + strwidth(S, 0). + +strwidth([$\t | Cs], N) -> + strwidth(Cs, N - (N rem 8) + 8); +strwidth([_ | Cs], N) -> + strwidth(Cs, N + 1); +strwidth([], N) -> + N. + +%% ===================================================================== diff --git a/lib/syntax_tools/src/syntax_tools.app.src b/lib/syntax_tools/src/syntax_tools.app.src new file mode 100644 index 0000000000..dc0b9edd62 --- /dev/null +++ b/lib/syntax_tools/src/syntax_tools.app.src @@ -0,0 +1,17 @@ +% This is an -*- erlang -*- file. + +{application, syntax_tools, + [{description, "Syntax tools"}, + {vsn, "%VSN%"}, + {modules, [epp_dodger, + erl_comment_scan, + erl_prettypr, + erl_recomment, + erl_syntax, + erl_syntax_lib, + erl_tidy, + igor, + prettypr]}, + {registered,[]}, + {applications, [stdlib]}, + {env, []}]}. 
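A small sketch (not part of the release) of how the module list declared above can be inspected at runtime once the application resource file is known to the code server:

    application:load(syntax_tools),
    {ok, Mods} = application:get_key(syntax_tools, modules),
    true = lists:member(prettypr, Mods).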
diff --git a/lib/syntax_tools/src/syntax_tools.appup.src b/lib/syntax_tools/src/syntax_tools.appup.src new file mode 100644 index 0000000000..54a63833e6 --- /dev/null +++ b/lib/syntax_tools/src/syntax_tools.appup.src @@ -0,0 +1 @@ +{"%VSN%",[],[]}. |
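For comparison, a hypothetical appup with actual upgrade and downgrade instructions (the version number and the instruction below are invented; this release deliberately declares none) would follow the same {Vsn, UpFromList, DownToList} shape:

    {"%VSN%",
     [{"1.6.3", [{load_module, prettypr}]}],   %% hypothetical upgrade from 1.6.3
     [{"1.6.3", [{load_module, prettypr}]}]}.  %% hypothetical downgrade to 1.6.3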