aboutsummaryrefslogtreecommitdiffstats
path: root/lib/syntax_tools/src/epp_dodger.erl
diff options
context:
space:
mode:
Diffstat (limited to 'lib/syntax_tools/src/epp_dodger.erl')
-rw-r--r--lib/syntax_tools/src/epp_dodger.erl791
1 files changed, 791 insertions, 0 deletions
diff --git a/lib/syntax_tools/src/epp_dodger.erl b/lib/syntax_tools/src/epp_dodger.erl
new file mode 100644
index 0000000000..7aef549574
--- /dev/null
+++ b/lib/syntax_tools/src/epp_dodger.erl
@@ -0,0 +1,791 @@
+%% =====================================================================
+%% This library is free software; you can redistribute it and/or modify
+%% it under the terms of the GNU Lesser General Public License as
+%% published by the Free Software Foundation; either version 2 of the
+%% License, or (at your option) any later version.
+%%
+%% This library is distributed in the hope that it will be useful, but
+%% WITHOUT ANY WARRANTY; without even the implied warranty of
+%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+%% Lesser General Public License for more details.
+%%
+%% You should have received a copy of the GNU Lesser General Public
+%% License along with this library; if not, write to the Free Software
+%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+%% USA
+%%
+%% $Id$
+%%
+%% @copyright 2001-2006 Richard Carlsson
+%% @author Richard Carlsson <[email protected]>
+%% @end
+%% =====================================================================
+
+%% @doc `epp_dodger' - bypasses the Erlang preprocessor.
+%%
+%% <p>This module tokenises and parses most Erlang source code without
+%% expanding preprocessor directives and macro applications, as long as
+%% these are syntactically "well-behaved". Because the normal parse
+%% trees of the `erl_parse' module cannot represent these things
+%% (normally, they are expanded by the Erlang preprocessor {@link
+%% //stdlib/epp} before the parser sees them), an extended syntax tree
+%% is created, using the {@link erl_syntax} module.</p>
+
+
+%% NOTES:
+%%
+%% * It's OK if the result does not parse - then at least nothing
+%% strange happens, and the user can resort to full preprocessing.
+%% However, we must avoid generating a token stream that is accepted by
+%% the parser, but has a different meaning than the intended. A typical
+%% example is when someone uses token-level string concatenation with
+%% macros, as in `"foo" ?bar' (where `?bar' expands to a string). If we
+%% replace the tokens `? bar' with `( ... )', to preserve precedence,
+%% the result will be parsed as an application `"foo" ( ... )' and cause
+%% trouble later on. We must detect such cases and report an error.
+%%
+%% * It is pointless to add a mechanism for tracking which macros are
+%% known to take arguments, and which are known to take no arguments,
+%% since a lot of the time we will not have seen the macro definition
+%% anyway (it's usually in a header file). Hence, we try to use
+%% heuristics instead. In most cases, the token sequence `? foo ('
+%% indicates that it is a call of a macro that is supposed to take
+%% arguments, but e.g., in the context `: ? foo (', the argument list
+%% typically belongs to a remote function call, as in `m:?f(...)' and
+%% should be parsed as `m:(?f)(...)' unless it is actually a try-clause
+%% pattern such as `throw:?f(...) ->'.
+%%
+%% * We do our best to make macros without arguments pass the parsing
+%% stage transparently. Atoms are accepted in most contexts, but
+%% variables are not, so we use only atoms to encode these macros.
+%% Sadly, the parsing sometimes discards even the line number info from
+%% atom tokens, so we can only use the actual characters for this.
+%%
+%% * We recognize `?m(...' at the start of a form and prevent this from
+%% being interpreted as a macro with arguments, since it is probably a
+%% function definition. Likewise with attributes `-?m(...'.
+
+-module(epp_dodger).
+
+-export([parse_file/1, quick_parse_file/1, parse_file/2,
+ quick_parse_file/2, parse/1, quick_parse/1, parse/2,
+ quick_parse/2, parse/3, quick_parse/3, parse_form/2,
+ parse_form/3, quick_parse_form/2, quick_parse_form/3,
+ format_error/1, tokens_to_string/1]).
+
+
+%% The following should be: 1) pseudo-uniquely identifiable, and 2)
+%% cause nice looking error messages when the parser has to give up.
+
+-define(macro_call, '? <macro> (').
+-define(atom_prefix, "? ").
+-define(var_prefix, "?,").
+-define(pp_form, '?preprocessor declaration?').
+
+
+%% @type errorinfo() = {ErrorLine::integer(),
+%% Module::atom(),
+%% Descriptor::term()}.
+%%
+%% This is a so-called Erlang I/O ErrorInfo structure; see the {@link
+%% //stdlib/io} module for details.
+
+
+%% =====================================================================
+%% @spec parse_file(File) -> {ok, Forms} | {error, errorinfo()}
+%% File = file:filename()
+%% Forms = [erl_syntax:syntaxTree()]
+%%
+%% @equiv parse_file(File, [])
+
+parse_file(File) ->
+ parse_file(File, []).
+
+%% @spec parse_file(File, Options) -> {ok, Forms} | {error, errorinfo()}
+%% File = file:filename()
+%% Options = [term()]
+%% Forms = [erl_syntax:syntaxTree()]
+%%
+%% @doc Reads and parses a file. If successful, `{ok, Forms}'
+%% is returned, where `Forms' is a list of abstract syntax
+%% trees representing the "program forms" of the file (cf.
+%% `erl_syntax:is_form/1'). Otherwise, `{error,
+%% errorinfo()}' is returned, typically if the file could not be
+%% opened. Note that parse errors show up as error markers in the
+%% returned list of forms; they do not cause this function to fail or
+%% return `{error,errorinfo()}'.
+%%
+%% Options:
+%% <dl>
+%% <dt>{@type {no_fail, boolean()@}}</dt>
+%% <dd>If `true', this makes `epp_dodger' replace any program forms
+%% that could not be parsed with nodes of type `text' (see {@link
+%% erl_syntax:text/1}), representing the raw token sequence of the
+%% form, instead of reporting a parse error. The default value is
+%% `false'.</dd>
+%% <dt>{@type {clever, boolean()@}}</dt>
+%% <dd>If set to `true', this makes `epp_dodger' try to repair the
+%% source code as it seems fit, in certain cases where parsing would
+%% otherwise fail. Currently, it inserts `++'-operators between string
+%% literals and macros where it looks like concatenation was intended.
+%% The default value is `false'.</dd>
+%% </dl>
+%%
+%% @see parse/2
+%% @see quick_parse_file/1
+%% @see erl_syntax:is_form/1
+
+parse_file(File, Options) ->
+ parse_file(File, fun parse/3, Options).
+
+%% @spec quick_parse_file(File) -> {ok, Forms} | {error, errorinfo()}
+%% File = file:filename()
+%% Forms = [erl_syntax:syntaxTree()]
+%%
+%% @equiv quick_parse_file(File, [])
+
+quick_parse_file(File) ->
+ quick_parse_file(File, []).
+
+%% @spec quick_parse_file(File, Options) ->
+%% {ok, Forms} | {error, errorinfo()}
+%% File = file:filename()
+%% Options = [term()]
+%% Forms = [erl_syntax:syntaxTree()]
+%%
+%% @doc Similar to {@link parse_file/2}, but does a more quick-and-dirty
+%% processing of the code. Macro definitions and other preprocessor
+%% directives are discarded, and all macro calls are replaced with
+%% atoms. This is useful when only the main structure of the code is of
+%% interest, and not the details. Furthermore, the quick-parse method
+%% can usually handle more strange cases than the normal, more exact
+%% parsing.
+%%
+%% Options: see {@link parse_file/2}. Note however that for
+%% `quick_parse_file/2', the option `no_fail' is `true' by default.
+%%
+%% @see quick_parse/2
+%% @see parse_file/2
+
+quick_parse_file(File, Options) ->
+ parse_file(File, fun quick_parse/3, Options ++ [no_fail]).
+
+parse_file(File, Parser, Options) ->
+ case file:open(File, [read]) of
+ {ok, Dev} ->
+ try Parser(Dev, 1, Options)
+ after ok = file:close(Dev)
+ end;
+ {error, _} = Error ->
+ Error
+ end.
+
+
+%% =====================================================================
+%% @spec parse(IODevice) -> {ok, Forms} | {error, errorinfo()}
+%% @equiv parse(IODevice, 1)
+
+parse(Dev) ->
+ parse(Dev, 1).
+
+%% @spec parse(IODevice, StartLine) -> {ok, Forms} | {error, errorinfo()}
+%% IODevice = pid()
+%% StartLine = integer()
+%% Forms = [erl_syntax:syntaxTree()]
+%%
+%% @equiv parse(IODevice, StartLine, [])
+%% @see parse/1
+
+parse(Dev, L) ->
+ parse(Dev, L, []).
+
+%% @spec parse(IODevice, StartLine, Options) ->
+%% {ok, Forms} | {error, errorinfo()}
+%% IODevice = pid()
+%% StartLine = integer()
+%% Options = [term()]
+%% Forms = [erl_syntax:syntaxTree()]
+%%
+%% @doc Reads and parses program text from an I/O stream. Characters are
+%% read from `IODevice' until end-of-file; apart from this, the
+%% behaviour is the same as for {@link parse_file/2}. `StartLine' is the
+%% initial line number, which should be a positive integer.
+%%
+%% @see parse/2
+%% @see parse_file/2
+%% @see parse_form/2
+%% @see quick_parse/3
+
+parse(Dev, L0, Options) ->
+ parse(Dev, L0, fun parse_form/3, Options).
+
+%% @spec quick_parse(IODevice) -> {ok, Forms} | {error, errorinfo()}
+%% @equiv quick_parse(IODevice, 1)
+
+quick_parse(Dev) ->
+ quick_parse(Dev, 1).
+
+%% @spec quick_parse(IODevice, StartLine) ->
+%% {ok, Forms} | {error, errorinfo()}
+%% IODevice = pid()
+%% StartLine = integer()
+%% Forms = [erl_syntax:syntaxTree()]
+%%
+%% @equiv quick_parse(IODevice, StartLine, [])
+%% @see quick_parse/1
+
+quick_parse(Dev, L) ->
+ quick_parse(Dev, L, []).
+
+%% @spec (IODevice, StartLine, Options) ->
+%% {ok, Forms} | {error, errorinfo()}
+%% IODevice = pid()
+%% StartLine = integer()
+%% Options = [term()]
+%% Forms = [erl_syntax:syntaxTree()]
+%%
+%% @doc Similar to {@link parse/3}, but does a more quick-and-dirty
+%% processing of the code. See {@link quick_parse_file/2} for details.
+%%
+%% @see quick_parse/2
+%% @see quick_parse_file/2
+%% @see quick_parse_form/2
+%% @see parse/3
+
+quick_parse(Dev, L0, Options) ->
+ parse(Dev, L0, fun quick_parse_form/3, Options).
+
+parse(Dev, L0, Parser, Options) ->
+ parse(Dev, L0, [], Parser, Options).
+
+parse(Dev, L0, Fs, Parser, Options) ->
+ case Parser(Dev, L0, Options) of
+ {ok, none, L1} ->
+ parse(Dev, L1, Fs, Parser, Options);
+ {ok, F, L1} ->
+ parse(Dev, L1, [F | Fs], Parser, Options);
+ {error, IoErr, L1} ->
+ parse(Dev, L1, [{error, IoErr} | Fs], Parser, Options);
+ {eof, _L1} ->
+ {ok, lists:reverse(Fs)}
+ end.
+
+
+%% =====================================================================
+%% @spec parse_form(IODevice, StartLine) -> {ok, Form, LineNo}
+%% | {eof, LineNo}
+%% | {error, errorinfo(), LineNo}
+%% IODevice = pid()
+%% StartLine = integer()
+%% Form = erl_syntax:syntaxTree()
+%% LineNo = integer()
+%%
+%% @equiv parse_form(IODevice, StartLine, [])
+%%
+%% @see quick_parse_form/2
+
+parse_form(Dev, L0) ->
+ parse_form(Dev, L0, []).
+
+%% @spec parse_form(IODevice, StartLine, Options) ->
+%% {ok, Form, LineNo}
+%% | {eof, LineNo}
+%% | {error, errorinfo(), LineNo}
+%%
+%% IODevice = pid()
+%% StartLine = integer()
+%% Options = [term()]
+%% Form = erl_syntax:syntaxTree()
+%% LineNo = integer()
+%%
+%% @doc Reads and parses a single program form from an I/O stream.
+%% Characters are read from `IODevice' until an end-of-form
+%% marker is found (a period character followed by whitespace), or until
+%% end-of-file; apart from this, the behaviour is similar to that of
+%% `parse/3', except that the return values also contain the
+%% final line number given that `StartLine' is the initial
+%% line number, and that `{eof, LineNo}' may be returned.
+%%
+%% @see parse/3
+%% @see parse_form/2
+%% @see quick_parse_form/3
+
+parse_form(Dev, L0, Options) ->
+ parse_form(Dev, L0, fun normal_parser/2, Options).
+
+%% @spec quick_parse_form(IODevice, StartLine) ->
+%% {ok, Form, LineNo}
+%% | {eof, LineNo}
+%% | {error, errorinfo(), LineNo}
+%% IODevice = pid()
+%% StartLine = integer()
+%% Form = erl_syntax:syntaxTree() | none
+%% LineNo = integer()
+%%
+%% @equiv quick_parse_form(IODevice, StartLine, [])
+%%
+%% @see parse_form/2
+
+quick_parse_form(Dev, L0) ->
+ quick_parse_form(Dev, L0, []).
+
+%% @spec quick_parse_form(IODevice, StartLine, Options) ->
+%% {ok, Form, LineNo}
+%% | {eof, LineNo}
+%% | {error, errorinfo(), LineNo}
+%%
+%% IODevice = pid()
+%% StartLine = integer()
+%% Options = [term()]
+%% Form = erl_syntax:syntaxTree()
+%% LineNo = integer()
+%%
+%% @doc Similar to {@link parse_form/3}, but does a more quick-and-dirty
+%% processing of the code. See {@link quick_parse_file/2} for details.
+%%
+%% @see parse/3
+%% @see quick_parse_form/2
+%% @see parse_form/3
+
+quick_parse_form(Dev, L0, Options) ->
+ parse_form(Dev, L0, fun quick_parser/2, Options).
+
+-record(opt, {clever = false :: boolean()}).
+
+parse_form(Dev, L0, Parser, Options) ->
+ NoFail = proplists:get_bool(no_fail, Options),
+ Opt = #opt{clever = proplists:get_bool(clever, Options)},
+ case io:scan_erl_form(Dev, "", L0) of
+ {ok, Ts, L1} ->
+ case catch {ok, Parser(Ts, Opt)} of
+ {'EXIT', Term} ->
+ {error, io_error(L1, {unknown, Term}), L1};
+ {error, Term} ->
+ IoErr = io_error(L1, Term),
+ {error, IoErr, L1};
+ {parse_error, _IoErr} when NoFail ->
+ {ok, erl_syntax:set_pos(
+ erl_syntax:text(tokens_to_string(Ts)),
+ start_pos(Ts, L1)),
+ L1};
+ {parse_error, IoErr} ->
+ {error, IoErr, L1};
+ {ok, F} ->
+ {ok, F, L1}
+ end;
+ {error, _IoErr, _L1} = Err -> Err;
+ {eof, _L1} = Eof -> Eof
+ end.
+
+io_error(L, Desc) ->
+ {L, ?MODULE, Desc}.
+
+start_pos([T | _Ts], _L) ->
+ element(2, T);
+start_pos([], L) ->
+ L.
+
+%% Exception-throwing wrapper for the standard Erlang parser stage
+
+parse_tokens(Ts) ->
+ parse_tokens(Ts, fun fix_form/1).
+
+parse_tokens(Ts, Fix) ->
+ case erl_parse:parse_form(Ts) of
+ {ok, Form} ->
+ Form;
+ {error, IoErr} ->
+ case Fix(Ts) of
+ {form, Form} ->
+ Form;
+ {retry, Ts1, Fix1} ->
+ parse_tokens(Ts1, Fix1);
+ error ->
+ throw({parse_error, IoErr})
+ end
+ end.
+
+%% ---------------------------------------------------------------------
+%% Quick scanning/parsing - deletes macro definitions and other
+%% preprocessor directives, and replaces all macro calls with atoms.
+
+quick_parser(Ts, _Opt) ->
+ filter_form(parse_tokens(quickscan_form(Ts))).
+
+quickscan_form([{'-', _L}, {atom, La, define} | _Ts]) ->
+ kill_form(La);
+quickscan_form([{'-', _L}, {atom, La, undef} | _Ts]) ->
+ kill_form(La);
+quickscan_form([{'-', _L}, {atom, La, include} | _Ts]) ->
+ kill_form(La);
+quickscan_form([{'-', _L}, {atom, La, include_lib} | _Ts]) ->
+ kill_form(La);
+quickscan_form([{'-', _L}, {atom, La, ifdef} | _Ts]) ->
+ kill_form(La);
+quickscan_form([{'-', _L}, {atom, La, ifndef} | _Ts]) ->
+ kill_form(La);
+quickscan_form([{'-', _L}, {atom, La, else} | _Ts]) ->
+ kill_form(La);
+quickscan_form([{'-', _L}, {atom, La, endif} | _Ts]) ->
+ kill_form(La);
+quickscan_form([{'-', L}, {'?', _}, {Type, _, _}=N | [{'(', _} | _]=Ts])
+ when Type =:= atom; Type =:= var ->
+ %% minus, macro and open parenthesis at start of form - assume that
+ %% the macro takes no arguments; e.g. `-?foo(...).'
+ quickscan_macros_1(N, Ts, [{'-', L}]);
+quickscan_form([{'?', _L}, {Type, _, _}=N | [{'(', _} | _]=Ts])
+ when Type =:= atom; Type =:= var ->
+ %% macro and open parenthesis at start of form - assume that the
+ %% macro takes no arguments (see scan_macros for details)
+ quickscan_macros_1(N, Ts, []);
+quickscan_form(Ts) ->
+ quickscan_macros(Ts).
+
+kill_form(L) ->
+ [{atom, L, ?pp_form}, {'(', L}, {')', L}, {'->', L}, {atom, L, kill},
+ {dot, L}].
+
+quickscan_macros(Ts) ->
+ quickscan_macros(Ts, []).
+
+quickscan_macros([{'?',_}, {Type, _, A} | Ts], [{string, L, S} | As])
+ when Type =:= atom; Type =:= var ->
+ %% macro after a string literal: change to a single string
+ {_, Ts1} = skip_macro_args(Ts),
+ S1 = S ++ quick_macro_string(A),
+ quickscan_macros(Ts1, [{string, L, S1} | As]);
+quickscan_macros([{'?',_}, {Type, _, _}=N | [{'(',_}|_]=Ts],
+ [{':',_}|_]=As)
+ when Type =:= atom; Type =:= var ->
+ %% macro and open parenthesis after colon - check the token
+ %% following the arguments (see scan_macros for details)
+ Ts1 = case skip_macro_args(Ts) of
+ {_, [{'->',_} | _] = Ts2} -> Ts2;
+ {_, [{'when',_} | _] = Ts2} -> Ts2;
+ _ -> Ts %% assume macro without arguments
+ end,
+ quickscan_macros_1(N, Ts1, As);
+quickscan_macros([{'?',_}, {Type, _, _}=N | Ts], As)
+ when Type =:= atom; Type =:= var ->
+ %% macro with or without arguments
+ {_, Ts1} = skip_macro_args(Ts),
+ quickscan_macros_1(N, Ts1, As);
+quickscan_macros([T | Ts], As) ->
+ quickscan_macros(Ts, [T | As]);
+quickscan_macros([], As) ->
+ lists:reverse(As).
+
+%% (after a macro has been found and the arglist skipped, if any)
+quickscan_macros_1({_Type, _, A}, [{string, L, S} | Ts], As) ->
+ %% string literal following macro: change to single string
+ S1 = quick_macro_string(A) ++ S,
+ quickscan_macros(Ts, [{string, L, S1} | As]);
+quickscan_macros_1({_Type, L, A}, Ts, As) ->
+ %% normal case - just replace the macro with an atom
+ quickscan_macros(Ts, [{atom, L, quick_macro_atom(A)} | As]).
+
+quick_macro_atom(A) ->
+ list_to_atom("?" ++ atom_to_list(A)).
+
+quick_macro_string(A) ->
+ "(?" ++ atom_to_list(A) ++ ")".
+
+%% Skipping to the end of a macro call, tracking open/close constructs.
+%% @spec (Tokens) -> {Skipped, Rest}
+
+skip_macro_args([{'(',_}=T | Ts]) ->
+ skip_macro_args(Ts, [')'], [T]);
+skip_macro_args(Ts) ->
+ {[], Ts}.
+
+skip_macro_args([{'(',_}=T | Ts], Es, As) ->
+ skip_macro_args(Ts, [')' | Es], [T | As]);
+skip_macro_args([{'{',_}=T | Ts], Es, As) ->
+ skip_macro_args(Ts, ['}' | Es], [T | As]);
+skip_macro_args([{'[',_}=T | Ts], Es, As) ->
+ skip_macro_args(Ts, [']' | Es], [T | As]);
+skip_macro_args([{'<<',_}=T | Ts], Es, As) ->
+ skip_macro_args(Ts, ['>>' | Es], [T | As]);
+skip_macro_args([{'begin',_}=T | Ts], Es, As) ->
+ skip_macro_args(Ts, ['end' | Es], [T | As]);
+skip_macro_args([{'if',_}=T | Ts], Es, As) ->
+ skip_macro_args(Ts, ['end' | Es], [T | As]);
+skip_macro_args([{'case',_}=T | Ts], Es, As) ->
+ skip_macro_args(Ts, ['end' | Es], [T | As]);
+skip_macro_args([{'receive',_}=T | Ts], Es, As) ->
+ skip_macro_args(Ts, ['end' | Es], [T | As]);
+skip_macro_args([{'try',_}=T | Ts], Es, As) ->
+ skip_macro_args(Ts, ['end' | Es], [T | As]);
+skip_macro_args([{'cond',_}=T | Ts], Es, As) ->
+ skip_macro_args(Ts, ['end' | Es], [T | As]);
+skip_macro_args([{E,_}=T | Ts], [E], As) -> %final close
+ {lists:reverse([T | As]), Ts};
+skip_macro_args([{E,_}=T | Ts], [E | Es], As) -> %matching close
+ skip_macro_args(Ts, Es, [T | As]);
+skip_macro_args([T | Ts], Es, As) ->
+ skip_macro_args(Ts, Es, [T | As]);
+skip_macro_args([], _Es, _As) ->
+ throw({error, macro_args}).
+
+filter_form({function, _, ?pp_form, _,
+ [{clause, _, [], [], [{atom, _, kill}]}]}) ->
+ none;
+filter_form(T) ->
+ T.
+
+
+%% ---------------------------------------------------------------------
+%% Normal parsing - try to preserve all information
+
+normal_parser(Ts, Opt) ->
+ rewrite_form(parse_tokens(scan_form(Ts, Opt))).
+
+scan_form([{'-', _L}, {atom, La, define} | Ts], Opt) ->
+ [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La},
+ {atom, La, define} | scan_macros(Ts, Opt)];
+scan_form([{'-', _L}, {atom, La, undef} | Ts], Opt) ->
+ [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La},
+ {atom, La, undef} | scan_macros(Ts, Opt)];
+scan_form([{'-', _L}, {atom, La, include} | Ts], Opt) ->
+ [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La},
+ {atom, La, include} | scan_macros(Ts, Opt)];
+scan_form([{'-', _L}, {atom, La, include_lib} | Ts], Opt) ->
+ [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La},
+ {atom, La, include_lib} | scan_macros(Ts, Opt)];
+scan_form([{'-', _L}, {atom, La, ifdef} | Ts], Opt) ->
+ [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La},
+ {atom, La, ifdef} | scan_macros(Ts, Opt)];
+scan_form([{'-', _L}, {atom, La, ifndef} | Ts], Opt) ->
+ [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La},
+ {atom, La, ifndef} | scan_macros(Ts, Opt)];
+scan_form([{'-', _L}, {atom, La, else} | Ts], Opt) ->
+ [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La},
+ {atom, La, else} | scan_macros(Ts, Opt)];
+scan_form([{'-', _L}, {atom, La, endif} | Ts], Opt) ->
+ [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La},
+ {atom, La, endif} | scan_macros(Ts, Opt)];
+scan_form([{'-', L}, {'?', L1}, {Type, _, _}=N | [{'(', _} | _]=Ts], Opt)
+ when Type =:= atom; Type =:= var ->
+ %% minus, macro and open parenthesis at start of form - assume that
+ %% the macro takes no arguments; e.g. `-?foo(...).'
+ macro(L1, N, Ts, [{'-', L}], Opt);
+scan_form([{'?', L}, {Type, _, _}=N | [{'(', _} | _]=Ts], Opt)
+ when Type =:= atom; Type =:= var ->
+ %% macro and open parenthesis at start of form - assume that the
+ %% macro takes no arguments; probably a function declaration on the
+ %% form `?m(...) -> ...', which will not parse if it is rewritten as
+ %% `(?m(...)) -> ...', so it must be handled as `(?m)(...) -> ...'
+ macro(L, N, Ts, [], Opt);
+scan_form(Ts, Opt) ->
+ scan_macros(Ts, Opt).
+
+scan_macros(Ts, Opt) ->
+ scan_macros(Ts, [], Opt).
+
+scan_macros([{'?', _}=M, {Type, _, _}=N | Ts], [{string, L, _}=S | As],
+ #opt{clever = true}=Opt)
+ when Type =:= atom; Type =:= var ->
+ %% macro after a string literal: be clever and insert ++
+ scan_macros([M, N | Ts], [{'++', L}, S | As], Opt);
+scan_macros([{'?', L}, {Type, _, _}=N | [{'(',_}|_]=Ts],
+ [{':',_}|_]=As, Opt)
+ when Type =:= atom; Type =:= var ->
+ %% macro and open parentheses after colon - probably a call
+ %% `m:?F(...)' so the argument list might belong to the call, not
+ %% the macro - but it could also be a try-clause pattern
+ %% `...:?T(...) ->' - we need to check the token following the
+ %% arguments to decide
+ {Args, Rest} = skip_macro_args(Ts),
+ case Rest of
+ [{'->',_} | _] ->
+ macro_call(Args, L, N, Rest, As, Opt);
+ [{'when',_} | _] ->
+ macro_call(Args, L, N, Rest, As, Opt);
+ _ ->
+ macro(L, N, Ts, As, Opt)
+ end;
+scan_macros([{'?', L}, {Type, _, _}=N | [{'(',_}|_]=Ts], As, Opt)
+ when Type =:= atom; Type =:= var ->
+ %% macro with arguments
+ {Args, Rest} = skip_macro_args(Ts),
+ macro_call(Args, L, N, Rest, As, Opt);
+scan_macros([{'?', L }, {Type, _, _}=N | Ts], As, Opt)
+ when Type =:= atom; Type =:= var ->
+ %% macro without arguments
+ macro(L, N, Ts, As, Opt);
+scan_macros([T | Ts], As, Opt) ->
+ scan_macros(Ts, [T | As], Opt);
+scan_macros([], As, _Opt) ->
+ lists:reverse(As).
+
+%% Rewriting to a call which will be recognized by the post-parse pass
+%% (we insert parentheses to preserve the precedences when parsing).
+
+macro(L, {Type, _, A}, Rest, As, Opt) ->
+ scan_macros_1([], Rest, [{atom,L,macro_atom(Type,A)} | As], Opt).
+
+macro_call([{'(',_}, {')',_}], L, {_, Ln, _}=N, Rest, As, Opt) ->
+ {Open, Close} = parentheses(As),
+ scan_macros_1([], Rest,
+ lists:reverse(Open ++ [{atom,L,?macro_call},
+ {'(',L}, N, {')',Ln}] ++ Close,
+ As), Opt);
+macro_call([{'(',_} | Args], L, {_, Ln, _}=N, Rest, As, Opt) ->
+ {Open, Close} = parentheses(As),
+ %% note that we must scan the argument list; it may not be skipped
+ scan_macros_1(Args ++ Close,
+ Rest,
+ lists:reverse(Open ++ [{atom,L,?macro_call},
+ {'(',L}, N, {',',Ln}],
+ As), Opt).
+
+macro_atom(atom, A) ->
+ list_to_atom(?atom_prefix ++ atom_to_list(A));
+macro_atom(var, A) ->
+ list_to_atom(?var_prefix ++ atom_to_list(A)).
+
+%% don't insert parentheses after a string token, to avoid turning
+%% `"string" ?macro' into a "function application" `"string"(...)'
+%% (see note at top of file)
+parentheses([{string, _, _} | _]) ->
+ {[], []};
+parentheses(_) ->
+ {[{'(',0}], [{')',0}]}.
+
+%% (after a macro has been found and the arglist skipped, if any)
+scan_macros_1(Args, [{string, L, _} | _]=Rest, As,
+ #opt{clever = true}=Opt) ->
+ %% string literal following macro: be clever and insert ++
+ scan_macros(Args ++ [{'++', L} | Rest], As, Opt);
+scan_macros_1(Args, Rest, As, Opt) ->
+ %% normal case - continue scanning
+ scan_macros(Args ++ Rest, As, Opt).
+
+rewrite_form({function, L, ?pp_form, _,
+ [{clause, _, [], [], [{call, _, A, As}]}]}) ->
+ erl_syntax:set_pos(erl_syntax:attribute(A, rewrite_list(As)), L);
+rewrite_form({function, L, ?pp_form, _, [{clause, _, [], [], [A]}]}) ->
+ erl_syntax:set_pos(erl_syntax:attribute(A), L);
+rewrite_form(T) ->
+ rewrite(T).
+
+rewrite_list([T | Ts]) ->
+ [rewrite(T) | rewrite_list(Ts)];
+rewrite_list([]) ->
+ [].
+
+%% Note: as soon as we start using erl_syntax:subtrees/1 and similar
+%% functions, we cannot assume that we know the exact representation of
+%% the syntax tree anymore - we must use erl_syntax functions to analyze
+%% and decompose the data.
+
+rewrite(Node) ->
+ case erl_syntax:type(Node) of
+ atom ->
+ case atom_to_list(erl_syntax:atom_value(Node)) of
+ ?atom_prefix ++ As ->
+ A1 = list_to_atom(As),
+ N = erl_syntax:copy_pos(Node, erl_syntax:atom(A1)),
+ erl_syntax:copy_pos(Node, erl_syntax:macro(N));
+ ?var_prefix ++ As ->
+ A1 = list_to_atom(As),
+ N = erl_syntax:copy_pos(Node, erl_syntax:variable(A1)),
+ erl_syntax:copy_pos(Node, erl_syntax:macro(N));
+ _ ->
+ Node
+ end;
+ application ->
+ F = erl_syntax:application_operator(Node),
+ case erl_syntax:type(F) of
+ atom ->
+ case erl_syntax:atom_value(F) of
+ ?macro_call ->
+ [A | As] = erl_syntax:application_arguments(Node),
+ M = erl_syntax:macro(A, rewrite_list(As)),
+ erl_syntax:copy_pos(Node, M);
+ _ ->
+ rewrite_1(Node)
+ end;
+ _ ->
+ rewrite_1(Node)
+ end;
+ _ ->
+ rewrite_1(Node)
+ end.
+
+rewrite_1(Node) ->
+ case erl_syntax:subtrees(Node) of
+ [] ->
+ Node;
+ Gs ->
+ Node1 = erl_syntax:make_tree(erl_syntax:type(Node),
+ [[rewrite(T) || T <- Ts]
+ || Ts <- Gs]),
+ erl_syntax:copy_pos(Node, Node1)
+ end.
+
+%% attempting a rescue operation on a token sequence for a single form
+%% if it could not be parsed after the normal treatment
+
+fix_form([{atom, _, ?pp_form}, {'(', _}, {')', _}, {'->', _},
+ {atom, _, define}, {'(', _} | _]=Ts) ->
+ case lists:reverse(Ts) of
+ [{dot, _}, {')', _} | _] ->
+ {retry, Ts, fun fix_define/1};
+ [{dot, L} | Ts1] ->
+ Ts2 = lists:reverse([{dot, L}, {')', L} | Ts1]),
+ {retry, Ts2, fun fix_define/1};
+ _ ->
+ error
+ end;
+fix_form(_Ts) ->
+ error.
+
+fix_define([{atom, L, ?pp_form}, {'(', _}, {')', _}, {'->', _},
+ {atom, La, define}, {'(', _}, N, {',', _} | Ts]) ->
+ [{dot, _}, {')', _} | Ts1] = lists:reverse(Ts),
+ S = tokens_to_string(lists:reverse(Ts1)),
+ A = erl_syntax:set_pos(erl_syntax:atom(define), La),
+ Txt = erl_syntax:set_pos(erl_syntax:text(S), La),
+ {form, erl_syntax:set_pos(erl_syntax:attribute(A, [N, Txt]), L)};
+fix_define(_Ts) ->
+ error.
+
+%% @spec (Tokens::[term()]) -> string()
+%%
+%% @doc Generates a string corresponding to the given token sequence.
+%% The string can be re-tokenized to yield the same token list again.
+
+tokens_to_string([{atom,_,A} | Ts]) ->
+ io_lib:write_atom(A) ++ " " ++ tokens_to_string(Ts);
+tokens_to_string([{string, _, S} | Ts]) ->
+ io_lib:write_string(S) ++ " " ++ tokens_to_string(Ts);
+tokens_to_string([{float, _, F} | Ts]) ->
+ float_to_list(F) ++ " " ++ tokens_to_string(Ts);
+tokens_to_string([{integer, _, N} | Ts]) ->
+ integer_to_list(N) ++ " " ++ tokens_to_string(Ts);
+tokens_to_string([{var,_,A} | Ts]) ->
+ atom_to_list(A) ++ " " ++ tokens_to_string(Ts);
+tokens_to_string([{dot,_} | Ts]) ->
+ ".\n" ++ tokens_to_string(Ts);
+tokens_to_string([{A,_} | Ts]) ->
+ atom_to_list(A) ++ " " ++ tokens_to_string(Ts);
+tokens_to_string([]) ->
+ "".
+
+
+%% @spec (Descriptor::term()) -> string()
+%% @hidden
+%% @doc Callback function for formatting error descriptors. Not for
+%% normal use.
+
+format_error(macro_args) ->
+ errormsg("macro call missing end parenthesis");
+format_error({unknown, Reason}) ->
+ errormsg(io_lib:format("unknown error: ~P", [Reason, 15])).
+
+errormsg(String) ->
+ io_lib:format("~s: ~s", [?MODULE, String]).
+
+
+%% =====================================================================