diff options
Diffstat (limited to 'lib/syntax_tools/src/epp_dodger.erl')
-rw-r--r-- | lib/syntax_tools/src/epp_dodger.erl | 791 |
1 files changed, 791 insertions, 0 deletions
diff --git a/lib/syntax_tools/src/epp_dodger.erl b/lib/syntax_tools/src/epp_dodger.erl new file mode 100644 index 0000000000..7aef549574 --- /dev/null +++ b/lib/syntax_tools/src/epp_dodger.erl @@ -0,0 +1,791 @@ +%% ===================================================================== +%% This library is free software; you can redistribute it and/or modify +%% it under the terms of the GNU Lesser General Public License as +%% published by the Free Software Foundation; either version 2 of the +%% License, or (at your option) any later version. +%% +%% This library is distributed in the hope that it will be useful, but +%% WITHOUT ANY WARRANTY; without even the implied warranty of +%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +%% Lesser General Public License for more details. +%% +%% You should have received a copy of the GNU Lesser General Public +%% License along with this library; if not, write to the Free Software +%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +%% USA +%% +%% $Id$ +%% +%% @copyright 2001-2006 Richard Carlsson +%% @author Richard Carlsson <[email protected]> +%% @end +%% ===================================================================== + +%% @doc `epp_dodger' - bypasses the Erlang preprocessor. +%% +%% <p>This module tokenises and parses most Erlang source code without +%% expanding preprocessor directives and macro applications, as long as +%% these are syntactically "well-behaved". Because the normal parse +%% trees of the `erl_parse' module cannot represent these things +%% (normally, they are expanded by the Erlang preprocessor {@link +%% //stdlib/epp} before the parser sees them), an extended syntax tree +%% is created, using the {@link erl_syntax} module.</p> + + +%% NOTES: +%% +%% * It's OK if the result does not parse - then at least nothing +%% strange happens, and the user can resort to full preprocessing. +%% However, we must avoid generating a token stream that is accepted by +%% the parser, but has a different meaning than the intended. A typical +%% example is when someone uses token-level string concatenation with +%% macros, as in `"foo" ?bar' (where `?bar' expands to a string). If we +%% replace the tokens `? bar' with `( ... )', to preserve precedence, +%% the result will be parsed as an application `"foo" ( ... )' and cause +%% trouble later on. We must detect such cases and report an error. +%% +%% * It is pointless to add a mechanism for tracking which macros are +%% known to take arguments, and which are known to take no arguments, +%% since a lot of the time we will not have seen the macro definition +%% anyway (it's usually in a header file). Hence, we try to use +%% heuristics instead. In most cases, the token sequence `? foo (' +%% indicates that it is a call of a macro that is supposed to take +%% arguments, but e.g., in the context `: ? foo (', the argument list +%% typically belongs to a remote function call, as in `m:?f(...)' and +%% should be parsed as `m:(?f)(...)' unless it is actually a try-clause +%% pattern such as `throw:?f(...) ->'. +%% +%% * We do our best to make macros without arguments pass the parsing +%% stage transparently. Atoms are accepted in most contexts, but +%% variables are not, so we use only atoms to encode these macros. +%% Sadly, the parsing sometimes discards even the line number info from +%% atom tokens, so we can only use the actual characters for this. +%% +%% * We recognize `?m(...' at the start of a form and prevent this from +%% being interpreted as a macro with arguments, since it is probably a +%% function definition. Likewise with attributes `-?m(...'. + +-module(epp_dodger). + +-export([parse_file/1, quick_parse_file/1, parse_file/2, + quick_parse_file/2, parse/1, quick_parse/1, parse/2, + quick_parse/2, parse/3, quick_parse/3, parse_form/2, + parse_form/3, quick_parse_form/2, quick_parse_form/3, + format_error/1, tokens_to_string/1]). + + +%% The following should be: 1) pseudo-uniquely identifiable, and 2) +%% cause nice looking error messages when the parser has to give up. + +-define(macro_call, '? <macro> ('). +-define(atom_prefix, "? "). +-define(var_prefix, "?,"). +-define(pp_form, '?preprocessor declaration?'). + + +%% @type errorinfo() = {ErrorLine::integer(), +%% Module::atom(), +%% Descriptor::term()}. +%% +%% This is a so-called Erlang I/O ErrorInfo structure; see the {@link +%% //stdlib/io} module for details. + + +%% ===================================================================== +%% @spec parse_file(File) -> {ok, Forms} | {error, errorinfo()} +%% File = file:filename() +%% Forms = [erl_syntax:syntaxTree()] +%% +%% @equiv parse_file(File, []) + +parse_file(File) -> + parse_file(File, []). + +%% @spec parse_file(File, Options) -> {ok, Forms} | {error, errorinfo()} +%% File = file:filename() +%% Options = [term()] +%% Forms = [erl_syntax:syntaxTree()] +%% +%% @doc Reads and parses a file. If successful, `{ok, Forms}' +%% is returned, where `Forms' is a list of abstract syntax +%% trees representing the "program forms" of the file (cf. +%% `erl_syntax:is_form/1'). Otherwise, `{error, +%% errorinfo()}' is returned, typically if the file could not be +%% opened. Note that parse errors show up as error markers in the +%% returned list of forms; they do not cause this function to fail or +%% return `{error,errorinfo()}'. +%% +%% Options: +%% <dl> +%% <dt>{@type {no_fail, boolean()@}}</dt> +%% <dd>If `true', this makes `epp_dodger' replace any program forms +%% that could not be parsed with nodes of type `text' (see {@link +%% erl_syntax:text/1}), representing the raw token sequence of the +%% form, instead of reporting a parse error. The default value is +%% `false'.</dd> +%% <dt>{@type {clever, boolean()@}}</dt> +%% <dd>If set to `true', this makes `epp_dodger' try to repair the +%% source code as it seems fit, in certain cases where parsing would +%% otherwise fail. Currently, it inserts `++'-operators between string +%% literals and macros where it looks like concatenation was intended. +%% The default value is `false'.</dd> +%% </dl> +%% +%% @see parse/2 +%% @see quick_parse_file/1 +%% @see erl_syntax:is_form/1 + +parse_file(File, Options) -> + parse_file(File, fun parse/3, Options). + +%% @spec quick_parse_file(File) -> {ok, Forms} | {error, errorinfo()} +%% File = file:filename() +%% Forms = [erl_syntax:syntaxTree()] +%% +%% @equiv quick_parse_file(File, []) + +quick_parse_file(File) -> + quick_parse_file(File, []). + +%% @spec quick_parse_file(File, Options) -> +%% {ok, Forms} | {error, errorinfo()} +%% File = file:filename() +%% Options = [term()] +%% Forms = [erl_syntax:syntaxTree()] +%% +%% @doc Similar to {@link parse_file/2}, but does a more quick-and-dirty +%% processing of the code. Macro definitions and other preprocessor +%% directives are discarded, and all macro calls are replaced with +%% atoms. This is useful when only the main structure of the code is of +%% interest, and not the details. Furthermore, the quick-parse method +%% can usually handle more strange cases than the normal, more exact +%% parsing. +%% +%% Options: see {@link parse_file/2}. Note however that for +%% `quick_parse_file/2', the option `no_fail' is `true' by default. +%% +%% @see quick_parse/2 +%% @see parse_file/2 + +quick_parse_file(File, Options) -> + parse_file(File, fun quick_parse/3, Options ++ [no_fail]). + +parse_file(File, Parser, Options) -> + case file:open(File, [read]) of + {ok, Dev} -> + try Parser(Dev, 1, Options) + after ok = file:close(Dev) + end; + {error, _} = Error -> + Error + end. + + +%% ===================================================================== +%% @spec parse(IODevice) -> {ok, Forms} | {error, errorinfo()} +%% @equiv parse(IODevice, 1) + +parse(Dev) -> + parse(Dev, 1). + +%% @spec parse(IODevice, StartLine) -> {ok, Forms} | {error, errorinfo()} +%% IODevice = pid() +%% StartLine = integer() +%% Forms = [erl_syntax:syntaxTree()] +%% +%% @equiv parse(IODevice, StartLine, []) +%% @see parse/1 + +parse(Dev, L) -> + parse(Dev, L, []). + +%% @spec parse(IODevice, StartLine, Options) -> +%% {ok, Forms} | {error, errorinfo()} +%% IODevice = pid() +%% StartLine = integer() +%% Options = [term()] +%% Forms = [erl_syntax:syntaxTree()] +%% +%% @doc Reads and parses program text from an I/O stream. Characters are +%% read from `IODevice' until end-of-file; apart from this, the +%% behaviour is the same as for {@link parse_file/2}. `StartLine' is the +%% initial line number, which should be a positive integer. +%% +%% @see parse/2 +%% @see parse_file/2 +%% @see parse_form/2 +%% @see quick_parse/3 + +parse(Dev, L0, Options) -> + parse(Dev, L0, fun parse_form/3, Options). + +%% @spec quick_parse(IODevice) -> {ok, Forms} | {error, errorinfo()} +%% @equiv quick_parse(IODevice, 1) + +quick_parse(Dev) -> + quick_parse(Dev, 1). + +%% @spec quick_parse(IODevice, StartLine) -> +%% {ok, Forms} | {error, errorinfo()} +%% IODevice = pid() +%% StartLine = integer() +%% Forms = [erl_syntax:syntaxTree()] +%% +%% @equiv quick_parse(IODevice, StartLine, []) +%% @see quick_parse/1 + +quick_parse(Dev, L) -> + quick_parse(Dev, L, []). + +%% @spec (IODevice, StartLine, Options) -> +%% {ok, Forms} | {error, errorinfo()} +%% IODevice = pid() +%% StartLine = integer() +%% Options = [term()] +%% Forms = [erl_syntax:syntaxTree()] +%% +%% @doc Similar to {@link parse/3}, but does a more quick-and-dirty +%% processing of the code. See {@link quick_parse_file/2} for details. +%% +%% @see quick_parse/2 +%% @see quick_parse_file/2 +%% @see quick_parse_form/2 +%% @see parse/3 + +quick_parse(Dev, L0, Options) -> + parse(Dev, L0, fun quick_parse_form/3, Options). + +parse(Dev, L0, Parser, Options) -> + parse(Dev, L0, [], Parser, Options). + +parse(Dev, L0, Fs, Parser, Options) -> + case Parser(Dev, L0, Options) of + {ok, none, L1} -> + parse(Dev, L1, Fs, Parser, Options); + {ok, F, L1} -> + parse(Dev, L1, [F | Fs], Parser, Options); + {error, IoErr, L1} -> + parse(Dev, L1, [{error, IoErr} | Fs], Parser, Options); + {eof, _L1} -> + {ok, lists:reverse(Fs)} + end. + + +%% ===================================================================== +%% @spec parse_form(IODevice, StartLine) -> {ok, Form, LineNo} +%% | {eof, LineNo} +%% | {error, errorinfo(), LineNo} +%% IODevice = pid() +%% StartLine = integer() +%% Form = erl_syntax:syntaxTree() +%% LineNo = integer() +%% +%% @equiv parse_form(IODevice, StartLine, []) +%% +%% @see quick_parse_form/2 + +parse_form(Dev, L0) -> + parse_form(Dev, L0, []). + +%% @spec parse_form(IODevice, StartLine, Options) -> +%% {ok, Form, LineNo} +%% | {eof, LineNo} +%% | {error, errorinfo(), LineNo} +%% +%% IODevice = pid() +%% StartLine = integer() +%% Options = [term()] +%% Form = erl_syntax:syntaxTree() +%% LineNo = integer() +%% +%% @doc Reads and parses a single program form from an I/O stream. +%% Characters are read from `IODevice' until an end-of-form +%% marker is found (a period character followed by whitespace), or until +%% end-of-file; apart from this, the behaviour is similar to that of +%% `parse/3', except that the return values also contain the +%% final line number given that `StartLine' is the initial +%% line number, and that `{eof, LineNo}' may be returned. +%% +%% @see parse/3 +%% @see parse_form/2 +%% @see quick_parse_form/3 + +parse_form(Dev, L0, Options) -> + parse_form(Dev, L0, fun normal_parser/2, Options). + +%% @spec quick_parse_form(IODevice, StartLine) -> +%% {ok, Form, LineNo} +%% | {eof, LineNo} +%% | {error, errorinfo(), LineNo} +%% IODevice = pid() +%% StartLine = integer() +%% Form = erl_syntax:syntaxTree() | none +%% LineNo = integer() +%% +%% @equiv quick_parse_form(IODevice, StartLine, []) +%% +%% @see parse_form/2 + +quick_parse_form(Dev, L0) -> + quick_parse_form(Dev, L0, []). + +%% @spec quick_parse_form(IODevice, StartLine, Options) -> +%% {ok, Form, LineNo} +%% | {eof, LineNo} +%% | {error, errorinfo(), LineNo} +%% +%% IODevice = pid() +%% StartLine = integer() +%% Options = [term()] +%% Form = erl_syntax:syntaxTree() +%% LineNo = integer() +%% +%% @doc Similar to {@link parse_form/3}, but does a more quick-and-dirty +%% processing of the code. See {@link quick_parse_file/2} for details. +%% +%% @see parse/3 +%% @see quick_parse_form/2 +%% @see parse_form/3 + +quick_parse_form(Dev, L0, Options) -> + parse_form(Dev, L0, fun quick_parser/2, Options). + +-record(opt, {clever = false :: boolean()}). + +parse_form(Dev, L0, Parser, Options) -> + NoFail = proplists:get_bool(no_fail, Options), + Opt = #opt{clever = proplists:get_bool(clever, Options)}, + case io:scan_erl_form(Dev, "", L0) of + {ok, Ts, L1} -> + case catch {ok, Parser(Ts, Opt)} of + {'EXIT', Term} -> + {error, io_error(L1, {unknown, Term}), L1}; + {error, Term} -> + IoErr = io_error(L1, Term), + {error, IoErr, L1}; + {parse_error, _IoErr} when NoFail -> + {ok, erl_syntax:set_pos( + erl_syntax:text(tokens_to_string(Ts)), + start_pos(Ts, L1)), + L1}; + {parse_error, IoErr} -> + {error, IoErr, L1}; + {ok, F} -> + {ok, F, L1} + end; + {error, _IoErr, _L1} = Err -> Err; + {eof, _L1} = Eof -> Eof + end. + +io_error(L, Desc) -> + {L, ?MODULE, Desc}. + +start_pos([T | _Ts], _L) -> + element(2, T); +start_pos([], L) -> + L. + +%% Exception-throwing wrapper for the standard Erlang parser stage + +parse_tokens(Ts) -> + parse_tokens(Ts, fun fix_form/1). + +parse_tokens(Ts, Fix) -> + case erl_parse:parse_form(Ts) of + {ok, Form} -> + Form; + {error, IoErr} -> + case Fix(Ts) of + {form, Form} -> + Form; + {retry, Ts1, Fix1} -> + parse_tokens(Ts1, Fix1); + error -> + throw({parse_error, IoErr}) + end + end. + +%% --------------------------------------------------------------------- +%% Quick scanning/parsing - deletes macro definitions and other +%% preprocessor directives, and replaces all macro calls with atoms. + +quick_parser(Ts, _Opt) -> + filter_form(parse_tokens(quickscan_form(Ts))). + +quickscan_form([{'-', _L}, {atom, La, define} | _Ts]) -> + kill_form(La); +quickscan_form([{'-', _L}, {atom, La, undef} | _Ts]) -> + kill_form(La); +quickscan_form([{'-', _L}, {atom, La, include} | _Ts]) -> + kill_form(La); +quickscan_form([{'-', _L}, {atom, La, include_lib} | _Ts]) -> + kill_form(La); +quickscan_form([{'-', _L}, {atom, La, ifdef} | _Ts]) -> + kill_form(La); +quickscan_form([{'-', _L}, {atom, La, ifndef} | _Ts]) -> + kill_form(La); +quickscan_form([{'-', _L}, {atom, La, else} | _Ts]) -> + kill_form(La); +quickscan_form([{'-', _L}, {atom, La, endif} | _Ts]) -> + kill_form(La); +quickscan_form([{'-', L}, {'?', _}, {Type, _, _}=N | [{'(', _} | _]=Ts]) + when Type =:= atom; Type =:= var -> + %% minus, macro and open parenthesis at start of form - assume that + %% the macro takes no arguments; e.g. `-?foo(...).' + quickscan_macros_1(N, Ts, [{'-', L}]); +quickscan_form([{'?', _L}, {Type, _, _}=N | [{'(', _} | _]=Ts]) + when Type =:= atom; Type =:= var -> + %% macro and open parenthesis at start of form - assume that the + %% macro takes no arguments (see scan_macros for details) + quickscan_macros_1(N, Ts, []); +quickscan_form(Ts) -> + quickscan_macros(Ts). + +kill_form(L) -> + [{atom, L, ?pp_form}, {'(', L}, {')', L}, {'->', L}, {atom, L, kill}, + {dot, L}]. + +quickscan_macros(Ts) -> + quickscan_macros(Ts, []). + +quickscan_macros([{'?',_}, {Type, _, A} | Ts], [{string, L, S} | As]) + when Type =:= atom; Type =:= var -> + %% macro after a string literal: change to a single string + {_, Ts1} = skip_macro_args(Ts), + S1 = S ++ quick_macro_string(A), + quickscan_macros(Ts1, [{string, L, S1} | As]); +quickscan_macros([{'?',_}, {Type, _, _}=N | [{'(',_}|_]=Ts], + [{':',_}|_]=As) + when Type =:= atom; Type =:= var -> + %% macro and open parenthesis after colon - check the token + %% following the arguments (see scan_macros for details) + Ts1 = case skip_macro_args(Ts) of + {_, [{'->',_} | _] = Ts2} -> Ts2; + {_, [{'when',_} | _] = Ts2} -> Ts2; + _ -> Ts %% assume macro without arguments + end, + quickscan_macros_1(N, Ts1, As); +quickscan_macros([{'?',_}, {Type, _, _}=N | Ts], As) + when Type =:= atom; Type =:= var -> + %% macro with or without arguments + {_, Ts1} = skip_macro_args(Ts), + quickscan_macros_1(N, Ts1, As); +quickscan_macros([T | Ts], As) -> + quickscan_macros(Ts, [T | As]); +quickscan_macros([], As) -> + lists:reverse(As). + +%% (after a macro has been found and the arglist skipped, if any) +quickscan_macros_1({_Type, _, A}, [{string, L, S} | Ts], As) -> + %% string literal following macro: change to single string + S1 = quick_macro_string(A) ++ S, + quickscan_macros(Ts, [{string, L, S1} | As]); +quickscan_macros_1({_Type, L, A}, Ts, As) -> + %% normal case - just replace the macro with an atom + quickscan_macros(Ts, [{atom, L, quick_macro_atom(A)} | As]). + +quick_macro_atom(A) -> + list_to_atom("?" ++ atom_to_list(A)). + +quick_macro_string(A) -> + "(?" ++ atom_to_list(A) ++ ")". + +%% Skipping to the end of a macro call, tracking open/close constructs. +%% @spec (Tokens) -> {Skipped, Rest} + +skip_macro_args([{'(',_}=T | Ts]) -> + skip_macro_args(Ts, [')'], [T]); +skip_macro_args(Ts) -> + {[], Ts}. + +skip_macro_args([{'(',_}=T | Ts], Es, As) -> + skip_macro_args(Ts, [')' | Es], [T | As]); +skip_macro_args([{'{',_}=T | Ts], Es, As) -> + skip_macro_args(Ts, ['}' | Es], [T | As]); +skip_macro_args([{'[',_}=T | Ts], Es, As) -> + skip_macro_args(Ts, [']' | Es], [T | As]); +skip_macro_args([{'<<',_}=T | Ts], Es, As) -> + skip_macro_args(Ts, ['>>' | Es], [T | As]); +skip_macro_args([{'begin',_}=T | Ts], Es, As) -> + skip_macro_args(Ts, ['end' | Es], [T | As]); +skip_macro_args([{'if',_}=T | Ts], Es, As) -> + skip_macro_args(Ts, ['end' | Es], [T | As]); +skip_macro_args([{'case',_}=T | Ts], Es, As) -> + skip_macro_args(Ts, ['end' | Es], [T | As]); +skip_macro_args([{'receive',_}=T | Ts], Es, As) -> + skip_macro_args(Ts, ['end' | Es], [T | As]); +skip_macro_args([{'try',_}=T | Ts], Es, As) -> + skip_macro_args(Ts, ['end' | Es], [T | As]); +skip_macro_args([{'cond',_}=T | Ts], Es, As) -> + skip_macro_args(Ts, ['end' | Es], [T | As]); +skip_macro_args([{E,_}=T | Ts], [E], As) -> %final close + {lists:reverse([T | As]), Ts}; +skip_macro_args([{E,_}=T | Ts], [E | Es], As) -> %matching close + skip_macro_args(Ts, Es, [T | As]); +skip_macro_args([T | Ts], Es, As) -> + skip_macro_args(Ts, Es, [T | As]); +skip_macro_args([], _Es, _As) -> + throw({error, macro_args}). + +filter_form({function, _, ?pp_form, _, + [{clause, _, [], [], [{atom, _, kill}]}]}) -> + none; +filter_form(T) -> + T. + + +%% --------------------------------------------------------------------- +%% Normal parsing - try to preserve all information + +normal_parser(Ts, Opt) -> + rewrite_form(parse_tokens(scan_form(Ts, Opt))). + +scan_form([{'-', _L}, {atom, La, define} | Ts], Opt) -> + [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La}, + {atom, La, define} | scan_macros(Ts, Opt)]; +scan_form([{'-', _L}, {atom, La, undef} | Ts], Opt) -> + [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La}, + {atom, La, undef} | scan_macros(Ts, Opt)]; +scan_form([{'-', _L}, {atom, La, include} | Ts], Opt) -> + [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La}, + {atom, La, include} | scan_macros(Ts, Opt)]; +scan_form([{'-', _L}, {atom, La, include_lib} | Ts], Opt) -> + [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La}, + {atom, La, include_lib} | scan_macros(Ts, Opt)]; +scan_form([{'-', _L}, {atom, La, ifdef} | Ts], Opt) -> + [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La}, + {atom, La, ifdef} | scan_macros(Ts, Opt)]; +scan_form([{'-', _L}, {atom, La, ifndef} | Ts], Opt) -> + [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La}, + {atom, La, ifndef} | scan_macros(Ts, Opt)]; +scan_form([{'-', _L}, {atom, La, else} | Ts], Opt) -> + [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La}, + {atom, La, else} | scan_macros(Ts, Opt)]; +scan_form([{'-', _L}, {atom, La, endif} | Ts], Opt) -> + [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La}, + {atom, La, endif} | scan_macros(Ts, Opt)]; +scan_form([{'-', L}, {'?', L1}, {Type, _, _}=N | [{'(', _} | _]=Ts], Opt) + when Type =:= atom; Type =:= var -> + %% minus, macro and open parenthesis at start of form - assume that + %% the macro takes no arguments; e.g. `-?foo(...).' + macro(L1, N, Ts, [{'-', L}], Opt); +scan_form([{'?', L}, {Type, _, _}=N | [{'(', _} | _]=Ts], Opt) + when Type =:= atom; Type =:= var -> + %% macro and open parenthesis at start of form - assume that the + %% macro takes no arguments; probably a function declaration on the + %% form `?m(...) -> ...', which will not parse if it is rewritten as + %% `(?m(...)) -> ...', so it must be handled as `(?m)(...) -> ...' + macro(L, N, Ts, [], Opt); +scan_form(Ts, Opt) -> + scan_macros(Ts, Opt). + +scan_macros(Ts, Opt) -> + scan_macros(Ts, [], Opt). + +scan_macros([{'?', _}=M, {Type, _, _}=N | Ts], [{string, L, _}=S | As], + #opt{clever = true}=Opt) + when Type =:= atom; Type =:= var -> + %% macro after a string literal: be clever and insert ++ + scan_macros([M, N | Ts], [{'++', L}, S | As], Opt); +scan_macros([{'?', L}, {Type, _, _}=N | [{'(',_}|_]=Ts], + [{':',_}|_]=As, Opt) + when Type =:= atom; Type =:= var -> + %% macro and open parentheses after colon - probably a call + %% `m:?F(...)' so the argument list might belong to the call, not + %% the macro - but it could also be a try-clause pattern + %% `...:?T(...) ->' - we need to check the token following the + %% arguments to decide + {Args, Rest} = skip_macro_args(Ts), + case Rest of + [{'->',_} | _] -> + macro_call(Args, L, N, Rest, As, Opt); + [{'when',_} | _] -> + macro_call(Args, L, N, Rest, As, Opt); + _ -> + macro(L, N, Ts, As, Opt) + end; +scan_macros([{'?', L}, {Type, _, _}=N | [{'(',_}|_]=Ts], As, Opt) + when Type =:= atom; Type =:= var -> + %% macro with arguments + {Args, Rest} = skip_macro_args(Ts), + macro_call(Args, L, N, Rest, As, Opt); +scan_macros([{'?', L }, {Type, _, _}=N | Ts], As, Opt) + when Type =:= atom; Type =:= var -> + %% macro without arguments + macro(L, N, Ts, As, Opt); +scan_macros([T | Ts], As, Opt) -> + scan_macros(Ts, [T | As], Opt); +scan_macros([], As, _Opt) -> + lists:reverse(As). + +%% Rewriting to a call which will be recognized by the post-parse pass +%% (we insert parentheses to preserve the precedences when parsing). + +macro(L, {Type, _, A}, Rest, As, Opt) -> + scan_macros_1([], Rest, [{atom,L,macro_atom(Type,A)} | As], Opt). + +macro_call([{'(',_}, {')',_}], L, {_, Ln, _}=N, Rest, As, Opt) -> + {Open, Close} = parentheses(As), + scan_macros_1([], Rest, + lists:reverse(Open ++ [{atom,L,?macro_call}, + {'(',L}, N, {')',Ln}] ++ Close, + As), Opt); +macro_call([{'(',_} | Args], L, {_, Ln, _}=N, Rest, As, Opt) -> + {Open, Close} = parentheses(As), + %% note that we must scan the argument list; it may not be skipped + scan_macros_1(Args ++ Close, + Rest, + lists:reverse(Open ++ [{atom,L,?macro_call}, + {'(',L}, N, {',',Ln}], + As), Opt). + +macro_atom(atom, A) -> + list_to_atom(?atom_prefix ++ atom_to_list(A)); +macro_atom(var, A) -> + list_to_atom(?var_prefix ++ atom_to_list(A)). + +%% don't insert parentheses after a string token, to avoid turning +%% `"string" ?macro' into a "function application" `"string"(...)' +%% (see note at top of file) +parentheses([{string, _, _} | _]) -> + {[], []}; +parentheses(_) -> + {[{'(',0}], [{')',0}]}. + +%% (after a macro has been found and the arglist skipped, if any) +scan_macros_1(Args, [{string, L, _} | _]=Rest, As, + #opt{clever = true}=Opt) -> + %% string literal following macro: be clever and insert ++ + scan_macros(Args ++ [{'++', L} | Rest], As, Opt); +scan_macros_1(Args, Rest, As, Opt) -> + %% normal case - continue scanning + scan_macros(Args ++ Rest, As, Opt). + +rewrite_form({function, L, ?pp_form, _, + [{clause, _, [], [], [{call, _, A, As}]}]}) -> + erl_syntax:set_pos(erl_syntax:attribute(A, rewrite_list(As)), L); +rewrite_form({function, L, ?pp_form, _, [{clause, _, [], [], [A]}]}) -> + erl_syntax:set_pos(erl_syntax:attribute(A), L); +rewrite_form(T) -> + rewrite(T). + +rewrite_list([T | Ts]) -> + [rewrite(T) | rewrite_list(Ts)]; +rewrite_list([]) -> + []. + +%% Note: as soon as we start using erl_syntax:subtrees/1 and similar +%% functions, we cannot assume that we know the exact representation of +%% the syntax tree anymore - we must use erl_syntax functions to analyze +%% and decompose the data. + +rewrite(Node) -> + case erl_syntax:type(Node) of + atom -> + case atom_to_list(erl_syntax:atom_value(Node)) of + ?atom_prefix ++ As -> + A1 = list_to_atom(As), + N = erl_syntax:copy_pos(Node, erl_syntax:atom(A1)), + erl_syntax:copy_pos(Node, erl_syntax:macro(N)); + ?var_prefix ++ As -> + A1 = list_to_atom(As), + N = erl_syntax:copy_pos(Node, erl_syntax:variable(A1)), + erl_syntax:copy_pos(Node, erl_syntax:macro(N)); + _ -> + Node + end; + application -> + F = erl_syntax:application_operator(Node), + case erl_syntax:type(F) of + atom -> + case erl_syntax:atom_value(F) of + ?macro_call -> + [A | As] = erl_syntax:application_arguments(Node), + M = erl_syntax:macro(A, rewrite_list(As)), + erl_syntax:copy_pos(Node, M); + _ -> + rewrite_1(Node) + end; + _ -> + rewrite_1(Node) + end; + _ -> + rewrite_1(Node) + end. + +rewrite_1(Node) -> + case erl_syntax:subtrees(Node) of + [] -> + Node; + Gs -> + Node1 = erl_syntax:make_tree(erl_syntax:type(Node), + [[rewrite(T) || T <- Ts] + || Ts <- Gs]), + erl_syntax:copy_pos(Node, Node1) + end. + +%% attempting a rescue operation on a token sequence for a single form +%% if it could not be parsed after the normal treatment + +fix_form([{atom, _, ?pp_form}, {'(', _}, {')', _}, {'->', _}, + {atom, _, define}, {'(', _} | _]=Ts) -> + case lists:reverse(Ts) of + [{dot, _}, {')', _} | _] -> + {retry, Ts, fun fix_define/1}; + [{dot, L} | Ts1] -> + Ts2 = lists:reverse([{dot, L}, {')', L} | Ts1]), + {retry, Ts2, fun fix_define/1}; + _ -> + error + end; +fix_form(_Ts) -> + error. + +fix_define([{atom, L, ?pp_form}, {'(', _}, {')', _}, {'->', _}, + {atom, La, define}, {'(', _}, N, {',', _} | Ts]) -> + [{dot, _}, {')', _} | Ts1] = lists:reverse(Ts), + S = tokens_to_string(lists:reverse(Ts1)), + A = erl_syntax:set_pos(erl_syntax:atom(define), La), + Txt = erl_syntax:set_pos(erl_syntax:text(S), La), + {form, erl_syntax:set_pos(erl_syntax:attribute(A, [N, Txt]), L)}; +fix_define(_Ts) -> + error. + +%% @spec (Tokens::[term()]) -> string() +%% +%% @doc Generates a string corresponding to the given token sequence. +%% The string can be re-tokenized to yield the same token list again. + +tokens_to_string([{atom,_,A} | Ts]) -> + io_lib:write_atom(A) ++ " " ++ tokens_to_string(Ts); +tokens_to_string([{string, _, S} | Ts]) -> + io_lib:write_string(S) ++ " " ++ tokens_to_string(Ts); +tokens_to_string([{float, _, F} | Ts]) -> + float_to_list(F) ++ " " ++ tokens_to_string(Ts); +tokens_to_string([{integer, _, N} | Ts]) -> + integer_to_list(N) ++ " " ++ tokens_to_string(Ts); +tokens_to_string([{var,_,A} | Ts]) -> + atom_to_list(A) ++ " " ++ tokens_to_string(Ts); +tokens_to_string([{dot,_} | Ts]) -> + ".\n" ++ tokens_to_string(Ts); +tokens_to_string([{A,_} | Ts]) -> + atom_to_list(A) ++ " " ++ tokens_to_string(Ts); +tokens_to_string([]) -> + "". + + +%% @spec (Descriptor::term()) -> string() +%% @hidden +%% @doc Callback function for formatting error descriptors. Not for +%% normal use. + +format_error(macro_args) -> + errormsg("macro call missing end parenthesis"); +format_error({unknown, Reason}) -> + errormsg(io_lib:format("unknown error: ~P", [Reason, 15])). + +errormsg(String) -> + io_lib:format("~s: ~s", [?MODULE, String]). + + +%% ===================================================================== |