aboutsummaryrefslogblamecommitdiffstats
path: root/lib/syntax_tools/src/epp_dodger.erl
blob: 39c522fd115979624514fffa82a8a5cd912b73b3 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16















                                                                        
                                        
                                                        







































































                                                                        
                                                 

                                            







                                                                        


                                                            










                                                                        




                                                                 




















                                                                        


                                                            








                                                                     


                                                            






















                                                                        


                                                            



                                                              

















                                                                        

                                   
                                                   


                                       

                                                                     

        







                                                            




                                                                        

                                                            










                                                                         

                                                                       



















                                                                        


                                                       





                                                                    


                                      











                                               


                                                 

















                                                                      


                                                             































                                                                        



                                                               

























                                                                        



                                                               















                                                      



                                                               




















                                                                        



                                                                  







                                                             
                                         

















                                                                 
                                                                          






                               
                                 
















































































































































































































































































































































































                                                                              
                                                       



                                                                     

                                             



                                                          

                                                        



                                                      
                                       
                                                   
                                    
                                  
                                  




                                                   
                                                     



                                                                   

                                       









                                                                        
%% =====================================================================
%% This library is free software; you can redistribute it and/or modify
%% it under the terms of the GNU Lesser General Public License as
%% published by the Free Software Foundation; either version 2 of the
%% License, or (at your option) any later version.
%%
%% This library is distributed in the hope that it will be useful, but
%% WITHOUT ANY WARRANTY; without even the implied warranty of
%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
%% Lesser General Public License for more details.
%%
%% You should have received a copy of the GNU Lesser General Public
%% License along with this library; if not, write to the Free Software
%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
%% USA
%%
%% @copyright 2001-2006 Richard Carlsson
%% @author Richard Carlsson <[email protected]>
%% @end
%% =====================================================================

%% @doc `epp_dodger' - bypasses the Erlang preprocessor.
%%
%% <p>This module tokenises and parses most Erlang source code without
%% expanding preprocessor directives and macro applications, as long as
%% these are syntactically "well-behaved". Because the normal parse
%% trees of the `erl_parse' module cannot represent these things
%% (normally, they are expanded by the Erlang preprocessor {@link
%% //stdlib/epp} before the parser sees them), an extended syntax tree
%% is created, using the {@link erl_syntax} module.</p>


%% NOTES:
%%
%% * It's OK if the result does not parse - then at least nothing
%% strange happens, and the user can resort to full preprocessing.
%% However, we must avoid generating a token stream that is accepted by
%% the parser, but has a different meaning than the intended. A typical
%% example is when someone uses token-level string concatenation with
%% macros, as in `"foo" ?bar' (where `?bar' expands to a string). If we
%% replace the tokens `? bar' with `( ... )', to preserve precedence,
%% the result will be parsed as an application `"foo" ( ... )' and cause
%% trouble later on. We must detect such cases and report an error.
%% 
%% * It is pointless to add a mechanism for tracking which macros are
%% known to take arguments, and which are known to take no arguments,
%% since a lot of the time we will not have seen the macro definition
%% anyway (it's usually in a header file). Hence, we try to use
%% heuristics instead. In most cases, the token sequence `? foo ('
%% indicates that it is a call of a macro that is supposed to take
%% arguments, but e.g., in the context `: ? foo (', the argument list
%% typically belongs to a remote function call, as in `m:?f(...)' and
%% should be parsed as `m:(?f)(...)' unless it is actually a try-clause
%% pattern such as `throw:?f(...) ->'.
%% 
%% * We do our best to make macros without arguments pass the parsing
%% stage transparently. Atoms are accepted in most contexts, but
%% variables are not, so we use only atoms to encode these macros.
%% Sadly, the parsing sometimes discards even the line number info from
%% atom tokens, so we can only use the actual characters for this.
%%
%% * We recognize `?m(...' at the start of a form and prevent this from
%% being interpreted as a macro with arguments, since it is probably a
%% function definition. Likewise with attributes `-?m(...'.

-module(epp_dodger).

-export([parse_file/1, quick_parse_file/1, parse_file/2,
	 quick_parse_file/2, parse/1, quick_parse/1, parse/2,
	 quick_parse/2, parse/3, quick_parse/3, parse_form/2,
	 parse_form/3, quick_parse_form/2, quick_parse_form/3,
	 format_error/1, tokens_to_string/1]).


%% The following should be: 1) pseudo-uniquely identifiable, and 2)
%% cause nice looking error messages when the parser has to give up.

-define(macro_call, '? <macro> (').
-define(atom_prefix, "? ").
-define(var_prefix, "?,").
-define(pp_form, '?preprocessor declaration?').


%% @type errorinfo() = {ErrorLine::integer(),
%%                      Module::atom(),
%%                      Descriptor::term()}.
%% 
%% This is a so-called Erlang I/O ErrorInfo structure; see the {@link
%% //stdlib/io} module for details.

-type errorinfo() :: {integer(), atom(), term()}.

-type option() :: atom() | {atom(), term()}.

%% =====================================================================
%% @spec parse_file(File) -> {ok, Forms} | {error, errorinfo()}
%%       File = file:filename()
%%       Forms = [erl_syntax:syntaxTree()]
%% 
%% @equiv parse_file(File, [])

-spec parse_file(file:filename()) ->
        {'ok', erl_syntax:forms()} | {'error', errorinfo()}.

parse_file(File) ->
    parse_file(File, []).

%% @spec parse_file(File, Options) -> {ok, Forms} | {error, errorinfo()}
%%       File = file:filename()
%%       Options = [term()]
%%       Forms = [erl_syntax:syntaxTree()]
%% 
%% @doc Reads and parses a file. If successful, `{ok, Forms}'
%% is returned, where `Forms' is a list of abstract syntax
%% trees representing the "program forms" of the file (cf.
%% `erl_syntax:is_form/1'). Otherwise, `{error, errorinfo()}' is
%% returned, typically if the file could not be opened. Note that
%% parse errors show up as error markers in the returned list of
%% forms; they do not cause this function to fail or return
%% `{error, errorinfo()}'.
%%
%% Options:
%% <dl>
%%   <dt>{@type {no_fail, boolean()@}}</dt>
%%   <dd>If `true', this makes `epp_dodger' replace any program forms
%%   that could not be parsed with nodes of type `text' (see {@link
%%   erl_syntax:text/1}), representing the raw token sequence of the
%%   form, instead of reporting a parse error. The default value is
%%   `false'.</dd>
%%   <dt>{@type {clever, boolean()@}}</dt>
%%   <dd>If set to `true', this makes `epp_dodger' try to repair the
%%   source code as it seems fit, in certain cases where parsing would
%%   otherwise fail. Currently, it inserts `++'-operators between string
%%   literals and macros where it looks like concatenation was intended.
%%   The default value is `false'.</dd>
%% </dl>
%%
%% @see parse/2
%% @see quick_parse_file/1
%% @see erl_syntax:is_form/1

-spec parse_file(file:filename(), [option()]) ->
        {'ok', erl_syntax:forms()} | {'error', errorinfo()}.

parse_file(File, Options) ->
    parse_file(File, fun parse/3, Options).

%% @spec quick_parse_file(File) -> {ok, Forms} | {error, errorinfo()}
%%       File = file:filename()
%%       Forms = [erl_syntax:syntaxTree()]
%%
%% @equiv quick_parse_file(File, [])

-spec quick_parse_file(file:filename()) ->
        {'ok', erl_syntax:forms()} | {'error', errorinfo()}.

quick_parse_file(File) ->
    quick_parse_file(File, []).

%% @spec quick_parse_file(File, Options) ->
%%           {ok, Forms} | {error, errorinfo()}
%%       File = file:filename()
%%       Options = [term()]
%%       Forms = [erl_syntax:syntaxTree()]
%%
%% @doc Similar to {@link parse_file/2}, but does a more quick-and-dirty
%% processing of the code. Macro definitions and other preprocessor
%% directives are discarded, and all macro calls are replaced with
%% atoms. This is useful when only the main structure of the code is of
%% interest, and not the details. Furthermore, the quick-parse method
%% can usually handle more strange cases than the normal, more exact
%% parsing.
%%
%% Options: see {@link parse_file/2}. Note however that for
%% `quick_parse_file/2', the option `no_fail' is `true' by default.
%%
%% @see quick_parse/2
%% @see parse_file/2

-spec quick_parse_file(file:filename(), [option()]) ->
        {'ok', erl_syntax:forms()} | {'error', errorinfo()}.

quick_parse_file(File, Options) ->
    parse_file(File, fun quick_parse/3, Options ++ [no_fail]).

parse_file(File, Parser, Options) ->
    case do_parse_file(utf8, File, Parser, Options) of
        {ok, Forms}=Ret ->
            case find_invalid_unicode(Forms) of
                none ->
                    Ret;
                invalid_unicode ->
                    case epp:read_encoding(File) of
                        utf8 ->
                            Ret;
                        _ ->
                            do_parse_file(latin1, File, Parser, Options)
                    end
            end;
        Else ->
            Else
    end.

do_parse_file(DefEncoding, File, Parser, Options) ->
    case file:open(File, [read]) of
        {ok, Dev} ->
            _ = epp:set_encoding(Dev, DefEncoding),
            try Parser(Dev, 1, Options)
            after ok = file:close(Dev)
	    end;
        {error, Error} ->
            {error, {0, file, Error}}  % defer to file:format_error/1
    end.

find_invalid_unicode([H|T]) ->
    case H of
	{error, {_Line, file_io_server, invalid_unicode}} ->
	    invalid_unicode;
	_Other ->
	    find_invalid_unicode(T)
    end;
find_invalid_unicode([]) -> none.

%% =====================================================================
%% @spec parse(IODevice) -> {ok, Forms} | {error, errorinfo()}
%% @equiv parse(IODevice, 1)

-spec parse(file:io_device()) -> {'ok', erl_syntax:forms()}.

parse(Dev) ->
    parse(Dev, 1).

%% @spec parse(IODevice, StartLine) -> {ok, Forms} | {error, errorinfo()}
%%       IODevice = pid()
%%       StartLine = integer()
%%       Forms = [erl_syntax:syntaxTree()]
%%
%% @equiv parse(IODevice, StartLine, [])
%% @see parse/1

-spec parse(file:io_device(), integer()) -> {'ok', erl_syntax:forms()}.

parse(Dev, L) ->
    parse(Dev, L, []).

%% @spec parse(IODevice, StartLine, Options) ->
%%           {ok, Forms} | {error, errorinfo()}
%%       IODevice = pid()
%%       StartLine = integer()
%%       Options = [term()]
%%       Forms = [erl_syntax:syntaxTree()]
%%
%% @doc Reads and parses program text from an I/O stream. Characters are
%% read from `IODevice' until end-of-file; apart from this, the
%% behaviour is the same as for {@link parse_file/2}. `StartLine' is the
%% initial line number, which should be a positive integer.
%%
%% @see parse/2
%% @see parse_file/2
%% @see parse_form/2
%% @see quick_parse/3

-spec parse(file:io_device(), integer(), [option()]) ->
        {'ok', erl_syntax:forms()}.

parse(Dev, L0, Options) ->
    parse(Dev, L0, fun parse_form/3, Options).

%% @spec quick_parse(IODevice) -> {ok, Forms} | {error, errorinfo()}
%% @equiv quick_parse(IODevice, 1)

-spec quick_parse(file:io_device()) ->
        {'ok', erl_syntax:forms()}.

quick_parse(Dev) ->
    quick_parse(Dev, 1).

%% @spec quick_parse(IODevice, StartLine) ->
%%           {ok, Forms} | {error, errorinfo()}
%%       IODevice = pid()
%%       StartLine = integer()
%%       Forms = [erl_syntax:syntaxTree()]
%%
%% @equiv quick_parse(IODevice, StartLine, [])
%% @see quick_parse/1

-spec quick_parse(file:io_device(), integer()) ->
        {'ok', erl_syntax:forms()}.

quick_parse(Dev, L) ->
    quick_parse(Dev, L, []).

%% @spec (IODevice, StartLine, Options) ->
%%           {ok, Forms} | {error, errorinfo()}
%%       IODevice = pid()
%%       StartLine = integer()
%%       Options = [term()]
%%       Forms = [erl_syntax:syntaxTree()]
%% 
%% @doc Similar to {@link parse/3}, but does a more quick-and-dirty
%% processing of the code. See {@link quick_parse_file/2} for details.
%%
%% @see quick_parse/2
%% @see quick_parse_file/2
%% @see quick_parse_form/2
%% @see parse/3

-spec quick_parse(file:io_device(), integer(), [option()]) ->
        {'ok', erl_syntax:forms()}.

quick_parse(Dev, L0, Options) ->
    parse(Dev, L0, fun quick_parse_form/3, Options).

parse(Dev, L0, Parser, Options) ->
    parse(Dev, L0, [], Parser, Options).

parse(Dev, L0, Fs, Parser, Options) ->
    case Parser(Dev, L0, Options) of
        {ok, none, L1} ->
            parse(Dev, L1, Fs, Parser, Options);
        {ok, F, L1} ->
            parse(Dev, L1, [F | Fs], Parser, Options);
        {error, IoErr, L1} ->
            parse(Dev, L1, [{error, IoErr} | Fs], Parser, Options);
        {eof, _L1} ->
            {ok, lists:reverse(Fs)}
    end.


%% =====================================================================
%% @spec parse_form(IODevice, StartLine) -> {ok, Form, LineNo}
%%                                        | {eof, LineNo}
%%                                        | {error, errorinfo(), LineNo}
%%       IODevice = pid()
%%       StartLine = integer()
%%       Form = erl_syntax:syntaxTree()
%%       LineNo = integer()
%%
%% @equiv parse_form(IODevice, StartLine, [])
%%
%% @see quick_parse_form/2

-spec parse_form(file:io_device(), integer()) ->
        {'ok', erl_syntax:forms(), integer()}
      | {'eof', integer()} | {'error', errorinfo(), integer()}.

parse_form(Dev, L0) ->
    parse_form(Dev, L0, []).

%% @spec parse_form(IODevice, StartLine, Options) ->
%%           {ok, Form, LineNo}
%%         | {eof, LineNo}
%%         | {error, errorinfo(), LineNo}
%% 
%%       IODevice = pid()
%%       StartLine = integer()
%%       Options = [term()]
%%       Form = erl_syntax:syntaxTree()
%%       LineNo = integer()
%%
%% @doc Reads and parses a single program form from an I/O stream.
%% Characters are read from `IODevice' until an end-of-form
%% marker is found (a period character followed by whitespace), or until
%% end-of-file; apart from this, the behaviour is similar to that of
%% `parse/3', except that the return values also contain the
%% final line number given that `StartLine' is the initial
%% line number, and that `{eof, LineNo}' may be returned.
%%
%% @see parse/3
%% @see parse_form/2
%% @see quick_parse_form/3

-spec parse_form(file:io_device(), integer(), [option()]) ->
        {'ok', erl_syntax:forms(), integer()}
      | {'eof', integer()} | {'error', errorinfo(), integer()}.

parse_form(Dev, L0, Options) ->
    parse_form(Dev, L0, fun normal_parser/2, Options).

%% @spec quick_parse_form(IODevice, StartLine) ->
%%           {ok, Form, LineNo}
%%         | {eof, LineNo}
%%         | {error, errorinfo(), LineNo}
%%       IODevice = pid()
%%       StartLine = integer()
%%       Form = erl_syntax:syntaxTree() | none
%%       LineNo = integer()
%%
%% @equiv quick_parse_form(IODevice, StartLine, [])
%%
%% @see parse_form/2

-spec quick_parse_form(file:io_device(), integer()) ->
        {'ok', erl_syntax:forms(), integer()}
      | {'eof', integer()} | {'error', errorinfo(), integer()}.

quick_parse_form(Dev, L0) ->
    quick_parse_form(Dev, L0, []).

%% @spec quick_parse_form(IODevice, StartLine, Options) ->
%%           {ok, Form, LineNo}
%%         | {eof, LineNo}
%%         | {error, errorinfo(), LineNo}
%%
%%       IODevice = pid()
%%       StartLine = integer()
%%       Options = [term()]
%%       Form = erl_syntax:syntaxTree()
%%       LineNo = integer()
%%
%% @doc Similar to {@link parse_form/3}, but does a more quick-and-dirty
%% processing of the code. See {@link quick_parse_file/2} for details.
%%
%% @see parse/3
%% @see quick_parse_form/2
%% @see parse_form/3

-spec quick_parse_form(file:io_device(), integer(), [option()]) ->
        {'ok', erl_syntax:forms(), integer()}
      | {'eof', integer()} | {'error', errorinfo(), integer()}.

quick_parse_form(Dev, L0, Options) ->
    parse_form(Dev, L0, fun quick_parser/2, Options).

-record(opt, {clever = false :: boolean()}).

parse_form(Dev, L0, Parser, Options) ->
    NoFail = proplists:get_bool(no_fail, Options),
    Opt = #opt{clever = proplists:get_bool(clever, Options)},
    case io:scan_erl_form(Dev, "", L0) of
        {ok, Ts, L1} ->
            case catch {ok, Parser(Ts, Opt)} of
                {'EXIT', Term} ->
                    {error, io_error(L1, {unknown, Term}), L1};
                {error, Term} ->
		    IoErr = io_error(L1, Term),
		    {error, IoErr, L1};
                {parse_error, _IoErr} when NoFail ->
		    {ok, erl_syntax:set_pos(
			   erl_syntax:text(tokens_to_string(Ts)),
			   start_pos(Ts, L1)),
		     L1};
                {parse_error, IoErr} ->
		    {error, IoErr, L1};
                {ok, F} ->
                    {ok, F, L1}
            end;
        {error, _IoErr, _L1} = Err -> Err;
        {error, _Reason} -> {eof, L0}; % This is probably encoding problem
        {eof, _L1} = Eof -> Eof
    end.

io_error(L, Desc) ->
    {L, ?MODULE, Desc}.

start_pos([T | _Ts], _L) ->
    erl_anno:line(element(2, T));
start_pos([], L) ->
    L.

%% Exception-throwing wrapper for the standard Erlang parser stage

parse_tokens(Ts) ->
    parse_tokens(Ts, fun fix_form/1).

parse_tokens(Ts, Fix) ->
    case erl_parse:parse_form(Ts) of
        {ok, Form} ->
            Form;
        {error, IoErr} ->
	    case Fix(Ts) of
		{form, Form} ->
		    Form;
		{retry, Ts1, Fix1} ->
		    parse_tokens(Ts1, Fix1);
		error ->
		    throw({parse_error, IoErr})
	    end
    end.

%% ---------------------------------------------------------------------
%% Quick scanning/parsing - deletes macro definitions and other
%% preprocessor directives, and replaces all macro calls with atoms.

quick_parser(Ts, _Opt) ->
    filter_form(parse_tokens(quickscan_form(Ts))).

quickscan_form([{'-', _L}, {atom, La, define} | _Ts]) ->
    kill_form(La);
quickscan_form([{'-', _L}, {atom, La, undef} | _Ts]) ->
    kill_form(La);
quickscan_form([{'-', _L}, {atom, La, include} | _Ts]) ->
    kill_form(La);
quickscan_form([{'-', _L}, {atom, La, include_lib} | _Ts]) ->
    kill_form(La);
quickscan_form([{'-', _L}, {atom, La, ifdef} | _Ts]) ->
    kill_form(La);
quickscan_form([{'-', _L}, {atom, La, ifndef} | _Ts]) ->
    kill_form(La);
quickscan_form([{'-', _L}, {atom, La, else} | _Ts]) ->
    kill_form(La);
quickscan_form([{'-', _L}, {atom, La, endif} | _Ts]) ->
    kill_form(La);
quickscan_form([{'-', L}, {'?', _}, {Type, _, _}=N | [{'(', _} | _]=Ts])
  when Type =:= atom; Type =:= var ->
    %% minus, macro and open parenthesis at start of form - assume that
    %% the macro takes no arguments; e.g. `-?foo(...).'
    quickscan_macros_1(N, Ts, [{'-', L}]);
quickscan_form([{'?', _L}, {Type, _, _}=N | [{'(', _} | _]=Ts])
  when Type =:= atom; Type =:= var ->
    %% macro and open parenthesis at start of form - assume that the
    %% macro takes no arguments (see scan_macros for details)
    quickscan_macros_1(N, Ts, []);
quickscan_form(Ts) ->
    quickscan_macros(Ts).

kill_form(L) ->
    [{atom, L, ?pp_form}, {'(', L}, {')', L}, {'->', L}, {atom, L, kill},
     {dot, L}].

quickscan_macros(Ts) ->
    quickscan_macros(Ts, []).

quickscan_macros([{'?',_}, {Type, _, A} | Ts], [{string, L, S} | As])
  when Type =:= atom; Type =:= var ->
    %% macro after a string literal: change to a single string
    {_, Ts1} = skip_macro_args(Ts),
    S1 = S ++ quick_macro_string(A),
    quickscan_macros(Ts1, [{string, L, S1} | As]);
quickscan_macros([{'?',_}, {Type, _, _}=N | [{'(',_}|_]=Ts],
		 [{':',_}|_]=As)
  when Type =:= atom; Type =:= var ->
    %% macro and open parenthesis after colon - check the token
    %% following the arguments (see scan_macros for details)
    Ts1 = case skip_macro_args(Ts) of
	      {_, [{'->',_} | _] = Ts2} -> Ts2;
	      {_, [{'when',_} | _] = Ts2} -> Ts2;
	      _ -> Ts    %% assume macro without arguments
	  end,
    quickscan_macros_1(N, Ts1, As);
quickscan_macros([{'?',_}, {Type, _, _}=N | Ts], As)
  when Type =:= atom; Type =:= var ->
    %% macro with or without arguments
    {_, Ts1} = skip_macro_args(Ts),
    quickscan_macros_1(N, Ts1, As);
quickscan_macros([T | Ts], As) ->
    quickscan_macros(Ts, [T | As]);
quickscan_macros([], As) ->
    lists:reverse(As).

%% (after a macro has been found and the arglist skipped, if any)
quickscan_macros_1({_Type, _, A}, [{string, L, S} | Ts], As) ->
    %% string literal following macro: change to single string
    S1 = quick_macro_string(A) ++ S,
    quickscan_macros(Ts, [{string, L, S1} | As]);
quickscan_macros_1({_Type, L, A}, Ts, As) ->
    %% normal case - just replace the macro with an atom
    quickscan_macros(Ts, [{atom, L, quick_macro_atom(A)} | As]).

quick_macro_atom(A) ->
    list_to_atom("?" ++ atom_to_list(A)).

quick_macro_string(A) ->
    "(?" ++ atom_to_list(A) ++ ")".

%% Skipping to the end of a macro call, tracking open/close constructs.
%% @spec (Tokens) -> {Skipped, Rest}

skip_macro_args([{'(',_}=T | Ts]) ->
    skip_macro_args(Ts, [')'], [T]);
skip_macro_args(Ts) ->
    {[], Ts}.

skip_macro_args([{'(',_}=T | Ts], Es, As) ->
    skip_macro_args(Ts, [')' | Es], [T | As]);
skip_macro_args([{'{',_}=T | Ts], Es, As) ->
    skip_macro_args(Ts, ['}' | Es], [T | As]);
skip_macro_args([{'[',_}=T | Ts], Es, As) ->
    skip_macro_args(Ts, [']' | Es], [T | As]);
skip_macro_args([{'<<',_}=T | Ts], Es, As) ->
    skip_macro_args(Ts, ['>>' | Es], [T | As]);
skip_macro_args([{'begin',_}=T | Ts], Es, As) ->
    skip_macro_args(Ts, ['end' | Es], [T | As]);
skip_macro_args([{'if',_}=T | Ts], Es, As) ->
    skip_macro_args(Ts, ['end' | Es], [T | As]);
skip_macro_args([{'case',_}=T | Ts], Es, As) ->
    skip_macro_args(Ts, ['end' | Es], [T | As]);
skip_macro_args([{'receive',_}=T | Ts], Es, As) ->
    skip_macro_args(Ts, ['end' | Es], [T | As]);
skip_macro_args([{'try',_}=T | Ts], Es, As) ->
    skip_macro_args(Ts, ['end' | Es], [T | As]);
skip_macro_args([{'cond',_}=T | Ts], Es, As) ->
    skip_macro_args(Ts, ['end' | Es], [T | As]);
skip_macro_args([{E,_}=T | Ts], [E], As) ->		%final close
    {lists:reverse([T | As]), Ts};
skip_macro_args([{E,_}=T | Ts], [E | Es], As) ->	%matching close
    skip_macro_args(Ts, Es, [T | As]);
skip_macro_args([T | Ts], Es, As) ->
    skip_macro_args(Ts, Es, [T | As]);
skip_macro_args([], _Es, _As) ->
    throw({error, macro_args}).

filter_form({function, _, ?pp_form, _,
	     [{clause, _, [], [], [{atom, _, kill}]}]}) ->
    none;
filter_form(T) ->
    T.


%% ---------------------------------------------------------------------
%% Normal parsing - try to preserve all information

normal_parser(Ts, Opt) ->
    rewrite_form(parse_tokens(scan_form(Ts, Opt))).

scan_form([{'-', _L}, {atom, La, define} | Ts], Opt) ->
    [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La},
     {atom, La, define} | scan_macros(Ts, Opt)];
scan_form([{'-', _L}, {atom, La, undef} | Ts], Opt) ->
    [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La},
     {atom, La, undef} | scan_macros(Ts, Opt)];
scan_form([{'-', _L}, {atom, La, include} | Ts], Opt) ->
    [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La},
     {atom, La, include} | scan_macros(Ts, Opt)];
scan_form([{'-', _L}, {atom, La, include_lib} | Ts], Opt) ->
    [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La},
     {atom, La, include_lib} | scan_macros(Ts, Opt)];
scan_form([{'-', _L}, {atom, La, ifdef} | Ts], Opt) ->
    [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La},
     {atom, La, ifdef} | scan_macros(Ts, Opt)];
scan_form([{'-', _L}, {atom, La, ifndef} | Ts], Opt) ->
    [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La},
     {atom, La, ifndef} | scan_macros(Ts, Opt)];
scan_form([{'-', _L}, {atom, La, else} | Ts], Opt) ->
    [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La},
     {atom, La, else} | scan_macros(Ts, Opt)];
scan_form([{'-', _L}, {atom, La, endif} | Ts], Opt) ->
    [{atom, La, ?pp_form}, {'(', La}, {')', La}, {'->', La},
     {atom, La, endif} | scan_macros(Ts, Opt)];
scan_form([{'-', L}, {'?', L1}, {Type, _, _}=N | [{'(', _} | _]=Ts], Opt)
  when Type =:= atom; Type =:= var ->
    %% minus, macro and open parenthesis at start of form - assume that
    %% the macro takes no arguments; e.g. `-?foo(...).'
    macro(L1, N, Ts, [{'-', L}], Opt);
scan_form([{'?', L}, {Type, _, _}=N | [{'(', _} | _]=Ts], Opt)
  when Type =:= atom; Type =:= var ->
    %% macro and open parenthesis at start of form - assume that the
    %% macro takes no arguments; probably a function declaration on the
    %% form `?m(...) -> ...', which will not parse if it is rewritten as
    %% `(?m(...)) -> ...', so it must be handled as `(?m)(...) -> ...'
    macro(L, N, Ts, [], Opt);
scan_form(Ts, Opt) ->
    scan_macros(Ts, Opt).

scan_macros(Ts, Opt) ->
    scan_macros(Ts, [], Opt).

scan_macros([{'?', _}=M, {Type, _, _}=N | Ts], [{string, L, _}=S | As],
 	    #opt{clever = true}=Opt)
  when Type =:= atom; Type =:= var ->
    %% macro after a string literal: be clever and insert ++
    scan_macros([M, N | Ts], [{'++', L}, S | As], Opt);
scan_macros([{'?', L}, {Type, _, _}=N | [{'(',_}|_]=Ts],
	    [{':',_}|_]=As, Opt)
  when Type =:= atom; Type =:= var ->
    %% macro and open parentheses after colon - probably a call
    %% `m:?F(...)' so the argument list might belong to the call, not
    %% the macro - but it could also be a try-clause pattern
    %% `...:?T(...) ->' - we need to check the token following the
    %% arguments to decide
    {Args, Rest} = skip_macro_args(Ts),
    case Rest of
	[{'->',_} | _] ->
	    macro_call(Args, L, N, Rest, As, Opt);
	[{'when',_} | _] ->
	    macro_call(Args, L, N, Rest, As, Opt);
	_ ->
	    macro(L, N, Ts, As, Opt)
    end;
scan_macros([{'?', L}, {Type, _, _}=N | [{'(',_}|_]=Ts], As, Opt)
  when Type =:= atom; Type =:= var ->
    %% macro with arguments
    {Args, Rest} = skip_macro_args(Ts),
    macro_call(Args, L, N, Rest, As, Opt);
scan_macros([{'?', L }, {Type, _, _}=N | Ts], As, Opt)
  when Type =:= atom; Type =:= var ->
    %% macro without arguments
    macro(L, N, Ts, As, Opt);
scan_macros([T | Ts], As, Opt) ->
    scan_macros(Ts, [T | As], Opt);
scan_macros([], As, _Opt) ->
    lists:reverse(As).

%% Rewriting to a call which will be recognized by the post-parse pass
%% (we insert parentheses to preserve the precedences when parsing).

macro(L, {Type, _, A}, Rest, As, Opt) ->
    scan_macros_1([], Rest, [{atom,L,macro_atom(Type,A)} | As], Opt).

macro_call([{'(',_}, {')',_}], L, {_, Ln, _}=N, Rest, As, Opt) ->
    {Open, Close} = parentheses(As),
    scan_macros_1([], Rest,
		  lists:reverse(Open ++ [{atom,L,?macro_call},
					 {'(',L}, N, {')',Ln}] ++ Close,
				As), Opt);
macro_call([{'(',_} | Args], L, {_, Ln, _}=N, Rest, As, Opt) ->
    {Open, Close} = parentheses(As),
    %% note that we must scan the argument list; it may not be skipped
    scan_macros_1(Args ++ Close,
		  Rest,
		  lists:reverse(Open ++ [{atom,L,?macro_call},
					 {'(',L}, N, {',',Ln}],
				As), Opt).

macro_atom(atom, A) ->
    list_to_atom(?atom_prefix ++ atom_to_list(A));
macro_atom(var, A) ->
    list_to_atom(?var_prefix ++ atom_to_list(A)).

%% don't insert parentheses after a string token, to avoid turning
%% `"string" ?macro' into a "function application" `"string"(...)'
%% (see note at top of file)
parentheses([{string, _, _} | _]) ->
    {[], []};
parentheses(_) ->
    {[{'(',0}], [{')',0}]}.

%% (after a macro has been found and the arglist skipped, if any)
scan_macros_1(Args, [{string, L, _} | _]=Rest, As,
	      #opt{clever = true}=Opt) ->
    %% string literal following macro: be clever and insert ++
    scan_macros(Args ++ [{'++', L} | Rest],  As, Opt);
scan_macros_1(Args, Rest, As, Opt) ->
    %% normal case - continue scanning
    scan_macros(Args ++ Rest, As, Opt).

rewrite_form({function, L, ?pp_form, _,
              [{clause, _, [], [], [{call, _, A, As}]}]}) ->
    erl_syntax:set_pos(erl_syntax:attribute(A, rewrite_list(As)), L);
rewrite_form({function, L, ?pp_form, _, [{clause, _, [], [], [A]}]}) ->
    erl_syntax:set_pos(erl_syntax:attribute(A), L);
rewrite_form(T) ->
    rewrite(T).

rewrite_list([T | Ts]) ->
    [rewrite(T) | rewrite_list(Ts)];
rewrite_list([]) ->
    [].

%% Note: as soon as we start using erl_syntax:subtrees/1 and similar
%% functions, we cannot assume that we know the exact representation of
%% the syntax tree anymore - we must use erl_syntax functions to analyze
%% and decompose the data.

rewrite(Node) ->
    case erl_syntax:type(Node) of
	atom ->
	    case atom_to_list(erl_syntax:atom_value(Node)) of
		?atom_prefix ++ As ->
		    A1 = list_to_atom(As),
		    N = erl_syntax:copy_pos(Node, erl_syntax:atom(A1)),
		    erl_syntax:copy_pos(Node, erl_syntax:macro(N));
		?var_prefix ++ As ->
		    A1 = list_to_atom(As),
		    N = erl_syntax:copy_pos(Node, erl_syntax:variable(A1)),
		    erl_syntax:copy_pos(Node, erl_syntax:macro(N));
		_ ->
		    Node
	    end;
	application ->
	    F = erl_syntax:application_operator(Node),
	    case erl_syntax:type(F) of
		atom ->
		    case erl_syntax:atom_value(F) of
			?macro_call ->
			    [A | As] = erl_syntax:application_arguments(Node),
			    M = erl_syntax:macro(A, rewrite_list(As)),
			    erl_syntax:copy_pos(Node, M);
			_ ->
			    rewrite_1(Node)
		    end;
		_ ->
		    rewrite_1(Node)
	    end;
	_ ->
	    rewrite_1(Node)
    end.

rewrite_1(Node) ->
    case erl_syntax:subtrees(Node) of
	[] ->
	    Node;
	Gs ->
	    Node1 = erl_syntax:make_tree(erl_syntax:type(Node),
					 [[rewrite(T) || T <- Ts]
					  || Ts <- Gs]),
	    erl_syntax:copy_pos(Node, Node1)
    end.

%% attempting a rescue operation on a token sequence for a single form
%% if it could not be parsed after the normal treatment

fix_form([{atom, _, ?pp_form}, {'(', _}, {')', _}, {'->', _},
	  {atom, _, define}, {'(', _} | _]=Ts) ->
    case lists:reverse(Ts) of
	[{dot, _}, {')', _} | _] ->
	    {retry, Ts, fun fix_define/1};
	[{dot, L} | Ts1] ->
	    Ts2 = lists:reverse([{dot, L}, {')', L} | Ts1]),
	    {retry, Ts2, fun fix_define/1};
	_ ->
	    error
    end;
fix_form(_Ts) ->
    error.

fix_define([{atom, L, ?pp_form}, {'(', _}, {')', _}, {'->', _},
	    {atom, La, define}, {'(', _}, N, {',', _} | Ts]) ->
    [{dot, _}, {')', _} | Ts1] = lists:reverse(Ts),
    S = tokens_to_string(lists:reverse(Ts1)),
    A = erl_syntax:set_pos(erl_syntax:atom(define), La),
    Txt = erl_syntax:set_pos(erl_syntax:text(S), La),
    {form, erl_syntax:set_pos(erl_syntax:attribute(A, [N, Txt]), L)};
fix_define(_Ts) ->
    error.

%% @spec tokens_to_string(Tokens::[term()]) -> string()
%% 
%% @doc Generates a string corresponding to the given token sequence.
%% The string can be re-tokenized to yield the same token list again.

-spec tokens_to_string([term()]) -> string().

tokens_to_string([{atom,_,A} | Ts]) ->
    io_lib:write_atom(A) ++ " " ++ tokens_to_string(Ts);
tokens_to_string([{string, _, S} | Ts]) ->
    io_lib:write_string(S) ++ " " ++ tokens_to_string(Ts);
tokens_to_string([{char, _, C} | Ts]) ->
    io_lib:write_char(C) ++ " " ++ tokens_to_string(Ts);
tokens_to_string([{float, _, F} | Ts]) ->
    float_to_list(F) ++ " " ++ tokens_to_string(Ts);
tokens_to_string([{integer, _, N} | Ts]) ->
    integer_to_list(N) ++ " " ++ tokens_to_string(Ts);
tokens_to_string([{var, _, A} | Ts]) ->
    atom_to_list(A) ++ " " ++ tokens_to_string(Ts);
tokens_to_string([{dot, _} | Ts]) ->
    ".\n" ++ tokens_to_string(Ts);
tokens_to_string([{A, _} | Ts]) ->
    atom_to_list(A) ++ " " ++ tokens_to_string(Ts);
tokens_to_string([]) ->
    "".


%% @spec format_error(Descriptor::term()) -> string()
%% @hidden
%% @doc Callback function for formatting error descriptors. Not for
%% normal use.

-spec format_error(term()) -> string().

format_error(macro_args) ->
    errormsg("macro call missing end parenthesis");
format_error({unknown, Reason}) ->
    errormsg(io_lib:format("unknown error: ~P", [Reason, 15])).

errormsg(String) ->
    io_lib:format("~s: ~s", [?MODULE, String]).


%% =====================================================================