%% =====================================================================
%% This library is free software; you can redistribute it and/or modify
%% it under the terms of the GNU Lesser General Public License as
%% published by the Free Software Foundation; either version 2 of the
%% License, or (at your option) any later version.
%%
%% This library is distributed in the hope that it will be useful, but
%% WITHOUT ANY WARRANTY; without even the implied warranty of
%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
%% Lesser General Public License for more details.
%%
%% You should have received a copy of the GNU Lesser General Public
%% License along with this library; if not, write to the Free Software
%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
%% USA
%%
%% =====================================================================
%% @copyright 1997-2006 Richard Carlsson
%% @author Richard Carlsson <[email protected]>
%% @end
%% =====================================================================
%% @doc Functions for reading comment lines from Erlang source code.
-module(erl_comment_scan).
-export([file/1, join_lines/1, scan_lines/1, string/1]).
-export_type([comment/0]).
%% =====================================================================
%% A multi-line comment: {Line, Column, Indentation, Text}, where Text
%% holds one string per consecutive comment line.
-type comment() :: {integer(), integer(), integer(), [string()]}.
%% A single comment line: {Line, Column, Indent, Text}, where Text is
%% the text of that one line.
-type commentLine() :: {integer(), integer(), integer(), string()}.
%% =====================================================================
%% @spec file(FileName::file:filename()) -> [Comment]
%%
%% Comment = {Line, Column, Indentation, Text}
%% Line = integer()
%% Column = integer()
%% Indentation = integer()
%% Text = [string()]
%%
%% @doc Extracts comments from an Erlang source code file. Returns a
%% list of entries representing <em>multi-line</em> comments, listed in
%% order of increasing line-numbers. For each entry, `Text'
%% is a list of strings representing the consecutive comment lines in
%% top-down order; the strings contain <em>all</em> characters following
%% (but not including) the first comment-introducing `%'
%% character on the line, up to (but not including) the line-terminating
%% newline.
%%
%% Furthermore, `Line' is the line number and
%% `Column' the left column of the comment (i.e., the column
%% of the comment-introducing `%' character).
%% `Indentation' is the indentation (or padding), measured in
%% character positions between the last non-whitespace character before
%% the comment (or the left margin), and the left column of the comment.
%% `Line' and `Column' are always positive
%% integers, and `Indentation' is a nonnegative integer.
%%
%% Evaluation exits with reason `{read, Reason}' if a read
%% error occurred, where `Reason' is an atom corresponding to
%% a Posix error code; see the module {@link //kernel/file} for details.
-spec file(file:filename()) -> [comment()].
file(Name) ->
    %% Validate the name first; filename/1 exits on a malformed name.
    Name1 = filename(Name),
    %% The old-style `catch' is kept on purpose: callers may depend on
    %% the exact exit reasons it produces for errors raised by the read.
    case catch {ok, file:read_file(Name1)} of
        {ok, V} ->
            case V of
                {ok, B} ->
                    case decode_source(B) of
                        {ok, String} ->
                            string(String);
                        {error, R} ->
                            error_read_file(Name1),
                            exit(R)
                    end;
                {error, E} ->
                    error_read_file(Name1),
                    exit({read, E})
            end;
        {'EXIT', E} ->
            error_read_file(Name1),
            exit(E);
        R ->
            error_read_file(Name1),
            throw(R)
    end.

%% Converts the raw file contents to a list of characters. Returns
%% `{ok, Chars}' on success, or `{error, Failure}' where `Failure' is
%% whatever the first decoding attempt produced.
%%
%% The encoding is taken from a coding declaration found in the bytes
%% that were already read (so the file is not opened a second time),
%% falling back to the default source encoding when there is none. If
%% nothing was declared and decoding with the default fails, the bytes
%% are decoded as Latin-1 instead of giving up, so that legacy sources
%% without a declaration can still be scanned.
decode_source(Bin) ->
    Declared = epp:read_encoding_from_binary(Bin),
    Encoding = case Declared of
                   none -> epp:default_encoding();
                   _ -> Declared
               end,
    case catch unicode:characters_to_list(Bin, Encoding) of
        Chars when is_list(Chars) ->
            {ok, Chars};
        Failure when Declared =:= none ->
            case catch unicode:characters_to_list(Bin, latin1) of
                Latin1 when is_list(Latin1) ->
                    {ok, Latin1};
                _ ->
                    {error, Failure}
            end;
        Failure ->
            {error, Failure}
    end.
%% =====================================================================
%% @spec string(string()) -> [Comment]
%%
%% Comment = {Line, Column, Indentation, Text}
%% Line = integer()
%% Column = integer()
%% Indentation = integer()
%% Text = [string()]
%%
%% @doc Extracts comments from a string containing Erlang source code.
%% Except for reading directly from a string, the behaviour is the same
%% as for {@link file/1}.
%%
%% @see file/1
-spec string(string()) -> [comment()].
string(Text) ->
    %% scan_lines/1 and join_lines/1 both produce their results with the
    %% last line first, so the joined list is reversed before returning.
    CommentLines = scan_lines(Text),
    Comments = join_lines(CommentLines),
    lists:reverse(Comments).
%% =====================================================================
%% @spec scan_lines(string()) -> [CommentLine]
%%
%% CommentLine = {Line, Column, Indent, Text}
%% Line = integer()
%% Column = integer()
%% Indent = integer()
%% Text = string()
%%
%% @doc Extracts individual comment lines from a source code string.
%% Returns a list of comment lines found in the text, listed in order of
%% <em>decreasing</em> line-numbers, i.e., the last comment line in the
%% input is first in the resulting list. `Text' is a single
%% string, containing all characters following (but not including) the
%% first comment-introducing `%' character on the line, up
%% to (but not including) the line-terminating newline. For details on
%% `Line', `Column' and `Indent', see {@link file/1}.
-spec scan_lines(string()) -> [commentLine()].
scan_lines(Text) ->
    %% Start on line 1, at column 0, with nothing seen on the line yet.
    scan_lines(Text, 1, 0, 0, []).

%% scan_lines(Chars, Line, Col, Mark, Acc)
%%
%% Main scanning loop. `Col' is the zero-based column of the next
%% character and `Mark' is the column just past the last non-whitespace
%% character seen so far on the current line; both are reset to 0 at
%% every line break. `Acc' collects the comment lines found, most
%% recent first, and is returned when the input is exhausted.
scan_lines([], _Line, _Col, _Mark, Acc) ->
    Acc;
scan_lines([$\r, $\n | Rest], Line, _Col, _Mark, Acc) ->
    scan_lines(Rest, Line + 1, 0, 0, Acc);
scan_lines([Eol | Rest], Line, _Col, _Mark, Acc)
  when Eol =:= $\n; Eol =:= $\r ->
    scan_lines(Rest, Line + 1, 0, 0, Acc);
scan_lines([$\s | Rest], Line, Col, Mark, Acc) ->
    %% Whitespace advances the column but leaves the mark alone.
    scan_lines(Rest, Line, Col + 1, Mark, Acc);
scan_lines([$\t | Rest], Line, Col, Mark, Acc) ->
    scan_lines(Rest, Line, tab(Col), Mark, Acc);
scan_lines([$% | Rest], Line, Col, Mark, Acc) ->
    %% Start of a comment: collect its text up to the end of the line.
    scan_comment(Rest, "", Line, Col, Mark, Acc);
scan_lines([$$ | Rest], Line, Col, _Mark, Acc) ->
    %% Character literal: the next character must not be interpreted.
    scan_char(Rest, Line, Col + 1, Acc);
scan_lines([$" | Rest], Line, Col, _Mark, Acc) ->
    scan_string(Rest, $", Line, Col + 1, Acc);
scan_lines([$' | Rest], Line, Col, _Mark, Acc) ->
    scan_string(Rest, $', Line, Col + 1, Acc);
scan_lines([_Other | Rest], Line, Col, _Mark, Acc) ->
    %% Any other character is non-whitespace: move the mark past it.
    Next = Col + 1,
    scan_lines(Rest, Line, Next, Next, Acc).
%% Returns the column of the next tab stop after `Col', with tab stops
%% placed every 8 columns (0, 8, 16, ...).
tab(Col) ->
    ((Col div 8) + 1) * 8.
%% scan_comment(Chars, RevText, Line, Col, Mark, Acc)
%%
%% Collects the text of a comment, in reverse order, into `RevText'
%% until a line terminator (LF, CR LF or CR) or the end of the input is
%% reached, and then hands over to seen_comment/6 with the remaining
%% input. The terminator itself is not part of the comment text.
scan_comment([], RevText, Line, Col, Mark, Acc) ->
    seen_comment([], RevText, Line, Col, Mark, Acc);
scan_comment([$\r, $\n | Rest], RevText, Line, Col, Mark, Acc) ->
    seen_comment(Rest, RevText, Line, Col, Mark, Acc);
scan_comment([Eol | Rest], RevText, Line, Col, Mark, Acc)
  when Eol =:= $\n; Eol =:= $\r ->
    seen_comment(Rest, RevText, Line, Col, Mark, Acc);
scan_comment([Ch | Rest], RevText, Line, Col, Mark, Acc) ->
    scan_comment(Rest, [Ch | RevText], Line, Col, Mark, Acc).
%% Adds a finished comment line to the accumulator and resumes normal
%% scanning at the start of the next line. Columns are counted from 0
%% internally, but the column stored in a comment descriptor is
%% 1-based, hence `Col + 1'.
seen_comment(Rest, RevText, Line, Col, Mark, Acc) ->
    %% The text is still reversed here, so stripping from the left
    %% removes the trailing spaces of the comment line.
    Trimmed = string:strip(RevText, left),
    %% The indentation is the distance from the last non-whitespace
    %% character before the comment (or the left margin) to the comment.
    Entry = {Line, Col + 1, Col - Mark, lists:reverse(Trimmed)},
    scan_lines(Rest, Line + 1, 0, 0, [Entry | Acc]).
%% scan_string(Chars, Quote, Line, Col, Acc)
%%
%% Skips the body of a quoted string or atom whose closing delimiter is
%% `Quote', keeping track of the line and column, and then returns to
%% scan_lines/5 with the mark placed just after the closing quote. If
%% the input ends before the closing quote, the accumulator is returned
%% as it is.
scan_string([Quote | Cs], Quote, L, Col, Ack) ->
    N = Col + 1,
    scan_lines(Cs, L, N, N, Ack);
scan_string([$\t | Cs], Quote, L, Col, Ack) ->
    scan_string(Cs, Quote, L, tab(Col), Ack);
scan_string([$\n | Cs], Quote, L, _Col, Ack) ->
    %% Newlines should really not occur in strings/atoms, but we
    %% want to be well behaved even if the input is not.
    scan_string(Cs, Quote, L + 1, 0, Ack);
scan_string([$\r, $\n | Cs], Quote, L, _Col, Ack) ->
    scan_string(Cs, Quote, L + 1, 0, Ack);
scan_string([$\r | Cs], Quote, L, _Col, Ack) ->
    scan_string(Cs, Quote, L + 1, 0, Ack);
scan_string([$\\, C | Cs], Quote, L, Col, Ack)
  when C =/= $\n, C =/= $\r, C =/= $\t ->
    %% Escape sequence: the character after the backslash is ignored,
    %% so that an escaped quote does not end the string. A line break
    %% or tab after the backslash is deliberately excluded here: only
    %% the backslash is consumed (by the clause below), and the
    %% following character is then counted by the clauses above, so
    %% the line number and column stay correct.
    scan_string(Cs, Quote, L, Col + 2, Ack);
scan_string([_C | Cs], Quote, L, Col, Ack) ->
    scan_string(Cs, Quote, L, Col + 1, Ack);
scan_string([], _Quote, _L, _Col, Ack) ->
    %% Finish quietly.
    Ack.
%% scan_char(Chars, Line, Col, Acc)
%%
%% Skips the character that follows a `$' (a character literal), so
%% that it is never taken for a comment start or a quote, and then
%% returns to scan_lines/5. The skipped character counts as
%% non-whitespace, so the mark is moved past it, except after a line
%% break, where both column and mark restart at 0.
scan_char([$\t | Cs], L, Col, Ack) ->
    N = tab(Col),
    scan_lines(Cs, L, N, N, Ack);    % this is not just any whitespace
scan_char([$\n | Cs], L, _Col, Ack) ->
    scan_lines(Cs, L + 1, 0, 0, Ack);    % handle this, just in case
scan_char([$\r, $\n | Cs], L, _Col, Ack) ->
    scan_lines(Cs, L + 1, 0, 0, Ack);
scan_char([$\r | Cs], L, _Col, Ack) ->
    scan_lines(Cs, L + 1, 0, 0, Ack);
scan_char([$\\, C | Cs], L, Col, Ack)
  when C =:= $\t; C =:= $\n; C =:= $\r ->
    %% A backslash followed by a tab or a line break: step past the
    %% backslash and let the clauses above account for the following
    %% character, so that the line number and column stay correct.
    scan_char([C | Cs], L, Col + 1, Ack);
scan_char([$\\, _C | Cs], L, Col, Ack) ->
    N = Col + 2,    % character C must be ignored
    scan_lines(Cs, L, N, N, Ack);
scan_char([_C | Cs], L, Col, Ack) ->
    N = Col + 1,    % character C must be ignored
    scan_lines(Cs, L, N, N, Ack);
scan_char([], _L, _Col, Ack) ->
    %% Finish quietly.
    Ack.
%% =====================================================================
%% @spec join_lines([CommentLine]) -> [Comment]
%%
%% CommentLine = {Line, Column, Indent, string()}
%% Line = integer()
%% Column = integer()
%% Indent = integer()
%% Comment = {Line, Column, Indent, Text}
%% Text = [string()]
%%
%% @doc Joins individual comment lines into multi-line comments. The
%% input is a list of entries representing individual comment lines,
%% <em>in order of decreasing line-numbers</em>; see
%% {@link scan_lines/1} for details. The result is a list of
%% entries representing <em>multi-line</em> comments, <em>still listed
%% in order of decreasing line-numbers</em>, but where for each entry,
%% `Text' is a list of consecutive comment lines in order of
%% <em>increasing</em> line-numbers (i.e., top-down).
%%
%% @see scan_lines/1
-spec join_lines([commentLine()]) -> [comment()].
join_lines([]) ->
    [];
join_lines([{Line, Column, Indent, Text} | Earlier]) ->
    %% The first entry starts a new group holding just its own text.
    join_lines(Earlier, [Text], Line, Column, Indent).

%% join_lines(Earlier, Texts, Line, Column, Indent)
%%
%% `Texts' is the group being built and is never empty; `Line',
%% `Column' and `Indent' describe the topmost line of the group so far.
%% The input list is in order of decreasing line numbers, so each new
%% entry lies above the current group.
join_lines([{PrevLine, PrevCol, PrevInd, PrevText} | Earlier],
           Texts, Line, Column, Indent)
  when PrevLine =:= Line - 1, PrevCol =:= Column, Indent + 1 =:= Column ->
    %% The entry is on the line directly above, starts in the same
    %% column, and the top line of the group has only indentation in
    %% front of it (`Indent + 1 =:= Column'), i.e. it is alone on its
    %% line. A comment that follows code on its line is therefore not
    %% joined with the line above; this is not always what one wants,
    %% but works well in general.
    join_lines(Earlier, [PrevText | Texts], PrevLine, PrevCol, PrevInd);
join_lines([{PrevLine, PrevCol, PrevInd, PrevText} | Earlier],
           Texts, Line, Column, Indent) ->
    %% Finish the current group and let the entry start the next one.
    [{Line, Column, Indent, Texts}
     | join_lines(Earlier, [PrevText], PrevLine, PrevCol, PrevInd)];
join_lines([], Texts, Line, Column, Indent) ->
    [{Line, Column, Indent, Texts}].
%% =====================================================================
%% Utility functions for internal use
%% Checks that a file name is a flat list of positive integers and
%% returns it. Any positive code point is accepted, so Unicode file
%% names are allowed. For anything else, an error is reported and the
%% process exits with reason `error'.
filename([C | T]) when is_integer(C), C > 0 ->
    [C | filename(T)];
filename([]) ->
    [];
filename(N) ->
    %% `~tP' so that a name containing Unicode characters can be printed.
    report_error("bad filename: `~tP'.", [N, 25]),
    exit(error).

%% Reports that the named file could not be read. `~ts' is used because
%% the name may contain characters above 255.
error_read_file(Name) ->
    report_error("error reading file `~ts'.", [Name]).

%% Sends an error report to the error logger. The format string `S' is
%% prefixed with the module name and terminated with a newline.
report_error(S, Vs) ->
    error_logger:error_msg(lists:concat([?MODULE, ": ", S, "\n"]), Vs).
%% =====================================================================