aboutsummaryrefslogtreecommitdiffstats
path: root/lib/syntax_tools/src/erl_comment_scan.erl
diff options
context:
space:
mode:
authorErlang/OTP <[email protected]>2009-11-20 14:54:40 +0000
committerErlang/OTP <[email protected]>2009-11-20 14:54:40 +0000
commit84adefa331c4159d432d22840663c38f155cd4c1 (patch)
treebff9a9c66adda4df2106dfd0e5c053ab182a12bd /lib/syntax_tools/src/erl_comment_scan.erl
downloadotp-84adefa331c4159d432d22840663c38f155cd4c1.tar.gz
otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.bz2
otp-84adefa331c4159d432d22840663c38f155cd4c1.zip
The R13B03 release.OTP_R13B03
Diffstat (limited to 'lib/syntax_tools/src/erl_comment_scan.erl')
-rw-r--r--lib/syntax_tools/src/erl_comment_scan.erl280
1 files changed, 280 insertions, 0 deletions
diff --git a/lib/syntax_tools/src/erl_comment_scan.erl b/lib/syntax_tools/src/erl_comment_scan.erl
new file mode 100644
index 0000000000..df1449da4e
--- /dev/null
+++ b/lib/syntax_tools/src/erl_comment_scan.erl
@@ -0,0 +1,280 @@
+%% =====================================================================
+%% This library is free software; you can redistribute it and/or modify
+%% it under the terms of the GNU Lesser General Public License as
+%% published by the Free Software Foundation; either version 2 of the
+%% License, or (at your option) any later version.
+%%
+%% This library is distributed in the hope that it will be useful, but
+%% WITHOUT ANY WARRANTY; without even the implied warranty of
+%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+%% Lesser General Public License for more details.
+%%
+%% You should have received a copy of the GNU Lesser General Public
+%% License along with this library; if not, write to the Free Software
+%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+%% USA
+%%
+%% $Id$
+%%
+%% @copyright 1997-2006 Richard Carlsson
+%% @author Richard Carlsson <[email protected]>
+%% @end
+%% =====================================================================
+
+%% @doc Functions for reading comment lines from Erlang source code.
+
+-module(erl_comment_scan).
+
+-export([file/1, join_lines/1, scan_lines/1, string/1]).
+
+
+%% =====================================================================
+%% @spec file(FileName::file:filename()) -> [Comment]
+%%
+%% Comment = {Line, Column, Indentation, Text}
+%% Line = integer()
+%% Column = integer()
+%% Indentation = integer()
+%% Text = [string()]
+%%
+%% @doc Extracts comments from an Erlang source code file. Returns a
+%% list of entries representing <em>multi-line</em> comments, listed in
+%% order of increasing line-numbers. For each entry, `Text'
+%% is a list of strings representing the consecutive comment lines in
+%% top-down order; the strings contain <em>all</em> characters following
+%% (but not including) the first comment-introducing `%'
+%% character on the line, up to (but not including) the line-terminating
+%% newline.
+%%
+%% Furthermore, `Line' is the line number and
+%% `Column' the left column of the comment (i.e., the column
+%% of the comment-introducing `%' character).
+%% `Indent' is the indentation (or padding), measured in
+%% character positions between the last non-whitespace character before
+%% the comment (or the left margin), and the left column of the comment.
+%% `Line' and `Column' are always positive
+%% integers, and `Indentation' is a nonnegative integer.
+%%
+%% Evaluation exits with reason `{read, Reason}' if a read
+%% error occurred, where `Reason' is an atom corresponding to
+%% a Posix error code; see the module {@link //kernel/file} for details.
+
+file(Name) ->
+ Name1 = filename(Name),
+ case catch {ok, file:read_file(Name1)} of
+ {ok, V} ->
+ case V of
+ {ok, B} ->
+ string(binary_to_list(B));
+ {error, E} ->
+ error_read_file(Name1),
+ exit({read, E})
+ end;
+ {'EXIT', E} ->
+ error_read_file(Name1),
+ exit(E);
+ R ->
+ error_read_file(Name1),
+ throw(R)
+ end.
+
+
+%% =====================================================================
+%% string(string()) -> [Comment]
+%%
+%% Comment = {Line, Column, Indentation, Text}
+%% Line = integer()
+%% Column = integer()
+%% Indentation = integer()
+%% Text = [string()]
+%%
+%% @doc Extracts comments from a string containing Erlang source code.
+%% Except for reading directly from a string, the behaviour is the same
+%% as for {@link file/1}.
+%%
+%% @see file/1
+
+string(Text) ->
+ lists:reverse(join_lines(scan_lines(Text))).
+
+
+%% =====================================================================
+%% @spec scan_lines(string()) -> [CommentLine]
+%%
+%% CommentLine = {Line, Column, Indent, Text}
+%% Line = integer()
+%% Column = integer()
+%% Indent = integer()
+%% Text = string()
+%%
+%% @doc Extracts individual comment lines from a source code string.
+%% Returns a list of comment lines found in the text, listed in order of
+%% <em>decreasing</em> line-numbers, i.e., the last comment line in the
+%% input is first in the resulting list. `Text' is a single
+%% string, containing all characters following (but not including) the
+%% first comment-introducing `%' character on the line, up
+%% to (but not including) the line-terminating newline. For details on
+%% `Line', `Column' and `Indent', see {@link file/1}.
+
+scan_lines(Text) ->
+ scan_lines(Text, 1, 0, 0, []).
+
+scan_lines([$\040 | Cs], L, Col, M, Ack) ->
+ scan_lines(Cs, L, Col + 1, M, Ack);
+scan_lines([$\t | Cs], L, Col, M, Ack) ->
+ scan_lines(Cs, L, tab(Col), M, Ack);
+scan_lines([$\n | Cs], L, _Col, _M, Ack) ->
+ scan_lines(Cs, L + 1, 0, 0, Ack);
+scan_lines([$\r, $\n | Cs], L, _Col, _M, Ack) ->
+ scan_lines(Cs, L + 1, 0, 0, Ack);
+scan_lines([$\r | Cs], L, _Col, _M, Ack) ->
+ scan_lines(Cs, L + 1, 0, 0, Ack);
+scan_lines([$% | Cs], L, Col, M, Ack) ->
+ scan_comment(Cs, "", L, Col, M, Ack);
+scan_lines([$$ | Cs], L, Col, _M, Ack) ->
+ scan_char(Cs, L, Col + 1, Ack);
+scan_lines([$" | Cs], L, Col, _M, Ack) ->
+ scan_string(Cs, $", L, Col + 1, Ack);
+scan_lines([$' | Cs], L, Col, _M, Ack) ->
+ scan_string(Cs, $', L, Col + 1, Ack);
+scan_lines([_C | Cs], L, Col, _M, Ack) ->
+ N = Col + 1,
+ scan_lines(Cs, L, N, N, Ack);
+scan_lines([], _L, _Col, _M, Ack) ->
+ Ack.
+
+tab(Col) ->
+ Col - (Col rem 8) + 8.
+
+scan_comment([$\n | Cs], Cs1, L, Col, M, Ack) ->
+ seen_comment(Cs, Cs1, L, Col, M, Ack);
+scan_comment([$\r, $\n | Cs], Cs1, L, Col, M, Ack) ->
+ seen_comment(Cs, Cs1, L, Col, M, Ack);
+scan_comment([$\r | Cs], Cs1, L, Col, M, Ack) ->
+ seen_comment(Cs, Cs1, L, Col, M, Ack);
+scan_comment([C | Cs], Cs1, L, Col, M, Ack) ->
+ scan_comment(Cs, [C | Cs1], L, Col, M, Ack);
+scan_comment([], Cs1, L, Col, M, Ack) ->
+ seen_comment([], Cs1, L, Col, M, Ack).
+
+%% Add a comment line to the ackumulator and return to normal
+%% scanning. Note that we compute column positions starting at 0
+%% internally, but the column values in the comment descriptors
+%% should start at 1.
+
+seen_comment(Cs, Cs1, L, Col, M, Ack) ->
+ %% Compute indentation and strip trailing spaces
+ N = Col - M,
+ Text = lists:reverse(string:strip(Cs1, left)),
+ Ack1 = [{L, Col + 1, N, Text} | Ack],
+ scan_lines(Cs, L + 1, 0, 0, Ack1).
+
+scan_string([Quote | Cs], Quote, L, Col, Ack) ->
+ N = Col + 1,
+ scan_lines(Cs, L, N, N, Ack);
+scan_string([$\t | Cs], Quote, L, Col, Ack) ->
+ scan_string(Cs, Quote, L, tab(Col), Ack);
+scan_string([$\n | Cs], Quote, L, _Col, Ack) ->
+ %% Newlines should really not occur in strings/atoms, but we
+ %% want to be well behaved even if the input is not.
+ scan_string(Cs, Quote, L + 1, 0, Ack);
+scan_string([$\r, $\n | Cs], Quote, L, _Col, Ack) ->
+ scan_string(Cs, Quote, L + 1, 0, Ack);
+scan_string([$\r | Cs], Quote, L, _Col, Ack) ->
+ scan_string(Cs, Quote, L + 1, 0, Ack);
+scan_string([$\\, _C | Cs], Quote, L, Col, Ack) ->
+ scan_string(Cs, Quote, L, Col + 2, Ack); % ignore character C
+scan_string([_C | Cs], Quote, L, Col, Ack) ->
+ scan_string(Cs, Quote, L, Col + 1, Ack);
+scan_string([], _Quote, _L, _Col, Ack) ->
+ %% Finish quietly.
+ Ack.
+
+scan_char([$\t | Cs], L, Col, Ack) ->
+ N = tab(Col),
+ scan_lines(Cs, L, N, N, Ack); % this is not just any whitespace
+scan_char([$\n | Cs], L, _Col, Ack) ->
+ scan_lines(Cs, L + 1, 0, 0, Ack); % handle this, just in case
+scan_char([$\r, $\n | Cs], L, _Col, Ack) ->
+ scan_lines(Cs, L + 1, 0, 0, Ack);
+scan_char([$\r | Cs], L, _Col, Ack) ->
+ scan_lines(Cs, L + 1, 0, 0, Ack);
+scan_char([$\\, _C | Cs], L, Col, Ack) ->
+ N = Col + 2, % character C must be ignored
+ scan_lines(Cs, L, N, N, Ack);
+scan_char([_C | Cs], L, Col, Ack) ->
+ N = Col + 1, % character C must be ignored
+ scan_lines(Cs, L, N, N, Ack);
+scan_char([], _L, _Col, Ack) ->
+ %% Finish quietly.
+ Ack.
+
+
+%% =====================================================================
+%% @spec join_lines([CommentLine]) -> [Comment]
+%%
+%% CommentLine = {Line, Column, Indent, string()}
+%% Line = integer()
+%% Column = integer()
+%% Indent = integer()
+%% Comment = {Line, Column, Indent, Text}
+%% Text = [string()]
+%%
+%% @doc Joins individual comment lines into multi-line comments. The
+%% input is a list of entries representing individual comment lines,
+%% <em>in order of decreasing line-numbers</em>; see
+%% {@link scan_lines/1} for details. The result is a list of
+%% entries representing <em>multi-line</em> comments, <em>still listed
+%% in order of decreasing line-numbers</em>, but where for each entry,
+%% `Text' is a list of consecutive comment lines in order of
+%% <em>increasing</em> line-numbers (i.e., top-down).
+%%
+%% @see scan_lines/1
+
+join_lines([{L, Col, Ind, Txt} | Lines]) ->
+ join_lines(Lines, [Txt], L, Col, Ind);
+join_lines([]) ->
+ [].
+
+%% In the following, we assume that the current `Txt' is never empty.
+%% Recall that the list is in reverse line-number order.
+
+join_lines([{L1, Col1, Ind1, Txt1} | Lines], Txt, L, Col, Ind) ->
+ if L1 =:= L - 1, Col1 =:= Col, Ind + 1 =:= Col ->
+ %% The last test above checks that the previous
+ %% comment was alone on its line; otherwise it won't
+ %% be joined with the current; this is not always what
+ %% one wants, but works well in general.
+ join_lines(Lines, [Txt1 | Txt], L1, Col1, Ind1);
+ true ->
+ %% Finish the current comment and let the new line
+ %% start the next one.
+ [{L, Col, Ind, Txt}
+ | join_lines(Lines, [Txt1], L1, Col1, Ind1)]
+ end;
+join_lines([], Txt, L, Col, Ind) ->
+ [{L, Col, Ind, Txt}].
+
+
+%% =====================================================================
+%% Utility functions for internal use
+
+filename([C|T]) when is_integer(C), C > 0, C =< 255 ->
+ [C | filename(T)];
+filename([H|T]) ->
+ filename(H) ++ filename(T);
+filename([]) ->
+ [];
+filename(N) when is_atom(N) ->
+ atom_to_list(N);
+filename(N) ->
+ report_error("bad filename: `~P'.", [N, 25]),
+ exit(error).
+
+error_read_file(Name) ->
+ report_error("error reading file `~s'.", [Name]).
+
+report_error(S, Vs) ->
+ error_logger:error_msg(lists:concat([?MODULE, ": ", S, "\n"]), Vs).
+
+%% =====================================================================