1 files changed, 280 insertions, 0 deletions
diff --git a/lib/syntax_tools/src/erl_comment_scan.erl b/lib/syntax_tools/src/erl_comment_scan.erl
new file mode 100644
index 0000000000..df1449da4e
--- /dev/null
+++ b/lib/syntax_tools/src/erl_comment_scan.erl
@@ -0,0 +1,280 @@
+%% =====================================================================
+%% This library is free software; you can redistribute it and/or modify
+%% it under the terms of the GNU Lesser General Public License as
+%% published by the Free Software Foundation; either version 2 of the
+%% License, or (at your option) any later version.
+%%
+%% This library is distributed in the hope that it will be useful, but
+%% WITHOUT ANY WARRANTY; without even the implied warranty of
+%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+%% Lesser General Public License for more details.
+%%
+%% You should have received a copy of the GNU Lesser General Public
+%% License along with this library; if not, write to the Free Software
+%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+%% USA
+%%
+%% $Id$
+%%
+%% @copyright 1997-2006 Richard Carlsson
+%% @author Richard Carlsson <[email protected]>
+%% @end
+%% =====================================================================
+
+%% @doc Functions for reading comment lines from Erlang source code.
+
+-module(erl_comment_scan).
+
+-export([file/1, join_lines/1, scan_lines/1, string/1]).
+
+
+%% =====================================================================
+%% @spec file(FileName::file:filename()) -> [Comment]
+%%
+%%	    Comment = {Line, Column, Indentation, Text}
+%%	    Line = integer()
+%%          Column = integer()
+%%          Indentation = integer()
+%%          Text = [string()]
+%%
+%% @doc Extracts comments from an Erlang source code file. Returns a
+%% list of entries representing <em>multi-line</em> comments, listed in
+%% order of increasing line-numbers. For each entry, `Text'
+%% is a list of strings representing the consecutive comment lines in
+%% top-down order; the strings contain <em>all</em> characters following
+%% (but not including) the first comment-introducing `%'
+%% character on the line, up to (but not including) the line-terminating
+%% newline.
+%%
+%% Furthermore, `Line' is the line number and
+%% `Column' the left column of the comment (i.e., the column
+%% of the comment-introducing `%' character).
+%% `Indentation' is the indentation (or padding), measured in
+%% character positions between the last non-whitespace character before
+%% the comment (or the left margin), and the left column of the comment.
+%% `Line' and `Column' are always positive
+%% integers, and `Indentation' is a nonnegative integer.
+%%
+%% Evaluation exits with reason `{read, Reason}' if a read
+%% error occurred, where `Reason' is an atom corresponding to
+%% a Posix error code; see the module {@link //kernel/file} for details.
+
+file(Name) ->
+    %% Normalise the name first; filename/1 exits on malformed input.
+    Path = filename(Name),
+    %% The `{ok, ...}' wrapper separates a normal return of
+    %% file:read_file/1 from values produced by the old-style `catch'.
+    case catch {ok, file:read_file(Path)} of
+        {ok, {ok, Bin}} ->
+            string(binary_to_list(Bin));
+        {ok, {error, Reason}} ->
+            error_read_file(Path),
+            exit({read, Reason});
+        {'EXIT', Reason} ->
+            error_read_file(Path),
+            exit(Reason);
+        Thrown ->
+            error_read_file(Path),
+            throw(Thrown)
+    end.
+
+
+%% =====================================================================
+%% @spec string(string()) -> [Comment]
+%%
+%%	    Comment = {Line, Column, Indentation, Text}
+%%	    Line = integer()
+%%          Column = integer()
+%%          Indentation = integer()
+%%          Text = [string()]
+%%
+%% @doc Extracts comments from a string containing Erlang source code.
+%% Except for reading directly from a string, the behaviour is the same
+%% as for {@link file/1}.
+%%
+%% @see file/1
+
+string(Text) ->
+    lists:reverse(join_lines(scan_lines(Text, 1, 0, 0, []))).
+
+
+%% =====================================================================
+%% @spec scan_lines(string()) -> [CommentLine]
+%%
+%%	    CommentLine = {Line, Column, Indent, Text}
+%%	    Line = integer()
+%%	    Column = integer()
+%%	    Indent = integer()
+%%	    Text = string()
+%%
+%% @doc Extracts individual comment lines from a source code string.
+%% Returns a list of comment lines found in the text, listed in order of
+%% <em>decreasing</em> line-numbers, i.e., the last comment line in the
+%% input is first in the resulting list. `Text' is a single
+%% string, containing all characters following (but not including) the
+%% first comment-introducing `%' character on the line, up
+%% to (but not including) the line-terminating newline. For details on
+%% `Line', `Column' and `Indent', see {@link file/1}.
+
+scan_lines(Text) ->
+    scan_lines(Text, 1, 0, 0, []).
+
+%% Main scanning state, outside comments, strings and character
+%% literals. Line is the current line number, Col the 0-based column,
+%% and Mark the column just after the last non-whitespace character on
+%% the current line (0 if there is none); Acc collects comment lines.
+scan_lines([$\s | Rest], Line, Col, Mark, Acc) ->
+    scan_lines(Rest, Line, Col + 1, Mark, Acc);
+scan_lines([$\t | Rest], Line, Col, Mark, Acc) ->
+    scan_lines(Rest, Line, tab(Col), Mark, Acc);
+scan_lines([$\r, $\n | Rest], Line, _Col, _Mark, Acc) ->
+    scan_lines(Rest, Line + 1, 0, 0, Acc);
+scan_lines([Eol | Rest], Line, _Col, _Mark, Acc) when Eol =:= $\n; Eol =:= $\r ->
+    scan_lines(Rest, Line + 1, 0, 0, Acc);
+scan_lines([$% | Rest], Line, Col, Mark, Acc) ->
+    scan_comment(Rest, "", Line, Col, Mark, Acc);
+scan_lines([$$ | Rest], Line, Col, _Mark, Acc) ->
+    scan_char(Rest, Line, Col + 1, Acc);
+scan_lines([Quote | Rest], Line, Col, _Mark, Acc) when Quote =:= $"; Quote =:= $' ->
+    scan_string(Rest, Quote, Line, Col + 1, Acc);
+scan_lines([_Other | Rest], Line, Col, _Mark, Acc) ->
+    Next = Col + 1,
+    scan_lines(Rest, Line, Next, Next, Acc);
+scan_lines([], _Line, _Col, _Mark, Acc) ->
+    Acc.
+
+tab(Col) ->
+    ((Col div 8) + 1) * 8.    % advance to the next tab stop (multiples of 8)
+
+%% Collects the text after `%' in reverse (Rev) up to the end of the line.
+scan_comment([$\r, $\n | Rest], Rev, Line, Col, Mark, Acc) ->
+    seen_comment(Rest, Rev, Line, Col, Mark, Acc);
+scan_comment([Eol | Rest], Rev, Line, Col, Mark, Acc) when Eol =:= $\n; Eol =:= $\r ->
+    seen_comment(Rest, Rev, Line, Col, Mark, Acc);
+scan_comment([Ch | Rest], Rev, Line, Col, Mark, Acc) ->
+    scan_comment(Rest, [Ch | Rev], Line, Col, Mark, Acc);
+scan_comment([], Rev, Line, Col, Mark, Acc) ->
+    %% End of input also terminates the comment line.
+    seen_comment([], Rev, Line, Col, Mark, Acc).
+
+%% Add a comment line to the accumulator and return to normal
+%% scanning. Note that we compute column positions starting at 0
+%% internally, but the column values in the comment descriptors
+%% should start at 1.
+
+seen_comment(Rest, Rev, Line, Col, Mark, Acc) ->
+    %% Rev is the comment text in reverse, so dropping its leading
+    %% spaces removes the trailing spaces of the line.
+    Text = lists:reverse(lists:dropwhile(fun(C) -> C =:= $\s end, Rev)),
+    Entry = {Line, Col + 1, Col - Mark, Text},    % 1-based column, indentation
+    scan_lines(Rest, Line + 1, 0, 0, [Entry | Acc]).
+
+%% Skips a quoted string or atom up to the closing Quote. Raw newlines
+%% should not occur inside, but are counted anyway to keep positions right.
+scan_string([Quote | Cs], Quote, L, Col, Ack) ->
+    N = Col + 1,
+    scan_lines(Cs, L, N, N, Ack);
+scan_string([$\t | Cs], Quote, L, Col, Ack) ->
+    scan_string(Cs, Quote, L, tab(Col), Ack);
+scan_string([$\r, $\n | Cs], Quote, L, _Col, Ack) ->
+    scan_string(Cs, Quote, L + 1, 0, Ack);
+scan_string([Eol | Cs], Quote, L, _Col, Ack) when Eol =:= $\n; Eol =:= $\r ->
+    scan_string(Cs, Quote, L + 1, 0, Ack);
+scan_string([$\\ | [Eol | _] = Cs], Quote, L, Col, Ack) when Eol =:= $\n; Eol =:= $\r ->
+    %% Escaped newline: skip the backslash only, so the line is counted.
+    scan_string(Cs, Quote, L, Col + 1, Ack);
+scan_string([$\\, _C | Cs], Quote, L, Col, Ack) ->
+    scan_string(Cs, Quote, L, Col + 2, Ack);  % ignore character C
+scan_string([_C | Cs], Quote, L, Col, Ack) ->
+    scan_string(Cs, Quote, L, Col + 1, Ack);
+scan_string([], _Quote, _L, _Col, Ack) ->
+    Ack.    % unterminated string: finish quietly
+
+%% Consumes the character that follows `$' (e.g. `$%' or `$"').
+scan_char([$\t | Cs], L, Col, Ack) ->
+    N = tab(Col),
+    scan_lines(Cs, L, N, N, Ack);    % this is not just any whitespace
+scan_char([$\r, $\n | Cs], L, _Col, Ack) ->
+    scan_lines(Cs, L + 1, 0, 0, Ack);
+scan_char([Eol | Cs], L, _Col, Ack) when Eol =:= $\n; Eol =:= $\r ->
+    scan_lines(Cs, L + 1, 0, 0, Ack);    % handle this, just in case
+scan_char([$\\ | [Eol | _] = Cs], L, Col, Ack) when Eol =:= $\n; Eol =:= $\r ->
+    scan_char(Cs, L, Col + 1, Ack);    % re-scan the escaped newline so it is counted
+scan_char([$\\, _C | Cs], L, Col, Ack) ->
+    N = Col + 2,    % character C must be ignored
+    scan_lines(Cs, L, N, N, Ack);
+scan_char([_C | Cs], L, Col, Ack) ->
+    N = Col + 1,    % character C must be ignored
+    scan_lines(Cs, L, N, N, Ack);
+scan_char([], _L, _Col, Ack) ->
+    Ack.    % input ended right after `$': finish quietly
+
+
+%% =====================================================================
+%% @spec join_lines([CommentLine]) -> [Comment]
+%%
+%%	    CommentLine = {Line, Column, Indent, string()}
+%%	    Line = integer()
+%%	    Column = integer()
+%%	    Indent = integer()
+%%	    Comment = {Line, Column, Indent, Text}
+%%	    Text = [string()]
+%%
+%% @doc Joins individual comment lines into multi-line comments. The
+%% input is a list of entries representing individual comment lines,
+%% <em>in order of decreasing line-numbers</em>; see
+%% {@link scan_lines/1} for details. The result is a list of
+%% entries representing <em>multi-line</em> comments, <em>still listed
+%% in order of decreasing line-numbers</em>, but where for each entry,
+%% `Text' is a list of consecutive comment lines in order of
+%% <em>increasing</em> line-numbers (i.e., top-down).
+%%
+%% @see scan_lines/1
+
+join_lines([]) ->
+    [];
+join_lines([{L, Col, Ind, Txt} | Lines]) ->
+    join_lines(Lines, [Txt], L, Col, Ind).
+
+%% The accumulated text block `Txt' is never empty here, and the input
+%% is in reverse line-number order, so each new entry lies *above* the
+%% block collected so far (whose top line is L, at Col with Ind).
+
+join_lines([{L1, Col, Ind1, Txt1} | Lines], Txt, L, Col, Ind)
+  when L1 =:= L - 1, Ind + 1 =:= Col ->
+    %% The entry is on the line directly above, in the same column,
+    %% and `Ind + 1 =:= Col' shows that the block's top line has
+    %% nothing but whitespace before its `%': extend the block
+    %% upwards. This is not always what one wants, but works well
+    %% in general.
+    join_lines(Lines, [Txt1 | Txt], L1, Col, Ind1);
+join_lines([{L1, Col1, Ind1, Txt1} | Lines], Txt, L, Col, Ind) ->
+    %% Otherwise the current comment is complete and the entry
+    %% starts the next one.
+    [{L, Col, Ind, Txt} | join_lines(Lines, [Txt1], L1, Col1, Ind1)];
+join_lines([], Txt, L, Col, Ind) ->
+    [{L, Col, Ind, Txt}].
+
+
+%% =====================================================================
+%% Utility functions for internal use
+
+%% Flattens a (possibly deep) string or atom file name into a flat
+%% string. Any positive code point is accepted, not only Latin-1, so
+%% Unicode names work; anything else is reported and exits with `error'.
+filename([C | T]) when is_integer(C), C > 0 ->
+    [C | filename(T)];
+filename([H | T]) -> filename(H) ++ filename(T);
+filename([]) -> [];
+filename(N) when is_atom(N) -> atom_to_list(N);
+filename(N) ->
+    report_error("bad filename: `~P'.", [N, 25]),
+    exit(error).
+
+error_read_file(Name) ->
+    report_error("error reading file `~ts'.", [Name]).    % ~ts: Name may hold Unicode
+
+report_error(S, Vs) ->
+    error_logger:error_msg(atom_to_list(?MODULE) ++ ": " ++ S ++ "\n", Vs).
+
+%% =====================================================================