%% ===================================================================== %% This library is free software; you can redistribute it and/or modify %% it under the terms of the GNU Lesser General Public License as %% published by the Free Software Foundation; either version 2 of the %% License, or (at your option) any later version. %% %% This library is distributed in the hope that it will be useful, but %% WITHOUT ANY WARRANTY; without even the implied warranty of %% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU %% Lesser General Public License for more details. %% %% You should have received a copy of the GNU Lesser General Public %% License along with this library; if not, write to the Free Software %% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 %% USA %% %% @copyright 1997-2006 Richard Carlsson %% @author Richard Carlsson <carlsson.richard@gmail.com> %% @end %% ===================================================================== %% @doc Inserting comments into abstract Erlang syntax trees %% %% <p>This module contains functions for inserting comments, described %% by position, indentation and text, as attachments on an abstract %% syntax tree, at the correct places.</p> -module(erl_recomment). -export([recomment_forms/2, quick_recomment_forms/2, recomment_tree/2]). %% ===================================================================== %% @spec quick_recomment_forms(Forms, Comments::[Comment]) -> %% syntaxTree() %% %% Forms = syntaxTree() | [syntaxTree()] %% Comment = {Line, Column, Indentation, Text} %% Line = integer() %% Column = integer() %% Indentation = integer() %% Text = [string()] %% %% @doc Like {@link recomment_forms/2}, but only inserts top-level %% comments. Comments within function definitions or declarations %% ("forms") are simply ignored. -spec quick_recomment_forms(erl_syntax:forms(), [erl_comment_scan:comment()]) -> erl_syntax:syntaxTree(). quick_recomment_forms(Tree, Cs) -> recomment_forms(Tree, Cs, false). %% ===================================================================== %% @spec recomment_forms(Forms, Comments::[Comment]) -> syntaxTree() %% %% syntaxTree() = erl_syntax:syntaxTree() %% Forms = syntaxTree() | [syntaxTree()] %% Comment = {Line, Column, Indentation, Text} %% Line = integer() %% Column = integer() %% Indentation = integer() %% Text = [string()] %% %% @doc Attaches comments to the syntax tree/trees representing a %% program. The given <code>Forms</code> should be a single syntax tree %% of type <code>form_list</code>, or a list of syntax trees %% representing "program forms". The syntax trees must contain valid %% position information (for details, see %% <code>recomment_tree/2</code>). The result is a corresponding syntax %% tree of type <code>form_list</code> in which all comments in the list %% <code>Comments</code> have been attached at the proper places. %% %% <p>Assuming <code>Forms</code> represents a program (or any sequence %% of "program forms"), any comments whose first lines are not directly %% associated with a specific program form will become standalone %% comments inserted between the neighbouring program forms. %% Furthermore, comments whose column position is less than or equal to %% one will not be attached to a program form that begins at a %% conflicting line number (this can happen with preprocessor-generated %% <code>line</code>-attributes).</p> %% %% <p>If <code>Forms</code> is a syntax tree of some other type than %% <code>form_list</code>, the comments will be inserted directly using %% <code>recomment_tree/2</code>, and any comments left over from that %% process are added as postcomments on the result.</p> %% %% <p>Entries in <code>Comments</code> represent multi-line comments. %% For each entry, <code>Line</code> is the line number and %% <code>Column</code> the left column of the comment (the column of the %% first comment-introducing "<code>%</code>" character). %% <code>Indentation</code> is the number of character positions between %% the last non-whitespace character before the comment (or the left %% margin) and the left column of the comment. <code>Text</code> is a %% list of strings representing the consecutive comment lines in %% top-down order, where each string contains all characters following %% (but not including) the comment-introducing "<code>%</code>" and up %% to (but not including) the terminating newline. (Cf. module %% <code>erl_comment_scan</code>.)</p> %% %% <p>Evaluation exits with reason <code>{bad_position, Pos}</code> if %% the associated position information <code>Pos</code> of some subtree %% in the input does not have a recognizable format, or with reason %% <code>{bad_tree, L, C}</code> if insertion of a comment at line %% <code>L</code>, column <code>C</code>, fails because the tree %% structure is ill-formed.</p> %% %% @see erl_comment_scan %% @see recomment_tree/2 %% @see quick_recomment_forms/2 -spec recomment_forms(erl_syntax:forms(), [erl_comment_scan:comment()]) -> erl_syntax:syntaxTree(). recomment_forms(Tree, Cs) -> recomment_forms(Tree, Cs, true). recomment_forms(Tree, Cs, Insert) when is_list(Tree) -> recomment_forms(erl_syntax:form_list(Tree), Cs, Insert); recomment_forms(Tree, Cs, Insert) -> case erl_syntax:type(Tree) of form_list -> Tree1 = erl_syntax:flatten_form_list(Tree), Node = build_tree(Tree1), %% Here we make a small assumption about the substructure of %% a `form_list' tree: it has exactly one group of subtrees. [Node1] = node_subtrees(Node), List = filter_forms(node_subtrees(Node1)), List1 = recomment_forms_1(Cs, List, Insert), revert_tree(set_node_subtrees(Node, [set_node_subtrees(Node1, List1)])); _ -> %% Not a form list - just call `recomment_tree' and %% append any leftover comments. {Tree1, Cs1} = recomment_tree(Tree, Cs), revert_tree(append_comments(Cs1, Tree1)) end. append_comments([C | Cs], Tree) -> append_comments(Cs, node_add_postcomment(C, Tree)); append_comments([], Tree) -> Tree. %% This part goes over each comment in turn and inserts it into the %% proper place in the given list of program forms: recomment_forms_1([C | Cs], Ns, Insert) -> Ns1 = recomment_forms_2(C, Ns, Insert), recomment_forms_1(Cs, Ns1, Insert); recomment_forms_1([], Ns, _Insert) -> Ns. recomment_forms_2(C, [N | Ns] = Nodes, Insert) -> {L, Col, Ind, Text} = C, Min = node_min(N), Max = node_max(N), Delta = comment_delta(Text), Trailing = case Ns of [] -> true; [Next | _] -> L + Delta < node_min(Next) - 2 end, if L > Max + 1 ; L =:= Max + 1, not Trailing -> [N | recomment_forms_2(C, Ns, Insert)]; L + Delta < Min - 1 -> %% At least one empty line between the current form %% and the comment, so we make it a standalone. [standalone_comment(C) | Nodes]; L < Min -> %% The comment line should be above this node. %% (This duplicates what insert/5 would have done.) [node_add_precomment(C, N) | Ns]; Col =< 1, L =< Min, L + Delta >= Min -> %% This is a conflict - the "first" token of the node %% overlaps with some comment line, but the comment %% started at column 1. N1 = standalone_comment(C), if L < Min -> [N1 | Nodes]; true -> [N, N1 | Ns] end; Insert =:= true -> [insert(N, L, Col, Ind, C) | Ns]; true -> Nodes % skipping non-toplevel comment end; recomment_forms_2(C, [], _Top) -> [standalone_comment(C)]. %% Creating a leaf node for a standalone comment. Note that we try to %% preserve the original starting column rather than the indentation. standalone_comment({L, Col, _Ind, Text}) -> leaf_node(L, L + comment_delta(Text), erl_syntax:set_pos(erl_syntax:comment(Col - 1, Text), L)). %% Compute delta between first and last line of a comment, given %% the lines of text. comment_delta(Text) -> case length(Text) of N when N > 0 -> N - 1; _ -> 0 % avoid negative delta end. %% This kills line information for program forms that do not come from %% the source file itself, but have been included by preprocessing. This %% way, comments will not be inserted into such parts by mistake. -record(filter, {file = undefined :: file:filename() | 'undefined', line = 0 :: integer()}). filter_forms(Fs) -> filter_forms(Fs, false, #filter{}). filter_forms([F | Fs], Kill, S) -> case check_file_attr(F) of {true, A1, A2} -> S1 = case S#filter.file of undefined -> S#filter{file = A1, line = A2}; _ -> S end, if S1#filter.file =:= A1, S1#filter.line =< A2 -> [F | filter_forms(Fs, false, S1#filter{line = A2})]; Kill =:= true -> [node_kill_range(F) | filter_forms(Fs, true, S1)]; true -> [F | filter_forms(Fs, true, S1)] end; false -> case Kill of true -> [node_kill_range(F) | filter_forms(Fs, Kill, S)]; false -> [F | filter_forms(Fs, Kill, S)] end end; filter_forms([], _, _) -> []. %% This structure matching gets a bit painful... check_file_attr(F) -> case node_type(F) of tree_node -> case tree_node_type(F) of attribute -> case node_subtrees(F) of [L1, L2 | _] -> check_file_attr_1(L1, L2); _ -> false end; _ -> false end; _ -> false end. check_file_attr_1(L1, L2) -> case node_subtrees(L1) of [N1 | _] -> N2 = leaf_node_value(N1), case erl_syntax:type(N2) of atom -> case erl_syntax:atom_value(N2) of file -> check_file_attr_2(L2); _ -> false end; _ -> false end; _ -> false end. check_file_attr_2(L) -> case node_subtrees(L) of [N1, N2 | _] -> T1 = erl_syntax:concrete(revert_tree(N1)), T2 = erl_syntax:concrete(revert_tree(N2)), {true, T1, T2}; _ -> false end. %% ===================================================================== %% @spec recomment_tree(Tree::syntaxTree(), Comments::[Comment]) -> %% {syntaxTree(), [Comment]} %% %% Comment = {Line, Column, Indentation, Text} %% Line = integer() %% Column = integer() %% Indentation = integer() %% Text = [string()] %% %% @doc Attaches comments to a syntax tree. The result is a pair %% <code>{NewTree, Remainder}</code> where <code>NewTree</code> is the %% given <code>Tree</code> where comments from the list %% <code>Comments</code> have been attached at the proper places. %% <code>Remainder</code> is the list of entries in %% <code>Comments</code> which have not been inserted, because their %% line numbers are greater than those of any node in the tree. The %% entries in <code>Comments</code> are inserted in order; if two %% comments become attached to the same node, they will appear in the %% same order in the program text. %% %% <p>The nodes of the syntax tree must contain valid position %% information. This can be single integers, assumed to represent a line %% number, or 2- or 3-tuples where the first or second element is an %% integer, in which case the leftmost integer element is assumed to %% represent the line number. Line numbers less than one are ignored %% (usually, the default line number for newly created nodes is %% zero).</p> %% %% <p>For details on the <code>Line</code>, <code>Column</code> and %% <code>Indentation</code> fields, and the behaviour in case of errors, %% see <code>recomment_forms/2</code>.</p> %% %% @see recomment_forms/2 -spec recomment_tree(erl_syntax:syntaxTree(), [erl_comment_scan:comment()]) -> {erl_syntax:syntaxTree(), [erl_comment_scan:comment()]}. recomment_tree(Tree, Cs) -> {Tree1, Cs1} = insert_comments(Cs, build_tree(Tree)), {revert_tree(Tree1), Cs1}. %% Comments are inserted in the tree one at a time. Note that this %% part makes no assumptions about how tree nodes and list nodes %% are nested; only `build_tree' and `revert_tree' knows about %% such things. insert_comments(Cs, Node) -> insert_comments(Cs, Node, []). insert_comments([C | Cs], Node, Cs1) -> {L, Col, Ind, _Text} = C, Max = node_max(Node), if L =< Max -> insert_comments(Cs, insert(Node, L, Col, Ind, C), Cs1); true -> insert_comments(Cs, Node, [C | Cs1]) end; insert_comments([], Node, Cs) -> {Node, lists:reverse(Cs)}. %% Here, we assume that the comment is located on some line not %% below the last element of the given node. insert(Node, L, Col, Ind, C) -> case node_type(Node) of list_node -> %% We cannot attach comments directly to a list node. set_node_subtrees(Node, insert_in_list(node_subtrees(Node), L, Col, Ind, C)); _ -> %% We check if the comment belongs before, or inside %% the range of the current node. Min = node_min(Node), Max = node_max(Node), if L < Min -> %% The comment line should be above this node. node_add_precomment(C, Node); Min =:= Max -> %% The whole node is on a single line (this %% should usually catch all leaf nodes), so we %% postfix the comment. node_add_postcomment(C, Node); true -> %% The comment should be inserted in the %% subrange of the node, i.e., attached either %% to the node itself, or to one of its %% subtrees. insert_1(Node, L, Col, Ind, C) end end. insert_1(Node, L, Col, Ind, C) -> case node_type(Node) of tree_node -> %% Insert in one of the subtrees. set_node_subtrees(Node, insert_in_list(node_subtrees(Node), L, Col, Ind, C)); leaf_node -> %% Odd case: no components, but not on a single line. %% (Never mind anyway - just postfix the comment.) node_add_postcomment(C, Node) end. %% We assume that there exists at least one tree node in some tree %% in the list; since we have decided to insert here, we're %% screwed if there isn't one. insert_in_list([Node | Ns], L, Col, Ind, C) -> Max = node_max(Node), %% Get the `Min' of the next node that follows in the %% flattened left-to-right order, or -1 (minus one) if no such %% tree node exists. NextMin = next_min_in_list(Ns), %% `NextMin' could be less than `Max', in inconsistent trees. if NextMin < 0 -> %% There is no following leaf/tree node, so we try %% to insert at this node. insert_here(Node, L, Col, Ind, C, Ns); L >= NextMin, NextMin >= Max -> %% Tend to select the later node, in case the next %% node should also match. insert_later(Node, L, Col, Ind, C, Ns); L =< Max -> insert_here(Node, L, Col, Ind, C, Ns); true -> insert_later(Node, L, Col, Ind, C, Ns) end; insert_in_list([], L, Col, _, _) -> exit({bad_tree, L, Col}). %% The comment belongs to the current subrange insert_here(Node, L, Col, Ind, C, Ns) -> [insert(Node, L, Col, Ind, C) | Ns]. %% The comment should be inserted later insert_later(Node, L, Col, Ind, C, Ns) -> [Node | insert_in_list(Ns, L, Col, Ind, C)]. %% `next_min_in_list' returns the `Min' field of the leftmost tree %% or leaf node in the given node list, or the integer -1 (minus %% one) if no such element exists. next_min_in_list(Ts) -> next_min_in_list(Ts, []). next_min_in_list([T | Ts], Ack) -> next_min_in_node(T, [Ts | Ack]); next_min_in_list([], [T | Ts]) -> next_min_in_list(T, Ts); next_min_in_list([], []) -> -1. next_min_in_node(Node, Ack) -> case node_type(Node) of leaf_node -> node_min(Node); tree_node -> node_min(Node); list_node -> next_min_in_list(node_subtrees(Node), Ack) end. %% Building an extended syntax tree from an `erl_syntax' abstract %% syntax tree. build_tree(Node) -> L = get_line(Node), case erl_syntax:subtrees(Node) of [] -> %% This guarantees that Min =< Max for the base case. leaf_node(L, L, Node); Ts -> %% `Ts' is a list of lists of abstract terms. {Subtrees, Min, Max} = build_list_list(Ts), %% Include L, while preserving Min =< Max. tree_node(minpos(L, Min), erlang:max(L, Max), erl_syntax:type(Node), erl_syntax:get_attrs(Node), Subtrees) end. %% Since `erl_syntax:subtrees' yields the components in %% left-to-right textual order, the line numbers should grow %% monotonically as the list is traversed, and the maximum line %% number of the list should therefore be the dito of the last %% component. However, we do not want to make such a strong %% assumption about the consistency of the line numbering, so we %% take the trouble to find the maximum line number in the subtree %% taken over all its elements. build_list(Ts) -> build_list(Ts, 0, 0, []). build_list([T | Ts], Min, Max, Ack) -> Node = build_tree(T), Min1 = minpos(node_min(Node), Min), Max1 = erlang:max(node_max(Node), Max), build_list(Ts, Min1, Max1, [Node | Ack]); build_list([], Min, Max, Ack) -> list_node(Min, Max, lists:reverse(Ack)). build_list_list(Ls) -> build_list_list(Ls, 0, 0, []). build_list_list([L | Ls], Min, Max, Ack) -> Node = build_list(L), Min1 = minpos(node_min(Node), Min), Max1 = erlang:max(node_max(Node), Max), build_list_list(Ls, Min1, Max1, [Node | Ack]); build_list_list([], Min, Max, Ack) -> {lists:reverse(Ack), Min, Max}. %% Reverting to an abstract syntax tree from the extended form. %% Note that the new comments are inserted after the original %% attributes are restored. revert_tree(Node) -> case node_type(Node) of leaf_node -> add_comments(Node, leaf_node_value(Node)); tree_node -> add_comments(Node, erl_syntax:set_attrs( erl_syntax:make_tree( tree_node_type(Node), revert_list(node_subtrees(Node))), tree_node_attrs(Node))); list_node -> revert_list(node_subtrees(Node)) end. revert_list([T | Ts]) -> [revert_tree(T) | revert_list(Ts)]; revert_list([]) -> []. add_comments(Node, Tree) -> case node_precomments(Node) of [] -> add_comments_1(Node, Tree); Cs -> Cs1 = lists:reverse(expand_comments(Cs)), add_comments_1(Node, erl_syntax:add_precomments(Cs1, Tree)) end. add_comments_1(Node, Tree) -> case node_postcomments(Node) of [] -> Tree; Cs -> Cs1 = lists:reverse(expand_comments(Cs)), erl_syntax:add_postcomments(Cs1, Tree) end. expand_comments([C | Cs]) -> [expand_comment(C) | expand_comments(Cs)]; expand_comments([]) -> []. expand_comment(C) -> {L, _Col, Ind, Text} = C, erl_syntax:set_pos(erl_syntax:comment(Ind, Text), L). %% ===================================================================== %% Abstract data type for extended syntax trees. %% %% These explicitly distinguish between leaf and tree nodes, both %% corresponding to a single abstract syntax tree, and list nodes, %% corresponding to a left-to-right ordered sequence of such trees. %% %% All nodes have `min' and `max' fields, containing the first and last %% source lines, respectively, over which the tree extends. %% %% Tree nodes and list nodes have a `subtrees' field, containing the %% (extended) subtrees of the node. Tree nodes also have a `type' field, %% containing the atom returned by `erl_syntax:type' for the %% corresponding abstract syntax tree, and an `attrs' field, containing %% the value of `erl_syntax:get_attrs' for the abstract syntax tree. %% %% Leaf nodes and tree nodes also have `precomments' and `postcomments' %% fields. The comment fields are lists of comment structures (in %% top-down order); the representation of comments has no consequence to %% the tree representation. %% %% Leaf nodes, lastly, have a `value' field containing the abstract %% syntax tree for any such tree that can have no subtrees, i.e., such %% that `erl_syntax:is_leaf' yields `true'. -record(leaf, {min = 0 :: integer(), max = 0 :: integer(), precomments = [] :: [erl_syntax:syntaxTree()], postcomments = [] :: [erl_syntax:syntaxTree()], value :: erl_syntax:syntaxTree()}). -record(tree, {min = 0 :: integer(), max = 0 :: integer(), type :: atom(), attrs :: erl_syntax:syntaxTreeAttributes(), precomments = [] :: [erl_syntax:syntaxTree()], postcomments = [] :: [erl_syntax:syntaxTree()], subtrees = [] :: [erl_syntax:syntaxTree()]}). -record(list, {min = 0 :: integer(), max = 0 :: integer(), subtrees = [] :: [erl_syntax:syntaxTree()]}). leaf_node(Min, Max, Value) -> #leaf{min = Min, max = Max, value = Value}. tree_node(Min, Max, Type, Attrs, Subtrees) -> #tree{min = Min, max = Max, type = Type, attrs = Attrs, subtrees = Subtrees}. list_node(Min, Max, Subtrees) -> #list{min = Min, max = Max, subtrees = Subtrees}. node_type(#leaf{}) -> leaf_node; node_type(#tree{}) -> tree_node; node_type(#list{}) -> list_node. node_min(#leaf{min = Min}) -> Min; node_min(#tree{min = Min}) -> Min; node_min(#list{min = Min}) -> Min. node_max(#leaf{max = Max}) -> Max; node_max(#tree{max = Max}) -> Max; node_max(#list{max = Max}) -> Max. node_kill_range(Node) -> case Node of #leaf{} -> Node#leaf{min = -1, max = -1}; #tree{} -> Node#tree{min = -1, max = -1}; #list{} -> Node#list{min = -1, max = -1} end. node_precomments(#leaf{precomments = Cs}) -> Cs; node_precomments(#tree{precomments = Cs}) -> Cs. node_add_precomment(C, Node) -> case Node of #leaf{} -> Node#leaf{precomments = [C | Node#leaf.precomments]}; #tree{} -> Node#tree{precomments = [C | Node#tree.precomments]} end. node_postcomments(#leaf{postcomments = Cs}) -> Cs; node_postcomments(#tree{postcomments = Cs}) -> Cs. node_add_postcomment(C, Node) -> case Node of #leaf{} -> Node#leaf{postcomments = [C | Node#leaf.postcomments]}; #tree{} -> Node#tree{postcomments = [C | Node#tree.postcomments]} end. node_subtrees(#tree{subtrees = Subtrees}) -> Subtrees; node_subtrees(#list{subtrees = Subtrees}) -> Subtrees. leaf_node_value(#leaf{value = Value}) -> Value. tree_node_type(#tree{type = Type}) -> Type. set_node_subtrees(Node, Subtrees) -> case Node of #tree{} -> Node#tree{subtrees = Subtrees}; #list{} -> Node#list{subtrees = Subtrees} end. tree_node_attrs(#tree{attrs = Attrs}) -> Attrs. %% ===================================================================== %% General utility functions %% Return the least positive integer of X and Y, or zero if none of them %% are positive. (This is necessary for computing minimum source line %% numbers, since zero (or negative) numbers may occur, but they %% represent the "undefined" line number.) minpos(X, Y) when X < Y -> minpos1(X, Y); minpos(X, Y) -> minpos1(Y, X). minpos1(X, Y) when X < 1 -> minpos2(Y); minpos1(X, _) -> X. minpos2(X) when X < 1 -> 0; minpos2(X) -> X. get_line(Node) -> case erl_syntax:get_pos(Node) of L when is_integer(L) -> L; {L, _} when is_integer(L) -> L; {_, L} when is_integer(L) -> L; {L, _, _} when is_integer(L) -> L; {_, L, _} when is_integer(L) -> L; Pos -> try erl_anno:line(Pos) of Line -> Line catch _:_ -> exit({bad_position, Pos}) end end. %% =====================================================================