aboutsummaryrefslogtreecommitdiffstats
path: root/lib/edoc/src/edoc_wiki.erl
diff options
context:
space:
mode:
authorErlang/OTP <[email protected]>2009-11-20 14:54:40 +0000
committerErlang/OTP <[email protected]>2009-11-20 14:54:40 +0000
commit84adefa331c4159d432d22840663c38f155cd4c1 (patch)
treebff9a9c66adda4df2106dfd0e5c053ab182a12bd /lib/edoc/src/edoc_wiki.erl
downloadotp-84adefa331c4159d432d22840663c38f155cd4c1.tar.gz
otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.bz2
otp-84adefa331c4159d432d22840663c38f155cd4c1.zip
The R13B03 release.OTP_R13B03
Diffstat (limited to 'lib/edoc/src/edoc_wiki.erl')
-rw-r--r--lib/edoc/src/edoc_wiki.erl456
1 files changed, 456 insertions, 0 deletions
diff --git a/lib/edoc/src/edoc_wiki.erl b/lib/edoc/src/edoc_wiki.erl
new file mode 100644
index 0000000000..e4a3d74734
--- /dev/null
+++ b/lib/edoc/src/edoc_wiki.erl
@@ -0,0 +1,456 @@
+%% =====================================================================
+%% This library is free software; you can redistribute it and/or modify
+%% it under the terms of the GNU Lesser General Public License as
+%% published by the Free Software Foundation; either version 2 of the
+%% License, or (at your option) any later version.
+%%
+%% This library is distributed in the hope that it will be useful, but
+%% WITHOUT ANY WARRANTY; without even the implied warranty of
+%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+%% Lesser General Public License for more details.
+%%
+%% You should have received a copy of the GNU Lesser General Public
+%% License along with this library; if not, write to the Free Software
+%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+%% USA
+%%
+%% $Id$
+%%
+%% @private
+%% @copyright 2001-2003 Richard Carlsson
+%% @author Richard Carlsson <[email protected]>
+%% @see edoc
+%% @end
+%% =====================================================================
+
+%% @doc EDoc wiki expansion, parsing and postprocessing of XML text.
+%% Uses {@link //xmerl. XMerL}.
+%% @end
+
+%% Notes:
+%%
+%% * Whatever happens in this module, it must interact nicely with the
+%% actual XML-parsing. It is not acceptable to break any existing and
+%% legal XML markup so that it does not parse or is rendered wrong.
+%%
+%% * The focus should always be on making *documentation* easier to
+%% write. No wiki notation should be introduced unless it is clear that
+%% it is better than using plain XHTML, making typing less cumbersome
+%% and the resulting text easier to read. The wiki notation should be a
+%% small bag of easy-to-remember tricks for making XHTML documentation
+%% easier to write, not a complete markup language in itself. As a
+%% typical example, it is hardly worthwile to introduce a special
+%% notation like say, ""..."" for emphasized text, since <em>...</em> is
+%% not much harder to write, not any less readable, and no more
+%% difficult to remember, especially since emphasis is not very often
+%% occurring in normal documentation.
+%%
+%% * The central reasoning for the code-quoting goes like this: I don't
+%% want to have special escape characters within the quotes (like
+%% backslash in C), to allow quoting of the quote characters themselves.
+%% I also don't want to use the "`" character both for opening and
+%% closing quotes. Therefore, you can either use `...' - and then you
+%% cannot use the "'" character without ending the quote - or you can
+%% use ``...'' - which allows single but not double "'" characters
+%% within the quote. Whitespace is automatically removed from the
+%% beginning and the end of the quoted strings; this allows you to write
+%% things like "`` 'foo@bar' ''". Text that contains "''" has to be
+%% written within <code>...</code>.
+%%
+%% To produce a single "`" character without starting a quote, write
+%% "`'" (no space between "`" and "'").
+%%
+%% For verbatim/preformatted text, the ```...'''-quotes expand to
+%% "<pre><![CDATA[...]]></pre>". The indentation at the start of the
+%% quoted string is preserved; whitespace is stripped only at the end.
+%% Whole leading lines of whitespace are however skipped.
+
+-module(edoc_wiki).
+
+-export([parse_xml/2, expand_text/2]).
+
+-include("edoc.hrl").
+-include("xmerl.hrl").
+
+-define(BASE_HEADING, 3).
+
+
+%% Parsing Wiki-XML with pre-and post-expansion.
+
+parse_xml(Data, Line) ->
+ par(parse_xml_1(expand_text(Data, Line), Line)).
+
+parse_xml_1(Text, Line) ->
+ Text1 = "<doc>" ++ Text ++ "</doc>",
+ case catch {ok, xmerl_scan:string(Text1, [{line, Line}])} of
+ {ok, {E, _}} ->
+ E#xmlElement.content;
+ {'EXIT', {fatal, {Reason, L, _C}}} ->
+ throw_error(L, {"XML parse error: ~p.", [Reason]});
+ {'EXIT', Reason} ->
+ throw_error(Line, {"error in XML parser: ~P.", [Reason, 10]});
+ Other ->
+ throw_error(Line, {"nocatch in XML parser: ~P.", [Other, 10]})
+ end.
+
+%% Expand wiki stuff in arbitrary text.
+
+expand_text(Cs, L) ->
+ lists:reverse(expand_new_line(Cs, L, [])).
+
+%% Interestingly, the reverse of "code" is "edoc". :-)
+
+expand_new_line([$\s = C | Cs], L, As) ->
+ expand_new_line(Cs, L, [C | As]);
+expand_new_line([$\t = C | Cs], L, As) ->
+ expand_new_line(Cs, L, [C | As]);
+expand_new_line([$\n = C | Cs], L, As) ->
+ expand_new_line(Cs, L + 1, [C | As]);
+expand_new_line([$=, $=, $=, $= | Cs], L, As) ->
+ expand_heading(Cs, 2, L, As);
+expand_new_line([$=, $=, $= | Cs], L, As) ->
+ expand_heading(Cs, 1, L, As);
+expand_new_line([$=, $= | Cs], L, As) ->
+ expand_heading(Cs, 0, L, As);
+expand_new_line(Cs, L, As) ->
+ expand(Cs, L, As).
+
+expand([$`, $' | Cs], L, As) ->
+ expand(Cs, L, [$` | As]); % produce "`" - don't start a new quote
+expand([$`, $`, $` | Cs], L, As) ->
+ %% If this is the first thing on the line, compensate for the
+ %% indentation, unless we had to skip one or more empty lines.
+ {Cs1, Skipped} = strip_empty_lines(Cs), % avoid vertical space
+ N = if Skipped > 0 ->
+ 0;
+ true ->
+ {As1, _} = edoc_lib:split_at(As, $\n),
+ case edoc_lib:is_space(As1) of
+ true -> 3 + length(As1);
+ false -> 2 % nice default - usually right.
+ end
+ end,
+ Ss = lists:duplicate(N, $\s),
+ expand_triple(Cs1, L + Skipped, Ss ++ "[ATADC[!<>erp<" ++ As);
+expand([$`, $` | Cs], L, As) ->
+ expand_double(edoc_lib:strip_space(Cs), L, ">edoc<" ++ As);
+expand([$` | Cs], L, As) ->
+ expand_single(edoc_lib:strip_space(Cs), L, ">edoc<" ++ As);
+expand([$[ | Cs], L, As) ->
+ expand_uri(Cs, L, As);
+expand([$\n = C | Cs], L, As) ->
+ expand_new_line(Cs, L + 1, [C | As]);
+expand([C | Cs], L, As) ->
+ expand(Cs, L, [C | As]);
+expand([], _, As) ->
+ As.
+
+%% == Heading ==
+%% === SubHeading ===
+%% ==== SubSubHeading ====
+
+expand_heading([$= | _] = Cs, N, L, As) ->
+ expand_heading_1(Cs, N, L, As);
+expand_heading(Cs, N, L, As) ->
+ {Cs1, Cs2} = edoc_lib:split_at(Cs, $\n),
+ case edoc_lib:strip_space(lists:reverse(Cs1)) of
+ [$=, $= | Cs3] ->
+ {Es, Ts} = lists:splitwith(fun (X) -> X =:= $= end, Cs3),
+ if length(Es) =:= N ->
+ Ts1 = edoc_lib:strip_space(
+ lists:reverse(edoc_lib:strip_space(Ts))),
+ expand_heading_2(Ts1, Cs2, N, L, As);
+ true ->
+ H1 = lists:duplicate(N+2, $=),
+ H2 = "==" ++ Es,
+ throw_error(L, {"heading end marker mismatch: "
+ "~s...~s", [H1, H2]})
+ end;
+ _ ->
+ expand_heading_1(Cs, N, L, As)
+ end.
+
+expand_heading_1(Cs, N, L, As) ->
+ expand(Cs, L, lists:duplicate(N + 2, $=) ++ As).
+
+expand_heading_2(Ts, Cs, N, L, As) ->
+ H = ?BASE_HEADING + N,
+ Ts1 = io_lib:format("<h~w><a name=\"~s\">~s</a></h~w>\n",
+ [H, make_label(Ts), Ts, H]),
+ expand_new_line(Cs, L + 1, lists:reverse(lists:flatten(Ts1), As)).
+
+make_label([$\s | Cs]) ->
+ [$_ | make_label(edoc_lib:strip_space(Cs))];
+make_label([$\t | Cs]) ->
+ [$_ | make_label(edoc_lib:strip_space(Cs))];
+make_label([$\n | Cs]) ->
+ [$_ | make_label(edoc_lib:strip_space(Cs))];
+make_label([C | Cs]) ->
+ [C | make_label(Cs)];
+make_label([]) ->
+ [].
+
+%% `...'
+
+expand_single(Cs, L, As) ->
+ expand_single(Cs, L, As, L).
+
+expand_single([$' | Cs], L, As, _L0) ->
+ expand(Cs, L, ">edoc/<" ++ edoc_lib:strip_space(As));
+expand_single([$< | Cs], L, As, L0) ->
+ expand_single(Cs, L, ";tl&" ++ As, L0);
+expand_single([$> | Cs], L, As, L0) ->
+ expand_single(Cs, L, ";tg&" ++ As, L0);
+expand_single([$& | Cs], L, As, L0) ->
+ expand_single(Cs, L, ";pma&" ++ As, L0);
+expand_single([$\n = C | Cs], L, As, L0) ->
+ expand_single(Cs, L + 1, [C | As], L0);
+expand_single([C | Cs], L, As, L0) ->
+ expand_single(Cs, L, [C | As], L0);
+expand_single([], L, _, L0) ->
+ throw_error(L0, {"`-quote ended unexpectedly at line ~w", [L]}).
+
+%% ``...''
+
+expand_double(Cs, L, As) ->
+ expand_double(Cs, L, As, L).
+
+expand_double([$', $' | Cs], L, As, _L0) ->
+ expand(Cs, L, ">edoc/<" ++ edoc_lib:strip_space(As));
+expand_double([$< | Cs], L, As, L0) ->
+ expand_double(Cs, L, ";tl&" ++ As, L0);
+expand_double([$> | Cs], L, As, L0) ->
+ expand_double(Cs, L, ";tg&" ++ As, L0);
+expand_double([$& | Cs], L, As, L0) ->
+ expand_double(Cs, L, ";pma&" ++ As, L0);
+expand_double([$\n = C | Cs], L, As, L0) ->
+ expand_double(Cs, L + 1, [C | As], L0);
+expand_double([C | Cs], L, As, L0) ->
+ expand_double(Cs, L, [C | As], L0);
+expand_double([], L, _, L0) ->
+ throw_error(L0, {"``-quote ended unexpectedly at line ~w", [L]}).
+
+%% ```...'''
+
+expand_triple(Cs, L, As) ->
+ expand_triple(Cs, L, As, L).
+
+expand_triple([$', $', $' | Cs], L, As, _L0) -> % ' stupid emacs
+ expand(Cs, L, ">erp/<>]]" ++ edoc_lib:strip_space(As));
+expand_triple([$], $], $> | Cs], L, As, L0) ->
+ expand_triple(Cs, L, ";tg&]]" ++ As, L0);
+expand_triple([$\n = C | Cs], L, As, L0) ->
+ expand_triple(Cs, L + 1, [C | As], L0);
+expand_triple([C | Cs], L, As, L0) ->
+ expand_triple(Cs, L, [C | As], L0);
+expand_triple([], L, _, L0) ->
+ throw_error(L0, {"```-quote ended unexpectedly at line ~w", [L]}).
+
+%% e.g. [file:/...] or [http://... LinkText]
+
+expand_uri("http:/" ++ Cs, L, As) ->
+ expand_uri(Cs, L, "/:ptth", As);
+expand_uri("ftp:/" ++ Cs, L, As) ->
+ expand_uri(Cs, L, "/:ptf", As);
+expand_uri("file:/" ++ Cs, L, As) ->
+ expand_uri(Cs, L, "/:elif", As);
+expand_uri(Cs, L, As) ->
+ expand(Cs, L, [$[ | As]).
+
+expand_uri([$] | Cs], L, Us, As) ->
+ expand(Cs, L, push_uri(Us, ">tt/<" ++ Us ++ ">tt<", As));
+expand_uri([$\s = C | Cs], L, Us, As) ->
+ expand_uri(Cs, 0, L, [C], Us, As);
+expand_uri([$\t = C | Cs], L, Us, As) ->
+ expand_uri(Cs, 0, L, [C], Us, As);
+expand_uri([$\n = C | Cs], L, Us, As) ->
+ expand_uri(Cs, 1, L, [C], Us, As);
+expand_uri([C | Cs], L, Us, As) ->
+ expand_uri(Cs, L, [C | Us], As);
+expand_uri([], L, Us, _As) ->
+ expand_uri_error(Us, L).
+
+expand_uri([$] | Cs], N, L, Ss, Us, As) ->
+ Ss1 = lists:reverse(edoc_lib:strip_space(
+ lists:reverse(edoc_lib:strip_space(Ss)))),
+ expand(Cs, L + N, push_uri(Us, Ss1, As));
+expand_uri([$\n = C | Cs], N, L, Ss, Us, As) ->
+ expand_uri(Cs, N + 1, L, [C | Ss], Us, As);
+expand_uri([C | Cs], N, L, Ss, Us, As) ->
+ expand_uri(Cs, N, L, [C | Ss], Us, As);
+expand_uri([], _, L, _Ss, Us, _As) ->
+ expand_uri_error(Us, L).
+
+-spec expand_uri_error(list(), pos_integer()) -> no_return().
+
+expand_uri_error(Us, L) ->
+ {Ps, _} = edoc_lib:split_at(lists:reverse(Us), $:),
+ throw_error(L, {"reference '[~s:...' ended unexpectedly", [Ps]}).
+
+
+push_uri(Us, Ss, As) ->
+ ">a/<" ++ Ss ++ ">\"pot_\"=tegrat \"" ++ Us ++ "\"=ferh a<" ++ As.
+
+
+strip_empty_lines(Cs) ->
+ strip_empty_lines(Cs, 0).
+
+strip_empty_lines(Cs, N) ->
+ {Cs1, Cs2} = edoc_lib:split_at(Cs, $\n),
+ case edoc_lib:is_space(Cs1) of
+ true ->
+ strip_empty_lines(Cs2, N + 1);
+ false ->
+ {Cs, N}
+ end.
+
+
+%% Scanning element content for paragraph breaks (empty lines).
+%% Paragraphs are flushed by block level elements.
+
+par(Es) ->
+ par(Es, [], []).
+
+par([E=#xmlText{value = Value} | Es], As, Bs) ->
+ par_text(Value, As, Bs, E, Es);
+par([E=#xmlElement{name = Name} | Es], As, Bs) ->
+ %% (Note that paragraphs may not contain any further block-level
+ %% elements, including other paragraphs. Tables get complicated.)
+ case Name of
+ 'p' -> par_flush(Es, [E | As], Bs);
+ 'hr' -> par_flush(Es, [E | As], Bs);
+ 'h1' -> par_flush(Es, [E | As], Bs);
+ 'h2' -> par_flush(Es, [E | As], Bs);
+ 'h3' -> par_flush(Es, [E | As], Bs);
+ 'h4' -> par_flush(Es, [E | As], Bs);
+ 'h5' -> par_flush(Es, [E | As], Bs);
+ 'h6' -> par_flush(Es, [E | As], Bs);
+ 'pre' -> par_flush(Es, [E | As], Bs);
+ 'address' -> par_flush(Es, [E | As], Bs);
+ 'div' -> par_flush(Es, [par_elem(E) | As], Bs);
+ 'blockquote' -> par_flush(Es, [par_elem(E) | As], Bs);
+ 'form' -> par_flush(Es, [par_elem(E) | As], Bs);
+ 'fieldset' -> par_flush(Es, [par_elem(E) | As], Bs);
+ 'noscript' -> par_flush(Es, [par_elem(E) | As], Bs);
+ 'ul' -> par_flush(Es, [par_subelem(E) | As], Bs);
+ 'ol' -> par_flush(Es, [par_subelem(E) | As], Bs);
+ 'dl' -> par_flush(Es, [par_subelem(E) | As], Bs);
+ 'table' -> par_flush(Es, [par_subelem(E) | As], Bs);
+ _ -> par(Es, [E | As], Bs)
+ end;
+par([E | Es], As, Bs) ->
+ par(Es, [E | As], Bs);
+par([], As, Bs) ->
+ lists:reverse(As ++ Bs).
+
+par_text(Cs, As, Bs, E, Es) ->
+ case ptxt(Cs) of
+ none ->
+ %% no blank lines: keep this element as it is
+ par(Es, [E | As], Bs);
+ {Cs1, Ss, Cs2} ->
+ Es1 = case Cs1 of
+ [] -> lists:reverse(As);
+ _ -> lists:reverse(As, [E#xmlText{value = Cs1}])
+ end,
+ Bs0 = case Es1 of
+ [] -> Bs;
+ _ -> [#xmlElement{name = p, content = Es1} | Bs]
+ end,
+ Bs1 = case Ss of
+ [] -> Bs0;
+ _ -> [#xmlText{value = Ss} | Bs0]
+ end,
+ case Cs2 of
+ [] ->
+ par(Es, [], Bs1);
+ _ ->
+ par_text(Cs2, [], Bs1, #xmlText{value = Cs2}, Es)
+ end
+ end.
+
+par_flush(Es, As, Bs) ->
+ par(Es, [], As ++ Bs).
+
+par_elem(E) ->
+ E#xmlElement{content = par(E#xmlElement.content)}.
+
+%% Only process content of subelements; ignore immediate content.
+par_subelem(E) ->
+ E#xmlElement{content = par_subelem_1(E#xmlElement.content)}.
+
+par_subelem_1([E=#xmlElement{name = Name} | Es]) ->
+ E1 = case par_skip(Name) of
+ true ->
+ E;
+ false ->
+ case par_sub(Name) of
+ true ->
+ par_subelem(E);
+ false ->
+ par_elem(E)
+ end
+ end,
+ [E1 | par_subelem_1(Es)];
+par_subelem_1([E | Es]) ->
+ [E | par_subelem_1(Es)];
+par_subelem_1([]) ->
+ [].
+
+par_skip('caption') -> true;
+par_skip('col') -> true;
+par_skip('colgroup') -> true;
+par_skip(_) -> false.
+
+par_sub(tr) -> true;
+par_sub(thead) -> true;
+par_sub(tfoot) -> true;
+par_sub(tbody) -> true;
+par_sub(_) -> false.
+
+
+%% scanning text content for a blank line
+
+ptxt(Cs) ->
+ ptxt(Cs, []).
+
+ptxt([$\n | Cs], As) ->
+ ptxt_1(Cs, As, [$\n]);
+ptxt([C | Cs], As) ->
+ ptxt(Cs, [C | As]);
+ptxt([], _As) ->
+ none.
+
+%% scanning text following an initial newline
+ptxt_1([C=$\s | Cs], As, Ss) ->
+ ptxt_1(Cs, As, [C | Ss]);
+ptxt_1([C=$\t | Cs], As, Ss) ->
+ ptxt_1(Cs, As, [C | Ss]);
+ptxt_1([C=$\n | Cs], As, Ss) ->
+ %% blank line detected
+ ptxt_2(Cs, As, [C | Ss]);
+ptxt_1(Cs, As, Ss) ->
+ %% not a blank line
+ ptxt(Cs, lists:reverse(Ss, As)).
+
+%% collecting whitespace following a blank line
+ptxt_2([C=$\s | Cs], As, Ss) ->
+ ptxt_2(Cs, As, [C | Ss]);
+ptxt_2([C=$\t | Cs], As, Ss) ->
+ ptxt_2(Cs, As, [C | Ss]);
+ptxt_2([C=$\n | Cs], As, Ss) ->
+ ptxt_2(Cs, As, [C | Ss]);
+ptxt_2(Cs, As, Ss) ->
+ %% ended by non-whitespace or end of element
+ case edoc_lib:is_space(As) of
+ true ->
+ {[], lists:reverse(Ss ++ As), Cs};
+ false ->
+ {lists:reverse(As), lists:reverse(Ss), Cs}
+ end.
+
+
+-spec throw_error(non_neg_integer(), {string(), [_]}) -> no_return().
+
+throw_error(L, D) ->
+ throw({error, L, D}).