From 84adefa331c4159d432d22840663c38f155cd4c1 Mon Sep 17 00:00:00 2001 From: Erlang/OTP Date: Fri, 20 Nov 2009 14:54:40 +0000 Subject: The R13B03 release. --- lib/edoc/src/edoc_wiki.erl | 456 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 456 insertions(+) create mode 100644 lib/edoc/src/edoc_wiki.erl (limited to 'lib/edoc/src/edoc_wiki.erl') diff --git a/lib/edoc/src/edoc_wiki.erl b/lib/edoc/src/edoc_wiki.erl new file mode 100644 index 0000000000..e4a3d74734 --- /dev/null +++ b/lib/edoc/src/edoc_wiki.erl @@ -0,0 +1,456 @@ +%% ===================================================================== +%% This library is free software; you can redistribute it and/or modify +%% it under the terms of the GNU Lesser General Public License as +%% published by the Free Software Foundation; either version 2 of the +%% License, or (at your option) any later version. +%% +%% This library is distributed in the hope that it will be useful, but +%% WITHOUT ANY WARRANTY; without even the implied warranty of +%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +%% Lesser General Public License for more details. +%% +%% You should have received a copy of the GNU Lesser General Public +%% License along with this library; if not, write to the Free Software +%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +%% USA +%% +%% $Id$ +%% +%% @private +%% @copyright 2001-2003 Richard Carlsson +%% @author Richard Carlsson +%% @see edoc +%% @end +%% ===================================================================== + +%% @doc EDoc wiki expansion, parsing and postprocessing of XML text. +%% Uses {@link //xmerl. XMerL}. +%% @end + +%% Notes: +%% +%% * Whatever happens in this module, it must interact nicely with the +%% actual XML-parsing. It is not acceptable to break any existing and +%% legal XML markup so that it does not parse or is rendered wrong. +%% +%% * The focus should always be on making *documentation* easier to +%% write. No wiki notation should be introduced unless it is clear that +%% it is better than using plain XHTML, making typing less cumbersome +%% and the resulting text easier to read. The wiki notation should be a +%% small bag of easy-to-remember tricks for making XHTML documentation +%% easier to write, not a complete markup language in itself. As a +%% typical example, it is hardly worthwile to introduce a special +%% notation like say, ""..."" for emphasized text, since ... is +%% not much harder to write, not any less readable, and no more +%% difficult to remember, especially since emphasis is not very often +%% occurring in normal documentation. +%% +%% * The central reasoning for the code-quoting goes like this: I don't +%% want to have special escape characters within the quotes (like +%% backslash in C), to allow quoting of the quote characters themselves. +%% I also don't want to use the "`" character both for opening and +%% closing quotes. Therefore, you can either use `...' - and then you +%% cannot use the "'" character without ending the quote - or you can +%% use ``...'' - which allows single but not double "'" characters +%% within the quote. Whitespace is automatically removed from the +%% beginning and the end of the quoted strings; this allows you to write +%% things like "`` 'foo@bar' ''". Text that contains "''" has to be +%% written within .... +%% +%% To produce a single "`" character without starting a quote, write +%% "`'" (no space between "`" and "'"). +%% +%% For verbatim/preformatted text, the ```...'''-quotes expand to +%% "
". The indentation at the start of the +%% quoted string is preserved; whitespace is stripped only at the end. +%% Whole leading lines of whitespace are however skipped. + +-module(edoc_wiki). + +-export([parse_xml/2, expand_text/2]). + +-include("edoc.hrl"). +-include("xmerl.hrl"). + +-define(BASE_HEADING, 3). + + +%% Parsing Wiki-XML with pre-and post-expansion. + +parse_xml(Data, Line) -> + par(parse_xml_1(expand_text(Data, Line), Line)). + +parse_xml_1(Text, Line) -> + Text1 = "" ++ Text ++ "", + case catch {ok, xmerl_scan:string(Text1, [{line, Line}])} of + {ok, {E, _}} -> + E#xmlElement.content; + {'EXIT', {fatal, {Reason, L, _C}}} -> + throw_error(L, {"XML parse error: ~p.", [Reason]}); + {'EXIT', Reason} -> + throw_error(Line, {"error in XML parser: ~P.", [Reason, 10]}); + Other -> + throw_error(Line, {"nocatch in XML parser: ~P.", [Other, 10]}) + end. + +%% Expand wiki stuff in arbitrary text. + +expand_text(Cs, L) -> + lists:reverse(expand_new_line(Cs, L, [])). + +%% Interestingly, the reverse of "code" is "edoc". :-) + +expand_new_line([$\s = C | Cs], L, As) -> + expand_new_line(Cs, L, [C | As]); +expand_new_line([$\t = C | Cs], L, As) -> + expand_new_line(Cs, L, [C | As]); +expand_new_line([$\n = C | Cs], L, As) -> + expand_new_line(Cs, L + 1, [C | As]); +expand_new_line([$=, $=, $=, $= | Cs], L, As) -> + expand_heading(Cs, 2, L, As); +expand_new_line([$=, $=, $= | Cs], L, As) -> + expand_heading(Cs, 1, L, As); +expand_new_line([$=, $= | Cs], L, As) -> + expand_heading(Cs, 0, L, As); +expand_new_line(Cs, L, As) -> + expand(Cs, L, As). + +expand([$`, $' | Cs], L, As) -> + expand(Cs, L, [$` | As]); % produce "`" - don't start a new quote +expand([$`, $`, $` | Cs], L, As) -> + %% If this is the first thing on the line, compensate for the + %% indentation, unless we had to skip one or more empty lines. + {Cs1, Skipped} = strip_empty_lines(Cs), % avoid vertical space + N = if Skipped > 0 -> + 0; + true -> + {As1, _} = edoc_lib:split_at(As, $\n), + case edoc_lib:is_space(As1) of + true -> 3 + length(As1); + false -> 2 % nice default - usually right. + end + end, + Ss = lists:duplicate(N, $\s), + expand_triple(Cs1, L + Skipped, Ss ++ "[ATADC[!<>erp<" ++ As); +expand([$`, $` | Cs], L, As) -> + expand_double(edoc_lib:strip_space(Cs), L, ">edoc<" ++ As); +expand([$` | Cs], L, As) -> + expand_single(edoc_lib:strip_space(Cs), L, ">edoc<" ++ As); +expand([$[ | Cs], L, As) -> + expand_uri(Cs, L, As); +expand([$\n = C | Cs], L, As) -> + expand_new_line(Cs, L + 1, [C | As]); +expand([C | Cs], L, As) -> + expand(Cs, L, [C | As]); +expand([], _, As) -> + As. + +%% == Heading == +%% === SubHeading === +%% ==== SubSubHeading ==== + +expand_heading([$= | _] = Cs, N, L, As) -> + expand_heading_1(Cs, N, L, As); +expand_heading(Cs, N, L, As) -> + {Cs1, Cs2} = edoc_lib:split_at(Cs, $\n), + case edoc_lib:strip_space(lists:reverse(Cs1)) of + [$=, $= | Cs3] -> + {Es, Ts} = lists:splitwith(fun (X) -> X =:= $= end, Cs3), + if length(Es) =:= N -> + Ts1 = edoc_lib:strip_space( + lists:reverse(edoc_lib:strip_space(Ts))), + expand_heading_2(Ts1, Cs2, N, L, As); + true -> + H1 = lists:duplicate(N+2, $=), + H2 = "==" ++ Es, + throw_error(L, {"heading end marker mismatch: " + "~s...~s", [H1, H2]}) + end; + _ -> + expand_heading_1(Cs, N, L, As) + end. + +expand_heading_1(Cs, N, L, As) -> + expand(Cs, L, lists:duplicate(N + 2, $=) ++ As). + +expand_heading_2(Ts, Cs, N, L, As) -> + H = ?BASE_HEADING + N, + Ts1 = io_lib:format("~s\n", + [H, make_label(Ts), Ts, H]), + expand_new_line(Cs, L + 1, lists:reverse(lists:flatten(Ts1), As)). + +make_label([$\s | Cs]) -> + [$_ | make_label(edoc_lib:strip_space(Cs))]; +make_label([$\t | Cs]) -> + [$_ | make_label(edoc_lib:strip_space(Cs))]; +make_label([$\n | Cs]) -> + [$_ | make_label(edoc_lib:strip_space(Cs))]; +make_label([C | Cs]) -> + [C | make_label(Cs)]; +make_label([]) -> + []. + +%% `...' + +expand_single(Cs, L, As) -> + expand_single(Cs, L, As, L). + +expand_single([$' | Cs], L, As, _L0) -> + expand(Cs, L, ">edoc/<" ++ edoc_lib:strip_space(As)); +expand_single([$< | Cs], L, As, L0) -> + expand_single(Cs, L, ";tl&" ++ As, L0); +expand_single([$> | Cs], L, As, L0) -> + expand_single(Cs, L, ";tg&" ++ As, L0); +expand_single([$& | Cs], L, As, L0) -> + expand_single(Cs, L, ";pma&" ++ As, L0); +expand_single([$\n = C | Cs], L, As, L0) -> + expand_single(Cs, L + 1, [C | As], L0); +expand_single([C | Cs], L, As, L0) -> + expand_single(Cs, L, [C | As], L0); +expand_single([], L, _, L0) -> + throw_error(L0, {"`-quote ended unexpectedly at line ~w", [L]}). + +%% ``...'' + +expand_double(Cs, L, As) -> + expand_double(Cs, L, As, L). + +expand_double([$', $' | Cs], L, As, _L0) -> + expand(Cs, L, ">edoc/<" ++ edoc_lib:strip_space(As)); +expand_double([$< | Cs], L, As, L0) -> + expand_double(Cs, L, ";tl&" ++ As, L0); +expand_double([$> | Cs], L, As, L0) -> + expand_double(Cs, L, ";tg&" ++ As, L0); +expand_double([$& | Cs], L, As, L0) -> + expand_double(Cs, L, ";pma&" ++ As, L0); +expand_double([$\n = C | Cs], L, As, L0) -> + expand_double(Cs, L + 1, [C | As], L0); +expand_double([C | Cs], L, As, L0) -> + expand_double(Cs, L, [C | As], L0); +expand_double([], L, _, L0) -> + throw_error(L0, {"``-quote ended unexpectedly at line ~w", [L]}). + +%% ```...''' + +expand_triple(Cs, L, As) -> + expand_triple(Cs, L, As, L). + +expand_triple([$', $', $' | Cs], L, As, _L0) -> % ' stupid emacs + expand(Cs, L, ">erp/<>]]" ++ edoc_lib:strip_space(As)); +expand_triple([$], $], $> | Cs], L, As, L0) -> + expand_triple(Cs, L, ";tg&]]" ++ As, L0); +expand_triple([$\n = C | Cs], L, As, L0) -> + expand_triple(Cs, L + 1, [C | As], L0); +expand_triple([C | Cs], L, As, L0) -> + expand_triple(Cs, L, [C | As], L0); +expand_triple([], L, _, L0) -> + throw_error(L0, {"```-quote ended unexpectedly at line ~w", [L]}). + +%% e.g. [file:/...] or [http://... LinkText] + +expand_uri("http:/" ++ Cs, L, As) -> + expand_uri(Cs, L, "/:ptth", As); +expand_uri("ftp:/" ++ Cs, L, As) -> + expand_uri(Cs, L, "/:ptf", As); +expand_uri("file:/" ++ Cs, L, As) -> + expand_uri(Cs, L, "/:elif", As); +expand_uri(Cs, L, As) -> + expand(Cs, L, [$[ | As]). + +expand_uri([$] | Cs], L, Us, As) -> + expand(Cs, L, push_uri(Us, ">tt/<" ++ Us ++ ">tt<", As)); +expand_uri([$\s = C | Cs], L, Us, As) -> + expand_uri(Cs, 0, L, [C], Us, As); +expand_uri([$\t = C | Cs], L, Us, As) -> + expand_uri(Cs, 0, L, [C], Us, As); +expand_uri([$\n = C | Cs], L, Us, As) -> + expand_uri(Cs, 1, L, [C], Us, As); +expand_uri([C | Cs], L, Us, As) -> + expand_uri(Cs, L, [C | Us], As); +expand_uri([], L, Us, _As) -> + expand_uri_error(Us, L). + +expand_uri([$] | Cs], N, L, Ss, Us, As) -> + Ss1 = lists:reverse(edoc_lib:strip_space( + lists:reverse(edoc_lib:strip_space(Ss)))), + expand(Cs, L + N, push_uri(Us, Ss1, As)); +expand_uri([$\n = C | Cs], N, L, Ss, Us, As) -> + expand_uri(Cs, N + 1, L, [C | Ss], Us, As); +expand_uri([C | Cs], N, L, Ss, Us, As) -> + expand_uri(Cs, N, L, [C | Ss], Us, As); +expand_uri([], _, L, _Ss, Us, _As) -> + expand_uri_error(Us, L). + +-spec expand_uri_error(list(), pos_integer()) -> no_return(). + +expand_uri_error(Us, L) -> + {Ps, _} = edoc_lib:split_at(lists:reverse(Us), $:), + throw_error(L, {"reference '[~s:...' ended unexpectedly", [Ps]}). + + +push_uri(Us, Ss, As) -> + ">a/<" ++ Ss ++ ">\"pot_\"=tegrat \"" ++ Us ++ "\"=ferh a<" ++ As. + + +strip_empty_lines(Cs) -> + strip_empty_lines(Cs, 0). + +strip_empty_lines(Cs, N) -> + {Cs1, Cs2} = edoc_lib:split_at(Cs, $\n), + case edoc_lib:is_space(Cs1) of + true -> + strip_empty_lines(Cs2, N + 1); + false -> + {Cs, N} + end. + + +%% Scanning element content for paragraph breaks (empty lines). +%% Paragraphs are flushed by block level elements. + +par(Es) -> + par(Es, [], []). + +par([E=#xmlText{value = Value} | Es], As, Bs) -> + par_text(Value, As, Bs, E, Es); +par([E=#xmlElement{name = Name} | Es], As, Bs) -> + %% (Note that paragraphs may not contain any further block-level + %% elements, including other paragraphs. Tables get complicated.) + case Name of + 'p' -> par_flush(Es, [E | As], Bs); + 'hr' -> par_flush(Es, [E | As], Bs); + 'h1' -> par_flush(Es, [E | As], Bs); + 'h2' -> par_flush(Es, [E | As], Bs); + 'h3' -> par_flush(Es, [E | As], Bs); + 'h4' -> par_flush(Es, [E | As], Bs); + 'h5' -> par_flush(Es, [E | As], Bs); + 'h6' -> par_flush(Es, [E | As], Bs); + 'pre' -> par_flush(Es, [E | As], Bs); + 'address' -> par_flush(Es, [E | As], Bs); + 'div' -> par_flush(Es, [par_elem(E) | As], Bs); + 'blockquote' -> par_flush(Es, [par_elem(E) | As], Bs); + 'form' -> par_flush(Es, [par_elem(E) | As], Bs); + 'fieldset' -> par_flush(Es, [par_elem(E) | As], Bs); + 'noscript' -> par_flush(Es, [par_elem(E) | As], Bs); + 'ul' -> par_flush(Es, [par_subelem(E) | As], Bs); + 'ol' -> par_flush(Es, [par_subelem(E) | As], Bs); + 'dl' -> par_flush(Es, [par_subelem(E) | As], Bs); + 'table' -> par_flush(Es, [par_subelem(E) | As], Bs); + _ -> par(Es, [E | As], Bs) + end; +par([E | Es], As, Bs) -> + par(Es, [E | As], Bs); +par([], As, Bs) -> + lists:reverse(As ++ Bs). + +par_text(Cs, As, Bs, E, Es) -> + case ptxt(Cs) of + none -> + %% no blank lines: keep this element as it is + par(Es, [E | As], Bs); + {Cs1, Ss, Cs2} -> + Es1 = case Cs1 of + [] -> lists:reverse(As); + _ -> lists:reverse(As, [E#xmlText{value = Cs1}]) + end, + Bs0 = case Es1 of + [] -> Bs; + _ -> [#xmlElement{name = p, content = Es1} | Bs] + end, + Bs1 = case Ss of + [] -> Bs0; + _ -> [#xmlText{value = Ss} | Bs0] + end, + case Cs2 of + [] -> + par(Es, [], Bs1); + _ -> + par_text(Cs2, [], Bs1, #xmlText{value = Cs2}, Es) + end + end. + +par_flush(Es, As, Bs) -> + par(Es, [], As ++ Bs). + +par_elem(E) -> + E#xmlElement{content = par(E#xmlElement.content)}. + +%% Only process content of subelements; ignore immediate content. +par_subelem(E) -> + E#xmlElement{content = par_subelem_1(E#xmlElement.content)}. + +par_subelem_1([E=#xmlElement{name = Name} | Es]) -> + E1 = case par_skip(Name) of + true -> + E; + false -> + case par_sub(Name) of + true -> + par_subelem(E); + false -> + par_elem(E) + end + end, + [E1 | par_subelem_1(Es)]; +par_subelem_1([E | Es]) -> + [E | par_subelem_1(Es)]; +par_subelem_1([]) -> + []. + +par_skip('caption') -> true; +par_skip('col') -> true; +par_skip('colgroup') -> true; +par_skip(_) -> false. + +par_sub(tr) -> true; +par_sub(thead) -> true; +par_sub(tfoot) -> true; +par_sub(tbody) -> true; +par_sub(_) -> false. + + +%% scanning text content for a blank line + +ptxt(Cs) -> + ptxt(Cs, []). + +ptxt([$\n | Cs], As) -> + ptxt_1(Cs, As, [$\n]); +ptxt([C | Cs], As) -> + ptxt(Cs, [C | As]); +ptxt([], _As) -> + none. + +%% scanning text following an initial newline +ptxt_1([C=$\s | Cs], As, Ss) -> + ptxt_1(Cs, As, [C | Ss]); +ptxt_1([C=$\t | Cs], As, Ss) -> + ptxt_1(Cs, As, [C | Ss]); +ptxt_1([C=$\n | Cs], As, Ss) -> + %% blank line detected + ptxt_2(Cs, As, [C | Ss]); +ptxt_1(Cs, As, Ss) -> + %% not a blank line + ptxt(Cs, lists:reverse(Ss, As)). + +%% collecting whitespace following a blank line +ptxt_2([C=$\s | Cs], As, Ss) -> + ptxt_2(Cs, As, [C | Ss]); +ptxt_2([C=$\t | Cs], As, Ss) -> + ptxt_2(Cs, As, [C | Ss]); +ptxt_2([C=$\n | Cs], As, Ss) -> + ptxt_2(Cs, As, [C | Ss]); +ptxt_2(Cs, As, Ss) -> + %% ended by non-whitespace or end of element + case edoc_lib:is_space(As) of + true -> + {[], lists:reverse(Ss ++ As), Cs}; + false -> + {lists:reverse(As), lists:reverse(Ss), Cs} + end. + + +-spec throw_error(non_neg_integer(), {string(), [_]}) -> no_return(). + +throw_error(L, D) -> + throw({error, L, D}). -- cgit v1.2.3