aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorLoïc Hoguin <[email protected]>2018-06-08 14:49:09 +0200
committerLoïc Hoguin <[email protected]>2018-06-08 14:49:09 +0200
commit524777054be30c848c1883ffd15b245c29f73004 (patch)
tree6c3df0022ee6d228341bb6ce2c525011076c123d /src
parent48cbfe8b60f3e555acd2d623db10e4eb56234179 (diff)
downloadasciideck-524777054be30c848c1883ffd15b245c29f73004.tar.gz
asciideck-524777054be30c848c1883ffd15b245c29f73004.tar.bz2
asciideck-524777054be30c848c1883ffd15b245c29f73004.zip
Rewrite the project
The new code is much more readable and easier to extend. I took inspiration from Haskell's Parsec project which seems to only write the happy-path and applied the idea to Erlang's exceptions. When the parser tries to parse, say, a list, and crashes, it tries with a table next, and so on until something matches. Normal paragraphs always match so there can be no parsing failures. The parser now has a number of passes: first the block parser, then lists and tables passes to build a proper tree out of them and finally an inline pass to apply inline formatting. The resulting AST can then be modified at will and passed on to translator modules which output a different format. The man page translator was also rewritten and has been tested against both Cowboy and Gun. Numerous issues were fixed as a result of this rewrite.
Diffstat (limited to 'src')
-rw-r--r--src/asciideck.erl13
-rw-r--r--src/asciideck_attributes_parser.erl120
-rw-r--r--src/asciideck_attributes_pass.erl112
-rw-r--r--src/asciideck_block_parser.erl1116
-rw-r--r--src/asciideck_inline_pass.erl308
-rw-r--r--src/asciideck_line_reader.erl94
-rw-r--r--src/asciideck_lists_pass.erl155
-rw-r--r--src/asciideck_parser.erl388
-rw-r--r--src/asciideck_tables_pass.erl191
-rw-r--r--src/asciideck_to_manpage.erl236
10 files changed, 2246 insertions, 487 deletions
diff --git a/src/asciideck.erl b/src/asciideck.erl
index 749ccec..bd5792c 100644
--- a/src/asciideck.erl
+++ b/src/asciideck.erl
@@ -1,4 +1,4 @@
-%% Copyright (c) 2016, Loïc Hoguin <[email protected]>
+%% Copyright (c) 2016-2018, Loïc Hoguin <[email protected]>
%%
%% Permission to use, copy, modify, and/or distribute this software for any
%% purpose with or without fee is hereby granted, provided that the above
@@ -32,8 +32,15 @@ parse_file(Filename, St) ->
parse(Data) ->
parse(Data, #{}).
-parse(Data, St) when is_binary(Data) ->
- asciideck_parser:parse(Data, St);
+parse(Data, _St) when is_binary(Data) ->
+ Passes = [
+ asciideck_attributes_pass,
+ asciideck_lists_pass,
+ asciideck_tables_pass,
+ asciideck_inline_pass
+ ],
+ lists:foldl(fun(M, AST) -> M:run(AST) end,
+ asciideck_block_parser:parse(Data), Passes);
parse(Data, St) ->
parse(iolist_to_binary(Data), St).
diff --git a/src/asciideck_attributes_parser.erl b/src/asciideck_attributes_parser.erl
new file mode 100644
index 0000000..b89c3f4
--- /dev/null
+++ b/src/asciideck_attributes_parser.erl
@@ -0,0 +1,120 @@
+%% Copyright (c) 2017-2018, Loïc Hoguin <[email protected]>
+%%
+%% Permission to use, copy, modify, and/or distribute this software for any
+%% purpose with or without fee is hereby granted, provided that the above
+%% copyright notice and this permission notice appear in all copies.
+%%
+%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+%% Asciidoc User Guide 29
+-module(asciideck_attributes_parser).
+
+-export([parse/1]).
+
+-type attributes() :: #{
+ %% The raw attribute list.
+ 0 := binary(),
+ %% Positional attributes.
+ pos_integer() => binary(),
+ %% Named attributes.
+ binary() => binary()
+}.
+-export_type([attributes/0]).
+
+-define(IS_WS(C), (C =:= $\s) or (C =:= $\t)).
+
+-spec parse(binary()) -> attributes().
+parse(Data) ->
+ parse(Data, #{0 => Data}, 1).
+
+parse(<<>>, Attrs, _) ->
+ Attrs;
+parse(Data, Attrs, Nth) ->
+ case parse_attr(Data, <<>>) of
+ {Value, Rest} when Nth =/= undefined ->
+ parse(Rest, Attrs#{Nth => Value}, Nth + 1);
+ {Name, Value, Rest} ->
+ parse(Rest, Attrs#{Name => Value}, undefined)
+ end.
+
+parse_attr(<<>>, Acc) ->
+ {Acc, <<>>};
+%% Skip preceding whitespace.
+parse_attr(<<C, R/bits>>, <<>>) when ?IS_WS(C) ->
+ parse_attr(R, <<>>);
+%% Parse quoted positional attributes in their own function.
+parse_attr(<<$", R/bits>>, <<>>) ->
+ parse_quoted_attr(R, <<>>);
+%% We have a named attribute, parse the value.
+parse_attr(<<$=, R/bits>>, Name) when Name =/= <<>> ->
+ parse_attr_value(R, asciideck_block_parser:trim(Name, trailing), <<>>);
+%% We have a positional attribute.
+parse_attr(<<$,, R/bits>>, Value) ->
+ {asciideck_block_parser:trim(Value, trailing), R};
+%% Continue.
+parse_attr(<<C, R/bits>>, Acc) when C =/= $= ->
+ parse_attr(R, <<Acc/binary, C>>).
+
+%% Get everything until the next double quote.
+parse_quoted_attr(<<$", R/bits>>, Acc) ->
+ parse_quoted_attr_end(R, Acc);
+parse_quoted_attr(<<$\\, $", R/bits>>, Acc) ->
+ parse_quoted_attr(R, <<Acc/binary, $">>);
+parse_quoted_attr(<<C, R/bits>>, Acc) ->
+ parse_quoted_attr(R, <<Acc/binary, C>>).
+
+%% Skip the whitespace until the next comma or eof.
+parse_quoted_attr_end(<<>>, Value) ->
+ {Value, <<>>};
+parse_quoted_attr_end(<<$,, R/bits>>, Value) ->
+ {Value, R};
+parse_quoted_attr_end(<<C, R/bits>>, Value) when ?IS_WS(C) ->
+ parse_quoted_attr_end(R, Value).
+
+parse_attr_value(<<>>, Name, Acc) ->
+ {Name, Acc, <<>>};
+%% Skip preceding whitespace.
+parse_attr_value(<<C, R/bits>>, Name, <<>>) when ?IS_WS(C) ->
+ parse_attr_value(R, Name, <<>>);
+%% Parse quoted attribute values in their own function.
+parse_attr_value(<<$", R/bits>>, Name, <<>>) ->
+ {Value, Rest} = parse_quoted_attr(R, <<>>),
+ {Name, Value, Rest};
+%% Done.
+parse_attr_value(<<$,, R/bits>>, Name, Value) ->
+ {Name, asciideck_block_parser:trim(Value, trailing), R};
+%% Continue.
+parse_attr_value(<<C, R/bits>>, Name, Acc) ->
+ parse_attr_value(R, Name, <<Acc/binary, C>>).
+
+-ifdef(TEST).
+attribute_0_test() ->
+ #{0 := <<"Hello,world,width=\"50\"">>} = parse(<<"Hello,world,width=\"50\"">>),
+ ok.
+
+parse_test() ->
+    %% An empty list yields only the raw attribute list under key 0.
+    %% Compare with =:= because the pattern #{} matches any map.
+    true = #{0 => <<>>} =:= parse(<<>>),
+    #{1 := <<"Hello">>} = parse(<<"Hello">>),
+    #{
+        1 := <<"quote">>,
+        2 := <<"Bertrand Russell">>,
+        3 := <<"The World of Mathematics (1956)">>
+    } = parse(<<"quote, Bertrand Russell, The World of Mathematics (1956)">>),
+    #{
+        1 := <<"22 times">>,
+        <<"backcolor">> := <<"#0e0e0e">>,
+        <<"options">> := <<"noborders,wide">>
+    } = parse(<<"\"22 times\", backcolor=\"#0e0e0e\", options=\"noborders,wide\"">>),
+    #{
+        1 := <<"A footnote&#44; &#34;with an image&#34; image:smallnew.png[]">>
+    } = parse(<<"A footnote&#44; &#34;with an image&#34; image:smallnew.png[]">>),
+    ok.
+-endif.
diff --git a/src/asciideck_attributes_pass.erl b/src/asciideck_attributes_pass.erl
new file mode 100644
index 0000000..393b57d
--- /dev/null
+++ b/src/asciideck_attributes_pass.erl
@@ -0,0 +1,112 @@
+%% Copyright (c) 2017-2018, Loïc Hoguin <[email protected]>
+%%
+%% Permission to use, copy, modify, and/or distribute this software for any
+%% purpose with or without fee is hereby granted, provided that the above
+%% copyright notice and this permission notice appear in all copies.
+%%
+%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+%% The purpose of this pass is to apply attributes to
+%% their corresponding blocks. For macros the attributes
+%% are already applied. For inline elements the inline
+%% pass is taking care of it.
+-module(asciideck_attributes_pass).
+
+-export([run/1]).
+
+run([]) ->
+ [];
+%% A block identifier is an alternative way of specifying
+%% the id attribute for a block.
+run([{block_id, #{id := ID}, <<>>, _}|Tail0]) ->
+ Tail = apply_attributes(Tail0, #{<<"id">> => ID}),
+ run(Tail);
+%% A block title is ultimately treated as an attribute
+%% for the following block.
+run([{block_title, _, Title, _}|Tail0]) ->
+ Tail = apply_attributes(Tail0, #{<<"title">> => Title}),
+ run(Tail);
+run([{attribute_list, Attrs, <<>>, _}|Tail0]) ->
+ Tail = apply_attributes(Tail0, Attrs),
+ run(Tail);
+run([Block|Tail]) ->
+ [Block|run(Tail)].
+
+%% Find the next block to apply the attributes.
+apply_attributes([], _) ->
+ [];
+apply_attributes(AST=[Element0={Type, Attrs0, Content, Ann}|Tail], Attrs) ->
+ case can_apply(Type) of
+ drop ->
+ AST;
+ skip ->
+ [Element0|apply_attributes(Tail, Attrs)];
+ apply ->
+ Element = {Type, maps:merge(Attrs0, Attrs), Content, Ann},
+ [Element|Tail]
+ end.
+
+%% Block macros already come with a mandatory attribute list.
+%% Just to play it safe we drop the attributes for now.
+can_apply(block_macro) -> drop;
+%% If we hit a list item continuation, drop the attributes for now.
+can_apply(list_item_continuation) -> drop;
+%% We skip attribute lists and the like and let it sort itself out.
+can_apply(block_id) -> skip;
+can_apply(attribute_list) -> skip;
+can_apply(block_title) -> skip;
+%% Everything else is a block.
+can_apply(_) -> apply.
+
+-ifdef(TEST).
+attribute_list_test() ->
+ AST0 = [
+ {attribute_list, #{
+ 0 => <<"width=400">>,
+ <<"width">> => <<"400">>
+ }, <<>>, #{line => 1}},
+ {listing_block, #{}, <<"Hello!">>, #{line => 2}}
+ ],
+ AST = [
+ {listing_block, #{
+ 0 => <<"width=400">>,
+ <<"width">> => <<"400">>
+ }, <<"Hello!">>, #{line => 2}}
+ ],
+ AST = run(AST0),
+ ok.
+
+block_id_test() ->
+ AST0 = [
+ {block_id, #{
+ id => <<"cowboy_req">>
+ }, <<>>, #{line => 1}},
+ {listing_block, #{}, <<"Hello!">>, #{line => 2}}
+ ],
+ AST = [
+ {listing_block, #{
+ <<"id">> => <<"cowboy_req">>
+ }, <<"Hello!">>, #{line => 2}}
+ ],
+ AST = run(AST0),
+ ok.
+
+block_title_test() ->
+ AST0 = [
+ {block_title, #{}, <<"Title">>, #{line => 1}},
+ {listing_block, #{}, <<"Hello!">>, #{line => 2}}
+ ],
+ AST = [
+ {listing_block, #{
+ <<"title">> => <<"Title">>
+ }, <<"Hello!">>, #{line => 2}}
+ ],
+ AST = run(AST0),
+ ok.
+-endif.
diff --git a/src/asciideck_block_parser.erl b/src/asciideck_block_parser.erl
new file mode 100644
index 0000000..ad63fa6
--- /dev/null
+++ b/src/asciideck_block_parser.erl
@@ -0,0 +1,1116 @@
+%% Copyright (c) 2016-2018, Loïc Hoguin <[email protected]>
+%%
+%% Permission to use, copy, modify, and/or distribute this software for any
+%% purpose with or without fee is hereby granted, provided that the above
+%% copyright notice and this permission notice appear in all copies.
+%%
+%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+%% The block parser is the first pass of the parsing of Asciidoc
+%% files. It only isolates the different top-level blocks and
+%% produces a representation that can then be manipulated.
+%%
+%% Further passes are necessary to propagate the parsed lists
+%% of attributes to their respective blocks, to create actual
+%% lists from the parsed list items or to parse the contents
+%% of tables. Finally a final pass will parse inline elements.
+%%
+%% This module may be called again for parsing the content
+%% of individual table cells.
+-module(asciideck_block_parser).
+
+-export([parse/1]).
+
+%% @todo Temporary export. Move somewhere else.
+-export([trim/1]).
+-export([trim/2]).
+-export([while/2]).
+
+-type ast() :: list(). %% @todo
+
+-record(state, {
+ reader :: pid()
+}).
+
+-define(IS_WS(C), (C =:= $\s) or (C =:= $\t)).
+
+-ifdef(TEST).
+-define(NOT(Type, Value), true = Type =/= element(1, hd(Value))).
+
+define_NOT_test() ->
+ %% This succeeds.
+ ?NOT(block_id, parse(<<"[[block,id]]">>)),
+ %% This fails.
+ {'EXIT', _} = (catch ?NOT(block_id, parse(<<"[[block_id]]">>))),
+ ok.
+-endif.
+
+-spec parse(binary()) -> ast().
+parse(Data) ->
+ %% @todo Might want to start it supervised.
+ %% @todo Might want to stop it also.
+ {ok, ReaderPid} = asciideck_line_reader:start_link(Data),
+ blocks(#state{reader=ReaderPid}).
+
+blocks(St) ->
+ case block(St) of
+ eof -> [];
+ Block -> [Block|blocks(St)]
+ end.
+
+%% Asciidoc parsing never fails. If a block is not
+%% formatted properly, it will be treated as a paragraph.
+block(St) ->
+ skip(fun empty_line/1, St),
+ oneof([
+ fun eof/1,
+ %% Section titles.
+ fun section_title/1,
+ fun long_section_title/1,
+ %% Block macros.
+ fun block_id/1,
+ fun block_macro/1,
+ %% Lists.
+ fun bulleted_list/1,
+ fun numbered_list/1,
+ fun labeled_list/1,
+ fun callout_list/1,
+ fun list_item_continuation/1,
+ %% Delimited blocks.
+ fun listing_block/1,
+ fun literal_block/1,
+ fun sidebar_block/1,
+ fun comment_block/1,
+ fun passthrough_block/1,
+ fun quote_block/1,
+ fun example_block/1,
+ fun open_block/1,
+ %% Table.
+ fun table/1,
+ %% Attributes.
+ fun attribute_entry/1,
+ fun attribute_list/1,
+ %% Block title.
+ fun block_title/1,
+ %% Comment lines.
+ fun comment_line/1,
+ %% Paragraphs.
+ fun literal_para/1,
+ fun admonition_para/1,
+ fun para/1
+ ], St).
+
+eof(St) ->
+ eof = read_line(St).
+
+-ifdef(TEST).
+eof_test() ->
+ [] = parse(<<>>).
+-endif.
+
+empty_line(St) ->
+ <<>> = trim(read_line(St)).
+
+-ifdef(TEST).
+empty_line_test() ->
+ [] = parse(<<
+ "\n"
+ " \n"
+ " \n"
+ "\n"
+ >>).
+-endif.
+
+%% Asciidoc User Guide 11.2
+%%
+%% One-line titles: one to five "=" characters hard against the left
+%% margin, then whitespace, then the title text. The level is the
+%% number of "=" characters minus one. Any other line fails the case
+%% match (a crash), which is how "not a section title" is signalled.
+section_title(St) ->
+    {Level, Rest} = case read_line(St) of
+        <<"=", C, R/bits>> when ?IS_WS(C) -> {0, R};
+        <<"==", C, R/bits>> when ?IS_WS(C) -> {1, R};
+        <<"===", C, R/bits>> when ?IS_WS(C) -> {2, R};
+        <<"====", C, R/bits>> when ?IS_WS(C) -> {3, R};
+        <<"=====", C, R/bits>> when ?IS_WS(C) -> {4, R}
+    end,
+    Ann = ann(St),
+    Trimmed = trim(Rest),
+    %% The closing delimiter is optional. When present it repeats the
+    %% opening one (Level + 1 "=" characters) after a whitespace character.
+    Trailer = binary:copy(<<"=">>, Level + 1),
+    TextLen = byte_size(Trimmed) - byte_size(Trailer) - 1,
+    Title = case Trimmed of
+        <<Text:TextLen/binary, WS, Trailer/binary>> when ?IS_WS(WS) -> trim(Text);
+        _ -> trim(Trimmed)
+    end,
+    %% Section titles must be followed by at least one empty line.
+    _ = empty_line(St),
+    %% Good!
+    {section_title, #{level => Level}, Title, Ann}.
+
+-ifdef(TEST).
+section_title_test() ->
+ %% With trailing title delimiter.
+ [{section_title, #{level := 0}, <<"Document Title (level 0)">>, _}]
+ = parse(<<"= Document Title (level 0) =">>),
+ [{section_title, #{level := 1}, <<"Section Title (level 1)">>, _}]
+ = parse(<<"== Section Title (level 1) ==">>),
+ [{section_title, #{level := 2}, <<"Section Title (level 2)">>, _}]
+ = parse(<<"=== Section Title (level 2) ===">>),
+ [{section_title, #{level := 3}, <<"Section Title (level 3)">>, _}]
+ = parse(<<"==== Section Title (level 3) ====">>),
+ [{section_title, #{level := 4}, <<"Section Title (level 4)">>, _}]
+ = parse(<<"===== Section Title (level 4) =====">>),
+ %% Without trailing title delimiter.
+ [{section_title, #{level := 0}, <<"Document Title (level 0)">>, _}]
+ = parse(<<"= Document Title (level 0)">>),
+ [{section_title, #{level := 1}, <<"Section Title (level 1)">>, _}]
+ = parse(<<"== Section Title (level 1)">>),
+ [{section_title, #{level := 2}, <<"Section Title (level 2)">>, _}]
+ = parse(<<"=== Section Title (level 2)">>),
+ [{section_title, #{level := 3}, <<"Section Title (level 3)">>, _}]
+ = parse(<<"==== Section Title (level 3)">>),
+ [{section_title, #{level := 4}, <<"Section Title (level 4)">>, _}]
+ = parse(<<"===== Section Title (level 4)">>),
+ %% Accept more spaces before/after delimiters.
+ [{section_title, #{level := 0}, <<"Document Title (level 0)">>, _}]
+ = parse(<<"= Document Title (level 0)">>),
+ [{section_title, #{level := 0}, <<"Document Title (level 0)">>, _}]
+ = parse(<<"= Document Title (level 0) =">>),
+ [{section_title, #{level := 0}, <<"Document Title (level 0)">>, _}]
+ = parse(<<"= Document Title (level 0) =">>),
+ [{section_title, #{level := 0}, <<"Document Title (level 0)">>, _}]
+ = parse(<<"= Document Title (level 0) = ">>),
+ %% A space before the first delimiter is not a title.
+ ?NOT(section_title, parse(<<" = Document Title (level 0)">>)),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 11.1
+long_section_title(St) ->
+ %% Title must be hard against the left margin.
+ <<C, _/bits>> = Title0 = read_line(St),
+ Ann = ann(St),
+ false = ?IS_WS(C),
+ Title = trim(Title0),
+ %% Read the underline.
+ {Level, Char, Underline0} = case read_line(St) of
+ U = <<"=", _/bits >> -> {0, $=, U};
+ U = <<"-", _/bits >> -> {1, $-, U};
+ U = <<"~", _/bits >> -> {2, $~, U};
+ U = <<"^", _/bits >> -> {3, $^, U};
+ U = <<"+", _/bits >> -> {4, $+, U}
+ end,
+ Underline = trim(Underline0, trailing),
+ %% Underline must be the same character repeated over the entire line.
+ repeats(Underline, Char),
+ %% Underline must be the same size as the title, +/- 2 characters.
+ TLen = byte_size(Title),
+ ULen = byte_size(Underline),
+ true = (TLen >= ULen - 2) andalso (TLen =< ULen + 2),
+ %% Good!
+ {section_title, #{level => Level}, Title, Ann}.
+
+-ifdef(TEST).
+long_section_title_test() ->
+ %% Same amount of characters for the underline.
+ [{section_title, #{level := 0}, <<"Document Title (level 0)">>, _}] = parse(<<
+ "Document Title (level 0)\n"
+ "========================">>),
+ [{section_title, #{level := 1}, <<"Section Title (level 1)">>, _}] = parse(<<
+ "Section Title (level 1)\n"
+ "-----------------------">>),
+ [{section_title, #{level := 2}, <<"Section Title (level 2)">>, _}] = parse(<<
+ "Section Title (level 2)\n"
+ "~~~~~~~~~~~~~~~~~~~~~~~">>),
+ [{section_title, #{level := 3}, <<"Section Title (level 3)">>, _}] = parse(<<
+ "Section Title (level 3)\n"
+ "^^^^^^^^^^^^^^^^^^^^^^^">>),
+ [{section_title, #{level := 4}, <<"Section Title (level 4)">>, _}] = parse(<<
+ "Section Title (level 4)\n"
+ "+++++++++++++++++++++++">>),
+ %% A shorter title to confirm we are not cheating.
+ [{section_title, #{level := 0}, <<"Hello!">>, _}] = parse(<<
+ "Hello!\n"
+ "======">>),
+ %% Underline can be +/- 2 characters.
+ [{section_title, #{level := 0}, <<"Hello!">>, _}] = parse(<<
+ "Hello!\n"
+ "====">>),
+ [{section_title, #{level := 0}, <<"Hello!">>, _}] = parse(<<
+ "Hello!\n"
+ "=====">>),
+ [{section_title, #{level := 0}, <<"Hello!">>, _}] = parse(<<
+ "Hello!\n"
+ "=======">>),
+ [{section_title, #{level := 0}, <<"Hello!">>, _}] = parse(<<
+ "Hello!\n"
+ "========">>),
+ %% Underline too short/long results in a different block.
+ ?NOT(section_title, parse(<<
+ "Hello!\n"
+ "===">>)),
+ ?NOT(section_title, parse(<<
+ "Hello!\n"
+ "=========">>)),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 21.2.1
+%%
+%% The <xreflabel> value is not implemented. The set of characters
+%% accepted in an identifier is a guess: anything except comma,
+%% square brackets, space and tab.
+block_id(St) ->
+    %% The line must start with "[[" and, once trimmed, end with "]]".
+    <<"[[", Rest/bits>> = read_line(St),
+    Trimmed = trim(Rest),
+    IDLen = byte_size(Trimmed) - 2,
+    <<BlockID:IDLen/binary, "]]">> = Trimmed,
+    %% Consuming valid characters must use up the whole identifier.
+    Forbidden = [$,, $[, $], $\s, $\t],
+    IsValid = fun(C) -> not lists:member(C, Forbidden) end,
+    {BlockID, <<>>} = while(IsValid, BlockID),
+    %% Good!
+    {block_id, #{id => BlockID}, <<>>, ann(St)}.
+
+-ifdef(TEST).
+block_id_test() ->
+ %% Valid.
+ [{block_id, #{id := <<"X30">>}, <<>>, _}] = parse(<<"[[X30]]">>),
+ %% Invalid.
+ ?NOT(block_id, parse(<<"[[block,id]]">>)),
+ ?NOT(block_id, parse(<<"[[block[id]]">>)),
+ ?NOT(block_id, parse(<<"[[block]id]]">>)),
+ ?NOT(block_id, parse(<<"[[block id]]">>)),
+ ?NOT(block_id, parse(<<"[[block\tid]]">>)),
+ %% Must be hard on the left of the line.
+ ?NOT(block_id, parse(<<" [[block_id]]">>)),
+ ?NOT(block_id, parse(<<"\t[[block_id]]">>)),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 21.2.3
+comment_line(St) ->
+ <<"//", Comment0/bits>> = read_line(St),
+ Comment = trim(Comment0),
+ %% Good!
+ {comment_line, #{<<"subs">> => <<"verbatim">>}, Comment, ann(St)}.
+
+-ifdef(TEST).
+comment_line_test() ->
+ [{comment_line, _, <<"This is a comment.">>, _}] = parse(<<"// This is a comment.">>),
+ %% We trim the whitespace around the comment.
+ [{comment_line, _, <<"This is a comment.">>, _}] = parse(<<"// This is a comment.">>),
+ [{comment_line, _, <<"This is a comment.">>, _}] = parse(<<"// This is a comment. ">>),
+ [{comment_line, _, <<"This is a comment.">>, _}] = parse(<<"//\tThis is a comment.">>),
+ [{comment_line, _, <<"This is a comment.">>, _}] = parse(<<"// This is a comment.\t">>),
+ [
+ {comment_line, _, <<"First line.">>, _},
+ {comment_line, _, <<"Second line.">>, _}
+ ] = parse(<<
+ "// First line.\n"
+ "// Second line.\n">>),
+ %% Must be hard on the left of the line.
+ ?NOT(comment_line, parse(<<" // This is a comment.">>)),
+ ?NOT(comment_line, parse(<<"\t// This is a comment.">>)),
+ ok.
+-endif.
+
+%% We currently implement the following block macros
+%% from the Asciidoc User Guide:
+%%
+%% - image (21.2.2)
+%% - include (21.3.1)
+%% - ifdef (21.3.2)
+%% - ifndef (21.3.2)
+%% - endif (21.3.2)
+block_macro(St) ->
+ Line0 = read_line(St),
+ Ann = ann(St),
+ %% Name must contain letters, digits or dash characters.
+ {Name, <<"::", Line1/bits>>} = while(fun(C) ->
+ ((C >= $a) andalso (C =< $z))
+ orelse ((C >= $A) andalso (C =< $Z))
+ orelse ((C >= $0) andalso (C =< $9))
+ orelse (C =:= $-)
+ end, Line0),
+ %% Name must not begin with a dash.
+ true = binary:at(Name, 0) =/= $-,
+ %% Target must not contain whitespace characters.
+ %% It is followed by an [attribute list].
+ {Target, AttrList0 = <<"[", _/bits>>} = while(fun(C) ->
+ (C =/= $[) andalso (C =/= $\s) andalso (C =/= $\t)
+ end, Line1),
+ AttrList1 = trim(AttrList0),
+ {attribute_list, AttrList, <<>>, _} = attribute_list(St, AttrList1),
+ %% Block macros must be followed by at least one empty line.
+ _ = empty_line(St),
+ {block_macro, AttrList#{
+ name => Name,
+ target => Target
+ }, <<>>, Ann}.
+
+-ifdef(TEST).
+block_macro_image_test() ->
+ [{block_macro, #{
+ name := <<"image">>,
+ target := <<"images/layout.png">>,
+ 1 := <<"J14P main circuit board">>
+ }, <<>>, _}] = parse(<<"image::images/layout.png[J14P main circuit board]">>),
+ [{block_macro, #{
+ name := <<"image">>,
+ target := <<"images/layout.png">>,
+ 1 := <<"J14P main circuit board">>,
+ <<"title">> := <<"Main circuit board">>
+ }, <<>>, _}] = parse(
+ <<"image::images/layout.png[\"J14P main circuit board\", "
+ "title=\"Main circuit board\"]">>),
+ ok.
+
+block_macro_include_test() ->
+ [{block_macro, #{
+ name := <<"include">>,
+ target := <<"chapter1.txt">>,
+ <<"tabsize">> := <<"4">>
+ }, <<>>, _}] = parse(<<"include::chapter1.txt[tabsize=4]">>),
+ ok.
+
+block_macro_ifdef_test() ->
+ [{block_macro, #{
+ name := <<"ifdef">>,
+ target := <<"revnumber">>,
+ 0 := <<>>
+ }, <<>>, _}] = parse(<<"ifdef::revnumber[]">>),
+ [{block_macro, #{
+ name := <<"ifdef">>,
+ target := <<"revnumber">>,
+ 1 := <<"Version number 42">>
+ }, <<>>, _}] = parse(<<"ifdef::revnumber[Version number 42]">>),
+ ok.
+
+block_macro_ifndef_test() ->
+ [{block_macro, #{
+ name := <<"ifndef">>,
+ target := <<"revnumber">>,
+ 0 := <<>>
+ }, <<>>, _}] = parse(<<"ifndef::revnumber[]">>),
+ ok.
+
+block_macro_endif_test() ->
+ [{block_macro, #{
+ name := <<"endif">>,
+ target := <<"revnumber">>,
+ 0 := <<>>
+ }, <<>>, _}] = parse(<<"endif::revnumber[]">>),
+ %% Some macros accept an empty target.
+ [{block_macro, #{
+ name := <<"endif">>,
+ target := <<>>,
+ 0 := <<>>
+ }, <<>>, _}] = parse(<<"endif::[]">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 17.1
+%%
+%% A bulleted list item starts with "-" or with one to five "*"
+%% characters followed by whitespace. The line is trimmed before
+%% matching. Dash items get the type bulleted_alt; star items get
+%% the type bulleted, with the number of stars as the level.
+bulleted_list(St) ->
+    {Type, Level, Text} = case trim(read_line(St)) of
+        <<"-", C, R/bits>> when ?IS_WS(C) -> {bulleted_alt, 1, R};
+        <<"*", C, R/bits>> when ?IS_WS(C) -> {bulleted, 1, R};
+        <<"**", C, R/bits>> when ?IS_WS(C) -> {bulleted, 2, R};
+        <<"***", C, R/bits>> when ?IS_WS(C) -> {bulleted, 3, R};
+        <<"****", C, R/bits>> when ?IS_WS(C) -> {bulleted, 4, R};
+        <<"*****", C, R/bits>> when ?IS_WS(C) -> {bulleted, 5, R}
+    end,
+    %% The item text is handed to list_item/3 along with its attributes.
+    list_item(St, #{
+        type => Type,
+        level => Level
+    }, Text).
+
+-ifdef(TEST).
+bulleted_list_test() ->
+ [{list_item, #{
+ type := bulleted_alt,
+ level := 1
+ }, [{paragraph, _, <<"List item.">>, _}], _}] = parse(<<"- List item.">>),
+ [{list_item, #{
+ type := bulleted,
+ level := 1
+ }, [{paragraph, _, <<"List item.">>, _}], _}] = parse(<<"* List item.">>),
+ [{list_item, #{
+ type := bulleted,
+ level := 2
+ }, [{paragraph, _, <<"List item.">>, _}], _}] = parse(<<"** List item.">>),
+ [{list_item, #{
+ type := bulleted,
+ level := 3
+ }, [{paragraph, _, <<"List item.">>, _}], _}] = parse(<<"*** List item.">>),
+ [{list_item, #{
+ type := bulleted,
+ level := 4
+ }, [{paragraph, _, <<"List item.">>, _}], _}] = parse(<<"**** List item.">>),
+ [{list_item, #{
+ type := bulleted,
+ level := 5
+ }, [{paragraph, _, <<"List item.">>, _}], _}] = parse(<<"***** List item.">>),
+ %% Two list items one after the other.
+ [
+ {list_item, #{type := bulleted, level := 1},
+ [{paragraph, _, <<"List item 1.">>, _}], _},
+ {list_item, #{type := bulleted, level := 1},
+ [{paragraph, _, <<"List item 2.">>, _}], _}
+ ] = parse(<<"* List item 1.\n* List item 2.">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 17.2
+%%
+%% Only implicit numbering is implemented: one to five "."
+%% characters followed by whitespace. The number of dots is
+%% the nesting level. The line is trimmed before matching.
+numbered_list(St) ->
+    {Level, Text} = case trim(read_line(St)) of
+        <<".", C, R/bits>> when ?IS_WS(C) -> {1, R};
+        <<"..", C, R/bits>> when ?IS_WS(C) -> {2, R};
+        <<"...", C, R/bits>> when ?IS_WS(C) -> {3, R};
+        <<"....", C, R/bits>> when ?IS_WS(C) -> {4, R};
+        <<".....", C, R/bits>> when ?IS_WS(C) -> {5, R}
+    end,
+    list_item(St, #{
+        type => numbered,
+        level => Level
+    }, Text).
+
+-ifdef(TEST).
+numbered_list_test() ->
+ [{list_item, #{
+ type := numbered,
+ level := 1
+ }, [{paragraph, _, <<"Arabic (decimal) numbered list item.">>, _}], _}]
+ = parse(<<". Arabic (decimal) numbered list item.">>),
+ [{list_item, #{
+ type := numbered,
+ level := 2
+ }, [{paragraph, _, <<"Lower case alpha (letter) numbered list item.">>, _}], _}]
+ = parse(<<".. Lower case alpha (letter) numbered list item.">>),
+ [{list_item, #{
+ type := numbered,
+ level := 3
+ }, [{paragraph, _, <<"Lower case roman numbered list item.">>, _}], _}]
+ = parse(<<"... Lower case roman numbered list item.">>),
+ [{list_item, #{
+ type := numbered,
+ level := 4
+ }, [{paragraph, _, <<"Upper case alpha (letter) numbered list item.">>, _}], _}]
+ = parse(<<".... Upper case alpha (letter) numbered list item.">>),
+ [{list_item, #{
+ type := numbered,
+ level := 5
+ }, [{paragraph, _, <<"Upper case roman numbered list item.">>, _}], _}]
+ = parse(<<"..... Upper case roman numbered list item.">>),
+ %% Two list items one after the other.
+ [
+ {list_item, #{type := numbered, level := 1},
+ [{paragraph, _, <<"List item 1.">>, _}], _},
+ {list_item, #{type := numbered, level := 1},
+ [{paragraph, _, <<"List item 2.">>, _}], _}
+ ] = parse(<<". List item 1.\n. List item 2.">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 17.3
+%%
+%% The Asciidoc User Guide makes it sound like the
+%% label must be hard on the left margin but we don't
+%% enforce that to simplify the implementation.
+labeled_list(St) ->
+ Line0 = read_line(St),
+ %% We can't match directly to find the list separator,
+ %% we have to search for it.
+ {Label0, Sep, ListItem0} = find_labeled_list(Line0),
+ Label = trim(Label0),
+ ListItem = trim(ListItem0),
+ %% The label must not be empty.
+ true = trim(Label) =/= <<>>,
+ list_item(St, #{
+ type => labeled,
+ separator => Sep,
+ label => Label
+ }, ListItem).
+
+find_labeled_list(Line) ->
+ find_labeled_list(Line, <<>>).
+
+%% We don't have a final clause with an empty binary because
+%% we want to crash if we don't find a labeled list.
+find_labeled_list(<<"::">>, Acc) -> {Acc, <<"::">>, <<>>};
+find_labeled_list(<<":::">>, Acc) -> {Acc, <<":::">>, <<>>};
+find_labeled_list(<<"::::">>, Acc) -> {Acc, <<"::::">>, <<>>};
+find_labeled_list(<<";;">>, Acc) -> {Acc, <<";;">>, <<>>};
+find_labeled_list(<<"::", C, R/bits>>, Acc) when ?IS_WS(C) -> {Acc, <<"::">>, R};
+find_labeled_list(<<":::", C, R/bits>>, Acc) when ?IS_WS(C) -> {Acc, <<":::">>, R};
+find_labeled_list(<<"::::", C, R/bits>>, Acc) when ?IS_WS(C) -> {Acc, <<"::::">>, R};
+find_labeled_list(<<";;", C, R/bits>>, Acc) when ?IS_WS(C) -> {Acc, <<";;">>, R};
+find_labeled_list(<<C, R/bits>>, Acc) -> find_labeled_list(R, <<Acc/binary, C>>).
+
+-ifdef(TEST).
+labeled_list_test() ->
+ [{list_item, #{type := labeled, separator := <<"::">>, label := <<"Question">>},
+ [{paragraph, _, <<"Answer!">>, _}], _}] = parse(<<"Question:: Answer!">>),
+ [{list_item, #{type := labeled, separator := <<"::">>, label := <<"Question">>},
+ [{paragraph, _, <<"Answer!">>, _}], _}] = parse(<<"Question::\n Answer!">>),
+ %% Long snippet from the Asciidoc User Guide, minus literal paragraph.
+ %% @todo Add the literal paragraph back once they are implemented.
+ [
+ {list_item, #{type := labeled, separator := <<"::">>, label := <<"In">>},
+ [{paragraph, _, <<>>, _}], _},
+ {list_item, #{type := labeled, separator := <<"::">>, label := <<"Lorem">>},
+ [{paragraph, _, <<"Fusce euismod commodo velit.">>, _}], _},
+ {list_item, #{type := labeled, separator := <<"::">>, label := <<"Ipsum">>},
+ [{paragraph, _, <<"Vivamus fringilla mi eu lacus.">>, _}], _},
+ {list_item, #{type := bulleted, level := 1},
+ [{paragraph, _, <<"Vivamus fringilla mi eu lacus.">>, _}], _},
+ {list_item, #{type := bulleted, level := 1},
+ [{paragraph, _, <<"Donec eget arcu bibendum nunc consequat lobortis.">>, _}], _},
+ {list_item, #{type := labeled, separator := <<"::">>, label := <<"Dolor">>},
+ [{paragraph, _, <<"Donec eget arcu bibendum nunc consequat lobortis.">>, _}], _},
+ {list_item, #{type := labeled, separator := <<";;">>, label := <<"Suspendisse">>},
+ [{paragraph, _, <<"A massa id sem aliquam auctor.">>, _}], _},
+ {list_item, #{type := labeled, separator := <<";;">>, label := <<"Morbi">>},
+ [{paragraph, _, <<"Pretium nulla vel lorem.">>, _}], _},
+ {list_item, #{type := labeled, separator := <<";;">>, label := <<"In">>},
+ [{paragraph, _, <<"Dictum mauris in urna.">>, _}], _},
+ {list_item, #{type := labeled, separator := <<":::">>, label := <<"Vivamus">>},
+ [{paragraph, _, <<"Fringilla mi eu lacus.">>, _}], _},
+ {list_item, #{type := labeled, separator := <<":::">>, label := <<"Donec">>},
+ [{paragraph, _, <<"Eget arcu bibendum nunc consequat lobortis.">>, _}], _}
+ ] = parse(<<
+ "In::\n"
+ "Lorem::\n"
+ " Fusce euismod commodo velit.\n"
+ %% @todo Add literal paragraph back here.
+ "Ipsum:: Vivamus fringilla mi eu lacus.\n"
+ " * Vivamus fringilla mi eu lacus.\n"
+ " * Donec eget arcu bibendum nunc consequat lobortis.\n"
+ "Dolor::\n"
+ " Donec eget arcu bibendum nunc consequat lobortis.\n"
+ " Suspendisse;;\n"
+ " A massa id sem aliquam auctor.\n"
+ " Morbi;;\n"
+ " Pretium nulla vel lorem.\n"
+ " In;;\n"
+ " Dictum mauris in urna.\n"
+ " Vivamus::: Fringilla mi eu lacus.\n"
+ " Donec::: Eget arcu bibendum nunc consequat lobortis.\n">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 20
+-spec callout_list(_) -> no_return().
+callout_list(State) ->
+    %% @todo Callout lists are not implemented yet. The throw is
+    %% treated as a failed match by oneof/2, which catches every
+    %% exception raised by a block parser.
+    throw({not_implemented, State}).
+
+%% Asciidoc User Guide 17
+%%
+%% We do not apply rules about blocks being contained in
+%% the list item at this stage of parsing. We only concern
+%% ourselves with identifying blocks, and then another pass
+%% will build a tree from the result of this pass.
+%% Builds a list_item block from the text that follows the list marker.
+%%
+%% St is the parser state, Attrs the list item attributes and RawText
+%% the remainder of the line after the marker. The item contents are
+%% returned as a single paragraph sharing the item's annotation.
+list_item(St, Attrs, RawText) ->
+    FirstLine = trim(RawText),
+    Ann = ann(St),
+    ok = skip_blank_before_contents(St, FirstLine, Attrs),
+    %% A list item ends on end of file, empty line or when a new list starts.
+    %% Any indentation is optional and therefore removed.
+    Text = read_while(St, fun fold_list_item/1, FirstLine),
+    Para = {paragraph, #{}, Text, Ann},
+    {list_item, Attrs, [Para], Ann}.
+
+%% For labeled lists the contents can begin on a separate line from
+%% the label, in which case the empty lines in between are skipped.
+skip_blank_before_contents(St, <<>>, #{type := labeled}) ->
+    _ = read_while(St, fun skip_empty_lines/1, <<>>),
+    ok;
+skip_blank_before_contents(_, _, _) ->
+    ok.
+
+%% read_while/3 callback: keeps consuming lines for as long as they
+%% contain only whitespace, accumulating nothing.
+skip_empty_lines(eof) ->
+    done;
+skip_empty_lines(Line) ->
+    IsBlank = trim(Line) =:= <<>>,
+    if
+        IsBlank -> {more, <<>>};
+        true -> done
+    end.
+
+%% read_while/3 callback for the text of a list item. Returns done
+%% when the line ends the item, {more, Line} with the trimmed line
+%% otherwise.
+fold_list_item(eof) ->
+    done;
+fold_list_item(Line0) ->
+    Line = trim(Line0),
+    case ends_list_item(Line) of
+        true -> done;
+        false -> fold_list_item_text(Line)
+    end.
+
+%% Lines that end a list item: empty line, list continuation,
+%% comment, or the marker of a bulleted/numbered list item.
+ends_list_item(<<>>) -> true;
+ends_list_item(<<"+">>) -> true;
+ends_list_item(<<"//", _/bits>>) -> true;
+ends_list_item(<<"-", C, _/bits>>) when ?IS_WS(C) -> true;
+ends_list_item(<<$*, Rest/bits>>) -> is_list_marker(Rest, $*, 1);
+ends_list_item(<<$., Rest/bits>>) -> is_list_marker(Rest, $., 1);
+ends_list_item(_) -> false.
+
+%% A marker is one to five repetitions of Char followed by whitespace.
+%% Count is the number of marker characters consumed so far.
+is_list_marker(<<C, _/bits>>, _, _) when ?IS_WS(C) ->
+    true;
+is_list_marker(<<Char, Rest/bits>>, Char, Count) when Count < 5 ->
+    is_list_marker(Rest, Char, Count + 1);
+is_list_marker(_, _, _) ->
+    false.
+
+%% A line that parses as the start of a labeled list also ends the
+%% item; find_labeled_list/1 crashing means it is ordinary text.
+fold_list_item_text(Line) ->
+    try find_labeled_list(Line) of
+        {_, _, _} -> done
+    catch _:_ ->
+        {more, Line}
+    end.
+
+-ifdef(TEST).
+list_item_test() ->
+ [
+ {list_item, #{type := bulleted, level := 1},
+ [{paragraph, #{}, <<"List item.">>, _}], _},
+ {list_item, #{type := bulleted, level := 2},
+ [{paragraph, #{}, <<"List item.">>, _}], _},
+ {list_item, #{type := bulleted, level := 1},
+ [{paragraph, #{}, <<"List item.">>, _}], _},
+ {list_item, #{type := numbered, level := 1},
+ [{paragraph, #{}, <<"List item.">>, _}], _},
+ {list_item, #{type := numbered, level := 1},
+ [{paragraph, #{}, <<"List item.">>, _}], _},
+ {list_item, #{type := bulleted, level := 1},
+ [{paragraph, #{}, <<"List item.">>, _}], _}
+ ] = parse(<<
+ "* List item.\n"
+ "** List item.\n"
+ "* List item.\n"
+ " . List item.\n"
+ " . List item.\n"
+ "* List item.\n">>),
+ %% Properly detect a labeled list.
+ [
+ {list_item, #{type := bulleted, level := 1},
+ [{paragraph, #{}, <<"List item.\nMultiline.">>, _}], _},
+ {list_item, #{type := labeled, label := <<"Question">>},
+ [{paragraph, #{}, <<"Answer!">>, _}], _}
+ ] = parse(<<
+ "* List item.\n"
+ "Multiline.\n"
+ "Question:: Answer!\n">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 17.7
+%% Parses a list continuation line. Crashes (and therefore fails to
+%% match) when the line is anything other than a lone + followed by
+%% optional whitespace.
+list_item_continuation(St) ->
+    %% Continuations are a single + hard against the left margin.
+    <<"+", Trailing/bits>> = read_line(St),
+    <<>> = trim(Trailing),
+    Ann = ann(St),
+    {list_item_continuation, #{}, <<>>, Ann}.
+
+-ifdef(TEST).
+list_item_continuation_test() ->
+ [{list_item_continuation, _, _, _}] = parse(<<"+">>),
+ [{list_item_continuation, _, _, _}] = parse(<<"+ ">>),
+ [{list_item_continuation, _, _, _}] = parse(<<"+\n">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 16.2
+listing_block(St) ->
+    %% Listing blocks are delimited by dashes. Their contents are
+    %% tagged as verbatim.
+    Attrs = #{<<"subs">> => <<"verbatim">>},
+    delimited_block(St, listing_block, $-, Attrs).
+
+-ifdef(TEST).
+listing_block_test() ->
+ Block = <<
+ "#include <stdio.h>\n"
+ "\n"
+ "int main() {\n"
+ " printf(\"Hello World!\n\");\n"
+ " exit(0);\n"
+ "}">>,
+ [{listing_block, _, Block, _}] = parse(<<
+ "--------------------------------------\n",
+ Block/binary, "\n"
+ "--------------------------------------\n">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 16.3
+literal_block(St) ->
+    %% Literal blocks are delimited by periods. Their contents are
+    %% tagged as verbatim.
+    Attrs = #{<<"subs">> => <<"verbatim">>},
+    delimited_block(St, literal_block, $., Attrs).
+
+-ifdef(TEST).
+literal_block_test() ->
+ Block = <<
+ "Consul *necessitatibus* per id,\n"
+ "consetetur, eu pro everti postulant\n"
+ "homero verear ea mea, qui.">>,
+ [{literal_block, _, Block, _}] = parse(<<
+ "...................................\n",
+ Block/binary, "\n"
+ "...................................\n">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 16.4
+sidebar_block(St) ->
+    %% Sidebar blocks are delimited by asterisks and carry no attributes.
+    delimited_block(St, sidebar_block, $*, #{}).
+
+-ifdef(TEST).
+sidebar_block_test() ->
+ Block = <<
+ "Any AsciiDoc SectionBody element (apart from\n"
+ "SidebarBlocks) can be placed inside a sidebar.">>,
+ [{sidebar_block, _, Block, _}] = parse(<<
+ "************************************************\n",
+ Block/binary, "\n"
+ "************************************************\n">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 16.5
+comment_block(St) ->
+    %% Comment blocks are delimited by slashes and carry no attributes.
+    delimited_block(St, comment_block, $/, #{}).
+
+-ifdef(TEST).
+comment_block_test() ->
+ Block = <<
+ "CommentBlock contents are not processed by\n"
+ "asciidoc(1).">>,
+ [{comment_block, _, Block, _}] = parse(<<
+ "//////////////////////////////////////////\n",
+ Block/binary, "\n"
+ "//////////////////////////////////////////\n">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 16.6
+passthrough_block(St) ->
+    %% Passthrough blocks are delimited by plus signs and carry no attributes.
+    delimited_block(St, passthrough_block, $+, #{}).
+
+-ifdef(TEST).
+passthrough_block_test() ->
+ Block = <<
+ "<table border=\"1\"><tr>\n"
+ " <td>*Cell 1*</td>\n"
+ " <td>*Cell 2*</td>\n"
+ "</tr></table>">>,
+ [{passthrough_block, _, Block, _}] = parse(<<
+ "++++++++++++++++++++++++++++++++++++++\n",
+ Block/binary, "\n"
+ "++++++++++++++++++++++++++++++++++++++\n">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 16.7
+quote_block(St) ->
+    %% Quote blocks are delimited by underscores and carry no attributes.
+    delimited_block(St, quote_block, $_, #{}).
+
+-ifdef(TEST).
+quote_block_test() ->
+ Block = <<
+ "As he spoke there was the sharp sound of horses' hoofs and\n"
+ "grating wheels against the curb, followed by a sharp pull at the\n"
+ "bell. Holmes whistled.\n"
+ "\n"
+ "\"A pair, by the sound,\" said he. \"Yes,\" he continued, glancing\n"
+ "out of the window. \"A nice little brougham and a pair of\n"
+ "beauties. A hundred and fifty guineas apiece. There's money in\n"
+ "this case, Watson, if there is nothing else.\"">>,
+ [{quote_block, _, Block, _}] = parse(<<
+ "____________________________________________________________________\n",
+ Block/binary, "\n"
+ "____________________________________________________________________\n">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 16.8
+example_block(St) ->
+    %% Example blocks are delimited by equal signs and carry no attributes.
+    delimited_block(St, example_block, $=, #{}).
+
+-ifdef(TEST).
+example_block_test() ->
+ Block = <<
+ "Qui in magna commodo, est labitur dolorum an. Est ne magna primis\n"
+ "adolescens.">>,
+ [{example_block, _, Block, _}] = parse(<<
+ "=====================================================================\n",
+ Block/binary, "\n"
+ "=====================================================================\n">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 16
+%% delimited_block/3: block without attributes.
+delimited_block(St, Name, Char) ->
+    delimited_block(St, Name, Char, #{}).
+
+%% delimited_block/4: the delimiter prefix is Char repeated four times.
+delimited_block(St, Name, Char, Attrs) ->
+    delimited_block(St, Name, Char, Attrs, binary:copy(<<Char>>, 4)).
+
+%% delimited_block/5: Four is the mandatory 4-byte prefix of the
+%% delimiter line; whatever follows it (ignoring trailing whitespace)
+%% must consist of Char only. Returns {Name, Attrs, Block, Ann} where
+%% Block is the raw content between the two delimiter lines. Any
+%% mismatch crashes, which the caller interprets as "not this block".
+delimited_block(St, Name, Char, Attrs, Four) ->
+    %% A delimiter block begins by a series of four or more repeated characters.
+    <<Four:4/binary, Tail/bits>> = read_line(St),
+    Ann = ann(St),
+    ok = repeats(trim(Tail, trailing), Char),
+    %% Get the content of the block as-is.
+    FoldContent = fun(Line) -> fold_delimited_block(Line, Four, Char) end,
+    Block = read_while(St, FoldContent, <<>>),
+    %% Skip the trailing delimiter line.
+    _ = read_line(St),
+    {Name, Attrs, Block, Ann}.
+
+%% Accept eof as a closing delimiter.
+%% read_while/3 callback for the content of a delimited block.
+%% Every line is kept untouched until a closing delimiter is found.
+fold_delimited_block(eof, _, _) ->
+    done;
+fold_delimited_block(Line, Four, Char) ->
+    case is_closing_delimiter(Line, Four, Char) of
+        true -> done;
+        false -> {more, Line}
+    end.
+
+%% A closing delimiter starts with the Four prefix and is followed
+%% only by Char repetitions and optional trailing whitespace.
+is_closing_delimiter(Line, Four, Char) ->
+    case Line of
+        <<Four:4/binary, Tail/bits>> ->
+            try repeats(trim(Tail, trailing), Char) of
+                _ -> true
+            catch _:_ ->
+                false
+            end;
+        _ ->
+            false
+    end.
+
+-ifdef(TEST).
+delimited_block_test() ->
+ %% Confirm that the block ends at eof.
+ %%
+ %% We see an extra line break because asciideck_line_reader adds
+ %% one at the end of every file to ease processing.
+ [{listing_block, _, <<"Hello!\n\n">>, _}] = parse(<<
+ "----\n"
+ "Hello!\n">>),
+ %% Same without a trailing line break.
+ %%
+ %% We also see an extra line break for the aforementioned reasons.
+ [{listing_block, _, <<"Hello!\n">>, _}] = parse(<<
+ "----\n"
+ "Hello!">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 16.10
+-spec open_block(_) -> no_return().
+open_block(State) ->
+    %% @todo Open blocks are not implemented yet. The throw is
+    %% treated as a failed match by oneof/2, which catches every
+    %% exception raised by a block parser.
+    throw({not_implemented, State}).
+
+%% Asciidoc User Guide 23
+%%
+%% We do not parse the table in this pass. Instead we
+%% treat it like any other delimited block.
+table(St) ->
+    %% A table delimiter is a pipe followed by three or more equal
+    %% signs; the content is returned raw, like other delimited blocks.
+    Prefix = <<"|===">>,
+    delimited_block(St, table, $=, #{}, Prefix).
+
+-ifdef(TEST).
+table_test() ->
+ Block = <<
+ "|1 |2 |A\n"
+ "|3 |4 |B\n"
+ "|5 |6 |C">>,
+ [{table, _, Block, _}] = parse(<<
+ "|=======\n",
+ Block/binary, "\n"
+ "|=======\n">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 28
+-spec attribute_entry(_) -> no_return().
+attribute_entry(State) ->
+    %% @todo Attribute entries are not implemented yet. The throw is
+    %% treated as a failed match by oneof/2, which catches every
+    %% exception raised by a block parser.
+    throw({not_implemented, State}).
+
+%% Asciidoc User Guide 14, 29
+%% attribute_list/1: parses the next line as an attribute list.
+attribute_list(St) ->
+    attribute_list(St, read_line(St)).
+
+%% attribute_list/2: parses an already read line. The line must be
+%% enclosed in square brackets (surrounding whitespace after the
+%% opening bracket is ignored); anything else crashes.
+attribute_list(St, Line) ->
+    %% First we remove the enclosing square brackets.
+    <<"[", Inner0/bits>> = Line,
+    Inner = trim(Inner0),
+    BodyLen = byte_size(Inner) - 1,
+    <<Body:BodyLen/binary, "]">> = Inner,
+    Attrs = asciideck_attributes_parser:parse(Body),
+    {attribute_list, Attrs, <<>>, ann(St)}.
+
+-ifdef(TEST).
+attribute_list_test() ->
+ [{attribute_list, #{0 := <<"Hello">>, 1 := <<"Hello">>}, <<>>, _}]
+ = parse(<<"[Hello]">>),
+ [{attribute_list, #{
+ 1 := <<"quote">>,
+ 2 := <<"Bertrand Russell">>,
+ 3 := <<"The World of Mathematics (1956)">>
+ }, <<>>, _}]
+ = parse(<<"[quote, Bertrand Russell, The World of Mathematics (1956)]">>),
+ [{attribute_list, #{
+ 1 := <<"22 times">>,
+ <<"backcolor">> := <<"#0e0e0e">>,
+ <<"options">> := <<"noborders,wide">>
+ }, <<>>, _}]
+ = parse(<<"[\"22 times\", backcolor=\"#0e0e0e\", options=\"noborders,wide\"]">>),
+ [{attribute_list, #{
+ 1 := <<"A footnote&#44; &#34;with an image&#34; image:smallnew.png[]">>
+ }, <<>>, _}]
+ = parse(<<"[A footnote&#44; &#34;with an image&#34; image:smallnew.png[]]">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 12
+%% Parses a block title and returns {block_title, #{}, Title, Ann}
+%% with the surrounding whitespace removed from the title.
+block_title(St) ->
+    %% A block title line begins with a period and is followed by the title text.
+    <<".", RawTitle/bits>> = read_line(St),
+    Ann = ann(St),
+    {block_title, #{}, trim(RawTitle), Ann}.
+
+-ifdef(TEST).
+block_title_test() ->
+ %% Valid.
+ [{block_title, _, <<"Notes">>, _}] = parse(<<".Notes">>),
+ [{block_title, _, <<"Notes">>, _}] = parse(<<".Notes ">>),
+ %% Invalid.
+ ?NOT(block_title, parse(<<". Notes">>)),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 15.2
+-spec literal_para(_) -> no_return().
+literal_para(State) ->
+    %% @todo Literal paragraphs are not implemented yet. The throw is
+    %% treated as a failed match by oneof/2, which catches every
+    %% exception raised by a block parser.
+    throw({not_implemented, State}).
+
+%% Asciidoc User Guide 15.4
+-spec admonition_para(_) -> no_return().
+admonition_para(State) ->
+    %% @todo Admonition paragraphs are not implemented yet. The throw
+    %% is treated as a failed match by oneof/2, which catches every
+    %% exception raised by a block parser.
+    throw({not_implemented, State}).
+
+%% Asciidoc User Guide 15.1
+%% Parses a normal paragraph: the current line plus every following
+%% line accepted by fold_para/1, each trimmed and joined with \n.
+%% Crashes when the first line is empty or the reader is at eof.
+para(St) ->
+    %% Paragraph must be hard against the left margin.
+    FirstLine = read_line(St),
+    <<C, _/bits>> = FirstLine,
+    Ann = ann(St),
+    %% @todo Uncomment this line once everything else has been implemented.
+    _ = ?IS_WS(C), % false = ?IS_WS(C),
+    %% Paragraph ends at blank line, end of file or start of delimited block or list.
+    Text = read_while(St, fun fold_para/1, trim(FirstLine)),
+    {paragraph, #{}, Text, Ann}.
+
+%% read_while/3 callback for paragraph text. The paragraph stops at
+%% end of file, at an empty line or at a list continuation.
+fold_para(eof) ->
+    done;
+fold_para(Line) ->
+    fold_para_line(trim(Line)).
+
+fold_para_line(<<>>) -> done;
+fold_para_line(<<"+">>) -> done;
+%% @todo Detect delimited block or list.
+fold_para_line(Text) -> {more, Text}.
+
+-ifdef(TEST).
+para_test() ->
+ LoremIpsum = <<
+ "Lorem ipsum dolor sit amet, consectetur adipiscing elit,\n"
+ "sed do eiusmod tempor incididunt ut labore et dolore\n"
+ "magna aliqua. Ut enim ad minim veniam, quis nostrud\n"
+ "exercitation ullamco laboris nisi ut aliquip ex ea\n"
+ "commodo consequat. Duis aute irure dolor in reprehenderit\n"
+ "in voluptate velit esse cillum dolore eu fugiat nulla\n"
+ "pariatur. Excepteur sint occaecat cupidatat non proident,\n"
+ "sunt in culpa qui officia deserunt mollit anim id est laborum."
+ >>,
+ %% Paragraph followed by end of file.
+ [{paragraph, _, LoremIpsum, _}] = parse(<< LoremIpsum/binary, "\n">>),
+ %% Paragraph followed by end of file with no trailing line break.
+ [{paragraph, _, LoremIpsum, _}] = parse(LoremIpsum),
+ %% Two paragraphs.
+ [{paragraph, _, LoremIpsum, _}, {paragraph, _, LoremIpsum, _}]
+ = parse(<<
+ LoremIpsum/binary,
+ "\n\n",
+ LoremIpsum/binary >>),
+ ok.
+-endif.
+
+%% Control functions.
+
+%% Tries each parser in order and returns the result of the first
+%% one that does not raise. The reader position is restored before
+%% moving on to the next parser. Throws {error, St} when every
+%% parser failed.
+oneof([], St) ->
+    throw({error, St}); %% @todo
+oneof([Parser|Parsers], St=#state{reader=Reader}) ->
+    Start = asciideck_line_reader:get_position(Reader),
+    try Parser(St) of
+        Block -> Block
+    catch _:_ ->
+        asciideck_line_reader:set_position(Reader, Start),
+        oneof(Parsers, St)
+    end.
+
+%% Applies Parse repeatedly, discarding its results, until it raises.
+%% The reader is then rewound to where the failed attempt started
+%% and ok is returned.
+%%
+%% Only the call to Parse is protected. The recursive call lives in
+%% the 'of' section so that it is a proper tail call (no try frame
+%% is kept per skipped block) and so that this catch only ever
+%% handles the failure of its own Parse attempt.
+skip(Parse, St=#state{reader=ReaderPid}) ->
+    Ln = asciideck_line_reader:get_position(ReaderPid),
+    try Parse(St) of
+        _ ->
+            skip(Parse, St)
+    catch _:_ ->
+        asciideck_line_reader:set_position(ReaderPid, Ln),
+        ok
+    end.
+
+%% Line functions.
+
+%% Reads the next line from the line reader attached to the state.
+read_line(St) ->
+    #state{reader=Reader} = St,
+    asciideck_line_reader:read_line(Reader).
+
+%% Feeds lines to Fold until it returns done. Lines returned as
+%% {more, Line} are appended to Acc separated by \n. The line that
+%% produced done is not consumed: the reader is rewound to it.
+read_while(St=#state{reader=Reader}, Fold, Acc) ->
+    Mark = asciideck_line_reader:get_position(Reader),
+    case Fold(read_line(St)) of
+        {more, Line} ->
+            read_while(St, Fold, append_line(Acc, Line));
+        done ->
+            asciideck_line_reader:set_position(Reader, Mark),
+            Acc
+    end.
+
+%% No separator is inserted while the accumulator is still empty.
+append_line(<<>>, Line) -> Line;
+append_line(Acc, Line) -> <<Acc/binary, $\n, Line/binary>>.
+
+%% Builds the annotation map from the reader's current position.
+ann(St) ->
+    #state{reader=Reader} = St,
+    Position = asciideck_line_reader:get_position(Reader),
+    #{line => Position}.
+
+%% trim/1: removes leading and trailing whitespace.
+trim(Line) ->
+    trim(Line, both).
+
+%% trim/2: removes spaces, tabs, CR and LF at both ends of Line
+%% (both) or only at its end (trailing). Always returns a binary.
+trim(Line, Direction) ->
+    Pattern = case Direction of
+        trailing -> "[ \\t\\r\\n]+$";
+        both -> "^[ \\t\\r\\n]+|[ \\t\\r\\n]+$"
+    end,
+    re:replace(Line, Pattern, <<>>, [global, {return, binary}]).
+
+%% Returns ok when every byte of the binary equals the given
+%% character (an empty binary trivially qualifies). There is no
+%% clause for a mismatch: the resulting function_clause error is how
+%% callers learn that the line is not a run of that character.
+repeats(<<>>, _) -> ok;
+repeats(<<C, Rest/bits>>, C) -> repeats(Rest, C).
+
+%% while/2: splits Bin into {Prefix, Rest} where Prefix is the
+%% longest leading run of bytes for which Pred returns true.
+while(Pred, Bin) ->
+    while(Bin, Pred, <<>>).
+
+%% while/3: same, with the bytes accepted so far in Taken.
+while(<<>>, _, Taken) ->
+    {Taken, <<>>};
+while(Bin = <<C, Rest/bits>>, Pred, Taken) ->
+    case Pred(C) of
+        false -> {Taken, Bin};
+        true -> while(Rest, Pred, <<Taken/binary, C>>)
+    end.
diff --git a/src/asciideck_inline_pass.erl b/src/asciideck_inline_pass.erl
new file mode 100644
index 0000000..3ed79b1
--- /dev/null
+++ b/src/asciideck_inline_pass.erl
@@ -0,0 +1,308 @@
+%% Copyright (c) 2017-2018, Loïc Hoguin <[email protected]>
+%%
+%% Permission to use, copy, modify, and/or distribute this software for any
+%% purpose with or without fee is hereby granted, provided that the above
+%% copyright notice and this permission notice appear in all copies.
+%%
+%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+%% This pass walks over the tree and parses inline elements.
+-module(asciideck_inline_pass).
+
+-export([run/1]).
+
+-import(asciideck_block_parser, [trim/1, while/2]).
+
+-type inline_ast() :: list(). %% @todo
+-export_type([inline_ast/0]).
+
+%% Applies inline parsing to every element of the AST, recursing
+%% into elements whose content is itself a list of blocks.
+run(AST) ->
+    [run_block(Block) || Block <- AST].
+
+run_block(Data) when is_binary(Data) ->
+    inline(Data);
+%% We do not do any inline formatting for verbatim blocks,
+%% for example listing blocks.
+%%
+%% @todo subs is a list of values.
+run_block(Item={_, #{<<"subs">> := <<"verbatim">>}, _, _}) ->
+    Item;
+%% Labeled lists' labels can also have inline formatting.
+run_block({Type, Attrs=#{label := Label}, Items, Ann}) when is_list(Items) ->
+    {Type, Attrs#{label => inline(Label)}, run(Items), Ann};
+run_block({Type, Attrs, Items, Ann}) when is_list(Items) ->
+    {Type, Attrs, run(Items), Ann};
+run_block({Type, Attrs, Data, Ann}) ->
+    {Type, Attrs, inline(Data), Ann}.
+
+%% We reduce inline content with a single text element
+%% with no formatting to a simple binary.
+%% Parses the inline elements of Data. The result is a plain binary
+%% when there is no formatting at all, a list of text and inline
+%% nodes otherwise.
+inline(<<>>) ->
+    <<>>;
+inline(Data) ->
+    simplify(inline(Data, <<>>, [])).
+
+simplify([]) -> <<>>;
+simplify([Text]) when is_binary(Text) -> Text;
+simplify(AST) -> AST.
+
+%% BinAcc holds the plain text read since the last inline element,
+%% Acc the elements produced so far in reverse order.
+-spec inline(binary(), binary(), inline_ast()) -> inline_ast().
+inline(<<>>, <<>>, Acc) ->
+    lists:reverse(Acc);
+inline(<<>>, BinAcc, Acc) ->
+    lists:reverse([BinAcc|Acc]);
+inline(Data, BinAcc, Acc) ->
+    oneof(Data, BinAcc, Acc, inline_parsers()).
+
+%% The inline parsers, in the order in which they are tried.
+inline_parsers() ->
+    [
+        %% Links.
+        fun xref/2,
+        fun link/2,
+        fun http_link/2,
+        fun https_link/2,
+        %% Quoted text.
+        fun emphasized_single_quote/2,
+        fun emphasized_underline/2,
+        fun strong/2,
+        %% Passthrough macros.
+        fun inline_literal_passthrough/2
+    ].
+
+%% The inline pass replaces \r\n and \n with a simple space
+%% when it occurs within normal text.
+%% Once every parser has been tried without success the next
+%% character is plain text and is moved to the text accumulator.
+oneof(Data, BinAcc, Acc, []) ->
+    {Char, Rest} = take_text_char(Data),
+    inline(Rest, <<BinAcc/binary, Char>>, Acc);
+%% Otherwise try the next parser. A parser that raises simply does
+%% not match at this position.
+oneof(Data, BinAcc, Acc, [Parse|Tail]) ->
+    Prev = previous_char(BinAcc),
+    try Parse(Data, Prev) of
+        {ok, Inline, Rest} ->
+            inline(Rest, <<>>, [Inline|flush_text(BinAcc, Acc)]);
+        {skip, Text, Rest} ->
+            oneof(Rest, <<BinAcc/binary, Text/binary>>, Acc, Tail)
+    catch _:_ ->
+        oneof(Data, BinAcc, Acc, Tail)
+    end.
+
+%% Line breaks (\r\n and \n) within normal text become a space.
+take_text_char(<<$\r, $\n, Rest/bits>>) -> {$\s, Rest};
+take_text_char(<<$\n, Rest/bits>>) -> {$\s, Rest};
+take_text_char(<<C, Rest/bits>>) -> {C, Rest}.
+
+%% Last character of the pending text, undefined when there is none.
+previous_char(<<>>) -> undefined;
+previous_char(BinAcc) -> binary:last(BinAcc).
+
+%% Pending text, if any, becomes an element of its own.
+flush_text(<<>>, Acc) -> Acc;
+flush_text(BinAcc, Acc) -> [BinAcc|Acc].
+
+-ifdef(TEST).
+text_test() ->
+ <<>> = inline(<<>>),
+ <<"Hello, Robert">> = inline(<<"Hello, Robert">>),
+ ok.
+-endif.
+
+-define(IS_BOUNDARY(C), C =:= undefined; C =:= $\s; C =:= $\t; C =:= $\r; C =:= $\n; C =:= $().
+
+%% Asciidoc User Guide 21.2.1
+%%
+%% We currently do not implement the <<...>> form.
+%% Parses xref:ID[Caption] and returns {ok, XRef, Rest} where XRef is
+%% {xref, #{id => ID}, Caption, inline}. Prev is the character
+%% preceding the macro (undefined at the start of the text); the
+%% macro is only recognized after a boundary character. Crashes when
+%% the input does not have this shape.
+xref(<<"xref:", IDAndCaption/bits>>, Prev) when ?IS_BOUNDARY(Prev) ->
+    %% ID must not contain whitespace characters. Line breaks are
+    %% whitespace too: without them in the list an ID could span
+    %% several lines, unlike the target of link/2.
+    {ID, <<"[", Caption0/bits>>} = while(fun(C) ->
+        (C =/= $[) andalso (C =/= $\s) andalso (C =/= $\t)
+            andalso (C =/= $\r) andalso (C =/= $\n)
+    end, IDAndCaption),
+    %% It is followed by a caption.
+    {Caption1, <<"]", Rest/bits>>} = while(fun(C) ->
+        C =/= $]
+    end, Caption0),
+    Caption = trim(Caption1),
+    {ok, {xref, #{
+        id => ID
+    }, Caption, inline}, Rest}.
+
+-ifdef(TEST).
+xref_test() ->
+ [{xref, #{
+ id := <<"tiger_image">>
+ }, <<"face of a tiger">>, _}] = inline(<<"xref:tiger_image[face of a tiger]">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 21.1.3
+%% Parses link:Target[Caption] and returns {ok, Link, Rest} where
+%% Link is {link, #{target => Target}, Caption, inline}. Crashes
+%% when the input does not have this shape.
+link(<<"link:", TargetAndCaption/bits>>, Prev) when ?IS_BOUNDARY(Prev) ->
+    %% Target must not contain whitespace characters.
+    IsTargetChar = fun(C) ->
+        not lists:member(C, [$[, $\s, $\t, $\r, $\n])
+    end,
+    {Target, <<"[", AfterBracket/bits>>} = while(IsTargetChar, TargetAndCaption),
+    %% It is followed by a caption.
+    {RawCaption, <<"]", Rest/bits>>} = while(fun(C) -> C =/= $] end, AfterBracket),
+    Link = {link, #{target => Target}, trim(RawCaption), inline},
+    {ok, Link, Rest}.
+
+-ifdef(TEST).
+link_test() ->
+ [{link, #{
+ target := <<"downloads/foo.zip">>
+ }, <<"download foo.zip">>, _}] = inline(<<"link:downloads/foo.zip[download foo.zip]">>),
+ [{link, #{
+ target := <<"chapter1.asciidoc#fragment">>
+ }, <<"Chapter 1.">>, _}] = inline(<<"link:chapter1.asciidoc#fragment[Chapter 1.]">>),
+ [
+ {link, #{target := <<"first.zip">>}, <<"first">>, _},
+ <<", ">>,
+ {link, #{target := <<"second.zip">>}, <<"second">>, _}
+ ] = inline(<<"link:first.zip[first],\nlink:second.zip[second]">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 21.1.3
+%% Recognizes a direct link starting with the http: scheme.
+http_link(<<"http:", AfterScheme/bits>>, Prev) when ?IS_BOUNDARY(Prev) ->
+    Scheme = <<"http:">>,
+    direct_link(AfterScheme, Scheme).
+
+%% Parses the remainder of a direct link. Data is the text after the
+%% scheme and Prefix the scheme itself, which is put back in front
+%% of the target.
+direct_link(Data, Prefix) ->
+    %% Target must not contain whitespace characters.
+    IsTargetChar = fun(C) ->
+        not lists:member(C, [$[, $\s, $\t, $\r, $\n])
+    end,
+    {Target0, AfterTarget} = while(IsTargetChar, Data),
+    Target = <<Prefix/binary, Target0/binary>>,
+    {Caption, Rest} = link_caption(AfterTarget, Target),
+    {ok, {link, #{target => Target}, Caption, inline}, Rest}.
+
+%% The target is optionally followed by a caption in square
+%% brackets. Otherwise the link itself is the caption.
+link_caption(<<"[", Caption0/bits>>, _) ->
+    {Caption1, <<"]", Rest/bits>>} = while(fun(C) -> C =/= $] end, Caption0),
+    {trim(Caption1), Rest};
+link_caption(Rest, Target) ->
+    {Target, Rest}.
+
+-ifdef(TEST).
+http_link_test() ->
+ [
+ <<"If you have ">>,
+ {link, #{
+ target := <<"http://example.org/hello#fragment">>
+ }, <<"http://example.org/hello#fragment">>, _},
+ <<" then:">>
+ ] = inline(<<"If you have http://example.org/hello#fragment then:">>),
+ [
+ <<"If you have ">>,
+ {link, #{
+ target := <<"http://example.org/hello#fragment">>
+ }, <<"http://example.org/hello#fragment">>, _},
+ <<" then:">>
+ ] = inline(<<"If you have http://example.org/hello#fragment\nthen:">>),
+ [
+ <<"Oh, ">>,
+ {link, #{
+ target := <<"http://example.org/hello#fragment">>
+ }, <<"hello there">>, _},
+ <<", young lad.">>
+ ] = inline(<<"Oh, http://example.org/hello#fragment[hello there], young lad.">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 21.1.3
+%% Recognizes a direct link starting with the https: scheme.
+https_link(<<"https:", AfterScheme/bits>>, Prev) when ?IS_BOUNDARY(Prev) ->
+    Scheme = <<"https:">>,
+    direct_link(AfterScheme, Scheme).
+
+-ifdef(TEST).
+https_link_test() ->
+ [
+ <<"If you have ">>,
+ {link, #{
+ target := <<"https://example.org/hello#fragment">>
+ }, <<"https://example.org/hello#fragment">>, _},
+ <<" then:">>
+ ] = inline(<<"If you have https://example.org/hello#fragment then:">>),
+ [
+ <<"If you have ">>,
+ {link, #{
+ target := <<"https://example.org/hello#fragment">>
+ }, <<"https://example.org/hello#fragment">>, _},
+ <<" then:">>
+ ] = inline(<<"If you have https://example.org/hello#fragment\nthen:">>),
+ [
+ <<"Oh, ">>,
+ {link, #{
+ target := <<"https://example.org/hello#fragment">>
+ }, <<"hello there">>, _},
+ <<", young lad.">>
+ ] = inline(<<"Oh, https://example.org/hello#fragment[hello there], young lad.">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 10.1
+%% @todo <<"\\**"
+%% @todo <<"\\*"
+%% @todo <<"**"
+%% Emphasized text enclosed in single quotes.
+emphasized_single_quote(Data, Prev) ->
+    Quote = $',
+    quoted_text(Data, Prev, emphasized, Quote, Quote).
+%% Emphasized text enclosed in underline characters.
+emphasized_underline(Data, Prev) ->
+    Quote = $_,
+    quoted_text(Data, Prev, emphasized, Quote, Quote).
+%% Strong text enclosed in asterisks.
+strong(Data, Prev) ->
+    Quote = $*,
+    quoted_text(Data, Prev, strong, Quote, Quote).
+
+%% Parses text enclosed between the Left and Right characters and
+%% returns {ok, {Type, Attrs, Content, inline}, Rest}. The content
+%% goes through inline parsing itself. Crashes when the text does
+%% not start with Left or when Right is never found.
+quoted_text(<<Left, Rest0/bits>>, Prev, Type, Left, Right) when ?IS_BOUNDARY(Prev) ->
+    NotClosing = fun(C) -> C =/= Right end,
+    {Content, <<Right, Rest/bits>>} = while(NotClosing, Rest0),
+    Attrs = #{left => Left, right => Right},
+    {ok, {Type, Attrs, inline(Content), inline}, Rest}.
+
+-ifdef(TEST).
+emphasized_test() ->
+ [
+ <<"Word phrases ">>,
+ {emphasized, #{left := $', right := $'},
+ <<"enclosed in single quote characters">>, _},
+ <<" (acute accents) or ">>,
+ {emphasized, #{left := $_, right := $_},
+ <<"underline characters">>, _},
+ <<" are emphasized.">>
+ ] = inline(<<
+ "Word phrases 'enclosed in single quote characters' (acute accents) "
+ "or _underline characters_ are emphasized."
+ >>),
+ ok.
+
+strong_test() ->
+ [
+ <<"Word phrases ">>,
+ {strong, #{left := $*, right := $*},
+ <<"enclosed in asterisk characters">>, _},
+ <<" are rendered in a strong font (usually bold).">>
+ ] = inline(<<
+ "Word phrases *enclosed in asterisk characters* "
+ "are rendered in a strong font (usually bold)."
+ >>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 21.4
+%% Parses text enclosed in backticks. The content is returned as-is,
+%% without inline parsing.
+inline_literal_passthrough(<<"`", Rest0/bits>>, Prev) when ?IS_BOUNDARY(Prev) ->
+    NotBacktick = fun(C) -> C =/= $` end,
+    {Content, <<"`", Rest/bits>>} = while(NotBacktick, Rest0),
+    Passthrough = {inline_literal_passthrough, #{}, Content, inline},
+    {ok, Passthrough, Rest}.
+
+-ifdef(TEST).
+inline_literal_passthrough_test() ->
+ [
+ <<"Word phrases ">>,
+ {inline_literal_passthrough, #{}, <<"enclosed in backtick characters">>, _},
+ <<" (grave accents)...">>
+ ] = inline(<<"Word phrases `enclosed in backtick characters` (grave accents)...">>),
+ ok.
+-endif.
diff --git a/src/asciideck_line_reader.erl b/src/asciideck_line_reader.erl
new file mode 100644
index 0000000..240c70b
--- /dev/null
+++ b/src/asciideck_line_reader.erl
@@ -0,0 +1,94 @@
+%% Copyright (c) 2017-2018, Loïc Hoguin <[email protected]>
+%%
+%% Permission to use, copy, modify, and/or distribute this software for any
+%% purpose with or without fee is hereby granted, provided that the above
+%% copyright notice and this permission notice appear in all copies.
+%%
+%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+-module(asciideck_line_reader).
+-behaviour(gen_server).
+
+%% API.
+-export([start_link/1]).
+-export([read_line/1]).
+-export([get_position/1]).
+-export([set_position/2]).
+
+%% gen_server.
+-export([init/1]).
+-export([handle_call/3]).
+-export([handle_cast/2]).
+-export([handle_info/2]).
+-export([terminate/2]).
+-export([code_change/3]).
+
+-record(state, {
+ lines :: [binary()],
+ length :: non_neg_integer(),
+ pos = 1 :: non_neg_integer()
+}).
+
+%% API.
+
+%% Starts a line reader over the given document.
+-spec start_link(binary()) -> {ok, pid()}.
+start_link(Data) ->
+    InitArgs = [Data],
+    gen_server:start_link(?MODULE, InitArgs, []).
+
+%% Returns the line at the current position and moves to the next
+%% one, or eof once every line has been read.
+-spec read_line(pid()) -> binary() | eof.
+read_line(Reader) ->
+    gen_server:call(Reader, read_line).
+
+%% @todo peek_line
+
+%% Returns the number of the line that read_line/1 will return next.
+-spec get_position(pid()) -> pos_integer().
+get_position(Reader) ->
+    gen_server:call(Reader, get_position).
+
+%% Moves the reader to the given line number. This is a cast: it
+%% returns immediately without waiting for the reader.
+-spec set_position(pid(), pos_integer()) -> ok.
+set_position(Reader, Pos) ->
+    Request = {set_position, Pos},
+    gen_server:cast(Reader, Request).
+
+%% gen_server.
+
+%% Splits the document into lines on \n and records how many
+%% lines there are.
+init([Data]) ->
+    DocLines = binary:split(Data, <<"\n">>, [global]),
+    %% We add an empty line at the end to simplify parsing.
+    %% The drawback is that when parsing blocks this empty
+    %% line will be included in the result if the block is
+    %% not properly closed.
+    Lines = DocLines ++ [<<>>],
+    Length = length(Lines),
+    {ok, #state{lines=Lines, length=Length}}.
+
+%% read_line replies with the current line and advances, or with
+%% eof past the last line. get_position replies with the current
+%% position. Any other request gets the reply ignored.
+%%
+%% @todo I know this isn't the most efficient. We could keep
+%% the lines read separately and roll back when set_position
+%% wants us to. But it works fine for now.
+handle_call(read_line, _From, State=#state{lines=Lines, length=Length, pos=Pos}) ->
+    case Pos > Length of
+        true ->
+            {reply, eof, State};
+        false ->
+            {reply, lists:nth(Pos, Lines), State#state{pos=Pos + 1}}
+    end;
+handle_call(get_position, _From, State) ->
+    {reply, State#state.pos, State};
+handle_call(_Request, _From, State) ->
+    {reply, ignored, State}.
+
+%% set_position replaces the current position. Other casts are ignored.
+handle_cast({set_position, NewPos}, State) ->
+    NewState = State#state{pos=NewPos},
+    {noreply, NewState};
+handle_cast(_Msg, State) ->
+    {noreply, State}.
+
+%% Any message received outside of calls and casts is ignored and
+%% the state is kept unchanged.
+handle_info(_Info, State) ->
+ {noreply, State}.
+
+%% Nothing to clean up on termination.
+terminate(_Reason, _State) ->
+ ok.
+
+%% The state is carried over unchanged across code upgrades.
+code_change(_OldVsn, State, _Extra) ->
+ {ok, State}.
diff --git a/src/asciideck_lists_pass.erl b/src/asciideck_lists_pass.erl
new file mode 100644
index 0000000..efb8e87
--- /dev/null
+++ b/src/asciideck_lists_pass.erl
@@ -0,0 +1,155 @@
+%% Copyright (c) 2017-2018, Loïc Hoguin <[email protected]>
+%%
+%% Permission to use, copy, modify, and/or distribute this software for any
+%% purpose with or without fee is hereby granted, provided that the above
+%% copyright notice and this permission notice appear in all copies.
+%%
+%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+%% The purpose of this pass is to aggregate list_item
+%% blocks into proper lists. This involves building a
+%% tree based on the rules for list items.
+%%
+%% The general rules are:
+%%
+%% - Any list item of different type/level than the
+%% current list item is a child of the latter.
+%%
+%% - The level ultimately does not matter when building
+%% the tree, * then **** then ** is accepted just fine.
+%%
+%% - Lists of the same type as a parent are not allowed.
+%% On the other hand reusing a type in different parts
+%% of the tree is not a problem.
+%%
+%% - Any literal paragraph following a list item is a
+%% child of that list item. @todo
+%%
+%% - Any other block can be included as a child by using
+%% list continuations.
+-module(asciideck_lists_pass).
+
+-export([run/1]).
+
+%% Entry point of the pass: groups the list_item blocks of the AST
+%% into list blocks.
+run(AST) ->
+    Acc0 = [],
+    list(AST, Acc0).
+
+%% Walks the top-level blocks. A list_item starts a list which
+%% absorbs the blocks that belong to it; everything else is kept
+%% as-is. Acc holds the output blocks in reverse order.
+list([], Acc) ->
+    lists:reverse(Acc);
+%% Any trailing block continuation is ignored.
+list([{list_item_continuation, _, _, _}], Acc) ->
+    lists:reverse(Acc);
+%% The first list item contains the attributes for the list.
+list([First={list_item, ListAttrs, _, Ann}|Rest0], Acc) ->
+    {Items, Rest} = item(Rest0, First, [type(ListAttrs)], []),
+    List = {list, ListAttrs, Items, Ann},
+    list(Rest, [List|Acc]);
+list([Other|Rest], Acc) ->
+    list(Rest, [Other|Acc]).
+
+%% Bulleted/numbered list item of the same type.
+item([NextLI={list_item, #{type := T, level := L}, _, _}|Tail],
+ CurrentLI={list_item, #{type := T, level := L}, _, _}, Parents, Acc) ->
+ item(Tail, NextLI, Parents, [reverse_children(CurrentLI)|Acc]);
+%% Labeled list item of the same type.
+item([NextLI={list_item, #{type := T, separator := S}, _, _}|Tail],
+ CurrentLI={list_item, #{type := T, separator := S}, _, _}, Parents, Acc) ->
+ item(Tail, NextLI, Parents, [reverse_children(CurrentLI)|Acc]);
+%% Other list items are either parent or children lists.
+item(FullTail=[NextLI={list_item, Attrs, _, Ann}|Tail0], CurrentLI, Parents, Acc) ->
+ case lists:member(type(Attrs), Parents) of
+ %% We have a parent list item. This is the end of this child list.
+ true ->
+ {lists:reverse([reverse_children(CurrentLI)|Acc]), FullTail};
+ %% We have a child list item. This is the beginning of a new list.
+ false ->
+ {Items, Tail} = item(Tail0, NextLI, [type(Attrs)|Parents], []),
+ item(Tail, add_child(CurrentLI, {list, Attrs, Items, Ann}), Parents, Acc)
+ end;
+%% Ignore multiple contiguous list continuations.
+item([LIC={list_item_continuation, _, _, _},
+ {list_item_continuation, _, _, _}|Tail], CurrentLI, Parents, Acc) ->
+ item([LIC|Tail], CurrentLI, Parents, Acc);
+%% Blocks that immediately follow list_item_continuation are children,
+%% unless they are list_item themselves in which case it depends on the
+%% type and level of the list item.
+item([{list_item_continuation, _, _, _}, LI={list_item, _, _, _}|Tail], CurrentLI, Parents, Acc) ->
+ item([LI|Tail], CurrentLI, Parents, Acc);
+item([{list_item_continuation, _, _, _}, Block|Tail], CurrentLI, Parents, Acc) ->
+ item(Tail, add_child(CurrentLI, Block), Parents, Acc);
+%% Anything else is the end of the list.
+item(Tail, CurrentLI, _, Acc) ->
+ {lists:reverse([reverse_children(CurrentLI)|Acc]), Tail}.
+
+%% The identity of a list: the subset of the attributes made of the
+%% type, level and separator keys that are present.
+type(Attrs) ->
+    IdentityKeys = [type, level, separator],
+    maps:with(IdentityKeys, Attrs).
+
+%% Prepends Child to the children of a list item. Children are
+%% therefore in reverse order until reverse_children/1 is called.
+add_child(ListItem, Child) ->
+    {list_item, Attrs, Children, Ann} = ListItem,
+    {list_item, Attrs, [Child|Children], Ann}.
+
+%% Puts the children of a completed list item back in document order.
+reverse_children(ListItem) ->
+    {list_item, Attrs, Reversed, Ann} = ListItem,
+    {list_item, Attrs, lists:reverse(Reversed), Ann}.
+
+-ifdef(TEST).
+list_test() ->
+ [{list, #{type := bulleted, level := 1}, [
+ {list_item, #{type := bulleted, level := 1},
+ [{paragraph, #{}, <<"Hello!">>, _}], #{line := 1}},
+ {list_item, #{type := bulleted, level := 1},
+ [{paragraph, #{}, <<"World!">>, _}], #{line := 2}}
+ ], #{line := 1}}] = run([
+ {list_item, #{type => bulleted, level => 1},
+ [{paragraph, #{}, <<"Hello!">>, #{line => 1}}], #{line => 1}},
+ {list_item, #{type => bulleted, level => 1},
+ [{paragraph, #{}, <<"World!">>, #{line => 2}}], #{line => 2}}
+ ]),
+ ok.
+
+list_of_list_test() ->
+ [{list, #{type := bulleted, level := 1}, [
+ {list_item, #{type := bulleted, level := 1}, [
+ {paragraph, #{}, <<"Hello!">>, _},
+ {list, #{type := bulleted, level := 2}, [
+ {list_item, #{type := bulleted, level := 2},
+ [{paragraph, #{}, <<"Cat!">>, _}], #{line := 2}},
+ {list_item, #{type := bulleted, level := 2},
+ [{paragraph, #{}, <<"Dog!">>, _}], #{line := 3}}
+ ], #{line := 2}}
+ ], #{line := 1}},
+ {list_item, #{type := bulleted, level := 1},
+ [{paragraph, #{}, <<"World!">>, _}], #{line := 4}}
+ ], #{line := 1}}] = run([
+ {list_item, #{type => bulleted, level => 1},
+ [{paragraph, #{}, <<"Hello!">>, #{line => 1}}], #{line => 1}},
+ {list_item, #{type => bulleted, level => 2},
+ [{paragraph, #{}, <<"Cat!">>, #{line => 2}}], #{line => 2}},
+ {list_item, #{type => bulleted, level => 2},
+ [{paragraph, #{}, <<"Dog!">>, #{line => 3}}], #{line => 3}},
+ {list_item, #{type => bulleted, level => 1},
+ [{paragraph, #{}, <<"World!">>, #{line => 4}}], #{line => 4}}
+ ]),
+ ok.
+
+list_continuation_test() ->
+ [{list, #{type := bulleted, level := 1}, [
+ {list_item, #{type := bulleted, level := 1}, [
+ {paragraph, #{}, <<"Hello!">>, _},
+ {listing_block, #{}, <<"hello() -> world.">>, #{line := 3}}
+ ], #{line := 1}},
+ {list_item, #{type := bulleted, level := 1},
+ [{paragraph, #{}, <<"World!">>, _}], #{line := 6}}
+ ], #{line := 1}}] = run([
+ {list_item, #{type => bulleted, level => 1},
+ [{paragraph, #{}, <<"Hello!">>, #{line => 1}}], #{line => 1}},
+ {list_item_continuation, #{}, <<>>, #{line => 2}},
+ {listing_block, #{}, <<"hello() -> world.">>, #{line => 3}},
+ {list_item, #{type => bulleted, level => 1},
+ [{paragraph, #{}, <<"World!">>, #{line => 6}}], #{line => 6}}
+ ]),
+ ok.
+-endif.
diff --git a/src/asciideck_parser.erl b/src/asciideck_parser.erl
deleted file mode 100644
index 8016395..0000000
--- a/src/asciideck_parser.erl
+++ /dev/null
@@ -1,388 +0,0 @@
-%% Copyright (c) 2016, Loïc Hoguin <[email protected]>
-%%
-%% Permission to use, copy, modify, and/or distribute this software for any
-%% purpose with or without fee is hereby granted, provided that the above
-%% copyright notice and this permission notice appear in all copies.
-%%
-%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
--module(asciideck_parser).
-
--export([parse/2]).
-
-%% @todo
-%% All nodes in the AST are of type {Type, Attrs, Text | Nodes, Ann}
-%% except for text formatting nodes at the moment. Text formatting
-%% nodes will be converted to this form in a future change.
-
-%% Parsing occurs in a few passes:
-%%
-%% * p1: Line-based parsing of the raw Asciidoc document
-%% * p2: Deal with more compp1 structures like lists and tables
-
-parse(Data, St) ->
- Lines0 = binary:split(Data, <<"\n">>, [global]),
- %% Ensure there's an empty line at the end, to simplify parsing.
- Lines1 = lists:append(Lines0, [<<>>]),
- LineNumbers = lists:seq(1, length(Lines1)),
- Lines = lists:zip(LineNumbers, Lines1),
- %% @todo Document header, if any. Recognized by the author info/doc attributes?
- %% Alternatively, don't recognize it, and only use attribute entries for the same info.
- p2(p1(Lines, [], St), []).
-
-%% First pass.
-
-%% @todo When a block element is encountered asciidoc(1) determines the type of block by checking in the following order (first to last): (section) Titles, BlockMacros, Lists, DelimitedBlocks, Tables, AttributeEntrys, AttributeLists, BlockTitles, Paragraphs.
-
-%% @todo And this function is parsing, not p1ing.
-p1([], AST, _St) ->
- lists:reverse(AST);
-%% Extra empty lines.
-p1([{_, <<>>}|Tail], AST, St) ->
- p1(Tail, AST, St);
-%% Comments.
-p1([{LN, <<"//", Comment/bits >>}|Tail], AST, St) ->
- p1(Tail, [comment(trim_ws(Comment), ann(LN, St))|AST], St);
-%% Section titles.
-p1([{LN, <<"= ", Title/bits >>}, {_, <<>>}|Tail], AST, St) ->
- p1_title_short(Tail, AST, St, LN, Title, 0);
-p1([{LN, <<"== ", Title/bits >>}, {_, <<>>}|Tail], AST, St) ->
- p1_title_short(Tail, AST, St, LN, Title, 1);
-p1([{LN, <<"=== ", Title/bits >>}, {_, <<>>}|Tail], AST, St) ->
- p1_title_short(Tail, AST, St, LN, Title, 2);
-p1([{LN, <<"==== ", Title/bits >>}, {_, <<>>}|Tail], AST, St) ->
- p1_title_short(Tail, AST, St, LN, Title, 3);
-p1([{LN, <<"===== ", Title/bits >>}, {_, <<>>}|Tail], AST, St) ->
- p1_title_short(Tail, AST, St, LN, Title, 4);
-%% Block titles.
-p1([{_LN, <<".", Title/bits >>}|Tail], AST, St) ->
- p1(Tail, [{block_title, Title}|AST], St);
-%% Attribute lists.
-p1([{_LN, <<"[", Attrs/bits >>}|Tail], AST, St) ->
- p1(Tail, [{attribute_list, p1_attr_list(Attrs)}|AST], St);
-%% Listing blocks.
-p1([{LN, <<"----", _/bits >>}|Tail], AST, St) ->
- p1_listing(Tail, AST, St, LN, []);
-%% Lists.
-p1([{LN, <<"* ", Text/bits >>}|Tail], AST, St) ->
- p1_li(Tail, AST, St, uli1, {LN, Text});
-p1([{LN, <<"** ", Text/bits >>}|Tail], AST, St) ->
- p1_li(Tail, AST, St, uli2, {LN, Text});
-p1([{LN, <<"*** ", Text/bits >>}|Tail], AST, St) ->
- p1_li(Tail, AST, St, uli3, {LN, Text});
-p1([{LN, <<"**** ", Text/bits >>}|Tail], AST, St) ->
- p1_li(Tail, AST, St, uli4, {LN, Text});
-p1([{LN, <<"***** ", Text/bits >>}|Tail], AST, St) ->
- p1_li(Tail, AST, St, uli5, {LN, Text});
-%% Tables.
-p1([{LN, <<"|===", _/bits >>}|Tail], AST, St) ->
- p1_table(Tail, AST, St, LN);
-p1([{LN, <<"|", Text/bits >>}|Tail], AST, St) ->
- p1_cell(Tail, AST, St, LN, Text);
-%% Prefix-based or paragraph.
-p1(Lines, AST, St) ->
- p1_text(Lines, AST, St).
-
-p1_title_short(Tail, AST, St, LN, Text0, Level) ->
- %% Remove the trailer, if any.
- Text1 = trim_ws(Text0),
- Trailer = case Level of
- 0 -> <<" =">>;
- 1 -> <<" ==">>;
- 2 -> <<" ===">>;
- 3 -> <<" ====">>;
- 4 -> <<" =====">>
- end,
- TrailerSize = byte_size(Trailer),
- Size = byte_size(Text1) - TrailerSize,
- Text3 = case Text1 of
- << Text2:Size/binary, Trailer:TrailerSize/binary >> -> Text2;
- _ -> Text1
- end,
- Text = trim_ws(Text3),
- p1(Tail, [title(Text, #{level => Level}, ann(LN, St))|AST], St).
-
-p1_attr_list(AttrList0) ->
- [AttrList|_] = binary:split(AttrList0, <<"]">>),
- binary:split(AttrList, <<",">>).
-
-%% @todo Parse attributes properly.
-p1_table(Tail, [{attribute_list, Attrs}, {block_title, Title}|AST], St, LN) ->
- p1(Tail, [{begin_table, #{title => Title, todo => Attrs}, ann(LN, St)}|AST], St);
-p1_table(Tail, [{attribute_list, Attrs}|AST], St, LN) ->
- p1(Tail, [{begin_table, #{todo => Attrs}, ann(LN, St)}|AST], St);
-p1_table(Tail, AST=[nl, {cell, _, _, _}|_], St, _) ->
- p1(Tail, [end_table|AST], St);
-p1_table(Tail, AST=[{cell, _, _, _}|_], St, _) ->
- p1(Tail, [end_table|AST], St);
-p1_table(Tail, AST, St, LN) ->
- p1(Tail, [{begin_table, #{}, ann(LN, St)}|AST], St).
-
-%% @todo Multiline cells.
-%% @todo Styled cells.
-%% @todo Strip whitespace at the beginning of the cell if on the same line.
-p1_cell(Tail=[{_, NextLine}|_], AST0, St, LN, Text) ->
- case p1_cell_split(Text, <<>>) of
- [Cell] ->
- AST1 = [nl, cell(p1([{LN, trim_ws(Cell)}, {LN, <<>>}], [], St), ann(LN, St))|AST0],
- AST = case NextLine of
- <<>> -> [nl|AST1];
- _ -> AST1
- end,
- p1(Tail, AST, St);
- [Cell, Rest] ->
- p1_cell(Tail, [cell(p1([{LN, trim_ws(Cell)}, {LN, <<>>}], [], St), ann(LN, St))|AST0], St, LN, Rest)
- end.
-
-p1_cell_split(<<>>, Acc) ->
- [Acc];
-p1_cell_split(<< $\\, $|, Rest/bits >>, Acc) ->
- p1_cell_split(Rest, << Acc/binary, $| >>);
-p1_cell_split(<< $|, Rest/bits >>, Acc) ->
- [Acc, Rest];
-p1_cell_split(<< C, Rest/bits >>, Acc) ->
- p1_cell_split(Rest, << Acc/binary, C >>).
-
-p1_listing([{_, <<"----", _/bits >>}, {_, <<>>}|Tail], AST0, St, LN, [_|Acc]) ->
- Text = iolist_to_binary(lists:reverse(Acc)),
- case AST0 of
- [{attribute_list, [<<"source">>, Lang]}, {block_title, Title}|AST] ->
- p1(Tail, [listing(Text, #{title => Title, language => Lang}, ann(LN, St))|AST], St);
- [{block_title, Title}, {attribute_list, [<<"source">>, Lang]}|AST] ->
- p1(Tail, [listing(Text, #{title => Title, language => Lang}, ann(LN, St))|AST], St);
- [{attribute_list, [<<"source">>, Lang]}|AST] ->
- p1(Tail, [listing(Text, #{language => Lang}, ann(LN, St))|AST], St);
- [{block_title, Title}|AST] ->
- p1(Tail, [listing(Text, #{title => Title}, ann(LN, St))|AST], St);
- AST ->
- p1(Tail, [listing(Text, #{}, ann(LN, St))|AST], St)
- end;
-p1_listing([{_, Line}|Tail], AST, St, LN, Acc) ->
- p1_listing(Tail, AST, St, LN, [<<"\n">>, Line|Acc]).
-
-p1_li(Lines, AST, St, Type, FirstLine = {LN, _}) ->
- {Tail, Glob} = p1_li_glob(Lines, []),
- p1(Tail, [{Type, p1([FirstLine|Glob], [], St), ann(LN, St)}|AST], St).
-
-%% Glob everything until next list or empty line.
-p1_li_glob(Tail = [{LN, << "*", _/bits >>}|_], Acc) ->
- {Tail, lists:reverse([{LN, <<>>}|Acc])};
-p1_li_glob(Tail = [{LN, <<>>}|_], Acc) ->
- {Tail, lists:reverse([{LN, <<>>}|Acc])};
-p1_li_glob([{LN, <<"+">>}|Tail], Acc) ->
- p1_li_glob(Tail, [{LN, <<>>}|Acc]);
-p1_li_glob([Line|Tail], Acc) ->
- p1_li_glob(Tail, [Line|Acc]).
-
-%% Skip initial empty lines and then glob like normal lists.
-p1_ll_glob(Lines=[{_, Line}|Tail]) ->
- case trim_ws(Line) of
- <<>> -> p1_ll_glob(Tail);
- _ -> p1_ll_glob(Lines, [])
- end.
-
-%% Glob everything until empty line.
-%% @todo Detect next list.
-p1_ll_glob(Tail = [{LN, <<>>}|_], Acc) ->
- {Tail, lists:reverse([{LN, <<>>}|Acc])};
-p1_ll_glob([{LN, <<"+">>}|Tail], Acc) ->
- p1_ll_glob(Tail, [{LN, <<>>}|Acc]);
-p1_ll_glob([{LN, <<" ", Line/bits>>}|Tail], Acc) ->
- p1_ll_glob([{LN, trim_ws(Line)}|Tail], Acc);
-p1_ll_glob(Lines=[Line={LN, Text}|Tail], Acc) ->
- case binary:split(<< Text/binary, $\s >>, <<":: ">>) of
- [_, _] ->
- {Lines, lists:reverse([{LN, <<>>}|Acc])};
- _ ->
- p1_ll_glob(Tail, [Line|Acc])
- end.
-
-p1_text(Lines=[{LN, Line}|Tail], AST, St) ->
- case binary:split(<< Line/binary, $\s >>, <<":: ">>) of
- %% Nothing else on the line.
- [Label, <<>>] ->
- {Tail1, Glob} = p1_ll_glob(Tail),
- p1(Tail1, [{label, Label, p1(Glob, [], St), ann(LN, St)}|AST], St);
- %% Text on the same line.
- [Label, Text0] ->
- Size = byte_size(Text0) - 1,
- << Text:Size/binary, _ >> = Text0,
- {Tail1, Glob} = p1_ll_glob([{LN, Text}|Tail]),
- %% Text on the same line is necessarily a paragraph I believe.
- p1_p(Tail1, [{label, Label, p1(Glob, [], St), ann(LN, St)}|AST], St, LN, []);
- %% Not a labeled list.
- _ ->
- p1_maybe_p(Lines, AST, St)
- end.
-
-%% @todo Literal paragraphs.
-p1_maybe_p([{_LN, << " ", Line/bits >>}|Tail], AST, St) ->
- <<>> = trim_ws(Line),
- p1(Tail, AST, St);
-p1_maybe_p(Lines=[{LN, _}|_], AST, St) ->
- p1_p(Lines, AST, St, LN, []).
-
-p1_p([{_, <<>>}|Tail], AST0, St, LN, [_|Acc]) ->
- Text = format(iolist_to_binary(lists:reverse(Acc)), LN, St),
- case AST0 of
- [{block_title, Title}|AST] ->
- p1(Tail, [paragraph(Text, #{title => Title}, ann(LN, St))|AST], St);
- AST ->
- p1(Tail, [paragraph(Text, #{}, ann(LN, St))|AST], St)
- end;
-%% Ignore comments inside paragraphs.
-%% @todo Keep in the AST.
-p1_p([{_, <<"//", _/bits>>}|Tail], AST, St, LN, Acc) ->
- p1_p(Tail, AST, St, LN, Acc);
-p1_p([{_, Line}|Tail], AST, St, LN, Acc) ->
- %% @todo We need to keep line/col information. To do this
- %% we probably should keep an index of character number -> line/col
- %% that we pass to the format function. Otherwise the line/col
- %% information on text will point to the paragraph start.
- p1_p(Tail, AST, St, LN, [<<" ">>, Line|Acc]).
-
-%% Inline formatting.
-
-%% @todo Probably do it as part of the node functions that require it.
-format(Text, LN, St) ->
- case format(Text, LN, St, [], <<>>, $\s) of
- [Bin] when is_binary(Bin) -> Bin;
- Formatted -> Formatted
- end.
-
-format(<<>>, _, _, Acc, <<>>, _) ->
- lists:reverse(Acc);
-format(<<>>, _, _, Acc, BinAcc, _) ->
- lists:reverse([BinAcc|Acc]);
-format(<< "link:", Rest0/bits >>, LN, St, Acc0, BinAcc, Prev) when Prev =:= $\s ->
- case re:run(Rest0, "^([^[]*)\\[([^]]*)\\](.*)", [{capture, all, binary}]) of
- nomatch ->
- format(Rest0, LN, St, Acc0, << BinAcc/binary, "link:" >>, $:);
- {match, [_, Link, Text, Rest]} ->
- Acc = case BinAcc of
- <<>> -> Acc0;
- _ -> [BinAcc|Acc0]
- end,
- format(Rest, LN, St, [rel_link(Text, Link, ann(LN, St))|Acc], <<>>, $])
- end;
-format(<< C, Rest0/bits >>, LN, St, Acc0, BinAcc, Prev) when Prev =:= $\s ->
- %% @todo In some cases we must format inside the quoted text too.
- %% Therefore we need to have some information about what to do here.
- Quotes = #{
- $* => {strong, text},
- $` => {mono, literal}
- },
- case maps:get(C, Quotes, undefined) of
- undefined ->
- format(Rest0, LN, St, Acc0, << BinAcc/binary, C >>, C);
- {NodeType, QuotedType} ->
- case binary:split(Rest0, << C >>) of
- [_] ->
- format(Rest0, LN, St, Acc0, << BinAcc/binary, $* >>, $*);
- [QuotedText0, Rest] ->
- Acc = case BinAcc of
- <<>> -> Acc0;
- _ -> [BinAcc|Acc0]
- end,
- QuotedText = case QuotedType of
- text -> format(QuotedText0, LN, St);
- literal -> QuotedText0
- end,
- format(Rest, LN, St, [quoted(NodeType, QuotedText, ann(LN, St))|Acc], <<>>, $*)
- end
- end;
-format(<< C, Rest/bits >>, LN, St, Acc, BinAcc, _) ->
- format(Rest, LN, St, Acc, << BinAcc/binary, C >>, C).
-
-%% Second pass.
-
-p2([], Acc) ->
- lists:reverse(Acc);
-p2([{label, Label, Items, Ann}|Tail], Acc) ->
- %% @todo Handle this like other lists.
- p2(Tail, [ll([li(p2(Items, []), #{label => Label}, Ann)], #{}, Ann)|Acc]);
-p2(Tail0=[{uli1, _, UlAnn}|_], Acc) ->
- {LIs0, Tail} = lists:splitwith(fun({uli1, _, _}) -> true; (_) -> false end, Tail0),
- LIs = [li(I, LiAnn) || {uli1, I, LiAnn} <- LIs0],
- p2(Tail, [ul(LIs, #{}, UlAnn)|Acc]);
-p2([{begin_table, Attrs, Ann}|Tail0], Acc) ->
- %% @todo Can also get them from Attrs?
- N = count_table_columns(Tail0),
- {Rows, Tail} = p2_rows(Tail0, [], [], N, 1),
- p2(Tail, [table(Rows, Attrs, Ann)|Acc]);
-p2([Item|Tail], Acc) ->
- p2(Tail, [Item|Acc]).
-
-%% @todo One cell per line version.
-count_table_columns(Cells) ->
- length(lists:takewhile(fun({cell, _, _, _}) -> true; (_) -> false end, Cells)).
-
-p2_rows([nl|Tail], Rows, Cols, NumCols, N) ->
- p2_rows(Tail, Rows, Cols, NumCols, N);
-p2_rows([Cell = {cell, _, _, Ann}|Tail], Rows, Cols, NumCols, NumCols) ->
- p2_rows(Tail, [row(lists:reverse([Cell|Cols]), Ann)|Rows], [], NumCols, 1);
-p2_rows([Cell = {cell, _, _, _}|Tail], Rows, Cols, NumCols, N) ->
- p2_rows(Tail, Rows, [Cell|Cols], NumCols, N + 1);
-p2_rows([end_table|Tail], Rows, [], _, _) ->
- {lists:reverse(Rows), Tail}.
-
-%% Annotations.
-
-ann(Line, St) ->
- ann(Line, 1, St).
-
-%% @todo Take filename too, if any.
-ann(Line, Col, _St) ->
- #{line => Line, col => Col}.
-
-%% Nodes.
-
-cell(Nodes, Ann) ->
- {cell, #{}, Nodes, Ann}.
-
-comment(Text, Ann) ->
- {comment, #{}, Text, Ann}.
-
-li(Nodes, Ann) ->
- li(Nodes, #{}, Ann).
-
-li(Nodes, Attrs, Ann) ->
- {li, Attrs, Nodes, Ann}.
-
-listing(Text, Attrs, Ann) ->
- {listing, Attrs, Text, Ann}.
-
-ll(Nodes, Attrs, Ann) ->
- {ll, Attrs, Nodes, Ann}.
-
-paragraph(Text, Attrs, Ann) ->
- {p, Attrs, Text, Ann}.
-
-quoted(NodeType, Text, Ann) ->
- {NodeType, #{}, Text, Ann}.
-
-rel_link(Text, Link, Ann) ->
- {rel_link, #{target => Link}, Text, Ann}.
-
-row(Nodes, Ann) ->
- {row, #{}, Nodes, Ann}.
-
-table(Nodes, Attrs, Ann) ->
- {table, Attrs, Nodes, Ann}.
-
-title(Text, Attrs, Ann) ->
- {title, Attrs, Text, Ann}.
-
-ul(Nodes, Attrs, Ann) ->
- {ul, Attrs, Nodes, Ann}.
-
-%% Utility functions.
-
-trim_ws(Text) ->
- iolist_to_binary(re:replace(Text, "^[ \\t]+|[ \\t]+$", <<>>, [global])).
diff --git a/src/asciideck_tables_pass.erl b/src/asciideck_tables_pass.erl
new file mode 100644
index 0000000..fdda6ef
--- /dev/null
+++ b/src/asciideck_tables_pass.erl
@@ -0,0 +1,191 @@
+%% Copyright (c) 2017-2018, Loïc Hoguin <[email protected]>
+%%
+%% Permission to use, copy, modify, and/or distribute this software for any
+%% purpose with or without fee is hereby granted, provided that the above
+%% copyright notice and this permission notice appear in all copies.
+%%
+%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+%% This pass parses and builds a table from the contents
+%% of a table block.
+%%
+%% Asciidoc User Guide 23
+%%
+%% @todo Rows and cells are currently not annotated.
+-module(asciideck_tables_pass).
+
+-export([run/1]).
+
+%% Guard test: true when C is a space, tab or newline character.
+%%
+%% The replacement text is wrapped in its own pair of parentheses so
+%% that the macro expands to a single, balanced expression. Without
+%% them the preprocessor takes the last ")" as the end of the define
+%% and the expansion is left with an unclosed "(", which is a syntax
+%% error at every place the macro is used.
+-define(IS_WS(C), ((C =:= $\s) orelse (C =:= $\t) orelse (C =:= $\n))).
+
+%% Entry point of the pass. Walks the top-level blocks in order and
+%% replaces every table block with its parsed form; all other blocks
+%% are returned unchanged.
+run(Blocks) ->
+    [table_or_block(Block) || Block <- Blocks].
+
+%% Only table blocks are rewritten by this pass.
+table_or_block(Table={table, _, _, _}) ->
+    table(Table);
+table_or_block(Block) ->
+    Block.
+
+%% Replace the raw contents of a table block with a list of rows.
+%% The attributes and the annotations of the block are kept as is.
+table({table, Attrs, Contents, Ann}) ->
+    {Cells, NumCols} = parse_table(Contents, Attrs),
+    {table, Attrs, rows(Cells, NumCols), Ann}.
+
+-ifdef(TEST).
+%% A table block with three lines of three cells each and no
+%% attributes is expected to become three rows of three cells,
+%% with the whitespace around each cell's text removed.
+table_test() ->
+ {table, _, [
+ {row, _, [
+ {cell, _, <<"1">>, _},
+ {cell, _, <<"2">>, _},
+ {cell, _, <<"A">>, _}
+ ], _},
+ {row, _, [
+ {cell, _, <<"3">>, _},
+ {cell, _, <<"4">>, _},
+ {cell, _, <<"B">>, _}
+ ], _},
+ {row, _, [
+ {cell, _, <<"5">>, _},
+ {cell, _, <<"6">>, _},
+ {cell, _, <<"C">>, _}
+ ], _}
+ ], _} = table({table, #{}, <<
+ "|1 |2 |A\n"
+ "|3 |4 |B\n"
+ "|5 |6 |C">>, #{line => 1}}),
+ ok.
+-endif.
+
+%% Parse the contents of a table block into a flat list of cells and
+%% work out how many columns the table has. Returns {Cells, NumCols}.
+%%
+%% When the cols attribute is given it decides the number of columns
+%% and all the contents are parsed in one go.
+parse_table(Contents, #{<<"cols">> := Cols}) ->
+    {parse_cells(Contents, []), num_cols(Cols)};
+%% Without a cols attribute the number of columns is the number of
+%% cells found on the first line, so that line is parsed on its own
+%% before the remaining lines (if any) are parsed after it.
+parse_table(Contents, _) ->
+    [FirstLine|Remaining] = binary:split(Contents, <<"\n">>),
+    FirstCells = parse_cells(FirstLine, []),
+    AllCells = case Remaining of
+        %% The table is a single line.
+        [] ->
+            FirstCells;
+        %% parse_cells/2 expects the cells parsed so far in reverse order.
+        [Rest] ->
+            parse_cells(Rest, lists:reverse(FirstCells))
+    end,
+    {AllCells, length(FirstCells)}.
+
+%% Number of columns described by the value of the cols attribute:
+%% one per comma-separated entry. The entries are only counted.
+num_cols(Cols) ->
+    %% @todo Handle column specifiers.
+    length(binary:split(Cols, <<",">>, [global])).
+
+%% Parse Contents into cells and add them to the cells already parsed.
+%% Acc holds the previously parsed cells in reverse order; the result
+%% is the complete list of cells in document order.
+%%
+%% The contents are split on unescaped pipes. Each cell keeps its
+%% specifiers as an attribute for now; they are meant to be
+%% consolidated when processing rows.
+parse_cells(Contents, Acc) ->
+    do_parse_cells(split_cells(Contents), Acc).
+
+%% Split a binary on every pipe character that is not escaped.
+%% A backslash followed by a pipe yields a literal pipe in the
+%% current piece; the backslash itself is dropped. The result always
+%% contains at least one piece, and the piece before the first pipe
+%% is included even when it is empty.
+split_cells(Contents) ->
+    split_cells(Contents, <<>>, []).
+
+%% Current is the piece being built, Done the finished pieces in
+%% reverse order.
+split_cells(<<>>, Current, Done) ->
+    lists:reverse([Current|Done]);
+%% The escaped pipe must be checked before the plain pipe.
+split_cells(<<"\\|", Rest/bits>>, Current, Done) ->
+    split_cells(Rest, <<Current/binary, "|">>, Done);
+split_cells(<<"|", Rest/bits>>, Current, Done) ->
+    split_cells(Rest, <<>>, [Current|Done]);
+split_cells(<<Byte, Rest/bits>>, Current, Done) ->
+    split_cells(Rest, <<Current/binary, Byte>>, Done).
+
+%% Build cell nodes out of the pieces returned by split_cells/1.
+%%
+%% The first piece is the text found before the first pipe and is
+%% stored as the specifiers of the first cell. Acc holds the cells
+%% built so far in reverse order; the result is in document order.
+%%
+%% @todo Cells are not annotated.
+
+%% Malformed table (no pipe before the cell): the single piece is
+%% turned into one cell and its contents are kept untrimmed.
+do_parse_cells([Contents], Acc) ->
+    lists:reverse([new_cell(<<>>, Contents)|Acc]);
+%% Last cell. Nothing follows it, so there are no specifiers to look
+%% for at the end of its contents.
+do_parse_cells([Specs, Contents], Acc) ->
+    Trimmed = asciideck_block_parser:trim(Contents, both),
+    lists:reverse([new_cell(Specs, Trimmed)|Acc]);
+%% Any other cell. The specifiers of the next cell are written at the
+%% end of the contents of this one and need to be removed from it.
+do_parse_cells([Specs, Contents|Tail], Acc) ->
+    %% @todo Extracting the specifiers is not implemented: they are
+    %% always taken to be empty, so the whole piece is cell contents.
+    NextSpecs = <<>>,
+    {Text, _} = split_binary(Contents, byte_size(Contents) - byte_size(NextSpecs)),
+    Trimmed = asciideck_block_parser:trim(Text, both),
+    do_parse_cells([NextSpecs|Tail], [new_cell(Specs, Trimmed)|Acc]).
+
+%% A cell node with empty annotations.
+new_cell(Specs, Contents) ->
+    {cell, #{specifiers => Specs}, Contents, #{}}.
+
+%% @todo This is not correct. Not all remaining data is specifiers.
+%% In addition, for columns at the end of the line this doesn't apply.
+%% Find the remaining data after the last whitespace character.
+%find_r(<<>>, Acc) ->
+% Acc;
+%find_r(<<C, Rest/bits>>, _) when ?IS_WS(C) ->
+% find_r(Rest, Rest);
+%find_r(<<_, Rest/bits>>, Acc) ->
+% find_r(Rest, Acc).
+
+-ifdef(TEST).
+%% Without a cols attribute the number of columns is expected to be
+%% the number of cells on the first line (3), and the cells of all
+%% lines are expected back as one flat list in document order.
+parse_table_test() ->
+ {[
+ {cell, _, <<"1">>, _},
+ {cell, _, <<"2">>, _},
+ {cell, _, <<"A">>, _},
+ {cell, _, <<"3">>, _},
+ {cell, _, <<"4">>, _},
+ {cell, _, <<"B">>, _},
+ {cell, _, <<"5">>, _},
+ {cell, _, <<"6">>, _},
+ {cell, _, <<"C">>, _}
+ ], 3} = parse_table(<<
+ "|1 |2 |A\n"
+ "|3 |4 |B\n"
+ "|5 |6 |C">>, #{}),
+ ok.
+
+%% A pipe preceded by a backslash is expected to stay inside the cell
+%% as a literal pipe (without the backslash) instead of starting a
+%% new cell: the second line yields the cells "3 |4" and "5".
+parse_table_escape_pipe_test() ->
+ {[
+ {cell, _, <<"1">>, _},
+ {cell, _, <<"2">>, _},
+ {cell, _, <<"3 |4">>, _},
+ {cell, _, <<"5">>, _}
+ ], 2} = parse_table(<<
+ "|1 |2\n"
+ "|3 \\|4 |5">>, #{}),
+ ok.
+-endif.
+
+%% Group a flat list of cells into rows of NumCols cells each.
+%% When the cells run out in the middle of a row, the cells gathered
+%% so far form a final, shorter row.
+%%
+%% @todo We currently don't handle colspans and rowspans.
+%% @todo Rows are not annotated.
+rows(Cells, NumCols) ->
+    rows(Cells, [], NumCols, [], 0).
+
+%% Rows holds the finished rows and Row the cells of the current row,
+%% both in reverse order. Filled is the number of cells in Row.
+
+%% The current row is full: close it and start a new one.
+rows(Cells, Rows, NumCols, Row, Filled) when Filled >= NumCols ->
+    rows(Cells, [finish_row(Row)|Rows], NumCols, [], 0);
+%% There is room left: move the next cell into the current row.
+rows([Cell|Cells], Rows, NumCols, Row, Filled) ->
+    rows(Cells, Rows, NumCols, [Cell|Row], Filled + 1);
+%% No more cells and no row in progress: properly formed table.
+rows([], Rows, _, [], _) ->
+    lists:reverse(Rows);
+%% No more cells in the middle of a row: malformed table. More
+%% columns were expected but there is nothing to fill them with.
+rows([], Rows, _, Row, _) ->
+    lists:reverse([finish_row(Row)|Rows]).
+
+%% Build a row node from its cells given in reverse order.
+finish_row(ReversedCells) ->
+    {row, #{}, lists:reverse(ReversedCells), #{}}.
diff --git a/src/asciideck_to_manpage.erl b/src/asciideck_to_manpage.erl
index bdff90e..37e4e73 100644
--- a/src/asciideck_to_manpage.erl
+++ b/src/asciideck_to_manpage.erl
@@ -1,4 +1,4 @@
-%% Copyright (c) 2016, Loïc Hoguin <[email protected]>
+%% Copyright (c) 2016-2018, Loïc Hoguin <[email protected]>
%%
%% Permission to use, copy, modify, and/or distribute this software for any
%% purpose with or without fee is hereby granted, provided that the above
@@ -19,7 +19,7 @@
-export([translate/2]).
translate(AST, Opts) ->
- {Man, Section, Output0} = translate_man(AST, Opts),
+ {Man, Section, Output0} = man(AST, Opts),
{CompressExt, Output} = case Opts of
#{compress := gzip} -> {".gz", zlib:gzip(Output0)};
_ -> {"", Output0}
@@ -32,7 +32,9 @@ translate(AST, Opts) ->
Output
end.
-translate_man([{title, #{level := 0}, Title0, _Ann}|AST], Opts) ->
+%% Header of the man page file.
+
+man([{section_title, #{level := 0}, Title0, _Ann}|AST], Opts) ->
ensure_name_section(AST),
[Title, << Section:1/binary, _/bits >>] = binary:split(Title0, <<"(">>),
Extra1 = maps:get(extra1, Opts, today()),
@@ -42,10 +44,10 @@ translate_man([{title, #{level := 0}, Title0, _Ann}|AST], Opts) ->
".TH \"", Title, "\" \"", Section, "\" \"",
Extra1, "\" \"", Extra2, "\" \"", Extra3, "\"\n"
".ta T 4n\n\\&\n",
- man(AST, [])
+ ast(AST)
]}.
-ensure_name_section([{title, #{level := 1}, Title, _}|_]) ->
+ensure_name_section([{section_title, #{level := 1}, Title, _}|_]) ->
case string:to_lower(string:strip(binary_to_list(Title))) of
"name" -> ok;
_ -> error(badarg)
@@ -57,22 +59,56 @@ today() ->
{{Y, M, D}, _} = calendar:universal_time(),
io_lib:format("~b-~2.10.0b-~2.10.0b", [Y, M, D]).
-man([], Acc) ->
- lists:reverse(Acc);
-man([{title, #{level := 1}, Title, _Ann}|Tail], Acc) ->
- man(Tail, [[".SH ", string:to_upper(binary_to_list(Title)), "\n"]|Acc]);
-man([{title, #{level := 2}, Title, _Ann}|Tail], Acc) ->
- man(Tail, [[".SS ", Title, "\n"]|Acc]);
-man([{p, _Attrs, Text, _Ann}|Tail], Acc) ->
- man(Tail, [[".LP\n", man_format(Text), "\n.sp\n"]|Acc]);
-man([{listing, Attrs, Listing, _Ann}|Tail], Acc0) ->
- Acc1 = case Attrs of
- #{title := Title} ->
- [[".PP\n\\fB", Title, "\\fR\n"]|Acc0];
- _ ->
- Acc0
- end,
- Acc = [[
+%% Loop over all types of AST nodes.
+
+%% Translate a list of AST nodes, one node at a time and in order.
+ast(Nodes) ->
+    fold(Nodes, fun(Node) -> ast_node(Node) end).
+
+%% Apply Fun to every node of AST, from first to last, and return
+%% the results in the same order as the nodes.
+fold(AST, Fun) ->
+    [Fun(Node) || Node <- AST].
+
+%% Translate a single AST node by dispatching on its type.
+%%
+%% Translation is best-effort: a node of a type that is not handled,
+%% or a node whose translation raises an exception of any class, is
+%% reported on standard output and contributes nothing to the page.
+ast_node(Node={Type, _, _, _}) ->
+    try
+        ast_node(Type, Node)
+    catch _:_ ->
+        ignored_node(Node)
+    end.
+
+ast_node(section_title, Node) -> section_title(Node);
+ast_node(paragraph, Node) -> paragraph(Node);
+ast_node(listing_block, Node) -> listing_block(Node);
+ast_node(list, Node) -> list(Node);
+ast_node(table, Node) -> table(Node);
+ast_node(comment_line, Node) -> comment_line(Node);
+ast_node(_, Node) -> ignored_node(Node).
+
+%% Report the node and produce empty output for it.
+ignored_node(Node) ->
+    io:format("Ignored AST node ~p~n", [Node]),
+    [].
+
+%% Section titles.
+
+%% Level 1 titles are written in upper case on a .SH line and
+%% level 2 titles are written as is on a .SS line. Titles of any
+%% other level are not handled by this function.
+section_title({section_title, #{level := 1}, Heading, _Ann}) ->
+    Upper = string:to_upper(binary_to_list(Heading)),
+    [".SH ", Upper, "\n"];
+section_title({section_title, #{level := 2}, Heading, _Ann}) ->
+    [".SS ", Heading, "\n"].
+
+%% Paragraphs.
+
+%% A paragraph is its inline-formatted text placed between an .LP
+%% line and an .sp line.
+paragraph({paragraph, _Attrs, Content, _Ann}) ->
+    Body = inline(Content),
+    [".LP\n", Body, "\n.sp\n"].
+
+%% Listing blocks.
+
+listing_block({listing_block, Attrs, Listing, _}) ->
+ [
+ case Attrs of
+ #{<<"title">> := Title} ->
+ [".PP\n\\fB", Title, "\\fR\n"];
+ _ ->
+ []
+ end,
".if n \\{\\\n"
".RS 4\n"
".\\}\n"
@@ -82,55 +118,18 @@ man([{listing, Attrs, Listing, _Ann}|Tail], Acc0) ->
".fi\n"
".if n \\{\\\n"
".RE\n"
- ".\\}\n"]|Acc1],
- man(Tail, Acc);
-man([{ul, _Attrs, Items, _Ann}|Tail], Acc0) ->
- Acc = man_ul(Items, Acc0),
- man(Tail, Acc);
-man([{ll, _Attrs, Items, _Ann}|Tail], Acc0) ->
- Acc = man_ll(Items, Acc0),
- man(Tail, Acc);
-%% @todo Attributes.
-%% Currently acts as if options="headers" was always set.
-man([{table, _TAttrs, [{row, RowAttrs, Headers0, RowAnn}|Rows0], _TAnn}|Tail], Acc0) ->
- Headers = [{cell, CAttrs, [{p, Attrs, [{strong, #{}, P, CAnn}], Ann}], CAnn}
- || {cell, CAttrs, [{p, Attrs, P, Ann}], CAnn} <- Headers0],
- Rows = [{row, RowAttrs, Headers, RowAnn}|Rows0],
- Acc = [[
- ".TS\n"
- "allbox tab(:);\n",
- man_table_style(Rows, []),
- man_table_contents(Rows),
- ".TE\n"
- ".sp 1\n"]|Acc0],
- man(Tail, Acc);
-%% Skip everything we don't understand.
-man([_Ignore|Tail], Acc) ->
- io:format("Ignore ~p~n", [_Ignore]), %% @todo lol io:format
- man(Tail, Acc).
-
-man_ll([], Acc) ->
- Acc;
-man_ll([{li, #{label := Label}, Item, _LiAnn}|Tail], Acc0) ->
- Acc = [[
- ".PP\n"
- "\\fB", Label, "\\fR\n",
- ".RS 4\n",
- man_ll_item(Item),
- ".RE\n"]|Acc0],
- man_ll(Tail, Acc).
-
-man_ll_item([{ul, _Attrs, Items, _Ann}]) ->
- [man_ul(Items, []), "\n"];
-man_ll_item([{p, _PAttrs, Text, _PAnn}]) ->
- [man_format(Text), "\n"];
-man_ll_item([{p, _PAttrs, Text, _PAnn}|Tail]) ->
- [man_format(Text), "\n\n", man_ll_item(Tail)].
-
-man_ul([], Acc) ->
- Acc;
-man_ul([{li, _LiAttrs, [{p, _PAttrs, Text, _PAnn}], _LiAnn}|Tail], Acc0) ->
- Acc = [[
+ ".\\}\n"
+ ].
+
+%% Lists.
+
+%% A list is the translation of its items, in order. How an item is
+%% translated depends on the type of the list; only bulleted and
+%% labeled lists are handled.
+list({list, #{type := Type}, Items, _Ann}) ->
+    fold(Items, list_item_fun(Type)).
+
+list_item_fun(bulleted) -> fun bulleted_list_item/1;
+list_item_fun(labeled) -> fun labeled_list_item/1.
+
+bulleted_list_item({list_item, _, [{paragraph, _, Text, _}|AST], _}) ->
+ [
".ie n \\{\\\n"
".RS 2\n"
"\\h'-02'\\(bu\\h'+01'\\c\n"
@@ -140,40 +139,85 @@ man_ul([{li, _LiAttrs, [{p, _PAttrs, Text, _PAnn}], _LiAnn}|Tail], Acc0) ->
".sp -1\n"
".IP \\(bu 2.3\n"
".\\}\n",
- man_format(Text), "\n"
- ".RE\n"]|Acc0],
- man_ul(Tail, Acc).
+ inline(Text), "\n",
+ ast(AST),
+ ".RE\n"
+ ].
+
+%% An item of a labeled list: the label in bold on its own line,
+%% followed by the contents of the item between .RS 4 and .RE.
+%% The first child of the item must be a paragraph; the remaining
+%% children are translated like any other AST nodes.
+labeled_list_item({list_item, #{label := Label}, [{paragraph, _, Content, _}|Children], _Ann}) ->
+    Heading = inline(Label),
+    Body = inline(Content),
+    Rest = ast(Children),
+    [
+        ".PP\n\\fB", Heading, "\\fR\n",
+        ".RS 4\n",
+        Body, "\n",
+        Rest,
+        ".RE\n"
+    ].
+
+%% Tables.
+
+%% A table is written between .TS and .TE: first the global options,
+%% then one format line per row closed by a line holding a single
+%% dot, and finally the contents of the rows.
+table({table, _Attrs, Rows0, _Ann}) ->
+    Rows = table_apply_options(Rows0),
+    Format = table_style(Rows),
+    Data = table_contents(Rows),
+    [
+        ".TS\n"
+        "allbox tab(:);\n",
+        Format, ".\n",
+        Data,
+        ".TE\n"
+        ".sp 1\n"
+    ].
+
+%% Make the first row a header row by wrapping the contents of each
+%% of its cells in a strong node. The other rows are left untouched.
+%%
+%% @todo Currently acts as if options="headers" was always set.
+table_apply_options([{row, RAttrs, Headers0, RAnn}|Tail]) ->
+    Headers = [header_cell(Cell) || Cell = {cell, _, _, _} <- Headers0],
+    [{row, RAttrs, Headers, RAnn}|Tail].
+
+%% The strong node reuses the annotations of the cell.
+header_cell({cell, Attrs, Text, Ann}) ->
+    {cell, Attrs, [{strong, #{}, Text, Ann}], Ann}.
+
+%% The format section of the table: one line per row.
+table_style(Rows) ->
+    [[table_style_cells(Cells), "\n"]
+        || {row, _, Cells, _} <- Rows].
+
+%% The format line of a row: one "lt " entry per cell of the row.
+table_style_cells(Cells) ->
+    NumCells = length([Cell || Cell = {cell, _, _, _} <- Cells]),
+    lists:duplicate(NumCells, "lt ").
+
+%% The data section of the table: one line per row.
+table_contents(Rows) ->
+    [[table_contents_cells(Cells), "\n"]
+        || {row, _, Cells, _} <- Rows].
+
+%% The cells of a row separated by colons. A row must have at least
+%% one cell.
+table_contents_cells([Head|Rest]) ->
+    Leading = table_contents_cell(Head),
+    Following = [[":", table_contents_cell(Cell)] || Cell <- Rest],
+    [Leading, Following].
-man_table_style([], [_|Acc]) ->
- lists:reverse([".\n"|Acc]);
-man_table_style([{row, _, Cols, _}|Tail], Acc) ->
- man_table_style(Tail, [$\n, man_table_style_cols(Cols, [])|Acc]).
+%% The inline-formatted text of a cell written as a T{ ... T} block.
+table_contents_cell({cell, _Attrs, Content, _Ann}) ->
+    Body = inline(Content),
+    ["T{\n", Body, "\nT}"].
-man_table_style_cols([], [_|Acc]) ->
- lists:reverse(Acc);
-man_table_style_cols([{cell, _, _, _}|Tail], Acc) ->
- man_table_style_cols(Tail, [$\s, "lt"|Acc]).
+%% Comment lines are printed in the generated file
+%% but are not visible in viewers.
-man_table_contents(Rows) ->
- [man_table_contents_cols(Cols, []) || {row, _, Cols, _} <- Rows].
+%% The text of the comment is written on a line of its own,
+%% prefixed with \# and a space.
+comment_line({comment_line, _Attrs, Comment, _Ann}) ->
+    Prefix = "\\# ",
+    [Prefix, Comment, "\n"].
-man_table_contents_cols([], [_|Acc]) ->
- lists:reverse(["\n"|Acc]);
-man_table_contents_cols([{cell, _CAttrs, [{p, _PAttrs, Text, _PAnn}], _CAnn}|Tail], Acc) ->
- man_table_contents_cols(Tail, [$:, "\nT}", man_format(Text), "T{\n"|Acc]).
+%% Inline formatting.
-man_format(Text) when is_binary(Text) ->
+inline(Text) when is_binary(Text) ->
Text;
-man_format({rel_link, #{target := Link}, Text, _}) ->
+%% When the link is the text we only print it once.
+inline({link, #{target := Link}, Link, _}) ->
+ Link;
+inline({link, #{target := Link}, Text, _}) ->
case re:run(Text, "^([-_:.a-zA-Z0-9]*)(\\([0-9]\\))$", [{capture, all, binary}]) of
nomatch -> [Text, " (", Link, ")"];
{match, [_, ManPage, ManSection]} -> ["\\fB", ManPage, "\\fR", ManSection]
end;
-man_format({strong, _, Text, _}) ->
- ["\\fB", man_format(Text), "\\fR"];
+inline({emphasized, _, Text, _}) ->
+ ["\\fI", inline(Text), "\\fR"];
+inline({strong, _, Text, _}) ->
+ ["\\fB", inline(Text), "\\fR"];
%% We are already using a monospace font.
-%% @todo Maybe there's a readable formatting we could use to differentiate from normal text?
-man_format({mono, _, Text, _}) ->
- man_format(Text);
-man_format(Text) when is_list(Text) ->
- [man_format(T) || T <- Text].
+inline({inline_literal_passthrough, _, Text, _}) ->
+ inline(Text);
+%% Xref links appear as plain text in manuals.
+inline({xref, _, Text, _}) ->
+ inline(Text);
+inline(Text) when is_list(Text) ->
+ [inline(T) || T <- Text].