aboutsummaryrefslogtreecommitdiffstats
path: root/src/asciideck_block_parser.erl
diff options
context:
space:
mode:
Diffstat (limited to 'src/asciideck_block_parser.erl')
-rw-r--r--src/asciideck_block_parser.erl1116
1 files changed, 1116 insertions, 0 deletions
diff --git a/src/asciideck_block_parser.erl b/src/asciideck_block_parser.erl
new file mode 100644
index 0000000..ad63fa6
--- /dev/null
+++ b/src/asciideck_block_parser.erl
@@ -0,0 +1,1116 @@
+%% Copyright (c) 2016-2018, Loïc Hoguin <[email protected]>
+%%
+%% Permission to use, copy, modify, and/or distribute this software for any
+%% purpose with or without fee is hereby granted, provided that the above
+%% copyright notice and this permission notice appear in all copies.
+%%
+%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+%% The block parser is the first pass of the parsing of Asciidoc
+%% files. It only isolates the different top-level blocks and
+%% produces a representation that can then be manipulated.
+%%
+%% Further passes are necessary to propagate the parsed lists
+%% of attributes to their respective blocks, to create actual
+%% lists from the parsed list items or to parse the contents
+%% of tables. Finally a final pass will parse inline elements.
+%%
+%% This module may be called again for parsing the content
+%% of individual table cells.
+-module(asciideck_block_parser).
+
+-export([parse/1]).
+
+%% @todo Temporary export. Move somewhere else.
+-export([trim/1]).
+-export([trim/2]).
+-export([while/2]).
+
+-type ast() :: list(). %% @todo
+
+-record(state, {
+ reader :: pid()
+}).
+
+-define(IS_WS(C), (C =:= $\s) or (C =:= $\t)).
+
+-ifdef(TEST).
+-define(NOT(Type, Value), true = Type =/= element(1, hd(Value))).
+
+define_NOT_test() ->
+ %% This succeeds.
+ ?NOT(block_id, parse(<<"[[block,id]]">>)),
+ %% This fails.
+ {'EXIT', _} = (catch ?NOT(block_id, parse(<<"[[block_id]]">>))),
+ ok.
+-endif.
+
+-spec parse(binary()) -> ast().
+parse(Data) ->
+ %% @todo Might want to start it supervised.
+ %% @todo Might want to stop it also.
+ {ok, ReaderPid} = asciideck_line_reader:start_link(Data),
+ blocks(#state{reader=ReaderPid}).
+
+blocks(St) ->
+ case block(St) of
+ eof -> [];
+ Block -> [Block|blocks(St)]
+ end.
+
+%% Asciidoc parsing never fails. If a block is not
+%% formatted properly, it will be treated as a paragraph.
+block(St) ->
+ skip(fun empty_line/1, St),
+ oneof([
+ fun eof/1,
+ %% Section titles.
+ fun section_title/1,
+ fun long_section_title/1,
+ %% Block macros.
+ fun block_id/1,
+ fun block_macro/1,
+ %% Lists.
+ fun bulleted_list/1,
+ fun numbered_list/1,
+ fun labeled_list/1,
+ fun callout_list/1,
+ fun list_item_continuation/1,
+ %% Delimited blocks.
+ fun listing_block/1,
+ fun literal_block/1,
+ fun sidebar_block/1,
+ fun comment_block/1,
+ fun passthrough_block/1,
+ fun quote_block/1,
+ fun example_block/1,
+ fun open_block/1,
+ %% Table.
+ fun table/1,
+ %% Attributes.
+ fun attribute_entry/1,
+ fun attribute_list/1,
+ %% Block title.
+ fun block_title/1,
+ %% Comment lines.
+ fun comment_line/1,
+ %% Paragraphs.
+ fun literal_para/1,
+ fun admonition_para/1,
+ fun para/1
+ ], St).
+
+eof(St) ->
+ eof = read_line(St).
+
+-ifdef(TEST).
+eof_test() ->
+ [] = parse(<<>>).
+-endif.
+
+empty_line(St) ->
+ <<>> = trim(read_line(St)).
+
+-ifdef(TEST).
+empty_line_test() ->
+ [] = parse(<<
+ "\n"
+ " \n"
+ " \n"
+ "\n"
+ >>).
+-endif.
+
+%% Asciidoc User Guide 11.2
+section_title(St) ->
+ {Level, Title0} = case read_line(St) of
+ <<"=", C, R/bits>> when ?IS_WS(C) -> {0, R};
+ <<"==", C, R/bits>> when ?IS_WS(C) -> {1, R};
+ <<"===", C, R/bits>> when ?IS_WS(C) -> {2, R};
+ <<"====", C, R/bits>> when ?IS_WS(C) -> {3, R};
+ <<"=====", C, R/bits>> when ?IS_WS(C) -> {4, R}
+ end,
+ Ann = ann(St),
+ Title1 = trim(Title0),
+ %% Optional: trailing title delimiter.
+ Trailer = case Level of
+ 0 -> <<"=">>;
+ 1 -> <<"==">>;
+ 2 -> <<"===">>;
+ 3 -> <<"====">>;
+ 4 -> <<"=====">>
+ end,
+ Len = byte_size(Title1) - Level - 2,
+ Title = case Title1 of
+ <<Title2:Len/binary, WS, Trailer/binary>> when ?IS_WS(WS) -> trim(Title2);
+ _ -> trim(Title1)
+ end,
+ %% Section titles must be followed by at least one empty line.
+ _ = empty_line(St),
+ %% Good!
+ {section_title, #{level => Level}, Title, Ann}.
+
+-ifdef(TEST).
+section_title_test() ->
+ %% With trailing title delimiter.
+ [{section_title, #{level := 0}, <<"Document Title (level 0)">>, _}]
+ = parse(<<"= Document Title (level 0) =">>),
+ [{section_title, #{level := 1}, <<"Section Title (level 1)">>, _}]
+ = parse(<<"== Section Title (level 1) ==">>),
+ [{section_title, #{level := 2}, <<"Section Title (level 2)">>, _}]
+ = parse(<<"=== Section Title (level 2) ===">>),
+ [{section_title, #{level := 3}, <<"Section Title (level 3)">>, _}]
+ = parse(<<"==== Section Title (level 3) ====">>),
+ [{section_title, #{level := 4}, <<"Section Title (level 4)">>, _}]
+ = parse(<<"===== Section Title (level 4) =====">>),
+ %% Without trailing title delimiter.
+ [{section_title, #{level := 0}, <<"Document Title (level 0)">>, _}]
+ = parse(<<"= Document Title (level 0)">>),
+ [{section_title, #{level := 1}, <<"Section Title (level 1)">>, _}]
+ = parse(<<"== Section Title (level 1)">>),
+ [{section_title, #{level := 2}, <<"Section Title (level 2)">>, _}]
+ = parse(<<"=== Section Title (level 2)">>),
+ [{section_title, #{level := 3}, <<"Section Title (level 3)">>, _}]
+ = parse(<<"==== Section Title (level 3)">>),
+ [{section_title, #{level := 4}, <<"Section Title (level 4)">>, _}]
+ = parse(<<"===== Section Title (level 4)">>),
+ %% Accept more spaces before/after delimiters.
+ [{section_title, #{level := 0}, <<"Document Title (level 0)">>, _}]
+ = parse(<<"= Document Title (level 0)">>),
+ [{section_title, #{level := 0}, <<"Document Title (level 0)">>, _}]
+ = parse(<<"= Document Title (level 0) =">>),
+ [{section_title, #{level := 0}, <<"Document Title (level 0)">>, _}]
+ = parse(<<"= Document Title (level 0) =">>),
+ [{section_title, #{level := 0}, <<"Document Title (level 0)">>, _}]
+ = parse(<<"= Document Title (level 0) = ">>),
+ %% A space before the first delimiter is not a title.
+ ?NOT(section_title, parse(<<" = Document Title (level 0)">>)),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 11.1
+long_section_title(St) ->
+ %% Title must be hard against the left margin.
+ <<C, _/bits>> = Title0 = read_line(St),
+ Ann = ann(St),
+ false = ?IS_WS(C),
+ Title = trim(Title0),
+ %% Read the underline.
+ {Level, Char, Underline0} = case read_line(St) of
+ U = <<"=", _/bits >> -> {0, $=, U};
+ U = <<"-", _/bits >> -> {1, $-, U};
+ U = <<"~", _/bits >> -> {2, $~, U};
+ U = <<"^", _/bits >> -> {3, $^, U};
+ U = <<"+", _/bits >> -> {4, $+, U}
+ end,
+ Underline = trim(Underline0, trailing),
+ %% Underline must be the same character repeated over the entire line.
+ repeats(Underline, Char),
+ %% Underline must be the same size as the title, +/- 2 characters.
+ TLen = byte_size(Title),
+ ULen = byte_size(Underline),
+ true = (TLen >= ULen - 2) andalso (TLen =< ULen + 2),
+ %% Good!
+ {section_title, #{level => Level}, Title, Ann}.
+
+-ifdef(TEST).
+long_section_title_test() ->
+ %% Same amount of characters for the underline.
+ [{section_title, #{level := 0}, <<"Document Title (level 0)">>, _}] = parse(<<
+ "Document Title (level 0)\n"
+ "========================">>),
+ [{section_title, #{level := 1}, <<"Section Title (level 1)">>, _}] = parse(<<
+ "Section Title (level 1)\n"
+ "-----------------------">>),
+ [{section_title, #{level := 2}, <<"Section Title (level 2)">>, _}] = parse(<<
+ "Section Title (level 2)\n"
+ "~~~~~~~~~~~~~~~~~~~~~~~">>),
+ [{section_title, #{level := 3}, <<"Section Title (level 3)">>, _}] = parse(<<
+ "Section Title (level 3)\n"
+ "^^^^^^^^^^^^^^^^^^^^^^^">>),
+ [{section_title, #{level := 4}, <<"Section Title (level 4)">>, _}] = parse(<<
+ "Section Title (level 4)\n"
+ "+++++++++++++++++++++++">>),
+ %% A shorter title to confirm we are not cheating.
+ [{section_title, #{level := 0}, <<"Hello!">>, _}] = parse(<<
+ "Hello!\n"
+ "======">>),
+ %% Underline can be +/- 2 characters.
+ [{section_title, #{level := 0}, <<"Hello!">>, _}] = parse(<<
+ "Hello!\n"
+ "====">>),
+ [{section_title, #{level := 0}, <<"Hello!">>, _}] = parse(<<
+ "Hello!\n"
+ "=====">>),
+ [{section_title, #{level := 0}, <<"Hello!">>, _}] = parse(<<
+ "Hello!\n"
+ "=======">>),
+ [{section_title, #{level := 0}, <<"Hello!">>, _}] = parse(<<
+ "Hello!\n"
+ "========">>),
+ %% Underline too short/long results in a different block.
+ ?NOT(section_title, parse(<<
+ "Hello!\n"
+ "===">>)),
+ ?NOT(section_title, parse(<<
+ "Hello!\n"
+ "=========">>)),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 21.2.1
+%%
+%% We currently do not implement the <xreflabel> value.
+%% I am also not sure what characters are allowed,
+%% so what is here is what I came up with guessing.
+block_id(St) ->
+ <<"[[", Line0/bits>> = read_line(St),
+ Line = trim(Line0),
+ Len = byte_size(Line) - 2,
+ <<BlockID:Len/binary, "]]">> = Line,
+ %% Make sure there are only valid characters.
+ {BlockID, <<>>} = while(fun(C) ->
+ (C =/= $,) andalso (C =/= $[) andalso (C =/= $])
+ andalso (C =/= $\s) andalso (C =/= $\t)
+ end, BlockID),
+ %% Good!
+ {block_id, #{id => BlockID}, <<>>, ann(St)}.
+
+-ifdef(TEST).
+block_id_test() ->
+ %% Valid.
+ [{block_id, #{id := <<"X30">>}, <<>>, _}] = parse(<<"[[X30]]">>),
+ %% Invalid.
+ ?NOT(block_id, parse(<<"[[block,id]]">>)),
+ ?NOT(block_id, parse(<<"[[block[id]]">>)),
+ ?NOT(block_id, parse(<<"[[block]id]]">>)),
+ ?NOT(block_id, parse(<<"[[block id]]">>)),
+ ?NOT(block_id, parse(<<"[[block\tid]]">>)),
+ %% Must be hard on the left of the line.
+ ?NOT(block_id, parse(<<" [[block_id]]">>)),
+ ?NOT(block_id, parse(<<"\t[[block_id]]">>)),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 21.2.3
+comment_line(St) ->
+ <<"//", Comment0/bits>> = read_line(St),
+ Comment = trim(Comment0),
+ %% Good!
+ {comment_line, #{<<"subs">> => <<"verbatim">>}, Comment, ann(St)}.
+
+-ifdef(TEST).
+comment_line_test() ->
+ [{comment_line, _, <<"This is a comment.">>, _}] = parse(<<"// This is a comment.">>),
+ %% We trim the whitespace around the comment.
+ [{comment_line, _, <<"This is a comment.">>, _}] = parse(<<"// This is a comment.">>),
+ [{comment_line, _, <<"This is a comment.">>, _}] = parse(<<"// This is a comment. ">>),
+ [{comment_line, _, <<"This is a comment.">>, _}] = parse(<<"//\tThis is a comment.">>),
+ [{comment_line, _, <<"This is a comment.">>, _}] = parse(<<"// This is a comment.\t">>),
+ [
+ {comment_line, _, <<"First line.">>, _},
+ {comment_line, _, <<"Second line.">>, _}
+ ] = parse(<<
+ "// First line.\n"
+ "// Second line.\n">>),
+ %% Must be hard on the left of the line.
+ ?NOT(comment_line, parse(<<" // This is a comment.">>)),
+ ?NOT(comment_line, parse(<<"\t// This is a comment.">>)),
+ ok.
+-endif.
+
+%% We currently implement the following block macros
+%% from the Asciidoc User Guide:
+%%
+%% - image (21.2.2)
+%% - include (21.3.1)
+%% - ifdef (21.3.2)
+%% - ifndef (21.3.2)
+%% - endif (21.3.2)
+block_macro(St) ->
+ Line0 = read_line(St),
+ Ann = ann(St),
+ %% Name must contain letters, digits or dash characters.
+ {Name, <<"::", Line1/bits>>} = while(fun(C) ->
+ ((C >= $a) andalso (C =< $z))
+ orelse ((C >= $A) andalso (C =< $Z))
+ orelse ((C >= $0) andalso (C =< $9))
+ orelse (C =:= $-)
+ end, Line0),
+ %% Name must not begin with a dash.
+ true = binary:at(Name, 0) =/= $-,
+ %% Target must not contain whitespace characters.
+ %% It is followed by an [attribute list].
+ {Target, AttrList0 = <<"[", _/bits>>} = while(fun(C) ->
+ (C =/= $[) andalso (C =/= $\s) andalso (C =/= $\t)
+ end, Line1),
+ AttrList1 = trim(AttrList0),
+ {attribute_list, AttrList, <<>>, _} = attribute_list(St, AttrList1),
+ %% Block macros must be followed by at least one empty line.
+ _ = empty_line(St),
+ {block_macro, AttrList#{
+ name => Name,
+ target => Target
+ }, <<>>, Ann}.
+
+-ifdef(TEST).
+block_macro_image_test() ->
+ [{block_macro, #{
+ name := <<"image">>,
+ target := <<"images/layout.png">>,
+ 1 := <<"J14P main circuit board">>
+ }, <<>>, _}] = parse(<<"image::images/layout.png[J14P main circuit board]">>),
+ [{block_macro, #{
+ name := <<"image">>,
+ target := <<"images/layout.png">>,
+ 1 := <<"J14P main circuit board">>,
+ <<"title">> := <<"Main circuit board">>
+ }, <<>>, _}] = parse(
+ <<"image::images/layout.png[\"J14P main circuit board\", "
+ "title=\"Main circuit board\"]">>),
+ ok.
+
+block_macro_include_test() ->
+ [{block_macro, #{
+ name := <<"include">>,
+ target := <<"chapter1.txt">>,
+ <<"tabsize">> := <<"4">>
+ }, <<>>, _}] = parse(<<"include::chapter1.txt[tabsize=4]">>),
+ ok.
+
+block_macro_ifdef_test() ->
+ [{block_macro, #{
+ name := <<"ifdef">>,
+ target := <<"revnumber">>,
+ 0 := <<>>
+ }, <<>>, _}] = parse(<<"ifdef::revnumber[]">>),
+ [{block_macro, #{
+ name := <<"ifdef">>,
+ target := <<"revnumber">>,
+ 1 := <<"Version number 42">>
+ }, <<>>, _}] = parse(<<"ifdef::revnumber[Version number 42]">>),
+ ok.
+
+block_macro_ifndef_test() ->
+ [{block_macro, #{
+ name := <<"ifndef">>,
+ target := <<"revnumber">>,
+ 0 := <<>>
+ }, <<>>, _}] = parse(<<"ifndef::revnumber[]">>),
+ ok.
+
+block_macro_endif_test() ->
+ [{block_macro, #{
+ name := <<"endif">>,
+ target := <<"revnumber">>,
+ 0 := <<>>
+ }, <<>>, _}] = parse(<<"endif::revnumber[]">>),
+ %% Some macros accept an empty target.
+ [{block_macro, #{
+ name := <<"endif">>,
+ target := <<>>,
+ 0 := <<>>
+ }, <<>>, _}] = parse(<<"endif::[]">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 17.1
+bulleted_list(St) ->
+ Line0 = read_line(St),
+ Line1 = trim(Line0),
+ {Type0, Level, ListItem} = case Line1 of
+ <<"-", C, R/bits>> when ?IS_WS(C) -> {dash, 1, R};
+ <<"*", C, R/bits>> when ?IS_WS(C) -> {star, 1, R};
+ <<"**", C, R/bits>> when ?IS_WS(C) -> {star, 2, R};
+ <<"***", C, R/bits>> when ?IS_WS(C) -> {star, 3, R};
+ <<"****", C, R/bits>> when ?IS_WS(C) -> {star, 4, R};
+ <<"*****", C, R/bits>> when ?IS_WS(C) -> {star, 5, R}
+ end,
+ Type = case Type0 of
+ dash -> bulleted_alt;
+ star -> bulleted
+ end,
+ list_item(St, #{
+ type => Type,
+ level => Level
+ }, ListItem).
+
+-ifdef(TEST).
+bulleted_list_test() ->
+ [{list_item, #{
+ type := bulleted_alt,
+ level := 1
+ }, [{paragraph, _, <<"List item.">>, _}], _}] = parse(<<"- List item.">>),
+ [{list_item, #{
+ type := bulleted,
+ level := 1
+ }, [{paragraph, _, <<"List item.">>, _}], _}] = parse(<<"* List item.">>),
+ [{list_item, #{
+ type := bulleted,
+ level := 2
+ }, [{paragraph, _, <<"List item.">>, _}], _}] = parse(<<"** List item.">>),
+ [{list_item, #{
+ type := bulleted,
+ level := 3
+ }, [{paragraph, _, <<"List item.">>, _}], _}] = parse(<<"*** List item.">>),
+ [{list_item, #{
+ type := bulleted,
+ level := 4
+ }, [{paragraph, _, <<"List item.">>, _}], _}] = parse(<<"**** List item.">>),
+ [{list_item, #{
+ type := bulleted,
+ level := 5
+ }, [{paragraph, _, <<"List item.">>, _}], _}] = parse(<<"***** List item.">>),
+ %% Two list items one after the other.
+ [
+ {list_item, #{type := bulleted, level := 1},
+ [{paragraph, _, <<"List item 1.">>, _}], _},
+ {list_item, #{type := bulleted, level := 1},
+ [{paragraph, _, <<"List item 2.">>, _}], _}
+ ] = parse(<<"* List item 1.\n* List item 2.">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 17.2
+%%
+%% We currently only implement implicit numbering.
+numbered_list(St) ->
+ Line0 = read_line(St),
+ Line1 = trim(Line0),
+ {Level, ListItem} = case Line1 of
+ <<".", C, R/bits>> when ?IS_WS(C) -> {1, R};
+ <<"..", C, R/bits>> when ?IS_WS(C) -> {2, R};
+ <<"...", C, R/bits>> when ?IS_WS(C) -> {3, R};
+ <<"....", C, R/bits>> when ?IS_WS(C) -> {4, R};
+ <<".....", C, R/bits>> when ?IS_WS(C) -> {5, R}
+ end,
+ list_item(St, #{
+ type => numbered,
+ level => Level
+ }, ListItem).
+
+-ifdef(TEST).
+numbered_list_test() ->
+ [{list_item, #{
+ type := numbered,
+ level := 1
+ }, [{paragraph, _, <<"Arabic (decimal) numbered list item.">>, _}], _}]
+ = parse(<<". Arabic (decimal) numbered list item.">>),
+ [{list_item, #{
+ type := numbered,
+ level := 2
+ }, [{paragraph, _, <<"Lower case alpha (letter) numbered list item.">>, _}], _}]
+ = parse(<<".. Lower case alpha (letter) numbered list item.">>),
+ [{list_item, #{
+ type := numbered,
+ level := 3
+ }, [{paragraph, _, <<"Lower case roman numbered list item.">>, _}], _}]
+ = parse(<<"... Lower case roman numbered list item.">>),
+ [{list_item, #{
+ type := numbered,
+ level := 4
+ }, [{paragraph, _, <<"Upper case alpha (letter) numbered list item.">>, _}], _}]
+ = parse(<<".... Upper case alpha (letter) numbered list item.">>),
+ [{list_item, #{
+ type := numbered,
+ level := 5
+ }, [{paragraph, _, <<"Upper case roman numbered list item.">>, _}], _}]
+ = parse(<<"..... Upper case roman numbered list item.">>),
+ %% Two list items one after the other.
+ [
+ {list_item, #{type := numbered, level := 1},
+ [{paragraph, _, <<"List item 1.">>, _}], _},
+ {list_item, #{type := numbered, level := 1},
+ [{paragraph, _, <<"List item 2.">>, _}], _}
+ ] = parse(<<". List item 1.\n. List item 2.">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 17.3
+%%
+%% The Asciidoc User Guide makes it sound like the
+%% label must be hard on the left margin but we don't
+%% enforce that to simplify the implementation.
+labeled_list(St) ->
+ Line0 = read_line(St),
+ %% We can't match directly to find the list separator,
+ %% we have to search for it.
+ {Label0, Sep, ListItem0} = find_labeled_list(Line0),
+ Label = trim(Label0),
+ ListItem = trim(ListItem0),
+ %% The label must not be empty.
+ true = trim(Label) =/= <<>>,
+ list_item(St, #{
+ type => labeled,
+ separator => Sep,
+ label => Label
+ }, ListItem).
+
+find_labeled_list(Line) ->
+ find_labeled_list(Line, <<>>).
+
+%% We don't have a final clause with an empty binary because
+%% we want to crash if we don't find a labeled list.
+find_labeled_list(<<"::">>, Acc) -> {Acc, <<"::">>, <<>>};
+find_labeled_list(<<":::">>, Acc) -> {Acc, <<":::">>, <<>>};
+find_labeled_list(<<"::::">>, Acc) -> {Acc, <<"::::">>, <<>>};
+find_labeled_list(<<";;">>, Acc) -> {Acc, <<";;">>, <<>>};
+find_labeled_list(<<"::", C, R/bits>>, Acc) when ?IS_WS(C) -> {Acc, <<"::">>, R};
+find_labeled_list(<<":::", C, R/bits>>, Acc) when ?IS_WS(C) -> {Acc, <<":::">>, R};
+find_labeled_list(<<"::::", C, R/bits>>, Acc) when ?IS_WS(C) -> {Acc, <<"::::">>, R};
+find_labeled_list(<<";;", C, R/bits>>, Acc) when ?IS_WS(C) -> {Acc, <<";;">>, R};
+find_labeled_list(<<C, R/bits>>, Acc) -> find_labeled_list(R, <<Acc/binary, C>>).
+
+-ifdef(TEST).
+labeled_list_test() ->
+ [{list_item, #{type := labeled, separator := <<"::">>, label := <<"Question">>},
+ [{paragraph, _, <<"Answer!">>, _}], _}] = parse(<<"Question:: Answer!">>),
+ [{list_item, #{type := labeled, separator := <<"::">>, label := <<"Question">>},
+ [{paragraph, _, <<"Answer!">>, _}], _}] = parse(<<"Question::\n Answer!">>),
+ %% Long snippet from the Asciidoc User Guide, minus literal paragraph.
+ %% @todo Add the literal paragraph back once they are implemented.
+ [
+ {list_item, #{type := labeled, separator := <<"::">>, label := <<"In">>},
+ [{paragraph, _, <<>>, _}], _},
+ {list_item, #{type := labeled, separator := <<"::">>, label := <<"Lorem">>},
+ [{paragraph, _, <<"Fusce euismod commodo velit.">>, _}], _},
+ {list_item, #{type := labeled, separator := <<"::">>, label := <<"Ipsum">>},
+ [{paragraph, _, <<"Vivamus fringilla mi eu lacus.">>, _}], _},
+ {list_item, #{type := bulleted, level := 1},
+ [{paragraph, _, <<"Vivamus fringilla mi eu lacus.">>, _}], _},
+ {list_item, #{type := bulleted, level := 1},
+ [{paragraph, _, <<"Donec eget arcu bibendum nunc consequat lobortis.">>, _}], _},
+ {list_item, #{type := labeled, separator := <<"::">>, label := <<"Dolor">>},
+ [{paragraph, _, <<"Donec eget arcu bibendum nunc consequat lobortis.">>, _}], _},
+ {list_item, #{type := labeled, separator := <<";;">>, label := <<"Suspendisse">>},
+ [{paragraph, _, <<"A massa id sem aliquam auctor.">>, _}], _},
+ {list_item, #{type := labeled, separator := <<";;">>, label := <<"Morbi">>},
+ [{paragraph, _, <<"Pretium nulla vel lorem.">>, _}], _},
+ {list_item, #{type := labeled, separator := <<";;">>, label := <<"In">>},
+ [{paragraph, _, <<"Dictum mauris in urna.">>, _}], _},
+ {list_item, #{type := labeled, separator := <<":::">>, label := <<"Vivamus">>},
+ [{paragraph, _, <<"Fringilla mi eu lacus.">>, _}], _},
+ {list_item, #{type := labeled, separator := <<":::">>, label := <<"Donec">>},
+ [{paragraph, _, <<"Eget arcu bibendum nunc consequat lobortis.">>, _}], _}
+ ] = parse(<<
+ "In::\n"
+ "Lorem::\n"
+ " Fusce euismod commodo velit.\n"
+ %% @todo Add literal paragraph back here.
+ "Ipsum:: Vivamus fringilla mi eu lacus.\n"
+ " * Vivamus fringilla mi eu lacus.\n"
+ " * Donec eget arcu bibendum nunc consequat lobortis.\n"
+ "Dolor::\n"
+ " Donec eget arcu bibendum nunc consequat lobortis.\n"
+ " Suspendisse;;\n"
+ " A massa id sem aliquam auctor.\n"
+ " Morbi;;\n"
+ " Pretium nulla vel lorem.\n"
+ " In;;\n"
+ " Dictum mauris in urna.\n"
+ " Vivamus::: Fringilla mi eu lacus.\n"
+ " Donec::: Eget arcu bibendum nunc consequat lobortis.\n">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 20
+-spec callout_list(_) -> no_return().
+callout_list(St) -> throw({not_implemented, St}). %% @todo
+
+%% Asciidoc User Guide 17
+%%
+%% We do not apply rules about blocks being contained in
+%% the list item at this stage of parsing. We only concern
+%% ourselves with identifying blocks, and then another pass
+%% will build a tree from the result of this pass.
+list_item(St, Attrs, ListItem0) ->
+ ListItem1 = trim(ListItem0),
+ Ann = ann(St),
+ %% For labeled lists, we may need to skip empty lines
+ %% until the start of the list item contents, since
+ %% it can begin on a separate line from the label.
+ _ = case {ListItem1, Attrs} of
+ {<<>>, #{type := labeled}} ->
+ read_while(St, fun skip_empty_lines/1, <<>>);
+ _ ->
+ ok
+ end,
+ %% A list item ends on end of file, empty line or when a new list starts.
+ %% Any indentation is optional and therefore removed.
+ ListItem = read_while(St, fun fold_list_item/1, ListItem1),
+ {list_item, Attrs, [{paragraph, #{}, ListItem, Ann}], Ann}.
+
+skip_empty_lines(eof) ->
+ done;
+skip_empty_lines(Line) ->
+ case trim(Line) of
+ <<>> -> {more, <<>>};
+ _ -> done
+ end.
+
+fold_list_item(eof) ->
+ done;
+fold_list_item(Line0) ->
+ case trim(Line0) of
+ <<>> -> done;
+ <<"+">> -> done;
+ <<"//", _/bits >> -> done;
+ <<"-", C, _/bits>> when ?IS_WS(C) -> done;
+ <<"*", C, _/bits>> when ?IS_WS(C) -> done;
+ <<"**", C, _/bits>> when ?IS_WS(C) -> done;
+ <<"***", C, _/bits>> when ?IS_WS(C) -> done;
+ <<"****", C, _/bits>> when ?IS_WS(C) -> done;
+ <<"*****", C, _/bits>> when ?IS_WS(C) -> done;
+ <<".", C, _/bits>> when ?IS_WS(C) -> done;
+ <<"..", C, _/bits>> when ?IS_WS(C) -> done;
+ <<"...", C, _/bits>> when ?IS_WS(C) -> done;
+ <<"....", C, _/bits>> when ?IS_WS(C) -> done;
+ <<".....", C, _/bits>> when ?IS_WS(C) -> done;
+ Line ->
+ try find_labeled_list(Line) of
+ {_, _, _} -> done
+ catch _:_ ->
+ {more, Line}
+ end
+ end.
+
+-ifdef(TEST).
+list_item_test() ->
+ [
+ {list_item, #{type := bulleted, level := 1},
+ [{paragraph, #{}, <<"List item.">>, _}], _},
+ {list_item, #{type := bulleted, level := 2},
+ [{paragraph, #{}, <<"List item.">>, _}], _},
+ {list_item, #{type := bulleted, level := 1},
+ [{paragraph, #{}, <<"List item.">>, _}], _},
+ {list_item, #{type := numbered, level := 1},
+ [{paragraph, #{}, <<"List item.">>, _}], _},
+ {list_item, #{type := numbered, level := 1},
+ [{paragraph, #{}, <<"List item.">>, _}], _},
+ {list_item, #{type := bulleted, level := 1},
+ [{paragraph, #{}, <<"List item.">>, _}], _}
+ ] = parse(<<
+ "* List item.\n"
+ "** List item.\n"
+ "* List item.\n"
+ " . List item.\n"
+ " . List item.\n"
+ "* List item.\n">>),
+ %% Properly detect a labeled list.
+ [
+ {list_item, #{type := bulleted, level := 1},
+ [{paragraph, #{}, <<"List item.\nMultiline.">>, _}], _},
+ {list_item, #{type := labeled, label := <<"Question">>},
+ [{paragraph, #{}, <<"Answer!">>, _}], _}
+ ] = parse(<<
+ "* List item.\n"
+ "Multiline.\n"
+ "Question:: Answer!\n">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 17.7
+list_item_continuation(St) ->
+ %% Continuations are a single + hard against the left margin.
+ <<$+, Whitespace/bits>> = read_line(St),
+ <<>> = trim(Whitespace),
+ {list_item_continuation, #{}, <<>>, ann(St)}.
+
+-ifdef(TEST).
+list_item_continuation_test() ->
+ [{list_item_continuation, _, _, _}] = parse(<<"+">>),
+ [{list_item_continuation, _, _, _}] = parse(<<"+ ">>),
+ [{list_item_continuation, _, _, _}] = parse(<<"+\n">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 16.2
+listing_block(St) ->
+ delimited_block(St, listing_block, $-, #{<<"subs">> => <<"verbatim">>}).
+
+-ifdef(TEST).
+listing_block_test() ->
+ Block = <<
+ "#include <stdio.h>\n"
+ "\n"
+ "int main() {\n"
+ " printf(\"Hello World!\n\");\n"
+ " exit(0);\n"
+ "}">>,
+ [{listing_block, _, Block, _}] = parse(<<
+ "--------------------------------------\n",
+ Block/binary, "\n"
+ "--------------------------------------\n">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 16.3
+literal_block(St) ->
+ delimited_block(St, literal_block, $., #{<<"subs">> => <<"verbatim">>}).
+
+-ifdef(TEST).
+literal_block_test() ->
+ Block = <<
+ "Consul *necessitatibus* per id,\n"
+ "consetetur, eu pro everti postulant\n"
+ "homero verear ea mea, qui.">>,
+ [{literal_block, _, Block, _}] = parse(<<
+ "...................................\n",
+ Block/binary, "\n"
+ "...................................\n">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 16.4
+sidebar_block(St) ->
+ delimited_block(St, sidebar_block, $*).
+
+-ifdef(TEST).
+sidebar_block_test() ->
+ Block = <<
+ "Any AsciiDoc SectionBody element (apart from\n"
+ "SidebarBlocks) can be placed inside a sidebar.">>,
+ [{sidebar_block, _, Block, _}] = parse(<<
+ "************************************************\n",
+ Block/binary, "\n"
+ "************************************************\n">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 16.5
+comment_block(St) ->
+ delimited_block(St, comment_block, $/).
+
+-ifdef(TEST).
+comment_block_test() ->
+ Block = <<
+ "CommentBlock contents are not processed by\n"
+ "asciidoc(1).">>,
+ [{comment_block, _, Block, _}] = parse(<<
+ "//////////////////////////////////////////\n",
+ Block/binary, "\n"
+ "//////////////////////////////////////////\n">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 16.6
+passthrough_block(St) ->
+ delimited_block(St, passthrough_block, $+).
+
+-ifdef(TEST).
+passthrough_block_test() ->
+ Block = <<
+ "<table border=\"1\"><tr>\n"
+ " <td>*Cell 1*</td>\n"
+ " <td>*Cell 2*</td>\n"
+ "</tr></table>">>,
+ [{passthrough_block, _, Block, _}] = parse(<<
+ "++++++++++++++++++++++++++++++++++++++\n",
+ Block/binary, "\n"
+ "++++++++++++++++++++++++++++++++++++++\n">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 16.7
+quote_block(St) ->
+ delimited_block(St, quote_block, $_).
+
+-ifdef(TEST).
+quote_block_test() ->
+ Block = <<
+ "As he spoke there was the sharp sound of horses' hoofs and\n"
+ "grating wheels against the curb, followed by a sharp pull at the\n"
+ "bell. Holmes whistled.\n"
+ "\n"
+ "\"A pair, by the sound,\" said he. \"Yes,\" he continued, glancing\n"
+ "out of the window. \"A nice little brougham and a pair of\n"
+ "beauties. A hundred and fifty guineas apiece. There's money in\n"
+ "this case, Watson, if there is nothing else.\"">>,
+ [{quote_block, _, Block, _}] = parse(<<
+ "____________________________________________________________________\n",
+ Block/binary, "\n"
+ "____________________________________________________________________\n">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 16.8
+example_block(St) ->
+ delimited_block(St, example_block, $=).
+
+-ifdef(TEST).
+example_block_test() ->
+ Block = <<
+ "Qui in magna commodo, est labitur dolorum an. Est ne magna primis\n"
+ "adolescens.">>,
+ [{example_block, _, Block, _}] = parse(<<
+ "=====================================================================\n",
+ Block/binary, "\n"
+ "=====================================================================\n">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 16
+delimited_block(St, Name, Char) ->
+ delimited_block(St, Name, Char, #{}, <<Char, Char, Char, Char>>).
+
+delimited_block(St, Name, Char, Attrs) ->
+ delimited_block(St, Name, Char, Attrs, <<Char, Char, Char, Char>>).
+
+delimited_block(St, Name, Char, Attrs, Four) ->
+ %% A delimiter block begins by a series of four or more repeated characters.
+ <<Four:4/binary, Line0/bits>> = read_line(St),
+ Ann = ann(St),
+ Line = trim(Line0, trailing),
+ repeats(Line, Char),
+ %% Get the content of the block as-is.
+ Block = read_while(St, fun(L) -> fold_delimited_block(L, Four, Char) end, <<>>),
+ %% Skip the trailing delimiter line.
+ _ = read_line(St),
+ {Name, Attrs, Block, Ann}.
+
+%% Accept eof as a closing delimiter.
+fold_delimited_block(eof, _, _) ->
+ done;
+fold_delimited_block(Line0, Four, Char) ->
+ case Line0 of
+ <<Four:4/binary, Line1/bits>> ->
+ try
+ Line = trim(Line1, trailing),
+ repeats(Line, Char),
+ done
+ catch _:_ ->
+ {more, Line0}
+ end;
+ _ ->
+ {more, Line0}
+ end.
+
+-ifdef(TEST).
+delimited_block_test() ->
+ %% Confirm that the block ends at eof.
+ %%
+ %% We see an extra line break because asciideck_line_reader adds
+ %% one at the end of every files to ease processing.
+ [{listing_block, _, <<"Hello!\n\n">>, _}] = parse(<<
+ "----\n"
+ "Hello!\n">>),
+ %% Same without a trailing line break.
+ %%
+ %% We also see an extra line break for the aforementioned reasons.
+ [{listing_block, _, <<"Hello!\n">>, _}] = parse(<<
+ "----\n"
+ "Hello!">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 16.10
+-spec open_block(_) -> no_return().
+open_block(St) -> throw({not_implemented, St}). %% @todo
+
+%% Asciidoc User Guide 23
+%%
+%% We do not parse the table in this pass. Instead we
+%% treat it like any other delimited block.
+table(St) ->
+ delimited_block(St, table, $=, #{}, <<"|===">>).
+
+-ifdef(TEST).
+table_test() ->
+ Block = <<
+ "|1 |2 |A\n"
+ "|3 |4 |B\n"
+ "|5 |6 |C">>,
+ [{table, _, Block, _}] = parse(<<
+ "|=======\n",
+ Block/binary, "\n"
+ "|=======\n">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 28
+-spec attribute_entry(_) -> no_return().
+attribute_entry(St) -> throw({not_implemented, St}). %% @todo
+
+%% Asciidoc User Guide 14, 29
+attribute_list(St) ->
+ AttrList = read_line(St),
+ attribute_list(St, AttrList).
+
+attribute_list(St, AttrList0) ->
+ %% First we remove the enclosing square brackets.
+ <<$[, AttrList1/bits>> = AttrList0,
+ AttrList2 = trim(AttrList1),
+ Len = byte_size(AttrList2) - 1,
+ <<AttrList3:Len/binary, $]>> = AttrList2,
+ AttrList = asciideck_attributes_parser:parse(AttrList3),
+ {attribute_list, AttrList, <<>>, ann(St)}.
+
+-ifdef(TEST).
+attribute_list_test() ->
+ [{attribute_list, #{0 := <<"Hello">>, 1 := <<"Hello">>}, <<>>, _}]
+ = parse(<<"[Hello]">>),
+ [{attribute_list, #{
+ 1 := <<"quote">>,
+ 2 := <<"Bertrand Russell">>,
+ 3 := <<"The World of Mathematics (1956)">>
+ }, <<>>, _}]
+ = parse(<<"[quote, Bertrand Russell, The World of Mathematics (1956)]">>),
+ [{attribute_list, #{
+ 1 := <<"22 times">>,
+ <<"backcolor">> := <<"#0e0e0e">>,
+ <<"options">> := <<"noborders,wide">>
+ }, <<>>, _}]
+ = parse(<<"[\"22 times\", backcolor=\"#0e0e0e\", options=\"noborders,wide\"]">>),
+ [{attribute_list, #{
+ 1 := <<"A footnote&#44; &#34;with an image&#34; image:smallnew.png[]">>
+ }, <<>>, _}]
+ = parse(<<"[A footnote&#44; &#34;with an image&#34; image:smallnew.png[]]">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 12
+block_title(St) ->
+ %% A block title line begins with a period and is followed by the title text.
+ <<$., Title0/bits>> = read_line(St),
+ Ann = ann(St),
+ Title = trim(Title0),
+ {block_title, #{}, Title, Ann}.
+
+-ifdef(TEST).
+block_title_test() ->
+ %% Valid.
+ [{block_title, _, <<"Notes">>, _}] = parse(<<".Notes">>),
+ [{block_title, _, <<"Notes">>, _}] = parse(<<".Notes ">>),
+ %% Invalid.
+ ?NOT(block_title, parse(<<". Notes">>)),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 15.2
+-spec literal_para(_) -> no_return().
+literal_para(St) -> throw({not_implemented, St}). %% @todo
+
+%% Asciidoc User Guide 15.4
+-spec admonition_para(_) -> no_return().
+admonition_para(St) -> throw({not_implemented, St}). %% @todo
+
+%% Asciidoc User Guide 15.1
+para(St) ->
+ %% Paragraph must be hard against the left margin.
+ <<C, _/bits>> = Para0 = read_line(St),
+ Ann = ann(St),
+ %% @todo Uncomment this line once everything else has been implemented.
+ _ = ?IS_WS(C), % false = ?IS_WS(C),
+ Para1 = trim(Para0),
+ %% Paragraph ends at blank line, end of file or start of delimited block or list.
+ Para = read_while(St, fun fold_para/1, Para1),
+ {paragraph, #{}, Para, Ann}.
+
+fold_para(eof) ->
+ done;
+fold_para(Line0) ->
+ case trim(Line0) of
+ <<>> -> done;
+ <<"+">> -> done;
+ %% @todo Detect delimited block or list.
+ Line -> {more, Line}
+ end.
+
+-ifdef(TEST).
+para_test() ->
+ LoremIpsum = <<
+ "Lorem ipsum dolor sit amet, consectetur adipiscing elit,\n"
+ "sed do eiusmod tempor incididunt ut labore et dolore\n"
+ "magna aliqua. Ut enim ad minim veniam, quis nostrud\n"
+ "exercitation ullamco laboris nisi ut aliquip ex ea\n"
+ "commodo consequat. Duis aute irure dolor in reprehenderit\n"
+ "in voluptate velit esse cillum dolore eu fugiat nulla\n"
+ "pariatur. Excepteur sint occaecat cupidatat non proident,\n"
+ "sunt in culpa qui officia deserunt mollit anim id est laborum."
+ >>,
+ %% Paragraph followed by end of file.
+ [{paragraph, _, LoremIpsum, _}] = parse(<< LoremIpsum/binary, "\n">>),
+ %% Paragraph followed by end of file with no trailing line break..
+ [{paragraph, _, LoremIpsum, _}] = parse(LoremIpsum),
+ %% Two paragraphs.
+ [{paragraph, _, LoremIpsum, _}, {paragraph, _, LoremIpsum, _}]
+ = parse(<<
+ LoremIpsum/binary,
+ "\n\n",
+ LoremIpsum/binary >>),
+ ok.
+-endif.
+
+%% Control functions.
+
+oneof([], St) ->
+ throw({error, St}); %% @todo
+oneof([Parse|Tail], St=#state{reader=ReaderPid}) ->
+ Ln = asciideck_line_reader:get_position(ReaderPid),
+ try
+ Parse(St)
+ catch _:_ ->
+ asciideck_line_reader:set_position(ReaderPid, Ln),
+ oneof(Tail, St)
+ end.
+
+skip(Parse, St=#state{reader=ReaderPid}) ->
+ Ln = asciideck_line_reader:get_position(ReaderPid),
+ try
+ _ = Parse(St),
+ skip(Parse, St)
+ catch _:_ ->
+ asciideck_line_reader:set_position(ReaderPid, Ln),
+ ok
+ end.
+
+%% Line functions.
+
+read_line(#state{reader=ReaderPid}) ->
+ asciideck_line_reader:read_line(ReaderPid).
+
+read_while(St=#state{reader=ReaderPid}, F, Acc) ->
+ Ln = asciideck_line_reader:get_position(ReaderPid),
+ case F(read_line(St)) of
+ done ->
+ asciideck_line_reader:set_position(ReaderPid, Ln),
+ Acc;
+ {more, Line} ->
+ case Acc of
+ <<>> -> read_while(St, F, Line);
+ _ -> read_while(St, F, <<Acc/binary, $\n, Line/binary>>)
+ end
+ end.
+
+ann(#state{reader=ReaderPid}) ->
+ #{line => asciideck_line_reader:get_position(ReaderPid)}.
+
+trim(Line) ->
+ trim(Line, both).
+
+trim(Line, Direction) ->
+ Regex = case Direction of
+ both -> "^[ \\t\\r\\n]+|[ \\t\\r\\n]+$";
+ trailing -> "[ \\t\\r\\n]+$"
+ end,
+ iolist_to_binary(re:replace(Line, Regex, <<>>, [global])).
+
+repeats(<<>>, _) -> ok;
+repeats(<<C, Rest/bits>>, C) -> repeats(Rest, C).
+
+while(F, Bin) ->
+ while(Bin, F, <<>>).
+
+while(<<>>, _, Acc) ->
+ {Acc, <<>>};
+while(<<C, R/bits>>, F, Acc) ->
+ case F(C) of
+ true -> while(R, F, <<Acc/binary, C>>);
+ false -> {Acc, <<C, R/bits>>}
+ end.