diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/asciideck.erl | 13 | ||||
-rw-r--r-- | src/asciideck_attributes_parser.erl | 120 | ||||
-rw-r--r-- | src/asciideck_attributes_pass.erl | 112 | ||||
-rw-r--r-- | src/asciideck_block_parser.erl | 1116 | ||||
-rw-r--r-- | src/asciideck_inline_pass.erl | 308 | ||||
-rw-r--r-- | src/asciideck_line_reader.erl | 94 | ||||
-rw-r--r-- | src/asciideck_lists_pass.erl | 155 | ||||
-rw-r--r-- | src/asciideck_parser.erl | 388 | ||||
-rw-r--r-- | src/asciideck_tables_pass.erl | 191 | ||||
-rw-r--r-- | src/asciideck_to_manpage.erl | 236 |
10 files changed, 2246 insertions, 487 deletions
diff --git a/src/asciideck.erl b/src/asciideck.erl index 749ccec..bd5792c 100644 --- a/src/asciideck.erl +++ b/src/asciideck.erl @@ -1,4 +1,4 @@ -%% Copyright (c) 2016, Loïc Hoguin <[email protected]> +%% Copyright (c) 2016-2018, Loïc Hoguin <[email protected]> %% %% Permission to use, copy, modify, and/or distribute this software for any %% purpose with or without fee is hereby granted, provided that the above @@ -32,8 +32,15 @@ parse_file(Filename, St) -> parse(Data) -> parse(Data, #{}). -parse(Data, St) when is_binary(Data) -> - asciideck_parser:parse(Data, St); +parse(Data, _St) when is_binary(Data) -> + Passes = [ + asciideck_attributes_pass, + asciideck_lists_pass, + asciideck_tables_pass, + asciideck_inline_pass + ], + lists:foldl(fun(M, AST) -> M:run(AST) end, + asciideck_block_parser:parse(Data), Passes); parse(Data, St) -> parse(iolist_to_binary(Data), St). diff --git a/src/asciideck_attributes_parser.erl b/src/asciideck_attributes_parser.erl new file mode 100644 index 0000000..b89c3f4 --- /dev/null +++ b/src/asciideck_attributes_parser.erl @@ -0,0 +1,120 @@ +%% Copyright (c) 2017-2018, Loïc Hoguin <[email protected]> +%% +%% Permission to use, copy, modify, and/or distribute this software for any +%% purpose with or without fee is hereby granted, provided that the above +%% copyright notice and this permission notice appear in all copies. +%% +%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +%% Asciidoc User Guide 29 +-module(asciideck_attributes_parser). + +-export([parse/1]). + +-type attributes() :: #{ + %% The raw attribute list. + 0 := binary(), + %% Positional attributes. + pos_integer() => binary(), + %% Named attributes. + binary() => binary() +}. +-export_type([attributes/0]). + +-define(IS_WS(C), (C =:= $\s) or (C =:= $\t)). + +-spec parse(binary()) -> attributes(). +parse(Data) -> + parse(Data, #{0 => Data}, 1). + +parse(<<>>, Attrs, _) -> + Attrs; +parse(Data, Attrs, Nth) -> + case parse_attr(Data, <<>>) of + {Value, Rest} when Nth =/= undefined -> + parse(Rest, Attrs#{Nth => Value}, Nth + 1); + {Name, Value, Rest} -> + parse(Rest, Attrs#{Name => Value}, undefined) + end. + +parse_attr(<<>>, Acc) -> + {Acc, <<>>}; +%% Skip preceding whitespace. +parse_attr(<<C, R/bits>>, <<>>) when ?IS_WS(C) -> + parse_attr(R, <<>>); +%% Parse quoted positional attributes in their own function. +parse_attr(<<$", R/bits>>, <<>>) -> + parse_quoted_attr(R, <<>>); +%% We have a named attribute, parse the value. +parse_attr(<<$=, R/bits>>, Name) when Name =/= <<>> -> + parse_attr_value(R, asciideck_block_parser:trim(Name, trailing), <<>>); +%% We have a positional attribute. +parse_attr(<<$,, R/bits>>, Value) -> + {asciideck_block_parser:trim(Value, trailing), R}; +%% Continue. +parse_attr(<<C, R/bits>>, Acc) when C =/= $= -> + parse_attr(R, <<Acc/binary, C>>). + +%% Get everything until the next double quote. +parse_quoted_attr(<<$", R/bits>>, Acc) -> + parse_quoted_attr_end(R, Acc); +parse_quoted_attr(<<$\\, $", R/bits>>, Acc) -> + parse_quoted_attr(R, <<Acc/binary, $">>); +parse_quoted_attr(<<C, R/bits>>, Acc) -> + parse_quoted_attr(R, <<Acc/binary, C>>). + +%% Skip the whitespace until the next comma or eof. +parse_quoted_attr_end(<<>>, Value) -> + {Value, <<>>}; +parse_quoted_attr_end(<<$,, R/bits>>, Value) -> + {Value, R}; +parse_quoted_attr_end(<<C, R/bits>>, Value) when ?IS_WS(C) -> + parse_quoted_attr_end(R, Value). + +parse_attr_value(<<>>, Name, Acc) -> + {Name, Acc, <<>>}; +%% Skip preceding whitespace. +parse_attr_value(<<C, R/bits>>, Name, <<>>) when ?IS_WS(C) -> + parse_attr_value(R, Name, <<>>); +%% Parse quoted positional attributes in their own function. +parse_attr_value(<<$", R/bits>>, Name, <<>>) -> + {Value, Rest} = parse_quoted_attr(R, <<>>), + {Name, Value, Rest}; +%% Done. +parse_attr_value(<<$,, R/bits>>, Name, Value) -> + {Name, asciideck_block_parser:trim(Value, trailing), R}; +%% Continue. +parse_attr_value(<<C, R/bits>>, Name, Acc) -> + parse_attr_value(R, Name, <<Acc/binary, C>>). + +-ifdef(TEST). +attribute_0_test() -> + #{0 := <<"Hello,world,width=\"50\"">>} = parse(<<"Hello,world,width=\"50\"">>), + ok. + +parse_test() -> + #{} = parse(<<>>), + #{ + 1 := <<"Hello">> + } = parse(<<"Hello">>), + #{ + 1 := <<"quote">>, + 2 := <<"Bertrand Russell">>, + 3 := <<"The World of Mathematics (1956)">> + } = parse(<<"quote, Bertrand Russell, The World of Mathematics (1956)">>), + #{ + 1 := <<"22 times">>, + <<"backcolor">> := <<"#0e0e0e">>, + <<"options">> := <<"noborders,wide">> + } = parse(<<"\"22 times\", backcolor=\"#0e0e0e\", options=\"noborders,wide\"">>), + #{ + 1 := <<"A footnote, "with an image" image:smallnew.png[]">> + } = parse(<<"A footnote, "with an image" image:smallnew.png[]">>), + ok. +-endif. diff --git a/src/asciideck_attributes_pass.erl b/src/asciideck_attributes_pass.erl new file mode 100644 index 0000000..393b57d --- /dev/null +++ b/src/asciideck_attributes_pass.erl @@ -0,0 +1,112 @@ +%% Copyright (c) 2017-2018, Loïc Hoguin <[email protected]> +%% +%% Permission to use, copy, modify, and/or distribute this software for any +%% purpose with or without fee is hereby granted, provided that the above +%% copyright notice and this permission notice appear in all copies. +%% +%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +%% The purpose of this pass is to apply attributes to +%% their corresponding blocks. For macros the attributes +%% are already applied. For inline elements the inline +%% pass is taking care of it. +-module(asciideck_attributes_pass). + +-export([run/1]). + +run([]) -> + []; +%% A block identifier is an alternative way of specifying +%% the id attribute for a block. +run([{block_id, #{id := ID}, <<>>, _}|Tail0]) -> + Tail = apply_attributes(Tail0, #{<<"id">> => ID}), + run(Tail); +%% A block title is ultimately treated as an attribute +%% for the following block. +run([{block_title, _, Title, _}|Tail0]) -> + Tail = apply_attributes(Tail0, #{<<"title">> => Title}), + run(Tail); +run([{attribute_list, Attrs, <<>>, _}|Tail0]) -> + Tail = apply_attributes(Tail0, Attrs), + run(Tail); +run([Block|Tail]) -> + [Block|run(Tail)]. + +%% Find the next block to apply the attributes. +apply_attributes([], _) -> + []; +apply_attributes(AST=[Element0={Type, Attrs0, Content, Ann}|Tail], Attrs) -> + case can_apply(Type) of + drop -> + AST; + skip -> + [Element0|apply_attributes(Tail, Attrs)]; + apply -> + Element = {Type, maps:merge(Attrs0, Attrs), Content, Ann}, + [Element|Tail] + end. + +%% Block macros already come with a mandatory attribute list. +%% Just to play it safe we drop the attributes for now. +can_apply(block_macro) -> drop; +%% If we hit a list item continuation, drop the attributes for now. +can_apply(list_item_continuation) -> drop; +%% We skip attribute lists and alike and let it sort itself out. +can_apply(block_id) -> skip; +can_apply(attribute_list) -> skip; +can_apply(block_title) -> skip; +%% Everything else is a block. +can_apply(_) -> apply. + +-ifdef(TEST). +attribute_list_test() -> + AST0 = [ + {attribute_list, #{ + 0 => <<"width=400">>, + <<"width">> => <<"400">> + }, <<>>, #{line => 1}}, + {listing_block, #{}, <<"Hello!">>, #{line => 2}} + ], + AST = [ + {listing_block, #{ + 0 => <<"width=400">>, + <<"width">> => <<"400">> + }, <<"Hello!">>, #{line => 2}} + ], + AST = run(AST0), + ok. + +block_id_test() -> + AST0 = [ + {block_id, #{ + id => <<"cowboy_req">> + }, <<>>, #{line => 1}}, + {listing_block, #{}, <<"Hello!">>, #{line => 2}} + ], + AST = [ + {listing_block, #{ + <<"id">> => <<"cowboy_req">> + }, <<"Hello!">>, #{line => 2}} + ], + AST = run(AST0), + ok. + +block_title_test() -> + AST0 = [ + {block_title, #{}, <<"Title">>, #{line => 1}}, + {listing_block, #{}, <<"Hello!">>, #{line => 2}} + ], + AST = [ + {listing_block, #{ + <<"title">> => <<"Title">> + }, <<"Hello!">>, #{line => 2}} + ], + AST = run(AST0), + ok. +-endif. diff --git a/src/asciideck_block_parser.erl b/src/asciideck_block_parser.erl new file mode 100644 index 0000000..ad63fa6 --- /dev/null +++ b/src/asciideck_block_parser.erl @@ -0,0 +1,1116 @@ +%% Copyright (c) 2016-2018, Loïc Hoguin <[email protected]> +%% +%% Permission to use, copy, modify, and/or distribute this software for any +%% purpose with or without fee is hereby granted, provided that the above +%% copyright notice and this permission notice appear in all copies. +%% +%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +%% The block parser is the first pass of the parsing of Asciidoc +%% files. It only isolates the different top-level blocks and +%% produces a representation that can then be manipulated. +%% +%% Further passes are necessary to propagate the parsed lists +%% of attributes to their respective blocks, to create actual +%% lists from the parsed list items or to parse the contents +%% of tables. Finally a final pass will parse inline elements. +%% +%% This module may be called again for parsing the content +%% of individual table cells. +-module(asciideck_block_parser). + +-export([parse/1]). + +%% @todo Temporary export. Move somewhere else. +-export([trim/1]). +-export([trim/2]). +-export([while/2]). + +-type ast() :: list(). %% @todo + +-record(state, { + reader :: pid() +}). + +-define(IS_WS(C), (C =:= $\s) or (C =:= $\t)). + +-ifdef(TEST). +-define(NOT(Type, Value), true = Type =/= element(1, hd(Value))). + +define_NOT_test() -> + %% This succeeds. + ?NOT(block_id, parse(<<"[[block,id]]">>)), + %% This fails. + {'EXIT', _} = (catch ?NOT(block_id, parse(<<"[[block_id]]">>))), + ok. +-endif. + +-spec parse(binary()) -> ast(). +parse(Data) -> + %% @todo Might want to start it supervised. + %% @todo Might want to stop it also. + {ok, ReaderPid} = asciideck_line_reader:start_link(Data), + blocks(#state{reader=ReaderPid}). + +blocks(St) -> + case block(St) of + eof -> []; + Block -> [Block|blocks(St)] + end. + +%% Asciidoc parsing never fails. If a block is not +%% formatted properly, it will be treated as a paragraph. +block(St) -> + skip(fun empty_line/1, St), + oneof([ + fun eof/1, + %% Section titles. + fun section_title/1, + fun long_section_title/1, + %% Block macros. + fun block_id/1, + fun block_macro/1, + %% Lists. + fun bulleted_list/1, + fun numbered_list/1, + fun labeled_list/1, + fun callout_list/1, + fun list_item_continuation/1, + %% Delimited blocks. + fun listing_block/1, + fun literal_block/1, + fun sidebar_block/1, + fun comment_block/1, + fun passthrough_block/1, + fun quote_block/1, + fun example_block/1, + fun open_block/1, + %% Table. + fun table/1, + %% Attributes. + fun attribute_entry/1, + fun attribute_list/1, + %% Block title. + fun block_title/1, + %% Comment lines. + fun comment_line/1, + %% Paragraphs. + fun literal_para/1, + fun admonition_para/1, + fun para/1 + ], St). + +eof(St) -> + eof = read_line(St). + +-ifdef(TEST). +eof_test() -> + [] = parse(<<>>). +-endif. + +empty_line(St) -> + <<>> = trim(read_line(St)). + +-ifdef(TEST). +empty_line_test() -> + [] = parse(<< + "\n" + " \n" + " \n" + "\n" + >>). +-endif. + +%% Asciidoc User Guide 11.2 +section_title(St) -> + {Level, Title0} = case read_line(St) of + <<"=", C, R/bits>> when ?IS_WS(C) -> {0, R}; + <<"==", C, R/bits>> when ?IS_WS(C) -> {1, R}; + <<"===", C, R/bits>> when ?IS_WS(C) -> {2, R}; + <<"====", C, R/bits>> when ?IS_WS(C) -> {3, R}; + <<"=====", C, R/bits>> when ?IS_WS(C) -> {4, R} + end, + Ann = ann(St), + Title1 = trim(Title0), + %% Optional: trailing title delimiter. + Trailer = case Level of + 0 -> <<"=">>; + 1 -> <<"==">>; + 2 -> <<"===">>; + 3 -> <<"====">>; + 4 -> <<"=====">> + end, + Len = byte_size(Title1) - Level - 2, + Title = case Title1 of + <<Title2:Len/binary, WS, Trailer/binary>> when ?IS_WS(WS) -> trim(Title2); + _ -> trim(Title1) + end, + %% Section titles must be followed by at least one empty line. + _ = empty_line(St), + %% Good! + {section_title, #{level => Level}, Title, Ann}. + +-ifdef(TEST). +section_title_test() -> + %% With trailing title delimiter. + [{section_title, #{level := 0}, <<"Document Title (level 0)">>, _}] + = parse(<<"= Document Title (level 0) =">>), + [{section_title, #{level := 1}, <<"Section Title (level 1)">>, _}] + = parse(<<"== Section Title (level 1) ==">>), + [{section_title, #{level := 2}, <<"Section Title (level 2)">>, _}] + = parse(<<"=== Section Title (level 2) ===">>), + [{section_title, #{level := 3}, <<"Section Title (level 3)">>, _}] + = parse(<<"==== Section Title (level 3) ====">>), + [{section_title, #{level := 4}, <<"Section Title (level 4)">>, _}] + = parse(<<"===== Section Title (level 4) =====">>), + %% Without trailing title delimiter. + [{section_title, #{level := 0}, <<"Document Title (level 0)">>, _}] + = parse(<<"= Document Title (level 0)">>), + [{section_title, #{level := 1}, <<"Section Title (level 1)">>, _}] + = parse(<<"== Section Title (level 1)">>), + [{section_title, #{level := 2}, <<"Section Title (level 2)">>, _}] + = parse(<<"=== Section Title (level 2)">>), + [{section_title, #{level := 3}, <<"Section Title (level 3)">>, _}] + = parse(<<"==== Section Title (level 3)">>), + [{section_title, #{level := 4}, <<"Section Title (level 4)">>, _}] + = parse(<<"===== Section Title (level 4)">>), + %% Accept more spaces before/after delimiters. + [{section_title, #{level := 0}, <<"Document Title (level 0)">>, _}] + = parse(<<"= Document Title (level 0)">>), + [{section_title, #{level := 0}, <<"Document Title (level 0)">>, _}] + = parse(<<"= Document Title (level 0) =">>), + [{section_title, #{level := 0}, <<"Document Title (level 0)">>, _}] + = parse(<<"= Document Title (level 0) =">>), + [{section_title, #{level := 0}, <<"Document Title (level 0)">>, _}] + = parse(<<"= Document Title (level 0) = ">>), + %% A space before the first delimiter is not a title. + ?NOT(section_title, parse(<<" = Document Title (level 0)">>)), + ok. +-endif. + +%% Asciidoc User Guide 11.1 +long_section_title(St) -> + %% Title must be hard against the left margin. + <<C, _/bits>> = Title0 = read_line(St), + Ann = ann(St), + false = ?IS_WS(C), + Title = trim(Title0), + %% Read the underline. + {Level, Char, Underline0} = case read_line(St) of + U = <<"=", _/bits >> -> {0, $=, U}; + U = <<"-", _/bits >> -> {1, $-, U}; + U = <<"~", _/bits >> -> {2, $~, U}; + U = <<"^", _/bits >> -> {3, $^, U}; + U = <<"+", _/bits >> -> {4, $+, U} + end, + Underline = trim(Underline0, trailing), + %% Underline must be the same character repeated over the entire line. + repeats(Underline, Char), + %% Underline must be the same size as the title, +/- 2 characters. + TLen = byte_size(Title), + ULen = byte_size(Underline), + true = (TLen >= ULen - 2) andalso (TLen =< ULen + 2), + %% Good! + {section_title, #{level => Level}, Title, Ann}. + +-ifdef(TEST). +long_section_title_test() -> + %% Same amount of characters for the underline. + [{section_title, #{level := 0}, <<"Document Title (level 0)">>, _}] = parse(<< + "Document Title (level 0)\n" + "========================">>), + [{section_title, #{level := 1}, <<"Section Title (level 1)">>, _}] = parse(<< + "Section Title (level 1)\n" + "-----------------------">>), + [{section_title, #{level := 2}, <<"Section Title (level 2)">>, _}] = parse(<< + "Section Title (level 2)\n" + "~~~~~~~~~~~~~~~~~~~~~~~">>), + [{section_title, #{level := 3}, <<"Section Title (level 3)">>, _}] = parse(<< + "Section Title (level 3)\n" + "^^^^^^^^^^^^^^^^^^^^^^^">>), + [{section_title, #{level := 4}, <<"Section Title (level 4)">>, _}] = parse(<< + "Section Title (level 4)\n" + "+++++++++++++++++++++++">>), + %% A shorter title to confirm we are not cheating. + [{section_title, #{level := 0}, <<"Hello!">>, _}] = parse(<< + "Hello!\n" + "======">>), + %% Underline can be +/- 2 characters. + [{section_title, #{level := 0}, <<"Hello!">>, _}] = parse(<< + "Hello!\n" + "====">>), + [{section_title, #{level := 0}, <<"Hello!">>, _}] = parse(<< + "Hello!\n" + "=====">>), + [{section_title, #{level := 0}, <<"Hello!">>, _}] = parse(<< + "Hello!\n" + "=======">>), + [{section_title, #{level := 0}, <<"Hello!">>, _}] = parse(<< + "Hello!\n" + "========">>), + %% Underline too short/long results in a different block. + ?NOT(section_title, parse(<< + "Hello!\n" + "===">>)), + ?NOT(section_title, parse(<< + "Hello!\n" + "=========">>)), + ok. +-endif. + +%% Asciidoc User Guide 21.2.1 +%% +%% We currently do not implement the <xreflabel> value. +%% I am also not sure what characters are allowed, +%% so what is here is what I came up with guessing. +block_id(St) -> + <<"[[", Line0/bits>> = read_line(St), + Line = trim(Line0), + Len = byte_size(Line) - 2, + <<BlockID:Len/binary, "]]">> = Line, + %% Make sure there are only valid characters. + {BlockID, <<>>} = while(fun(C) -> + (C =/= $,) andalso (C =/= $[) andalso (C =/= $]) + andalso (C =/= $\s) andalso (C =/= $\t) + end, BlockID), + %% Good! + {block_id, #{id => BlockID}, <<>>, ann(St)}. + +-ifdef(TEST). +block_id_test() -> + %% Valid. + [{block_id, #{id := <<"X30">>}, <<>>, _}] = parse(<<"[[X30]]">>), + %% Invalid. + ?NOT(block_id, parse(<<"[[block,id]]">>)), + ?NOT(block_id, parse(<<"[[block[id]]">>)), + ?NOT(block_id, parse(<<"[[block]id]]">>)), + ?NOT(block_id, parse(<<"[[block id]]">>)), + ?NOT(block_id, parse(<<"[[block\tid]]">>)), + %% Must be hard on the left of the line. + ?NOT(block_id, parse(<<" [[block_id]]">>)), + ?NOT(block_id, parse(<<"\t[[block_id]]">>)), + ok. +-endif. + +%% Asciidoc User Guide 21.2.3 +comment_line(St) -> + <<"//", Comment0/bits>> = read_line(St), + Comment = trim(Comment0), + %% Good! + {comment_line, #{<<"subs">> => <<"verbatim">>}, Comment, ann(St)}. + +-ifdef(TEST). +comment_line_test() -> + [{comment_line, _, <<"This is a comment.">>, _}] = parse(<<"// This is a comment.">>), + %% We trim the whitespace around the comment. + [{comment_line, _, <<"This is a comment.">>, _}] = parse(<<"// This is a comment.">>), + [{comment_line, _, <<"This is a comment.">>, _}] = parse(<<"// This is a comment. ">>), + [{comment_line, _, <<"This is a comment.">>, _}] = parse(<<"//\tThis is a comment.">>), + [{comment_line, _, <<"This is a comment.">>, _}] = parse(<<"// This is a comment.\t">>), + [ + {comment_line, _, <<"First line.">>, _}, + {comment_line, _, <<"Second line.">>, _} + ] = parse(<< + "// First line.\n" + "// Second line.\n">>), + %% Must be hard on the left of the line. + ?NOT(comment_line, parse(<<" // This is a comment.">>)), + ?NOT(comment_line, parse(<<"\t// This is a comment.">>)), + ok. +-endif. + +%% We currently implement the following block macros +%% from the Asciidoc User Guide: +%% +%% - image (21.2.2) +%% - include (21.3.1) +%% - ifdef (21.3.2) +%% - ifndef (21.3.2) +%% - endif (21.3.2) +block_macro(St) -> + Line0 = read_line(St), + Ann = ann(St), + %% Name must contain letters, digits or dash characters. + {Name, <<"::", Line1/bits>>} = while(fun(C) -> + ((C >= $a) andalso (C =< $z)) + orelse ((C >= $A) andalso (C =< $Z)) + orelse ((C >= $0) andalso (C =< $9)) + orelse (C =:= $-) + end, Line0), + %% Name must not begin with a dash. + true = binary:at(Name, 0) =/= $-, + %% Target must not contain whitespace characters. + %% It is followed by an [attribute list]. + {Target, AttrList0 = <<"[", _/bits>>} = while(fun(C) -> + (C =/= $[) andalso (C =/= $\s) andalso (C =/= $\t) + end, Line1), + AttrList1 = trim(AttrList0), + {attribute_list, AttrList, <<>>, _} = attribute_list(St, AttrList1), + %% Block macros must be followed by at least one empty line. + _ = empty_line(St), + {block_macro, AttrList#{ + name => Name, + target => Target + }, <<>>, Ann}. + +-ifdef(TEST). +block_macro_image_test() -> + [{block_macro, #{ + name := <<"image">>, + target := <<"images/layout.png">>, + 1 := <<"J14P main circuit board">> + }, <<>>, _}] = parse(<<"image::images/layout.png[J14P main circuit board]">>), + [{block_macro, #{ + name := <<"image">>, + target := <<"images/layout.png">>, + 1 := <<"J14P main circuit board">>, + <<"title">> := <<"Main circuit board">> + }, <<>>, _}] = parse( + <<"image::images/layout.png[\"J14P main circuit board\", " + "title=\"Main circuit board\"]">>), + ok. + +block_macro_include_test() -> + [{block_macro, #{ + name := <<"include">>, + target := <<"chapter1.txt">>, + <<"tabsize">> := <<"4">> + }, <<>>, _}] = parse(<<"include::chapter1.txt[tabsize=4]">>), + ok. + +block_macro_ifdef_test() -> + [{block_macro, #{ + name := <<"ifdef">>, + target := <<"revnumber">>, + 0 := <<>> + }, <<>>, _}] = parse(<<"ifdef::revnumber[]">>), + [{block_macro, #{ + name := <<"ifdef">>, + target := <<"revnumber">>, + 1 := <<"Version number 42">> + }, <<>>, _}] = parse(<<"ifdef::revnumber[Version number 42]">>), + ok. + +block_macro_ifndef_test() -> + [{block_macro, #{ + name := <<"ifndef">>, + target := <<"revnumber">>, + 0 := <<>> + }, <<>>, _}] = parse(<<"ifndef::revnumber[]">>), + ok. + +block_macro_endif_test() -> + [{block_macro, #{ + name := <<"endif">>, + target := <<"revnumber">>, + 0 := <<>> + }, <<>>, _}] = parse(<<"endif::revnumber[]">>), + %% Some macros accept an empty target. + [{block_macro, #{ + name := <<"endif">>, + target := <<>>, + 0 := <<>> + }, <<>>, _}] = parse(<<"endif::[]">>), + ok. +-endif. + +%% Asciidoc User Guide 17.1 +bulleted_list(St) -> + Line0 = read_line(St), + Line1 = trim(Line0), + {Type0, Level, ListItem} = case Line1 of + <<"-", C, R/bits>> when ?IS_WS(C) -> {dash, 1, R}; + <<"*", C, R/bits>> when ?IS_WS(C) -> {star, 1, R}; + <<"**", C, R/bits>> when ?IS_WS(C) -> {star, 2, R}; + <<"***", C, R/bits>> when ?IS_WS(C) -> {star, 3, R}; + <<"****", C, R/bits>> when ?IS_WS(C) -> {star, 4, R}; + <<"*****", C, R/bits>> when ?IS_WS(C) -> {star, 5, R} + end, + Type = case Type0 of + dash -> bulleted_alt; + star -> bulleted + end, + list_item(St, #{ + type => Type, + level => Level + }, ListItem). + +-ifdef(TEST). +bulleted_list_test() -> + [{list_item, #{ + type := bulleted_alt, + level := 1 + }, [{paragraph, _, <<"List item.">>, _}], _}] = parse(<<"- List item.">>), + [{list_item, #{ + type := bulleted, + level := 1 + }, [{paragraph, _, <<"List item.">>, _}], _}] = parse(<<"* List item.">>), + [{list_item, #{ + type := bulleted, + level := 2 + }, [{paragraph, _, <<"List item.">>, _}], _}] = parse(<<"** List item.">>), + [{list_item, #{ + type := bulleted, + level := 3 + }, [{paragraph, _, <<"List item.">>, _}], _}] = parse(<<"*** List item.">>), + [{list_item, #{ + type := bulleted, + level := 4 + }, [{paragraph, _, <<"List item.">>, _}], _}] = parse(<<"**** List item.">>), + [{list_item, #{ + type := bulleted, + level := 5 + }, [{paragraph, _, <<"List item.">>, _}], _}] = parse(<<"***** List item.">>), + %% Two list items one after the other. + [ + {list_item, #{type := bulleted, level := 1}, + [{paragraph, _, <<"List item 1.">>, _}], _}, + {list_item, #{type := bulleted, level := 1}, + [{paragraph, _, <<"List item 2.">>, _}], _} + ] = parse(<<"* List item 1.\n* List item 2.">>), + ok. +-endif. + +%% Asciidoc User Guide 17.2 +%% +%% We currently only implement implicit numbering. +numbered_list(St) -> + Line0 = read_line(St), + Line1 = trim(Line0), + {Level, ListItem} = case Line1 of + <<".", C, R/bits>> when ?IS_WS(C) -> {1, R}; + <<"..", C, R/bits>> when ?IS_WS(C) -> {2, R}; + <<"...", C, R/bits>> when ?IS_WS(C) -> {3, R}; + <<"....", C, R/bits>> when ?IS_WS(C) -> {4, R}; + <<".....", C, R/bits>> when ?IS_WS(C) -> {5, R} + end, + list_item(St, #{ + type => numbered, + level => Level + }, ListItem). + +-ifdef(TEST). +numbered_list_test() -> + [{list_item, #{ + type := numbered, + level := 1 + }, [{paragraph, _, <<"Arabic (decimal) numbered list item.">>, _}], _}] + = parse(<<". Arabic (decimal) numbered list item.">>), + [{list_item, #{ + type := numbered, + level := 2 + }, [{paragraph, _, <<"Lower case alpha (letter) numbered list item.">>, _}], _}] + = parse(<<".. Lower case alpha (letter) numbered list item.">>), + [{list_item, #{ + type := numbered, + level := 3 + }, [{paragraph, _, <<"Lower case roman numbered list item.">>, _}], _}] + = parse(<<"... Lower case roman numbered list item.">>), + [{list_item, #{ + type := numbered, + level := 4 + }, [{paragraph, _, <<"Upper case alpha (letter) numbered list item.">>, _}], _}] + = parse(<<".... Upper case alpha (letter) numbered list item.">>), + [{list_item, #{ + type := numbered, + level := 5 + }, [{paragraph, _, <<"Upper case roman numbered list item.">>, _}], _}] + = parse(<<"..... Upper case roman numbered list item.">>), + %% Two list items one after the other. + [ + {list_item, #{type := numbered, level := 1}, + [{paragraph, _, <<"List item 1.">>, _}], _}, + {list_item, #{type := numbered, level := 1}, + [{paragraph, _, <<"List item 2.">>, _}], _} + ] = parse(<<". List item 1.\n. List item 2.">>), + ok. +-endif. + +%% Asciidoc User Guide 17.3 +%% +%% The Asciidoc User Guide makes it sound like the +%% label must be hard on the left margin but we don't +%% enforce that to simplify the implementation. +labeled_list(St) -> + Line0 = read_line(St), + %% We can't match directly to find the list separator, + %% we have to search for it. + {Label0, Sep, ListItem0} = find_labeled_list(Line0), + Label = trim(Label0), + ListItem = trim(ListItem0), + %% The label must not be empty. + true = trim(Label) =/= <<>>, + list_item(St, #{ + type => labeled, + separator => Sep, + label => Label + }, ListItem). + +find_labeled_list(Line) -> + find_labeled_list(Line, <<>>). + +%% We don't have a final clause with an empty binary because +%% we want to crash if we don't find a labeled list. +find_labeled_list(<<"::">>, Acc) -> {Acc, <<"::">>, <<>>}; +find_labeled_list(<<":::">>, Acc) -> {Acc, <<":::">>, <<>>}; +find_labeled_list(<<"::::">>, Acc) -> {Acc, <<"::::">>, <<>>}; +find_labeled_list(<<";;">>, Acc) -> {Acc, <<";;">>, <<>>}; +find_labeled_list(<<"::", C, R/bits>>, Acc) when ?IS_WS(C) -> {Acc, <<"::">>, R}; +find_labeled_list(<<":::", C, R/bits>>, Acc) when ?IS_WS(C) -> {Acc, <<":::">>, R}; +find_labeled_list(<<"::::", C, R/bits>>, Acc) when ?IS_WS(C) -> {Acc, <<"::::">>, R}; +find_labeled_list(<<";;", C, R/bits>>, Acc) when ?IS_WS(C) -> {Acc, <<";;">>, R}; +find_labeled_list(<<C, R/bits>>, Acc) -> find_labeled_list(R, <<Acc/binary, C>>). + +-ifdef(TEST). +labeled_list_test() -> + [{list_item, #{type := labeled, separator := <<"::">>, label := <<"Question">>}, + [{paragraph, _, <<"Answer!">>, _}], _}] = parse(<<"Question:: Answer!">>), + [{list_item, #{type := labeled, separator := <<"::">>, label := <<"Question">>}, + [{paragraph, _, <<"Answer!">>, _}], _}] = parse(<<"Question::\n Answer!">>), + %% Long snippet from the Asciidoc User Guide, minus literal paragraph. + %% @todo Add the literal paragraph back once they are implemented. + [ + {list_item, #{type := labeled, separator := <<"::">>, label := <<"In">>}, + [{paragraph, _, <<>>, _}], _}, + {list_item, #{type := labeled, separator := <<"::">>, label := <<"Lorem">>}, + [{paragraph, _, <<"Fusce euismod commodo velit.">>, _}], _}, + {list_item, #{type := labeled, separator := <<"::">>, label := <<"Ipsum">>}, + [{paragraph, _, <<"Vivamus fringilla mi eu lacus.">>, _}], _}, + {list_item, #{type := bulleted, level := 1}, + [{paragraph, _, <<"Vivamus fringilla mi eu lacus.">>, _}], _}, + {list_item, #{type := bulleted, level := 1}, + [{paragraph, _, <<"Donec eget arcu bibendum nunc consequat lobortis.">>, _}], _}, + {list_item, #{type := labeled, separator := <<"::">>, label := <<"Dolor">>}, + [{paragraph, _, <<"Donec eget arcu bibendum nunc consequat lobortis.">>, _}], _}, + {list_item, #{type := labeled, separator := <<";;">>, label := <<"Suspendisse">>}, + [{paragraph, _, <<"A massa id sem aliquam auctor.">>, _}], _}, + {list_item, #{type := labeled, separator := <<";;">>, label := <<"Morbi">>}, + [{paragraph, _, <<"Pretium nulla vel lorem.">>, _}], _}, + {list_item, #{type := labeled, separator := <<";;">>, label := <<"In">>}, + [{paragraph, _, <<"Dictum mauris in urna.">>, _}], _}, + {list_item, #{type := labeled, separator := <<":::">>, label := <<"Vivamus">>}, + [{paragraph, _, <<"Fringilla mi eu lacus.">>, _}], _}, + {list_item, #{type := labeled, separator := <<":::">>, label := <<"Donec">>}, + [{paragraph, _, <<"Eget arcu bibendum nunc consequat lobortis.">>, _}], _} + ] = parse(<< + "In::\n" + "Lorem::\n" + " Fusce euismod commodo velit.\n" + %% @todo Add literal paragraph back here. + "Ipsum:: Vivamus fringilla mi eu lacus.\n" + " * Vivamus fringilla mi eu lacus.\n" + " * Donec eget arcu bibendum nunc consequat lobortis.\n" + "Dolor::\n" + " Donec eget arcu bibendum nunc consequat lobortis.\n" + " Suspendisse;;\n" + " A massa id sem aliquam auctor.\n" + " Morbi;;\n" + " Pretium nulla vel lorem.\n" + " In;;\n" + " Dictum mauris in urna.\n" + " Vivamus::: Fringilla mi eu lacus.\n" + " Donec::: Eget arcu bibendum nunc consequat lobortis.\n">>), + ok. +-endif. + +%% Asciidoc User Guide 20 +-spec callout_list(_) -> no_return(). +callout_list(St) -> throw({not_implemented, St}). %% @todo + +%% Asciidoc User Guide 17 +%% +%% We do not apply rules about blocks being contained in +%% the list item at this stage of parsing. We only concern +%% ourselves with identifying blocks, and then another pass +%% will build a tree from the result of this pass. +list_item(St, Attrs, ListItem0) -> + ListItem1 = trim(ListItem0), + Ann = ann(St), + %% For labeled lists, we may need to skip empty lines + %% until the start of the list item contents, since + %% it can begin on a separate line from the label. + _ = case {ListItem1, Attrs} of + {<<>>, #{type := labeled}} -> + read_while(St, fun skip_empty_lines/1, <<>>); + _ -> + ok + end, + %% A list item ends on end of file, empty line or when a new list starts. + %% Any indentation is optional and therefore removed. + ListItem = read_while(St, fun fold_list_item/1, ListItem1), + {list_item, Attrs, [{paragraph, #{}, ListItem, Ann}], Ann}. + +skip_empty_lines(eof) -> + done; +skip_empty_lines(Line) -> + case trim(Line) of + <<>> -> {more, <<>>}; + _ -> done + end. + +fold_list_item(eof) -> + done; +fold_list_item(Line0) -> + case trim(Line0) of + <<>> -> done; + <<"+">> -> done; + <<"//", _/bits >> -> done; + <<"-", C, _/bits>> when ?IS_WS(C) -> done; + <<"*", C, _/bits>> when ?IS_WS(C) -> done; + <<"**", C, _/bits>> when ?IS_WS(C) -> done; + <<"***", C, _/bits>> when ?IS_WS(C) -> done; + <<"****", C, _/bits>> when ?IS_WS(C) -> done; + <<"*****", C, _/bits>> when ?IS_WS(C) -> done; + <<".", C, _/bits>> when ?IS_WS(C) -> done; + <<"..", C, _/bits>> when ?IS_WS(C) -> done; + <<"...", C, _/bits>> when ?IS_WS(C) -> done; + <<"....", C, _/bits>> when ?IS_WS(C) -> done; + <<".....", C, _/bits>> when ?IS_WS(C) -> done; + Line -> + try find_labeled_list(Line) of + {_, _, _} -> done + catch _:_ -> + {more, Line} + end + end. + +-ifdef(TEST). +list_item_test() -> + [ + {list_item, #{type := bulleted, level := 1}, + [{paragraph, #{}, <<"List item.">>, _}], _}, + {list_item, #{type := bulleted, level := 2}, + [{paragraph, #{}, <<"List item.">>, _}], _}, + {list_item, #{type := bulleted, level := 1}, + [{paragraph, #{}, <<"List item.">>, _}], _}, + {list_item, #{type := numbered, level := 1}, + [{paragraph, #{}, <<"List item.">>, _}], _}, + {list_item, #{type := numbered, level := 1}, + [{paragraph, #{}, <<"List item.">>, _}], _}, + {list_item, #{type := bulleted, level := 1}, + [{paragraph, #{}, <<"List item.">>, _}], _} + ] = parse(<< + "* List item.\n" + "** List item.\n" + "* List item.\n" + " . List item.\n" + " . List item.\n" + "* List item.\n">>), + %% Properly detect a labeled list. + [ + {list_item, #{type := bulleted, level := 1}, + [{paragraph, #{}, <<"List item.\nMultiline.">>, _}], _}, + {list_item, #{type := labeled, label := <<"Question">>}, + [{paragraph, #{}, <<"Answer!">>, _}], _} + ] = parse(<< + "* List item.\n" + "Multiline.\n" + "Question:: Answer!\n">>), + ok. +-endif. + +%% Asciidoc User Guide 17.7 +list_item_continuation(St) -> + %% Continuations are a single + hard against the left margin. + <<$+, Whitespace/bits>> = read_line(St), + <<>> = trim(Whitespace), + {list_item_continuation, #{}, <<>>, ann(St)}. + +-ifdef(TEST). +list_item_continuation_test() -> + [{list_item_continuation, _, _, _}] = parse(<<"+">>), + [{list_item_continuation, _, _, _}] = parse(<<"+ ">>), + [{list_item_continuation, _, _, _}] = parse(<<"+\n">>), + ok. +-endif. + +%% Asciidoc User Guide 16.2 +listing_block(St) -> + delimited_block(St, listing_block, $-, #{<<"subs">> => <<"verbatim">>}). + +-ifdef(TEST). +listing_block_test() -> + Block = << + "#include <stdio.h>\n" + "\n" + "int main() {\n" + " printf(\"Hello World!\n\");\n" + " exit(0);\n" + "}">>, + [{listing_block, _, Block, _}] = parse(<< + "--------------------------------------\n", + Block/binary, "\n" + "--------------------------------------\n">>), + ok. +-endif. + +%% Asciidoc User Guide 16.3 +literal_block(St) -> + delimited_block(St, literal_block, $., #{<<"subs">> => <<"verbatim">>}). + +-ifdef(TEST). +literal_block_test() -> + Block = << + "Consul *necessitatibus* per id,\n" + "consetetur, eu pro everti postulant\n" + "homero verear ea mea, qui.">>, + [{literal_block, _, Block, _}] = parse(<< + "...................................\n", + Block/binary, "\n" + "...................................\n">>), + ok. +-endif. + +%% Asciidoc User Guide 16.4 +sidebar_block(St) -> + delimited_block(St, sidebar_block, $*). + +-ifdef(TEST). +sidebar_block_test() -> + Block = << + "Any AsciiDoc SectionBody element (apart from\n" + "SidebarBlocks) can be placed inside a sidebar.">>, + [{sidebar_block, _, Block, _}] = parse(<< + "************************************************\n", + Block/binary, "\n" + "************************************************\n">>), + ok. +-endif. + +%% Asciidoc User Guide 16.5 +comment_block(St) -> + delimited_block(St, comment_block, $/). + +-ifdef(TEST). +comment_block_test() -> + Block = << + "CommentBlock contents are not processed by\n" + "asciidoc(1).">>, + [{comment_block, _, Block, _}] = parse(<< + "//////////////////////////////////////////\n", + Block/binary, "\n" + "//////////////////////////////////////////\n">>), + ok. +-endif. + +%% Asciidoc User Guide 16.6 +passthrough_block(St) -> + delimited_block(St, passthrough_block, $+). + +-ifdef(TEST). +passthrough_block_test() -> + Block = << + "<table border=\"1\"><tr>\n" + " <td>*Cell 1*</td>\n" + " <td>*Cell 2*</td>\n" + "</tr></table>">>, + [{passthrough_block, _, Block, _}] = parse(<< + "++++++++++++++++++++++++++++++++++++++\n", + Block/binary, "\n" + "++++++++++++++++++++++++++++++++++++++\n">>), + ok. +-endif. + +%% Asciidoc User Guide 16.7 +quote_block(St) -> + delimited_block(St, quote_block, $_). + +-ifdef(TEST). +quote_block_test() -> + Block = << + "As he spoke there was the sharp sound of horses' hoofs and\n" + "grating wheels against the curb, followed by a sharp pull at the\n" + "bell. Holmes whistled.\n" + "\n" + "\"A pair, by the sound,\" said he. \"Yes,\" he continued, glancing\n" + "out of the window. \"A nice little brougham and a pair of\n" + "beauties. A hundred and fifty guineas apiece. There's money in\n" + "this case, Watson, if there is nothing else.\"">>, + [{quote_block, _, Block, _}] = parse(<< + "____________________________________________________________________\n", + Block/binary, "\n" + "____________________________________________________________________\n">>), + ok. +-endif. + +%% Asciidoc User Guide 16.8 +example_block(St) -> + delimited_block(St, example_block, $=). + +-ifdef(TEST). +example_block_test() -> + Block = << + "Qui in magna commodo, est labitur dolorum an. Est ne magna primis\n" + "adolescens.">>, + [{example_block, _, Block, _}] = parse(<< + "=====================================================================\n", + Block/binary, "\n" + "=====================================================================\n">>), + ok. +-endif. + +%% Asciidoc User Guide 16 +delimited_block(St, Name, Char) -> + delimited_block(St, Name, Char, #{}, <<Char, Char, Char, Char>>). + +delimited_block(St, Name, Char, Attrs) -> + delimited_block(St, Name, Char, Attrs, <<Char, Char, Char, Char>>). + +delimited_block(St, Name, Char, Attrs, Four) -> + %% A delimiter block begins by a series of four or more repeated characters. + <<Four:4/binary, Line0/bits>> = read_line(St), + Ann = ann(St), + Line = trim(Line0, trailing), + repeats(Line, Char), + %% Get the content of the block as-is. + Block = read_while(St, fun(L) -> fold_delimited_block(L, Four, Char) end, <<>>), + %% Skip the trailing delimiter line. + _ = read_line(St), + {Name, Attrs, Block, Ann}. + +%% Accept eof as a closing delimiter. +fold_delimited_block(eof, _, _) -> + done; +fold_delimited_block(Line0, Four, Char) -> + case Line0 of + <<Four:4/binary, Line1/bits>> -> + try + Line = trim(Line1, trailing), + repeats(Line, Char), + done + catch _:_ -> + {more, Line0} + end; + _ -> + {more, Line0} + end. + +-ifdef(TEST). +delimited_block_test() -> + %% Confirm that the block ends at eof. + %% + %% We see an extra line break because asciideck_line_reader adds + %% one at the end of every files to ease processing. + [{listing_block, _, <<"Hello!\n\n">>, _}] = parse(<< + "----\n" + "Hello!\n">>), + %% Same without a trailing line break. + %% + %% We also see an extra line break for the aforementioned reasons. + [{listing_block, _, <<"Hello!\n">>, _}] = parse(<< + "----\n" + "Hello!">>), + ok. +-endif. + +%% Asciidoc User Guide 16.10 +-spec open_block(_) -> no_return(). +open_block(St) -> throw({not_implemented, St}). %% @todo + +%% Asciidoc User Guide 23 +%% +%% We do not parse the table in this pass. Instead we +%% treat it like any other delimited block. +table(St) -> + delimited_block(St, table, $=, #{}, <<"|===">>). + +-ifdef(TEST). +table_test() -> + Block = << + "|1 |2 |A\n" + "|3 |4 |B\n" + "|5 |6 |C">>, + [{table, _, Block, _}] = parse(<< + "|=======\n", + Block/binary, "\n" + "|=======\n">>), + ok. +-endif. + +%% Asciidoc User Guide 28 +-spec attribute_entry(_) -> no_return(). +attribute_entry(St) -> throw({not_implemented, St}). %% @todo + +%% Asciidoc User Guide 14, 29 +attribute_list(St) -> + AttrList = read_line(St), + attribute_list(St, AttrList). + +attribute_list(St, AttrList0) -> + %% First we remove the enclosing square brackets. + <<$[, AttrList1/bits>> = AttrList0, + AttrList2 = trim(AttrList1), + Len = byte_size(AttrList2) - 1, + <<AttrList3:Len/binary, $]>> = AttrList2, + AttrList = asciideck_attributes_parser:parse(AttrList3), + {attribute_list, AttrList, <<>>, ann(St)}. + +-ifdef(TEST). +attribute_list_test() -> + [{attribute_list, #{0 := <<"Hello">>, 1 := <<"Hello">>}, <<>>, _}] + = parse(<<"[Hello]">>), + [{attribute_list, #{ + 1 := <<"quote">>, + 2 := <<"Bertrand Russell">>, + 3 := <<"The World of Mathematics (1956)">> + }, <<>>, _}] + = parse(<<"[quote, Bertrand Russell, The World of Mathematics (1956)]">>), + [{attribute_list, #{ + 1 := <<"22 times">>, + <<"backcolor">> := <<"#0e0e0e">>, + <<"options">> := <<"noborders,wide">> + }, <<>>, _}] + = parse(<<"[\"22 times\", backcolor=\"#0e0e0e\", options=\"noborders,wide\"]">>), + [{attribute_list, #{ + 1 := <<"A footnote, "with an image" image:smallnew.png[]">> + }, <<>>, _}] + = parse(<<"[A footnote, "with an image" image:smallnew.png[]]">>), + ok. +-endif. + +%% Asciidoc User Guide 12 +block_title(St) -> + %% A block title line begins with a period and is followed by the title text. + <<$., Title0/bits>> = read_line(St), + Ann = ann(St), + Title = trim(Title0), + {block_title, #{}, Title, Ann}. + +-ifdef(TEST). +block_title_test() -> + %% Valid. + [{block_title, _, <<"Notes">>, _}] = parse(<<".Notes">>), + [{block_title, _, <<"Notes">>, _}] = parse(<<".Notes ">>), + %% Invalid. + ?NOT(block_title, parse(<<". Notes">>)), + ok. +-endif. + +%% Asciidoc User Guide 15.2 +-spec literal_para(_) -> no_return(). +literal_para(St) -> throw({not_implemented, St}). %% @todo + +%% Asciidoc User Guide 15.4 +-spec admonition_para(_) -> no_return(). +admonition_para(St) -> throw({not_implemented, St}). %% @todo + +%% Asciidoc User Guide 15.1 +para(St) -> + %% Paragraph must be hard against the left margin. + <<C, _/bits>> = Para0 = read_line(St), + Ann = ann(St), + %% @todo Uncomment this line once everything else has been implemented. + _ = ?IS_WS(C), % false = ?IS_WS(C), + Para1 = trim(Para0), + %% Paragraph ends at blank line, end of file or start of delimited block or list. + Para = read_while(St, fun fold_para/1, Para1), + {paragraph, #{}, Para, Ann}. + +fold_para(eof) -> + done; +fold_para(Line0) -> + case trim(Line0) of + <<>> -> done; + <<"+">> -> done; + %% @todo Detect delimited block or list. + Line -> {more, Line} + end. + +-ifdef(TEST). +para_test() -> + LoremIpsum = << + "Lorem ipsum dolor sit amet, consectetur adipiscing elit,\n" + "sed do eiusmod tempor incididunt ut labore et dolore\n" + "magna aliqua. Ut enim ad minim veniam, quis nostrud\n" + "exercitation ullamco laboris nisi ut aliquip ex ea\n" + "commodo consequat. Duis aute irure dolor in reprehenderit\n" + "in voluptate velit esse cillum dolore eu fugiat nulla\n" + "pariatur. Excepteur sint occaecat cupidatat non proident,\n" + "sunt in culpa qui officia deserunt mollit anim id est laborum." + >>, + %% Paragraph followed by end of file. + [{paragraph, _, LoremIpsum, _}] = parse(<< LoremIpsum/binary, "\n">>), + %% Paragraph followed by end of file with no trailing line break.. + [{paragraph, _, LoremIpsum, _}] = parse(LoremIpsum), + %% Two paragraphs. + [{paragraph, _, LoremIpsum, _}, {paragraph, _, LoremIpsum, _}] + = parse(<< + LoremIpsum/binary, + "\n\n", + LoremIpsum/binary >>), + ok. +-endif. + +%% Control functions. + +oneof([], St) -> + throw({error, St}); %% @todo +oneof([Parse|Tail], St=#state{reader=ReaderPid}) -> + Ln = asciideck_line_reader:get_position(ReaderPid), + try + Parse(St) + catch _:_ -> + asciideck_line_reader:set_position(ReaderPid, Ln), + oneof(Tail, St) + end. + +skip(Parse, St=#state{reader=ReaderPid}) -> + Ln = asciideck_line_reader:get_position(ReaderPid), + try + _ = Parse(St), + skip(Parse, St) + catch _:_ -> + asciideck_line_reader:set_position(ReaderPid, Ln), + ok + end. + +%% Line functions. + +read_line(#state{reader=ReaderPid}) -> + asciideck_line_reader:read_line(ReaderPid). + +read_while(St=#state{reader=ReaderPid}, F, Acc) -> + Ln = asciideck_line_reader:get_position(ReaderPid), + case F(read_line(St)) of + done -> + asciideck_line_reader:set_position(ReaderPid, Ln), + Acc; + {more, Line} -> + case Acc of + <<>> -> read_while(St, F, Line); + _ -> read_while(St, F, <<Acc/binary, $\n, Line/binary>>) + end + end. + +ann(#state{reader=ReaderPid}) -> + #{line => asciideck_line_reader:get_position(ReaderPid)}. + +trim(Line) -> + trim(Line, both). + +trim(Line, Direction) -> + Regex = case Direction of + both -> "^[ \\t\\r\\n]+|[ \\t\\r\\n]+$"; + trailing -> "[ \\t\\r\\n]+$" + end, + iolist_to_binary(re:replace(Line, Regex, <<>>, [global])). + +repeats(<<>>, _) -> ok; +repeats(<<C, Rest/bits>>, C) -> repeats(Rest, C). + +while(F, Bin) -> + while(Bin, F, <<>>). + +while(<<>>, _, Acc) -> + {Acc, <<>>}; +while(<<C, R/bits>>, F, Acc) -> + case F(C) of + true -> while(R, F, <<Acc/binary, C>>); + false -> {Acc, <<C, R/bits>>} + end. diff --git a/src/asciideck_inline_pass.erl b/src/asciideck_inline_pass.erl new file mode 100644 index 0000000..3ed79b1 --- /dev/null +++ b/src/asciideck_inline_pass.erl @@ -0,0 +1,308 @@ +%% Copyright (c) 2017-2018, Loïc Hoguin <[email protected]> +%% +%% Permission to use, copy, modify, and/or distribute this software for any +%% purpose with or without fee is hereby granted, provided that the above +%% copyright notice and this permission notice appear in all copies. +%% +%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +%% This pass walks over the tree and parses inline elements. +-module(asciideck_inline_pass). + +-export([run/1]). + +-import(asciideck_block_parser, [trim/1, while/2]). + +-type inline_ast() :: list(). %% @todo +-export_type([inline_ast/0]). + +run([]) -> + []; +run([Data|Tail]) when is_binary(Data) -> + [inline(Data)|run(Tail)]; +%% We do not do any inline formatting for verbatim blocks, +%% for example listing blocks. +%% +%% @todo subs is a list of values. +run([Item={_, #{<<"subs">> := <<"verbatim">>}, _, _}|Tail]) -> + [Item|run(Tail)]; +%% Labeled lists' labels can also have inline formatting. +run([{Type, Attrs=#{label := Label}, Items, Ann}|Tail]) when is_list(Items) -> + [{Type, Attrs#{label => inline(Label)}, run(Items), Ann}|run(Tail)]; +run([{Type, Attrs, Items, Ann}|Tail]) when is_list(Items) -> + [{Type, Attrs, run(Items), Ann}|run(Tail)]; +run([{Type, Attrs, Data, Ann}|Tail]) -> + [{Type, Attrs, inline(Data), Ann}|run(Tail)]. + +%% We reduce inline content with a single text element +%% with no formatting to a simple binary. +inline(<<>>) -> + <<>>; +inline(Data) -> + case inline(Data, <<>>, []) of + [] -> <<>>; + [Text] when is_binary(Text) -> Text; + AST -> AST + end. + +-spec inline(binary(), binary(), inline_ast()) -> inline_ast(). +inline(<<>>, <<>>, Acc) -> + lists:reverse(Acc); +inline(<<>>, BinAcc, Acc) -> + lists:reverse([BinAcc|Acc]); +inline(Data, BinAcc, Acc) -> + oneof(Data, BinAcc, Acc, [ + %% Links. + fun xref/2, + fun link/2, + fun http_link/2, + fun https_link/2, + %% Quoted text. + fun emphasized_single_quote/2, + fun emphasized_underline/2, + fun strong/2, + %% Passthrough macros. + fun inline_literal_passthrough/2 + ]). + +%% The inline pass replaces \r\n and \n with a simple space +%% when it occurs within normal text. +oneof(<<$\r, $\n, Rest/bits>>, BinAcc, Acc, []) -> + inline(Rest, <<BinAcc/binary, $\s>>, Acc); +oneof(<<$\n, Rest/bits>>, BinAcc, Acc, []) -> + inline(Rest, <<BinAcc/binary, $\s>>, Acc); +oneof(<<C, Rest/bits>>, BinAcc, Acc, []) -> + inline(Rest, <<BinAcc/binary, C>>, Acc); +oneof(Data, BinAcc, Acc, [Parse|Tail]) -> + Prev = case BinAcc of + <<>> -> undefined; + _ -> binary:last(BinAcc) + end, + try Parse(Data, Prev) of + {ok, Inline, Rest} when BinAcc =:= <<>> -> + inline(Rest, BinAcc, [Inline|Acc]); + {ok, Inline, Rest} -> + inline(Rest, <<>>, [Inline, BinAcc|Acc]); + {skip, Text, Rest} -> + oneof(Rest, <<BinAcc/binary, Text/binary>>, Acc, Tail) + catch _:_ -> + oneof(Data, BinAcc, Acc, Tail) + end. + +-ifdef(TEST). +text_test() -> + <<>> = inline(<<>>), + <<"Hello, Robert">> = inline(<<"Hello, Robert">>), + ok. +-endif. + +-define(IS_BOUNDARY(C), C =:= undefined; C =:= $\s; C =:= $\t; C =:= $\r; C =:= $\n; C =:= $(). + +%% Asciidoc User Guide 21.2.1 +%% +%% We currently do not implement the <<...>> form. +xref(<<"xref:", IDAndCaption/bits>>, Prev) when ?IS_BOUNDARY(Prev) -> + %% ID must not contain whitespace characters. + {ID, <<"[", Caption0/bits>>} = while(fun(C) -> + (C =/= $[) andalso (C =/= $\s) andalso (C =/= $\t) + end, IDAndCaption), + %% It is followed by a caption. + {Caption1, <<"]", Rest/bits>>} = while(fun(C) -> + C =/= $] + end, Caption0), + Caption = trim(Caption1), + {ok, {xref, #{ + id => ID + }, Caption, inline}, Rest}. + +-ifdef(TEST). +xref_test() -> + [{xref, #{ + id := <<"tiger_image">> + }, <<"face of a tiger">>, _}] = inline(<<"xref:tiger_image[face of a tiger]">>), + ok. +-endif. + +%% Asciidoc User Guide 21.1.3 +link(<<"link:", TargetAndCaption/bits>>, Prev) when ?IS_BOUNDARY(Prev) -> + %% Target must not contain whitespace characters. + {Target, <<"[", Caption0/bits>>} = while(fun(C) -> + (C =/= $[) andalso (C =/= $\s) andalso (C =/= $\t) + andalso (C =/= $\r) andalso (C =/= $\n) + end, TargetAndCaption), + %% It is followed by a caption. + {Caption1, <<"]", Rest/bits>>} = while(fun(C) -> + C =/= $] + end, Caption0), + Caption = trim(Caption1), + {ok, {link, #{ + target => Target + }, Caption, inline}, Rest}. + +-ifdef(TEST). +link_test() -> + [{link, #{ + target := <<"downloads/foo.zip">> + }, <<"download foo.zip">>, _}] = inline(<<"link:downloads/foo.zip[download foo.zip]">>), + [{link, #{ + target := <<"chapter1.asciidoc#fragment">> + }, <<"Chapter 1.">>, _}] = inline(<<"link:chapter1.asciidoc#fragment[Chapter 1.]">>), + [ + {link, #{target := <<"first.zip">>}, <<"first">>, _}, + <<", ">>, + {link, #{target := <<"second.zip">>}, <<"second">>, _} + ] = inline(<<"link:first.zip[first],\nlink:second.zip[second]">>), + ok. +-endif. + +%% Asciidoc User Guide 21.1.3 +http_link(<<"http:", Rest/bits>>, Prev) when ?IS_BOUNDARY(Prev) -> + direct_link(Rest, <<"http:">>). + +direct_link(Data, Prefix) -> + %% Target must not contain whitespace characters. + {Target0, Rest0} = while(fun(C) -> + (C =/= $[) andalso (C =/= $\s) andalso (C =/= $\t) + andalso (C =/= $\r) andalso (C =/= $\n) + end, Data), + Target = <<Prefix/binary, Target0/binary>>, + %% It is optionally followed by a caption. Otherwise + %% the link itself is the caption. + case Rest0 of + <<"[", Caption0/bits>> -> + {Caption1, <<"]", Rest/bits>>} = while(fun(C) -> + C =/= $] + end, Caption0), + Caption = trim(Caption1), + {ok, {link, #{ + target => Target + }, Caption, inline}, Rest}; + _ -> + {ok, {link, #{ + target => Target + }, Target, inline}, Rest0} + end. + +-ifdef(TEST). +http_link_test() -> + [ + <<"If you have ">>, + {link, #{ + target := <<"http://example.org/hello#fragment">> + }, <<"http://example.org/hello#fragment">>, _}, + <<" then:">> + ] = inline(<<"If you have http://example.org/hello#fragment then:">>), + [ + <<"If you have ">>, + {link, #{ + target := <<"http://example.org/hello#fragment">> + }, <<"http://example.org/hello#fragment">>, _}, + <<" then:">> + ] = inline(<<"If you have http://example.org/hello#fragment\nthen:">>), + [ + <<"Oh, ">>, + {link, #{ + target := <<"http://example.org/hello#fragment">> + }, <<"hello there">>, _}, + <<", young lad.">> + ] = inline(<<"Oh, http://example.org/hello#fragment[hello there], young lad.">>), + ok. +-endif. + +%% Asciidoc User Guide 21.1.3 +https_link(<<"https:", Rest/bits>>, Prev) when ?IS_BOUNDARY(Prev) -> + direct_link(Rest, <<"https:">>). + +-ifdef(TEST). +https_link_test() -> + [ + <<"If you have ">>, + {link, #{ + target := <<"https://example.org/hello#fragment">> + }, <<"https://example.org/hello#fragment">>, _}, + <<" then:">> + ] = inline(<<"If you have https://example.org/hello#fragment then:">>), + [ + <<"If you have ">>, + {link, #{ + target := <<"https://example.org/hello#fragment">> + }, <<"https://example.org/hello#fragment">>, _}, + <<" then:">> + ] = inline(<<"If you have https://example.org/hello#fragment\nthen:">>), + [ + <<"Oh, ">>, + {link, #{ + target := <<"https://example.org/hello#fragment">> + }, <<"hello there">>, _}, + <<", young lad.">> + ] = inline(<<"Oh, https://example.org/hello#fragment[hello there], young lad.">>), + ok. +-endif. + +%% Asciidoc User Guide 10.1 +%% @todo <<"\\**" +%% @todo <<"\\*" +%% @todo <<"**" +emphasized_single_quote(Data, Prev) -> + quoted_text(Data, Prev, emphasized, $', $'). +emphasized_underline(Data, Prev) -> + quoted_text(Data, Prev, emphasized, $_, $_). +strong(Data, Prev) -> + quoted_text(Data, Prev, strong, $*, $*). + +quoted_text(<<Left, Rest0/bits>>, Prev, Type, Left, Right) when ?IS_BOUNDARY(Prev) -> + {Content, <<Right, Rest/bits>>} = while(fun(C) -> C =/= Right end, Rest0), + {ok, {Type, #{ + left => Left, + right => Right + }, inline(Content), inline}, Rest}. + +-ifdef(TEST). +emphasized_test() -> + [ + <<"Word phrases ">>, + {emphasized, #{left := $', right := $'}, + <<"enclosed in single quote characters">>, _}, + <<" (acute accents) or ">>, + {emphasized, #{left := $_, right := $_}, + <<"underline characters">>, _}, + <<" are emphasized.">> + ] = inline(<< + "Word phrases 'enclosed in single quote characters' (acute accents) " + "or _underline characters_ are emphasized." + >>), + ok. + +strong_test() -> + [ + <<"Word phrases ">>, + {strong, #{left := $*, right := $*}, + <<"enclosed in asterisk characters">>, _}, + <<" are rendered in a strong font (usually bold).">> + ] = inline(<< + "Word phrases *enclosed in asterisk characters* " + "are rendered in a strong font (usually bold)." + >>), + ok. +-endif. + +%% Asciidoc User Guide 21.4 +inline_literal_passthrough(<<"`", Rest0/bits>>, Prev) when ?IS_BOUNDARY(Prev) -> + {Content, <<"`", Rest/bits>>} = while(fun(C) -> C =/= $` end, Rest0), + {ok, {inline_literal_passthrough, #{}, Content, inline}, Rest}. + +-ifdef(TEST). +inline_literal_passthrough_test() -> + [ + <<"Word phrases ">>, + {inline_literal_passthrough, #{}, <<"enclosed in backtick characters">>, _}, + <<" (grave accents)...">> + ] = inline(<<"Word phrases `enclosed in backtick characters` (grave accents)...">>), + ok. +-endif. diff --git a/src/asciideck_line_reader.erl b/src/asciideck_line_reader.erl new file mode 100644 index 0000000..240c70b --- /dev/null +++ b/src/asciideck_line_reader.erl @@ -0,0 +1,94 @@ +%% Copyright (c) 2017-2018, Loïc Hoguin <[email protected]> +%% +%% Permission to use, copy, modify, and/or distribute this software for any +%% purpose with or without fee is hereby granted, provided that the above +%% copyright notice and this permission notice appear in all copies. +%% +%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +-module(asciideck_line_reader). +-behaviour(gen_server). + +%% API. +-export([start_link/1]). +-export([read_line/1]). +-export([get_position/1]). +-export([set_position/2]). + +%% gen_server. +-export([init/1]). +-export([handle_call/3]). +-export([handle_cast/2]). +-export([handle_info/2]). +-export([terminate/2]). +-export([code_change/3]). + +-record(state, { + lines :: [binary()], + length :: non_neg_integer(), + pos = 1 :: non_neg_integer() +}). + +%% API. + +-spec start_link(binary()) -> {ok, pid()}. +start_link(Data) -> + gen_server:start_link(?MODULE, [Data], []). + +-spec read_line(pid()) -> binary() | eof. +read_line(Pid) -> + gen_server:call(Pid, read_line). + +%% @todo peek_line + +-spec get_position(pid()) -> pos_integer(). +get_position(Pid) -> + gen_server:call(Pid, get_position). + +-spec set_position(pid(), pos_integer()) -> ok. +set_position(Pid, Pos) -> + gen_server:cast(Pid, {set_position, Pos}). + +%% gen_server. + +init([Data]) -> + Lines0 = binary:split(Data, <<"\n">>, [global]), + %% We add an empty line at the end to simplify parsing. + %% This has the inconvenient that when parsing blocks + %% this empty line will be included in the result if + %% the block is not properly closed. + Lines = lists:append(Lines0, [<<>>]), + {ok, #state{lines=Lines, length=length(Lines)}}. + +handle_call(read_line, _From, State=#state{length=Length, pos=Pos}) + when Pos > Length -> + {reply, eof, State}; +%% @todo I know this isn't the most efficient. We could keep +%% the lines read separately and roll back when set_position +%% wants us to. But it works fine for now. +handle_call(read_line, _From, State=#state{lines=Lines, pos=Pos}) -> + {reply, lists:nth(Pos, Lines), State#state{pos=Pos + 1}}; +handle_call(get_position, _From, State=#state{pos=Pos}) -> + {reply, Pos, State}; +handle_call(_Request, _From, State) -> + {reply, ignored, State}. + +handle_cast({set_position, Pos}, State) -> + {noreply, State#state{pos=Pos}}; +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. diff --git a/src/asciideck_lists_pass.erl b/src/asciideck_lists_pass.erl new file mode 100644 index 0000000..efb8e87 --- /dev/null +++ b/src/asciideck_lists_pass.erl @@ -0,0 +1,155 @@ +%% Copyright (c) 2017-2018, Loïc Hoguin <[email protected]> +%% +%% Permission to use, copy, modify, and/or distribute this software for any +%% purpose with or without fee is hereby granted, provided that the above +%% copyright notice and this permission notice appear in all copies. +%% +%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +%% The purpose of this pass is to aggregate list_item +%% blocks into proper lists. This involves building a +%% tree based on the rules for list items. +%% +%% The general rules are: +%% +%% - Any list item of different type/level than the +%% current list item is a child of the latter. +%% +%% - The level ultimately does not matter when building +%% the tree, * then **** then ** is accepted just fine. +%% +%% - Lists of the same type as a parent are not allowed. +%% On the other hand reusing a type in different parts +%% of the tree is not a problem. +%% +%% - Any literal paragraph following a list item is a +%% child of that list item. @todo +%% +%% - Any other block can be included as a child by using +%% list continuations. +-module(asciideck_lists_pass). + +-export([run/1]). + +run(AST) -> + list(AST, []). + +list([], Acc) -> + lists:reverse(Acc); +%% Any trailing block continuation is ignored. +list([{list_item_continuation, _, _, _}], Acc) -> + lists:reverse(Acc); +%% The first list item contains the attributes for the list. +list([LI={list_item, Attrs, _, Ann}|Tail0], Acc) -> + {Items, Tail} = item(Tail0, LI, [type(Attrs)], []), + list(Tail, [{list, Attrs, Items, Ann}|Acc]); +list([Block|Tail], Acc) -> + list(Tail, [Block|Acc]). + +%% Bulleted/numbered list item of the same type. +item([NextLI={list_item, #{type := T, level := L}, _, _}|Tail], + CurrentLI={list_item, #{type := T, level := L}, _, _}, Parents, Acc) -> + item(Tail, NextLI, Parents, [reverse_children(CurrentLI)|Acc]); +%% Labeled list item of the same type. +item([NextLI={list_item, #{type := T, separator := S}, _, _}|Tail], + CurrentLI={list_item, #{type := T, separator := S}, _, _}, Parents, Acc) -> + item(Tail, NextLI, Parents, [reverse_children(CurrentLI)|Acc]); +%% Other list items are either parent or children lists. +item(FullTail=[NextLI={list_item, Attrs, _, Ann}|Tail0], CurrentLI, Parents, Acc) -> + case lists:member(type(Attrs), Parents) of + %% We have a parent list item. This is the end of this child list. + true -> + {lists:reverse([reverse_children(CurrentLI)|Acc]), FullTail}; + %% We have a child list item. This is the beginning of a new list. + false -> + {Items, Tail} = item(Tail0, NextLI, [type(Attrs)|Parents], []), + item(Tail, add_child(CurrentLI, {list, Attrs, Items, Ann}), Parents, Acc) + end; +%% Ignore multiple contiguous list continuations. +item([LIC={list_item_continuation, _, _, _}, + {list_item_continuation, _, _, _}|Tail], CurrentLI, Parents, Acc) -> + item([LIC|Tail], CurrentLI, Parents, Acc); +%% Blocks that immediately follow list_item_continuation are children, +%% unless they are list_item themselves in which case it depends on the +%% type and level of the list item. +item([{list_item_continuation, _, _, _}, LI={list_item, _, _, _}|Tail], CurrentLI, Parents, Acc) -> + item([LI|Tail], CurrentLI, Parents, Acc); +item([{list_item_continuation, _, _, _}, Block|Tail], CurrentLI, Parents, Acc) -> + item(Tail, add_child(CurrentLI, Block), Parents, Acc); +%% Anything else is the end of the list. +item(Tail, CurrentLI, _, Acc) -> + {lists:reverse([reverse_children(CurrentLI)|Acc]), Tail}. + +type(Attrs) -> + maps:with([type, level, separator], Attrs). + +add_child({list_item, Attrs, Children, Ann}, Child) -> + {list_item, Attrs, [Child|Children], Ann}. + +reverse_children({list_item, Attrs, Children, Ann}) -> + {list_item, Attrs, lists:reverse(Children), Ann}. + +-ifdef(TEST). +list_test() -> + [{list, #{type := bulleted, level := 1}, [ + {list_item, #{type := bulleted, level := 1}, + [{paragraph, #{}, <<"Hello!">>, _}], #{line := 1}}, + {list_item, #{type := bulleted, level := 1}, + [{paragraph, #{}, <<"World!">>, _}], #{line := 2}} + ], #{line := 1}}] = run([ + {list_item, #{type => bulleted, level => 1}, + [{paragraph, #{}, <<"Hello!">>, #{line => 1}}], #{line => 1}}, + {list_item, #{type => bulleted, level => 1}, + [{paragraph, #{}, <<"World!">>, #{line => 2}}], #{line => 2}} + ]), + ok. + +list_of_list_test() -> + [{list, #{type := bulleted, level := 1}, [ + {list_item, #{type := bulleted, level := 1}, [ + {paragraph, #{}, <<"Hello!">>, _}, + {list, #{type := bulleted, level := 2}, [ + {list_item, #{type := bulleted, level := 2}, + [{paragraph, #{}, <<"Cat!">>, _}], #{line := 2}}, + {list_item, #{type := bulleted, level := 2}, + [{paragraph, #{}, <<"Dog!">>, _}], #{line := 3}} + ], #{line := 2}} + ], #{line := 1}}, + {list_item, #{type := bulleted, level := 1}, + [{paragraph, #{}, <<"World!">>, _}], #{line := 4}} + ], #{line := 1}}] = run([ + {list_item, #{type => bulleted, level => 1}, + [{paragraph, #{}, <<"Hello!">>, #{line => 1}}], #{line => 1}}, + {list_item, #{type => bulleted, level => 2}, + [{paragraph, #{}, <<"Cat!">>, #{line => 2}}], #{line => 2}}, + {list_item, #{type => bulleted, level => 2}, + [{paragraph, #{}, <<"Dog!">>, #{line => 3}}], #{line => 3}}, + {list_item, #{type => bulleted, level => 1}, + [{paragraph, #{}, <<"World!">>, #{line => 4}}], #{line => 4}} + ]), + ok. + +list_continuation_test() -> + [{list, #{type := bulleted, level := 1}, [ + {list_item, #{type := bulleted, level := 1}, [ + {paragraph, #{}, <<"Hello!">>, _}, + {listing_block, #{}, <<"hello() -> world.">>, #{line := 3}} + ], #{line := 1}}, + {list_item, #{type := bulleted, level := 1}, + [{paragraph, #{}, <<"World!">>, _}], #{line := 6}} + ], #{line := 1}}] = run([ + {list_item, #{type => bulleted, level => 1}, + [{paragraph, #{}, <<"Hello!">>, #{line => 1}}], #{line => 1}}, + {list_item_continuation, #{}, <<>>, #{line => 2}}, + {listing_block, #{}, <<"hello() -> world.">>, #{line => 3}}, + {list_item, #{type => bulleted, level => 1}, + [{paragraph, #{}, <<"World!">>, #{line => 6}}], #{line => 6}} + ]), + ok. +-endif. diff --git a/src/asciideck_parser.erl b/src/asciideck_parser.erl deleted file mode 100644 index 8016395..0000000 --- a/src/asciideck_parser.erl +++ /dev/null @@ -1,388 +0,0 @@ -%% Copyright (c) 2016, Loïc Hoguin <[email protected]> -%% -%% Permission to use, copy, modify, and/or distribute this software for any -%% purpose with or without fee is hereby granted, provided that the above -%% copyright notice and this permission notice appear in all copies. -%% -%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - --module(asciideck_parser). - --export([parse/2]). - -%% @todo -%% All nodes in the AST are of type {Type, Attrs, Text | Nodes, Ann} -%% except for text formatting nodes at the moment. Text formatting -%% nodes will be converted to this form in a future change. - -%% Parsing occurs in a few passes: -%% -%% * p1: Line-based parsing of the raw Asciidoc document -%% * p2: Deal with more compp1 structures like lists and tables - -parse(Data, St) -> - Lines0 = binary:split(Data, <<"\n">>, [global]), - %% Ensure there's an empty line at the end, to simplify parsing. - Lines1 = lists:append(Lines0, [<<>>]), - LineNumbers = lists:seq(1, length(Lines1)), - Lines = lists:zip(LineNumbers, Lines1), - %% @todo Document header, if any. Recognized by the author info/doc attributes? - %% Alternatively, don't recognize it, and only use attribute entries for the same info. - p2(p1(Lines, [], St), []). - -%% First pass. - -%% @todo When a block element is encountered asciidoc(1) determines the type of block by checking in the following order (first to last): (section) Titles, BlockMacros, Lists, DelimitedBlocks, Tables, AttributeEntrys, AttributeLists, BlockTitles, Paragraphs. - -%% @todo And this function is parsing, not p1ing. -p1([], AST, _St) -> - lists:reverse(AST); -%% Extra empty lines. -p1([{_, <<>>}|Tail], AST, St) -> - p1(Tail, AST, St); -%% Comments. -p1([{LN, <<"//", Comment/bits >>}|Tail], AST, St) -> - p1(Tail, [comment(trim_ws(Comment), ann(LN, St))|AST], St); -%% Section titles. -p1([{LN, <<"= ", Title/bits >>}, {_, <<>>}|Tail], AST, St) -> - p1_title_short(Tail, AST, St, LN, Title, 0); -p1([{LN, <<"== ", Title/bits >>}, {_, <<>>}|Tail], AST, St) -> - p1_title_short(Tail, AST, St, LN, Title, 1); -p1([{LN, <<"=== ", Title/bits >>}, {_, <<>>}|Tail], AST, St) -> - p1_title_short(Tail, AST, St, LN, Title, 2); -p1([{LN, <<"==== ", Title/bits >>}, {_, <<>>}|Tail], AST, St) -> - p1_title_short(Tail, AST, St, LN, Title, 3); -p1([{LN, <<"===== ", Title/bits >>}, {_, <<>>}|Tail], AST, St) -> - p1_title_short(Tail, AST, St, LN, Title, 4); -%% Block titles. -p1([{_LN, <<".", Title/bits >>}|Tail], AST, St) -> - p1(Tail, [{block_title, Title}|AST], St); -%% Attribute lists. -p1([{_LN, <<"[", Attrs/bits >>}|Tail], AST, St) -> - p1(Tail, [{attribute_list, p1_attr_list(Attrs)}|AST], St); -%% Listing blocks. -p1([{LN, <<"----", _/bits >>}|Tail], AST, St) -> - p1_listing(Tail, AST, St, LN, []); -%% Lists. -p1([{LN, <<"* ", Text/bits >>}|Tail], AST, St) -> - p1_li(Tail, AST, St, uli1, {LN, Text}); -p1([{LN, <<"** ", Text/bits >>}|Tail], AST, St) -> - p1_li(Tail, AST, St, uli2, {LN, Text}); -p1([{LN, <<"*** ", Text/bits >>}|Tail], AST, St) -> - p1_li(Tail, AST, St, uli3, {LN, Text}); -p1([{LN, <<"**** ", Text/bits >>}|Tail], AST, St) -> - p1_li(Tail, AST, St, uli4, {LN, Text}); -p1([{LN, <<"***** ", Text/bits >>}|Tail], AST, St) -> - p1_li(Tail, AST, St, uli5, {LN, Text}); -%% Tables. -p1([{LN, <<"|===", _/bits >>}|Tail], AST, St) -> - p1_table(Tail, AST, St, LN); -p1([{LN, <<"|", Text/bits >>}|Tail], AST, St) -> - p1_cell(Tail, AST, St, LN, Text); -%% Prefix-based or paragraph. -p1(Lines, AST, St) -> - p1_text(Lines, AST, St). - -p1_title_short(Tail, AST, St, LN, Text0, Level) -> - %% Remove the trailer, if any. - Text1 = trim_ws(Text0), - Trailer = case Level of - 0 -> <<" =">>; - 1 -> <<" ==">>; - 2 -> <<" ===">>; - 3 -> <<" ====">>; - 4 -> <<" =====">> - end, - TrailerSize = byte_size(Trailer), - Size = byte_size(Text1) - TrailerSize, - Text3 = case Text1 of - << Text2:Size/binary, Trailer:TrailerSize/binary >> -> Text2; - _ -> Text1 - end, - Text = trim_ws(Text3), - p1(Tail, [title(Text, #{level => Level}, ann(LN, St))|AST], St). - -p1_attr_list(AttrList0) -> - [AttrList|_] = binary:split(AttrList0, <<"]">>), - binary:split(AttrList, <<",">>). - -%% @todo Parse attributes properly. -p1_table(Tail, [{attribute_list, Attrs}, {block_title, Title}|AST], St, LN) -> - p1(Tail, [{begin_table, #{title => Title, todo => Attrs}, ann(LN, St)}|AST], St); -p1_table(Tail, [{attribute_list, Attrs}|AST], St, LN) -> - p1(Tail, [{begin_table, #{todo => Attrs}, ann(LN, St)}|AST], St); -p1_table(Tail, AST=[nl, {cell, _, _, _}|_], St, _) -> - p1(Tail, [end_table|AST], St); -p1_table(Tail, AST=[{cell, _, _, _}|_], St, _) -> - p1(Tail, [end_table|AST], St); -p1_table(Tail, AST, St, LN) -> - p1(Tail, [{begin_table, #{}, ann(LN, St)}|AST], St). - -%% @todo Multiline cells. -%% @todo Styled cells. -%% @todo Strip whitespace at the beginning of the cell if on the same line. -p1_cell(Tail=[{_, NextLine}|_], AST0, St, LN, Text) -> - case p1_cell_split(Text, <<>>) of - [Cell] -> - AST1 = [nl, cell(p1([{LN, trim_ws(Cell)}, {LN, <<>>}], [], St), ann(LN, St))|AST0], - AST = case NextLine of - <<>> -> [nl|AST1]; - _ -> AST1 - end, - p1(Tail, AST, St); - [Cell, Rest] -> - p1_cell(Tail, [cell(p1([{LN, trim_ws(Cell)}, {LN, <<>>}], [], St), ann(LN, St))|AST0], St, LN, Rest) - end. - -p1_cell_split(<<>>, Acc) -> - [Acc]; -p1_cell_split(<< $\\, $|, Rest/bits >>, Acc) -> - p1_cell_split(Rest, << Acc/binary, $| >>); -p1_cell_split(<< $|, Rest/bits >>, Acc) -> - [Acc, Rest]; -p1_cell_split(<< C, Rest/bits >>, Acc) -> - p1_cell_split(Rest, << Acc/binary, C >>). - -p1_listing([{_, <<"----", _/bits >>}, {_, <<>>}|Tail], AST0, St, LN, [_|Acc]) -> - Text = iolist_to_binary(lists:reverse(Acc)), - case AST0 of - [{attribute_list, [<<"source">>, Lang]}, {block_title, Title}|AST] -> - p1(Tail, [listing(Text, #{title => Title, language => Lang}, ann(LN, St))|AST], St); - [{block_title, Title}, {attribute_list, [<<"source">>, Lang]}|AST] -> - p1(Tail, [listing(Text, #{title => Title, language => Lang}, ann(LN, St))|AST], St); - [{attribute_list, [<<"source">>, Lang]}|AST] -> - p1(Tail, [listing(Text, #{language => Lang}, ann(LN, St))|AST], St); - [{block_title, Title}|AST] -> - p1(Tail, [listing(Text, #{title => Title}, ann(LN, St))|AST], St); - AST -> - p1(Tail, [listing(Text, #{}, ann(LN, St))|AST], St) - end; -p1_listing([{_, Line}|Tail], AST, St, LN, Acc) -> - p1_listing(Tail, AST, St, LN, [<<"\n">>, Line|Acc]). - -p1_li(Lines, AST, St, Type, FirstLine = {LN, _}) -> - {Tail, Glob} = p1_li_glob(Lines, []), - p1(Tail, [{Type, p1([FirstLine|Glob], [], St), ann(LN, St)}|AST], St). - -%% Glob everything until next list or empty line. -p1_li_glob(Tail = [{LN, << "*", _/bits >>}|_], Acc) -> - {Tail, lists:reverse([{LN, <<>>}|Acc])}; -p1_li_glob(Tail = [{LN, <<>>}|_], Acc) -> - {Tail, lists:reverse([{LN, <<>>}|Acc])}; -p1_li_glob([{LN, <<"+">>}|Tail], Acc) -> - p1_li_glob(Tail, [{LN, <<>>}|Acc]); -p1_li_glob([Line|Tail], Acc) -> - p1_li_glob(Tail, [Line|Acc]). - -%% Skip initial empty lines and then glob like normal lists. -p1_ll_glob(Lines=[{_, Line}|Tail]) -> - case trim_ws(Line) of - <<>> -> p1_ll_glob(Tail); - _ -> p1_ll_glob(Lines, []) - end. - -%% Glob everything until empty line. -%% @todo Detect next list. -p1_ll_glob(Tail = [{LN, <<>>}|_], Acc) -> - {Tail, lists:reverse([{LN, <<>>}|Acc])}; -p1_ll_glob([{LN, <<"+">>}|Tail], Acc) -> - p1_ll_glob(Tail, [{LN, <<>>}|Acc]); -p1_ll_glob([{LN, <<" ", Line/bits>>}|Tail], Acc) -> - p1_ll_glob([{LN, trim_ws(Line)}|Tail], Acc); -p1_ll_glob(Lines=[Line={LN, Text}|Tail], Acc) -> - case binary:split(<< Text/binary, $\s >>, <<":: ">>) of - [_, _] -> - {Lines, lists:reverse([{LN, <<>>}|Acc])}; - _ -> - p1_ll_glob(Tail, [Line|Acc]) - end. - -p1_text(Lines=[{LN, Line}|Tail], AST, St) -> - case binary:split(<< Line/binary, $\s >>, <<":: ">>) of - %% Nothing else on the line. - [Label, <<>>] -> - {Tail1, Glob} = p1_ll_glob(Tail), - p1(Tail1, [{label, Label, p1(Glob, [], St), ann(LN, St)}|AST], St); - %% Text on the same line. - [Label, Text0] -> - Size = byte_size(Text0) - 1, - << Text:Size/binary, _ >> = Text0, - {Tail1, Glob} = p1_ll_glob([{LN, Text}|Tail]), - %% Text on the same line is necessarily a paragraph I believe. - p1_p(Tail1, [{label, Label, p1(Glob, [], St), ann(LN, St)}|AST], St, LN, []); - %% Not a labeled list. - _ -> - p1_maybe_p(Lines, AST, St) - end. - -%% @todo Literal paragraphs. -p1_maybe_p([{_LN, << " ", Line/bits >>}|Tail], AST, St) -> - <<>> = trim_ws(Line), - p1(Tail, AST, St); -p1_maybe_p(Lines=[{LN, _}|_], AST, St) -> - p1_p(Lines, AST, St, LN, []). - -p1_p([{_, <<>>}|Tail], AST0, St, LN, [_|Acc]) -> - Text = format(iolist_to_binary(lists:reverse(Acc)), LN, St), - case AST0 of - [{block_title, Title}|AST] -> - p1(Tail, [paragraph(Text, #{title => Title}, ann(LN, St))|AST], St); - AST -> - p1(Tail, [paragraph(Text, #{}, ann(LN, St))|AST], St) - end; -%% Ignore comments inside paragraphs. -%% @todo Keep in the AST. -p1_p([{_, <<"//", _/bits>>}|Tail], AST, St, LN, Acc) -> - p1_p(Tail, AST, St, LN, Acc); -p1_p([{_, Line}|Tail], AST, St, LN, Acc) -> - %% @todo We need to keep line/col information. To do this - %% we probably should keep an index of character number -> line/col - %% that we pass to the format function. Otherwise the line/col - %% information on text will point to the paragraph start. - p1_p(Tail, AST, St, LN, [<<" ">>, Line|Acc]). - -%% Inline formatting. - -%% @todo Probably do it as part of the node functions that require it. -format(Text, LN, St) -> - case format(Text, LN, St, [], <<>>, $\s) of - [Bin] when is_binary(Bin) -> Bin; - Formatted -> Formatted - end. - -format(<<>>, _, _, Acc, <<>>, _) -> - lists:reverse(Acc); -format(<<>>, _, _, Acc, BinAcc, _) -> - lists:reverse([BinAcc|Acc]); -format(<< "link:", Rest0/bits >>, LN, St, Acc0, BinAcc, Prev) when Prev =:= $\s -> - case re:run(Rest0, "^([^[]*)\\[([^]]*)\\](.*)", [{capture, all, binary}]) of - nomatch -> - format(Rest0, LN, St, Acc0, << BinAcc/binary, "link:" >>, $:); - {match, [_, Link, Text, Rest]} -> - Acc = case BinAcc of - <<>> -> Acc0; - _ -> [BinAcc|Acc0] - end, - format(Rest, LN, St, [rel_link(Text, Link, ann(LN, St))|Acc], <<>>, $]) - end; -format(<< C, Rest0/bits >>, LN, St, Acc0, BinAcc, Prev) when Prev =:= $\s -> - %% @todo In some cases we must format inside the quoted text too. - %% Therefore we need to have some information about what to do here. - Quotes = #{ - $* => {strong, text}, - $` => {mono, literal} - }, - case maps:get(C, Quotes, undefined) of - undefined -> - format(Rest0, LN, St, Acc0, << BinAcc/binary, C >>, C); - {NodeType, QuotedType} -> - case binary:split(Rest0, << C >>) of - [_] -> - format(Rest0, LN, St, Acc0, << BinAcc/binary, $* >>, $*); - [QuotedText0, Rest] -> - Acc = case BinAcc of - <<>> -> Acc0; - _ -> [BinAcc|Acc0] - end, - QuotedText = case QuotedType of - text -> format(QuotedText0, LN, St); - literal -> QuotedText0 - end, - format(Rest, LN, St, [quoted(NodeType, QuotedText, ann(LN, St))|Acc], <<>>, $*) - end - end; -format(<< C, Rest/bits >>, LN, St, Acc, BinAcc, _) -> - format(Rest, LN, St, Acc, << BinAcc/binary, C >>, C). - -%% Second pass. - -p2([], Acc) -> - lists:reverse(Acc); -p2([{label, Label, Items, Ann}|Tail], Acc) -> - %% @todo Handle this like other lists. - p2(Tail, [ll([li(p2(Items, []), #{label => Label}, Ann)], #{}, Ann)|Acc]); -p2(Tail0=[{uli1, _, UlAnn}|_], Acc) -> - {LIs0, Tail} = lists:splitwith(fun({uli1, _, _}) -> true; (_) -> false end, Tail0), - LIs = [li(I, LiAnn) || {uli1, I, LiAnn} <- LIs0], - p2(Tail, [ul(LIs, #{}, UlAnn)|Acc]); -p2([{begin_table, Attrs, Ann}|Tail0], Acc) -> - %% @todo Can also get them from Attrs? - N = count_table_columns(Tail0), - {Rows, Tail} = p2_rows(Tail0, [], [], N, 1), - p2(Tail, [table(Rows, Attrs, Ann)|Acc]); -p2([Item|Tail], Acc) -> - p2(Tail, [Item|Acc]). - -%% @todo One cell per line version. -count_table_columns(Cells) -> - length(lists:takewhile(fun({cell, _, _, _}) -> true; (_) -> false end, Cells)). - -p2_rows([nl|Tail], Rows, Cols, NumCols, N) -> - p2_rows(Tail, Rows, Cols, NumCols, N); -p2_rows([Cell = {cell, _, _, Ann}|Tail], Rows, Cols, NumCols, NumCols) -> - p2_rows(Tail, [row(lists:reverse([Cell|Cols]), Ann)|Rows], [], NumCols, 1); -p2_rows([Cell = {cell, _, _, _}|Tail], Rows, Cols, NumCols, N) -> - p2_rows(Tail, Rows, [Cell|Cols], NumCols, N + 1); -p2_rows([end_table|Tail], Rows, [], _, _) -> - {lists:reverse(Rows), Tail}. - -%% Annotations. - -ann(Line, St) -> - ann(Line, 1, St). - -%% @todo Take filename too, if any. -ann(Line, Col, _St) -> - #{line => Line, col => Col}. - -%% Nodes. - -cell(Nodes, Ann) -> - {cell, #{}, Nodes, Ann}. - -comment(Text, Ann) -> - {comment, #{}, Text, Ann}. - -li(Nodes, Ann) -> - li(Nodes, #{}, Ann). - -li(Nodes, Attrs, Ann) -> - {li, Attrs, Nodes, Ann}. - -listing(Text, Attrs, Ann) -> - {listing, Attrs, Text, Ann}. - -ll(Nodes, Attrs, Ann) -> - {ll, Attrs, Nodes, Ann}. - -paragraph(Text, Attrs, Ann) -> - {p, Attrs, Text, Ann}. - -quoted(NodeType, Text, Ann) -> - {NodeType, #{}, Text, Ann}. - -rel_link(Text, Link, Ann) -> - {rel_link, #{target => Link}, Text, Ann}. - -row(Nodes, Ann) -> - {row, #{}, Nodes, Ann}. - -table(Nodes, Attrs, Ann) -> - {table, Attrs, Nodes, Ann}. - -title(Text, Attrs, Ann) -> - {title, Attrs, Text, Ann}. - -ul(Nodes, Attrs, Ann) -> - {ul, Attrs, Nodes, Ann}. - -%% Utility functions. - -trim_ws(Text) -> - iolist_to_binary(re:replace(Text, "^[ \\t]+|[ \\t]+$", <<>>, [global])). diff --git a/src/asciideck_tables_pass.erl b/src/asciideck_tables_pass.erl new file mode 100644 index 0000000..fdda6ef --- /dev/null +++ b/src/asciideck_tables_pass.erl @@ -0,0 +1,191 @@ +%% Copyright (c) 2017-2018, Loïc Hoguin <[email protected]> +%% +%% Permission to use, copy, modify, and/or distribute this software for any +%% purpose with or without fee is hereby granted, provided that the above +%% copyright notice and this permission notice appear in all copies. +%% +%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +%% This pass parses and builds a table from the contents +%% of a table block. +%% +%% Asciidoc User Guide 23 +%% +%% @todo Rows and cells are currently not annotated. +-module(asciideck_tables_pass). + +-export([run/1]). + +-define(IS_WS(C), (C =:= $\s) or (C =:= $\t) or (C =:= $\n). + +run([]) -> + []; +run([Table={table, _, _, _}|Tail]) -> + [table(Table)|run(Tail)]; +run([Block|Tail]) -> + [Block|run(Tail)]. + +table({table, Attrs, Contents, Ann}) -> + {Cells, NumCols} = parse_table(Contents, Attrs), + Children = rows(Cells, NumCols), + {table, Attrs, Children, Ann}. + +-ifdef(TEST). +table_test() -> + {table, _, [ + {row, _, [ + {cell, _, <<"1">>, _}, + {cell, _, <<"2">>, _}, + {cell, _, <<"A">>, _} + ], _}, + {row, _, [ + {cell, _, <<"3">>, _}, + {cell, _, <<"4">>, _}, + {cell, _, <<"B">>, _} + ], _}, + {row, _, [ + {cell, _, <<"5">>, _}, + {cell, _, <<"6">>, _}, + {cell, _, <<"C">>, _} + ], _} + ], _} = table({table, #{}, << + "|1 |2 |A\n" + "|3 |4 |B\n" + "|5 |6 |C">>, #{line => 1}}), + ok. +-endif. + +%% If the cols attribute is not specified, the number of +%% columns is the number of cells on the first line. +parse_table(Contents, #{<<"cols">> := Cols}) -> + {parse_cells(Contents, []), num_cols(Cols)}; +%% We get the first line, parse the cells in it then +%% count the number of columns in the table. Finally +%% we parse all the remaining cells. +parse_table(Contents, _) -> + case binary:split(Contents, <<$\n>>) of + %% We only have the one line. Who writes tables like this? + [Line] -> + Cells = parse_cells(Line, []), + {Cells, length(Cells)}; + %% We have a useful table with more than one line. Good user! + [Line, Rest] -> + Cells0 = parse_cells(Line, []), + Cells = parse_cells(Rest, lists:reverse(Cells0)), + {Cells, length(Cells0)} + end. + +num_cols(Cols) -> + %% @todo Handle column specifiers. + Specs = binary:split(Cols, <<$,>>, [global]), + length(Specs). + +parse_cells(Contents, Acc) -> + Cells = split_cells(Contents),%binary:split(Contents, [<<$|>>], [global]), + do_parse_cells(Cells, Acc). + %% Split on | + %% Look at the end of each element see if there's a cell specifier + %% Add it as an attribute to the cell for now and consolidate + %% when processing rows. + +split_cells(Contents) -> + split_cells(Contents, <<>>, []). + +split_cells(<<>>, Cell, Acc) -> + lists:reverse([Cell|Acc]); +split_cells(<<$\\, $|, R/bits>>, Cell, Acc) -> + split_cells(R, <<Cell/binary, $|>>, Acc); +split_cells(<<$|, R/bits>>, Cell, Acc) -> + split_cells(R, <<>>, [Cell|Acc]); +split_cells(<<C, R/bits>>, Cell, Acc) -> + split_cells(R, <<Cell/binary, C>>, Acc). + +%% Malformed table (no pipe before cell). Process it like it is a single cell. +do_parse_cells([Contents], Acc) -> + %% @todo Annotations. + lists:reverse([{cell, #{specifiers => <<>>}, Contents, #{}}|Acc]); +%% Last cell. There are no further cell specifiers. +do_parse_cells([Specs, Contents0], Acc) -> + Contents = asciideck_block_parser:trim(Contents0, both), + %% @todo Annotations. + Cell = {cell, #{specifiers => Specs}, Contents, #{}}, + lists:reverse([Cell|Acc]); +%% If there are cell specifiers we need to extract them from the cell +%% contents. Cell specifiers are everything from the last whitespace +%% until the end of the binary. +do_parse_cells([Specs, Contents0|Tail], Acc) -> + NextSpecs = <<>>, %% @todo find_r(Contents0, <<>>), + Len = byte_size(Contents0) - byte_size(NextSpecs), + <<Contents1:Len/binary, _/bits>> = Contents0, + Contents = asciideck_block_parser:trim(Contents1, both), + %% @todo Annotations. + Cell = {cell, #{specifiers => Specs}, Contents, #{}}, + do_parse_cells([NextSpecs|Tail], [Cell|Acc]). + +%% @todo This is not correct. Not all remaining data is specifiers. +%% In addition, for columns at the end of the line this doesn't apply. +%% Find the remaining data after the last whitespace character. +%find_r(<<>>, Acc) -> +% Acc; +%find_r(<<C, Rest/bits>>, _) when ?IS_WS(C) -> +% find_r(Rest, Rest); +%find_r(<<_, Rest/bits>>, Acc) -> +% find_r(Rest, Acc). + +-ifdef(TEST). +parse_table_test() -> + {[ + {cell, _, <<"1">>, _}, + {cell, _, <<"2">>, _}, + {cell, _, <<"A">>, _}, + {cell, _, <<"3">>, _}, + {cell, _, <<"4">>, _}, + {cell, _, <<"B">>, _}, + {cell, _, <<"5">>, _}, + {cell, _, <<"6">>, _}, + {cell, _, <<"C">>, _} + ], 3} = parse_table(<< + "|1 |2 |A\n" + "|3 |4 |B\n" + "|5 |6 |C">>, #{}), + ok. + +parse_table_escape_pipe_test() -> + {[ + {cell, _, <<"1">>, _}, + {cell, _, <<"2">>, _}, + {cell, _, <<"3 |4">>, _}, + {cell, _, <<"5">>, _} + ], 2} = parse_table(<< + "|1 |2\n" + "|3 \\|4 |5">>, #{}), + ok. +-endif. + +%% @todo We currently don't handle colspans and rowspans. +rows(Cells, NumCols) -> + rows(Cells, [], NumCols, [], NumCols). + +%% End of row. +rows(Tail, Acc, NumCols, RowAcc, CurCol) when CurCol =< 0 -> + %% @todo Annotations. + Row = {row, #{}, lists:reverse(RowAcc), #{}}, + rows(Tail, [Row|Acc], NumCols, [], NumCols); +%% Add a cell to the row. +rows([Cell|Tail], Acc, NumCols, RowAcc, CurCol) -> + rows(Tail, Acc, NumCols, [Cell|RowAcc], CurCol - 1); +%% End of a properly formed table. +rows([], Acc, _, [], _) -> + lists:reverse(Acc); +%% Malformed table. Even if we expect more columns, +%% if there are no more cells there's nothing we can do. +rows([], Acc, _, RowAcc, _) -> + %% @todo Annotations. + Row = {row, #{}, lists:reverse(RowAcc), #{}}, + lists:reverse([Row|Acc]). diff --git a/src/asciideck_to_manpage.erl b/src/asciideck_to_manpage.erl index bdff90e..37e4e73 100644 --- a/src/asciideck_to_manpage.erl +++ b/src/asciideck_to_manpage.erl @@ -1,4 +1,4 @@ -%% Copyright (c) 2016, Loïc Hoguin <[email protected]> +%% Copyright (c) 2016-2018, Loïc Hoguin <[email protected]> %% %% Permission to use, copy, modify, and/or distribute this software for any %% purpose with or without fee is hereby granted, provided that the above @@ -19,7 +19,7 @@ -export([translate/2]). translate(AST, Opts) -> - {Man, Section, Output0} = translate_man(AST, Opts), + {Man, Section, Output0} = man(AST, Opts), {CompressExt, Output} = case Opts of #{compress := gzip} -> {".gz", zlib:gzip(Output0)}; _ -> {"", Output0} @@ -32,7 +32,9 @@ translate(AST, Opts) -> Output end. -translate_man([{title, #{level := 0}, Title0, _Ann}|AST], Opts) -> +%% Header of the man page file. + +man([{section_title, #{level := 0}, Title0, _Ann}|AST], Opts) -> ensure_name_section(AST), [Title, << Section:1/binary, _/bits >>] = binary:split(Title0, <<"(">>), Extra1 = maps:get(extra1, Opts, today()), @@ -42,10 +44,10 @@ translate_man([{title, #{level := 0}, Title0, _Ann}|AST], Opts) -> ".TH \"", Title, "\" \"", Section, "\" \"", Extra1, "\" \"", Extra2, "\" \"", Extra3, "\"\n" ".ta T 4n\n\\&\n", - man(AST, []) + ast(AST) ]}. -ensure_name_section([{title, #{level := 1}, Title, _}|_]) -> +ensure_name_section([{section_title, #{level := 1}, Title, _}|_]) -> case string:to_lower(string:strip(binary_to_list(Title))) of "name" -> ok; _ -> error(badarg) @@ -57,22 +59,56 @@ today() -> {{Y, M, D}, _} = calendar:universal_time(), io_lib:format("~b-~2.10.0b-~2.10.0b", [Y, M, D]). -man([], Acc) -> - lists:reverse(Acc); -man([{title, #{level := 1}, Title, _Ann}|Tail], Acc) -> - man(Tail, [[".SH ", string:to_upper(binary_to_list(Title)), "\n"]|Acc]); -man([{title, #{level := 2}, Title, _Ann}|Tail], Acc) -> - man(Tail, [[".SS ", Title, "\n"]|Acc]); -man([{p, _Attrs, Text, _Ann}|Tail], Acc) -> - man(Tail, [[".LP\n", man_format(Text), "\n.sp\n"]|Acc]); -man([{listing, Attrs, Listing, _Ann}|Tail], Acc0) -> - Acc1 = case Attrs of - #{title := Title} -> - [[".PP\n\\fB", Title, "\\fR\n"]|Acc0]; - _ -> - Acc0 - end, - Acc = [[ +%% Loop over all types of AST nodes. + +ast(AST) -> + fold(AST, fun ast_node/1). + +fold(AST, Fun) -> + lists:reverse(lists:foldl( + fun(Node, Acc) -> [Fun(Node)|Acc] end, + [], AST)). + +ast_node(Node={Type, _, _, _}) -> + try + case Type of + section_title -> section_title(Node); + paragraph -> paragraph(Node); + listing_block -> listing_block(Node); + list -> list(Node); + table -> table(Node); + comment_line -> comment_line(Node); + _ -> + io:format("Ignored AST node ~p~n", [Node]), + [] + end + catch _:_ -> + io:format("Ignored AST node ~p~n", [Node]), + [] + end. + +%% Section titles. + +section_title({section_title, #{level := 1}, Title, _}) -> + [".SH ", string:to_upper(binary_to_list(Title)), "\n"]; +section_title({section_title, #{level := 2}, Title, _}) -> + [".SS ", Title, "\n"]. + +%% Paragraphs. + +paragraph({paragraph, _, Text, _}) -> + [".LP\n", inline(Text), "\n.sp\n"]. + +%% Listing blocks. + +listing_block({listing_block, Attrs, Listing, _}) -> + [ + case Attrs of + #{<<"title">> := Title} -> + [".PP\n\\fB", Title, "\\fR\n"]; + _ -> + [] + end, ".if n \\{\\\n" ".RS 4\n" ".\\}\n" @@ -82,55 +118,18 @@ man([{listing, Attrs, Listing, _Ann}|Tail], Acc0) -> ".fi\n" ".if n \\{\\\n" ".RE\n" - ".\\}\n"]|Acc1], - man(Tail, Acc); -man([{ul, _Attrs, Items, _Ann}|Tail], Acc0) -> - Acc = man_ul(Items, Acc0), - man(Tail, Acc); -man([{ll, _Attrs, Items, _Ann}|Tail], Acc0) -> - Acc = man_ll(Items, Acc0), - man(Tail, Acc); -%% @todo Attributes. -%% Currently acts as if options="headers" was always set. -man([{table, _TAttrs, [{row, RowAttrs, Headers0, RowAnn}|Rows0], _TAnn}|Tail], Acc0) -> - Headers = [{cell, CAttrs, [{p, Attrs, [{strong, #{}, P, CAnn}], Ann}], CAnn} - || {cell, CAttrs, [{p, Attrs, P, Ann}], CAnn} <- Headers0], - Rows = [{row, RowAttrs, Headers, RowAnn}|Rows0], - Acc = [[ - ".TS\n" - "allbox tab(:);\n", - man_table_style(Rows, []), - man_table_contents(Rows), - ".TE\n" - ".sp 1\n"]|Acc0], - man(Tail, Acc); -%% Skip everything we don't understand. -man([_Ignore|Tail], Acc) -> - io:format("Ignore ~p~n", [_Ignore]), %% @todo lol io:format - man(Tail, Acc). - -man_ll([], Acc) -> - Acc; -man_ll([{li, #{label := Label}, Item, _LiAnn}|Tail], Acc0) -> - Acc = [[ - ".PP\n" - "\\fB", Label, "\\fR\n", - ".RS 4\n", - man_ll_item(Item), - ".RE\n"]|Acc0], - man_ll(Tail, Acc). - -man_ll_item([{ul, _Attrs, Items, _Ann}]) -> - [man_ul(Items, []), "\n"]; -man_ll_item([{p, _PAttrs, Text, _PAnn}]) -> - [man_format(Text), "\n"]; -man_ll_item([{p, _PAttrs, Text, _PAnn}|Tail]) -> - [man_format(Text), "\n\n", man_ll_item(Tail)]. - -man_ul([], Acc) -> - Acc; -man_ul([{li, _LiAttrs, [{p, _PAttrs, Text, _PAnn}], _LiAnn}|Tail], Acc0) -> - Acc = [[ + ".\\}\n" + ]. + +%% Lists. + +list({list, #{type := bulleted}, Items, _}) -> + fold(Items, fun bulleted_list_item/1); +list({list, #{type := labeled}, Items, _}) -> + fold(Items, fun labeled_list_item/1). + +bulleted_list_item({list_item, _, [{paragraph, _, Text, _}|AST], _}) -> + [ ".ie n \\{\\\n" ".RS 2\n" "\\h'-02'\\(bu\\h'+01'\\c\n" @@ -140,40 +139,85 @@ man_ul([{li, _LiAttrs, [{p, _PAttrs, Text, _PAnn}], _LiAnn}|Tail], Acc0) -> ".sp -1\n" ".IP \\(bu 2.3\n" ".\\}\n", - man_format(Text), "\n" - ".RE\n"]|Acc0], - man_ul(Tail, Acc). + inline(Text), "\n", + ast(AST), + ".RE\n" + ]. + +labeled_list_item({list_item, #{label := Label}, [{paragraph, _, Text, _}|AST], _}) -> + [ + ".PP\n" + "\\fB", inline(Label), "\\fR\n", + ".RS 4\n", + inline(Text), "\n", + ast(AST), + ".RE\n" + ]. + +%% Tables. + +table({table, _, Rows0, _}) -> + Rows = table_apply_options(Rows0), + [ + ".TS\n" + "allbox tab(:);\n", + table_style(Rows), ".\n", + table_contents(Rows), + ".TE\n" + ".sp 1\n" + ]. + +%% @todo Currently acts as if options="headers" was always set. +table_apply_options([{row, RAttrs, Headers0, RAnn}|Tail]) -> + Headers = [{cell, CAttrs, [{strong, #{}, CText, CAnn}], CAnn} + || {cell, CAttrs, CText, CAnn} <- Headers0], + [{row, RAttrs, Headers, RAnn}|Tail]. + +table_style(Rows) -> + [[table_style_cells(Cells), "\n"] + || {row, _, Cells, _} <- Rows]. + +table_style_cells(Cells) -> + ["lt " || {cell, _, _, _} <- Cells]. + +table_contents(Rows) -> + [[table_contents_cells(Cells), "\n"] + || {row, _, Cells, _} <- Rows]. + +table_contents_cells([FirstCell|Cells]) -> + [table_contents_cell(FirstCell), + [[":", table_contents_cell(Cell)] || Cell <- Cells]]. -man_table_style([], [_|Acc]) -> - lists:reverse([".\n"|Acc]); -man_table_style([{row, _, Cols, _}|Tail], Acc) -> - man_table_style(Tail, [$\n, man_table_style_cols(Cols, [])|Acc]). +table_contents_cell({cell, _, Text, _}) -> + ["T{\n", inline(Text), "\nT}"]. -man_table_style_cols([], [_|Acc]) -> - lists:reverse(Acc); -man_table_style_cols([{cell, _, _, _}|Tail], Acc) -> - man_table_style_cols(Tail, [$\s, "lt"|Acc]). +%% Comment lines are printed in the generated file +%% but are not visible in viewers. -man_table_contents(Rows) -> - [man_table_contents_cols(Cols, []) || {row, _, Cols, _} <- Rows]. +comment_line({comment_line, _, Text, _}) -> + ["\\# ", Text, "\n"]. -man_table_contents_cols([], [_|Acc]) -> - lists:reverse(["\n"|Acc]); -man_table_contents_cols([{cell, _CAttrs, [{p, _PAttrs, Text, _PAnn}], _CAnn}|Tail], Acc) -> - man_table_contents_cols(Tail, [$:, "\nT}", man_format(Text), "T{\n"|Acc]). +%% Inline formatting. -man_format(Text) when is_binary(Text) -> +inline(Text) when is_binary(Text) -> Text; -man_format({rel_link, #{target := Link}, Text, _}) -> +%% When the link is the text we only print it once. +inline({link, #{target := Link}, Link, _}) -> + Link; +inline({link, #{target := Link}, Text, _}) -> case re:run(Text, "^([-_:.a-zA-Z0-9]*)(\\([0-9]\\))$", [{capture, all, binary}]) of nomatch -> [Text, " (", Link, ")"]; {match, [_, ManPage, ManSection]} -> ["\\fB", ManPage, "\\fR", ManSection] end; -man_format({strong, _, Text, _}) -> - ["\\fB", man_format(Text), "\\fR"]; +inline({emphasized, _, Text, _}) -> + ["\\fI", inline(Text), "\\fR"]; +inline({strong, _, Text, _}) -> + ["\\fB", inline(Text), "\\fR"]; %% We are already using a monospace font. -%% @todo Maybe there's a readable formatting we could use to differentiate from normal text? -man_format({mono, _, Text, _}) -> - man_format(Text); -man_format(Text) when is_list(Text) -> - [man_format(T) || T <- Text]. +inline({inline_literal_passthrough, _, Text, _}) -> + inline(Text); +%% Xref links appear as plain text in manuals. +inline({xref, _, Text, _}) -> + inline(Text); +inline(Text) when is_list(Text) -> + [inline(T) || T <- Text]. |