aboutsummaryrefslogtreecommitdiffstats
path: root/src/asciideck_inline_pass.erl
diff options
context:
space:
mode:
authorLoïc Hoguin <[email protected]>2018-06-08 14:49:09 +0200
committerLoïc Hoguin <[email protected]>2018-06-08 14:49:09 +0200
commit524777054be30c848c1883ffd15b245c29f73004 (patch)
tree6c3df0022ee6d228341bb6ce2c525011076c123d /src/asciideck_inline_pass.erl
parent48cbfe8b60f3e555acd2d623db10e4eb56234179 (diff)
downloadasciideck-524777054be30c848c1883ffd15b245c29f73004.tar.gz
asciideck-524777054be30c848c1883ffd15b245c29f73004.tar.bz2
asciideck-524777054be30c848c1883ffd15b245c29f73004.zip
Rewrite the project
The new code is much more readable and easier to extend. I took inspiration from Haskell's Parsec project which seems to only write the happy-path and applied the idea to Erlang's exceptions. When the parser tries to parse, say, a list, and crashes, it tries with a table next, and so on until something matches. Normal paragraphs always match so there can be no parsing failures. The parser now has a number of passes: first the block parser, then lists and tables passes to build a proper tree out of them and finally an inline pass to apply inline formatting. The resulting AST can then be modified at will and passed on to translator modules which output a different format. The man page translator was also rewritten and has been tested against both Cowboy and Gun. Numerous issues were fixed as a result of this rewrite.
Diffstat (limited to 'src/asciideck_inline_pass.erl')
-rw-r--r--src/asciideck_inline_pass.erl308
1 files changed, 308 insertions, 0 deletions
diff --git a/src/asciideck_inline_pass.erl b/src/asciideck_inline_pass.erl
new file mode 100644
index 0000000..3ed79b1
--- /dev/null
+++ b/src/asciideck_inline_pass.erl
@@ -0,0 +1,308 @@
+%% Copyright (c) 2017-2018, Loïc Hoguin <[email protected]>
+%%
+%% Permission to use, copy, modify, and/or distribute this software for any
+%% purpose with or without fee is hereby granted, provided that the above
+%% copyright notice and this permission notice appear in all copies.
+%%
+%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+%% This pass walks over the tree and parses inline elements.
+-module(asciideck_inline_pass).
+
+-export([run/1]).
+
+-import(asciideck_block_parser, [trim/1, while/2]).
+
+-type inline_ast() :: list(). %% @todo
+-export_type([inline_ast/0]).
+
+run([]) ->
+ [];
+run([Data|Tail]) when is_binary(Data) ->
+ [inline(Data)|run(Tail)];
+%% We do not do any inline formatting for verbatim blocks,
+%% for example listing blocks.
+%%
+%% @todo subs is a list of values.
+run([Item={_, #{<<"subs">> := <<"verbatim">>}, _, _}|Tail]) ->
+ [Item|run(Tail)];
+%% Labeled lists' labels can also have inline formatting.
+run([{Type, Attrs=#{label := Label}, Items, Ann}|Tail]) when is_list(Items) ->
+ [{Type, Attrs#{label => inline(Label)}, run(Items), Ann}|run(Tail)];
+run([{Type, Attrs, Items, Ann}|Tail]) when is_list(Items) ->
+ [{Type, Attrs, run(Items), Ann}|run(Tail)];
+run([{Type, Attrs, Data, Ann}|Tail]) ->
+ [{Type, Attrs, inline(Data), Ann}|run(Tail)].
+
+%% We reduce inline content with a single text element
+%% with no formatting to a simple binary.
+inline(<<>>) ->
+ <<>>;
+inline(Data) ->
+ case inline(Data, <<>>, []) of
+ [] -> <<>>;
+ [Text] when is_binary(Text) -> Text;
+ AST -> AST
+ end.
+
+-spec inline(binary(), binary(), inline_ast()) -> inline_ast().
+inline(<<>>, <<>>, Acc) ->
+ lists:reverse(Acc);
+inline(<<>>, BinAcc, Acc) ->
+ lists:reverse([BinAcc|Acc]);
+inline(Data, BinAcc, Acc) ->
+ oneof(Data, BinAcc, Acc, [
+ %% Links.
+ fun xref/2,
+ fun link/2,
+ fun http_link/2,
+ fun https_link/2,
+ %% Quoted text.
+ fun emphasized_single_quote/2,
+ fun emphasized_underline/2,
+ fun strong/2,
+ %% Passthrough macros.
+ fun inline_literal_passthrough/2
+ ]).
+
+%% The inline pass replaces \r\n and \n with a simple space
+%% when it occurs within normal text.
+oneof(<<$\r, $\n, Rest/bits>>, BinAcc, Acc, []) ->
+ inline(Rest, <<BinAcc/binary, $\s>>, Acc);
+oneof(<<$\n, Rest/bits>>, BinAcc, Acc, []) ->
+ inline(Rest, <<BinAcc/binary, $\s>>, Acc);
+oneof(<<C, Rest/bits>>, BinAcc, Acc, []) ->
+ inline(Rest, <<BinAcc/binary, C>>, Acc);
+oneof(Data, BinAcc, Acc, [Parse|Tail]) ->
+ Prev = case BinAcc of
+ <<>> -> undefined;
+ _ -> binary:last(BinAcc)
+ end,
+ try Parse(Data, Prev) of
+ {ok, Inline, Rest} when BinAcc =:= <<>> ->
+ inline(Rest, BinAcc, [Inline|Acc]);
+ {ok, Inline, Rest} ->
+ inline(Rest, <<>>, [Inline, BinAcc|Acc]);
+ {skip, Text, Rest} ->
+ oneof(Rest, <<BinAcc/binary, Text/binary>>, Acc, Tail)
+ catch _:_ ->
+ oneof(Data, BinAcc, Acc, Tail)
+ end.
+
+-ifdef(TEST).
+text_test() ->
+ <<>> = inline(<<>>),
+ <<"Hello, Robert">> = inline(<<"Hello, Robert">>),
+ ok.
+-endif.
+
+-define(IS_BOUNDARY(C), C =:= undefined; C =:= $\s; C =:= $\t; C =:= $\r; C =:= $\n; C =:= $().
+
+%% Asciidoc User Guide 21.2.1
+%%
+%% We currently do not implement the <<...>> form.
+xref(<<"xref:", IDAndCaption/bits>>, Prev) when ?IS_BOUNDARY(Prev) ->
+ %% ID must not contain whitespace characters.
+ {ID, <<"[", Caption0/bits>>} = while(fun(C) ->
+ (C =/= $[) andalso (C =/= $\s) andalso (C =/= $\t)
+ end, IDAndCaption),
+ %% It is followed by a caption.
+ {Caption1, <<"]", Rest/bits>>} = while(fun(C) ->
+ C =/= $]
+ end, Caption0),
+ Caption = trim(Caption1),
+ {ok, {xref, #{
+ id => ID
+ }, Caption, inline}, Rest}.
+
+-ifdef(TEST).
+xref_test() ->
+ [{xref, #{
+ id := <<"tiger_image">>
+ }, <<"face of a tiger">>, _}] = inline(<<"xref:tiger_image[face of a tiger]">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 21.1.3
+link(<<"link:", TargetAndCaption/bits>>, Prev) when ?IS_BOUNDARY(Prev) ->
+ %% Target must not contain whitespace characters.
+ {Target, <<"[", Caption0/bits>>} = while(fun(C) ->
+ (C =/= $[) andalso (C =/= $\s) andalso (C =/= $\t)
+ andalso (C =/= $\r) andalso (C =/= $\n)
+ end, TargetAndCaption),
+ %% It is followed by a caption.
+ {Caption1, <<"]", Rest/bits>>} = while(fun(C) ->
+ C =/= $]
+ end, Caption0),
+ Caption = trim(Caption1),
+ {ok, {link, #{
+ target => Target
+ }, Caption, inline}, Rest}.
+
+-ifdef(TEST).
+link_test() ->
+ [{link, #{
+ target := <<"downloads/foo.zip">>
+ }, <<"download foo.zip">>, _}] = inline(<<"link:downloads/foo.zip[download foo.zip]">>),
+ [{link, #{
+ target := <<"chapter1.asciidoc#fragment">>
+ }, <<"Chapter 1.">>, _}] = inline(<<"link:chapter1.asciidoc#fragment[Chapter 1.]">>),
+ [
+ {link, #{target := <<"first.zip">>}, <<"first">>, _},
+ <<", ">>,
+ {link, #{target := <<"second.zip">>}, <<"second">>, _}
+ ] = inline(<<"link:first.zip[first],\nlink:second.zip[second]">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 21.1.3
+http_link(<<"http:", Rest/bits>>, Prev) when ?IS_BOUNDARY(Prev) ->
+ direct_link(Rest, <<"http:">>).
+
+direct_link(Data, Prefix) ->
+ %% Target must not contain whitespace characters.
+ {Target0, Rest0} = while(fun(C) ->
+ (C =/= $[) andalso (C =/= $\s) andalso (C =/= $\t)
+ andalso (C =/= $\r) andalso (C =/= $\n)
+ end, Data),
+ Target = <<Prefix/binary, Target0/binary>>,
+ %% It is optionally followed by a caption. Otherwise
+ %% the link itself is the caption.
+ case Rest0 of
+ <<"[", Caption0/bits>> ->
+ {Caption1, <<"]", Rest/bits>>} = while(fun(C) ->
+ C =/= $]
+ end, Caption0),
+ Caption = trim(Caption1),
+ {ok, {link, #{
+ target => Target
+ }, Caption, inline}, Rest};
+ _ ->
+ {ok, {link, #{
+ target => Target
+ }, Target, inline}, Rest0}
+ end.
+
+-ifdef(TEST).
+http_link_test() ->
+ [
+ <<"If you have ">>,
+ {link, #{
+ target := <<"http://example.org/hello#fragment">>
+ }, <<"http://example.org/hello#fragment">>, _},
+ <<" then:">>
+ ] = inline(<<"If you have http://example.org/hello#fragment then:">>),
+ [
+ <<"If you have ">>,
+ {link, #{
+ target := <<"http://example.org/hello#fragment">>
+ }, <<"http://example.org/hello#fragment">>, _},
+ <<" then:">>
+ ] = inline(<<"If you have http://example.org/hello#fragment\nthen:">>),
+ [
+ <<"Oh, ">>,
+ {link, #{
+ target := <<"http://example.org/hello#fragment">>
+ }, <<"hello there">>, _},
+ <<", young lad.">>
+ ] = inline(<<"Oh, http://example.org/hello#fragment[hello there], young lad.">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 21.1.3
+https_link(<<"https:", Rest/bits>>, Prev) when ?IS_BOUNDARY(Prev) ->
+ direct_link(Rest, <<"https:">>).
+
+-ifdef(TEST).
+https_link_test() ->
+ [
+ <<"If you have ">>,
+ {link, #{
+ target := <<"https://example.org/hello#fragment">>
+ }, <<"https://example.org/hello#fragment">>, _},
+ <<" then:">>
+ ] = inline(<<"If you have https://example.org/hello#fragment then:">>),
+ [
+ <<"If you have ">>,
+ {link, #{
+ target := <<"https://example.org/hello#fragment">>
+ }, <<"https://example.org/hello#fragment">>, _},
+ <<" then:">>
+ ] = inline(<<"If you have https://example.org/hello#fragment\nthen:">>),
+ [
+ <<"Oh, ">>,
+ {link, #{
+ target := <<"https://example.org/hello#fragment">>
+ }, <<"hello there">>, _},
+ <<", young lad.">>
+ ] = inline(<<"Oh, https://example.org/hello#fragment[hello there], young lad.">>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 10.1
+%% @todo <<"\\**"
+%% @todo <<"\\*"
+%% @todo <<"**"
+emphasized_single_quote(Data, Prev) ->
+ quoted_text(Data, Prev, emphasized, $', $').
+emphasized_underline(Data, Prev) ->
+ quoted_text(Data, Prev, emphasized, $_, $_).
+strong(Data, Prev) ->
+ quoted_text(Data, Prev, strong, $*, $*).
+
+quoted_text(<<Left, Rest0/bits>>, Prev, Type, Left, Right) when ?IS_BOUNDARY(Prev) ->
+ {Content, <<Right, Rest/bits>>} = while(fun(C) -> C =/= Right end, Rest0),
+ {ok, {Type, #{
+ left => Left,
+ right => Right
+ }, inline(Content), inline}, Rest}.
+
+-ifdef(TEST).
+emphasized_test() ->
+ [
+ <<"Word phrases ">>,
+ {emphasized, #{left := $', right := $'},
+ <<"enclosed in single quote characters">>, _},
+ <<" (acute accents) or ">>,
+ {emphasized, #{left := $_, right := $_},
+ <<"underline characters">>, _},
+ <<" are emphasized.">>
+ ] = inline(<<
+ "Word phrases 'enclosed in single quote characters' (acute accents) "
+ "or _underline characters_ are emphasized."
+ >>),
+ ok.
+
+strong_test() ->
+ [
+ <<"Word phrases ">>,
+ {strong, #{left := $*, right := $*},
+ <<"enclosed in asterisk characters">>, _},
+ <<" are rendered in a strong font (usually bold).">>
+ ] = inline(<<
+ "Word phrases *enclosed in asterisk characters* "
+ "are rendered in a strong font (usually bold)."
+ >>),
+ ok.
+-endif.
+
+%% Asciidoc User Guide 21.4
+inline_literal_passthrough(<<"`", Rest0/bits>>, Prev) when ?IS_BOUNDARY(Prev) ->
+ {Content, <<"`", Rest/bits>>} = while(fun(C) -> C =/= $` end, Rest0),
+ {ok, {inline_literal_passthrough, #{}, Content, inline}, Rest}.
+
+-ifdef(TEST).
+inline_literal_passthrough_test() ->
+ [
+ <<"Word phrases ">>,
+ {inline_literal_passthrough, #{}, <<"enclosed in backtick characters">>, _},
+ <<" (grave accents)...">>
+ ] = inline(<<"Word phrases `enclosed in backtick characters` (grave accents)...">>),
+ ok.
+-endif.