aboutsummaryrefslogtreecommitdiffstats
path: root/src/asciideck_tables_pass.erl
diff options
context:
space:
mode:
author: Loïc Hoguin <[email protected]> 2018-06-08 14:49:09 +0200
committer: Loïc Hoguin <[email protected]> 2018-06-08 14:49:09 +0200
commit: 524777054be30c848c1883ffd15b245c29f73004 (patch)
tree: 6c3df0022ee6d228341bb6ce2c525011076c123d /src/asciideck_tables_pass.erl
parent: 48cbfe8b60f3e555acd2d623db10e4eb56234179 (diff)
download: asciideck-524777054be30c848c1883ffd15b245c29f73004.tar.gz
asciideck-524777054be30c848c1883ffd15b245c29f73004.tar.bz2
asciideck-524777054be30c848c1883ffd15b245c29f73004.zip
Rewrite the project
The new code is much more readable and easier to extend. I took inspiration from Haskell's Parsec project which seems to only write the happy-path and applied the idea to Erlang's exceptions. When the parser tries to parse, say, a list, and crashes, it tries with a table next, and so on until something matches. Normal paragraphs always match so there can be no parsing failures. The parser now has a number of passes: first the block parser, then lists and tables passes to build a proper tree out of them and finally an inline pass to apply inline formatting. The resulting AST can then be modified at will and passed on to translator modules which output a different format. The man page translator was also rewritten and has been tested against both Cowboy and Gun. Numerous issues were fixed as a result of this rewrite.
Diffstat (limited to 'src/asciideck_tables_pass.erl')
-rw-r--r-- src/asciideck_tables_pass.erl 191
1 files changed, 191 insertions, 0 deletions
diff --git a/src/asciideck_tables_pass.erl b/src/asciideck_tables_pass.erl
new file mode 100644
index 0000000..fdda6ef
--- /dev/null
+++ b/src/asciideck_tables_pass.erl
@@ -0,0 +1,191 @@
+%% Copyright (c) 2017-2018, Loïc Hoguin <[email protected]>
+%%
+%% Permission to use, copy, modify, and/or distribute this software for any
+%% purpose with or without fee is hereby granted, provided that the above
+%% copyright notice and this permission notice appear in all copies.
+%%
+%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+%% This pass parses and builds a table from the contents
+%% of a table block.
+%%
+%% Asciidoc User Guide 23
+%%
+%% @todo Rows and cells are currently not annotated.
+-module(asciideck_tables_pass).
+
+-export([run/1]).
+
+%% True when C is a space, tab or newline character.
+%%
+%% The whole expansion is wrapped in its own parentheses so that the
+%% macro body is balanced and can be embedded in a larger guard or
+%% expression without changing operator precedence.
+-define(IS_WS(C), ((C =:= $\s) orelse (C =:= $\t) orelse (C =:= $\n))).
+
+%% Walk the list of blocks and replace every table block with its
+%% parsed form (see table/1). Any other block is returned untouched,
+%% and the order of the blocks is preserved.
+run(Blocks) ->
+    lists:map(fun
+        (Table={table, _, _, _}) -> table(Table);
+        (Block) -> Block
+    end, Blocks).
+
+%% Turn the raw contents of a table block into a list of rows.
+%% The attributes and annotations of the block are kept as they
+%% are; only the third element (the children) is replaced.
+table({table, Attrs, Contents, Ann}) ->
+    {Cells, NumCols} = parse_table(Contents, Attrs),
+    {table, Attrs, rows(Cells, NumCols), Ann}.
+
+-ifdef(TEST).
+%% A three-line table without a cols attribute must come out as
+%% three rows of three cells each, in source order.
+table_test() ->
+    Source = <<
+        "|1 |2 |A\n"
+        "|3 |4 |B\n"
+        "|5 |6 |C">>,
+    {table, _, Rows, _} = table({table, #{}, Source, #{line => 1}}),
+    %% Both funs only accept well-formed cell/row tuples, so an
+    %% unexpected node crashes the test instead of being skipped.
+    CellContents = fun({cell, _, Contents, _}) -> Contents end,
+    RowContents = fun({row, _, Cells, _}) -> lists:map(CellContents, Cells) end,
+    [
+        [<<"1">>, <<"2">>, <<"A">>],
+        [<<"3">>, <<"4">>, <<"B">>],
+        [<<"5">>, <<"6">>, <<"C">>]
+    ] = lists:map(RowContents, Rows),
+    ok.
+-endif.
+
+%% Parse the table contents into a flat list of cells and work
+%% out how many columns the table has. Returns {Cells, NumCols}.
+%%
+%% When the cols attribute is present it decides the number of
+%% columns and all the contents are parsed in one go.
+parse_table(Contents, #{<<"cols">> := Cols}) ->
+    {parse_cells(Contents, []), num_cols(Cols)};
+%% Without a cols attribute the number of columns is the number
+%% of cells found on the first line. That line is therefore parsed
+%% on its own first; whatever follows is parsed afterwards, with
+%% the first line's cells used as the starting accumulator.
+parse_table(Contents, _) ->
+    [FirstLine|Remainder] = binary:split(Contents, <<$\n>>),
+    FirstCells = parse_cells(FirstLine, []),
+    NumCols = length(FirstCells),
+    case Remainder of
+        %% We only have the one line.
+        [] ->
+            {FirstCells, NumCols};
+        %% More than one line: parse_cells/2 expects its accumulator
+        %% in reverse order, hence the lists:reverse/1.
+        [Rest] ->
+            {parse_cells(Rest, lists:reverse(FirstCells)), NumCols}
+    end.
+
+%% Count the columns declared in a cols attribute value: one
+%% column per comma-separated entry.
+%%
+%% @todo Handle column specifiers.
+num_cols(Cols) ->
+    length(binary:split(Cols, <<",">>, [global])).
+
+%% Split the contents on unescaped pipes, then build a cell out
+%% of each piece. Acc holds the cells parsed so far in reverse
+%% order; the returned list is in source order.
+%%
+%% @todo Look at the end of each element to see if there's a cell
+%% specifier. Add it as an attribute to the cell for now and
+%% consolidate when processing rows.
+parse_cells(Contents, Acc) ->
+    do_parse_cells(split_cells(Contents), Acc).
+
+%% Split a binary on every unescaped pipe character.
+%%
+%% The result always has at least one element: the data found
+%% before the first pipe (possibly empty). An escaped pipe (a
+%% backslash followed by a pipe) does not split; it is replaced
+%% by a literal pipe in the current piece.
+split_cells(Contents) ->
+    split_cells(Contents, <<>>, []).
+
+%% Current is the piece being accumulated, Done the completed
+%% pieces in reverse order.
+split_cells(<<>>, Current, Done) ->
+    lists:reverse([Current|Done]);
+%% Escaped pipe: keep the pipe, drop the backslash.
+split_cells(<<$\\, $|, Rest/bits>>, Current, Done) ->
+    split_cells(Rest, <<Current/binary, $|>>, Done);
+%% Separator: close the current piece and start a new one.
+split_cells(<<$|, Rest/bits>>, Current, Done) ->
+    split_cells(Rest, <<>>, [Current|Done]);
+%% Any other byte belongs to the current piece.
+split_cells(<<Byte, Rest/bits>>, Current, Done) ->
+    split_cells(Rest, <<Current/binary, Byte>>, Done).
+
+%% Build cells from the pieces produced by split_cells/1. The head
+%% of the list holds the specifiers of the cell whose contents come
+%% next. Acc holds the cells built so far in reverse order; the
+%% result is in source order.
+%%
+%% Malformed table (no pipe before cell). Process it like it is a
+%% single cell; its contents are kept as they are.
+do_parse_cells([Contents], Acc) ->
+    %% @todo Annotations.
+    Cell = {cell, #{specifiers => <<>>}, Contents, #{}},
+    lists:reverse([Cell|Acc]);
+%% Last cell. There are no further cell specifiers.
+do_parse_cells([Specs, Contents0], Acc) ->
+    %% @todo Annotations.
+    Cell = {cell, #{specifiers => Specs},
+        asciideck_block_parser:trim(Contents0, both), #{}},
+    lists:reverse([Cell|Acc]);
+%% If there are cell specifiers we need to extract them from the cell
+%% contents. Cell specifiers are everything from the last whitespace
+%% until the end of the binary.
+do_parse_cells([Specs, Contents0|Tail], Acc) ->
+    %% Specifier detection is not implemented yet, so the specifiers
+    %% of the next cell are always empty and all of Contents0 is kept.
+    NextSpecs = <<>>, %% @todo find_r(Contents0, <<>>),
+    KeepLen = byte_size(Contents0) - byte_size(NextSpecs),
+    Kept = binary:part(Contents0, 0, KeepLen),
+    %% @todo Annotations.
+    Cell = {cell, #{specifiers => Specs},
+        asciideck_block_parser:trim(Kept, both), #{}},
+    do_parse_cells([NextSpecs|Tail], [Cell|Acc]).
+
+%% @todo This is not correct. Not all remaining data is specifiers.
+%% In addition, for columns at the end of the line this doesn't apply.
+%% Find the remaining data after the last whitespace character.
+%find_r(<<>>, Acc) ->
+% Acc;
+%find_r(<<C, Rest/bits>>, _) when ?IS_WS(C) ->
+% find_r(Rest, Rest);
+%find_r(<<_, Rest/bits>>, Acc) ->
+% find_r(Rest, Acc).
+
+-ifdef(TEST).
+%% Without a cols attribute the column count comes from the first
+%% line, and cells are returned as one flat list in source order.
+parse_table_test() ->
+    Source = <<
+        "|1 |2 |A\n"
+        "|3 |4 |B\n"
+        "|5 |6 |C">>,
+    {Cells, 3} = parse_table(Source, #{}),
+    %% The fun only accepts well-formed cell tuples, so anything
+    %% else crashes the test.
+    [
+        <<"1">>, <<"2">>, <<"A">>,
+        <<"3">>, <<"4">>, <<"B">>,
+        <<"5">>, <<"6">>, <<"C">>
+    ] = lists:map(fun({cell, _, Contents, _}) -> Contents end, Cells),
+    ok.
+
+%% An escaped pipe must end up inside the cell contents instead
+%% of starting a new cell.
+parse_table_escape_pipe_test() ->
+    Source = <<
+        "|1 |2\n"
+        "|3 \\|4 |5">>,
+    {Cells, 2} = parse_table(Source, #{}),
+    [
+        <<"1">>, <<"2">>,
+        <<"3 |4">>, <<"5">>
+    ] = lists:map(fun({cell, _, Contents, _}) -> Contents end, Cells),
+    ok.
+-endif.
+
+%% Group a flat list of cells into rows of NumCols cells each.
+%% The last row may hold fewer cells when the table is malformed.
+%%
+%% NumCols must be a positive integer. With zero or fewer columns
+%% the "end of row" clause of rows/5 would match on every call,
+%% emitting empty rows forever without consuming a single cell.
+%% The guard turns that endless loop into an immediate
+%% function_clause error.
+%%
+%% @todo We currently don't handle colspans and rowspans.
+rows(Cells, NumCols) when is_integer(NumCols), NumCols > 0 ->
+    rows(Cells, [], NumCols, [], NumCols).
+
+%% Arguments: remaining cells, finished rows (reversed), number
+%% of columns, cells of the current row (reversed) and the number
+%% of cells still missing from the current row.
+%%
+%% End of row.
+rows(Tail, Acc, NumCols, RowAcc, CurCol) when CurCol =< 0 ->
+    %% @todo Annotations.
+    Row = {row, #{}, lists:reverse(RowAcc), #{}},
+    rows(Tail, [Row|Acc], NumCols, [], NumCols);
+%% Add a cell to the row.
+rows([Cell|Tail], Acc, NumCols, RowAcc, CurCol) ->
+    rows(Tail, Acc, NumCols, [Cell|RowAcc], CurCol - 1);
+%% End of a properly formed table.
+rows([], Acc, _, [], _) ->
+    lists:reverse(Acc);
+%% Malformed table. Even if we expect more columns,
+%% if there are no more cells there's nothing we can do.
+rows([], Acc, _, RowAcc, _) ->
+    %% @todo Annotations.
+    Row = {row, #{}, lists:reverse(RowAcc), #{}},
+    lists:reverse([Row|Acc]).