%% ``The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved via the world wide web at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either expressed or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% The Initial Developer of the Original Code is Ericsson Utvecklings AB.
%% Portions created by Ericsson are Copyright 1999-2000, Ericsson
%% Utvecklings AB. All Rights Reserved.''
%%
%% $Id$
%%
-module(docb_html_util).
-export([attribute_cdata_to_html/1,
element_cdata_to_html/1,
pcdata_to_html/1, pcdata_to_html/2]).
-export([copy_pics/3]).
-export([extract_header_data/2, all_header_data/1]).
-export([make_uri/1,
make_anchor_href/1, make_anchor_href_short/3,
make_anchor_name_short/2,
make_funcdef_short/2]).
-export([erl_include/2, code_include/2, erl_eval/1]).
-export([number/3, count_sections/1]).
-export([format_toc/1]).
-export([html_latin1_sort_order/1]).
%%--Handle CDATA and PCDATA---------------------------------------------
%% NB: Functions for transforming sgmls/XMerL data output to html.
%% Do not use these for included text files (cf code_include and
%% erl_include).
%% attribute_cdata_to_html(Cdata) -> HtmlChars
%%   Converts the CDATA value of an attribute to HTML text by
%%   delegating to data2html/2 with space removal switched off.
attribute_cdata_to_html(Cdata) ->
    data2html(Cdata, false).
%% element_cdata_to_html(Cdata) -> HtmlChars
%%   Converts the CDATA content of an element to HTML text by
%%   delegating to data2html/2 with space removal switched off.
element_cdata_to_html(Cdata) ->
    data2html(Cdata, false).
%% pcdata_to_html(Pcdata) -> HtmlChars
%%   Converts PCDATA to HTML text with space removal switched on.
pcdata_to_html(Pcdata) ->
    pcdata_to_html(Pcdata, true).

%% pcdata_to_html(Pcdata, RmSp) -> HtmlChars
%%   RmSp = bool()
%%   Converts PCDATA to HTML text. RmSp is handed on unchanged to
%%   data2html/2 and selects whether redundant spaces are removed.
pcdata_to_html(Pcdata, RmSp) ->
    data2html(Pcdata, RmSp).
%% data2html(Cs, RmSp) -> HtmlChars
%%   Cs   = string()   PCDATA or CDATA
%%   RmSp = bool()     true: collapse runs of spaces and drop the
%%                     spaces that follow an escaped line break
%%
%% Replaces characters that are special in HTML by numeric character
%% references and decodes the backslash escapes found in the data:
%%   \n      line break
%%   \|X\|   entity X (translated by entity_to_html/1)
%%   \012    dropped
%%   \\n     a literal backslash followed by `n'
%%   \OOO    character given by three octal digits
%%   \\      a literal backslash
%% Real newline characters are dropped. CDATA never contains entities
%% to replace. The result may be a deep list, since the text produced
%% for entities and octal escapes is inserted as a single element.
%%
%% The order of the clauses is significant.
data2html([246 | Rest], RmSp) ->
    "&#246;" ++ data2html(Rest, RmSp);
data2html([$> | Rest], RmSp) ->
    "&#62;" ++ data2html(Rest, RmSp);
data2html([$< | Rest], RmSp) ->
    "&#60;" ++ data2html(Rest, RmSp);
data2html([$& | Rest], RmSp) ->
    "&#38;" ++ data2html(Rest, RmSp);
data2html([$\" | Rest], RmSp) ->
    "&#34;" ++ data2html(Rest, RmSp);
data2html([$\n | Rest], RmSp) ->
    %% A real newline carries no information; line breaks arrive as
    %% the two characters `\' and `n' (next two clauses).
    data2html(Rest, RmSp);
data2html([$\\, $n | Rest], false) ->
    [$\n | data2html(Rest, false)];
data2html([$\\, $n | Rest], true) ->
    [$\n | data2html(delete_leading_space(Rest), true)];
data2html([$\s, $\s | Rest], true) ->
    %% Several spaces in a row: keep exactly one.
    [$\s | data2html(delete_leading_space(Rest), true)];
data2html([$\\, $| | Rest0], RmSp) ->
    {Entity, Rest1} = collect_entity(Rest0),
    [entity_to_html(Entity) | data2html(Rest1, RmSp)];
data2html([$\\, $0, $1, $2 | Rest], RmSp) ->
    data2html(Rest, RmSp);
data2html([$\\, $\\, $n | Rest], RmSp) ->
    [$\\, $n | data2html(Rest, RmSp)];
data2html([$\\, D1, D2, D3 | Rest], RmSp)
  when D1 >= $0, D1 =< $7, D2 >= $0, D2 =< $7, D3 >= $0, D3 =< $7 ->
    Html = case octal2dec(D1, D2, D3) of
               173 ->
                   %% Soft hyphen: produces no output.
                   [];
               Code when Code > 31, Code < 256 ->
                   [io_lib:format("&#~w;", [Code])];
               Code ->
                   [Code]
           end,
    Html ++ data2html(Rest, RmSp);
data2html([$\\, $\\ | Rest], RmSp) ->
    [$\\ | data2html(Rest, RmSp)];
data2html([Char | Rest], RmSp) ->
    [Char | data2html(Rest, RmSp)];
data2html([], _RmSp) ->
    [].
%% delete_leading_space(Cs) -> Cs1
%%   Returns Cs without the space characters it starts with.
delete_leading_space(Cs) ->
    lists:dropwhile(fun(C) -> C =:= $\s end, Cs).
%% collect_entity(Data) -> {Entity, Rest}
%%   Data is the text following an opening `\|'. Entity is everything
%%   up to the closing `\|' and Rest what follows it. If there is no
%%   closing `\|', Entity is empty and Rest is all of Data.
collect_entity(Data) ->
    collect_entity(Data, []).

%% Seen holds the characters scanned so far, most recent first.
collect_entity([], Seen) ->
    {[], lists:reverse(Seen)};
collect_entity([$\\, $| | Rest], Seen) ->
    {lists:reverse(Seen), Rest};
collect_entity([Char | Rest], Seen) ->
    collect_entity(Rest, [Char | Seen]).
%% entity_to_html(Entity) -> HtmlChars
%%   Entity = string()   text found between `\|' and `\|'
%%
%% Translates an entity to HTML text:
%%   - the four characters that are special in HTML become the
%%     corresponding character entity references, so that they are
%%     never emitted as raw markup;
%%   - a backslash followed by three octal digits becomes a numeric
%%     character reference, except the soft hyphen (173), which
%%     produces no output;
%%   - anything else is looked up by
%%     docb_html_util_iso:entity_to_html/1.
entity_to_html("&") ->
    "&amp;";
entity_to_html("\"") ->
    "&quot;";
entity_to_html("<") ->
    "&lt;";
entity_to_html(">") ->
    "&gt;";
entity_to_html([$\\, O1, O2, O3])
  when O1 >= $0, O1 =< $7, O2 >= $0, O2 =< $7, O3 >= $0, O3 =< $7 ->
    case octal2dec(O1, O2, O3) of
        173 -> % soft hyphen
            "";
        Value ->
            io_lib:format("&#~w;", [Value])
    end;
entity_to_html(Other) ->
    docb_html_util_iso:entity_to_html(Other).

%% octal2dec(O1, O2, O3) -> integer()
%%   O1, O2 and O3 are the characters `0'..`7' of a three-digit octal
%%   number, most significant digit first.
octal2dec(O1, O2, O3) ->
    (O1 - $0) * 64 + (O2 - $0) * 8 + (O3 - $0).
%%--Copy images---------------------------------------------------------
%% copy_pics(Src, Dest, Opts) -> ok | Result
%%   Result = the return value of file:copy/2
%%
%% Copies the file Src from the "etc" directory of the docbuilder
%% application to the output file that docb_util:outfile/3 returns
%% for Dest. Nothing is copied (and ok is returned) when the output
%% file already exists and the source is not newer than it.
copy_pics(Src, Dest, Opts) ->
    Source = filename:join([code:lib_dir(docbuilder), "etc", Src]),
    Target = docb_util:outfile(Dest, "", Opts),
    case filelib:last_modified(Target) of
        0 ->
            %% The target does not exist yet.
            file:copy(Source, Target);
        TargetTime ->
            SourceSecs = calendar:datetime_to_gregorian_seconds(
                           filelib:last_modified(Source)),
            TargetSecs = calendar:datetime_to_gregorian_seconds(TargetTime),
            case SourceSecs > TargetSecs of
                true ->
                    %% The source has changed since the last copy.
                    file:copy(Source, Target);
                false ->
                    ok
            end
    end.
%%--Resolve header data-------------------------------------------------
%% extract_header_data(Key, Header) -> HtmlChars
%%   Key    = atom()                 tag of the wanted header element
%%   Header = {header, [], Elements}
%%
%% Returns the text of the header element Key, converted with
%% pcdata_to_html/1. The result is "" when the element is missing or
%% has no content.
extract_header_data(Key, {header, [], Elements}) ->
    case lists:keysearch(Key, 1, Elements) of
        false ->
            "";
        {value, {Key, [], []}} ->
            "";
        {value, {Key, [], [{pcdata, [], Text}]}} ->
            pcdata_to_html(Text)
    end.
%% all_header_data(Header) -> [HtmlChars]
%%   Returns the values of the header elements title, prepared,
%%   responsible, docno, approved, checked, date, rev and file, in
%%   that order. See extract_header_data/2 for the individual values.
all_header_data(Header) ->
    Keys = [title, prepared, responsible, docno, approved,
            checked, date, rev, file],
    all_header_data(Header, Keys).

%% One value per key, in the order of Keys.
all_header_data(Header, Keys) ->
    [extract_header_data(Key, Header) || Key <- Keys].
%%--Resolve hypertext references----------------------------------------
%% URI regular expression (RFC 2396):
%% "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"
%% We split it in five parts:
%% scheme: "^(([^:/?#]+):)?" (includes trailing `:')
%% authority: "^(//([^/?#]*))?" (includes leading `//')
%% path: "^([^?#]*)"
%% query: "^(\\?([^#]*))?" (includes leading `?')
%% fragment: "^(#(.*))?" (includes leading `#')
%% make_uri(Cs) -> URI
%%   Cs = URI = string()
%%
%% Splits Cs into its URI components (see split_uri/1), which strips
%% and escapes each of them, and puts the components together again.
%% A path component ending in ".xml" (or ".xm", which the pattern
%% also accepts) gets that suffix replaced by ".html".
%%
%% The dot of the suffix is escaped in the regular expression; a bare
%% `.' would match any character, turning for instance a path "axml"
%% into ".html".
make_uri(Cs) ->
    XmlSuffix = "\\.xml?$",
    lists:flatmap(
      fun({path, Path}) ->
              case regexp:match(Path, XmlSuffix) of
                  {match, _, _} ->
                      {ok, HtmlPath, _} =
                          regexp:sub(Path, XmlSuffix, ".html"),
                      HtmlPath;
                  _ ->
                      Path
              end;
         ({_, Component}) ->
              Component
      end,
      split_uri(Cs)).
%% split_uri(URI) -> [{Tag, Component}]
%%   Tag = scheme | authority | path | 'query' | fragment
%%
%% Cuts URI into its five components, in the order above. Each
%% component keeps its delimiter (trailing `:', leading `//', `?' or
%% `#') and has been processed by strip_and_escape_uri_component/2.
%% A component that is not present is returned as "".
split_uri(URI) ->
    Parts = [{scheme, "^(([^:/?#]+):)?"},
             {authority, "^(//([^/?#]*))?"},
             {path, "^([^?#]*)"},
             {'query', "^(\\?([^#]*))?"},
             {fragment, "^(#(.*))?"}],
    split_uri(URI, Parts).

%% Every regular expression is anchored at the start of what is left
%% of the URI and may match the empty string.
split_uri(_Cs, []) ->
    [];
split_uri("", [{Tag, _RegExp} | Parts]) ->
    [{Tag, ""} | split_uri("", Parts)];
split_uri(Cs, [{Tag, RegExp} | Parts]) ->
    {match, 1, Len} = regexp:match(Cs, RegExp),
    {Raw, Rest} = lists:split(Len, Cs),
    Component = strip_and_escape_uri_component(Tag, Raw),
    [{Tag, Component} | split_uri(Rest, Parts)].
%% strip_and_escape_uri_component(Tag, Component) -> string()
%%   Removes leading and trailing spaces from a URI component and
%%   escapes it with escape_uri/1. The delimiter that introduces an
%%   authority (`//'), a query (`?') or a fragment (`#') is kept in
%%   front and is excluded from both the stripping and the escaping.
strip_and_escape_uri_component(Tag, Component) ->
    case {Tag, Component} of
        {authority, [$/, $/ | Rest]} ->
            "//" ++ escape_uri(string:strip(Rest));
        {path, _} ->
            escape_uri(string:strip(Component));
        {'query', [$? | Rest]} ->
            "?" ++ escape_uri(string:strip(Rest));
        {fragment, [$# | Rest]} ->
            "#" ++ escape_uri(string:strip(Rest));
        {_, ""} ->
            "";
        {_, _} ->
            escape_uri(string:strip(Component))
    end.
%% escape_uri(Cs) -> string()
%%   Replaces every character that must not appear as itself in a URI
%%   component by `%' followed by its code as two hexadecimal digits
%%   (lower case). The characters replaced are:
%%     - control characters and space (code =< 32),
%%     - the delimiters < > # % "
%%     - ?
%%     - { } | \ ^ [ ] '
%%     - everything with code >= 127.
%%   Both angle brackets are escaped; testing `<' twice would leave
%%   `>' unescaped.
escape_uri([C | Cs]) when C =< 32;
                          C == $<; C == $>; C == $#; C == $%; C == $\";
                          C == $?;
                          C == ${; C == $}; C == $|; C == $\\; C == $^;
                          C == $[; C == $]; C == $';
                          C >= 127 ->
    [$%, mk_hex(C div 16), mk_hex(C rem 16) | escape_uri(Cs)];
escape_uri([C | Cs]) ->
    [C | escape_uri(Cs)];
escape_uri([]) ->
    [].

%% mk_hex(N) -> char()
%%   The hexadecimal digit (lower case) for N.
%%   NOTE(review): only meaningful for 0 =< N =< 15, i.e. escape_uri/1
%%   presumably expects character codes below 256 -- confirm.
mk_hex(N) when N < 10 ->
    N + $0;
mk_hex(N) ->
    N - 10 + $a.
%% make_anchor_href(HRef) -> HtmlChars
%%   HRef = string()   "Path", "Path#Fragment" or "#Fragment"
%%
%% Splits HRef at `#' and builds the value of an href attribute with
%% make_anchor_href/2. More than one `#' in HRef is not handled.
make_anchor_href(HRef) ->
    case regexp:split(HRef, "#") of
        {ok, [HRef]} ->
            %% No `#' in HRef, i.e. only path
            make_anchor_href(HRef, "");
        {ok, [Path, Fragment]} ->
            make_anchor_href(Path, Fragment)
    end.

%% make_anchor_href(Path, Fragment) -> HtmlChars
%%   Path: an empty path stays empty. Otherwise a trailing ".xml" (or
%%   ".xm", which the pattern also accepts) is replaced by ".html",
%%   and ".html" is appended when there is no such suffix.
%%   Fragment: surrounding spaces are removed and every `/' is
%%   replaced by `-'. An empty fragment is left out together with the
%%   `#'.
%%   Both parts are passed through attribute_cdata_to_html/1.
%%
%% The dot of the suffix is escaped in the regular expression; a bare
%% `.' would match any character, turning for instance a path "axml"
%% into ".html" instead of "axml.html".
make_anchor_href(Path0, Frag0) ->
    XmlSuffix = "\\.xml?$",
    Frag1 = string:strip(Frag0),
    Path1 = case Path0 of
                "" ->
                    "";
                _ ->
                    case regexp:match(Path0, XmlSuffix) of
                        nomatch ->
                            Path0 ++ ".html";
                        _ ->
                            {ok, NewPath, _} =
                                regexp:sub(Path0, XmlSuffix, ".html"),
                            NewPath
                    end
            end,
    case Frag1 of
        "" ->
            attribute_cdata_to_html(Path1);
        _ ->
            Anchor = [case Ch of $/ -> $-; _ -> Ch end || Ch <- Frag1],
            attribute_cdata_to_html(Path1) ++
                "#" ++
                attribute_cdata_to_html(Anchor)
    end.
%% make_anchor_href_short(Path, Frag, RefType) -> HtmlChars
%%   As make_anchor_href/2, but the fragment is first shortened with
%%   make_funcdef_short/3, using "-" as delimiter.
make_anchor_href_short(Path, Frag, RefType) ->
    make_anchor_href(Path, make_funcdef_short(Frag, RefType, "-")).
%% make_anchor_name_short(FuncName, RefType) -> HtmlChars
%%   Shortens FuncName with make_funcdef_short/3, using "-" as
%%   delimiter, and converts the result with
%%   attribute_cdata_to_html/1.
make_anchor_name_short(FuncName, RefType) ->
    attribute_cdata_to_html(make_funcdef_short(FuncName, RefType, "-")).
%% make_funcdef_short(FuncDef, RefType) -> Short
%%   As make_funcdef_short/3 with "/" as delimiter.
make_funcdef_short(FuncDef0, RefType) ->
    make_funcdef_short(FuncDef0, RefType, "/").

%% make_funcdef_short(FuncDef, RefType, Delimiter) -> Short
%%   Produces a short form of a function definition. FuncDef is first
%%   trimmed with docb_util:trim/1. For the reference types cref and
%%   erlref the short form is what docb_util:fknidx/2 returns. For
%%   all other reference types, and when fknidx/2 fails or returns
%%   false, it is the first space-separated word of the trimmed
%%   definition (or the trimmed definition itself if it has no words).
make_funcdef_short(FuncDef0, RefType, Delimiter) ->
    FuncDef = docb_util:trim(FuncDef0),
    case funcdef_index(RefType, FuncDef, Delimiter) of
        false ->
            funcdef_first_word(FuncDef);
        Short ->
            Short
    end.

%% The result of docb_util:fknidx/2 for cref and erlref references,
%% false for other reference types and when the call exits.
%% NOTE: because of the old-style catch, a value thrown by fknidx/2
%% is returned as if it were its result.
funcdef_index(RefType, FuncDef, Delimiter)
  when RefType =:= cref; RefType =:= erlref ->
    case catch docb_util:fknidx(FuncDef, Delimiter) of
        {'EXIT', _} ->
            false;
        Result ->
            Result
    end;
funcdef_index(_RefType, _FuncDef, _Delimiter) ->
    false.

%% The first space-separated word of FuncDef, or FuncDef itself when
%% it contains no words.
funcdef_first_word(FuncDef) ->
    case string:tokens(FuncDef, " ") of
        [Word | _] ->
            Word;
        _ ->
            FuncDef
    end.
%%--Include tags--------------------------------------------------------
%% Only used in report DTD
%% erl_include(File, Tag) -> {drop, HtmlChars}
%%   Fetches text with docb_main:include_file/2 and returns it,
%%   escaped by text_to_html/1, inside a <pre> element. The HTML is
%%   empty when the text cannot be fetched.
erl_include(File, Tag) ->
    Html = case docb_main:include_file(File, Tag) of
               {ok, Cs} ->
                   lists:append(["\n<pre>\n", text_to_html(Cs), "\n</pre>\n"]);
               error ->
                   ""
           end,
    {drop, Html}.
%% code_include(File, Tag) -> {ok, HtmlChars} | {error, {codeinclude, File}}
%%   Fetches text with docb_main:include/3, passing Tag as both of
%%   its tag arguments, and returns it escaped by text_to_html/1.
code_include(File, Tag) ->
    case docb_main:include(File, Tag, Tag) of
        error ->
            {error, {codeinclude, File}};
        {ok, Cs} ->
            {ok, text_to_html(Cs)}
    end.
%% erl_eval(Expr) -> {drop, HtmlChars}
%%   Passes Expr to docb_main:eval_str/1 and returns the resulting
%%   text, escaped by text_to_html/1, inside a <pre> element.
erl_eval(Expr) ->
    Escaped = text_to_html(docb_main:eval_str(Expr)),
    {drop, lists:append(["\n<pre>\n", Escaped, "\n</pre>\n"])}.
%% text_to_html(Cs) -> HtmlChars
%%   Used for plain text (e.g. inclusions of code). Only the
%%   characters > < & and " are replaced, by numeric character
%%   references. Everything else, including spaces and newlines, is
%%   kept as it is.
text_to_html(Cs) ->
    lists:flatmap(fun text_char_to_html/1, Cs).

%% The HTML text for a single character of plain text.
text_char_to_html($>) -> "&#62;";
text_char_to_html($<) -> "&#60;";
text_char_to_html($&) -> "&#38;";
text_char_to_html($\") -> "&#34;";
text_char_to_html(Char) -> [Char].
%%--Number sections-----------------------------------------------------
%% number(Element, Prefix, File) -> Element1
%%   Element = {Tag, Attrs, Content}
%%   Prefix  = none | string()   number of the first top-level section
%%                               as a string of digits; none means 1
%%
%% Numbers the sections found in the content of Element, see
%% do_number/3.
number({Tag, Attrs, Content}, Prefix, File) ->
    First = case Prefix of
                none -> 1;
                _ -> list_to_integer(Prefix)
            end,
    {Tag, Attrs, do_number(Content, [First], File)}.
%% do_number(Elements, Counters, File) -> Elements1
%%   Counters = [integer()]   one counter per nesting level, the
%%                            innermost level first
%%
%% Walks a list of elements and numbers the sections in it:
%%   - header and pcdata elements are copied as they are;
%%   - a section has its content processed with a new counter,
%%     starting at 1, pushed on Counters; the sections that follow it
%%     on the same level see the current counter increased by one;
%%   - a title consisting of a single pcdata element gets the
%%     attributes NUMBER and FILE and is preceded by a marker element
%%     whose ID is the number. The number is built from all counters
%%     except the innermost one, outermost first, separated by dots;
%%   - the content of any other element is processed with Counters
%%     unchanged.
do_number([], _Counters, _File) ->
    [];
do_number([{header, _, _} = Header | Siblings], Counters, File) ->
    [Header | do_number(Siblings, Counters, File)];
do_number([{section, Attrs, Content} | Siblings], [N | Outer], File) ->
    Numbered = do_number(Content, [1, N | Outer], File),
    [{section, Attrs, Numbered} | do_number(Siblings, [N + 1 | Outer], File)];
do_number([{title, _, [{pcdata, _, Text}]} | Siblings],
          [_ | Outer] = Counters, File) ->
    Format = make_format(length(Outer)),
    Number = lists:flatten(io_lib:format(Format, lists:reverse(Outer))),
    Marker = {marker, [{"ID", "CDATA", Number}], []},
    Title = {title,
             [{"NUMBER", "CDATA", Number}, {"FILE", "CDATA", File}],
             [{pcdata, [], Text}]},
    [Marker, Title | do_number(Siblings, Counters, File)];
do_number([{pcdata, _, _} = Pcdata | Siblings], Counters, File) ->
    [Pcdata | do_number(Siblings, Counters, File)];
do_number([{Tag, Attrs, Content} | Siblings], Counters, File) ->
    Numbered = do_number(Content, Counters, File),
    [{Tag, Attrs, Numbered} | do_number(Siblings, Counters, File)].
%% make_format(N) -> string()
%%   N = integer() >= 0
%%
%% Returns an io_lib:format/2 control string that prints N terms with
%% ~w, separated by dots: "~w" for 1, "~w.~w" for 2 and so on.
%%
%% For N = 0 the result is the empty string. Without that clause the
%% recursion would count down past 1 and never terminate; do_number/3
%% asks for zero levels when it meets a title while only one counter
%% is on its stack.
make_format(0) ->
    "";
make_format(1) ->
    "~w";
make_format(N) when N > 1 ->
    "~w." ++ make_format(N - 1).
%% count_sections(Tags) -> integer()
%%   Returns the number of elements of the list Tags that are the
%%   atom section.
count_sections(Tags) ->
    count_sections(Tags, 0).

%% Count holds the number of section atoms seen so far.
count_sections([], Count) ->
    Count;
count_sections([section | Rest], Count) ->
    count_sections(Rest, Count + 1);
count_sections([_Other | Rest], Count) ->
    count_sections(Rest, Count).
%%--Make a ToC----------------------------------------------------------
%% format_toc(Toc) -> [Line]
%%   Toc = [{Number, Title}]
%%
%% Returns one line of HTML for every entry of the table of contents:
%% the number followed by the title as a link to the anchor named by
%% the number. Each line is a list of strings and is not flattened.
format_toc(Toc) ->
    lists:map(fun toc_line/1, Toc).

%% The HTML line for one entry of the table of contents.
toc_line({Number, Title}) ->
    [Number, " <a href = \"#", Number,
     "\">", Title, "</a><br/>\n"].
%%--Convert HTML ISO Latin 1 characters to ordinary characters----------
%% To be used for sorting. Cs must be flat.
%% html_latin1_sort_order(Cs) -> SortKey
%%   Cs = SortKey = string()
%%
%% Replaces every numeric character reference of two or three decimal
%% digits (`&#NN;', `&#NNN;') by an ordinary character, so that the
%% result can be used as a sort key:
%%   0..159     the character with that code
%%   160        (no-break space) nothing
%%   161..191   a similar ASCII symbol, see hlso_symbols/0
%%   192..255   the base letter without accent, see hlso_letters/0
%% All other characters are kept. A reference to a code above 255 is
%% not handled and makes the function fail.
html_latin1_sort_order(Cs) ->
    hlso(Cs).

hlso([$&, $#, D2, D1, D0, $; | Rest])
  when D2 >= $0, D2 =< $9, D1 >= $0, D1 =< $9, D0 >= $0, D0 =< $9 ->
    hlso0(100 * (D2 - $0) + 10 * (D1 - $0) + (D0 - $0), Rest);
hlso([$&, $#, D1, D0, $; | Rest])
  when D1 >= $0, D1 =< $9, D0 >= $0, D0 =< $9 ->
    hlso0(10 * (D1 - $0) + (D0 - $0), Rest);
hlso([Char | Rest]) ->
    [Char | hlso(Rest)];
hlso([]) ->
    [].

%% hlso0(Code, Rest): the sort key for the character code Code,
%% followed by the sort key of Rest.
hlso0(Code, Rest) when 0 =< Code, Code =< 159 ->
    [Code | hlso(Rest)];
hlso0(160, Rest) ->
    %% No-break space: removed.
    hlso(Rest);
hlso0(Code, Rest) when is_integer(Code), 161 =< Code, Code =< 191 ->
    [lists:nth(Code - 160, hlso_symbols()) | hlso(Rest)];
hlso0(Code, Rest) when is_integer(Code), 192 =< Code, Code =< 255 ->
    [lists:nth(Code - 191, hlso_letters()) | hlso(Rest)].

%% Sort characters for the codes 161..191, one character per code:
%%   161 ? inverted exclamation mark   162 $ cent sign
%%   163 $ pound sterling sign         164 $ general currency sign
%%   165 $ yen sign                    166 | broken (vertical) bar
%%   167 $ section sign                168 : umlaut (dieresis)
%%   169 c copyright sign              170 f ordinal indicator, feminine
%%   171 " angle quotation mark, left  172 - not sign
%%   173 - soft hyphen                 174 r registered sign
%%   175 - macron                      176 d degree sign
%%   177 + plus-or-minus sign          178 2 superscript two
%%   179 3 superscript three           180 ' acute accent
%%   181 ' micro sign                  182 $ pilcrow (paragraph sign)
%%   183 . middle dot                  184 c cedilla
%%   185 1 superscript one             186 m ordinal indicator, masculine
%%   187 " angle quotation mark, right 188 4 fraction one-quarter
%%   189 2 fraction one-half           190 3 fraction three-quarters
%%   191 ? inverted question mark
hlso_symbols() ->
    "?$$$$|$:cf"      % 161..170
    "\"--r-d+23'"     % 171..180
    "'$.c1m\"423?".   % 181..191

%% Sort characters for the codes 192..255, one character per code.
%% Accented letters map to their base letter. The remaining codes:
%%   198 A (capital AE)        208 D (capital Eth)
%%   215 x (multiply sign)     216 O (capital O, slash)
%%   222 T (capital THORN)     223 s (small sharp s, German)
%%   230 a (small ae)          240 d (small eth)
%%   247 / (divide sign)       248 o (small o, slash)
%%   254 t (small thorn)
hlso_letters() ->
    "AAAAAAACEEEEIIII"    % 192..207
    "DNOOOOOxOUUUUYTs"    % 208..223
    "aaaaaaaceeeeiiii"    % 224..239
    "dnooooo/ouuuuyty".   % 240..255