%% ``The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved via the world wide web at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either expressed or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% The Initial Developer of the Original Code is Ericsson Utvecklings AB.
%% Portions created by Ericsson are Copyright 1999-2000, Ericsson
%% Utvecklings AB. All Rights Reserved.''
%%
%%     $Id$
%%
-module(docb_html_util).

-export([attribute_cdata_to_html/1, element_cdata_to_html/1,
         pcdata_to_html/1, pcdata_to_html/2]).
-export([copy_pics/3]).
-export([extract_header_data/2, all_header_data/1]).
-export([make_uri/1,
         make_anchor_href/1, make_anchor_href_short/3,
         make_anchor_name_short/2,
         make_funcdef_short/2]).
-export([erl_include/2, code_include/2, erl_eval/1]).
-export([number/3, count_sections/1]).
-export([format_toc/1]).
-export([html_latin1_sort_order/1]).

%%--Handle CDATA and PCDATA---------------------------------------------

%% NB: Functions for transforming sgmls/XMerL data output to html.
%%     Do not use these for included text files (cf code_include and
%%     erl_include).

%% attribute_cdata_to_html(Cdata) -> Html
%%   Converts attribute CDATA with data2html/2; spaces are kept as is.
attribute_cdata_to_html(Cdata) ->
    data2html(Cdata, false).

%% element_cdata_to_html(Cdata) -> Html
%%   Converts element CDATA with data2html/2; spaces are kept as is.
element_cdata_to_html(Cdata) ->
    data2html(Cdata, false).

%% pcdata_to_html(Pcdata) -> Html
%%   Same as pcdata_to_html(Pcdata, true), i.e. space removal is on.
pcdata_to_html(Pcdata) ->
    pcdata_to_html(Pcdata, true).

%% pcdata_to_html(Pcdata, RemoveSpaces) -> Html
%%   Converts PCDATA with data2html/2; RemoveSpaces is handed on
%%   unchanged as the space-removal flag.
pcdata_to_html(Pcdata, RemoveSpaces) ->
    data2html(Pcdata, RemoveSpaces).

%% PCDATA, CDATA: Replace entities, and optionally delete
%% leading and multiple spaces. CDATA never contains entities to
%% replace.
%% data2html(Cs, RmSpace) -> Html
%%   Walks the character list Cs and builds the HTML text. The result
%%   may be a deep list, since entity conversions are inserted as
%%   sublists. The clause order matters: the longer backslash sequences
%%   have to be tried before the shorter ones.
data2html([246 | Rest], Rm) ->
    "&#246;" ++ data2html(Rest, Rm);
data2html([$> | Rest], Rm) ->
    "&#62;" ++ data2html(Rest, Rm);
data2html([$< | Rest], Rm) ->
    "&#60;" ++ data2html(Rest, Rm);
data2html([$& | Rest], Rm) ->
    "&#38;" ++ data2html(Rest, Rm);
data2html([$\" | Rest], Rm) ->
    "&#34;" ++ data2html(Rest, Rm);
%% A real newline character is dropped ...
data2html([$\n | Rest], Rm) ->
    data2html(Rest, Rm);
%% ... whereas the two-character sequence `\n' becomes a newline.
data2html([$\\, $n | Rest], false) ->
    [$\n | data2html(Rest, false)];
data2html([$\\, $n | Rest], true) ->
    [$\n | data2html(delete_leading_space(Rest), true)];
%% Collapse a run of spaces into one (only when removing spaces).
data2html([$\s, $\s | Rest], true) ->
    [$\s | data2html(delete_leading_space(Rest), true)];
%% `\|Name\|' is an entity reference.
data2html([$\\, $| | Rest0], Rm) ->
    {Name, Rest1} = collect_entity(Rest0),
    [entity_to_html(Name) | data2html(Rest1, Rm)];
%% The sequence `\012' is removed.
data2html([$\\, $0, $1, $2 | Rest], Rm) ->
    data2html(Rest, Rm);
%% `\\n' is kept as the two characters `\n'.
data2html([$\\, $\\, $n | Rest], Rm) ->
    [$\\, $n | data2html(Rest, Rm)];
%% `\OOO', three octal digits.
data2html([$\\, D1, D2, D3 | Rest], Rm) when $0 =< D1, D1 =< $7,
                                             $0 =< D2, D2 =< $7,
                                             $0 =< D3, D3 =< $7 ->
    Tail = data2html(Rest, Rm),
    case octal2dec(D1, D2, D3) of
        173 ->                                  % soft hyphen
            Tail;
        Code when 31 < Code, Code < 256 ->
            [io_lib:format("&#~w;", [Code]) | Tail];
        Code ->
            [Code | Tail]
    end;
data2html([$\\, $\\ | Rest], Rm) ->
    [$\\ | data2html(Rest, Rm)];
data2html([Ch | Rest], Rm) ->
    [Ch | data2html(Rest, Rm)];
data2html([], _) ->
    [].

%% delete_leading_space(Cs) -> Cs1
%%   Removes all space characters at the beginning of Cs.
delete_leading_space([$\s | Rest]) ->
    delete_leading_space(Rest);
delete_leading_space(Other) ->
    Other.

%% collect_entity(Cs) -> {Name, Rest}
%%   Reads an entity name up to the terminating `\|'. Rest is what
%%   follows the terminator. If there is no terminator, Name is empty
%%   and Rest is all of Cs.
collect_entity(Cs) ->
    collect_entity(Cs, []).

collect_entity([$\\, $| | Rest], RevName) ->
    {lists:reverse(RevName), Rest};
collect_entity([Ch | Rest], RevName) ->
    collect_entity(Rest, [Ch | RevName]);
collect_entity([], RevName) ->
    {[], lists:reverse(RevName)}.
%% entity_to_html(Name) -> Html
%%   Translates an entity name to HTML text.
%%   The four characters that are special in HTML are returned as
%%   numeric character references, the same ones that data2html/2 and
%%   text_to_html/1 produce for these characters.
%%   A name of the form `\OOO' (three octal digits) becomes the
%%   corresponding numeric reference, except for the soft hyphen which
%%   is removed. All other names are looked up in docb_html_util_iso.
entity_to_html("&") ->
    "&#38;";
entity_to_html("\"") ->
    "&#34;";
entity_to_html("<") ->
    "&#60;";
entity_to_html(">") ->
    "&#62;";
entity_to_html([$\\, O1, O2, O3]) when O1 >= $0, O1 =< $7,
                                       O2 >= $0, O2 =< $7,
                                       O3 >= $0, O3 =< $7 ->
    case octal2dec(O1, O2, O3) of
        173 ->                                  % soft hyphen
            "";
        Value ->
            io_lib:format("&#~w;", [Value])
    end;
entity_to_html(Other) ->
    docb_html_util_iso:entity_to_html(Other).

%% octal2dec(O1, O2, O3) -> integer()
%%   O1, O2 and O3 are the characters of three octal digits. Subtracting
%%   73*$0 (73 = 64+8+1) removes the character offset of all three
%%   digits in one step.
octal2dec(O1, O2, O3) ->
    (O1*8+O2)*8+O3-73*$0.

%%--Copy images---------------------------------------------------------

%% copy_pics(Src, Dest, Opts) -> ok | Result of file:copy/2
%%   Copies the file Src from the "etc" directory of the docbuilder
%%   application to the output file named by docb_util:outfile/3.
%%   The copy is only done if the output file is missing or is older
%%   than the source file.
copy_pics(Src, Dest, Opts) ->
    Dir = code:lib_dir(docbuilder),
    InFile = filename:join([Dir, "etc", Src]),
    OutFile = docb_util:outfile(Dest, "", Opts),
    case filelib:last_modified(OutFile) of
        0 ->                                    % File doesn't exist
            file:copy(InFile, OutFile);
        OutMod2 ->
            InMod1s = calendar:datetime_to_gregorian_seconds(
                        filelib:last_modified(InFile)),
            OutMod2s = calendar:datetime_to_gregorian_seconds(OutMod2),
            if
                InMod1s > OutMod2s ->   % InFile is newer than OutFile
                    file:copy(InFile, OutFile);
                true ->
                    ok
            end
    end.

%%--Resolve header data-------------------------------------------------

%% extract_header_data(Key, Header) -> Html
%%   Header is {header, [], List}. Returns the PCDATA of the element
%%   Key in List, converted with pcdata_to_html/1, or "" if the element
%%   is empty or not present.
extract_header_data(Key, {header, [], List}) ->
    case lists:keyfind(Key, 1, List) of
        {Key, [], []} ->
            "";
        {Key, [], [{pcdata, [], Value}]} ->
            pcdata_to_html(Value);
        false ->
            ""
    end.

%% all_header_data(Header) -> [Html]
%%   Returns the data of the header elements, in the fixed order
%%   given below.
all_header_data(Header) ->
    all_header_data(Header,
                    [title, prepared, responsible, docno, approved,
                     checked, date, rev, file]).

all_header_data(_Header, []) ->
    [];
all_header_data(Header, [Key | Rest]) ->
    [extract_header_data(Key, Header) | all_header_data(Header, Rest)].

%%--Resolve hypertext references----------------------------------------

%% URI regular expression (RFC 2396):
%%   "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"
%% We split it in five parts:
%%   scheme:    "^(([^:/?#]+):)?"   (includes trailing `:')
%%   authority: "^(//([^/?#]*))?"   (includes leading `//')
%%   path:      "^([^?#]*)"
%%   query:     "^(\\?([^#]*))?"    (includes leading `?')
%%   fragment:  "^(#(.*))?"
%%                                  (includes leading `#')

%% make_uri(Cs) -> string()
%%   Splits the URI Cs into its components, strips and escapes each of
%%   them, and replaces a trailing ".xml" (or ".xm") of the path
%%   component with ".html".
%%   The `re' module is used without the unicode option, so Cs is
%%   expected to consist of characters in the range 0..255.
make_uri(Cs) ->
    lists:flatmap(
      fun({path, S}) ->
              %% The dot is escaped so that only a real ".xml" suffix
              %% is replaced. S is returned as is if there is no match.
              re:replace(S, "\\.xml?$", ".html",
                         [dollar_endonly, {return, list}]);
         ({_Tag, S}) ->
              S
      end,
      split_uri(Cs)).

%% split_uri(URI) -> [{Tag, string()}]
%%   Tag = scheme | authority | path | 'query' | fragment
%%   The components are returned in this order; an absent component is
%%   returned as "".
split_uri(URI) ->
    split_uri(URI, [{scheme, "^(([^:/?#]+):)?"},
                    {authority, "^(//([^/?#]*))?"},
                    {path, "^([^?#]*)"},
                    {'query', "^(\\?([^#]*))?"},
                    {fragment, "^(#(.*))?"}]).

split_uri("", [{Tag, _R} | T]) ->
    [{Tag, ""} | split_uri("", T)];
split_uri(Cs0, [{Tag, R} | T]) ->
    %% Every pattern is anchored and can match the empty string, so
    %% there is always a match starting at offset 0. `dotall' makes
    %% `.' in the fragment pattern match any character.
    {match, [{0, N}]} = re:run(Cs0, R, [dotall, {capture, first}]),
    {Cs1, Rest} = lists:split(N, Cs0),
    Cs2 = strip_and_escape_uri_component(Tag, Cs1),
    [{Tag, Cs2} | split_uri(Rest, T)];
split_uri(_, []) ->
    [].

%% strip_and_escape_uri_component(Tag, Cs) -> string()
%%   Strips surrounding spaces and escapes the component. The
%%   delimiter that introduces the component is kept unescaped.
strip_and_escape_uri_component(authority, "//" ++ Cs) ->
    "//" ++ escape_uri(string:strip(Cs));
strip_and_escape_uri_component(path, Cs) ->
    escape_uri(string:strip(Cs));
strip_and_escape_uri_component('query', "?" ++ Cs) ->
    "?" ++ escape_uri(string:strip(Cs));
strip_and_escape_uri_component(fragment, "#" ++ Cs) ->
    "#" ++ escape_uri(string:strip(Cs));
strip_and_escape_uri_component(_, "") ->
    "";
strip_and_escape_uri_component(_, Cs) ->
    escape_uri(string:strip(Cs)).

%% escape_uri(Cs) -> string()
%%   Replaces each character listed in the guard with `%' followed by
%%   its two-digit hexadecimal code (lower case letters).
%%   Both angle brackets are escaped.
escape_uri([C | Cs]) when C =< 32;
                          C =:= $<; C =:= $>; C =:= $#; C =:= $%;
                          C =:= $"; C =:= $?; C =:= ${; C =:= $};
                          C =:= $|; C =:= $\\; C =:= $^; C =:= $[;
                          C =:= $]; C =:= $';
                          C >= 127 ->
    [$%, mk_hex(C div 16), mk_hex(C rem 16) | escape_uri(Cs)];
escape_uri([C | Cs]) ->
    [C | escape_uri(Cs)];
escape_uri([]) ->
    [].

%% mk_hex(N) -> char()
%%   Returns the hexadecimal digit character for N.
mk_hex(C) when C < 10 ->
    C + $0;
mk_hex(C) ->
    C - 10 + $a.

%% make_anchor_href(HRef) -> Html
%%   Splits HRef at the first `#' into path and fragment and calls
%%   make_anchor_href/2. Any further `#' is left in the fragment.
make_anchor_href(HRef) ->
    case lists:splitwith(fun(C) -> C =/= $# end, HRef) of
        {Path, ""} ->
            %% No `#' in HRef, i.e. only path
            make_anchor_href(Path, "");
        {Path, [$# | Fragment]} ->
            make_anchor_href(Path, Fragment)
    end.
%% make_anchor_href(Path, Fragment) -> Html
%%   Builds the value of an href attribute. A non-empty path gets the
%%   extension ".html": a trailing ".xml" (or ".xm") is replaced,
%%   otherwise ".html" is appended. In the fragment, `/' is replaced
%%   with `-'.
%%   The `re' module is used without the unicode option, so the path
%%   is expected to consist of characters in the range 0..255.
make_anchor_href(Path0, Frag0) ->
    Frag1 = string:strip(Frag0),
    Path1 = anchor_path_to_html(Path0),
    case Frag1 of
        "" ->
            attribute_cdata_to_html(Path1);
        _ ->
            attribute_cdata_to_html(Path1) ++ "#" ++
                attribute_cdata_to_html([case Ch of
                                             $/ -> $-;
                                             _ -> Ch
                                         end || Ch <- Frag1])
    end.

%% Gives a non-empty path the extension ".html". The dot in the
%% pattern is escaped so that only a real ".xml" suffix is replaced.
anchor_path_to_html("") ->
    "";
anchor_path_to_html(Path) ->
    Pattern = "\\.xml?$",
    case re:run(Path, Pattern, [dollar_endonly, {capture, none}]) of
        nomatch ->
            Path ++ ".html";
        match ->
            re:replace(Path, Pattern, ".html",
                       [dollar_endonly, {return, list}])
    end.

%% make_anchor_href_short(Path, Frag, RefType) -> Html
%%   As make_anchor_href/2, but the fragment is first shortened with
%%   make_funcdef_short/3 using "-" as delimiter.
make_anchor_href_short(Path, Frag, RefType) ->
    ShortFrag = make_funcdef_short(Frag, RefType, "-"),
    make_anchor_href(Path, ShortFrag).

%% make_anchor_name_short(FuncName, RefType) -> Html
%%   Shortens FuncName with make_funcdef_short/3 using "-" as
%%   delimiter and converts the result as attribute CDATA.
make_anchor_name_short(FuncName0, RefType) ->
    FuncName1 = make_funcdef_short(FuncName0, RefType, "-"),
    attribute_cdata_to_html(FuncName1).

%% make_funcdef_short(FuncDef, RefType) -> string()
%%   Same as make_funcdef_short(FuncDef, RefType, "/").
make_funcdef_short(FuncDef0, RefType) ->
    make_funcdef_short(FuncDef0, RefType, "/").

%% make_funcdef_short(FuncDef, RefType, Delimiter) -> string()
%%   For RefType cref and erlref the trimmed FuncDef is passed to
%%   docb_util:fknidx/2. If that call fails or returns false, or for
%%   any other RefType, the first space-separated word of FuncDef is
%%   returned (FuncDef itself if there is no such word).
make_funcdef_short(FuncDef0, RefType, Delimiter) ->
    FuncDef1 = docb_util:trim(FuncDef0),
    Any0 = case lists:member(RefType, [cref, erlref]) of
               true ->
                   %% Deliberately best effort: any failure in fknidx
                   %% falls back to the first word of FuncDef.
                   case catch docb_util:fknidx(FuncDef1, Delimiter) of
                       {'EXIT', _} ->
                           false;
                       Any1 ->
                           Any1
                   end;
               false ->
                   false
           end,
    case Any0 of
        false ->
            case string:tokens(FuncDef1, " ") of
                [Any2 | _] ->
                    Any2;
                _ ->
                    FuncDef1
            end;
        _ ->
            Any0
    end.

%%--Include tags--------------------------------------------------------

%% Only used in report DTD

%% erl_include(File, Tag) -> {drop, Html}
%%   The text returned by docb_main:include_file/2 is escaped and
%%   wrapped in <pre>. On error the included text is empty.
erl_include(File, Tag) ->
    case docb_main:include_file(File, Tag) of
        {ok, Cs} ->
            {drop, "\n<pre>\n" ++ text_to_html(Cs) ++ "\n</pre>\n"};
        error ->
            {drop, ""}
    end.

%% code_include(File, Tag) -> {ok, Html} | {error, {codeinclude, File}}
%%   The text returned by docb_main:include/3 is escaped.
code_include(File, Tag) ->
    case docb_main:include(File, Tag, Tag) of
        {ok, Cs} ->
            {ok, text_to_html(Cs)};
        error ->
            {error, {codeinclude, File}}
    end.

%% erl_eval(Expr) -> {drop, Html}
%%   The text returned by docb_main:eval_str/1 is escaped and wrapped
%%   in <pre>.
erl_eval(Expr) ->
    Cs = docb_main:eval_str(Expr),
    {drop, "\n<pre>\n" ++ text_to_html(Cs) ++ "\n</pre>\n"}.

%% Only replaces certain characters. Spaces and new lines etc are kept.
%% Used for plain text (e.g. inclusions of code).
%% text_to_html(Cs) -> Html
%%   Replaces `>', `<', `&' and `"' with numeric character references.
%%   All other characters are copied unchanged.
text_to_html([$> | Cs]) ->
    "&#62;" ++ text_to_html(Cs);
text_to_html([$< | Cs]) ->
    "&#60;" ++ text_to_html(Cs);
text_to_html([$& | Cs]) ->
    "&#38;" ++ text_to_html(Cs);
text_to_html([$\" | Cs]) ->
    "&#34;" ++ text_to_html(Cs);
text_to_html([C | Cs]) ->
    [C | text_to_html(Cs)];
text_to_html([]) ->
    [].

%%--Number sections-----------------------------------------------------

%% number({Tag, Attrs, More}, Prefix, File) -> {Tag, Attrs, More1}
%%   Prefix = none | string()
%%   Numbers the sections in More. The first top-level section gets
%%   number 1 if Prefix is none, otherwise list_to_integer(Prefix).
number({Tag, Attrs, More}, none, File) ->
    {Tag, Attrs, do_number(More, [1], File)};
number({Tag, Attrs, More}, Prefix, File) ->
    {Tag, Attrs, do_number(More, [list_to_integer(Prefix)], File)}.

%% do_number(Elements, Counters, File) -> Elements1
%%   Counters holds the section counters, innermost level first.
%%   - header elements are copied as is.
%%   - A section is numbered one level deeper and increments the
%%     counter of the current level for the elements that follow.
%%   - A title with a single pcdata child gets the attributes NUMBER
%%     and FILE and is preceded by a marker whose ID is the number.
%%     The number is built from the counters of the enclosing levels,
%%     separated by dots.
%%   - pcdata is copied as is; all other elements are traversed.
do_number([], _, _) ->
    [];
do_number([{header, Attrs, More} | Rest], NN, File) ->
    [{header, Attrs, More} | do_number(Rest, NN, File)];
do_number([{section, Attrs, More} | Rest], [N | NN], File) ->
    [{section, Attrs, do_number(More, [1, N | NN], File)} |
     do_number(Rest, [N+1 | NN], File)];
do_number([{title, _, [{pcdata, _, Title}]} | More], [N | NN], File) ->
    Format = make_format(length(NN)),
    Number = lists:flatten(io_lib:format(Format, lists:reverse(NN))),
    [{marker, [{"ID", "CDATA", Number}], []},
     {title, [{"NUMBER", "CDATA", Number}, {"FILE", "CDATA", File}],
      [{pcdata, [], Title}]} |
     do_number(More, [N | NN], File)];
do_number([{pcdata, Attrs, More} | Rest], NN, File) ->
    [{pcdata, Attrs, More} | do_number(Rest, NN, File)];
do_number([{Tag, Attrs, More} | Rest], NN, File) ->
    [{Tag, Attrs, do_number(More, NN, File)} | do_number(Rest, NN, File)].

%% make_format(N) -> string()
%%   Returns an io_lib:format/2 string for N integers separated by
%%   dots. N is 0 for a title outside of any section; the format is
%%   then empty, which gives an empty number.
make_format(0) ->
    "";
make_format(1) ->
    "~w";
make_format(N) when N > 1 ->
    "~w." ++ make_format(N-1).

%% count_sections(List) -> integer()
%%   Returns the number of occurrences of the atom section in List.
count_sections([section | Rest]) ->
    1 + count_sections(Rest);
count_sections([_ | Rest]) ->
    count_sections(Rest);
count_sections([]) ->
    0.

%%--Make a ToC----------------------------------------------------------

%% format_toc(Toc) -> Html
%%   Toc = [{Number, Title}]
%%   Returns one line per entry, linking to the anchor named Number.
format_toc(Toc) ->
    [format_toc1(T) || T <- Toc].

format_toc1({Number, Title}) ->
    [Number, " <a href = \"#", Number, "\">", Title, "</a><br/>\n"].

%%--Convert HTML ISO Latin 1 characters to ordinary characters----------

%% To be used for sorting. Cs must be flat.
%% html_latin1_sort_order(Cs) -> Cs1
%%   Replaces numeric character references of two or three decimal
%%   digits (`&#NN;', `&#NNN;') with a plain character suitable for
%%   sorting. All other characters are copied unchanged.
html_latin1_sort_order(Cs) ->
    hlso(Cs).

hlso([]) ->
    [];
hlso([$&, $#, C2, C1, C0, $; | Cs]) when $0 =< C2, C2 =< $9,
                                         $0 =< C1, C1 =< $9,
                                         $0 =< C0, C0 =< $9 ->
    C = ((C2-$0)*10 + (C1-$0))*10 + C0-$0,
    hlso0(C, Cs);
hlso([$&, $#, C1, C0, $; | Cs]) when $0 =< C1, C1 =< $9,
                                     $0 =< C0, C0 =< $9 ->
    C = (C1-$0)*10 + C0-$0,
    hlso0(C, Cs);
hlso([C | Cs]) ->
    [C | hlso(Cs)].

%% hlso0(Code, Cs) -> Cs1
%%   Code is the decoded character code and Cs the text that follows
%%   the reference. Codes up to 159 are inserted as they are, codes
%%   160..255 are mapped as listed below, and codes above 255 (which
%%   are outside ISO Latin 1) are inserted as they are.
hlso0(C, Cs) when 0 =< C, C =< 159 ->
    [C | hlso(Cs)];
hlso0(160, Cs) -> hlso(Cs);             % no-break space: remove it
hlso0(161, Cs) -> [$? | hlso(Cs)];      % inverted exclamation mark
hlso0(162, Cs) -> [$$ | hlso(Cs)];      % cent sign
hlso0(163, Cs) -> [$$ | hlso(Cs)];      % pound sterling sign
hlso0(164, Cs) -> [$$ | hlso(Cs)];      % general currency sign
hlso0(165, Cs) -> [$$ | hlso(Cs)];      % yen sign
hlso0(166, Cs) -> [$| | hlso(Cs)];      % broken (vertical) bar
hlso0(167, Cs) -> [$$ | hlso(Cs)];      % section sign
hlso0(168, Cs) -> [$: | hlso(Cs)];      % umlaut (dieresis)
hlso0(169, Cs) -> [$c | hlso(Cs)];      % copyright sign
hlso0(170, Cs) -> [$f | hlso(Cs)];      % ordinal indicator, feminine
hlso0(171, Cs) -> [$" | hlso(Cs)];      % angle quotation mark, left
hlso0(172, Cs) -> [$- | hlso(Cs)];      % not sign
hlso0(173, Cs) -> [$- | hlso(Cs)];      % soft hyphen
hlso0(174, Cs) -> [$r | hlso(Cs)];      % registered sign
hlso0(175, Cs) -> [$- | hlso(Cs)];      % macron
hlso0(176, Cs) -> [$d | hlso(Cs)];      % degree sign
hlso0(177, Cs) -> [$+ | hlso(Cs)];      % plus-or-minus sign
hlso0(178, Cs) -> [$2 | hlso(Cs)];      % superscript two
hlso0(179, Cs) -> [$3 | hlso(Cs)];      % superscript three
hlso0(180, Cs) -> [$' | hlso(Cs)];      % acute accent
hlso0(181, Cs) -> [$' | hlso(Cs)];      % micro sign
hlso0(182, Cs) -> [$$ | hlso(Cs)];      % pilcrow (paragraph sign)
hlso0(183, Cs) -> [$. | hlso(Cs)];      % middle dot
hlso0(184, Cs) -> [$c | hlso(Cs)];      % cedilla
hlso0(185, Cs) -> [$1 | hlso(Cs)];      % superscript one
hlso0(186, Cs) -> [$m | hlso(Cs)];      % ordinal indicator, masculine
hlso0(187, Cs) -> [$" | hlso(Cs)];      % angle quotation mark, right
hlso0(188, Cs) -> [$4 | hlso(Cs)];      % fraction one-quarter
hlso0(189, Cs) -> [$2 | hlso(Cs)];      % fraction one-half
hlso0(190, Cs) -> [$3 | hlso(Cs)];      % fraction three-quarters
hlso0(191, Cs) -> [$? | hlso(Cs)];      % inverted question mark
hlso0(C, Cs) when 192 =< C, C =< 198 -> % capital A
    [$A | hlso(Cs)];
hlso0(199, Cs) -> [$C | hlso(Cs)];      % capital C, cedilla
hlso0(C, Cs) when 200 =< C, C =< 203 -> % capital E
    [$E | hlso(Cs)];
hlso0(C, Cs) when 204 =< C, C =< 207 -> % capital I
    [$I | hlso(Cs)];
hlso0(208, Cs) -> [$D | hlso(Cs)];      % capital Eth, Icelandic
hlso0(209, Cs) -> [$N | hlso(Cs)];      % capital N, tilde
hlso0(C, Cs) when 210 =< C, C =< 214 -> % capital O
    [$O | hlso(Cs)];
hlso0(215, Cs) -> [$x | hlso(Cs)];      % multiply sign
hlso0(216, Cs) -> [$O | hlso(Cs)];      % capital O, slash
hlso0(C, Cs) when 217 =< C, C =< 220 -> % capital U
    [$U | hlso(Cs)];
hlso0(221, Cs) -> [$Y | hlso(Cs)];      % capital Y, acute accent
hlso0(222, Cs) -> [$T | hlso(Cs)];      % capital THORN, Icelandic
hlso0(223, Cs) -> [$s | hlso(Cs)];      % small sharp s, German (sz)
hlso0(C, Cs) when 224 =< C, C =< 230 -> % small a
    [$a | hlso(Cs)];
hlso0(231, Cs) -> [$c | hlso(Cs)];      % small c, cedilla
hlso0(C, Cs) when 232 =< C, C =< 235 -> % small e
    [$e | hlso(Cs)];
hlso0(C, Cs) when 236 =< C, C =< 239 -> % small i
    [$i | hlso(Cs)];
hlso0(240, Cs) -> [$d | hlso(Cs)];      % small eth, Icelandic
hlso0(241, Cs) -> [$n | hlso(Cs)];      % small n, tilde
hlso0(C, Cs) when 242 =< C, C =< 246 -> % small o
    [$o | hlso(Cs)];
hlso0(247, Cs) -> [$/ | hlso(Cs)];      % divide sign
hlso0(248, Cs) -> [$o | hlso(Cs)];      % small o, slash
hlso0(C, Cs) when 249 =< C, C =< 252 -> % small u
    [$u | hlso(Cs)];
hlso0(253, Cs) -> [$y | hlso(Cs)];      % small y, acute accent
hlso0(254, Cs) -> [$t | hlso(Cs)];      % small thorn, Icelandic
hlso0(255, Cs) -> [$y | hlso(Cs)];      % small y, dieresis or umlaut
%% A three-digit reference can be as large as 999. Such codes have no
%% Latin 1 mapping; keep the decoded code instead of failing.
hlso0(C, Cs) when C > 255 ->
    [C | hlso(Cs)].