diff options
Diffstat (limited to 'lib/docbuilder/src/docb_xmerl_tree_cb.erl')
-rw-r--r-- | lib/docbuilder/src/docb_xmerl_tree_cb.erl | 343 |
1 files changed, 0 insertions, 343 deletions
diff --git a/lib/docbuilder/src/docb_xmerl_tree_cb.erl b/lib/docbuilder/src/docb_xmerl_tree_cb.erl deleted file mode 100644 index bc62069230..0000000000 --- a/lib/docbuilder/src/docb_xmerl_tree_cb.erl +++ /dev/null @@ -1,343 +0,0 @@ -%% ``The contents of this file are subject to the Erlang Public License, -%% Version 1.1, (the "License"); you may not use this file except in -%% compliance with the License. You should have received a copy of the -%% Erlang Public License along with this software. If not, it can be -%% retrieved via the world wide web at http://www.erlang.org/. -%% -%% Software distributed under the License is distributed on an "AS IS" -%% basis, WITHOUT WARRANTY OF ANY KIND, either expressed or implied. See -%% the Licence for the specific language governing rights and limitations -%% under the License. -%% -%% The Initial Developer of the Original Code is Ericsson AB. -%% Portions created by Ericsson are Copyright 1999-2006, Ericsson AB. -%% All Rights Reserved.�� -%% -%% $Id$ -%% --module(docb_xmerl_tree_cb). - -%% This is the XMerL callback module for exporting XML to the internal -%% tree format used by DocBuilder. -%% {Doc, _Misc} = xmerl_scan:file("file.xml", [{validation,true}]) -%% Tree = xmerl:export([Doc], docb_xmerl_tree_cb) - --export(['#xml-inheritance#'/0]). - --export(['#root#'/4, - '#text#'/1, - '#element#'/5]). --include("xmerl.hrl"). - -%%--Functions used by xmerl--------------------------------------------- - -'#xml-inheritance#'() -> - []. - -'#root#'(Data, _Attrs, [], _E) -> - Data. - -'#text#'(Text) -> - Text2 = strip_leading_blanks(Text), -%% before -%% case Text2 of -%% [$\n|T] -> -%% case is_empty(T) of -%% true -> []; -%% false -> {pcdata, [], nl(Text2)} -%% end; -%% -%% _ -> -%% {pcdata, [], nl(Text2)} -%% end. -%% after - {pcdata, [], nl(Text2)}. - -'#element#'(Tag, Data, Attrs, Parents, _E) when Tag==pre; Tag==code -> - [H|T] = reinsert_nl(Data), - NewData = [strip_nl(H)|T], - NewData2 = case Tag of - code -> - fix_single_pcdata(NewData); - pre -> - NewData - end, - {Tag, attrs(get_dtd(Parents), Tag, Attrs), NewData2}; -'#element#'(Tag, Data, Attrs, Parents, _E) -> - NewData = case tag_content(Tag) of - no_pcdata -> % remove all pcdata - [Dat|| - Dat <- Data, - begin - Fun = fun({pcdata,_,_}) -> false; - (_) -> true end, - Fun(Dat) - end]; - single_pcdata when length(Data)>1 -> - %% merge several pcdata's into one single pcdata - fix_single_pcdata(Data); - _ -> - lists:flatten(Data) - end, - {Tag, attrs(get_dtd(Parents), Tag, Attrs), NewData}. - -%%--Internal functions-------------------------------------------------- - -%% is_empty(Str) -> bool() -%% Returns true if the string Str only contains blanks, tabs and -%% newlines, false otherwise. -%% is_empty("\n" ++ Text) -> -%% is_empty(Text); -%% is_empty("\t" ++ Text) -> -%% is_empty(Text); -%% is_empty(" " ++ Text) -> -%% is_empty(Text); -%% is_empty("") -> -%% true; -%% is_empty(_) -> -%% false. - -%% reinsert_nl(L1) -> L2 -%% Workaround for <pre>: Normally empty lines are ignored. However, -%% Xmerl splits lines whenever it encounters an entity. In the case of -%% <pre>, this may lead to that we ignores what we think is an empty -%% line but is actually a line break that should be kept, for example -%% in this case: -%% <pre> -%% <input>some command</input> <-- this line break is lost! -%% <some result> -%% </pre> -%% This function reinserts line breaks where necessary. -reinsert_nl([[]|T]) -> - [{pcdata,[],"\\n"} | reinsert_nl(T)]; -reinsert_nl([H|T]) -> - [H | reinsert_nl(T)]; -reinsert_nl([]) -> - []. - -%% sgmls treats line breaks in a way that DocBuilder relies on and -%% which must be imitated here. Replace all "\n" with "\\n" and add -%% "\n" to the end of each text element. -nl("") -> - "\n"; -nl("\n"++Text) -> - "\\n"++nl(Text); -nl([Ch|Text]) -> - [Ch|nl(Text)]. - - -%% strip_leading_blanks(Str) -> Str -%% Leading spaces and tabs before a newline are always redundant -%% and are therefore stripped of here -%% If no newline is found the original string is returned unchanged - -strip_leading_blanks(Str) -> - strip_leading_blanks(Str,Str). - -strip_leading_blanks([],Str) -> - Str; -strip_leading_blanks([$\s|T],Str) -> - strip_leading_blanks(T,Str); -strip_leading_blanks([$\t|T],Str) -> - strip_leading_blanks(T,Str); -strip_leading_blanks(Rest=[$\n|_],_) -> - Rest; -strip_leading_blanks(_,Str) -> - Str. - -%% strip_nl(Str) -> Str -%% The XMerL scan will often result in the contents of <pre> or <code> -%% starting with a newline, as the format is normally: -%% <pre> -%% ..contents.. -%% </pre> -%% However, this newline must be removed, or the resulting HTML will be -%% <pre> -%% -%% ..content.. -%% </pre> -strip_nl({pcdata,[],"\\n"++Str}) -> {pcdata,[],Str}; -strip_nl(E) -> E. - -get_dtd([]) -> - none; -get_dtd(Parents) -> - {DTD, _} = lists:last(Parents), - DTD. - -%% attrs(DTD, Tag, GivenAttrs) -> AllAttrs -%% DTD = Tag = atom() DTD and tag name -%% GivenAttrs = [#xmlAttribute{}] -%% AllAttrs = [{Name, Type, Val}] -%% Name = string() (uppercase) Example: "VALIGN" -%% Type = "CDATA" | "TOKEN" -%% Val = string() (uppercase if type is "TOKEN", as-is otherwise) -%% The XMerL scanning of <file>.xml renders only the given attributes. -%% However, DocBuilder needs also the optional attributes (which not -%% necessarily have been given), so we add them here, using the default -%% values according to the DTDs. -%% NOTE: Uses the information from the DTDs. That is, if some change is -%% done to the DTDs, also this file must be updated. Ideally, the DTDs -%% should be parsed automatically in some way. -%% It can also be noted that this check is superfluous in the case where -%% all attributes are required (except that the attributes are sorted -%% in the same order as in the DTD) and where an optional attribute has -%% type "CDATA" as no sensible default value can be specified in this -%% case. -attrs(DTD, Tag, GivenAttrs) -> - merge_attrs(Tag, default_attrs(DTD, Tag), GivenAttrs). - -merge_attrs(Tag, [{NameA, Type, DefVal}|Default], GivenAttrs) -> - Val = case lists:keyfind(NameA, #xmlAttribute.name, GivenAttrs) of - #xmlAttribute{value=Val0} -> Val0; - false -> DefVal - end, - Attr = {attr_name(NameA), Type, attr_val(Type, Val)}, - [Attr | merge_attrs(Tag, Default, GivenAttrs)]; -merge_attrs(_Tag, [], _GivenAttrs) -> - []. - -attr_name(Atom) -> - string:to_upper(atom_to_list(Atom)). - -attr_val("CDATA", Val) -> Val; -attr_val("TOKEN", Val) -> string:to_upper(Val). - -%% Given the DTD and element tag, return a list [{Name, Value}] where -%% Name (atom) is the name of each possible attribute and -%% Value (lowercase string) its default value. -default_attrs(_, cell) -> - [{align, "TOKEN", "left"}, - {valign, "TOKEN", "middle"}]; -default_attrs(_, cite) -> - [{id, "CDATA", ""}]; % required -default_attrs(_, code) -> - [{type, "TOKEN", "none"}]; -default_attrs(_, codeinclude) -> - [{file, "CDATA", ""}, % required - {tag, "CDATA", ""}, - {type, "TOKEN", "none"}]; -default_attrs(book, contents) -> - [{level, "TOKEN", "2"}]; -default_attrs(_, erleval) -> - [{expr, "CDATA", ""}]; % required -default_attrs(report, erlinclude) -> - [{file, "CDATA", ""}, % required - {tag, "CDATA", ""}]; % required -default_attrs(_, fascicule) -> - [{file, "CDATA", ""}, % required - {href, "CDATA", ""}, % required - {entry, "TOKEN", "no"}]; -default_attrs(book, header) -> - [{titlestyle, "TOKEN", "normal"}]; -default_attrs(_, image) -> - [{file, "CDATA", ""}]; % required -default_attrs(_, include) -> - [{file, "CDATA", ""}]; % required -default_attrs(report, index) -> - [{txt, "CDATA", ""}]; % required -default_attrs(_, list) -> - [{type, "TOKEN", "bulleted"}]; -default_attrs(_, marker) -> - [{id, "CDATA", ""}]; % required -default_attrs(book, onepart) -> - [{lift, "TOKEN", "no"}]; -default_attrs(book, parts) -> - [{lift, "TOKEN", "no"}]; -default_attrs(_, path) -> - [{unix, "CDATA", ""}, - {windows, "CDATA", ""}]; -default_attrs(_, seealso) -> - [{marker, "CDATA", ""}]; % required -default_attrs(report, table) -> - [{width, "CDATA", "0"}, - {colspec, "CDATA", ""}]; -default_attrs(_, table) -> - [{align, "TOKEN", "center"}]; -default_attrs(_, term) -> - [{id, "CDATA", ""}]; % required -default_attrs(book, theheader) -> - [{tag, "TOKEN", "none"}]; -default_attrs(bookinsidecover, theheader) -> - [{tag, "TOKEN", "none"}]; -default_attrs(_, url) -> - [{href, "CDATA", ""}]; % required -default_attrs(_, _) -> []. - -%%--Single PCDATA broken into several fix------------------------------- - -%% When text contains an entity, then XMERL splits it into two -%% PCDATA elements, the second starting with the entity. -%% -%% Example: -%% Magnus Fr�berg => [{pcdata,[],"Magnus Fr\n"},{pcdata,[],"�berg\n"}] -%% -%% This is not handled by DocBuilder which expects many tags, for -%% example title and aname, to contain a single PCDATA element. (That -%% is also what nsgmls returned.) - -fix_single_pcdata([{pcdata,[],Str1}, {pcdata,[],Str2}|T]) -> - fix_single_pcdata([{pcdata,[],Str1++Str2}|T]); -fix_single_pcdata(FixedData) -> - FixedData. - -tag_content(aname) -> single_pcdata; -tag_content(app) -> single_pcdata; -tag_content(approved) -> single_pcdata; -tag_content(appsummary) -> single_pcdata; -tag_content(b) -> single_pcdata; -tag_content(c) -> single_pcdata; -tag_content(cauthor) -> single_pcdata; -tag_content(cell) -> mixed_content; -tag_content(checked) -> single_pcdata; -tag_content(chowpublished) -> single_pcdata; -tag_content(code) -> single_pcdata; % mixed? -tag_content(com) -> single_pcdata; -tag_content(comsummary) -> single_pcdata; -tag_content(copyright) -> mixed_content; -tag_content(ctitle) -> single_pcdata; -tag_content(d) -> mixed_content; -tag_content(date) -> single_pcdata; -tag_content(docno) -> single_pcdata; -tag_content(em) -> mixed_content; -tag_content(email) -> single_pcdata; -tag_content(fascicule) -> single_pcdata; -tag_content(file) -> single_pcdata; -tag_content(filesummary) -> single_pcdata; -tag_content(fsummary) -> mixed_content; -tag_content(headline) -> single_pcdata; -tag_content(holder) -> single_pcdata; -tag_content(i) -> single_pcdata; -tag_content(icaption) -> single_pcdata; -tag_content(id) -> single_pcdata; -tag_content(input) -> mixed_content; -tag_content(item) -> mixed_content; -tag_content(legalnotice) -> single_pcdata; -tag_content(lib) -> single_pcdata; -tag_content(libsummary) -> single_pcdata; -tag_content(module) -> single_pcdata; -tag_content(modulesummary) -> single_pcdata; -tag_content(name) -> single_pcdata; -tag_content(nametext) -> single_pcdata; -tag_content(p) -> mixed_content; -tag_content(pagetext) -> single_pcdata; -tag_content(path) -> single_pcdata; % mixed? -tag_content(pre) -> mixed_content; -tag_content(prepared) -> single_pcdata; -tag_content(resp) -> single_pcdata; -tag_content(responsible) -> single_pcdata; -tag_content(ret) -> single_pcdata; -tag_content(rev) -> single_pcdata; -tag_content(seealso) -> single_pcdata; % mixed? -tag_content(shortdef) -> single_pcdata; -tag_content(shorttitle) -> single_pcdata; -tag_content(tag) -> mixed_content; -tag_content(tcaption) -> single_pcdata; -tag_content(termdef) -> single_pcdata; -tag_content(title) -> single_pcdata; -tag_content(url) -> single_pcdata; % mixed -tag_content(v) -> single_pcdata; -tag_content(year) -> single_pcdata; -tag_content(_) -> no_pcdata. - - |