diff options
Diffstat (limited to 'lib/xmerl/doc')
-rw-r--r-- | lib/xmerl/doc/examples/Makefile | 61 | ||||
-rw-r--r-- | lib/xmerl/doc/examples/mkdocs.erl | 9 | ||||
-rw-r--r-- | lib/xmerl/doc/examples/sdocbook2xhtml.erl | 823 | ||||
-rwxr-xr-x | lib/xmerl/doc/examples/test_html.erl | 225 | ||||
-rw-r--r-- | lib/xmerl/doc/examples/xmerl_test.erl | 522 | ||||
-rwxr-xr-x | lib/xmerl/doc/examples/xml/test.xml | 6 | ||||
-rwxr-xr-x | lib/xmerl/doc/examples/xml/test2.xml | 8 | ||||
-rwxr-xr-x | lib/xmerl/doc/examples/xml/test3.xml | 8 | ||||
-rwxr-xr-x | lib/xmerl/doc/examples/xml/test4.xml | 9 | ||||
-rwxr-xr-x | lib/xmerl/doc/examples/xml/test5.xml | 9 | ||||
-rwxr-xr-x | lib/xmerl/doc/examples/xml/testdtd.dtd | 17 | ||||
-rwxr-xr-x | lib/xmerl/doc/examples/xml/xmerl.xml | 523 | ||||
-rw-r--r-- | lib/xmerl/doc/examples/xml/xmerl_xs.xml | 541 | ||||
-rw-r--r-- | lib/xmerl/doc/examples/xserl_test.erl | 85 | ||||
-rw-r--r-- | lib/xmerl/doc/src/notes.xml | 151 | ||||
-rw-r--r-- | lib/xmerl/doc/src/notes_history.xml | 2 | ||||
-rw-r--r-- | lib/xmerl/doc/src/xmerl_sax_parser.xml | 2 |
17 files changed, 2996 insertions, 5 deletions
# diff --git a/lib/xmerl/doc/examples/Makefile b/lib/xmerl/doc/examples/Makefile new file mode 100644 index 0000000000..2768ee1985 --- /dev/null +++ b/lib/xmerl/doc/examples/Makefile
#
# Builds the xmerl example modules and renders every $(XML)/<doc>.xml listed
# in DOCS to $(HTML)/<doc>.html through mkdocs:run/1.
ERLC = erlc
EMULATOR = beam
EBIN = .
HTML = .
XML = ./xml
RM = rm -f

# ----------------------------------------------------
# Common Macros
# ----------------------------------------------------
include ../../vsn.mk
VSN = $(XMERL_VSN)


MODULES = \
	xmerl_test \
	test_html \
	xserl_test \
	mkdocs \
	sdocbook2xhtml



DOC_FILES = $(DOCS:%=$(HTML)/%.html)

# Fixed: the option was spelled "+warn_unused_wars", which is not a compiler
# option; "+warn_unused_vars" is the one that reports unused variables.
ERL_COMPILE_FLAGS += $(DEBUG) -I ../../include +warn_unused_vars +debug_info

SUB_DIRECTORIES =

.PHONY: all opt info clean debug xsm xsmdebug

#all: $(MODULES:%=$(EBIN)/%.$(EMULATOR)) xsm $(DOC_FILES)
all opt: $(MODULES:%=$(EBIN)/%.$(EMULATOR)) $(DOC_FILES)

info:
	@echo "DOC_FILES: $(DOC_FILES)"

clean:
#	@for d in $(SUB_DIRECTORIES); do \
#		(cd $$d && $(MAKE) clean) || exit 1; \
#	done
	$(RM) $(HTML)/*.html
	$(RM) $(EBIN)/*.beam
	$(RM) core *~

debug: xsmdebug $(DOC_FILES)

# Each sub-make runs in a subshell so the loop returns to this directory
# before visiting the next entry, and a failing sub-make stops the loop.
xsm:
	@for d in $(SUB_DIRECTORIES); do \
		(cd $$d && $(MAKE)) || exit 1; \
	done

xsmdebug:
	@for d in $(SUB_DIRECTORIES); do \
		(cd $$d && $(MAKE) DEBUG=-Ddebug=1) || exit 1; \
	done

$(HTML)/%.html: $(XML)/%.xml
	erl -noshell -pa ../../ebin -run mkdocs run $< $@ -s erlang halt

$(EBIN)/%.beam: %.erl
	$(ERLC) $(ERL_COMPILE_FLAGS) -o $(EBIN) $<

%% diff --git a/lib/xmerl/doc/examples/mkdocs.erl b/lib/xmerl/doc/examples/mkdocs.erl new file mode 100644 index 0000000000..9faa12685b --- /dev/null +++ b/lib/xmerl/doc/examples/mkdocs.erl
-module(mkdocs).
-author('[email protected]').

-export([run/1]).

%% run([InFile, OutFile]) -> ok
%%
%% Parses the Simplified DocBook file InFile, converts it with
%% sdocbook2xhtml:process_xml/1 and writes the result to OutFile.
%% The fetch fun tells the scanner not to fetch any external DTD.
%% A failed write now crashes with badmatch instead of being ignored.
run([InFile, OutFile]) ->
    SkipDtd = fun(_DTDSpec, S) -> {ok, not_fetched, S} end,
    {Doc, _Rest} = xmerl_scan:file(InFile, [{fetch_fun, SkipDtd}]),
    Xhtml = sdocbook2xhtml:process_xml(Doc),
    ok = file:write_file(OutFile, [Xhtml]).
%% diff --git a/lib/xmerl/doc/examples/sdocbook2xhtml.erl b/lib/xmerl/doc/examples/sdocbook2xhtml.erl new file mode 100644 index 0000000000..f96bf51963 --- /dev/null +++ b/lib/xmerl/doc/examples/sdocbook2xhtml.erl
%%%----------------------------------------------------------------------
%%% File    : sdocbook2xhtml.erl
%%% Description : Erlang XSLT like "stylesheet" for exporting
%%%               Simplified Docbook XML to XHTML.
%%%
%%% Modules used : lists, io_lib, xmerl, xmerl_lib, xmerl_xs
%%%
%%%----------------------------------------------------------------------

-module(sdocbook2xhtml).
-author('[email protected]').


-include("xmerl.hrl").
-import(xmerl_lib, [markup/3,mapxml/2, foldxml/3, mapfoldxml/3]).
-import(xmerl_xs, [ xslapply/2, value_of/1, select/2, built_in_rules/2]).

-export([ process_xml/1 ]).

%% One handler per DocBook element, called as
%% Name(RenderedChildren, Attributes, Parents, Element).
-export([abbrev/4,
	 abstract/4,
	 acronym/4,
	 address/4,
	 anchor/4,
	 appendix/4,
	 appendixinfo/4,
	 article/4,
	 articleinfo/4,
	 audiodata/4,
	 audioobject/4,
	 author/4,
	 authorgroup/4,
	 authorinitials/4,
	 bibliography/4,
	 bibliomixed/4,
	 bibliomisc/4,
	 bibliomset/4,
	 biblioset/4,
	 blockquote/4,
	 caption/4,
	 citetitle/4,
	 city/4,
	 colspec/4,
	 command/4,
	 computeroutput/4,
	 copyright/4,
	 corpauthor/4,
	 country/4,
	 date/4,
	 edition/4,
	 editor/4,
	 email/4,
	 emphasis/4,
	 entry/4,
	 example/4,
	 fax/4,
	 figure/4,
	 filename/4,
	 firstname/4,
	 footnote/4,
	 holder/4,
	 honorific/4,
	 imagedata/4,
	 imageobject/4,
	 informaltable/4,
	 inlinemediaobject/4,
	 isbn/4,
	 issn/4,
	 issuenum/4,
	 legalnotice/4,
	 lineage/4,
	 link/4,
	 literal/4,
	 itemizedlist/4,
	 listitem/4,
	 mediaobject/4,
	 member/4,
	 note/4,
	 orderedlist/4,
	 othercredit/4,
	 othername/4,
	 para/4,
	 phone/4,
	 phrase/4,
	 programlisting/4,
	 publishername/4,
	 quote/4,
	 replaceable/4,
	 revhistory/4,
	 revision/4,
	 revnumber/4,
	 revremark/4,
	 row/4,
	 section/4,
	 sectioninfo/4,
	 simplelist/4,
	 subtitle/4,
	 surname/4,
	 systemitem/4,
	 table/4,
	 tbody/4,
	 term/4,
	 tfoot/4,
	 tgroup/4,
	 thead/4,
	 title/4,
	 titleabbrev/4,
	 trademark/4,
	 ulink/4,
	 userinput/4,
	 variablelist/4,
	 varlistentry/4,
	 xref/4,
	 year/4
	]).


%% XML declaration placed first in the generated document.
xmlhead() -> "<\?xml version=\"1.0\" encoding=\"iso-8859-1\"\?>".

%% XHTML 1.0 Transitional DOCTYPE.
%% Fixed: the system identifier used to end with a stray space inside the
%% quotes ("...xhtml1-transitional.dtd ").
doctype() ->
    "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n"
    " \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n".

%% Inline stylesheet: page margins, logo float and the boxed table of contents.
style() ->
    "<style type=\"text/css\"> body {margin-left:10%; margin-right:5%;} \n"
    ".logo{float:right;}\n"
    ".toc UL {\n"
    "  list-style-type: none;\n"
    "  border: solid;\n"
    "  border-width: thin;\n"
    "  padding-left: 10px;\n"
    "  padding-right: 10px;\n"
    "  padding-top: 5px;\n"
    "  padding-bottom: 5px;\n"
    "  background: #f0f0f0;\n"
    "  letter-spacing: 2px;\n"
    "  line-height: 20px;\n"
    "}\n"
    "</style>".

%% process_xml(Element) -> iolist()
%% Entry point: converts a parsed document to XHTML. The result is a deep
%% list and is deliberately not flattened.
process_xml(E) ->
    template(E).

%% article is the root element
template(E0 = #xmlElement{name=article}) ->

    E = changetitle(E0), %% Add section numbering to titles

    [ xmlhead(), doctype(),
      "<html xmlns=\"http://www.w3.org/1999/xhtml\" >"
      "<head>"
      "<title>",
      value_of(select("articleinfo/title",E)),
      "</title>",
      style(),
      "</head>"
      "<body>",
      xslapply( fun template/1, select("articleinfo",E)),
      process_toc(E), %% Insert toc between info and main part of article
      xslapply( fun template/1, select("section",E)),
      xslapply( fun template/1, select("appendix",E)),
      "</body></html>"];

%% Any other element: render the children first, then hand the result to the
%% exported handler named after the element. Elements without a handler
%% contribute just their rendered children.
%%
%% The handler lookup is explicit instead of `case catch ... undef`, so an
%% undef raised *inside* a handler is no longer mistaken for "no handler",
%% and handler failures propagate with their original class and stack trace.
template(E = #xmlElement{name=Name}) ->
    Children = xslapply( fun template/1, E),
    case erlang:function_exported(?MODULE, Name, 4) of
	true ->
	    ?MODULE:Name(Children, E#xmlElement.attributes,
			 E#xmlElement.parents, E);
	false ->
	    Children
    end;

%% Text and other non-element nodes use the xmerl_xs built-in rules.
template(E) ->
    built_in_rules( fun template/1, E).

%% -------------------------------------------------------------------
%% simple serialize tags
%%
%% Every handler receives (Data, Attrs, Parents, E): the already rendered
%% children, the element's attributes, its ancestor list (closest first)
%% and the element record itself.

%% abbrev inside a bibliomset opens a <dd> that bibliomset/4 closes.
abbrev(Data, _Attrs, [{bibliomset,_}|_], _E) ->
    ["<dt><abbr>", Data, "</abbr></dt><dd>"];
abbrev(Data, Attrs, _Parents, _E) ->
    markup("abbr", Attrs, Data).


abstract(Data, _Attrs, _Parents, _E) ->
    ["<h3>Abstract</h3><blockquote>", Data, "</blockquote>"].

acronym(Data, Attrs, _Parents, _E) ->
    markup("acronym", Attrs, Data).

address(Data, Attrs, _Parents, _E) ->
    markup("address", Attrs, Data).

%% Named anchor; without an id attribute the content is passed through.
anchor(Data, Attrs, _Parents, _E) ->
    case find_attribute(id, Attrs) of
	{value,ID} ->
	    ["<a name=\"" ++ ID ++ "\">", Data, "</a>"];
	false ->
	    Data
    end.

appendix(Data, _Attrs, _Parents, _E) ->
    ["<h1>Appendix</h1>", Data].

appendixinfo(Data,_,_,_) ->
    Data.

%% Fixed: the <img> start tag was never closed (`width="50"</img>`),
%% which produced malformed XHTML.
article(Data, _Attrs, _Parents, _E) ->
    ["<body>"
     "<img src=\"/logga2.jpg\" alt=\"no logo\" class=\"logo\" "
     "width=\"50\"></img>",
     Data,
     "</body>"].

articleinfo(Data,_,_,_) ->
    Data.

audiodata(Data, _Attrs, _Parents, _E) -> Data.

%% Audio objects are dropped from the output.
audioobject(_,_,_,_) ->
    [].

author(Data, Attrs, [{authorgroup,_} | _], _E) ->
    markup("dd", Attrs, Data);
author(Data, _Attrs, _Parents, _E) ->
    Data.

authorgroup(Data,_,_,_) ->
    ["<dl><dt>Author</dt>",Data,"</dl>"].

authorinitials(Data,_,_,_) -> Data.

bibliography(Data, _Attrs, _Parents, _E) ->
    ["<h3>Bibliography</h3>" ,Data].

bibliomisc(Data,_,_,_) -> Data.
bibliomixed(Data,_,_,_) -> ["<dl>",Data, "</dl>"].
%% Closes the <dd> opened by abbrev/4 in a bibliomset.
bibliomset(Data,_,_,_) -> [Data, "</dd>"].

biblioset(Data,_,_,_) -> Data.

blockquote(Data, Attrs, _Parents, _E) ->
    markup("blockquote", Attrs, Data).

caption(Data, _Attrs, _Parents, _E) -> Data.

citetitle(Data,_,_,_) -> ["<i>",Data,"</i>"].

city(Data,_,_,_) ->
    Data.

%% Fix Me is it "col" element in html?
colspec(_, _Attrs,_,_) ->
    [].

command(Data,_,_,_) ->
    ["<b><tt>", Data, "</tt></b>"].

computeroutput(Data,_,_,_) ->
    ["<tt>", Data, "</tt>"].

%% The copyright sign is written as a numeric character reference so the
%% output does not depend on the encoding this source file is compiled with.
copyright(Data,_,_,_) ->
    [ "&#169; ", Data].

corpauthor(Data,_,_,_) ->
    Data.

country(Data,_,_,_) ->
    Data.

%% A date inside a revision is a cell of the revision-history table.
date(Data,_,[{revision,_}|_],_) ->
    ["<td>", Data, "</td>"];
date(Data,_,_,_) ->
    Data.

edition(Data,_,_,_) ->
    Data.

editor(Data,_,_,_) ->
    Data.

email(Data,_,_,_) ->
    ["<i><a href=\"mailto:", Data,"\">",Data,"</a></i>"].

emphasis(Data, _Attrs, _Parents, _E) ->
    ["<em>", Data, "</em>"].

%% Cell in a table: header cell when the row sits directly in a thead.
entry(Data, _Attrs, [{row,_}, {thead,_} | _], _E) ->
    ["<th>", Data, "</th>"];
entry(Data, _Attrs, _Parents, _E) ->
    ["<td>", Data, "</td>"].

example(Data, _Attrs, _Parents, _E) ->
    ["<hr />", Data, "<hr />"].

fax(Data, _Attrs, _Parents, _E) ->
    ["<address>", Data, "</address>"].

%% May contain ulink to image, resolved by ulink type
figure(Data, _, _, _) ->
    Data.

filename(Data, _, _, _) ->
    ["<i>", Data, "</i>"].

firstname(Data, _, _, _) ->
    [Data , " " ].

footnote(Data, _, _, _) ->
    Data.

holder(Data, _, _, _) ->
    [" ",Data].

honorific(Data, _, _, _) ->
    Data.

%% Renders <img> from the fileref and width attributes.
%% Fixed: src and width values were emitted without quotes, and alt was
%% filled with the whole ` src=...` fragment instead of the file reference.
%% Returns an iolist; attribute values are inserted as-is.
imagedata(_Data, Attrs, _Parents, _E) ->
    {Src, Alt} =
	case find_attribute(fileref, Attrs) of
	    {value, FileRef} ->
		{[" src=\"", FileRef, "\""], [" alt=\"", FileRef, "\""]};
	    false ->
		{[], " alt=\"No image!\""}
	end,
    Width =
	case find_attribute(width, Attrs) of
	    {value, W} ->
		[" width=\"", W, "\""];
	    false ->
		[]
	end,
    ["<img", Src, Alt, Width, "></img>"].


imageobject(Data, _Attrs, _Parents, _E) ->
    Data.

informaltable(Data, _Attrs, _Parents, _E) ->
    ["<table border=\"1\" >", Data, "</table>"].


inlinemediaobject(Data, _Attrs, _Parents, _E) ->
    Data.

isbn(Data, _Attrs, _Parents, _E) ->
    Data.

issn(Data, _Attrs, _Parents, _E) ->
    Data.

issuenum(Data, _Attrs, _Parents, _E) ->
    Data.

itemizedlist(Data, Attrs, _Parents, _) ->
    markup("ul", Attrs, Data).

%% No handlers yet for: keyword, keywordset (inline elements).

legalnotice(Data, Attrs, _Parents, _) ->
    markup("small", Attrs, Data).

lineage(Data, _Attrs, _Parents, _) ->
    Data.

%% No handler yet for: lineannotation (inline element).

%% Hypertext link to an id inside the same document.
link(Data, Attrs, _Parents, _) ->
    case find_attribute(linkend, Attrs) of
	false ->
	    Data;
	{value, Target} ->
	    [lists:append(["<a href=\"#", Target, "\">"]), Data, "</a>"]
    end.

%% A listitem is a definition inside a varlistentry, a bullet elsewhere.
listitem(Data, Attrs, [{varlistentry,_} | _], _E) ->
    markup("dd", Attrs, Data);
listitem(Data, Attrs, _Parents, _) ->
    markup("li", Attrs, Data).

literal(Data, Attrs, _Parents, _) ->
    markup("tt", Attrs, Data).

%% No handler yet for: literallayout (inline element).

mediaobject(Data, _Attrs, _Parents, _) ->
    Data.

%% simplelist member: one member per line.
member(Data, _Attrs, _Parents, _) ->
    [Data, "<br></br>"].

%% Notes are rendered as a shaded two-cell table.
note(Data, _Attrs, _Parents, _) ->
    Open = "<table border=\"1\" cellspacing=\"0\" cellpadding=\"5\" width=\"80%\"\n"
	"bgcolor=\"#CCCCCC\"><tr><td>NOTE</td><td><i>",
    [Open, Data, "</i></td></tr></table>"].

%% No handlers yet for: objectinfo, option (inline elements).

orderedlist(Data, Attrs, _Parents, _) ->
    markup("ol", Attrs, Data).

%% Hmm otheraddr not in DTD; no handler.

othercredit(Data, _Attrs, _Parents, _) -> Data.

othername(Data, _Attrs, _Parents, _E) -> Data.

%% pagenums is ignored on purpose.

%% Paragraphs directly inside a listitem or a note are not wrapped in <p>.
para(Data, _Attrs, [{Parent,_} | _], _E) when Parent =:= listitem;
					      Parent =:= note ->
    Data;
para(Data, Attrs, _Parents, _E) ->
    markup("p", Attrs, Data).

phone(Data, _Attrs, _Parents, _E) -> Data.

phrase(Data, _Attrs, _Parents, _E) -> Data.

%% No handlers yet for: pob, postcode, printhistory, procedure.

%% Program listings are rendered as preformatted code in a shaded table.
programlisting(Data, _Attrs, _Parents, _E) ->
    Open = "<table border=\"1\" cellspacing=\"0\" cellpadding=\"5\" width=\"100%\"\n"
	"bgcolor=\"#CCCCCC\"><tr><td><pre><code>",
    [Open, Data, "</code></pre></td></tr></table>"].

%% No handlers yet for: pubdate, publisher.

publishername(Data, _Attrs, _Parents, _E) ->
    Data.

quote(Data, Attrs, _Parents, _) ->
    markup("q", Attrs, Data).

replaceable(Data, Attrs, _Parents, _) ->
    markup("i", Attrs, Data).

%% Revision history table; the labels follow the element's language.
revhistory(Data, _Attrs, _Parents, E) ->
    {Heading, DateLabel, CommentLabel} =
	revhistory_labels(E#xmlElement.language),
    ["<h4>", Heading, "</h4>",
     "<table><thead><tr><th>Rev.</th><th>", DateLabel,
     "</th><th>", CommentLabel, "</th></tr></thead>",
     Data,
     "</table>"].

%% {Heading, DateColumn, CommentColumn} for the supported languages.
revhistory_labels("en") ->
    {"Revision history", "Date", "Comment"};
revhistory_labels("sv") ->
    {"Revisionshistoria", "Datum", "Kommentar"};
revhistory_labels(_) ->
    {"lang is undefined", "lang is undefined", "lang is undefined"}.

revision(Data, Attrs, _Parents, _) ->
    markup("tr", Attrs, Data).

revnumber(Data, Attrs, _Parents, _) ->
    markup("td", Attrs, Data).

revremark(Data, Attrs, _Parents, _) ->
    markup("td", Attrs, Data).

row(Data, Attrs, _Parents, _E) ->
    markup("tr", Attrs, Data).

section(Data, _Attrs, _Parents, _E) ->
    Data.

sectioninfo(Data, _Attrs, _Parents, _E) -> Data.

%% No handler yet for: sidebar (block element).

simplelist(Data, _Attrs, _Parents, _E) ->
    Open = "<table border=\"1\" cellspacing=\"0\" cellpadding=\"5\"\n"
	"width=\"100%\"><tr><td>",
    [Open, Data, "</td></tr></table>"].


%% No handlers yet for: state, step, street, substeps (inline elements).

subtitle(Data, _Attrs, _Parents, _E) ->
    ["<h3>", Data, "</h3>"].

surname(Data, _Attrs, _Parents, _E) -> Data.

systemitem(Data, Attrs, _Parents, _E) ->
    markup("b", Attrs, Data).

table(Data, _Attrs, _Parents, _E) ->
    ["<table border=\"1\" cellspacing=\"0\" cellpadding=\"4\" >",
     Data, "</table>"].

%% Fix me alot (CSS equivalent: display: table-row-group)
tbody(Data, Attrs, _Parents, _E) ->
    markup("tbody", Attrs, Data).

%% Only defined for a term that is a direct child of a varlistentry.
term(Data, Attrs, [{varlistentry,_} | _], _E) ->
    markup("dt", Attrs, Data).


%% No handler yet for: textobject (inline element).

tfoot(Data, Attrs, _Parents, _E) ->
    markup("tfoot", Attrs, Data).

%% Fixme alot (CSS equivalent: display: table)
tgroup(Data, Attrs, _Parents, _E) ->
    markup("colgroup", Attrs, Data).

thead(Data, Attrs, _Parents, _E) ->
    markup("thead", Attrs, Data).
%% (CSS equivalent of the element above: display: table-row-group)

%% title/4: the heading level depends on where the title sits, see title1/4.
title(Data, Attrs, Parents, E) ->
    title1(Data, Attrs, Parents, E).

%% title1(Data, Attrs, Parents, E) -> iolist()
%%
%% Parents lists the ancestors with the closest one first.
%%  * Inside an appendix the appendix title is <h1> and each nested section
%%    adds one level, down to <h6>.
%%  * Inside an article, section titles start at <h2>. The three outermost
%%    levels carry the id attribute, which must be present (badmatch
%%    otherwise), so the table of contents can link to them.
%%  * Titles of articleinfo, table and bibliomset get their own markup;
%%    anything else becomes <h4>.
%%
%% Fixed: the three-sections-in-appendix clause matched `apendix`, so it
%% could never fire and such titles fell through to the plain section
%% clauses further down.
title1(Data, _Attrs, [{section,_}, {section,_}, {section,_},
		      {section,_}, {section,_}, {appendix,_} | _], _E) ->
    ["<h6>", Data, "</h6>"];
title1(Data, _Attrs, [{section,_}, {section,_}, {section,_},
		      {section,_}, {appendix,_} | _], _E) ->
    ["<h5>", Data, "</h5>"];
title1(Data, _Attrs, [{section,_}, {section,_}, {section,_},
		      {appendix,_} | _], _E) ->
    ["<h4>", Data, "</h4>"];
title1(Data, _Attrs, [{section,_}, {section,_}, {appendix,_} | _], _E) ->
    ["<h3>", Data, "</h3>"];
title1(Data, _Attrs, [{section,_}, {appendix,_} | _], _E) ->
    ["<h2>", Data, "</h2>"];
title1(Data, _Attrs, [{appendix,_} | _], _E) ->
    ["<h1>", Data, "</h1>"];

title1(Data, _Attrs, [{section,_}, {section,_}, {section,_},
		      {section,_}, {section,_}, {section,_} | _], _E) ->
    ["<h6>", Data, "</h6>"];
title1(Data, _Attrs, [{section,_}, {section,_}, {section,_},
		      {section,_}, {section,_} | _], _E) ->
    ["<h6>", Data, "</h6>"];
title1(Data, _Attrs, [{section,_}, {section,_}, {section,_},
		      {section,_} | _], _E) ->
    ["<h5>", Data, "</h5>"];
title1(Data, Attrs, [{section,_}, {section,_}, {section,_} | _], _E) ->
    {value, Id} = find_attribute(id,Attrs),
    ["<h4 id=\"", Id, "\">", Data, "</h4>"];
title1(Data, Attrs, [{section,_}, {section,_} | _], _E) ->
    {value, Id} = find_attribute(id,Attrs),
    ["<h3 id=\"", Id, "\">", Data, "</h3>"];
title1(Data, Attrs, [{section,_} | _], _E) ->
    {value, Id} = find_attribute(id,Attrs),
    ["<h2 id=\"", Id, "\">", Data, "</h2>"];
title1(Data, _Attrs, [{articleinfo,_} | _], _E) ->
    ["<h1>", Data, "</h1>"];
title1(Data, _Attrs, [{table,_} | _], _E) ->
    ["<caption>", Data, "</caption>"];
title1(Data, _Attrs, [{bibliomset,_} | _], _E) ->
    ["<i><b>", Data, "</b></i>"];
title1(Data, _Attrs, _Parents, _E) ->
    ["<h4>", Data, "</h4>"].

%% Abbreviated titles are dropped from the output.
titleabbrev(_Data, _Attrs, _Parents, _E) -> [].

%% The registered sign is written as a numeric character reference so the
%% output does not depend on the encoding this source file is compiled with.
trademark(Data, _Attrs, _Parents, _E) ->
    [ Data, " &#174; "].

%% External link.
%% Fixed: the href value was emitted without quotes, which is not well-formed
%% XHTML and breaks on URLs containing spaces or '>'.
ulink(Data, Attrs, _Parents, _E) ->
    case find_attribute(url, Attrs) of
	{value,LINK} ->
	    ["<a href=\"", LINK, "\">", Data, "</a>"];
	false ->
	    Data
    end.


%% User input is Constant Bold
userinput(Data, _Attrs, _Parents, _E) ->
    ["<tt><b>", Data, "</b></tt>"].

variablelist(Data, Attrs, _Parents, _E) ->
    markup("dl", Attrs, Data).

varlistentry(Data, _Attrs, _Parents, _E) -> Data.

%% No handlers yet for: videodata, videoobject, volumenum (inline elements).

%% Cross reference: an empty anchor pointing at the linkend id.
xref(Data, Attrs, _Parents, _E) ->
    case find_attribute(linkend, Attrs) of
	{value,LINK} ->
	    ["<a href=\"#" ++ LINK ++ "\" />"];
	false ->
	    Data
    end.

year(Data, _Attrs, _Parents, _E) -> Data.

%% ----------------------------------------------------------
%% Utils find_attribute copied from Ulf Wigers xmerl distribution

%% find_attribute(Name, Attrs) -> {value, Value} | false
find_attribute(Name, Attrs) ->
    case lists:keysearch(Name, #xmlAttribute.name, Attrs) of
	{value, #xmlAttribute{value = V}} ->
	    {value, V};
	false ->
	    false
    end.
%% ------------

%% changetitle(Element) -> Element
%% Walks the whole tree and prefixes titles with their running number.
%% The accumulator is {Chapter, Section, SubSection, Example, Figure, Table}.
changetitle(A) ->
    {E, _Counters} = mapfoldxml(fun changecount/2, {0,0,0,0,0,0}, A),
    E.

%% Fold step for changetitle/1. Only title elements are rewritten; which
%% counter is bumped depends on the title's closest ancestors. A new
%% top-level section resets all the other counters.
changecount(#xmlElement{name=title}=E, {A,B,C,Ex,Fig,Tab}) ->
    case E#xmlElement.parents of
	[{example,_} |_] ->
	    {addexhead(E,{A,Ex+1}), {A,B,C,Ex+1,Fig,Tab} };
	[{figure,_} |_] ->
	    {addfighead(E,{A,Fig+1}), {A,B,C,Ex,Fig+1,Tab} };
	[{table,_} |_] ->
	    {addtablehead(E,{A,Tab+1}), {A,B,C,Ex,Fig,Tab+1} };
	[{section,_},{section,_},{section,_},{article,_} |_] ->
	    {addheader(E,{A,B,C+1}), {A,B,C+1,Ex,Fig,Tab} };
	[{section,_},{section,_},{article,_} |_] ->
	    {addheader(E,{A,B+1,0}), {A,B+1,0,Ex,Fig,Tab} };
	[{section,_},{article,_} |_] ->
	    {addheader(E,{A+1,0,0}), {A+1,0,0,0,0,0}};
	_ ->
	    {E,{A,B,C,Ex,Fig,Tab}}
    end;
changecount(E, Acc) -> {E,Acc}.

%% addexhead/addfighead/addtablehead(TitleElement, {Chapter, No}) -> TitleElement
%% Replace the title content with "<Label> <Chapter> - <No> <old text>".
%% The title must start with a text node; only that node is kept.
addexhead(#xmlElement{name=title, content=[#xmlText{}=Text|_]}=E, Numbers) ->
    numbered_title("Example ", E, Text, Numbers).

addfighead(#xmlElement{name=title, content=[#xmlText{}=Text|_]}=E, Numbers) ->
    numbered_title("Figure ", E, Text, Numbers).

addtablehead(#xmlElement{name=title, content=[#xmlText{}=Text|_]}=E, Numbers) ->
    numbered_title("Table ", E, Text, Numbers).

%% Shared body of the three functions above.
numbered_title(Label, E, Text, {Ch, No}) ->
    Heading = lists:append([Label,
			    integer_to_list(Ch), " - ", integer_to_list(No),
			    " ", Text#xmlText.value]),
    E#xmlElement{content=[Text#xmlText{value=Heading}]}.

%% Section titles get the section number as prefix and, if they have none,
%% a generated id attribute.
addheader(#xmlElement{name=title, content=[#xmlText{}=Text|_]}=E, Chapters) ->
    Heading = chapterstring(Chapters) ++ " " ++ Text#xmlText.value,
    E#xmlElement{attributes = addid(E#xmlElement.attributes, Chapters),
		 content = [Text#xmlText{value=Heading}]}.

%% "1", "1.2" or "1.2.3"; trailing zero components are left out.
chapterstring({A,0,0}) ->
    integer_to_list(A);
chapterstring({A,B,0}) ->
    integer_to_list(A) ++ [$. | integer_to_list(B)];
chapterstring({A,B,C}) ->
    integer_to_list(A) ++ [$. | integer_to_list(B)] ++ [$. | integer_to_list(C)].

%% addid add id attribute if it not already exists
addid(OldAtts, Chapters) ->
    case find_attribute(id, OldAtts) of
	false ->
	    add_attribute(id, "sect_" ++ chapterstring(Chapters), OldAtts);
	{value,_} ->
	    OldAtts
    end.

%% Prepends a new attribute record; existing attributes are not checked.
add_attribute(Name, Value, OldAtts) ->
    [#xmlAttribute{name=Name, value=Value} | OldAtts].


%% process_toc(Article) -> iolist()
%% Builds the table of contents from the section titles, three levels deep.
%% The heading follows the element's language ("en" or "sv").
%%
%% Fixed: the Swedish heading contained mangled characters and was
%% misspelled. It is written with octal escapes for a-ring (\345) and
%% o-umlaut (\366) so it stays Latin-1, matching the encoding declared in
%% the XML header. The dead `TOC =` binding is gone.
process_toc(E) ->
    TOCR = foldxml(fun chapindex/2, [], E),
    Str = case E#xmlElement.language of
	      "en" -> "Table of Contents";
	      "sv" -> "Inneh\345llsf\366rteckning";
	      _ -> "lang is undefined"
	  end,
    ["<div class=\"toc\"><h3>",Str,"</h3><ul>",
     lists:reverse(TOCR), "</ul></div>"].

%% Fold step for process_toc/1: adds one <li> per section title of depth
%% 1 to 3 (entries are accumulated in reverse document order).
chapindex(#xmlElement{name=title}=E, Accu) ->
    case E#xmlElement.parents of
	[{section,_},{section,_},{section,_},{article,_} |_] ->
	    ["<li>"++spind(3)++ addlink(E,"toc_level_3") ++"</li>"| Accu];
	[{section,_},{section,_},{article,_} |_] ->
	    ["<li>"++spind(2)++ addlink(E,"toc_level_2") ++ "</li>"| Accu];
	[{section,_},{article,_} |_] ->
	    ["<li>"++spind(1)++ addlink(E,"toc_level_1") ++"</li>"| Accu];
	_ ->
	    Accu
    end;
chapindex(_E, Accu) ->
    Accu.

%% Indentation for a TOC entry: two non-breaking spaces per level.
%% Character references are used because ordinary spaces collapse when the
%% page is rendered.
spind(0) -> "";
spind(X) ->
    "&#160;&#160;" ++ spind(X-1).

%% Link from a TOC entry to its title. The title must carry an id attribute
%% and start with a text node (badmatch otherwise).
addlink(E, TocLevel) ->
    {value,LINK} = find_attribute(id,E#xmlElement.attributes),
    [#xmlText{value=Title}|_] = E#xmlElement.content,  %% Pfuii
    "<a href=\"#" ++ LINK ++
	"\" class=\"" ++ TocLevel ++ "\">" ++
	Title ++ "</a>".


%% diff --git a/lib/xmerl/doc/examples/test_html.erl b/lib/xmerl/doc/examples/test_html.erl new file mode 100755 index 0000000000..3ca15f30f8 --- /dev/null +++ b/lib/xmerl/doc/examples/test_html.erl
%%% The contents of this file are subject to the Erlang Public License,
%%% Version 1.0, (the "License"); you may not use this file except in
%%% compliance with the License. You may obtain a copy of the License at
%%% http://www.erlang.org/license/EPL1_0.txt
%%%
%%% Software distributed under the License is distributed on an "AS IS"
%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%%% the License for the specific language governing rights and limitations
%%% under the License.
%%%
%%% The Original Code is xmerl-0.7
%%%
%%% The Initial Developer of the Original Code is Ericsson Telecom
%%% AB. Portions created by Ericsson are Copyright (C), 1998, Ericsson
%%% Telecom AB. All Rights Reserved.
%%%
%%% Contributor(s): ______________________________________.
%%%
%%%----------------------------------------------------------------------
%%% #0.    BASIC INFORMATION
%%%----------------------------------------------------------------------
%%% File:       test_html.erl
%%% Author       : Ulf Wiger <[email protected]>

%%% Description  : Callback module for exporting XML to HTML with support
%%%                for special Erlang-related tags. (Experimental)
%%%
%%% Modules used : lists, io_lib
%%%
%%%----------------------------------------------------------------------

-module(test_html).
-author('[email protected]').


-export(['#xml-inheritance#'/0]).

%%% special Erlang forms
-export(['EXIT'/4,
	 'tuple_list'/4]).

-export(['#root#'/4,
	 title/4,
	 heading/4,
	 section/4,
	 table/4,
	 row/4,
	 col/4,
	 data/4,
	 p/4, para/4, 'P'/4,
	 emphasis/4]).

-include("xmerl.hrl").


%% Tags without a callback here are handled by xmerl_xml.
'#xml-inheritance#'() -> [xmerl_xml].



%% The '#root#' tag is called when the entire structure has been exported.
%% It does not appear in the structure itself.
%% A title attribute, if given, becomes the <title> of the page.
%% Fixed: the <html> element was opened but never closed.
'#root#'(Data, Attrs, [], _E) ->
    Title =
	case find_attribute(title, Attrs) of
	    {value, T} ->
		["<title>", T, "</title>"];
	    false ->
		[]
	end,
    ["<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML//EN\">\n"
     "<html>\n",
     "<head>\n", Title, "</head>\n"
     "<body>\n", Data, "</body>\n"
     "</html>\n"].




%%% Special token: EXIT
%% Only matches with empty attribute and parent lists.
'EXIT'(Reason, [], [], _E) ->
    %% This happens e.g. if a request function crashes completely.
    ["<pre>\n", mk_string({'EXIT', Reason}), "</pre>"].


title(Str, _Attrs, _Parents, _E) ->
    ["<h1>", Str, "</h1>\n"].


%%% section/3 is to be used instead of headings.
%% The heading level follows the number of enclosing sections
%% (<h1> at top level, <h4> from three enclosing sections on).
section(Data, Attrs, [{section,_}, {section,_}, {section,_} | _], _E) ->
    opt_heading(Attrs, "<h4>", "</h4>", Data);
section(Data, Attrs, [{section,_}, {section,_} | _], _E) ->
    opt_heading(Attrs, "<h3>", "</h3>", Data);
section(Data, Attrs, [{section,_} | _], _E) ->
    opt_heading(Attrs, "<h2>", "</h2>", Data);
section(Data, Attrs, _Parents, _E) ->
    opt_heading(Attrs, "<h1>", "</h1>", Data).

%% Puts the heading attribute, wrapped in the given tags, in front of Data.
%% Without a heading attribute Data is returned untouched.
opt_heading(Attrs, StartTag, EndTag, Data) ->
    with_heading(find_attribute(heading, Attrs), StartTag, EndTag, Data).

with_heading({value, Text}, StartTag, EndTag, Data) ->
    [StartTag, Text, EndTag, "\n" | Data];
with_heading(false, _StartTag, _EndTag, Data) ->
    Data.


%% tables
%% e.g. {table, [{heading, [{col, H1}, {col, H2}]},
%%               {row, [{col, C11}, {col, C12}]},
%%               {row, [{col, C21}, {col, C22}]}]}.

%% The border attribute defaults to 1.
table(Data, Attrs, _Parents, _E) ->
    BorderAttr =
	case find_attribute(border, Attrs) of
	    {value, Width} -> [" border=", mk_string(Width)];
	    false -> " border=1"
	end,
    ["<table", BorderAttr, ">\n", Data, "\n</table>\n"].

%% Rows and headings are only defined directly inside a table.
row(Data, _Attrs, [{table,_}|_], _E) ->
    ["<tr>", Data, "</tr>\n"].

heading(Data, _Attrs, [{table,_}|_], _E) ->
    ["<tr>", Data, "</tr>\n"].


%% Context-sensitive columns (heading- or row columns)
col(Data, _Attrs, [{heading,_}, {table,_} | _], _E) ->
    ["<th>", nbsp_if_empty(Data), "</th>\n"];
col(Data, _Attrs, [{row,_}, {table,_} | _], _E) ->
    ["<td>", nbsp_if_empty(Data), "</td>\n"].


%% Redefines a list of tuples as a table with one row per tuple.
%% The elements attribute selects the tuple positions to show (default: all
%% positions of the first tuple); the heading attribute gives column titles.
tuple_list(List, Attrs, _Parents, _E) ->
    Positions = tuple_positions(find_attribute(elements, Attrs), List),
    Rows = [{row, [{col, {element(Pos, Rec)}} || Pos <- Positions]}
	    || Rec <- List],
    Content =
	case find_attribute(heading, Attrs) of
	    false ->
		Rows;
	    {value, Cols} ->
		[{heading, [{col, C} || C <- Cols]} | Rows]
	end,
    {'#xml-redefine#', {table, Content}}.

tuple_positions({value, Es}, _List) -> Es;
tuple_positions(false, [H|_]) -> lists:seq(1,size(H));
tuple_positions(false, []) -> [].


data(Data, _Attrs, _Parents, _E) ->
    mk_string(Data).



%% p is an alias for 'P'.
p(_Data, _Attrs, _Parents, _E) ->
    {'#xml-alias#', 'P'}.

%% para is an alias for 'P'.
para(_Data, _Attrs, _Parents, _E) ->
    {'#xml-alias#', 'P'}.

'P'(Data, _Attrs, _Parents, _E) ->
    ["<p>", mk_string(Data), "</p>\n"].


emphasis(Str, _Attrs, _Parents, _E) ->
    ["<strong>", Str, "</strong>"].


%% nbsp_if_empty(Data) -> iolist()
%% Cell content for col/4: empty content is replaced by a non-breaking space
%% entity so the cell still gets rendered; an iolist is returned as is and
%% any other term is pretty-printed.
%%
%% Fixed: the obsolete guard tests binary/1 and list/1 are replaced by
%% is_binary/1 and is_list/1, old-style `catch` by try, and the filler is
%% now "&nbsp;" rather than a plain space (which would not keep an empty
%% cell open).
nbsp_if_empty(Data) when is_binary(Data), byte_size(Data) =:= 0 ->
    "&nbsp;";
nbsp_if_empty(Data) when is_list(Data) ->
    try iolist_size(Data) of
	0 ->
	    "&nbsp;";
	_ ->
	    Data
    catch
	error:badarg ->
	    %% not an iolist, e.g. a list of tuples
	    nbsp_if_empty_term(Data)
    end;
nbsp_if_empty(Data) ->
    nbsp_if_empty_term(Data).

nbsp_if_empty_term(Data) ->
    Str = io_lib:format("~p", [Data]),
    case iolist_size(Str) of
	0 ->
	    "&nbsp;";
	_ ->
	    Str
    end.


%% mk_string(Term) -> iolist()
%% Integers and atoms are converted, iolists are returned unchanged and
%% everything else is pretty-printed with ~p.
mk_string(I) when is_integer(I) ->
    integer_to_list(I);
mk_string(A) when is_atom(A) ->
    atom_to_list(A);
mk_string(L) when is_list(L) ->
    %% again, we can't recognize a string without "parsing" it
    try iolist_size(L) of
	_ ->
	    L
    catch
	error:badarg ->
	    io_lib:format("~p", [L])
    end;
mk_string(Term) ->
    io_lib:format("~p", [Term]).



%% find_attribute(Name, Attrs) -> {value, Value} | false
find_attribute(Name, Attrs) ->
    case lists:keysearch(Name, #xmlAttribute.name, Attrs) of
	{value, #xmlAttribute{value = V}} ->
	    {value, V};
	false ->
	    false
    end.

%% diff --git a/lib/xmerl/doc/examples/xmerl_test.erl b/lib/xmerl/doc/examples/xmerl_test.erl new file mode 100644 index 0000000000..b4288431f2 --- /dev/null +++ b/lib/xmerl/doc/examples/xmerl_test.erl
-module(xmerl_test).

%% Example/test module: everything is exported on purpose.
-compile(export_all).
%%-export([Function/Arity, ...]).

-define(XMERL_APP,).

-include("xmerl.hrl").

%% Export to HTML from "simple" format
test1() ->
    xmerl:export_simple(simple(), xmerl_html, [{title, "Doc Title"}]).


%% Export to XML from "simple" format
test2() ->
    xmerl:export_simple(simple(), xmerl_xml, [{title, "Doc Title"}]).


%% Parse XHTML, and export result to HTML and text
test3() ->
    NoDtd = fun(_DTDSpec, S) -> {ok, not_fetched,S} end,
    {Tree, _} = xmerl_scan:string(html(), [{fetch_fun,NoDtd}]),
    io:format("From xmerl_scan:string/2~n ~p~n", [Tree]),
    Html = xmerl:export([Tree], xmerl_html),
    io:format("From xmerl:export/2 xmerl_html filter~n ~p~n", [Html]),
    Text = xmerl:export([Tree], xmerl_text),
    io:format("From xmerl:export/2 xmerl_text filter~n ~p~n", [Text]).


%% Namespace-conformant parse of the namespace example.
test4() ->
    NoDtd = fun(_DTDSpec, S) -> {ok, not_fetched, S} end,
    Options = [{fetch_fun,NoDtd},
	       {namespace_conformant,true}],
    {Tree,_} = xmerl_scan:string(xml_namespace(), Options),
    io:format("From xmerl_scan:string/2~n ~p~n", [Tree]).

%% Runs xml/xmerl.xml through both SAX-style front ends of xmerl_eventp,
%% with this module as callback module.
test5() ->
    {ok, Cwd} = file:get_cwd(), % Assume we are in the examples dir...
    File = Cwd ++ "/xml/xmerl.xml",
    NoDtd = fun(_DTDSpec, S) -> {ok, not_fetched, S} end,
    {FileResult, _} = xmerl_eventp:file_sax(File, ?MODULE, undefined,
					    [{fetch_fun, NoDtd}]),
    io:format("Using file_sax: counted ~p paragraphs~n", [FileResult]),
    {StreamResult, _} = xmerl_eventp:stream_sax(File, ?MODULE, undefined, []),
    io:format("Using stream_sax: counted ~p paragraphs~n", [StreamResult]).

%% XPath query on the namespace example; the DTD is replaced by an empty
%% string.
test6() ->
    EmptyDtd = fun(_DTDSpec, S) -> {ok, {string,""}, S} end,
    Options = [{fetch_fun, EmptyDtd},
	       {namespace_conformant, true}],
    {Doc, _} = xmerl_scan:string(xml_namespace(), Options),
    E = xmerl_xpath:string("child::title[position()=1]", Doc),
    io:format("From xmerl_scan:string/2~n E=~p~n", [E]).


%% Sample document in xmerl "simple" form, used by test1/0 and test2/0.
simple() ->
    Table = {table,[{border, ["1"]},
		    {heading,[{col, ["head1"]},
			      {col, ["head2"]}]},
		    {row, [{col, ["col11"]},
			   {col, ["col12"]}]},
		    {row, [{col, ["col21"]},
			   {col, ["col22"]}]}
		   ]},
    Inner = {section,[{heading, ["heading2"]},
		      {'P', ["This is another paragraph."]},
		      Table
		     ]},
    Outer = {section,[{heading, ["heading1"]},
		      {'P', ["This is a paragraph of text."]},
		      Inner
		     ]},
    [{document,
      [{title, ["Doc Title"]},
       {author, ["Ulf Wiger"]},
       Outer
      ]}
    ].


%% XHTML sample used by test3/0.
html() ->
    "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"DTD/xhtml1-strict.dtd\">"
    "<html>"
    "<head><title>Doc Title</title><author>Ulf Wiger</author></head>"
    "<h1>heading1</h1>"
    "<p>This is a paragraph of text.</p>"
    "<h2>heading2</h2>"
    "<p>This is another paragraph.</p>"
    "<table>"
    "<thead><tr><td>head1</td><td>head2</td></tr></thead>"
    "<tr><td>col11</td><td>col122</td></tr>"
    "<tr><td>col21</td><td>col122</td></tr>"
    "</table>"
    "</html>".

%% Namespace sample used by test4/0 and test6/0.
xml_namespace() ->
    "<?xml version=\"1.0\"?>"
    "<!-- initially, the default namespace is \"books\" -->"
    "<book xmlns='urn:loc.gov:books' xmlns:isbn='urn:ISBN:0-395-36341-6'>"
    "<title>Cheaper by the Dozen</title>"
    "<isbn:number>1568491379</isbn:number>"
    "<notes>"
    "<!-- make HTML the default namespace for some comments -->"
    "<p xmlns='urn:w3-org-ns:HTML'>"
    "This is a <i>funny</i> book!"
    "</p>"
    "</notes>"
    "</book>".


%%% ============================================================================
%%% Generic callbacks

%% Export callback for the document root: prints its arguments, emits nothing.
'#root#'(Data, Attrs, [], E) ->
    io:format("root... Data=~p Attrs=~p E=~p~n",[Data,Attrs,E]),
    [].

%% Export callback for any tag without its own function: prints its
%% arguments, emits nothing.
'#element#'(Tag, Data, Attrs, Parents, E) ->
    io:format("Tag=~p~n Data=~p~n Attrs=~p~n Parents=~p~n E=~p~n",
	      [Tag, Data, Attrs, Parents, E]),
    [].

%% Callbacks with a callback state: elements leave the state unchanged,
%% text is printed and leaves the state unchanged.
'#element#'(_Tag, _Data, _Attrs, CBstate) ->
    CBstate.

'#text#'(Text, CBstate) ->
    io:format("Text=~p~n CBstate=~p~n",
	      [Text, CBstate]),
    CBstate.


'#xml-inheritance#'() ->
    [xmerl_html].




%%% ============================================================================
%%% To run these tests you must first download the testsuite from www.w3c.org
%%% xmlconf.xml is the main test file that contains references to all the tests.
%%% Thus parse this, export result and execute tests in the call-back functions.
%%% Note:
%%% - xmerl assumes all characters are represented with a single integer.

%% Parses xmlconf.xml, looked up relative to the directory the xmerl module
%% was loaded from, and exports it with this module as callback module. The
%% 'TESTSUITE'/'TESTCASES'/'TEST' callbacks below then run the tests.
%%
%% Fixed: the diagnostics named functions and modules that are not the ones
%% called here (xmerl:scan_file/2, xmerl_text).
w3cvalidate() ->
    Tests = filename:join(filename:dirname(filename:absname(code:which(xmerl))),
			  "../w3c/xmlconf/xmlconf.xml"),
    TestDir = filename:dirname(Tests),
    io:format("Looking for W3C tests at ~p~n", [Tests]),
    {ok, Bin} = file:read_file(Tests),

    case xmerl_scan:string(binary_to_list(Bin), [{xmlbase, TestDir}]) of
	{error, Reason} ->
	    io:format("ERROR xmerl_scan:string/2 Reason=~w~n", [Reason]);
	{A, _Res} ->
	    C = xmerl:export([A], xmerl_test),
	    io:format("From xmerl:export/2 xmerl_test callbacks~n ~p~n", [C])
    end.


'TESTSUITE'(_Data, Attrs, _Parents, _E) ->
    _Profile = find_attribute('PROFILE', Attrs),
    [].

%% Fixed: the message said "testsuite" although this is the TESTCASES tag.
'TESTCASES'(_Data, Attrs, _Parents, _E) ->
    Profile = find_attribute('PROFILE', Attrs),
    XMLbase = find_attribute('xml:base', Attrs),
    io:format("testcases Profile=~p~n xml:base=~p~n", [Profile, XMLbase]),
    [].

%% More info on Canonical Forms can be found at:
%% http://dev.w3.org/cvsweb/~checkout~/2001/XML-Test-Suite/xmlconf/sun/cxml.html?content-type=text/html;%20charset=iso-8859-1
%%
%% Runs one test case. The TYPE attribute selects the checker; the file
%% under test is URI relative to the element's xmlbase. OUTPUT is preferred
%% over OUTPUT3 as the expected output form. A TYPE other than the four
%% handled below fails with case_clause.
'TEST'(Data, Attrs, _Parents, E) ->
    Id = find_attribute('ID', Attrs),
    io:format("Test: ~p ",[Id]),
    Entities = find_attribute('ENTITIES', Attrs), % Always handle all entities
    Output1 = find_attribute('OUTPUT', Attrs),
    Output3 = find_attribute('OUTPUT3', Attrs), % FIXME!
    Sections = find_attribute('SECTIONS', Attrs),
    Recommendation = find_attribute('RECOMMENDATION', Attrs), % FIXME!
    Type = find_attribute('TYPE', Attrs), % Always handle all entities
    Version = find_attribute('VERSION', Attrs), % FIXME!
    URI = find_attribute('URI', Attrs),
    Namespace = find_attribute('NAMESPACE', Attrs), % FIXME!

    OutputForm =
	case Output1 of
	    undefined -> Output3;
	    _ -> Output1
	end,
    Test = filename:join(E#xmlElement.xmlbase, URI),
    case Type of
	"valid" ->
	    test_valid(Test, Data, Sections, Entities, OutputForm, Recommendation,
		       Version, Namespace);
	"invalid" ->
	    test_invalid(Test, Data, Sections, Entities, OutputForm, Recommendation,
			 Version, Namespace);
	"not-wf" ->
	    test_notwf(Test, Data, Sections, Entities, OutputForm, Recommendation,
		       Version, Namespace);
	"error" ->
	    test_error(Test, Data, Sections, Entities, OutputForm, Recommendation,
		       Version, Namespace)
    end,
    [].

%% Really basic HTML font tweaks, to support highlighting
%% some aspects of test descriptions ...
'EM'(Data, _Attrs, _Parents, _E) ->
    [$" |Data ++ [$"]].

'B'(Data, _Attrs, _Parents, _E) ->
    [$" |Data ++ [$"]].



%% find_attribute(Tag, Attrs) -> Value | undefined
find_attribute(Tag,Attrs) ->
    case xmerl_lib:find_attribute(Tag, Attrs) of
	{value, Id} -> Id;
	false -> undefined
    end.


%% false: the first failing test case aborts the run with a throw.
-define(CONT, false).

%%% All parsers must accept "valid" testcases.
%% Runs one "valid" test case through both parser configurations.
%%
%% A run passes when the parser returns {#xmlElement{}, Tail} and Tail holds
%% nothing but whitespace. Anything else is reported through print_error/9
%% and then either aborts the whole run (throw) or yields 'error', depending
%% on ?CONT. The return value is the outcome of the validating run.
test_valid(URI, Data, Sections, Entities, OutputForm, Recommendation, Version,
           Namespace) ->
    Fail =
        fun(Problem) ->
                print_error(Problem, URI, Sections, Entities, OutputForm,
                            Recommendation, Version, Namespace, Data),
                case ?CONT of
                    false -> throw({'EXIT', failed_test});
                    _ -> error
                end
        end,
    ExpectAccepted =
        fun(Outcome, OkText) ->
                Accepted =
                    case Outcome of
                        {Doc, Rest} when is_record(Doc, xmlElement) ->
                            is_whitespace(Rest);
                        _ ->
                            false
                    end,
                case Accepted of
                    true ->
                        io:format(OkText, []),
                        ok;
                    false ->
                        Fail(Outcome)
                end
        end,
    io:format("nonvalidating ", []),
    ExpectAccepted(nonvalidating_parser_q(URI), "OK "),
    io:format("validating ", []),
    ExpectAccepted(validating_parser_q(URI), "OK~n").


%%% Nonvalidating parsers must accept "invalid" testcases, but validating ones
%%% must reject them.
%% Runs one "invalid" test case through both parser configurations.
%%
%% Nonvalidating run: passes when the parser returns {#xmlElement{}, Tail}
%% with a whitespace-only Tail; every other result is a failure.
%% Validating run: a missing test file, or a parsed element followed by a
%% whitespace-only tail, is a failure; every other result counts as "OK".
%% Failures are reported through print_error/9 and then either abort the run
%% (throw) or yield 'error', depending on ?CONT.
test_invalid(URI, Data, Sections, Entities, OutputForm, Recommendation, Version,
             Namespace) ->
    Fail =
        fun(Problem) ->
                print_error(Problem, URI, Sections, Entities, OutputForm,
                            Recommendation, Version, Namespace, Data),
                case ?CONT of
                    false -> throw({'EXIT', failed_test});
                    _ -> error
                end
        end,

    io:format("nonvalidating ", []),
    Loose = nonvalidating_parser_q(URI),
    LooseAccepted =
        case Loose of
            {Doc, Rest} when is_record(Doc, xmlElement) -> is_whitespace(Rest);
            _ -> false
        end,
    case LooseAccepted of
        true ->
            io:format("OK ", []),
            ok;
        false ->
            Fail(Loose)
    end,

    io:format("validating ", []),
    case validating_parser_q(URI) of
        {error, enoent} ->
            Fail("Testfile not found");
        {Doc2, Rest2} = Strict when is_record(Doc2, xmlElement) ->
            case is_whitespace(Rest2) of
                true ->
                    Fail(Strict);
                false ->
                    io:format("OK~n", []),
                    ok
            end;
        _Rejected ->
            io:format("OK~n", []),
            ok
    end.
%%% No parser should accept a "not-wf" testcase unless it's a nonvalidating
%%% parser and the test contains external entities that the parser doesn't read

%% Runs one "not-wf" test case through both parser configurations.
%%
%% For each run: a missing test file, or a parsed element followed by a
%% whitespace-only tail, is a failure; every other result counts as "OK".
%% Failures are reported through print_error/9 and then either abort the run
%% (throw) or yield 'error', depending on ?CONT. The return value is the
%% outcome of the validating run.
test_notwf(URI, Data, Sections, Entities, OutputForm, Recommendation, Version,
           Namespace) ->
    Fail =
        fun(Problem) ->
                print_error(Problem, URI, Sections, Entities, OutputForm,
                            Recommendation, Version, Namespace, Data),
                case ?CONT of
                    false -> throw({'EXIT', failed_test});
                    _ -> error
                end
        end,
    ExpectRejected =
        fun(Outcome, OkText) ->
                case Outcome of
                    {error, enoent} ->
                        Fail("Testfile not found");
                    {Doc, Rest} when is_record(Doc, xmlElement) ->
                        case is_whitespace(Rest) of
                            true ->
                                Fail(Outcome);
                            false ->
                                io:format(OkText, []),
                                ok
                        end;
                    _Rejected ->
                        io:format(OkText, []),
                        ok
                end
        end,
    io:format("nonvalidating ", []),
    ExpectRejected(nonvalidating_parser_q(URI), "OK "),
    io:format("validating ", []),
    ExpectRejected(validating_parser_q(URI), "OK~n").

%%% Parsers are not required to report "errors", but xmerl will always...
%% Runs one "error" test case through both parser configurations.
%%
%% A run passes only when the parser call comes back as {'EXIT', _}. A missing
%% test file and every other result are failures: they are reported through
%% print_error/9 and then either abort the run (throw) or yield 'error',
%% depending on ?CONT. The return value is the outcome of the validating run.
test_error(URI, Data, Sections, Entities, OutputForm, Recommendation, Version,
           Namespace) ->
    Fail =
        fun(Problem) ->
                print_error(Problem, URI, Sections, Entities, OutputForm,
                            Recommendation, Version, Namespace, Data),
                case ?CONT of
                    false -> throw({'EXIT', failed_test});
                    _ -> error
                end
        end,
    ExpectCrash =
        fun({'EXIT', _Why}, OkText) ->
                io:format(OkText, []),
                ok;
           ({error, enoent}, _OkText) ->
                Fail("Testfile not found");
           (Unexpected, _OkText) ->
                Fail(Unexpected)
        end,
    io:format("nonvalidating ", []),
    ExpectCrash(nonvalidating_parser_q(URI), "OK "),
    io:format("validating ", []),
    ExpectCrash(validating_parser_q(URI), "OK~n").


%%% Use xmerl as nonvalidating XML parser
%% Exceptions are converted to ordinary return values by 'catch'.
nonvalidating_parser(URI) ->
    Options = [],
    catch xmerl_scan:file(URI, Options).


%%% Use xmerl as nonvalidating XML parser, with the quiet option set
nonvalidating_parser_q(URI) ->
    Options = [{quiet, true}],
    catch xmerl_scan:file(URI, Options).


%%% Use xmerl as validating XML parser
validating_parser(URI) ->
    Options = [{validation, true}],
    catch xmerl_scan:file(URI, Options).


%%% Use xmerl as validating XML parser, with the quiet option set
validating_parser_q(URI) ->
    Options = [{validation, true}, {quiet, true}],
    catch xmerl_scan:file(URI, Options).


%% True when the argument is a list made up solely of characters accepted by
%% the ?whitespace guard macro (the empty list included); false for anything
%% else.
is_whitespace([Char | Remaining]) when ?whitespace(Char) ->
    is_whitespace(Remaining);
is_whitespace([]) ->
    true;
is_whitespace(_Other) ->
    false.
%% Prints a failure report for one test case.
%%
%% Error, URI and Sections are always printed; Entities, OutputForm,
%% Recommendation, Version and Namespace only when they are not 'undefined'.
%% Data (the text collected for the test element) is written last, verbatim.
%% It is written with io:put_chars/1 rather than being used as a format
%% string, so a "~" in a test description cannot crash the report.
%% Returns ok.
print_error(Error, URI, Sections, Entities, OutputForm, Recommendation, Version,
            Namespace, Data) ->
    io:format("ERROR ~p~n URI=~p~n See Section ~s~n",[Error, URI, Sections]),
    print_error_field(" Entities =~s~n", Entities),
    print_error_field(" OutputForm=~s FIXME!~n", OutputForm),
    print_error_field(" Recommendation=~s~n", Recommendation),
    print_error_field(" Version =~s~n", Version),
    print_error_field(" Namespace =~s~n", Namespace),
    io:put_chars(Data).

%% Prints one optional report field with the given format; does nothing when
%% the field was absent from the test description.
print_error_field(_Format, undefined) ->
    ok;
print_error_field(Format, Value) ->
    io:format(Format, [Value]).


%%% ============================================================================
%%% Callbacks for parsing of Simplified DocBook XML

%% Counting callback: returns 1 when the incoming state is 'undefined',
%% otherwise the integer state plus one. Any other state crashes with
%% case_clause.
para(_Data, _Attrs, US) ->
    case US of
        Int when is_integer(Int) -> Int+1;
        undefined -> 1
    end.

diff --git a/lib/xmerl/doc/examples/xml/test.xml b/lib/xmerl/doc/examples/xml/test.xml new file mode 100755 index 0000000000..e803a83560 --- /dev/null +++ b/lib/xmerl/doc/examples/xml/test.xml @@ -0,0 +1,6 @@ +<?xml version="1.0" ?> +<People> + <Person Type = "Personal"> + </Person> +</People> + diff --git a/lib/xmerl/doc/examples/xml/test2.xml b/lib/xmerl/doc/examples/xml/test2.xml new file mode 100755 index 0000000000..0cb11194fc --- /dev/null +++ b/lib/xmerl/doc/examples/xml/test2.xml @@ -0,0 +1,8 @@ +<?xml version="1.0" encoding = "ISO-8859-1" ?> +<People> + <!-- This is a real comment --> + <comment>This is a comment</comment> + <Person Type = "Personal"> + </Person> +</People> + diff --git a/lib/xmerl/doc/examples/xml/test3.xml b/lib/xmerl/doc/examples/xml/test3.xml new file mode 100755 index 0000000000..dbdc1e62c2 --- /dev/null +++ b/lib/xmerl/doc/examples/xml/test3.xml @@ -0,0 +1,8 @@ +<?xml version="1.0" encoding = 'ISO-8859-1' ?> +<People> + <!-- This is a real comment --> + <comment>This is a comment</comment> +
<Person Type = "Personal"> + </Person> +</People> + diff --git a/lib/xmerl/doc/examples/xml/test4.xml b/lib/xmerl/doc/examples/xml/test4.xml new file mode 100755 index 0000000000..e9d85b8d8f --- /dev/null +++ b/lib/xmerl/doc/examples/xml/test4.xml @@ -0,0 +1,9 @@ +<?xml version="1.0" encoding = 'ISO-8859-1' ?> +<People> + <!-- This is a real comment --> + <comment> + This is a comment + </comment> + <Person Type = "Personal"> + </Person> +</People> diff --git a/lib/xmerl/doc/examples/xml/test5.xml b/lib/xmerl/doc/examples/xml/test5.xml new file mode 100755 index 0000000000..e9d85b8d8f --- /dev/null +++ b/lib/xmerl/doc/examples/xml/test5.xml @@ -0,0 +1,9 @@ +<?xml version="1.0" encoding = 'ISO-8859-1' ?> +<People> + <!-- This is a real comment --> + <comment> + This is a comment + </comment> + <Person Type = "Personal"> + </Person> +</People> diff --git a/lib/xmerl/doc/examples/xml/testdtd.dtd b/lib/xmerl/doc/examples/xml/testdtd.dtd new file mode 100755 index 0000000000..2ce1c513a6 --- /dev/null +++ b/lib/xmerl/doc/examples/xml/testdtd.dtd @@ -0,0 +1,17 @@ +<!ELEMENT PARAMETER ( #PCDATA | PARAMETER )* > +<!ATTLIST PARAMETER NR ( 1000024 | 1000025 | 1000101 | 1000102 | 1000103 +| 1000105 | 1000110 | 1000115 | 1000198 ) #REQUIRED > +<!ATTLIST PARAMETER UNIT CDATA #REQUIRED > + +<!ELEMENT PRODUCT ( USER_DEF, PRODUCTELEMENT+ ) > +<!ATTLIST PRODUCT CUSTOMER CDATA #REQUIRED > +<!ATTLIST PRODUCT DESCRIPTION CDATA #REQUIRED > +<!ATTLIST PRODUCT GENERATOR NMTOKEN #REQUIRED > +<!ATTLIST PRODUCT PRODUCTID NMTOKEN #REQUIRED > + +<!ELEMENT PRODUCTELEMENT ( PARAMETER+ ) > +<!ATTLIST PRODUCTELEMENT ELEMENTID CDATA #REQUIRED > +<!ATTLIST PRODUCTELEMENT TYPE NMTOKEN #REQUIRED > + +<!ELEMENT USER_DEF ( #PCDATA ) > + diff --git a/lib/xmerl/doc/examples/xml/xmerl.xml b/lib/xmerl/doc/examples/xml/xmerl.xml new file mode 100755 index 0000000000..f02282dbef --- /dev/null +++ b/lib/xmerl/doc/examples/xml/xmerl.xml @@ -0,0 +1,523 @@ +<?xml version="1.0" encoding="iso-8859-1"?> +<!DOCTYPE 
article + PUBLIC "-//OASIS//DTD Simplified DocBook XML V1.0//EN" + "http://www.oasis-open.org/docbook/xml/simple/1.0/sdocbook.dtd"> + +<article lang="en" xml:lang="en" > + <articleinfo> + <title>XMerL - XML processing tools for Erlang</title> + <subtitle>Reference Manual</subtitle> + <authorgroup> + <author> + <firstname>Ulf</firstname> + <surname>Wiger</surname> + </author> + </authorgroup> + <revhistory> + <revision> + <revnumber>1.0</revnumber><date>2003-02-04</date> + <revremark>Converted xml from html</revremark> + </revision> + </revhistory> + <abstract> + <para>XMerL tools contains xmerl_scan; a non-validating XML + processor, xmerl_xpath; an XPath implementation, xmerl for export + of XML trees to HTML, XML or text and xmerl_xs for XSLT like + transforms in erlang. + </para> + </abstract> + </articleinfo> + + <section> + <title>xmerl_scan - the XML processor</title> + <para>The (non-validating) XML processor is activated through + <computeroutput>xmerl_scan:string/[1,2]</computeroutput> or + <computeroutput>xmerl_scan:file/[1,2]</computeroutput>. + It returns records of the type defined in xmerl.hrl. + </para> + + <para>As far as I can tell, xmerl_scan implements the complete XML + 1.0 spec, including:</para> + <itemizedlist> + <listitem><para>entity expansion</para></listitem> + <listitem><para>fetching and parsing external DTDs</para></listitem> + <listitem><para>conditional processing</para></listitem> + <listitem><para>Unicode</para></listitem> + <listitem><para>XML Names</para></listitem> + </itemizedlist> + <programlisting> +xmerl_scan:string(Text [ , Options ]) -> #xmlElement{}. +xmerl_scan:file(Filename [ , Options ]) -> #xmlElement{}. </programlisting> + + <para>The Options are basically to specify the behaviour of the + scanner. 
See the source code for details, but you can specify + funs to handle scanner events (event_fun), process the document + entities once identified (hook_fun), and decide what to do if the + scanner runs into eof before the document is complete + (continuation_fun).</para> + + <para>You can also specify a path (fetch_path) as a list of + directories to search when fetching files. If the file in question + is not in the fetch_path, the URI will be used as a file + name.</para> + + + <section> + <title>Customization functions</title> + <para>The XML processor offers a number of hooks for + customization. These hooks are defined as function objects, and + can be provided by the caller.</para> + + <para>The following customization functions are available. If + they also have access to their own state variable, the access + function for this state is identified within parentheses:</para> + + <itemizedlist> + + <listitem><para>event function (<computeroutput> + xmerl_scan:event_state/[1,2] + </computeroutput>)</para></listitem> + + <listitem><para>hook function (<computeroutput> + xmerl_scan:hook_state/[1,2] + </computeroutput>)</para></listitem> + + <listitem><para>fetch function (<computeroutput> + xmerl_scan:fetch_state/[1,2] </computeroutput>) + </para></listitem> + + <listitem><para>continuation function (<computeroutput> + xmerl_scan:cont_state/[1,2] </computeroutput>) + </para></listitem> + + <listitem><para>rules function (<computeroutput> + xmerl_scan:rules_state/[1,2] </computeroutput>) + </para></listitem> + + <listitem><para>accumulator function</para></listitem> + + <listitem><para>close function</para></listitem> + + </itemizedlist> + + <para>For all of the above state access functions, the function + with one argument + (e.g. 
<computeroutput>event_fun(GlobalState)</computeroutput>) + will read the state variable, while the function with two + arguments (e.g.: <computeroutput>event_fun(NewStateData, + GlobalState)</computeroutput>) will modify it.</para> + + <para>For each function, the description starts with the syntax + for specifying the function in the + <computeroutput>Options</computeroutput> list. The general forms + are <computeroutput>{Tag, Fun}</computeroutput>, or + <computeroutput>{Tag, Fun, LocalState}</computeroutput>. The + second form can be used to initialize the state variable in + question.</para> + + <section> + <title>User State</title> + + <para>All customization functions are free to access a + "User state" variable. Care must of course be taken + to coordinate the use of this state. It is recommended that + functions, which do not really have anything to contribute to + the "global" user state, use their own state + variable instead. Another option (used in + e.g. <computeroutput>xmerl_eventp.erl</computeroutput>) is for + customization functions to share one of the local states (in + <computeroutput>xmerl_eventp.erl</computeroutput>, the + continuation function and the fetch function both access the + <computeroutput>cont_state</computeroutput>.)</para> + + <para>Functions to access user state:</para> + + <itemizedlist> + + <listitem><para><computeroutput> + xmerl_scan:user_state(GlobalState) </computeroutput> + </para></listitem> + + <listitem><para><computeroutput>xmerl_scan:user_state(UserState', + GlobalState) </computeroutput></para></listitem> + + </itemizedlist> + + </section> + <section> + <title>Event Function</title> + + <para><computeroutput>{event_fun, fun()} | {event_fun, fun(), + LocalState}</computeroutput></para> + + <para>The event function is called at the beginning and at the + end of a parsed entity. 
It has the following format and + semantics:</para> + +<programlisting> +<![CDATA[ +fun(Event, GlobalState) -> + EventState = xmerl_scan:event_state(GlobalState), + EventState' = foo(Event, EventState), + GlobalState' = xmerl_scan:event_state(EventState', GlobalState) +end. +]]></programlisting> + + </section> + <section> + <title>Hook Function</title> + <para> <computeroutput>{hook_fun, fun()} | {hook_fun, fun(), + LocalState}</computeroutput></para> + + + +<para>The hook function is called when the processor has parsed a complete +entity. Format and semantics:</para> + +<programlisting> +<![CDATA[ +fun(Entity, GlobalState) -> + HookState = xmerl_scan:hook_state(GlobalState), + {TransformedEntity, HookState'} = foo(Entity, HookState), + GlobalState' = xmerl_scan:hook_state(HookState', GlobalState), + {TransformedEntity, GlobalState'} +end. +]]></programlisting> + + <para>The relationship between the event function, the hook + function and the accumulator function is as follows:</para> + + <orderedlist> + <listitem><para>The event function is first called with an + 'ended' event for the parsed entity.</para></listitem> + + <listitem><para>The hook function is called, possibly + re-formatting the entity.</para></listitem> + + <listitem><para>The acc function is called in order to + (optionally) add the re-formatted entity to the contents of + its parent element.</para></listitem> + + </orderedlist> + + </section> + <section> + <title>Fetch Function</title> +<para> +<computeroutput>{fetch_fun, fun()} | {fetch_fun, fun(), LocalState}</computeroutput> +</para> +<para>The fetch function is called in order to fetch an external resource +(e.g. 
a DTD).</para> + +<para>The fetch function can respond with three different return values:</para> + + <programlisting> +<![CDATA[ + Result ::= + {ok, GlobalState'} | + {ok, {file, Filename}, GlobalState'} | + {ok, {string, String}, GlobalState'} +]]></programlisting> + +<para>Format and semantics:</para> + + <programlisting> +<![CDATA[ +fun(URI, GlobalState) -> + FetchState = xmerl_scan:fetch_state(GlobalState), + Result = foo(URI, FetchState). % Result being one of the above +end. +]]></programlisting> + + </section> + <section> + <title>Continuation Function</title> +<para> +<computeroutput>{continuation_fun, fun()} | {continuation_fun, fun(), LocalState}</computeroutput> +</para> +<para>The continuation function is called when the parser encounters the end +of the byte stream. Format and semantics:</para> + + <programlisting> +<![CDATA[ +fun(Continue, Exception, GlobalState) -> + ContState = xmerl_scan:cont_state(GlobalState), + {Result, ContState'} = get_more_bytes(ContState), + GlobalState' = xmerl_scan:cont_state(ContState', GlobalState), + case Result of + [] -> + GlobalState' = xmerl_scan:cont_state(ContState', GlobalState), + Exception(GlobalState'); + MoreBytes -> + {MoreBytes', Rest} = end_on_whitespace_char(MoreBytes), + ContState'' = update_cont_state(Rest, ContState'), + GlobalState' = xmerl_scan:cont_state(ContState'', GlobalState), + Continue(MoreBytes', GlobalState') + end +end. +]]></programlisting> + </section> + <section> + <title>Rules Functions</title> + <para> +<computeroutput> +{rules, ReadFun : fun(), WriteFun : fun(), LocalState} | +{rules, Table : ets()}</computeroutput> +</para> + <para>The rules functions take care of storing scanner + information in a rules database. 
User-provided rules functions + may opt to store the information in mnesia, or perhaps in the + user_state(LocalState).</para> + + <para>The following modes exist:</para> + + <itemizedlist> + + <listitem><para>If the user doesn't specify an option, the + scanner creates an ets table, and uses built-in functions to + read and write data to it. When the scanner is done, the ets + table is deleted.</para></listitem> + + <listitem><para>If the user specifies an ets table via the + <computeroutput>{rules, Table}</computeroutput> option, the + scanner uses this table. When the scanner is done, it does + <emphasis>not</emphasis> delete the table.</para></listitem> + + <listitem><para>If the user specifies read and write + functions, the scanner will use them instead.</para></listitem> + + </itemizedlist> + + <para>The format for the read and write functions are as + follows:</para> + + +<programlisting> +<![CDATA[ +WriteFun(Context, Name, Definition, ScannerState) -> NewScannerState. +ReadFun(Context, Name, ScannerState) -> Definition | undefined. 
+]]></programlisting> + + <para>Here is a summary of the data objects currently being + written by the scanner:</para> + + <table> + <title>Scanner data objects</title> + <tgroup cols="3"> + <thead> + <row> + <entry>Context</entry> + <entry>Key Value</entry> + <entry>Definition</entry> + </row> + </thead> + <tbody> + <row> + <entry>notation</entry> + <entry>NotationName</entry> + <entry><computeroutput>{system, SL} | {public, PIDL, SL}</computeroutput></entry> + </row> + <row> + <entry>elem_def</entry> + <entry>ElementName</entry> + <entry><computeroutput>#xmlElement{content = ContentSpec}</computeroutput></entry> + </row> + <row> + <entry>parameter_entity</entry> + <entry>PEName</entry> + <entry><computeroutput>PEDef</computeroutput></entry> + </row> + <row> + <entry>entity</entry> + <entry>EntityName</entry> + <entry><computeroutput>EntityDef</computeroutput></entry> + </row> + </tbody> + </tgroup> + </table> + + +<programlisting> +<![CDATA[ +ContentSpec ::= empty | any | ElemContent +ElemContent ::= {Mode, Elems} +Mode ::= seq | choice +Elems ::= [Elem] +Elem ::= '#PCDATA' | Name | ElemContent | {Occurrence, Elems} +Occurrence ::= '*' | '?' | '+' +]]></programlisting> + <note><para>When <Elem> is not wrapped with +<Occurrence>, (Occurrence = once) is implied.</para></note> + + </section> + <section> + <title>Accumulator Function</title> + <para><computeroutput>{acc_fun, fun()} | {acc_fun, fun(), + LocalState}</computeroutput></para> + + <para>The accumulator function is called to accumulate the + contents of an entity. When parsing very large files, it may + not be desirable to do so. In this case, an acc function can + be provided that simply doesn't accumulate.</para> + + <para>Note that it is possible to even modify the parsed + entity before accumulating it, but this must be done with + care. <computeroutput>xmerl_scan</computeroutput> performs + post-processing of the element for namespace management. 
Thus, + the element must keep its original structure for this to + work.</para> + + <para>The acc function has the following format and + semantics:</para> + + <programlisting> +<![CDATA[ +%% default accumulating acc fun +fun(ParsedEntity, Acc, GlobalState) -> + {[X|Acc], GlobalState}. + +%% non-accumulating acc fun +fun(ParsedEntity, Acc, GlobalState) -> + {Acc, GlobalState}. +]]></programlisting> + </section> + <section> + <title>Close Function</title> + + <para>The close function is called when a document (either the + main document or an external DTD) has been completely + parsed. When xmerl_scan was started using + <computeroutput>xmerl_scan:file/[1,2]</computeroutput>, the + file will be read in full, and closed immediately, before the + parsing starts, so when the close function is called, it will + not need to actually close the file. In this case, the close + function will be a good place to modify the state + variables.</para> + + <para>Format and semantics:</para> + + <programlisting> +<![CDATA[ +fun(GlobalState) -> + GlobalState' = .... % state variables may be altered +]]></programlisting> + </section> + + </section> + + </section> + + <section> + <title>XPATH</title> + + <programlisting> +<![CDATA[ +xmerl_xpath:string(QueryString, #xmlElement{}) -> + [DocEntity] + +DocEntity : #xmlElement{} + | #xmlAttribute{} + | #xmlText{} + | #xmlPI{} + | #xmlComment{} +]]></programlisting> + + <para>The xmerl_xpath module does seem to handle the entire XPATH + 1.0 spec, but I haven't tested that much yet. The grammar is + defined in + <computeroutput>xmerl_xpath_parse.yrl</computeroutput>. The core + functions are defined in + <computeroutput>xmerl_xpath_pred.erl</computeroutput>.</para> + </section> + <section> + <title>Some useful shell commands for debugging the XPath parser</title> +<para> + <command> +<![CDATA[ +c(xmerl_xpath_scan). +yecc:yecc("xmerl_xpath_parse.yrl", "xmerl_xpath_parse", true, []). +c(xmerl_xpath_parse). 
+ +xmerl_xpath_parse:parse(xmerl_xpath_scan:tokens("position() > -1")). +xmerl_xpath_parse:parse(xmerl_xpath_scan:tokens("5 * 6 div 2")). +xmerl_xpath_parse:parse(xmerl_xpath_scan:tokens("5 + 6 mod 2")). +xmerl_xpath_parse:parse(xmerl_xpath_scan:tokens("5 * 6")). +xmerl_xpath_parse:parse(xmerl_xpath_scan:tokens("5 * 6")). +xmerl_xpath_parse:parse(xmerl_xpath_scan:tokens("-----6")). +xmerl_xpath_parse:parse(xmerl_xpath_scan:tokens("parent::node()")). +xmerl_xpath_parse:parse(xmerl_xpath_scan:tokens("descendant-or-self::node()")). +xmerl_xpath_parse:parse(xmerl_xpath_scan:tokens("parent::processing-instruction('foo')")).]]></command></para> + </section> + <section> + <title>Erlang Data Structure Export</title> + + <para>The idea as follows:</para> + + <para>The Erlang data structure should look like this:</para> + <programlisting> +<![CDATA[ +Element: {Tag, Attributes, Content} +Tag : atom() +Attributes: [{Key, Value}] +Content: [String | Element] +String: [char() | binary() | String] +]]></programlisting> + + <para>Some short forms are allowed:</para> + <programlisting> +<![CDATA[ +{Tag, Content} -> {Tag, [], Content} +Tag -> {Tag, [], []} +]]></programlisting> + + <para>Note that content lists must be flat, but strings can be + deep.</para> + + <para>It is also allowed to include normal + <computeroutput>#xml...</computeroutput> elements in the simple + format.</para> + + <para><computeroutput>xmerl:export_simple(Data, + Callback)</computeroutput> takes the above data structure and + exports it, using the callback module + <computeroutput>Callback</computeroutput>.</para> + + <para>The callback module should contain hook functions for all + tags present in the data structure. 
The hook function must have + the format:</para> + <para><computeroutput> Tag(Data, Attrs, Parents, E) + </computeroutput></para> + + <para>where E is an <computeroutput>#xmlElement{}</computeroutput> + record (see <computeroutput>xmerl.hrl</computeroutput>).</para> + + <para>Attrs is converted from the simple <computeroutput>[{Key, + Value}]</computeroutput> to + <computeroutput>[#xmlAttribute{}]</computeroutput></para> + + <para>Parents is a list of <computeroutput>[{ParentTag, + ParentTagPosition}]</computeroutput>.</para> + + <para>The hook function should return either the Data to be + exported, or the tuple <computeroutput>{'#xml-redefine#', + NewStructure}</computeroutput>, where + <computeroutput>NewStructure</computeroutput> is an element (which + can be simple), or a (simple-) content list wrapped in a 1-tuple + as <computeroutput>{NewContent}</computeroutput>.</para> + + <para>The callback module can inherit definitions from other + callback modules, through the required function + <computeroutput>'#xml-inheritance#'() -> + [ModuleName]</computeroutput>. </para> + + <para>As long as a tag is represented in one of the callback + modules, things will work. It is of course also possible to + redefine a tag.</para> + <section> + <title>XSLT like transforms</title> + <para>See separate document <ulink url="xmerl_xs.html" >xmerl_xs.html + </ulink></para> 
+ </section> + </section> + +</article> diff --git a/lib/xmerl/doc/examples/xml/xmerl_xs.xml b/lib/xmerl/doc/examples/xml/xmerl_xs.xml new file mode 100644 index 0000000000..9a798808b9 --- /dev/null +++ b/lib/xmerl/doc/examples/xml/xmerl_xs.xml @@ -0,0 +1,541 @@ +<?xml version="1.0" encoding="iso-8859-1"?> +<!DOCTYPE article + PUBLIC "-//OASIS//DTD Simplified DocBook XML V1.0//EN" + "http://www.oasis-open.org/docbook/xml/simple/1.0/sdocbook.dtd"> + +<article lang="en" xml:lang="en" > + <articleinfo> + <title>XSLT like transformations in Erlang </title> + <subtitle>User Guide</subtitle> + <authorgroup> + <author> + <firstname>Mikael</firstname> + <surname>Karlsson</surname> + </author> + </authorgroup> + <revhistory> + <revision> + <revnumber>1.0</revnumber><date>2002-10-25</date> + <revremark>First Draft</revremark> + </revision> + <revision> + <revnumber>1.1</revnumber><date>2003-02-05</date> + <revremark>Moved module xserl to xmerl application, renamed to + xmerl_xs</revremark> + </revision> + </revhistory> + <abstract> + <para>Erlang has similarities to XSLT since both languages + have a functional programming approach. Using the xpath implementation + in the existing xmerl application it is possible to write XSLT + like transforms in Erlang. One can also combine the + transformations with the erlang scripting possibility + in the yaws webserver to implement "on the fly" html + conversions of xml documents. 
+ </para> + </abstract> + </articleinfo> + + + <section> + <title>Terminology</title> + <variablelist> + <varlistentry> + <term>XML</term> + <listitem> + <para>Extensible Markup Language</para> + </listitem> + </varlistentry> + <varlistentry> + <term>XSLT</term> + <listitem> + <para>Extensible Stylesheet Language: Transformations</para> + </listitem> + </varlistentry> + </variablelist> + </section> + <section> + <title>Introduction</title> + <para>XSLT stylesheets are often used when transforming XML + documents, to other XML documents or (X)HTML for presentation. + There are a number of brick-sized books written on the + topic. XSLT contains quite many + functions and learning them all may take some effort, which + could be a reason why the author only has reached a basic level of + understanding. This document assumes a basic level of + understanding of XSLT. + </para> + <para>Since XSLT is based on a functional programming approach + with pattern matching and recursion it is possible to write + similar style sheets in Erlang. At least for basic + transforms. XPath which is used in XSLT is also already + implemented in the xmerl application written in Erlang. This + document describes how to use the XPath implementation together + with Erlang's pattern matching and a couple of functions to write + XSLT like transforms.</para> + <para>This approach is probably easier for an Erlanger but + if you need to use real XSLT stylesheets in order to "comply to + the standard" there is an adapter available to the Sablotron + XSLT package which is written in C++. + </para> + <para> + This document is written in the Simplified Docbook DTD which is + a subset of the complete one and converted to xhtml using a + stylesheet written in Erlang. 
+ </para> + </section> + + <section> + <title>Tools</title> + <section> + <title>xmerl</title> + <para><ulink url="http://sowap.sourceforge.net/" >xmerl</ulink> + is an XML parser written in Erlang</para> + <section> + <title>xmerl_xpath</title> + <para>XPath is an important part of XSLT and is implemented in + xmerl</para> + </section> + <section> + <title>xmerl_xs</title> + <para> + <ulink url="xmerl_xs.yaws" >xmerl_xs</ulink> is a very small + module acting as "syntactic sugar" for the XSLT lookalike + transforms. It uses xmerl_xpath. + </para> + </section> + </section> + + <section> + <title>yaws</title> + <para> + <ulink url="http://yaws.hyber.org/" >Yaws</ulink>, Yet Another + Webserver, is a web server written in Erlang that supports dynamic + content generation using embedded scripts, also written in Erlang. + </para> +<!-- + <figure> + <title>The Yaws logo</title> + <mediaobject> + <imageobject> + <imagedata fileref="yaws_pb.gif" format="GIF" scale="50%"/> + </imageobject> + </mediaobject> + </figure> +--> + <para>Yaws is not needed to make the XSLT like transformations, but + combining yaws and xmerl it is possible to do transformations + of XML documents to HTML in realtime, when clients request a + web page. As an example I am able to edit this document using + emacs with psgml tools, save the document and just do a reload + in my browser to see the result. The parse/transform time is not + visually different compared to loading any other document in the + browser. + </para> + </section> + + </section> + + <section> + <title>Transformations</title> +<para> + When xmerl_scan parses an xml string/file it returns a record of: +</para> + <programlisting> +<![CDATA[ + -record(xmlElement, { + name, + parents = [], + pos, + attributes = [], + content = [], + language = [], + expanded_name = [], + nsinfo = [],% {Prefix, Local} | [] + namespace = #xmlNamespace{} + }). 
+ ]]> +</programlisting> +<para> + Where content is a mixed list of yet other xmlElement records and/or + xmlText (or other node types). +</para> + <section> + <title>xmerl_xs functions</title> + <para> + Functions used: + </para> + <variablelist> + <varlistentry> + <term>xslapply/2</term> + <listitem> + <para>function to make things look similar + to xsl:apply-templates. + </para> + </listitem> + </varlistentry> + <varlistentry> + <term>value_of/1</term> + <listitem> + <para>Concatenates all text nodes within a tree.</para> + </listitem> + </varlistentry> + <varlistentry> + <term>select/2</term> + <listitem> + <para>select(Str, E) extracts nodes from the XML tree using + xmerl_xpath. + </para> + </listitem> + </varlistentry> + <varlistentry> + <term>built_in_rules/2</term> + <listitem> + <para>The default fallback behaviour, template funs should + end with: + <computeroutput>template(E)->built_in_rules(fun + template/1, E). +</computeroutput> + </para> + </listitem> + </varlistentry> + </variablelist> +<note><para>Text is escaped using xmerl_lib:export_text/1 for + "<", ">" and other relevant xml + characters when exported. So the value_of/1 and built_in_rules/2 + functions should be replaced when not exporting to xml or html. +</para></note> + </section> + + +<section><title>Examples</title> + <example> + <title>Using xslapply</title> + <para>original XSLT:</para> + <programlisting> +<![CDATA[ + <xsl:template match="doc/title"> + <h1> + <xsl:apply-templates/> + </h1> + </xsl:template> + ]]> + </programlisting> + <para> + becomes in Erlang:</para> + <programlisting> +<![CDATA[ + template(E = #xmlElement{ parents=[{'doc',_}|_], name='title'}) -> + ["<h1>", + xslapply(fun template/1, E), + "</h1>"]; + ]]> + </programlisting> + + </example> + <example> + <title>Using value_of and select</title> + <programlisting> +<![CDATA[ + <xsl:template match="title"> + <div align="center"><h1><xsl:value-of select="." 
/></h1></div> + </xsl:template> + ]]> + </programlisting> + <para> + becomes: + </para> + <programlisting> +<![CDATA[ +template(E = #xmlElement{name='title'}) -> + ["<div align=\"center\"><h1>", value_of(select(".", E)), "</h1></div>"]; + ]]> + </programlisting> + </example> + <example> + <title>Simple xsl stylesheet</title> +<para> + A complete example with the XSLT sheet in the xmerl distribution. +</para> + <programlisting> +<![CDATA[ + +<xsl:stylesheet version="1.0" + xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + xmlns="http://www.w3.org/TR/xhtml1/strict"> + + <xsl:strip-space elements="doc chapter section"/> + <xsl:output + method="xml" + indent="yes" + encoding="iso-8859-1" + /> + + <xsl:template match="doc"> + <html> + <head> + <title> + <xsl:value-of select="title"/> + </title> + </head> + <body> + <xsl:apply-templates/> + </body> + </html> + </xsl:template> + + <xsl:template match="doc/title"> + <h1> + <xsl:apply-templates/> + </h1> + </xsl:template> + + <xsl:template match="chapter/title"> + <h2> + <xsl:apply-templates/> + </h2> + </xsl:template> + + <xsl:template match="section/title"> + <h3> + <xsl:apply-templates/> + </h3> + </xsl:template> + + <xsl:template match="para"> + <p> + <xsl:apply-templates/> + </p> + </xsl:template> + + <xsl:template match="note"> + <p class="note"> + <b>NOTE: </b> + <xsl:apply-templates/> + </p> + </xsl:template> + + <xsl:template match="emph"> + <em> + <xsl:apply-templates/> + </em> + </xsl:template> + +</xsl:stylesheet> + ]]> + </programlisting> + </example> + <example> + <title>Erlang version</title> + <para> + Erlang transformation of previous example: + </para> + <programlisting> +<![CDATA[ + +-include("xmerl.hrl"). + +-import(xmerl_xs, + [ xslapply/2, value_of/1, select/2, built_in_rules/2 ]). + +doctype()-> + "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\ + \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd \">". + +process_xml(Doc)-> + template(Doc). 
+ +template(E = #xmlElement{name='doc'})-> + [ "<\?xml version=\"1.0\" encoding=\"iso-8859-1\"\?>", + doctype(), + "<html xmlns=\"http://www.w3.org/1999/xhtml\" >" + "<head>" + "<title>", value_of(select("title",E)), "</title>" + "</head>" + "<body>", + xslapply( fun template/1, E), + "</body>" + "</html>" ]; + + +template(E = #xmlElement{ parents=[{'doc',_}|_], name='title'}) -> + ["<h1>", + xslapply( fun template/1, E), + "</h1>"]; + +template(E = #xmlElement{ parents=[{'chapter',_}|_], name='title'}) -> + ["<h2>", + xslapply( fun template/1, E), + "</h2>"]; + +template(E = #xmlElement{ parents=[{'section',_}|_], name='title'}) -> + ["<h3>", + xslapply( fun template/1, E), + "</h3>"]; + +template(E = #xmlElement{ name='para'}) -> + ["<p>", xslapply( fun template/1, E), "</p>"]; + +template(E = #xmlElement{ name='note'}) -> + ["<p class=\"note\">" + "<b>NOTE: </b>", + xslapply( fun template/1, E), + "</p>"]; + +template(E = #xmlElement{ name='emph'}) -> + ["<em>", xslapply( fun template/1, E), "</em>"]; + +template(E)-> + built_in_rules( fun template/1, E). + ]]> + </programlisting> + <para> + It is important to end with a call to + <computeroutput>xmerl_xs:built_in_rules/2</computeroutput> + if you want any text to be written in "push" transforms. + Those are the ones that rely heavily on <computeroutput>xslapply( fun + template/1, E )</computeroutput> instead of + <computeroutput>value_of(select("xpath",E))</computeroutput>, + which is pull... + </para> + </example> +<para>The largest example is the stylesheet to transform this document + from the Simplified Docbook XML format to xhtml. The source + file is <computeroutput>sdocbook2xhtml.erl</computeroutput>. +</para> +</section> + <section> + <title>Tips and tricks</title> + <section> + <title>for-each</title> + <para>The function for-each is quite common in XSLT stylesheets. + It can often be rewritten and replaced by select/1. 
Since + select/1 returns a list of #xmlElements and xslapply/2 + traverses them it is more or less the same as to loop over all + the elements. + </para> + </section> + <section> + <title>position()</title> + <para>The XSLT position() and #xmlElement.pos are not the + same. One has to make an own position in Erlang.</para> + <example> + <title>Counting positions</title> + <programlisting> +<![CDATA[ +<xsl:template match="stanza"> + <p><xsl:apply-templates select="line" /></p> +</xsl:template> + +<xsl:template match="line"> + <xsl:if test="position() mod 2 = 0">  </xsl:if> + <xsl:value-of select="." /><br /> +</xsl:template> + ]]> + </programlisting> +<para>Can be written as</para> + <programlisting> +<![CDATA[ +template(E = #xmlElement{name='stanza'}) -> + {Lines,LineNo} = lists:mapfoldl(fun template_pos/2, 1, select("line", E)), + ["<p>", Lines, "</p>"]. + +template_pos(E = #xmlElement{name='line'}, P) -> + {[indent_line(P rem 2), value_of(E#xmlElement.content), "<br />"], P + 1 }. + +indent_line(0)->"  "; +indent_line(_)->"". + ]]> + </programlisting> + </example> + </section> + <section> + <title>Global tree awareness</title> + <para>In XSLT you have "root" access to the top of the tree + with XPath, even though you are somewhere deep in your + tree.</para> + <para>The xslapply/2 function only carries back the child part + of the tree to the template fun. 
But it is quite easy to write + template funs that handle both the child and top tree.</para> + <example> + <title>Passing the root tree</title> + <para>The following example piece will prepend the article + title to any section title.</para> + <programlisting> +<![CDATA[ +template(E = #xmlElement{name='title'}, ETop ) -> + ["<h3>", value_of(select("title", ETop))," - ", + xslapply( fun(A) -> template(A, ETop) end, E), + "</h3>"]; + ]]> + </programlisting> + </example> + </section> + </section> + + </section> + + + <section> + <title>Utility functions</title> + <para> + The module xmerl_xs contains the functions + <computeroutput>mapxml/2, foldxml/3</computeroutput> and + <computeroutput> mapfoldxml/3</computeroutput> to traverse + <literal>#xmlElement</literal> trees. They can be used in order + to build cross-references, see sdocbook2xhtml.erl for instance + where <computeroutput>foldxml/3</computeroutput> and + <computeroutput> mapfoldxml/3</computeroutput> are used to + number chapters, examples and figures and to build the Table of + contents for the document. + </para> + </section> + + + <section> + <title>Future enhancements</title> + <para> + More of a wish list than a task list at the moment. + </para> + <itemizedlist> + <listitem> + <para>More stylesheets</para> + </listitem> + <listitem> + <para>On the fly exports to PDF for printing and also more + "polished" presentations. + </para> + </listitem> + </itemizedlist> + </section> + + <section> + <title>References</title> + <orderedlist> + <listitem> + <para><ulink url="../xml/xmerl_xs.xml" >XML source + file</ulink> for this document. + </para> + </listitem> + <listitem> + <para><ulink url="../xs/sdocbook2xhtml.erl" >Erlang style + sheet</ulink> used for this document. 
(Simplified Docbook DTD).</para> + </listitem> + <listitem> + <para><ulink url="http://www.erlang.org/" >Open Source Erlang</ulink> + </para> + </listitem> + </orderedlist> + + </section> +</article> + +<!-- +Local Variables: +mode: xml +sgml-indent-step: 2 +sgml-indent-data: t +sgml-set-face: t +sgml-insert-missing-element-comment: nil +End: +--> diff --git a/lib/xmerl/doc/examples/xserl_test.erl b/lib/xmerl/doc/examples/xserl_test.erl new file mode 100644 index 0000000000..69db75cfe8 --- /dev/null +++ b/lib/xmerl/doc/examples/xserl_test.erl @@ -0,0 +1,85 @@ +-module(xserl_test). +-include("xmerl.hrl"). +-import(xserl,[ xslapply/2, value_of/1, select/2, built_in_rules/2 ]). +-export([process_xml/1,test/0]). + +doctype()-> + "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\ + \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd \">". + +test() -> + Str= "<?xml version=\"1.0\"?>" + "<doc xmlns='urn:loc.gov:books' xmlns:isbn='urn:ISBN:0-395-36341-6'>" + "<title>Cheaper by the Dozen</title>" + "<isbn:number>1568491379</isbn:number>" + "<note>" + "<!-- make HTML the default namespace for some commentary -->" + "<p xmlns='urn:w3-org-ns:HTML'>" + "This is a <i>funny</i> book!" + "</p>" + "</note>" + "</doc>", + {Doc,_}=xmerl_scan:string(Str,[{fetch_fun, fun(DTDSpec,S) -> {ok,S} end}]), + + process_xml(Doc). + +process_xml(Doc)-> + template(Doc). 
+ +template(E = #xmlElement{name='doc'})-> + [ "<\?xml version=\"1.0\" encoding=\"iso-8859-1\"\?>", + doctype(), + "<html xmlns=\"http://www.w3.org/1999/xhtml\" >" + "<head>" + "<title>", xserl:value_of(select("title",E)), "</title>" + "</head>" + "<body>", + xslapply( fun template/1, E), + "</body>" + "</html>" ]; + + +template(E = #xmlElement{ parents=[{'doc',_}|_], name='title'}) -> + ["<h1>", +%% xslapply( fun template/1, E), +%% same as + lists:map( fun template/1, E#xmlElement.content ), + "</h1>"]; + +template(E = #xmlElement{ parents=[{'chapter',_}|_], name='title'}) -> + ["<h2>", + xslapply( fun template/1, E), + "</h2>"]; + +template(E = #xmlElement{ parents=[{'section',_}|_], name='title'}) -> + ["<h3>", + xslapply( fun template/1, E), + "</h3>"]; + +template(E = #xmlElement{ name='para'}) -> + ["<p>", + xslapply( fun template/1, E), + "</p>"]; + +template(E = #xmlElement{ name='note'}) -> + ["<p class=\"note\">" + "<b>NOTE: </b>", + xslapply( fun template/1, E), + "</p>"]; + +template(E = #xmlElement{ name='emph'}) -> + ["<em>", + xslapply( fun template/1, E), + "</em>"]; + +template(E)-> + built_in_rules( fun template/1, E). + +%% It is important to end with a call to xserl:built_in_rules/2 +%% if you want any text to be written in "push" transforms. +%% That are the ones using a lot xslapply( fun template/1, E ) +%% instead of value_of(select("xpath",E)), which is pull... +%% Could maybe be caught as an exception in xslapply instead, +%% but I think that could degrade performance - having an +%% exception for every #xmlText element. + diff --git a/lib/xmerl/doc/src/notes.xml b/lib/xmerl/doc/src/notes.xml index 91a98808a2..697823eee2 100644 --- a/lib/xmerl/doc/src/notes.xml +++ b/lib/xmerl/doc/src/notes.xml @@ -4,7 +4,7 @@ <chapter> <header> <copyright> - <year>2004</year><year>2010</year> + <year>2004</year><year>2011</year> <holder>Ericsson AB. 
All Rights Reserved.</holder> </copyright> <legalnotice> @@ -31,7 +31,82 @@ <p>This document describes the changes made to the Xmerl application.</p> -<section><title>Xmerl 1.2.4.1</title> +<section><title>Xmerl 1.2.9</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> + Fix minor typos and improve punctuation in the + xmerl_xpath @doc comment (Thanks to Marcus Marinelli)</p> + <p> + Own Id: OTP-9187</p> + </item> + <item> + <p> + Prevent xmerl from over-normalizing character references + in attributes</p> + <p> + Section 3.3.3 of the XML Recommendation gives the rules + for attribute-value normalization. One of those rules + requires that character references not be re-normalized + after being replaced with the referenced characters. + (Thanks to Tom Moertel)</p> + <p> + Own Id: OTP-9274</p> + </item> + <item> + <p> Fixed the default encoding option in SAX parser. </p> + <p> + Own Id: OTP-9288</p> + </item> + </list> + </section> + + + <section><title>Improvements and New Features</title> + <list> + <item> + <p> Added the xmerl test suites and examples to the open + source distribution. </p> + <p> + Own Id: OTP-9228</p> + </item> + </list> + </section> + +</section> + +<section><title>Xmerl 1.2.8</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> The function xmerl_lib:expand_content/1 is mainly for + expanding Simple XML, but can also handle xmerl records. + This patch fixes an omission that caused expand_content/1 + to not maintain the parents list when expanding + #xmlElement{} records. (Thanks to Ulf Wiger) </p> + <p> + Own Id: OTP-9034</p> + </item> + </list> + </section> + + + <section><title>Improvements and New Features</title> + <list> + <item> + <p> Removed some dialyzer warnings. 
</p> + <p> + Own Id: OTP-9074</p> + </item> + </list> + </section> + +</section> + +<section><title>Xmerl 1.2.7</title> <section><title>Fixed Bugs and Malfunctions</title> <list> @@ -41,6 +116,76 @@ <p> Own Id: OTP-8599</p> </item> + <item> + <p> Fix format_man_pages so it handles all man sections + and remove warnings/errors in various man pages. </p> + <p> + Own Id: OTP-8600</p> + </item> + </list> + </section> + + + <section><title>Improvements and New Features</title> + <list> + <item> + <p> Fix entity checking so there are no fatal errors for + undefined entities when option skip_external_dtd is used. + </p> + <p> + Own Id: OTP-8947</p> + </item> + </list> + </section> + +</section> + +<section><title>Xmerl 1.2.6</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> Fixed problem with hex entities in UTF-8 documents: + When a document was in UTF-8 encoding, xmerl_scan + improperly replaced hex entities by the UTF-8 bytes + instead of returning the character, as it does with + inline UTF-8 text and decimal entities. (Thanks to Paul + Guyot.) </p> + <p> + Own Id: OTP-8697</p> + </item> + </list> + </section> + +</section> + +<section><title>Xmerl 1.2.5</title> + + <section><title>Improvements and New Features</title> + <list> + <item> + <p> + All Erlang files are now built by the test server instead of the test directory Makefile. + </p> + <p> + Erlang files in data directories are now built by the test suites instead of using + prebuilt versions under version control. + </p> + <p> + Removed a number of obsolete guards. + </p> + <p> + Own Id: OTP-8537 + </p> + </item> + <item> + <p> + An empty element declared as a simpleContent was not properly validated. 
+ </p> + <p> + Own Id: OTP-8599 + </p> + </item> </list> </section> @@ -69,7 +214,7 @@ <item> <p> A continuation clause of <c>parse_reference/3</c> had - it's parameters in wrong order.</p> + its parameters in wrong order.</p> <p> Own Id: OTP-8251 Aux Id: seq11429 </p> </item> diff --git a/lib/xmerl/doc/src/notes_history.xml b/lib/xmerl/doc/src/notes_history.xml index 06d0cb3b40..a8f7d8b3a6 100644 --- a/lib/xmerl/doc/src/notes_history.xml +++ b/lib/xmerl/doc/src/notes_history.xml @@ -5,7 +5,7 @@ <header> <copyright> <year>2006</year> - <year>2007</year> + <year>2011</year> <holder>Ericsson AB, All Rights Reserved</holder> </copyright> <legalnotice> diff --git a/lib/xmerl/doc/src/xmerl_sax_parser.xml b/lib/xmerl/doc/src/xmerl_sax_parser.xml index ea63ba22a1..972023622e 100644 --- a/lib/xmerl/doc/src/xmerl_sax_parser.xml +++ b/lib/xmerl/doc/src/xmerl_sax_parser.xml @@ -5,7 +5,7 @@ <header> <copyright> <year>2008</year> - <year>2008</year> + <year>2011</year> <holder>Ericsson AB, All Rights Reserved</holder> </copyright> <legalnotice> |