diff options
Diffstat (limited to 'lib/xmerl')
34 files changed, 695 insertions, 309 deletions
diff --git a/lib/xmerl/doc/examples/test_html.erl b/lib/xmerl/doc/examples/test_html.erl index 3ca15f30f8..3ca15f30f8 100755..100644 --- a/lib/xmerl/doc/examples/test_html.erl +++ b/lib/xmerl/doc/examples/test_html.erl diff --git a/lib/xmerl/doc/examples/xml/test.xml b/lib/xmerl/doc/examples/xml/test.xml index e803a83560..e803a83560 100755..100644 --- a/lib/xmerl/doc/examples/xml/test.xml +++ b/lib/xmerl/doc/examples/xml/test.xml diff --git a/lib/xmerl/doc/examples/xml/test2.xml b/lib/xmerl/doc/examples/xml/test2.xml index 0cb11194fc..0cb11194fc 100755..100644 --- a/lib/xmerl/doc/examples/xml/test2.xml +++ b/lib/xmerl/doc/examples/xml/test2.xml diff --git a/lib/xmerl/doc/examples/xml/test3.xml b/lib/xmerl/doc/examples/xml/test3.xml index dbdc1e62c2..dbdc1e62c2 100755..100644 --- a/lib/xmerl/doc/examples/xml/test3.xml +++ b/lib/xmerl/doc/examples/xml/test3.xml diff --git a/lib/xmerl/doc/examples/xml/test4.xml b/lib/xmerl/doc/examples/xml/test4.xml index e9d85b8d8f..e9d85b8d8f 100755..100644 --- a/lib/xmerl/doc/examples/xml/test4.xml +++ b/lib/xmerl/doc/examples/xml/test4.xml diff --git a/lib/xmerl/doc/examples/xml/test5.xml b/lib/xmerl/doc/examples/xml/test5.xml index e9d85b8d8f..e9d85b8d8f 100755..100644 --- a/lib/xmerl/doc/examples/xml/test5.xml +++ b/lib/xmerl/doc/examples/xml/test5.xml diff --git a/lib/xmerl/doc/examples/xml/testdtd.dtd b/lib/xmerl/doc/examples/xml/testdtd.dtd index 2ce1c513a6..2ce1c513a6 100755..100644 --- a/lib/xmerl/doc/examples/xml/testdtd.dtd +++ b/lib/xmerl/doc/examples/xml/testdtd.dtd diff --git a/lib/xmerl/doc/examples/xml/xmerl.xml b/lib/xmerl/doc/examples/xml/xmerl.xml index f02282dbef..f02282dbef 100755..100644 --- a/lib/xmerl/doc/examples/xml/xmerl.xml +++ b/lib/xmerl/doc/examples/xml/xmerl.xml diff --git a/lib/xmerl/doc/src/make.dep b/lib/xmerl/doc/src/make.dep deleted file mode 100644 index 9c303fc41c..0000000000 --- a/lib/xmerl/doc/src/make.dep +++ /dev/null @@ -1,24 +0,0 @@ -# ---------------------------------------------------- 
-# >>>> Do not edit this file <<<< -# This file was automaticly generated by -# /home/otp/bin/docdepend -# ---------------------------------------------------- - - -# ---------------------------------------------------- -# TeX files that the DVI file depend on -# ---------------------------------------------------- - -book.dvi: book.tex part.tex ref_man.tex xmerl.tex xmerl_eventp.tex \ - xmerl_scan.tex xmerl_ug.tex xmerl_xpath.tex \ - xmerl_xs.tex xmerl_xsd.tex xmerl_sax_parser.tex - -# ---------------------------------------------------- -# Source inlined when transforming from source to LaTeX -# ---------------------------------------------------- - -book.tex: ref_man.xml - -xmerl_ug.tex: motorcycles.txt motorcycles2html.erl motorcycles_dtd.txt \ - new_motorcycles.txt new_motorcycles2.txt - diff --git a/lib/xmerl/doc/src/notes.xml b/lib/xmerl/doc/src/notes.xml index 697823eee2..15c42d6f6a 100644 --- a/lib/xmerl/doc/src/notes.xml +++ b/lib/xmerl/doc/src/notes.xml @@ -31,6 +31,63 @@ <p>This document describes the changes made to the Xmerl application.</p> +<section><title>Xmerl 1.2.10</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> Fixed a schema search bug in xmerl_xsd. </p> <p> A + new flag was needed in the xsd_state record so if the + state is saved there is an incompatibility and a state + conversion is needed. </p> + <p> + *** INCOMPATIBILITY with R14B03 ***</p> + <p> + Own Id: OTP-9410</p> + </item> + <item> + <p> Fixed xmerl_scan problems with entities in attribute + values. </p> + <p> + Own Id: OTP-9411</p> + </item> + <item> + <p> Streaming bug in xmerl_scan. </p> <p> If the + continuation_fun runs out of input at the end of an + attribute value then it crashed. 
(Thanks to Simon + Cornish) </p> + <p> + Own Id: OTP-9457</p> + </item> + <item> + <p> + Fixed xmerl_ucs UCS2 little endian en/decoding</p> + <p> + Corrected number of shift bytes in + xmerl_ucs:char_to_ucs2le and recursive call from + from_ucs2le to from_ucs4le. (Thanks to Michal Ptaszek)</p> + <p> + Own Id: OTP-9548</p> + </item> + <item> + <p> + Add latin9 (iso-8859-15) support in xmerl_ucs (Thanks to + David Julien)</p> + <p> + Own Id: OTP-9552</p> + </item> + <item> + <p> + Improve spelling throughout documentation, code comments + and error messages</p> + <p> + Own Id: OTP-9555</p> + </item> + </list> + </section> + +</section> + <section><title>Xmerl 1.2.9</title> <section><title>Fixed Bugs and Malfunctions</title> diff --git a/lib/xmerl/doc/src/part_notes.xml b/lib/xmerl/doc/src/part_notes.xml index 827ffd90e9..827ffd90e9 100755..100644 --- a/lib/xmerl/doc/src/part_notes.xml +++ b/lib/xmerl/doc/src/part_notes.xml diff --git a/lib/xmerl/include/xmerl.hrl b/lib/xmerl/include/xmerl.hrl index 7bb3f4de9b..3760a5cce0 100755..100644 --- a/lib/xmerl/include/xmerl.hrl +++ b/lib/xmerl/include/xmerl.hrl @@ -61,10 +61,11 @@ }). %% namespace node - i.e. a {Prefix, URI} pair -%% TODO: these are not currently used?? /RC -record(xmlNsNode,{ - prefix, - uri = [] + parents = [], % [{atom(),integer()}] + pos, % integer() + prefix, % string() + uri = [] % [] | atom() }). %% XML Element @@ -103,9 +104,10 @@ %% processing instruction -record(xmlPI,{ - name, % atom() - pos, % integer() - value % IOlist() + name, % atom() + parents = [], % [{atom(),integer()}] + pos, % integer() + value % IOlist() }). 
-record(xmlDocument,{ @@ -154,6 +156,9 @@ declarations = [], % [{Name, Attrs}] doctype_name, doctype_DTD = internal, % internal | DTDId + comments = true, + document = false, + default_attrs = false, rules, keep_rules = false, % delete (ets) tab if false namespace_conformant = false, % true | false diff --git a/lib/xmerl/include/xmerl_xlink.hrl b/lib/xmerl/include/xmerl_xlink.hrl index 375e244c23..375e244c23 100755..100644 --- a/lib/xmerl/include/xmerl_xlink.hrl +++ b/lib/xmerl/include/xmerl_xlink.hrl diff --git a/lib/xmerl/include/xmerl_xsd.hrl b/lib/xmerl/include/xmerl_xsd.hrl index b527accc8c..6dad7d8ff0 100644 --- a/lib/xmerl/include/xmerl_xsd.hrl +++ b/lib/xmerl/include/xmerl_xsd.hrl @@ -36,6 +36,7 @@ schema_name, vsn, schema_preprocessed=false, + external_xsd_base=false, xsd_base, xml_options=[], scope=[], diff --git a/lib/xmerl/src/xmerl.erl b/lib/xmerl/src/xmerl.erl index cf78f7bdf7..2332517988 100644 --- a/lib/xmerl/src/xmerl.erl +++ b/lib/xmerl/src/xmerl.erl @@ -307,7 +307,7 @@ apply_cb(Ms, F, Df, Args) -> apply_cb([M|Ms], F, Df, Args, Ms0) -> case catch apply(M, F, Args) of - {'EXIT', {undef,[{M,F,_}|_]}} -> + {'EXIT', {undef,[{M,F,_,_}|_]}} -> apply_cb(Ms, F, Df, Args, Ms0); {'EXIT', Reason} -> exit(Reason); diff --git a/lib/xmerl/src/xmerl_lib.erl b/lib/xmerl/src/xmerl_lib.erl index 6402f1cbeb..aeb821f411 100644 --- a/lib/xmerl/src/xmerl_lib.erl +++ b/lib/xmerl/src/xmerl_lib.erl @@ -160,8 +160,9 @@ expand_element(E = #xmlText{}, Pos, Parents, Norm) -> E#xmlText{pos = Pos, parents = Parents, value = expand_text(E#xmlText.value, Norm)}; -expand_element(E = #xmlPI{}, Pos, _Parents, Norm) -> +expand_element(E = #xmlPI{}, Pos, Parents, Norm) -> E#xmlPI{pos = Pos, + parents = Parents, value = expand_text(E#xmlPI.value, Norm)}; expand_element(E = #xmlComment{}, Pos, Parents, Norm) -> E#xmlComment{pos = Pos, diff --git a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc index 3b9eaa309c..ec9178ea25 100644 --- 
a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc @@ -944,14 +944,19 @@ parse_att_value(?STRING_REST("&", Rest), State, Stop, Acc) -> {unparsed, Name, _} -> ?fatal_error(State1, "Unparsed entity reference in attribute value: " ++ Name) end; -parse_att_value(?STRING_UNBOUND_REST(Stop, Rest), State, Stop, Acc) -> +parse_att_value(?STRING_UNBOUND_REST(Stop, Rest), State, Stop, Acc) -> {lists:reverse(Acc), Rest, State}; -parse_att_value(?STRING_UNBOUND_REST($<, _Rest), State, _Stop, _Acc) -> +parse_att_value(?STRING_UNBOUND_REST($<, _Rest), State, _Stop, _Acc) -> ?fatal_error(State, "< not allowed in attribute value"); -parse_att_value(?STRING_UNBOUND_REST(C, Rest), State, Stop, Acc) -> - parse_att_value(Rest, State, Stop, [C|Acc]); -parse_att_value(Bytes, State, Stop, Acc) -> - unicode_incomplete_check([Bytes, State, Stop, Acc, fun parse_att_value/4], +parse_att_value(?STRING_UNBOUND_REST(C, Rest), State, Stop, Acc) -> + if + ?is_char(C) -> + parse_att_value(Rest, State, Stop, [C|Acc]); + true -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character in attribute value: ~p", [C]))) + end; +parse_att_value(Bytes, State, Stop, Acc) -> + unicode_incomplete_check([Bytes, State, Stop, Acc, fun parse_att_value/4], undefined). @@ -1120,10 +1125,10 @@ parse_content(?STRING_UNBOUND_REST(C, Rest), State, Acc, _IgnorableWS) -> ?is_char(C) -> parse_content(Rest, State, [C|Acc], false); true -> - ?fatal_error(State, "Bad character in content: " ++ C) - end; -parse_content(Bytes, State, Acc, IgnorableWS) -> - unicode_incomplete_check([Bytes, State, Acc, IgnorableWS, fun parse_content/4], + ?fatal_error(State, lists:flatten(io_lib:format("Bad character in content: ~p", [C]))) + end; +parse_content(Bytes, State, Acc, IgnorableWS) -> + unicode_incomplete_check([Bytes, State, Acc, IgnorableWS, fun parse_content/4], undefined). 
@@ -2522,11 +2527,16 @@ parse_entity_value(?STRING_REST("%", Rest), #xmerl_sax_parser_state{file_type=Ty end end; -parse_entity_value(?STRING_UNBOUND_REST(Stop, Rest), State, Stop, Acc) -> +parse_entity_value(?STRING_UNBOUND_REST(Stop, Rest), State, Stop, Acc) -> {lists:reverse(Acc), Rest, State}; -parse_entity_value(?STRING_UNBOUND_REST(C, Rest), State, Stop, Acc) -> - parse_entity_value(Rest, State, Stop, [C|Acc]); -parse_entity_value(Bytes, State, Stop, Acc) -> +parse_entity_value(?STRING_UNBOUND_REST(C, Rest), State, Stop, Acc) -> + if + ?is_char(C) -> + parse_entity_value(Rest, State, Stop, [C|Acc]); + true -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character in entity value: ~p", [C]))) + end; +parse_entity_value(Bytes, State, Stop, Acc) -> unicode_incomplete_check([Bytes, State, Stop, Acc, fun parse_entity_value/4], undefined). diff --git a/lib/xmerl/src/xmerl_scan.erl b/lib/xmerl/src/xmerl_scan.erl index 059c8f21b6..ec7ea534d6 100644 --- a/lib/xmerl/src/xmerl_scan.erl +++ b/lib/xmerl/src/xmerl_scan.erl @@ -100,7 +100,21 @@ %% <dd>Set default character set used (default UTF-8). %% This character set is used only if not explicitly given by the XML %% declaration. </dd> +%% <dt><code>{document, Flag}</code></dt> +%% <dd>Set to 'true' if xmerl should return a complete XML document +%% as an xmlDocument record (default 'false').</dd> +%% <dt><code>{comments, Flag}</code></dt> +%% <dd>Set to 'false' if xmerl should skip comments otherwise they will +%% be returned as xmlComment records (default 'true').</dd> +%% <dt><code>{default_attrs, Flag}</code></dt> +%% <dd>Set to 'true' if xmerl should add to elements missing attributes +%% with a defined default value (default 'false').</dd> %% </dl> +%% @type document() = xmlElement() | xmlDocument(). <p> +%% The document returned by <tt>xmerl_scan:string/[1,2]</tt> and +%% <tt>xmerl_scan:file/[1,2]</tt>. 
The type of the returned record depends on +%% the value of the document option passed to the function. +%% </p> -module(xmerl_scan). @@ -224,7 +238,7 @@ cont_state(X, S=#xmerl_scanner{fun_states = FS}) -> file(F) -> file(F, []). -%% @spec file(Filename::string(), Options::option_list()) -> {xmlElement(),Rest} +%% @spec file(Filename::string(), Options::option_list()) -> {document(),Rest} %% Rest = list() %%% @doc Parse file containing an XML document file(F, Options) -> @@ -264,7 +278,7 @@ int_file_decl(F, Options,_ExtCharset) -> string(Str) -> string(Str, []). -%% @spec string(Text::list(),Options::option_list()) -> {xmlElement(),Rest} +%% @spec string(Text::list(),Options::option_list()) -> {document(),Rest} %% Rest = list() %%% @doc Parse string containing an XML document string(Str, Options) -> @@ -381,6 +395,12 @@ initial_state([{quiet, F}|T], S) when F==true; F==false -> initial_state(T, S#xmerl_scanner{quiet = F}); initial_state([{doctype_DTD,DTD}|T], S) -> initial_state(T,S#xmerl_scanner{doctype_DTD = DTD}); +initial_state([{document, F}|T], S) when is_boolean(F) -> + initial_state(T,S#xmerl_scanner{document = F}); +initial_state([{comments, F}|T], S) when is_boolean(F) -> + initial_state(T,S#xmerl_scanner{comments = F}); +initial_state([{default_attrs, F}|T], S) when is_boolean(F) -> + initial_state(T,S#xmerl_scanner{default_attrs = F}); initial_state([{text_decl,Bool}|T], S) -> initial_state(T,S#xmerl_scanner{text_decl=Bool}); initial_state([{environment,Env}|T], S) -> @@ -518,6 +538,7 @@ scan_document(Str0, S=#xmerl_scanner{event_fun = Event, line = L, col = C, environment=Env, encoding=Charset, + document=Document, validation=ValidateResult}) -> S1 = Event(#xmerl_event{event = started, line = L, @@ -530,8 +551,8 @@ scan_document(Str0, S=#xmerl_scanner{event_fun = Event, Str=if Charset == "utf-8" -> Str0; - Charset=/=undefined -> % Default character set is UTF-8 - xmerl_ucs:to_unicode(Str0,list_to_atom(Charset)); + Charset =/= undefined -> % Default 
character set is UTF-8 + xmerl_ucs:to_unicode(Str0, list_to_atom(Charset)); true -> %% Charset is undefined if no external input is %% given, and no auto detection of character %% encoding was made. @@ -539,17 +560,17 @@ scan_document(Str0, S=#xmerl_scanner{event_fun = Event, end, %% M1 = erlang:memory(), %% io:format("Memory status before prolog: ~p~n",[M1]), - {T1, S2} = scan_prolog(Str, S1, _StartPos = 1), + {Prolog, Pos, T1, S2} = scan_prolog(Str, S1, _StartPos = 1), %% M2 = erlang:memory(), %% io:format("Memory status after prolog: ~p~n",[M2]), %%io:format("scan_document 2, prolog parsed~n",[]), - T2 = scan_mandatory("<",T1,1,S2,expected_element_start_tag), + T2 = scan_mandatory("<", T1, 1, S2, expected_element_start_tag), %% M3 = erlang:memory(), %% io:format("Memory status before element: ~p~n",[M3]), - {Res, T3, S3} =scan_element(T2,S2,_StartPos = 1), + {Res, T3, S3} = scan_element(T2,S2,Pos), %% M4 = erlang:memory(), %% io:format("Memory status after element: ~p~n",[M4]), - {Tail, S4}=scan_misc(T3, S3, _StartPos = 1), + {Misc, _Pos1, Tail, S4}=scan_misc(T3, S3, Pos + 1), %% M5 = erlang:memory(), %% io:format("Memory status after misc: ~p~n",[M5]), @@ -558,44 +579,52 @@ scan_document(Str0, S=#xmerl_scanner{event_fun = Event, col = S4#xmerl_scanner.col, data = document}, S4), - {Res2,S6} = case validation_mode(ValidateResult) of + {Res2, S6} = case validation_mode(ValidateResult) of off -> - {Res,cleanup(S5)}; + {Res, cleanup(S5)}; dtd when Env == element; Env == prolog -> check_decl2(S5), - case xmerl_validate:validate(S5,Res) of - {'EXIT',{error,Reason}} -> - S5b=cleanup(S5), - ?fatal({failed_validation,Reason}, S5b); - {'EXIT',Reason} -> - S5b=cleanup(S5), - ?fatal({failed_validation,Reason}, S5b); - {error,Reason} -> - S5b=cleanup(S5), - ?fatal({failed_validation,Reason}, S5b); - {error,Reason,_Next} -> - S5b=cleanup(S5), - ?fatal({failed_validation,Reason}, S5b); + case xmerl_validate:validate(S5, Res) of + {'EXIT', {error, Reason}} -> + S5b = 
cleanup(S5), + ?fatal({failed_validation, Reason}, S5b); + {'EXIT', Reason} -> + S5b = cleanup(S5), + ?fatal({failed_validation, Reason}, S5b); + {error, Reason} -> + S5b = cleanup(S5), + ?fatal({failed_validation, Reason}, S5b); + {error, Reason, _Next} -> + S5b = cleanup(S5), + ?fatal({failed_validation, Reason}, S5b); _XML -> - {Res,cleanup(S5)} + {Res, cleanup(S5)} end; schema -> - case schemaLocations(Res,S5) of - {ok,Schemas} -> + case schemaLocations(Res, S5) of + {ok, Schemas} -> cleanup(S5), %%io:format("Schemas: ~p~nRes: ~p~ninhertih_options(S): ~p~n", %% [Schemas,Res,inherit_options(S5)]), - XSDRes = xmerl_xsd:process_validate(Schemas,Res, + XSDRes = xmerl_xsd:process_validate(Schemas, Res, inherit_options(S5)), - handle_schema_result(XSDRes,S5); + handle_schema_result(XSDRes, S5); _ -> - {Res,cleanup(S5)} + {Res, cleanup(S5)} end; _ -> - {Res,cleanup(S5)} + {Res, cleanup(S5)} end, - {Res2, Tail, S6}. + Res3 = + case Document of + true -> + Content = lists:reverse(Prolog, [Res2 | lists:reverse(Misc)]), + #xmlDocument{content = Content}; + false -> + Res2 + end, + {Res3, Tail, S6}. scan_decl(Str, S=#xmerl_scanner{event_fun = Event, @@ -609,11 +638,11 @@ scan_decl(Str, S=#xmerl_scanner{event_fun = Event, data = document}, S), case scan_prolog(Str, S1, _StartPos = 1) of - {T2="<"++_, S2} -> + {_,_,T2="<"++_, S2} -> {{S2#xmerl_scanner.user_state,T2},[],S2}; - {[], S2}-> + {_,_,[], S2}-> {[],[],S2}; - {T2, S2} -> + {_,_,T2, S2} -> {_,_,S3} = scan_content(T2,S2,[],_Attrs=[],S2#xmerl_scanner.space, _Lang=[],_Parents=[],#xmlNamespace{}), {T2,[],S3} @@ -624,14 +653,17 @@ scan_decl(Str, S=#xmerl_scanner{event_fun = Event, %%% prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? %%% %% empty text declarations are handled by the first function clause. -scan_prolog([], S=#xmerl_scanner{continuation_fun = F}, Pos) -> +scan_prolog(T, S, Pos) -> + scan_prolog(T, S, Pos, []). 
+scan_prolog([], S=#xmerl_scanner{continuation_fun = F}, Pos, Acc) -> ?dbg("cont()...~n", []), - F(fun(MoreBytes, S1) -> scan_prolog(MoreBytes, S1, Pos) end, - fun(S1) -> {[], S1} end, + F(fun(MoreBytes, S1) -> scan_prolog(MoreBytes, S1, Pos, Acc) end, + fun(S1) -> {Acc, Pos, [], S1} end, S); -scan_prolog("<?xml"++T,S0=#xmerl_scanner{encoding=Charset0,col=Col,line=L},Pos) - when ?whitespace(hd(T)) -> - {Charset,T3, S3}= +scan_prolog("<?xml"++T, + S0=#xmerl_scanner{encoding=Charset0,col=Col,line=L}, + Pos,Acc) when ?whitespace(hd(T)) -> + {Charset, T3, S3} = if Col==1,L==1,S0#xmerl_scanner.text_decl==true -> ?dbg("prolog(\"<?xml\")~n", []), @@ -639,13 +671,13 @@ scan_prolog("<?xml"++T,S0=#xmerl_scanner{encoding=Charset0,col=Col,line=L},Pos) {_,T1,S1} = mandatory_strip(T,S), {Decl,T2, S2}=scan_text_decl(T1,S1), Encoding=Decl#xmlDecl.encoding, - {Encoding,T2, S2#xmerl_scanner{encoding=Encoding}}; + {Encoding, T2, S2#xmerl_scanner{encoding=Encoding}}; Col==1,L==1 -> ?dbg("prolog(\"<?xml\")~n", []), ?bump_col(5), {Decl,T2, S2}=scan_xml_decl(T, S), Encoding=Decl#xmlDecl.encoding, - {Encoding,T2, S2#xmerl_scanner{encoding=Encoding}}; + {Encoding, T2, S2#xmerl_scanner{encoding=Encoding}}; true -> ?fatal({xml_declaration_must_be_first_in_doc,Col,L},S0) end, @@ -659,7 +691,7 @@ scan_prolog("<?xml"++T,S0=#xmerl_scanner{encoding=Charset0,col=Col,line=L},Pos) %% Now transform to declared character set. if Charset==Charset0 -> % Document already transformed to this charset! - scan_prolog(T3, S3, Pos); + scan_prolog(T3, S3, Pos, Acc); Charset0=/=undefined -> %% For example may an external entity %% have the BOM for utf-16 and the internal @@ -668,17 +700,18 @@ scan_prolog("<?xml"++T,S0=#xmerl_scanner{encoding=Charset0,col=Col,line=L},Pos) %% 'iso-10646-utf-1', and Charset will be 'utf-16', all %% legal. 
%% - scan_prolog(T3,S3#xmerl_scanner{encoding=Charset0},Pos); + scan_prolog(T3,S3#xmerl_scanner{encoding=Charset0},Pos,Acc); Charset == "utf-8" -> - scan_prolog(T3, S3, Pos); + scan_prolog(T3, S3, Pos, Acc); Charset=/=undefined -> % Document not previously transformed T4=xmerl_ucs:to_unicode(T3,list_to_atom(Charset)), - scan_prolog(T4, S3, Pos); + scan_prolog(T4, S3, Pos, Acc); true -> % No encoding info given - scan_prolog(T3, S3, Pos) + scan_prolog(T3, S3, Pos, Acc) end; -scan_prolog("<!DOCTYPE" ++ T, S0=#xmerl_scanner{environment=prolog, - encoding=_Charset}, Pos) -> +scan_prolog("<!DOCTYPE" ++ T, + S0=#xmerl_scanner{environment=prolog,encoding=_Charset}, + Pos, Acc) -> ?dbg("prolog(\"<!DOCTYPE\")~n", []), ?bump_col(9), %% If no known character set assume it is UTF-8 @@ -687,10 +720,13 @@ scan_prolog("<!DOCTYPE" ++ T, S0=#xmerl_scanner{environment=prolog, true -> T end, {T2, S1} = scan_doctype(T1, S), - scan_misc(T2, S1, Pos); -scan_prolog(Str="%"++_T,S=#xmerl_scanner{environment={external,_}},_Pos) -> - scan_ext_subset(Str,S); -scan_prolog(Str, S0 = #xmerl_scanner{user_state=_US,encoding=_Charset},Pos) -> + scan_misc(T2, S1, Pos, Acc); +scan_prolog(Str="%"++_T,S=#xmerl_scanner{environment={external,_}}, + Pos,Acc) -> + {T, S1} = scan_ext_subset(Str,S), + {Acc, Pos, T, S1}; +scan_prolog(Str, S0 = #xmerl_scanner{user_state=_US,encoding=_Charset}, + Pos,Acc) -> ?dbg("prolog(\"<\")~n", []), %% Check for Comments, PI before possible DOCTYPE declaration @@ -700,26 +736,28 @@ scan_prolog(Str, S0 = #xmerl_scanner{user_state=_US,encoding=_Charset},Pos) -> %% Charset==undefined -> xmerl_ucs:to_unicode(Str,'utf-8'); true -> Str end, - {T1, S1}=scan_misc(T, S, Pos), - scan_prolog2(T1,S1,Pos). + {Acc1, Pos1, T1, S1}=scan_misc(T, S, Pos, Acc), + scan_prolog2(T1,S1,Pos1,Acc1). 
-scan_prolog2([], S=#xmerl_scanner{continuation_fun = F}, Pos) -> +scan_prolog2([], S=#xmerl_scanner{continuation_fun = F}, Pos, Acc) -> ?dbg("cont()...~n", []), - F(fun(MoreBytes, S1) -> scan_prolog2(MoreBytes, S1, Pos) end, - fun(S1) -> {[], S1} end, + F(fun(MoreBytes, S1) -> scan_prolog2(MoreBytes, S1, Pos, Acc) end, + fun(S1) -> {Acc, Pos, [], S1} end, S); -scan_prolog2("<!DOCTYPE" ++ T, S0=#xmerl_scanner{environment=prolog}, Pos) -> +scan_prolog2("<!DOCTYPE" ++ T, S0=#xmerl_scanner{environment=prolog}, + Pos, Acc) -> ?dbg("prolog(\"<!DOCTYPE\")~n", []), ?bump_col(9), {T1, S1} = scan_doctype(T, S), - scan_misc(T1, S1, Pos); -scan_prolog2(Str = "<!" ++ _, S, _Pos) -> + scan_misc(T1, S1, Pos, Acc); +scan_prolog2(Str = "<!" ++ _, S, Pos, Acc) -> ?dbg("prolog(\"<!\")~n", []), %% In e.g. a DTD, we jump directly to markup declarations - scan_ext_subset(Str, S); -scan_prolog2(Str, S0 = #xmerl_scanner{user_state=_US},Pos) -> + {T, S1} = scan_ext_subset(Str, S), + {Acc, Pos, T, S1}; +scan_prolog2(Str, S0 = #xmerl_scanner{user_state=_US},Pos,Acc) -> ?dbg("prolog(\"<\")~n", []), %% Here we consider the DTD provided by doctype_DTD option, @@ -733,7 +771,7 @@ scan_prolog2(Str, S0 = #xmerl_scanner{user_state=_US},Pos) -> end, %% Check for more Comments and PI after DOCTYPE declaration % ?bump_col(1), - scan_misc(Str, S1, Pos). + scan_misc(Str, S1, Pos, Acc). @@ -743,26 +781,46 @@ scan_prolog2(Str, S0 = #xmerl_scanner{user_state=_US},Pos) -> %% - Neither of Comment and PI are returned in the resulting parsed %% structure. %% - scan_misc/3 implements Misc* as that is how the rule is always used -scan_misc([], S=#xmerl_scanner{continuation_fun = F}, Pos) -> +scan_misc(T, S, Pos) -> + scan_misc(T, S, Pos, []). 
+scan_misc([], S=#xmerl_scanner{continuation_fun = F}, Pos, Acc) -> ?dbg("cont()...~n", []), - F(fun(MoreBytes, S1) -> scan_misc(MoreBytes, S1, Pos) end, - fun(S1) -> {[], S1} end, + F(fun(MoreBytes, S1) -> scan_misc(MoreBytes, S1, Pos, Acc) end, + fun(S1) -> {Acc, Pos, [], S1} end, S); -scan_misc("<!--" ++ T, S0, Pos) -> % Comment +scan_misc("<!--" ++ T, S0=#xmerl_scanner{acc_fun = F, comments=CF}, Pos, Acc) -> % Comment ?bump_col(4), - {_, T1, S1} = scan_comment(T, S, Pos, _Parents = [], _Lang = []), - scan_misc(T1,S1,Pos); -scan_misc("<?" ++ T, S0, Pos) -> % PI + {C, T1, S1} = scan_comment(T, S, Pos, _Parents = [], _Lang = []), + case CF of + true -> + {Acc2, Pos2, S3} = + case F(C, Acc, S1) of + {Acc1, S2} -> + {Acc1, Pos + 1, S2}; + {Acc1, Pos1, S2} -> + {Acc1, Pos1, S2} + end, + scan_misc(T1, S3, Pos2, Acc2); + false -> + scan_misc(T1, S1, Pos, Acc) + end; +scan_misc("<?" ++ T, S0=#xmerl_scanner{acc_fun = F}, Pos, Acc) -> % PI ?dbg("prolog(\"<?\")~n", []), ?bump_col(2), - {_PI, T1, S1} = scan_pi(T, S, Pos), - scan_misc(T1,S1,Pos); -scan_misc(T=[H|_T], S, Pos) when ?whitespace(H) -> + {PI, T1, S1} = scan_pi(T, S, Pos, []), + {Acc2, Pos2, S3} = case F(PI, Acc, S1) of + {Acc1, S2} -> + {Acc1, Pos + 1, S2}; + {Acc1, Pos1, S2} -> + {Acc1, Pos1, S2} + end, + scan_misc(T1,S3,Pos2,Acc2); +scan_misc(T=[H|_T], S, Pos, Acc) when ?whitespace(H) -> ?dbg("prolog(whitespace)~n", []), {_,T1,S1}=strip(T,S), - scan_misc(T1,S1,Pos); -scan_misc(T,S,_Pos) -> - {T,S}. + scan_misc(T1,S1,Pos,Acc); +scan_misc(T,S,Pos,Acc) -> + {Acc,Pos,T,S}. cleanup(S=#xmerl_scanner{keep_rules = false, @@ -789,7 +847,8 @@ scan_xml_decl(T, S) -> Attr = #xmlAttribute{name = version, parents = [{xml, _XMLPos = 1}], value = Vsn}, - scan_xml_decl(T4, S4, #xmlDecl{attributes = [Attr]}). + scan_xml_decl(T4, S4, #xmlDecl{vsn = Vsn, + attributes = [Attr]}). 
scan_xml_decl([], S=#xmerl_scanner{continuation_fun = F}, Decl) -> ?dbg("cont()...~n", []), @@ -1025,50 +1084,53 @@ xml_vsn([H|T], S=#xmerl_scanner{col = C}, Delim, Acc) -> %%%%%%% [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' -scan_pi([], S=#xmerl_scanner{continuation_fun = F}, Pos) -> +scan_pi([], S=#xmerl_scanner{continuation_fun = F}, Pos, Ps) -> ?dbg("cont()...~n", []), - F(fun(MoreBytes, S1) -> scan_pi(MoreBytes, S1, Pos) end, + F(fun(MoreBytes, S1) -> scan_pi(MoreBytes, S1, Pos, Ps) end, fun(S1) -> ?fatal(unexpected_end, S1) end, S); -scan_pi(Str = [H1,H2,H3 | T],S0=#xmerl_scanner{line = L, col = C}, Pos) +scan_pi(Str = [H1,H2,H3 | T],S0=#xmerl_scanner{line = L, col = C}, Pos, Ps) when H1==$x;H1==$X -> %% names beginning with [xX][mM][lL] are reserved for future use. ?bump_col(3), if ((H2==$m) or (H2==$M)) and ((H3==$l) or (H3==$L)) -> - scan_wellknown_pi(T,S,Pos); + scan_wellknown_pi(T,S,Pos,Ps); true -> {Target, _NamespaceInfo, T1, S1} = scan_name(Str, S), - scan_pi(T1, S1, Target, L, C, Pos, []) + scan_pi(T1, S1, Target, L, C, Pos, Ps, []) end; -scan_pi(Str, S=#xmerl_scanner{line = L, col = C}, Pos) -> +scan_pi(Str, S=#xmerl_scanner{line = L, col = C}, Pos, Ps) -> {Target, _NamespaceInfo, T1, S1} = scan_name(Str, S), - scan_pi(T1, S1, Target, L, C, Pos,[]). + scan_pi(T1, S1, Target, L, C, Pos, Ps, []). %%% More info on xml-stylesheet can be found at: %%% "Associating Style Sheets with XML documents", Version 1.0, %%% W3C Recommendation 29 June 1999 (http://www.w3.org/TR/xml-stylesheet/) -scan_wellknown_pi("-stylesheet"++T, S0=#xmerl_scanner{line=L,col=C},Pos) -> +scan_wellknown_pi("-stylesheet"++T, S0=#xmerl_scanner{line=L,col=C},Pos,Ps) -> ?dbg("prolog(\"<?xml-stylesheet\")~n", []), ?bump_col(16), - scan_pi(T, S, "xml-stylesheet",L,C,Pos,[]); -scan_wellknown_pi(Str,S,_Pos) -> + scan_pi(T, S, "xml-stylesheet",L,C,Pos,Ps,[]); +scan_wellknown_pi(Str,S,_Pos,_Ps) -> ?fatal({invalid_target_name, lists:sublist(Str, 1, 10)}, S). 
-scan_pi([], S=#xmerl_scanner{continuation_fun = F}, Target,L, C, Pos, Acc) -> +scan_pi([], S=#xmerl_scanner{continuation_fun = F}, Target, + L, C, Pos, Ps, Acc) -> ?dbg("cont()...~n", []), - F(fun(MoreBytes, S1) -> scan_pi(MoreBytes, S1, Target, L, C, Pos, Acc) end, + F(fun(MoreBytes, S1) -> scan_pi(MoreBytes, S1, Target, + L, C, Pos, Ps, Acc) end, fun(S1) -> ?fatal(unexpected_end, S1) end, S); scan_pi("?>" ++ T, S0 = #xmerl_scanner{hook_fun = Hook, event_fun = Event}, - Target, L, C, Pos, Acc) -> + Target, L, C, Pos, Ps, Acc) -> ?bump_col(2), PI = #xmlPI{name = Target, + parents = Ps, pos = Pos, value = lists:reverse(Acc)}, S1 = #xmerl_scanner{} = Event(#xmerl_event{event = ended, @@ -1077,22 +1139,25 @@ scan_pi("?>" ++ T, S0 = #xmerl_scanner{hook_fun = Hook, data = PI}, S), {Ret, S2} = Hook(PI, S1), {Ret, T, S2}; -scan_pi([H|T], S, Target, L, C, Pos, Acc) when ?whitespace(H) -> +scan_pi([H|T], S, Target, L, C, Pos, Ps, Acc) when ?whitespace(H) -> ?strip1, - scan_pi2(T1, S1, Target, L, C, Pos, Acc); -scan_pi([H|_T],S,_Target, _L, _C, _Pos, _Acc) -> + scan_pi2(T1, S1, Target, L, C, Pos, Ps, Acc); +scan_pi([H|_T],S,_Target, _L, _C, _Pos, _Ps, _Acc) -> ?fatal({expected_whitespace_OR_end_of_PI,{char,H}}, S). 
-scan_pi2([], S=#xmerl_scanner{continuation_fun = F}, Target,L, C, Pos, Acc) -> +scan_pi2([], S=#xmerl_scanner{continuation_fun = F}, Target, + L, C, Pos, Ps, Acc) -> ?dbg("cont()...~n", []), - F(fun(MoreBytes, S1) -> scan_pi2(MoreBytes, S1, Target, L, C, Pos, Acc) end, + F(fun(MoreBytes, S1) -> scan_pi2(MoreBytes, S1, Target, + L, C, Pos, Ps, Acc) end, fun(S1) -> ?fatal(unexpected_end, S1) end, S); scan_pi2("?>" ++ T, S0 = #xmerl_scanner{hook_fun = Hook, event_fun = Event}, - Target, L, C, Pos, Acc) -> + Target, L, C, Pos, Ps, Acc) -> ?bump_col(2), PI = #xmlPI{name = Target, + parents = Ps, pos = Pos, value = lists:reverse(Acc)}, S1 = #xmerl_scanner{} = Event(#xmerl_event{event = ended, @@ -1101,10 +1166,10 @@ scan_pi2("?>" ++ T, S0 = #xmerl_scanner{hook_fun = Hook, data = PI}, S), {Ret, S2} = Hook(PI, S1), {Ret, T, S2}; -scan_pi2(Str, S0, Target, L, C, Pos, Acc) -> +scan_pi2(Str, S0, Target, L, C, Pos, Ps, Acc) -> ?bump_col(1), {Ch,T} = wfc_legal_char(Str,S), - scan_pi2(T, S, Target, L, C, Pos, [Ch|Acc]). + scan_pi2(T, S, Target, L, C, Pos, Ps, [Ch|Acc]). @@ -1575,7 +1640,7 @@ scan_markup_decl("<!--" ++ T, S0) -> scan_comment(T, S); scan_markup_decl("<?" 
++ T, S0) -> ?bump_col(2), - {_PI, T1, S1} = scan_pi(T, S,_Pos=markup), + {_PI, T1, S1} = scan_pi(T, S,_Pos=markup,[]), strip(T1, S1); scan_markup_decl("<!ELEMENT" ++ T, #xmerl_scanner{rules_read_fun = Read, @@ -1981,7 +2046,7 @@ scan_element(T, S, Pos) -> scan_element(T, S=#xmerl_scanner{line=L,col=C}, Pos, SpaceDefault,Lang, Parents, NS) -> {Name, NamespaceInfo, T1, S1} = scan_name(T, S), - vc_Element_valid(Name,S), + vc_Element_valid(Name,NamespaceInfo,S), ?strip2, scan_element(T2, S2, Pos, Name, L, C, _Attrs = [], Lang, Parents, NamespaceInfo, NS, @@ -2016,7 +2081,8 @@ scan_element("/>" ++ T, S0 = #xmerl_scanner{hook_fun = Hook, Attrs = lists:reverse(Attrs0), E=processed_whole_element(S, Pos, Name, Attrs, Lang, Parents,NSI,Namespace), - wfc_unique_att_spec(Attrs,S), + #xmlElement{attributes = Attrs1} = E, + wfc_unique_att_spec(Attrs1,S), S1 = #xmerl_scanner{} = Event(#xmerl_event{event = ended, line = L, col = C, @@ -2074,10 +2140,10 @@ scan_element(T, S, Pos, Name, StartL, StartC, Attrs, Lang, Parents, {AttName, NamespaceInfo, T1, S1} = scan_name(T, S), {T2, S2} = scan_eq(T1, S1), {AttType,_DefaultDecl} = get_att_type(S2,AttName,Name), - {AttValue, T3, S3,IsNorm} = scan_att_value(T2, S2, AttType), + {AttValue, T3a, S3a,IsNorm} = scan_att_value(T2, S2, AttType), %% check_default_value(S3,DefaultDecl,AttValue), NewNS = check_namespace(AttName, NamespaceInfo, AttValue, NS), - wfc_whitespace_betw_attrs(hd(T3),S3), + {T3,S3} = wfc_whitespace_betw_attrs(T3a,S3a), ?strip4, AttrPos = case Attrs of [] -> @@ -2086,9 +2152,10 @@ scan_element(T, S, Pos, Name, StartL, StartC, Attrs, Lang, Parents, P+1 end, Attr = #xmlAttribute{name = AttName, + parents = [{Name, Pos}|Parents], pos = AttrPos, language = Lang, - namespace = NamespaceInfo, + nsinfo = NamespaceInfo, value = AttValue, normalized = IsNorm}, XMLBase=if @@ -2110,6 +2177,14 @@ scan_element(T, S, Pos, Name, StartL, StartC, Attrs, Lang, Parents, scan_element(T4, S5, Pos, Name, StartL, StartC, [Attr|Attrs], Lang, 
Parents, NSI, NewNS, SpaceDefault). +get_default_attrs(S = #xmerl_scanner{rules_read_fun = Read}, ElemName) -> + case Read(elem_def, ElemName, S) of + #xmlElement{attributes = Attrs} -> + [ {AttName, AttValue} || + {AttName, _, AttValue, _, _} <- Attrs, AttValue =/= no_value ]; + _ -> [] + end. + get_att_type(S=#xmerl_scanner{rules_read_fun=Read},AttName,ElemName) -> case Read(elem_def,ElemName,S) of #xmlElement{attributes = Attrs} -> @@ -2139,6 +2214,23 @@ processed_whole_element(S=#xmerl_scanner{hook_fun = _Hook, Pos, Name, Attrs, Lang, Parents, NSI, Namespace) -> Language = check_language(Attrs, Lang), + AllAttrs = + case S#xmerl_scanner.default_attrs of + true -> + [ #xmlAttribute{name = AttName, + parents = [{Name, Pos} | Parents], + language = Lang, + nsinfo = NSI, + namespace = Namespace, + value = AttValue, + normalized = true} || + {AttName, AttValue} <- get_default_attrs(S, Name), + AttValue =/= no_value, + not lists:keymember(AttName, #xmlAttribute.name, Attrs) ]; + false -> + Attrs + end, + {ExpName, ExpAttrs} = case S#xmerl_scanner.namespace_conformant of true -> @@ -2153,14 +2245,15 @@ processed_whole_element(S=#xmerl_scanner{hook_fun = _Hook, TempNamespace = Namespace#xmlNamespace{default = []}, ExpAttrsX = [A#xmlAttribute{ + namespace=Namespace, expanded_name=expanded_name( A#xmlAttribute.name, - A#xmlAttribute.namespace, + A#xmlAttribute.nsinfo, % NSI, - TempNamespace, S)} || A <- Attrs], + TempNamespace, S)} || A <- AllAttrs], {expanded_name(Name, NSI, Namespace, S), ExpAttrsX}; false -> - {Name, Attrs} + {Name, AllAttrs} end, #xmlElement{name = Name, @@ -2194,10 +2287,32 @@ check_namespace(_, _, _, NS) -> expanded_name(Name, [], #xmlNamespace{default = []}, _S) -> Name; -expanded_name(Name, [], #xmlNamespace{default = URI}, _S) -> - {URI, Name}; -expanded_name(_Name, {"xmlns", Local}, _NS, _S) -> % CHECK THIS /JB - {"xmlns",Local}; +expanded_name(Name, [], #xmlNamespace{default = URI}, S) -> + case URI of + 'http://www.w3.org/XML/1998/namespace' 
-> + ?fatal(cannot_bind_default_namespace_to_xml_namespace_name, S); + 'http://www.w3.org/2000/xmlns/' -> + ?fatal(cannot_bind_default_namespace_to_xmlns_namespace_name, S); + _ -> + {URI, Name} + end; +expanded_name(Name, N = {"xmlns", Local}, #xmlNamespace{nodes = Ns}, S) -> + {_, Value} = lists:keyfind(Local, 1, Ns), + case Name of + 'xmlns:xml' when Value =/= 'http://www.w3.org/XML/1998/namespace' -> + ?fatal({xml_prefix_cannot_be_redeclared, Value}, S); + 'xmlns:xmlns' -> + ?fatal({xmlns_prefix_cannot_be_declared, Value}, S); + _ -> + case Value of + 'http://www.w3.org/XML/1998/namespace' -> + ?fatal({cannot_bind_prefix_to_xml_namespace, Local}, S); + 'http://www.w3.org/2000/xmlns/' -> + ?fatal({cannot_bind_prefix_to_xmlns_namespace, Local}, S); + _ -> + N + end + end; expanded_name(_Name, {Prefix, Local}, #xmlNamespace{nodes = Ns}, S) -> case lists:keysearch(Prefix, 1, Ns) of {value, {_, URI}} -> @@ -2276,7 +2391,7 @@ scan_att_chars([H|T], S0, H, Acc, TmpAcc,AttType,IsNorm) -> % End quote true -> normalize(Acc,S,IsNorm) end, - {lists:reverse(Acc2), T, S2,IsNorm2}; + {lists:flatten(lists:reverse(Acc2)), T, S2,IsNorm2}; scan_att_chars("&" ++ T, S0, Delim, Acc, TmpAcc,AT,IsNorm) -> % Reference ?bump_col(1), {ExpRef, T1, S1} = scan_reference(T, S), @@ -2449,9 +2564,23 @@ scan_content("&" ++ T, S0, Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,[]) - _ -> scan_content(string_to_char_set(S1#xmerl_scanner.encoding,ExpRef)++T1,S1,Pos,Name,Attrs,Space,Lang,Parents,NS,Acc,[]) end; -scan_content("<!--" ++ T, S, Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,[]) -> - {_, T1, S1} = scan_comment(T, S, Pos, Parents, Lang), - scan_content(T1, S1, Pos+1, Name, Attrs, Space, Lang, Parents, NS, Acc,[]); +scan_content("<!--" ++ T, S0=#xmerl_scanner{acc_fun = F, comments=CF}, Pos, Name, Attrs, Space, + Lang, Parents, NS, Acc,[]) -> + ?bump_col(4), + {C, T1, S1} = scan_comment(T, S, Pos, Parents, Lang), + case CF of + true -> + {Acc2, Pos2, S3} = + case F(C, Acc, S1) of + {Acc1, 
S2} -> + {Acc1, Pos + 1, S2}; + {Acc1, Pos1, S2} -> + {Acc1, Pos1, S2} + end, + scan_content(T1, S3, Pos2, Name, Attrs, Space, Lang, Parents, NS, Acc2,[]); + false -> + scan_content(T1, S1, Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,[]) + end; scan_content("<" ++ T, S0, Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,[]) -> ?bump_col(1), {Markup, T1, S1} = @@ -2508,9 +2637,9 @@ scan_content_markup("![CDATA[" ++ T, S0, Pos, _Name, _Attrs, _Space, _Lang, Parents, _NS) -> ?bump_col(8), scan_cdata(T, S, Pos, Parents); -scan_content_markup("?"++T,S0,Pos,_Name,_Attrs,_Space,_Lang,_Parents,_NS) -> +scan_content_markup("?"++T,S0,Pos,_Name,_Attrs,_Space,_Lang,Parents,_NS) -> ?bump_col(1), - scan_pi(T, S, Pos); + scan_pi(T, S, Pos, Parents); scan_content_markup(T, S, Pos, _Name, _Attrs, Space, Lang, Parents, NS) -> scan_element(T, S, Pos, Space, Lang, Parents, NS). @@ -3259,12 +3388,18 @@ mandatory_delimeter_wfc(T,S) -> wfc_unique_att_spec([],_S) -> ok; -wfc_unique_att_spec([#xmlAttribute{name=N}|Atts],S) -> +wfc_unique_att_spec([#xmlAttribute{name=N,expanded_name=EN}|Atts],S) -> case lists:keymember(N,#xmlAttribute.name,Atts) of true -> ?fatal({error,{unique_att_spec_required,N}},S); _ -> - wfc_unique_att_spec(Atts,S) + case S#xmerl_scanner.namespace_conformant andalso + lists:keymember(EN, #xmlAttribute.expanded_name, Atts) of + true -> + ?fatal({error,{unique_att_spec_required,EN}},S); + _ -> + wfc_unique_att_spec(Atts,S) + end end. wfc_legal_char(Chars,S) when is_list(Chars)-> @@ -3284,12 +3419,17 @@ wfc_legal_char(Ch,S) -> end. 
-wfc_whitespace_betw_attrs(WS,_S) when ?whitespace(WS) -> - ok; -wfc_whitespace_betw_attrs($/,_S) -> - ok; -wfc_whitespace_betw_attrs($>,_S) -> - ok; +wfc_whitespace_betw_attrs([WS |_]=L,S) when ?whitespace(WS) -> + {L,S}; +wfc_whitespace_betw_attrs([$/ |_]=L,S) -> + {L,S}; +wfc_whitespace_betw_attrs([$> |_]=L,S) -> + {L,S}; +wfc_whitespace_betw_attrs([],S=#xmerl_scanner{continuation_fun = F}) -> + ?dbg("cont()...~n", []), + F(fun(MoreBytes, S1) -> wfc_whitespace_betw_attrs(MoreBytes, S1) end, + fun(S1) -> ?fatal(unexpected_end, S1) end, + S); wfc_whitespace_betw_attrs(_,S) -> ?fatal({whitespace_required_between_attributes},S). @@ -3308,6 +3448,11 @@ wfc_Internal_parsed_entity(internal,Value,S) -> wfc_Internal_parsed_entity(_,_,_) -> ok. +vc_Element_valid(_Name, {"xmlns", _}, + S = #xmerl_scanner{namespace_conformant = true}) -> + ?fatal({error,{illegal_element_prefix,xmlns}},S); +vc_Element_valid(Name, _, S) -> + vc_Element_valid(Name, S). vc_Element_valid(_Name,#xmerl_scanner{environment=internal_parsed_entity}) -> ok; @@ -3912,7 +4057,7 @@ schemaLocations(El,#xmerl_scanner{schemaLocation=SL}) -> schemaLocations(#xmlElement{attributes=Atts,xmlbase=_Base}) -> Pred = fun(#xmlAttribute{name=schemaLocation}) -> false; - (#xmlAttribute{namespace={_,"schemaLocation"}}) -> false; + (#xmlAttribute{nsinfo={_,"schemaLocation"}}) -> false; (_) -> true end, case lists:dropwhile(Pred,Atts) of diff --git a/lib/xmerl/src/xmerl_ucs.erl b/lib/xmerl/src/xmerl_ucs.erl index 7c45c838ab..6550a9d954 100644 --- a/lib/xmerl/src/xmerl_ucs.erl +++ b/lib/xmerl/src/xmerl_ucs.erl @@ -1,19 +1,19 @@ %% %% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2009. All Rights Reserved. -%% +%% +%% Copyright Ericsson AB 2005-2011. All Rights Reserved. +%% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in %% compliance with the License. 
You should have received a copy of the %% Erlang Public License along with this software. If not, it can be %% retrieved online at http://www.erlang.org/. -%% +%% %% Software distributed under the License is distributed on an "AS IS" %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See %% the License for the specific language governing rights and limitations %% under the License. -%% +%% %% %CopyrightEnd% %% @@ -43,6 +43,7 @@ -export([to_utf16be/1, from_utf16be/1, from_utf16be/2]). -export([to_utf16le/1, from_utf16le/1, from_utf16le/2]). -export([to_utf8/1, from_utf8/1]). +-export([from_latin9/1]). %%% NB: Non-canonical UTF-8 encodings and incorrectly used %%% surrogate-pair codes are disallowed by this code. There are @@ -177,13 +178,27 @@ to_utf8(List) when is_list(List) -> lists:flatmap(fun to_utf8/1, List); to_utf8(Ch) -> char_to_utf8(Ch). from_utf8(Bin) when is_binary(Bin) -> from_utf8(binary_to_list(Bin)); -from_utf8(List) -> +from_utf8(List) -> case expand_utf8(List) of {Result,0} -> Result; {_Res,_NumBadChar} -> exit({ucs,{bad_utf8_character_code}}) end. +%%% Latin9 support +from_latin9(Bin) when is_binary(Bin) -> from_latin9(binary_to_list(Bin)); +from_latin9(List) -> + [ latin9_to_ucs4(Char) || Char <- List]. + +latin9_to_ucs4(16#A4) -> 16#20AC; +latin9_to_ucs4(16#A6) -> 16#160; +latin9_to_ucs4(16#A8) -> 16#161; +latin9_to_ucs4(16#B4) -> 16#17D; +latin9_to_ucs4(16#B8) -> 16#17E; +latin9_to_ucs4(16#BC) -> 16#152; +latin9_to_ucs4(16#BD) -> 16#153; +latin9_to_ucs4(16#BE) -> 16#178; +latin9_to_ucs4(Other) -> Other. @@ -238,7 +253,7 @@ from_ucs4le(Bin,Acc,Tail) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%% UCS-2 support -%%% FIXME! Don't know how to encode UCS-2!! +%%% FIXME! Don't know how to encode UCS-2!! %%% Currently I just encode as UCS-4, but strips the 16 higher bits. 
char_to_ucs2be(Ch) -> true = is_iso10646(Ch), @@ -259,15 +274,15 @@ from_ucs2be(Bin,Acc,Tail) -> char_to_ucs2le(Ch) -> true = is_iso10646(Ch), - [(Ch bsr 16) band 16#FF, - (Ch bsr 24)]. + [Ch band 16#FF, + (Ch bsr 8) band 16#FF]. from_ucs2le(<<Ch:16/little-signed-integer, Rest/binary>>,Acc,Tail) -> if Ch < 0; Ch >= 16#D800, Ch < 16#E000; Ch =:= 16#FFFE; Ch =:= 16#FFFF -> exit({bad_character_code,Ch}); true -> - from_ucs4le(Rest,[Ch|Acc],Tail) + from_ucs2le(Rest,[Ch|Acc],Tail) end; from_ucs2le(<<>>,Acc,Tail) -> lists:reverse(Acc,Tail); @@ -476,6 +491,8 @@ to_unicode(Input,Cs) when Cs=='iso_8859-1:1987';Cs=='iso-ir-100'; Cs=='l1';Cs=='ibm819'; Cs=='cp819';Cs=='csisolatin1' -> Input; +to_unicode(Input,Cs) when Cs=='iso_8859-15';Cs=='iso-8859-15';Cs=='latin9' -> + from_latin9(Input); % to_unicode(Input,Cs) when Cs=='mnemonic';Cs=='"mnemonic+ascii+38'; % Cs=='mnem';Cs=='"mnemonic+ascii+8200' -> % from_mnemonic(Input); diff --git a/lib/xmerl/src/xmerl_validate.erl b/lib/xmerl/src/xmerl_validate.erl index 893e23ca34..4028fef2b9 100644 --- a/lib/xmerl/src/xmerl_validate.erl +++ b/lib/xmerl/src/xmerl_validate.erl @@ -399,25 +399,28 @@ test_attribute_value(_Rule,Attr,_,_) -> %% +type valid_contents([rule()],[xmlElement()])-> %% [xmlElement() | {error,???}. 
-valid_contents(Rule,XMLS,Rules,S,WSActionMode)-> - case parse(Rule,XMLS,Rules,WSActionMode,S) of - {XML_N,[]}-> - lists:flatten(XML_N); - {_,[#xmlElement{name=Name}|_T]} -> - exit({error,{element,Name,isnt_comprise_in_the_rule,Rule}}); - {_,[#xmlText{}=Txt|_T]} -> - exit({error,{element,text,Txt,isnt_comprise_in_the_rule,Rule}}); - {error,Reason} -> - {error,Reason}; - {error,Reason,N} -> - {error,Reason,N} +valid_contents(Rule, XMLS, Rules, S, WSActionMode)-> + case parse(Rule, XMLS, Rules, WSActionMode, S) of + {error, Reason} -> + {error, Reason}; + {error, Reason, N} -> + {error, Reason, N}; + {XML_N, Rest} -> %The list may consist of xmlComment{} records + case lists:dropwhile(fun(X) when is_record(X, xmlComment) -> true; (_) -> false end, Rest) of + [] -> + lists:flatten(XML_N); + [#xmlElement{name=Name} |_T] -> + exit({error, {element, Name, isnt_comprise_in_the_rule, Rule}}); + [#xmlText{} = Txt |_T] -> + exit({error, {element, text, Txt, isnt_comprise_in_the_rule, Rule}}) + end end. 
-parse({'*',SubRule},XMLS,Rules,WSaction,S)-> - star(SubRule,XMLS,Rules,WSaction,[],S); -parse({'+',SubRule},XMLS,Rules,WSaction,S) -> - plus(SubRule,XMLS,Rules,WSaction,S); -parse({choice,CHOICE},XMLS,Rules,WSaction,S)-> +parse({'*', SubRule}, XMLS, Rules, WSaction, S)-> + star(SubRule, XMLS, Rules, WSaction, [], S); +parse({'+',SubRule}, XMLS, Rules, WSaction, S) -> + plus(SubRule, XMLS, Rules, WSaction, S); +parse({choice,CHOICE}, XMLS, Rules, WSaction, S)-> % case XMLS of % [] -> % io:format("~p~n",[{choice,CHOICE,[]}]); @@ -426,47 +429,49 @@ parse({choice,CHOICE},XMLS,Rules,WSaction,S)-> % [#xmlText{value=V}|_] -> % io:format("~p~n",[{choice,CHOICE,{text,V}}]) % end, - choice(CHOICE,XMLS,Rules,WSaction,S); -parse(empty,[],_Rules,_WSaction,_S) -> - {[],[]}; -parse({'?',SubRule},XMLS,Rules,_WSaction,S)-> - question(SubRule,XMLS,Rules,S); -parse({seq,List},XMLS,Rules,WSaction,S) -> - seq(List,XMLS,Rules,WSaction,S); -parse(El_Name,[#xmlElement{name=El_Name}=XML|T],Rules,_WSaction,S) + choice(CHOICE, XMLS, Rules, WSaction, S); +parse(empty, [], _Rules, _WSaction, _S) -> + {[], []}; +parse({'?', SubRule}, XMLS, Rules, _WSaction, S)-> + question(SubRule, XMLS, Rules, S); +parse({seq,List}, XMLS, Rules, WSaction, S) -> + seq(List, XMLS, Rules, WSaction, S); +parse(El_Name, [#xmlElement{name=El_Name} = XML |T], Rules, _WSaction, S) when is_atom(El_Name)-> - case do_validation(read_rules(Rules,El_Name),XML,Rules,S) of - {error,R} -> + case do_validation(read_rules(Rules, El_Name), XML, Rules, S) of + {error, R} -> % {error,R}; exit(R); - {error,R,_N}-> + {error, R, _N}-> % {error,R,N}; exit(R); XML_-> - {[XML_],T} + {[XML_], T} end; -parse(any,Cont,Rules,_WSaction,S) -> - case catch parse_any(Cont,Rules,S) of - Err = {error,_} -> Err; - ValidContents -> {ValidContents,[]} +parse(any, Cont, Rules, _WSaction, S) -> + case catch parse_any(Cont, Rules, S) of + Err = {error, _} -> Err; + ValidContents -> {ValidContents, []} end; 
-parse(El_Name,[#xmlElement{name=Name}|_T]=S,_Rules,_WSa,_S) when is_atom(El_Name)-> +parse(El_Name, [#xmlElement{name=Name} |_T] = XMLS, _Rules, _WSa, _S) when is_atom(El_Name) -> {error, - {element_seq_not_conform,{wait,El_Name},{is,Name}}, - {{next,S},{act,[]}} }; -parse(_El_Name,[#xmlPI{}=H|T],_Rules,_WSa,_S) -> - {[H],T}; -parse('#PCDATA',XML,_Rules,_WSa,_S)-> + {element_seq_not_conform,{wait, El_Name}, {is, Name}}, + {{next, XMLS}, {act, []}}}; +parse(El_Name, [#xmlComment{} |T], Rules, WSa, S) -> + parse(El_Name, T, Rules, WSa, S); +parse(_El_Name, [#xmlPI{} = H |T], _Rules, _WSa, _S) -> + {[H], T}; +parse('#PCDATA', XMLS, _Rules, _WSa, _S)-> %%% PCDATA it is 0 , 1 or more #xmlText{}. - parse_pcdata(XML); -parse(El_Name,[#xmlText{}|_T]=S,_Rules,_WSa,_S)-> + parse_pcdata(XMLS); +parse(El_Name, [#xmlText{}|_T] = XMLS, _Rules, _WSa, _S)-> {error, - {text_in_place_of,El_Name}, - {{next,S},{act,[]}}}; -parse([],_,_,_,_) -> - {error,no_rule}; -parse(Rule,[],_,_,_) -> - {error,{no_xml_element,Rule}}. + {text_in_place_of, El_Name}, + {{next, XMLS}, {act, []}}}; +parse([], _, _, _, _) -> + {error, no_rule}; +parse(Rule, [], _, _, _) -> + {error, {no_xml_element, Rule}}. parse_any([],_Rules,_S) -> []; @@ -618,11 +623,15 @@ el_name(#xmlElement{name=Name})-> parse_pcdata([#xmlText{}=H|T])-> parse_pcdata(T,[H]); +parse_pcdata([#xmlComment{}|T])-> + parse_pcdata(T,[]); parse_pcdata(H) -> {[],H}. parse_pcdata([#xmlText{}=H|T],Acc)-> parse_pcdata(T,Acc++[H]); +parse_pcdata([#xmlComment{}|T],Acc)-> + parse_pcdata(T,Acc); parse_pcdata(H,Acc) -> {Acc,H}. diff --git a/lib/xmerl/src/xmerl_xpath.erl b/lib/xmerl/src/xmerl_xpath.erl index db3d3ac2d6..b3301f2faf 100644 --- a/lib/xmerl/src/xmerl_xpath.erl +++ b/lib/xmerl/src/xmerl_xpath.erl @@ -41,18 +41,13 @@ % xmerl_xpath_parse:parse(xmerl_xpath_scan:tokens("parent::processing-instruction('foo')")). 
%% </pre> %% -%% @type docEntity() = +%% @type nodeEntity() = %% xmlElement() %% | xmlAttribute() %% | xmlText() %% | xmlPI() %% | xmlComment() -%% @type nodeEntity() = -%% xmlElement() -%% | xmlAttribute() -%% | xmlText() -%% | xmlPI() -%% | xmlNamespace() +%% | xmlNsNode() %% | xmlDocument() %% @type option_list(). <p>Options allows to customize the behaviour of the %% XPath scanner. @@ -303,6 +298,17 @@ write_node(#xmlNode{pos = Pos, node = #xmlText{value = Txt, parents = Ps}}) -> {text, Pos, Txt, Ps}; +write_node(#xmlNode{pos = Pos, + node = #xmlComment{parents = Ps}}) -> + {comment, Pos, '', Ps}; +write_node(#xmlNode{pos = Pos, + node = #xmlPI{name = Name, + parents = Ps}}) -> + {processing_instruction, Pos, Name, Ps}; +write_node(#xmlNode{pos = Pos, + node = #xmlNsNode{parents = Ps, + prefix = Prefix}}) -> + {namespace, Pos, Prefix, Ps}; write_node(_) -> other. @@ -330,18 +336,16 @@ eval_path(rel, PathExpr, C = #xmlContext{}) -> Context = C#xmlContext{nodeset = NodeSet}, S = #state{context = Context}, path_expr(PathExpr, S); -eval_path(filter, {PathExpr, PredExpr}, C = #xmlContext{}) -> +eval_path(filter, {PathExpr, {pred, Pred}}, C = #xmlContext{}) -> S = #state{context = C}, - S1 = path_expr(PathExpr, S), - pred_expr(PredExpr, S1). + S1 = match_expr(PathExpr, S), + eval_pred(Pred, S1). -eval_primary_expr(FC = {function_call,_,_},S = #state{context = Context}) -> +eval_primary_expr(PrimExpr, S = #state{context = Context}) -> %% NewNodeSet = xmerl_xpath_pred:eval(FC, Context), - NewNodeSet = xmerl_xpath_lib:eval(primary_expr, FC, Context), + NewNodeSet = xmerl_xpath_lib:eval(primary_expr, PrimExpr, Context), NewContext = Context#xmlContext{nodeset = NewNodeSet}, - S#state{context = NewContext}; -eval_primary_expr(PrimExpr,_S) -> - exit({primary_expression,{not_implemented, PrimExpr}}). + S#state{context = NewContext}. 
%% axis(Axis,NodeTest,Context::xmlContext()) -> xmlContext() @@ -384,8 +388,8 @@ axis1(preceding, Tok, N, Acc, Context) -> match_preceding(Tok, N, Acc, Context); axis1(attribute, Tok, N, Acc, Context) -> match_attribute(Tok, N, Acc, Context); -%axis1(namespace, Tok, N, Acc, Context) -> -% match_namespace(Tok, N, Acc, Context); +axis1(namespace, Tok, N, Acc, Context) -> + match_namespace(Tok, N, Acc, Context); axis1(ancestor_or_self, Tok, N, Acc, Context) -> match_ancestor_or_self(Tok, N, Acc, Context); axis1(descendant_or_self, Tok, N, Acc, Context) -> @@ -627,14 +631,58 @@ node_type(#xmlAttribute{}) -> attribute; node_type(#xmlElement{}) -> element; node_type(#xmlText{}) -> text; node_type(#xmlPI{}) -> processing_instruction; -node_type(#xmlNamespace{}) -> namespace; +node_type(#xmlNsNode{}) -> namespace; +node_type(#xmlComment{}) -> comment; node_type(#xmlDocument{}) -> root_node. %% "The namespace axis contains the namespace nodes of the context node; %% the axis will be empty unless the context node is an element." -%match_namespace(_Tok, _N, _Acc, _Context) -> - %% TODO: IMPLEMENT NAMESPACE AXIS -% erlang:fault(not_yet_implemented). 
+match_namespace(Tok, N, Acc, Context) -> + case N#xmlNode.type of + element -> + #xmlNode{parents = Ps, node = E} = N, + #xmlElement{name = Name, + namespace = NS, + parents = EPs, + pos = Pos} = E, + #xmlNamespace{default = Default, nodes = NSPairs} = NS, + ThisEPs = [{Name, Pos}|EPs], + ThisPs = [N|Ps], + Acc0 = + case Default of + D when D =:= []; D =:= '' -> + {[], 1}; + URI -> + DefaultNSNode = #xmlNsNode{parents = ThisEPs, + pos = 1, + prefix = [], + uri = URI}, + Node = #xmlNode{type = namespace, + node = DefaultNSNode, + parents = ThisPs}, + {[Node], 2} + end, + {Nodes, _I} = + lists:foldr( + fun ({Prefix, URI}, {AccX, I}) -> + NSNode = #xmlNsNode{parents = ThisEPs, + pos = I, + prefix = Prefix, + uri = URI}, + ThisN = #xmlNode{pos = I, + type = namespace, + node = NSNode, + parents = ThisPs}, + {[ThisN | AccX], I + 1} + end, Acc0, NSPairs), + lists:foldr( + fun (ThisN, AccX) -> + match_self(Tok, ThisN, AccX, Context) + end, Acc, Nodes); + _Other -> + %%[] + Acc + end. update_nodeset(Context = #xmlContext{axis_type = AxisType}, NodeSet) -> @@ -655,8 +703,15 @@ update_nodeset(Context = #xmlContext{axis_type = AxisType}, NodeSet) -> node_test(F, N, Context) when is_function(F) -> F(N, Context); +node_test(_Test, #xmlNode{type=attribute,node=#xmlAttribute{name=xmlns}}, + _Context) -> + false; +node_test(_Test, + #xmlNode{type=attribute,node=#xmlAttribute{nsinfo={"xmlns",_Local}}}, + _Context) -> + false; node_test({wildcard, _}, #xmlNode{type=ElAt}, _Context) - when ElAt==element; ElAt==attribute -> + when ElAt==element; ElAt==attribute; ElAt==namespace -> true; node_test({prefix_test, Prefix}, #xmlNode{node = N}, _Context) -> case N of @@ -720,6 +775,9 @@ node_test({name, {_Tag, Prefix, Local}}, [{_Tag, Prefix, Local}, write_node(NSNodes)]), false end; +node_test({name, {_Tag, [], Local}}, + #xmlNode{node = #xmlNsNode{prefix = Local}}, _Context) -> + true; node_test({node_type, NT}, #xmlNode{node = N}, _Context) -> case {NT, N} of {text, #xmlText{}} -> @@ 
-728,14 +786,18 @@ node_test({node_type, NT}, #xmlNode{node = N}, _Context) -> true; {attribute, #xmlAttribute{}} -> true; - {namespace, #xmlNamespace{}} -> + {namespace, #xmlNsNode{}} -> + true; + {comment, #xmlComment{}} -> + true; + {processing_instruction, #xmlPI{}} -> true; _ -> false end; -node_test({processing_instruction, {literal, _, Name}}, - #xmlNode{node = {processing_instruction, Name, _Data}}, _Context) -> - true; +node_test({processing_instruction, Name1}, + #xmlNode{node = #xmlPI{name = Name2}}, _Context) -> + Name1 == atom_to_list(Name2); node_test(_Other, _N, _Context) -> %io:format("node_test(~p, ~p) -> false.~n", [_Other, write_node(_N)]), false. diff --git a/lib/xmerl/src/xmerl_xpath_lib.erl b/lib/xmerl/src/xmerl_xpath_lib.erl index cfd0e36667..096f54ec30 100644 --- a/lib/xmerl/src/xmerl_xpath_lib.erl +++ b/lib/xmerl/src/xmerl_xpath_lib.erl @@ -49,5 +49,7 @@ primary_expr({function_call, F, Args}, C) -> %% here, we should look up the function in the context provided %% by the caller, but we haven't figured this out yet. exit({not_a_core_function, F}) - end. + end; +primary_expr(PrimExpr, _C) -> + exit({primary_expression, {not_implemented, PrimExpr}}). diff --git a/lib/xmerl/src/xmerl_xpath_parse.yrl b/lib/xmerl/src/xmerl_xpath_parse.yrl index 37576b9e61..f60cea0a2e 100644 --- a/lib/xmerl/src/xmerl_xpath_parse.yrl +++ b/lib/xmerl/src/xmerl_xpath_parse.yrl @@ -144,6 +144,7 @@ Expect 2. %% [7] 'NodeTest' -> 'NameTest' : '$1' . 'NodeTest' -> 'node_type' '(' ')' : {node_type, value('$1')} . +'NodeTest' -> 'processing-instruction' '(' ')' : {node_type, value('$1')} . 'NodeTest' -> 'processing-instruction' '(' 'literal' ')' : {processing_instruction, value('$3')} . 
diff --git a/lib/xmerl/src/xmerl_xpath_pred.erl b/lib/xmerl/src/xmerl_xpath_pred.erl index 451a09bee3..855b8599fe 100644 --- a/lib/xmerl/src/xmerl_xpath_pred.erl +++ b/lib/xmerl/src/xmerl_xpath_pred.erl @@ -337,6 +337,9 @@ local_name1([#xmlNode{type=element,node=El}|_]) -> local_name1([#xmlNode{type=attribute,node=Att}|_]) -> #xmlAttribute{name=Name,nsinfo=NSI} = Att, local_name2(Name,NSI); +local_name1([#xmlNode{type=namespace,node=N}|_]) -> + #xmlNsNode{prefix=Prefix} = N, + ?string(Prefix); local_name1([#xmlElement{name = Name, nsinfo = NSI}|_]) -> local_name2(Name,NSI). local_name2(Name, NSI) -> @@ -431,6 +434,9 @@ string_value(N=#xmlObj{}) -> string_value(A=#xmlNode{type=attribute}) -> #xmlAttribute{value=AttVal}=A#xmlNode.node, ?string(AttVal); +string_value(N=#xmlNode{type=namespace}) -> + #xmlNsNode{uri=URI}=N#xmlNode.node, + ?string(atom_to_list(URI)); string_value(El=#xmlNode{type=element}) -> #xmlElement{content=C} = El#xmlNode.node, TextValue = fun(#xmlText{value=T},_Fun) -> T; @@ -442,6 +448,9 @@ string_value(El=#xmlNode{type=element}) -> string_value(T=#xmlNode{type=text}) -> #xmlText{value=Txt} = T#xmlNode.node, ?string(Txt); +string_value(T=#xmlNode{type=comment}) -> + #xmlComment{value=Txt} = T#xmlNode.node, + ?string(Txt); string_value(infinity) -> ?string("Infinity"); string_value(neg_infinity) -> ?string("-Infinity"); string_value(A) when is_atom(A) -> diff --git a/lib/xmerl/src/xmerl_xsd.erl b/lib/xmerl/src/xmerl_xsd.erl index e56f1470c0..ed0890f0d0 100644 --- a/lib/xmerl/src/xmerl_xsd.erl +++ b/lib/xmerl/src/xmerl_xsd.erl @@ -245,21 +245,27 @@ process_validate2({SE,_},Schema,Xml,Opts) -> S4 = validation_options(S3,Opts), validate3(Schema,Xml,S4). 
-validate3(Schema,Xml,S=#xsd_state{errors=[]}) -> - Ret = {_,S2} = - case catch validate_xml(Xml,S) of - {[XML2],[],Sx} -> - {XML2,Sx}; - {XML2,[],Sx} -> - {XML2,Sx}; - {_,UnValidated,Sx} -> - {Xml,acc_errs(Sx,{error_path(UnValidated,Xml#xmlElement.name),?MODULE, - {unvalidated_rest,UnValidated}})}; - _Err = {error,Reason} -> - {Xml,acc_errs(S,Reason)}; - {'EXIT',Reason} -> - {Xml,acc_errs(S,{error_path(Xml,Xml#xmlElement.name),?MODULE, - {undefined,{internal_error,Reason}}})} +validate3(Schema, Xml,S =#xsd_state{errors=[]}) -> + Ret = {_, S2} = + case catch validate_xml(Xml, S) of + _Err = {error, Reason} -> + {Xml, acc_errs(S, Reason)}; + {'EXIT', Reason} -> + {Xml, acc_errs(S, {error_path(Xml, Xml#xmlElement.name), ?MODULE, + {undefined, {internal_error, Reason}}})}; + {XML2, Rest, Sx} -> + case lists:dropwhile(fun(X) when is_record(X, xmlComment) -> true; (_) -> false end, Rest) of + [] -> + case XML2 of + [XML3] -> + {XML3,Sx}; + XML3 -> + {XML3,Sx} + end; + UnValidated -> + {Xml,acc_errs(Sx,{error_path(UnValidated,Xml#xmlElement.name),?MODULE, + {unvalidated_rest,UnValidated}})} + end end, save_to_file(S2,filename:rootname(Schema)++".tab2"), case S2#xsd_state.errors of @@ -287,10 +293,19 @@ process_schema(Schema) -> %% error reason. The error reason may be a list of several errors %% or a single error encountered during the processing. process_schema(Schema,Options) when is_list(Options) -> - S = initiate_state(Options,Schema), - process_schema2(xmerl_scan:file(filename:join(S#xsd_state.xsd_base, Schema)),S,Schema); -process_schema(Schema,State) when is_record(State,xsd_state) -> - process_schema2(xmerl_scan:file(filename:join(State#xsd_state.xsd_base, Schema)),State,Schema). 
+ State = initiate_state(Options,Schema), + process_schema(Schema, State); +process_schema(Schema, State=#xsd_state{fetch_fun=Fetch})-> + case Fetch(Schema, State) of + {ok,{file,File},_} -> + process_schema2(xmerl_scan:file(File), State, Schema); + {ok,{string,Str},_} -> + process_schema2(xmerl_scan:string(Str), State, Schema); + {ok,[],_} -> + {error,enoent}; + Err -> + Err + end. process_schema2(Err={error,_},_,_) -> Err; @@ -319,12 +334,9 @@ process_schemas(Schemas) -> %% error reason. The error reason may be a list of several errors %% or a single error encountered during the processing. process_schemas(Schemas=[{_,Schema}|_],Options) when is_list(Options) -> - process_schemas(Schemas,initiate_state(Options,Schema)); + State = initiate_state(Options,Schema), + process_schemas(Schemas, State); process_schemas([{_NS,Schema}|Rest],State=#xsd_state{fetch_fun=Fetch}) -> -%% case process_external_schema_once(Schema,if_list_to_atom(NS),State) of -%% S when is_record(S,xsd_state) -> -%% case process_schema(filename:join([State#xsd_state.xsd_base,Schema]),State) of -%% {ok,S} -> Res= case Fetch(Schema,State) of {ok,{file,File},_} -> @@ -345,20 +357,20 @@ process_schemas([{_NS,Schema}|Rest],State=#xsd_state{fetch_fun=Fetch}) -> process_schemas([],S) when is_record(S,xsd_state) -> {ok,S}. - initiate_state(Opts,Schema) -> XSDBase = filename:dirname(Schema), {{state,S},RestOpts}=new_state(Opts), S2 = create_tables(S), - initiate_state2(S2#xsd_state{schema_name = Schema, - xsd_base = XSDBase, - fetch_fun = fun fetch/2},RestOpts). + initiate_state2(S2#xsd_state{schema_name = Schema, xsd_base=XSDBase, + fetch_fun = fun fetch/2}, + RestOpts). 
+ initiate_state2(S,[]) -> S; initiate_state2(S,[{tab2file,Bool}|T]) -> initiate_state2(S#xsd_state{tab2file=Bool},T); -initiate_state2(S,[{xsdbase,XSDBase}|T]) -> - initiate_state2(S#xsd_state{xsd_base=XSDBase},T); +initiate_state2(S,[{xsdbase, XSDBase}|T]) -> + initiate_state2(S#xsd_state{xsd_base=XSDBase, external_xsd_base=true},T); initiate_state2(S,[{fetch_fun,FetchFun}|T]) -> initiate_state2(S#xsd_state{fetch_fun=FetchFun},T); initiate_state2(S,[{fetch_path,FetchPath}|T]) -> @@ -736,7 +748,7 @@ element_content({IDC,S},El,Env) {{IDC,IDConstr},S3}; Err -> S3 = acc_errs(S2,{error_path(El,El#xmlElement.name),?MODULE, - {erronous_content_in_identity_constraint,IDC,Err}}), + {erroneous_content_in_identity_constraint,IDC,Err}}), {{IDC,[]},S3} end; element_content({selector,S},Sel,_Env) -> @@ -1944,7 +1956,7 @@ fetch_external_schema(Path,S) when is_list(Path) -> {EXSD,S#xsd_state{schema_name=File}} end; {_,{string,String},_} -> %% this is for a user defined fetch fun that returns an xml document on string format. - ?debug("scanning string: ~p~n",[File]), + ?debug("scanning string: ~p~n",[String]), case xmerl_scan:string(String,S#xsd_state.xml_options) of {error,Reason} -> {error,acc_errs(S,{[],?MODULE,{parsing_external_schema_failed,Path,Reason}})}; @@ -2514,9 +2526,9 @@ check_element_type([],#schema_complex_type{name=_Name,block=_Bl,content=C}, {error,{error_path(Checked,undefined),?MODULE, {empty_content_not_allowed,C}}} end; -check_element_type(C,{anyType,_},_Env,_Block,S,_Checked) -> +check_element_type(C, {anyType, _}, _Env, _Block, S, _Checked) -> %% permitt anything - {C,[],S}; + {lists:reverse(C), [], S}; check_element_type(XML=[#xmlText{}|_],Type=#schema_simple_type{}, _Env,_Block,S,_Checked) -> @@ -2579,7 +2591,7 @@ check_element_type(XML=[XMLEl=#xmlElement{name=Name}|RestXML], S6 = check_form(ElName,Name,XMLEl, actual_form_value(CMEl#schema_element.form, S5#xsd_state.elementFormDefault), - S5), + S5), %Step into content of XML element. 
{Content,_,S7} = case @@ -2599,12 +2611,12 @@ check_element_type(XML=[XMLEl=#xmlElement{name=Name}|RestXML], RestXML, set_scope(S5#xsd_state.scope,set_num_el(S7,S6))}; true -> - {error,{error_path(XMLEl,Name),?MODULE, - {element_not_suitable_with_schema,ElName,S}}}; + {error,{error_path(XMLEl, Name), ?MODULE, + {element_not_suitable_with_schema, ElName, S}}}; _ when S#xsd_state.num_el >= Min -> %% it may be a match error or an optional element not %% present - {[],XML,S#xsd_state{num_el=0}}; + {[], XML, S#xsd_state{num_el=0}}; _ -> {error,{error_path(XMLEl,Name),?MODULE, {element_not_suitable_with_schema,ElName,CMName,CMEl,S}}} @@ -2639,7 +2651,7 @@ check_element_type(XML=[#xmlElement{}|_Rest], check_element_type(XML=[E=#xmlElement{name=Name}|Rest], Any={any,{Namespace,_Occ={Min,_},ProcessorContents}},Env, _Block,S,_Checked) -> - ?debug("check any: {any,{~p,~p,~p}}~n",[Namespace,Occ,ProcessorContents]), + ?debug("check any: {any,{~p,~p,~p}}~n",[Namespace,_Occ,ProcessorContents]), %% ProcessorContents any of lax | strict | skip %% lax: may validate if schema is found %% strict: must validate @@ -2704,8 +2716,11 @@ check_element_type([],CM,_Env,_Block,S,Checked) -> {error,{error_path(Checked,undefined),?MODULE, {empty_content_not_allowed,CM}}} end; +check_element_type([C = #xmlComment{} |Rest],CM,Env,Block,S,Checked) -> + check_element_type(Rest,CM,Env,Block,S,[C |Checked]); check_element_type(XML,CM,_Env,_Block,S,_Checked) -> {error,{error_path(XML,undefined),?MODULE,{match_failure,XML,CM,S}}}. + %% single xml content object and single schema object check_text_type(XML=[#xmlText{}|_],optional_text,S) -> % {XMLTxt,optional_text}; @@ -2724,7 +2739,7 @@ check_text_type([XMLTxt=#xmlText{}|_],CMEl,_S) -> {cannot_contain_text,XMLTxt,CMEl}}}. split_xmlText(XML) -> - splitwith(fun(#xmlText{}) -> true;(_) -> false end,XML). + splitwith(fun(#xmlText{}) -> true;(#xmlComment{}) -> true;(_) -> false end,XML). 
%% Sequence check_sequence([T=#xmlText{}|Rest],Els,Occ,Env,S,Checked) -> @@ -2767,6 +2782,8 @@ check_sequence(Seq=[_InstEl=#xmlElement{}|_],[El|Els],Occ={_Min,_Max},Env,S,Chec count_num_el(set_num_el(S3,S2)), Ret++Checked) end; +check_sequence([C = #xmlComment{} |Rest], Els, Occ, Env, S, Checked) -> + check_sequence(Rest,Els,Occ,Env,S,[C |Checked]); check_sequence(Rest,[],_Occ,_Env,S,Checked) -> {Checked,Rest,set_num_el(S,0)}; check_sequence([],Els,_Occ,_Env,S,Checked) -> @@ -2863,6 +2880,8 @@ check_all(XML=[E=#xmlElement{name=Name}|RestXML],CM,Occ,Env,S, {element_not_in_all,ElName,E,CM}}, check_all(RestXML,CM,Occ,Env,acc_errs(S,Err),[E|Checked],PrevXML) end; +check_all([C=#xmlComment{} |RestXML], CM, Occ, Env, S, Checked, XML) -> + check_all(RestXML, CM, Occ, Env, S, [C |Checked], XML); check_all(XML,[],_,_,S,Checked,_) -> {Checked,XML,S}; check_all([],CM,_Occ,_,S,Checked,_PrevXML) -> @@ -2914,7 +2933,7 @@ check_target_namespace(XMLEl,S) -> schemaLocations(El=#xmlElement{attributes=Atts},S) -> Pred = fun(#xmlAttribute{name=schemaLocation}) -> false; - (#xmlAttribute{namespace={_,"schemaLocation"}}) -> false; + (#xmlAttribute{nsinfo={_,"schemaLocation"}}) -> false; (_) -> true end, case lists:dropwhile(Pred,Atts) of @@ -5232,7 +5251,12 @@ fetch(URI,S) -> [] -> %% empty systemliteral []; _ -> - filename:join(S#xsd_state.xsd_base, URI) + case S#xsd_state.external_xsd_base of + true -> + filename:join(S#xsd_state.xsd_base, URI); + false -> + filename:join(S#xsd_state.xsd_base, filename:basename(URI)) + end end, Path = path_locate(S#xsd_state.fetch_path, Filename, Fullname), ?dbg("fetch(~p) -> {file, ~p}.~n", [URI, Path]), @@ -5560,7 +5584,7 @@ format_error({incomplete_file,_FileName,_Other}) -> "Schema: The file containing a schema state must be produced by xmerl_xsd:state2file/[1,2]."; format_error({unexpected_content_in_any,A}) -> io_lib:format("Schema: The any type is considered to have no content besides annotation. 
~p was found.",[A]); -format_error({erronous_content_in_identity_constraint,IDC,Err}) -> +format_error({erroneous_content_in_identity_constraint,IDC,Err}) -> io_lib:format("Schema: An ~p identity constraint must have one selector and one or more field in content. This case ~p",[IDC,Err]); format_error({missing_xpath_attribute,IDCContent}) -> io_lib:format("Schema: A ~p in a identity constraint must have a xpath attribute.",[IDCContent]); diff --git a/lib/xmerl/test/Makefile b/lib/xmerl/test/Makefile index 9715aa054a..5a2a585841 100644 --- a/lib/xmerl/test/Makefile +++ b/lib/xmerl/test/Makefile @@ -124,4 +124,4 @@ release_tests_spec: opt @tar cfh - xmerl_xsd_MS2002-01-16_SUITE_data | (cd $(RELSYSDIR); tar xf -) @tar cfh - xmerl_xsd_NIST2002-01-16_SUITE_data | (cd $(RELSYSDIR); tar xf -) @tar cfh - xmerl_xsd_Sun2002-01-16_SUITE_data | (cd $(RELSYSDIR); tar xf -) - chmod -f -R u+w $(RELSYSDIR) + chmod -R u+w $(RELSYSDIR) diff --git a/lib/xmerl/test/xmerl_SUITE.erl b/lib/xmerl/test/xmerl_SUITE.erl index 392b2522e8..55b6d1844c 100644 --- a/lib/xmerl/test/xmerl_SUITE.erl +++ b/lib/xmerl/test/xmerl_SUITE.erl @@ -57,7 +57,8 @@ groups() -> {eventp_tests, [], [sax_parse_and_export]}, {ticket_tests, [], [ticket_5998, ticket_7211, ticket_7214, ticket_7430, - ticket_6873, ticket_7496, ticket_8156, ticket_8697]}, + ticket_6873, ticket_7496, ticket_8156, ticket_8697, + ticket_9411, ticket_9457, ticket_9664_schema, ticket_9664_dtd]}, {app_test, [], [{xmerl_app_test, all}]}, {appup_test, [], [{xmerl_appup_test, all}]}]. @@ -283,7 +284,7 @@ export(Config) -> ?line {E,_} = xmerl_scan:file(TestFile), ?line Exported = xmerl:export([E],xmerl_xml,[{prolog,Prolog}]), B = list_to_binary(Exported++"\n"), - ?line {ok,B} = file:read_file(TestFile), + ?line {ok, B} = file:read_file(TestFile), ok. %%---------------------------------------------------------------------- @@ -575,8 +576,69 @@ ticket_8697(Config) -> ?line [16#545C] = HexEntityText, ok. 
+ticket_9411(suite) -> []; +ticket_9411(doc) -> + ["Test that xmerl_scan handles attribute that contains for example &quot;"]; +ticket_9411(Config) -> + DataDir = ?config(data_dir,Config), + + ?line {ok, Schema} = xmerl_xsd:process_schema(filename:join([DataDir,"misc/ticket_9411.xsd"])), + ?line {ok, Bin} = file:read_file(filename:join([DataDir,"misc/ticket_9411.xml"])), + ?line Xml = erlang:binary_to_list(Bin), + ?line {E, _} = xmerl_scan:string(Xml), + ?line {E, _} = xmerl_xsd:validate(E, Schema). + +ticket_9457(suite) -> []; +ticket_9457(doc) -> + ["Test that xmerl_scan handles continuation correct when current input runs out at the end of an attribute value"]; +ticket_9457(Config) -> + Opts = [{continuation_fun, fun ticket_9457_cont/3, start}, {space, normalize}], + ?line {E, _} = xmerl_scan:string([], Opts). + +ticket_9457_cont(Continue, Exception, GlobalState) -> + case xmerl_scan:cont_state(GlobalState) of + start -> + G1 = xmerl_scan:cont_state(next, GlobalState), + Bytes = "<?xml version=\"1.0\" ?>\r\n<item a=\"b\"", + Continue(Bytes, G1); + next -> + G1 = xmerl_scan:cont_state(last, GlobalState), + Bytes = ">blah</item>\r\n", + Continue(Bytes, G1); + _ -> + Exception(GlobalState) + end. + +ticket_9664_schema(suite) -> []; +ticket_9664_schema(doc) -> + ["Test that comments are handled correct whith"]; +ticket_9664_schema(Config) -> + ?line {E, _} = xmerl_scan:file(filename:join([?config(data_dir, Config), misc, + "ticket_9664_schema.xml"]),[]), + ?line {ok, S} = xmerl_xsd:process_schema(filename:join([?config(data_dir, Config), misc, + "motorcycles.xsd"])), + ?line {E1, _} = xmerl_xsd:validate(E, S), + + ?line {E1,_} = xmerl_xsd:process_validate(filename:join([?config(data_dir,Config), misc, + "motorcycles.xsd"]),E,[]), + + ?line {E1,_} = xmerl_scan:file(filename:join([?config(data_dir,Config), misc, + "ticket_9664_schema.xml"]), + [{schemaLocation, [{"mc", "motorcycles.xsd"}]}, + {validation, schema}]), + ok. 
+ +ticket_9664_dtd(suite) -> []; +ticket_9664_dtd(doc) -> + ["Test that comments are handled correct whith"]; +ticket_9664_dtd(Config) -> + ?line {E, _} = xmerl_scan:file(filename:join([?config(data_dir, Config), misc, + "ticket_9664_dtd.xml"]),[]), + ?line {E, _} = xmerl_scan:file(filename:join([?config(data_dir, Config), misc, + "ticket_9664_dtd.xml"]),[{validation, true}]), + ok. %%====================================================================== diff --git a/lib/xmerl/test/xmerl_SUITE_data/misc.tar.gz b/lib/xmerl/test/xmerl_SUITE_data/misc.tar.gz Binary files differ index c48a6f897b..ffc1d327a5 100644 --- a/lib/xmerl/test/xmerl_SUITE_data/misc.tar.gz +++ b/lib/xmerl/test/xmerl_SUITE_data/misc.tar.gz diff --git a/lib/xmerl/test/xmerl_SUITE_data/xpath/xpath_abbrev.erl b/lib/xmerl/test/xmerl_SUITE_data/xpath/xpath_abbrev.erl index 850b7f8135..7b6f1e95b3 100644 --- a/lib/xmerl/test/xmerl_SUITE_data/xpath/xpath_abbrev.erl +++ b/lib/xmerl/test/xmerl_SUITE_data/xpath/xpath_abbrev.erl @@ -210,7 +210,7 @@ ticket_7496() -> ?line {Doc3,_} = xmerl_scan:file("documentRoot.xml"), ?line ok = Test(Doc3,"//child",[child,child,child]), ?line ok = Test(Doc3,"//child[@name='beta']",[child]), - ?line [{xmlAttribute,id,[],[],[],[],1,[],"2",false}] = + ?line [{xmlAttribute,id,[],[],[],_,1,[],"2",false}] = xmerl_xpath:string("/documentRoot/parent/child[@name='beta']/@id",Doc3), ?line ok = Test(Doc3,"/documentRoot/parent/child|/documentRoot/parent/pet", [child,child,child,pet,pet]), diff --git a/lib/xmerl/test/xmerl_test_lib.erl b/lib/xmerl/test/xmerl_test_lib.erl index a83956c076..e82ad283b2 100644 --- a/lib/xmerl/test/xmerl_test_lib.erl +++ b/lib/xmerl/test/xmerl_test_lib.erl @@ -87,6 +87,6 @@ keysearch_delete(Key,N,List) -> %% the original data directory. get_data_dir(Config) -> - Data0 = ?config(data_dir, Config), - {ok,Data,_} = regexp:sub(Data0, "xmerl_sax_std_SUITE", "xmerl_std_SUITE"), - Data. 
+ Data = ?config(data_dir, Config), + Opts = [{return,list}], + re:replace(Data, "xmerl_sax_std_SUITE", "xmerl_std_SUITE", Opts). diff --git a/lib/xmerl/test/xmerl_xsd_SUITE.erl b/lib/xmerl/test/xmerl_xsd_SUITE.erl index a0d3b1e667..421fa48054 100644 --- a/lib/xmerl/test/xmerl_xsd_SUITE.erl +++ b/lib/xmerl/test/xmerl_xsd_SUITE.erl @@ -62,7 +62,7 @@ groups() -> sis2, state2file_file2state, union]}, {ticket_tests, [], [ticket_6910, ticket_7165, ticket_7190, ticket_7288, - ticket_7736, ticket_8599]}, + ticket_7736, ticket_8599, ticket_9410]}, {facets, [], [length, minLength, maxLength, pattern, enumeration, whiteSpace, maxInclusive, maxExclusive, minExclusive, @@ -1146,3 +1146,8 @@ ticket_8599(Config) -> ?line {{xmlElement,persons,persons,_,_,_,_,_,_,_,_,_},_GlobalState} = xmerl_xsd:validate(E, S). + +ticket_9410(suite) -> []; +ticket_9410(Config) -> + file:set_cwd(filename:join([?config(data_dir,Config),".."])), + ?line {ok, _S} = xmerl_xsd:process_schema("xmerl_xsd_SUITE_data/small.xsd"). diff --git a/lib/xmerl/test/xmerl_xsd_SUITE_data/mim.xsd b/lib/xmerl/test/xmerl_xsd_SUITE_data/mim.xsd index 057344cde8..057344cde8 100755..100644 --- a/lib/xmerl/test/xmerl_xsd_SUITE_data/mim.xsd +++ b/lib/xmerl/test/xmerl_xsd_SUITE_data/mim.xsd diff --git a/lib/xmerl/vsn.mk b/lib/xmerl/vsn.mk index 965a0ae7b4..de47e3418b 100644 --- a/lib/xmerl/vsn.mk +++ b/lib/xmerl/vsn.mk @@ -1 +1 @@ -XMERL_VSN = 1.2.9 +XMERL_VSN = 1.3 diff --git a/lib/xmerl/xmerl.pub b/lib/xmerl/xmerl.pub index 29a81bbde2..29a81bbde2 100755..100644 --- a/lib/xmerl/xmerl.pub +++ b/lib/xmerl/xmerl.pub |