From 8a0e62c1e8c01dbecf70fb4317f00ad50e9f6051 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Mon, 20 Dec 2010 17:03:15 +0100 Subject: Fix namespace-conformance constraints See [Namespaces in XML 1.0 (Third Edition)][1]: > The prefix xml is by definition bound to the namespace name > http://www.w3.org/XML/1998/namespace. It MAY, but need not, be > declared, and MUST NOT be bound to any other namespace name. Other > prefixes MUST NOT be bound to this namespace name, and it MUST NOT be > declared as the default namespace. > > The prefix xmlns is used only to declare namespace bindings and is by > definition bound to the namespace name http://www.w3.org/2000/xmlns/. > It MUST NOT be declared. Other prefixes MUST NOT be bound to this > namespace name, and it MUST NOT be declared as the default namespace. > Element names MUST NOT have the prefix xmlns. > > In XML documents conforming to this specification, no tag may contain > two attributes which have identical names, or have qualified names > with the same local part and with prefixes which have been bound to > namespace names that are identical. 
[1] http://www.w3.org/TR/REC-xml-names/ --- lib/xmerl/src/xmerl_scan.erl | 54 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 10 deletions(-) (limited to 'lib/xmerl') diff --git a/lib/xmerl/src/xmerl_scan.erl b/lib/xmerl/src/xmerl_scan.erl index 25c6547497..6855097089 100644 --- a/lib/xmerl/src/xmerl_scan.erl +++ b/lib/xmerl/src/xmerl_scan.erl @@ -1981,7 +1981,7 @@ scan_element(T, S, Pos) -> scan_element(T, S=#xmerl_scanner{line=L,col=C}, Pos, SpaceDefault,Lang, Parents, NS) -> {Name, NamespaceInfo, T1, S1} = scan_name(T, S), - vc_Element_valid(Name,S), + vc_Element_valid(Name,NamespaceInfo,S), ?strip2, scan_element(T2, S2, Pos, Name, L, C, _Attrs = [], Lang, Parents, NamespaceInfo, NS, @@ -2016,7 +2016,8 @@ scan_element("/>" ++ T, S0 = #xmerl_scanner{hook_fun = Hook, Attrs = lists:reverse(Attrs0), E=processed_whole_element(S, Pos, Name, Attrs, Lang, Parents,NSI,Namespace), - wfc_unique_att_spec(Attrs,S), + #xmlElement{attributes = Attrs1} = E, + wfc_unique_att_spec(Attrs1,S), S1 = #xmerl_scanner{} = Event(#xmerl_event{event = ended, line = L, col = C, @@ -2088,7 +2089,7 @@ scan_element(T, S, Pos, Name, StartL, StartC, Attrs, Lang, Parents, Attr = #xmlAttribute{name = AttName, pos = AttrPos, language = Lang, - namespace = NamespaceInfo, + nsinfo = NamespaceInfo, value = AttValue, normalized = IsNorm}, XMLBase=if @@ -2155,7 +2156,7 @@ processed_whole_element(S=#xmerl_scanner{hook_fun = _Hook, [A#xmlAttribute{ expanded_name=expanded_name( A#xmlAttribute.name, - A#xmlAttribute.namespace, + A#xmlAttribute.nsinfo, % NSI, TempNamespace, S)} || A <- Attrs], {expanded_name(Name, NSI, Namespace, S), ExpAttrsX}; @@ -2194,10 +2195,32 @@ check_namespace(_, _, _, NS) -> expanded_name(Name, [], #xmlNamespace{default = []}, _S) -> Name; -expanded_name(Name, [], #xmlNamespace{default = URI}, _S) -> - {URI, Name}; -expanded_name(_Name, {"xmlns", Local}, _NS, _S) -> % CHECK THIS /JB - {"xmlns",Local}; +expanded_name(Name, [], #xmlNamespace{default = 
URI}, S) -> + case URI of + 'http://www.w3.org/XML/1998/namespace' -> + ?fatal(cannot_bind_default_namespace_to_xml_namespace_name, S); + 'http://www.w3.org/2000/xmlns/' -> + ?fatal(cannot_bind_default_namespace_to_xmlns_namespace_name, S); + _ -> + {URI, Name} + end; +expanded_name(Name, N = {"xmlns", Local}, #xmlNamespace{nodes = Ns}, S) -> + {_, Value} = lists:keyfind(Local, 1, Ns), + case Name of + 'xmlns:xml' when Value =/= 'http://www.w3.org/XML/1998/namespace' -> + ?fatal({xml_prefix_cannot_be_redeclared, Value}, S); + 'xmlns:xmlns' -> + ?fatal({xmlns_prefix_cannot_be_declared, Value}, S); + _ -> + case Value of + 'http://www.w3.org/XML/1998/namespace' -> + ?fatal({cannot_bind_prefix_to_xml_namespace, Local}, S); + 'http://www.w3.org/2000/xmlns/' -> + ?fatal({cannot_bind_prefix_to_xmlns_namespace, Local}, S); + _ -> + N + end + end; expanded_name(_Name, {Prefix, Local}, #xmlNamespace{nodes = Ns}, S) -> case lists:keysearch(Prefix, 1, Ns) of {value, {_, URI}} -> @@ -3259,12 +3282,18 @@ mandatory_delimeter_wfc(T,S) -> wfc_unique_att_spec([],_S) -> ok; -wfc_unique_att_spec([#xmlAttribute{name=N}|Atts],S) -> +wfc_unique_att_spec([#xmlAttribute{name=N,expanded_name=EN}|Atts],S) -> case lists:keymember(N,#xmlAttribute.name,Atts) of true -> ?fatal({error,{unique_att_spec_required,N}},S); _ -> - wfc_unique_att_spec(Atts,S) + case S#xmerl_scanner.namespace_conformant andalso + lists:keymember(EN, #xmlAttribute.expanded_name, Atts) of + true -> + ?fatal({error,{unique_att_spec_required,EN}},S); + _ -> + wfc_unique_att_spec(Atts,S) + end end. wfc_legal_char(Chars,S) when is_list(Chars)-> @@ -3313,6 +3342,11 @@ wfc_Internal_parsed_entity(internal,Value,S) -> wfc_Internal_parsed_entity(_,_,_) -> ok. +vc_Element_valid(_Name, {"xmlns", _}, + S = #xmerl_scanner{namespace_conformant = true}) -> + ?fatal({error,{illegal_element_prefix,xmlns}},S); +vc_Element_valid(Name, _, S) -> + vc_Element_valid(Name, S). 
vc_Element_valid(_Name,#xmerl_scanner{environment=internal_parsed_entity}) -> ok; -- cgit v1.2.3 From 5d3feb918409696bf375ccf1ab6e67f027e3aa6e Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Tue, 7 Dec 2010 19:30:36 +0100 Subject: Set `vsn` field in `#xmlDecl` record --- lib/xmerl/src/xmerl_scan.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/xmerl') diff --git a/lib/xmerl/src/xmerl_scan.erl b/lib/xmerl/src/xmerl_scan.erl index 6855097089..ad6fa7d2b9 100644 --- a/lib/xmerl/src/xmerl_scan.erl +++ b/lib/xmerl/src/xmerl_scan.erl @@ -789,7 +789,8 @@ scan_xml_decl(T, S) -> Attr = #xmlAttribute{name = version, parents = [{xml, _XMLPos = 1}], value = Vsn}, - scan_xml_decl(T4, S4, #xmlDecl{attributes = [Attr]}). + scan_xml_decl(T4, S4, #xmlDecl{vsn = Vsn, + attributes = [Attr]}). scan_xml_decl([], S=#xmerl_scanner{continuation_fun = F}, Decl) -> ?dbg("cont()...~n", []), -- cgit v1.2.3 From 2b11547e61112b102a02de6b30c11c37e34ca21b Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Tue, 7 Dec 2010 14:23:01 +0100 Subject: Track parents in `#xmlPI` nodes --- lib/xmerl/include/xmerl.hrl | 7 +++--- lib/xmerl/src/xmerl_lib.erl | 3 ++- lib/xmerl/src/xmerl_scan.erl | 56 ++++++++++++++++++++++++-------------------- 3 files changed, 37 insertions(+), 29 deletions(-) (limited to 'lib/xmerl') diff --git a/lib/xmerl/include/xmerl.hrl b/lib/xmerl/include/xmerl.hrl index 7bb3f4de9b..fb9b00c73c 100644 --- a/lib/xmerl/include/xmerl.hrl +++ b/lib/xmerl/include/xmerl.hrl @@ -103,9 +103,10 @@ %% processing instruction -record(xmlPI,{ - name, % atom() - pos, % integer() - value % IOlist() + name, % atom() + parents = [], % [{atom(),integer()}] + pos, % integer() + value % IOlist() }). 
-record(xmlDocument,{ diff --git a/lib/xmerl/src/xmerl_lib.erl b/lib/xmerl/src/xmerl_lib.erl index 6402f1cbeb..aeb821f411 100644 --- a/lib/xmerl/src/xmerl_lib.erl +++ b/lib/xmerl/src/xmerl_lib.erl @@ -160,8 +160,9 @@ expand_element(E = #xmlText{}, Pos, Parents, Norm) -> E#xmlText{pos = Pos, parents = Parents, value = expand_text(E#xmlText.value, Norm)}; -expand_element(E = #xmlPI{}, Pos, _Parents, Norm) -> +expand_element(E = #xmlPI{}, Pos, Parents, Norm) -> E#xmlPI{pos = Pos, + parents = Parents, value = expand_text(E#xmlPI.value, Norm)}; expand_element(E = #xmlComment{}, Pos, Parents, Norm) -> E#xmlComment{pos = Pos, diff --git a/lib/xmerl/src/xmerl_scan.erl b/lib/xmerl/src/xmerl_scan.erl index ad6fa7d2b9..0c84ae5775 100644 --- a/lib/xmerl/src/xmerl_scan.erl +++ b/lib/xmerl/src/xmerl_scan.erl @@ -755,7 +755,7 @@ scan_misc("