aboutsummaryrefslogtreecommitdiffstats
path: root/lib/xmerl/src
diff options
context:
space:
mode:
Diffstat (limited to 'lib/xmerl/src')
-rw-r--r--lib/xmerl/src/Makefile19
-rw-r--r--lib/xmerl/src/xmerl.erl4
-rw-r--r--lib/xmerl/src/xmerl_lib.erl3
-rw-r--r--lib/xmerl/src/xmerl_sax_parser.erl5
-rw-r--r--lib/xmerl/src/xmerl_sax_parser_base.erlsrc453
-rw-r--r--lib/xmerl/src/xmerl_scan.erl713
-rw-r--r--lib/xmerl/src/xmerl_uri.erl4
-rw-r--r--lib/xmerl/src/xmerl_validate.erl105
-rw-r--r--lib/xmerl/src/xmerl_xpath.erl114
-rw-r--r--lib/xmerl/src/xmerl_xpath_lib.erl6
-rw-r--r--lib/xmerl/src/xmerl_xpath_parse.yrl3
-rw-r--r--lib/xmerl/src/xmerl_xpath_pred.erl11
-rw-r--r--lib/xmerl/src/xmerl_xpath_scan.erl3
-rw-r--r--lib/xmerl/src/xmerl_xsd.erl67
14 files changed, 885 insertions, 625 deletions
diff --git a/lib/xmerl/src/Makefile b/lib/xmerl/src/Makefile
index 7009b50f6c..ce1aa11fba 100644
--- a/lib/xmerl/src/Makefile
+++ b/lib/xmerl/src/Makefile
@@ -1,7 +1,7 @@
#
# %CopyrightBegin%
#
-# Copyright Ericsson AB 2003-2009. All Rights Reserved.
+# Copyright Ericsson AB 2003-2012. All Rights Reserved.
#
# The contents of this file are subject to the Erlang Public License,
# Version 1.1, (the "License"); you may not use this file except in
@@ -94,7 +94,6 @@ MODULES = $(EDOC_MODULES) \
HRL_FILES = \
../include/xmerl.hrl \
- ../include/xmerl_xlink.hrl \
../include/xmerl_xpath.hrl \
../include/xmerl_xsd.hrl
@@ -215,14 +214,14 @@ include $(ERL_TOP)/make/otp_release_targets.mk
release_spec: opt
- $(INSTALL_DIR) $(RELSYSDIR)/ebin
- $(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin
- $(INSTALL_DIR) $(RELSYSDIR)/src
- $(INSTALL_DATA) $(ERL_FILES) $(INTERNAL_HRL_FILES) $(APP_SRC) $(APPUP_SRC) $(RELSYSDIR)/src
- $(INSTALL_DATA) xmerl_xpath_parse.yrl $(RELSYSDIR)/src
- $(INSTALL_DATA) xmerl_b64Bin.yrl $(RELSYSDIR)/src
- $(INSTALL_DIR) $(RELSYSDIR)/include
- $(INSTALL_DATA) $(HRL_FILES) $(RELSYSDIR)/include
+ $(INSTALL_DIR) "$(RELSYSDIR)/ebin"
+ $(INSTALL_DATA) $(TARGET_FILES) "$(RELSYSDIR)/ebin"
+ $(INSTALL_DIR) "$(RELSYSDIR)/src"
+ $(INSTALL_DATA) $(ERL_FILES) $(INTERNAL_HRL_FILES) $(APP_SRC) $(APPUP_SRC) "$(RELSYSDIR)/src"
+ $(INSTALL_DATA) xmerl_xpath_parse.yrl "$(RELSYSDIR)/src"
+ $(INSTALL_DATA) xmerl_b64Bin.yrl "$(RELSYSDIR)/src"
+ $(INSTALL_DIR) "$(RELSYSDIR)/include"
+ $(INSTALL_DATA) $(HRL_FILES) "$(RELSYSDIR)/include"
release_docs_spec:
diff --git a/lib/xmerl/src/xmerl.erl b/lib/xmerl/src/xmerl.erl
index cf78f7bdf7..3249094e78 100644
--- a/lib/xmerl/src/xmerl.erl
+++ b/lib/xmerl/src/xmerl.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2003-2009. All Rights Reserved.
+%% Copyright Ericsson AB 2003-2011. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
@@ -307,7 +307,7 @@ apply_cb(Ms, F, Df, Args) ->
apply_cb([M|Ms], F, Df, Args, Ms0) ->
case catch apply(M, F, Args) of
- {'EXIT', {undef,[{M,F,_}|_]}} ->
+ {'EXIT', {undef,[{M,F,_,_}|_]}} ->
apply_cb(Ms, F, Df, Args, Ms0);
{'EXIT', Reason} ->
exit(Reason);
diff --git a/lib/xmerl/src/xmerl_lib.erl b/lib/xmerl/src/xmerl_lib.erl
index 6402f1cbeb..aeb821f411 100644
--- a/lib/xmerl/src/xmerl_lib.erl
+++ b/lib/xmerl/src/xmerl_lib.erl
@@ -160,8 +160,9 @@ expand_element(E = #xmlText{}, Pos, Parents, Norm) ->
E#xmlText{pos = Pos,
parents = Parents,
value = expand_text(E#xmlText.value, Norm)};
-expand_element(E = #xmlPI{}, Pos, _Parents, Norm) ->
+expand_element(E = #xmlPI{}, Pos, Parents, Norm) ->
E#xmlPI{pos = Pos,
+ parents = Parents,
value = expand_text(E#xmlPI.value, Norm)};
expand_element(E = #xmlComment{}, Pos, Parents, Norm) ->
E#xmlComment{pos = Pos,
diff --git a/lib/xmerl/src/xmerl_sax_parser.erl b/lib/xmerl/src/xmerl_sax_parser.erl
index 45e2a928ac..5c006aada2 100644
--- a/lib/xmerl/src/xmerl_sax_parser.erl
+++ b/lib/xmerl/src/xmerl_sax_parser.erl
@@ -1,7 +1,7 @@
%%--------------------------------------------------------------------
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2008-2011. All Rights Reserved.
+%% Copyright Ericsson AB 2008-2012. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
@@ -217,7 +217,7 @@ check_encoding_option(E) when E==utf8; E=={utf16,little}; E=={utf16,big};
check_encoding_option(utf16) ->
{utf16,big};
check_encoding_option(E) ->
- {error, io_lib:format("Charcter set ~p not supported", [E])}.
+ {error, io_lib:format("Character set ~p not supported", [E])}.
%%----------------------------------------------------------------------
%% Function: detect_charset(Xml, State)
@@ -279,6 +279,7 @@ convert_encoding(Enc) -> %% Just for 7,8 bit + utf8
case string:to_lower(Enc) of
"utf-8" -> utf8;
"us-ascii" -> utf8;
+ "latin1" -> latin1;
"iso-8859-1" -> latin1; % Handle all iso-8859 as latin1
"iso-8859-2" -> latin1;
"iso-8859-3" -> latin1;
diff --git a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc
index 3b9eaa309c..7b64d7c302 100644
--- a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc
+++ b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc
@@ -1,7 +1,7 @@
%%-*-erlang-*-
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2008-2010. All Rights Reserved.
+%% Copyright Ericsson AB 2008-2012. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
@@ -39,6 +39,9 @@
%% Internal exports
%%----------------------------------------------------------------------
-export([
+ cf/3,
+ cf/4,
+ cf/5
]).
%%----------------------------------------------------------------------
@@ -152,16 +155,16 @@ parse_xml_decl(?BYTE_ORDER_MARK_2, State) ->
cf(?BYTE_ORDER_MARK_2, State, fun parse_xml_decl/2);
parse_xml_decl(?BYTE_ORDER_MARK_REST(Rest), State) ->
cf(Rest, State, fun parse_xml_decl/2);
-parse_xml_decl(?STRING("<"), State) ->
- cf(?STRING("<"), State, fun parse_xml_decl/2);
-parse_xml_decl(?STRING("<?"), State) ->
- cf(?STRING("<?"), State, fun parse_xml_decl/2);
-parse_xml_decl(?STRING("<?x"), State) ->
- cf(?STRING("<?x"), State, fun parse_xml_decl/2);
-parse_xml_decl(?STRING("<?xm"), State) ->
- cf(?STRING("<?xm"), State, fun parse_xml_decl/2);
-parse_xml_decl(?STRING("<?xml"), State) ->
- cf(?STRING("<?xml"), State, fun parse_xml_decl/2);
+parse_xml_decl(?STRING("<") = Bytes, State) ->
+ cf(Bytes, State, fun parse_xml_decl/2);
+parse_xml_decl(?STRING("<?") = Bytes, State) ->
+ cf(Bytes, State, fun parse_xml_decl/2);
+parse_xml_decl(?STRING("<?x") = Bytes, State) ->
+ cf(Bytes, State, fun parse_xml_decl/2);
+parse_xml_decl(?STRING("<?xm") = Bytes, State) ->
+ cf(Bytes, State, fun parse_xml_decl/2);
+parse_xml_decl(?STRING("<?xml") = Bytes, State) ->
+ cf(Bytes, State, fun parse_xml_decl/2);
parse_xml_decl(?STRING_REST("<?xml", Rest1), State) ->
parse_xml_decl_1(Rest1, State);
parse_xml_decl(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State) when is_binary(Bytes) ->
@@ -201,8 +204,8 @@ parse_xml_decl_1(Bytes, State) ->
%%----------------------------------------------------------------------
parse_prolog(?STRING_EMPTY, State) ->
cf(?STRING_EMPTY, State, fun parse_prolog/2);
-parse_prolog(?STRING("<"), State) ->
- cf(?STRING("<"), State, fun parse_prolog/2);
+parse_prolog(?STRING("<") = Bytes, State) ->
+ cf(Bytes, State, fun parse_prolog/2);
parse_prolog(?STRING_REST("<?", Rest), State) ->
{Rest1, State1} = parse_pi(Rest, State),
parse_prolog(Rest1, State1);
@@ -220,18 +223,18 @@ parse_prolog(Bytes, State) ->
parse_prolog_1(?STRING_EMPTY, State) ->
cf(?STRING_EMPTY, State, fun parse_prolog_1/2);
-parse_prolog_1(?STRING("D"), State) ->
- cf(?STRING("D"), State, fun parse_prolog_1/2);
-parse_prolog_1(?STRING("DO"), State) ->
- cf(?STRING("DO"), State, fun parse_prolog_1/2);
-parse_prolog_1(?STRING("DOC"), State) ->
- cf(?STRING("DOC"), State, fun parse_prolog_1/2);
-parse_prolog_1(?STRING("DOCT"), State) ->
- cf(?STRING("DOCT"), State, fun parse_prolog_1/2);
-parse_prolog_1(?STRING("DOCTY"), State) ->
- cf(?STRING("DOCTY"), State, fun parse_prolog_1/2);
-parse_prolog_1(?STRING("DOCTYP"), State) ->
- cf(?STRING("DOCTYP"), State, fun parse_prolog_1/2);
+parse_prolog_1(?STRING("D") = Bytes, State) ->
+ cf(Bytes, State, fun parse_prolog_1/2);
+parse_prolog_1(?STRING("DO") = Bytes, State) ->
+ cf(Bytes, State, fun parse_prolog_1/2);
+parse_prolog_1(?STRING("DOC") = Bytes, State) ->
+ cf(Bytes, State, fun parse_prolog_1/2);
+parse_prolog_1(?STRING("DOCT") = Bytes, State) ->
+ cf(Bytes, State, fun parse_prolog_1/2);
+parse_prolog_1(?STRING("DOCTY") = Bytes, State) ->
+ cf(Bytes, State, fun parse_prolog_1/2);
+parse_prolog_1(?STRING("DOCTYP") = Bytes, State) ->
+ cf(Bytes, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING_REST("DOCTYPE", Rest), State) ->
{Rest1, State1} = parse_doctype(Rest, State),
State2 = event_callback(endDTD, State1),
@@ -509,10 +512,10 @@ parse_ns_name(Bytes, State, Prefix, Name) ->
%%----------------------------------------------------------------------
parse_pi_data(?STRING_EMPTY, State, Acc) ->
cf(?STRING_EMPTY, State, Acc, fun parse_pi_data/3);
-parse_pi_data(?STRING("?"), State, Acc) ->
- cf(?STRING("?"), State, Acc, fun parse_pi_data/3);
-parse_pi_data(?STRING("\r"), State, Acc) ->
- cf(?STRING("\r"), State, Acc, fun parse_pi_data/3);
+parse_pi_data(?STRING("?") = Bytes, State, Acc) ->
+ cf(Bytes, State, Acc, fun parse_pi_data/3);
+parse_pi_data(?STRING("\r") = Bytes, State, Acc) ->
+ cf(Bytes, State, Acc, fun parse_pi_data/3);
parse_pi_data(?STRING_REST("?>", Rest), State, Acc) ->
{lists:reverse(Acc), Rest, State};
parse_pi_data(?STRING_REST("\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Acc) ->
@@ -541,23 +544,23 @@ parse_pi_data(Bytes, State, Acc) ->
%%----------------------------------------------------------------------
parse_cdata(?STRING_EMPTY, State) ->
cf(?STRING_EMPTY, State, fun parse_cdata/2);
-parse_cdata(?STRING("["), State) ->
- cf(?STRING("["), State, fun parse_cdata/2);
-parse_cdata(?STRING("[C"), State) ->
- cf(?STRING("[C"), State, fun parse_cdata/2);
-parse_cdata(?STRING("[CD"), State) ->
- cf(?STRING("[CD"), State, fun parse_cdata/2);
-parse_cdata(?STRING("[CDA"), State) ->
- cf(?STRING("[CDA"), State, fun parse_cdata/2);
-parse_cdata(?STRING("[CDAT"), State) ->
- cf(?STRING("[CDAT"), State, fun parse_cdata/2);
-parse_cdata(?STRING("[CDATA"), State) ->
- cf(?STRING("[CDATA"), State, fun parse_cdata/2);
-parse_cdata(?STRING_REST("[CDATA[", Rest), State) ->
- State1 = event_callback(startCDATA, State),
+parse_cdata(?STRING("[") = Bytes, State) ->
+ cf(Bytes, State, fun parse_cdata/2);
+parse_cdata(?STRING("[C") = Bytes, State) ->
+ cf(Bytes, State, fun parse_cdata/2);
+parse_cdata(?STRING("[CD") = Bytes, State) ->
+ cf(Bytes, State, fun parse_cdata/2);
+parse_cdata(?STRING("[CDA") = Bytes, State) ->
+ cf(Bytes, State, fun parse_cdata/2);
+parse_cdata(?STRING("[CDAT") = Bytes, State) ->
+ cf(Bytes, State, fun parse_cdata/2);
+parse_cdata(?STRING("[CDATA") = Bytes, State) ->
+ cf(Bytes, State, fun parse_cdata/2);
+parse_cdata(?STRING_REST("[CDATA[", Rest), State) ->
+ State1 = event_callback(startCDATA, State),
parse_cdata(Rest, State1, []);
-parse_cdata(Bytes, State) ->
- unicode_incomplete_check([Bytes, State, fun parse_cdata/2],
+parse_cdata(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_cdata/2],
"expecting comment or CDATA").
@@ -571,12 +574,12 @@ parse_cdata(Bytes, State) ->
%%----------------------------------------------------------------------
parse_cdata(?STRING_EMPTY, State, Acc) ->
cf(?STRING_EMPTY, State, Acc, fun parse_cdata/3);
-parse_cdata(?STRING("\r"), State, Acc) ->
- cf(?STRING("\r"), State, Acc, fun parse_cdata/3);
-parse_cdata(?STRING("]"), State, Acc) ->
- cf(?STRING("]"), State, Acc, fun parse_cdata/3);
-parse_cdata(?STRING("]]"), State, Acc) ->
- cf(?STRING("]]"), State, Acc, fun parse_cdata/3);
+parse_cdata(?STRING("\r") = Bytes, State, Acc) ->
+ cf(Bytes, State, Acc, fun parse_cdata/3);
+parse_cdata(?STRING("]") = Bytes, State, Acc) ->
+ cf(Bytes, State, Acc, fun parse_cdata/3);
+parse_cdata(?STRING("]]") = Bytes, State, Acc) ->
+ cf(Bytes, State, Acc, fun parse_cdata/3);
parse_cdata(?STRING_REST("]]>", Rest), State, Acc) ->
State1 = event_callback({characters, lists:reverse(Acc)}, State),
State2 = event_callback(endCDATA, State1),
@@ -607,12 +610,12 @@ parse_cdata(Bytes, State, Acc) ->
%%----------------------------------------------------------------------
parse_comment(?STRING_EMPTY, State, Acc) ->
cf(?STRING_EMPTY, State, Acc, fun parse_comment/3);
-parse_comment(?STRING("\r"), State, Acc) ->
- cf(?STRING("\r"), State, Acc, fun parse_comment/3);
-parse_comment(?STRING("-"), State, Acc) ->
- cf(?STRING("-"), State, Acc, fun parse_comment/3);
-parse_comment(?STRING("--"), State, Acc) ->
- cf(?STRING("--"), State, Acc, fun parse_comment/3);
+parse_comment(?STRING("\r") = Bytes, State, Acc) ->
+ cf(Bytes, State, Acc, fun parse_comment/3);
+parse_comment(?STRING("-") = Bytes, State, Acc) ->
+ cf(Bytes, State, Acc, fun parse_comment/3);
+parse_comment(?STRING("--") = Bytes, State, Acc) ->
+ cf(Bytes, State, Acc, fun parse_comment/3);
parse_comment(?STRING_REST("-->", Rest), State, Acc) ->
State1 = event_callback({comment, lists:reverse(Acc)}, State),
{Rest, State1};
@@ -710,8 +713,8 @@ parse_stag(Bytes, State) ->
%%----------------------------------------------------------------------
parse_attributes(?STRING_EMPTY, State, CurrentTag) ->
cf(?STRING_EMPTY, State, CurrentTag, fun parse_attributes/3);
-parse_attributes(?STRING("/"), State, CurrentTag) ->
- cf(?STRING("/"), State, CurrentTag, fun parse_attributes/3);
+parse_attributes(?STRING("/") = Bytes, State, CurrentTag) ->
+ cf(Bytes, State, CurrentTag, fun parse_attributes/3);
parse_attributes(?STRING_REST("/>", Rest), State, {Tag, AttList, NewNsList}) ->
CompleteNsList = NewNsList ++ State#xmerl_sax_parser_state.ns,
{Uri, LocalName, QName, Attributes} = fix_ns(Tag, AttList, CompleteNsList),
@@ -759,7 +762,7 @@ parse_attributes(?STRING_UNBOUND_REST(C, Rest), State, {Tag, AttList, NsList}) -
end
end;
false ->
- ?fatal_error(State, "Invalid start character in attribute name: " ++ C)
+ ?fatal_error(State, "Invalid start character in attribute name: " ++ [C])
end;
parse_attributes(Bytes, State, CurrentTag) ->
unicode_incomplete_check([Bytes, State, CurrentTag, fun parse_attributes/3],
@@ -908,20 +911,20 @@ parse_att_value(?STRING_EMPTY, State, undefined, Acc) ->
{Acc, [], State}; %% stop clause when parsing references
parse_att_value(?STRING_EMPTY, State, Stop, Acc) ->
cf(?STRING_EMPTY, State, Stop, Acc, fun parse_att_value/4);
-parse_att_value(?STRING("\r"), State, Stop, Acc) ->
- cf(?STRING("\r"), State, Stop, Acc, fun parse_att_value/4);
+parse_att_value(?STRING("\r") = Bytes, State, Stop, Acc) ->
+ cf(Bytes, State, Stop, Acc, fun parse_att_value/4);
parse_att_value(?STRING_REST("\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc) ->
parse_att_value(Rest,
- State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
+ State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
parse_att_value(?STRING_REST("\r\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc) ->
parse_att_value(Rest,
- State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
+ State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
parse_att_value(?STRING_REST("\r", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc) ->
parse_att_value(Rest,
- State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
+ State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
parse_att_value(?STRING_REST("\t", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc) ->
parse_att_value(Rest,
- State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
+ State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
parse_att_value(?STRING_REST("&", Rest), State, Stop, Acc) ->
{Ref, Rest1, State1} = parse_reference(Rest, State, true),
case Ref of
@@ -944,14 +947,19 @@ parse_att_value(?STRING_REST("&", Rest), State, Stop, Acc) ->
{unparsed, Name, _} ->
?fatal_error(State1, "Unparsed entity reference in attribute value: " ++ Name)
end;
-parse_att_value(?STRING_UNBOUND_REST(Stop, Rest), State, Stop, Acc) ->
+parse_att_value(?STRING_UNBOUND_REST(Stop, Rest), State, Stop, Acc) ->
{lists:reverse(Acc), Rest, State};
-parse_att_value(?STRING_UNBOUND_REST($<, _Rest), State, _Stop, _Acc) ->
+parse_att_value(?STRING_UNBOUND_REST($<, _Rest), State, _Stop, _Acc) ->
?fatal_error(State, "< not allowed in attribute value");
-parse_att_value(?STRING_UNBOUND_REST(C, Rest), State, Stop, Acc) ->
- parse_att_value(Rest, State, Stop, [C|Acc]);
-parse_att_value(Bytes, State, Stop, Acc) ->
- unicode_incomplete_check([Bytes, State, Stop, Acc, fun parse_att_value/4],
+parse_att_value(?STRING_UNBOUND_REST(C, Rest), State, Stop, Acc) ->
+ if
+ ?is_char(C) ->
+ parse_att_value(Rest, State, Stop, [C|Acc]);
+ true ->
+ ?fatal_error(State, lists:flatten(io_lib:format("Bad character in attribute value: ~p", [C])))
+ end;
+parse_att_value(Bytes, State, Stop, Acc) ->
+ unicode_incomplete_check([Bytes, State, Stop, Acc, fun parse_att_value/4],
undefined).
@@ -1038,17 +1046,17 @@ parse_content(?STRING_EMPTY, State, Acc, IgnorableWS) ->
Other ->
throw(Other)
end;
-parse_content(?STRING("\r"), State, Acc, IgnorableWS) ->
- cf(?STRING("\r"), State, Acc, IgnorableWS, fun parse_content/4);
-parse_content(?STRING("<"), State, Acc, IgnorableWS) ->
- cf(?STRING("<"), State, Acc, IgnorableWS, fun parse_content/4);
+parse_content(?STRING("\r") = Bytes, State, Acc, IgnorableWS) ->
+ cf(Bytes, State, Acc, IgnorableWS, fun parse_content/4);
+parse_content(?STRING("<") = Bytes, State, Acc, IgnorableWS) ->
+ cf(Bytes, State, Acc, IgnorableWS, fun parse_content/4);
parse_content(?STRING_REST("</", Rest), State, Acc, IgnorableWS) ->
State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
parse_etag(Rest, State1);
-parse_content(?STRING("<!"), State, _Acc, IgnorableWS) ->
- cf(?STRING("<!"), State, [], IgnorableWS, fun parse_content/4);
-parse_content(?STRING("<!-"), State, _Acc, IgnorableWS) ->
- cf(?STRING("<!-"), State, [], IgnorableWS, fun parse_content/4);
+parse_content(?STRING("<!") = Bytes, State, _Acc, IgnorableWS) ->
+ cf(Bytes, State, [], IgnorableWS, fun parse_content/4);
+parse_content(?STRING("<!-") = Bytes, State, _Acc, IgnorableWS) ->
+ cf(Bytes, State, [], IgnorableWS, fun parse_content/4);
parse_content(?STRING_REST("<!--", Rest), State, Acc, IgnorableWS) ->
State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
{Rest1, State2} = parse_comment(Rest, State1, []),
@@ -1120,10 +1128,10 @@ parse_content(?STRING_UNBOUND_REST(C, Rest), State, Acc, _IgnorableWS) ->
?is_char(C) ->
parse_content(Rest, State, [C|Acc], false);
true ->
- ?fatal_error(State, "Bad character in content: " ++ C)
- end;
-parse_content(Bytes, State, Acc, IgnorableWS) ->
- unicode_incomplete_check([Bytes, State, Acc, IgnorableWS, fun parse_content/4],
+ ?fatal_error(State, lists:flatten(io_lib:format("Bad character in content: ~p", [C])))
+ end;
+parse_content(Bytes, State, Acc, IgnorableWS) ->
+ unicode_incomplete_check([Bytes, State, Acc, IgnorableWS, fun parse_content/4],
undefined).
@@ -1219,8 +1227,8 @@ whitespace(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State, Acc) when is_bi
%%----------------------------------------------------------------------
parse_reference(?STRING_EMPTY, State, HaveToExist) ->
cf(?STRING_EMPTY, State, HaveToExist, fun parse_reference/3);
-parse_reference(?STRING("#"), State, HaveToExist) ->
- cf(?STRING("#"), State, HaveToExist, fun parse_reference/3);
+parse_reference(?STRING("#") = Bytes, State, HaveToExist) ->
+ cf(Bytes, State, HaveToExist, fun parse_reference/3);
parse_reference(?STRING_REST("#x", Rest), State, _HaveToExist) ->
{CharValue, RefString, Rest1, State1} = parse_hex(Rest, State, []),
if
@@ -1694,16 +1702,16 @@ parse_external_entity_1(?BYTE_ORDER_MARK_2, State) ->
cf(?BYTE_ORDER_MARK_2, State, fun parse_external_entity_1/2);
parse_external_entity_1(?BYTE_ORDER_MARK_REST(Rest), State) ->
parse_external_entity_1(Rest, State);
-parse_external_entity_1(?STRING("<"), State) ->
- cf(?STRING("<"), State, fun parse_external_entity_1/2);
-parse_external_entity_1(?STRING("<?"), State) ->
- cf(?STRING("<?"), State, fun parse_external_entity_1/2);
-parse_external_entity_1(?STRING("<?x"), State) ->
- cf(?STRING("<?x"), State, fun parse_external_entity_1/2);
-parse_external_entity_1(?STRING("<?xm"), State) ->
- cf(?STRING("<?xm"), State, fun parse_external_entity_1/2);
-parse_external_entity_1(?STRING("<?xml"), State) ->
- cf(?STRING("<?xml"), State, fun parse_external_entity_1/2);
+parse_external_entity_1(?STRING("<") = Bytes, State) ->
+ cf(Bytes, State, fun parse_external_entity_1/2);
+parse_external_entity_1(?STRING("<?") = Bytes, State) ->
+ cf(Bytes, State, fun parse_external_entity_1/2);
+parse_external_entity_1(?STRING("<?x") = Bytes, State) ->
+ cf(Bytes, State, fun parse_external_entity_1/2);
+parse_external_entity_1(?STRING("<?xm") = Bytes, State) ->
+ cf(Bytes, State, fun parse_external_entity_1/2);
+parse_external_entity_1(?STRING("<?xml") = Bytes, State) ->
+ cf(Bytes, State, fun parse_external_entity_1/2);
parse_external_entity_1(?STRING_REST("<?xml", Rest) = Bytes,
#xmerl_sax_parser_state{file_type=Type} = State) ->
{Rest1, State1} =
@@ -1773,29 +1781,29 @@ is_next_char_whitespace(Bytes, State) ->
%%----------------------------------------------------------------------
parse_external_id(?STRING_EMPTY, State, OptionalSystemId) ->
cf(?STRING_EMPTY, State, OptionalSystemId, fun parse_external_id/3);
-parse_external_id(?STRING("S"), State,OptionalSystemId) ->
- cf(?STRING("S"), State, OptionalSystemId, fun parse_external_id/3);
-parse_external_id(?STRING("SY"), State, OptionalSystemId) ->
- cf(?STRING("SY"), State, OptionalSystemId, fun parse_external_id/3);
-parse_external_id(?STRING("SYS"), State, OptionalSystemId) ->
- cf(?STRING("SYS"), State, OptionalSystemId, fun parse_external_id/3);
-parse_external_id(?STRING("SYST"), State, OptionalSystemId) ->
- cf(?STRING("SYST"), State, OptionalSystemId, fun parse_external_id/3);
-parse_external_id(?STRING("SYSTE"), State, OptionalSystemId) ->
- cf(?STRING("SYSTE"), State, OptionalSystemId, fun parse_external_id/3);
+parse_external_id(?STRING("S") = Bytes, State,OptionalSystemId) ->
+ cf(Bytes, State, OptionalSystemId, fun parse_external_id/3);
+parse_external_id(?STRING("SY") = Bytes, State, OptionalSystemId) ->
+ cf(Bytes, State, OptionalSystemId, fun parse_external_id/3);
+parse_external_id(?STRING("SYS") = Bytes, State, OptionalSystemId) ->
+ cf(Bytes, State, OptionalSystemId, fun parse_external_id/3);
+parse_external_id(?STRING("SYST") = Bytes, State, OptionalSystemId) ->
+ cf(Bytes, State, OptionalSystemId, fun parse_external_id/3);
+parse_external_id(?STRING("SYSTE") = Bytes, State, OptionalSystemId) ->
+ cf(Bytes, State, OptionalSystemId, fun parse_external_id/3);
parse_external_id(?STRING_REST("SYSTEM", Rest), State, _) ->
{SysId, Rest1, State1} = parse_system_id(Rest, State, false),
{"", SysId, Rest1, State1};
-parse_external_id(?STRING("P"), State, OptionalSystemId) ->
- cf(?STRING("P"), State, OptionalSystemId, fun parse_external_id/3);
-parse_external_id(?STRING("PU"), State, OptionalSystemId) ->
- cf(?STRING("PU"), State, OptionalSystemId, fun parse_external_id/3);
-parse_external_id(?STRING("PUB"), State, OptionalSystemId) ->
- cf(?STRING("PUB"), State, OptionalSystemId, fun parse_external_id/3);
-parse_external_id(?STRING("PUBL"), State, OptionalSystemId) ->
- cf(?STRING("PUBL"), State, OptionalSystemId, fun parse_external_id/3);
-parse_external_id(?STRING("PUBLI"), State, OptionalSystemId) ->
- cf(?STRING("PUBLI"), State, OptionalSystemId, fun parse_external_id/3);
+parse_external_id(?STRING("P") = Bytes, State, OptionalSystemId) ->
+ cf(Bytes, State, OptionalSystemId, fun parse_external_id/3);
+parse_external_id(?STRING("PU") = Bytes, State, OptionalSystemId) ->
+ cf(Bytes, State, OptionalSystemId, fun parse_external_id/3);
+parse_external_id(?STRING("PUB") = Bytes, State, OptionalSystemId) ->
+ cf(Bytes, State, OptionalSystemId, fun parse_external_id/3);
+parse_external_id(?STRING("PUBL") = Bytes, State, OptionalSystemId) ->
+ cf(Bytes, State, OptionalSystemId, fun parse_external_id/3);
+parse_external_id(?STRING("PUBLI") = Bytes, State, OptionalSystemId) ->
+ cf(Bytes, State, OptionalSystemId, fun parse_external_id/3);
parse_external_id(?STRING_REST("PUBLIC", Rest), State, OptionalSystemId) ->
parse_public_id(Rest, State, OptionalSystemId);
parse_external_id(Bytes, State, OptionalSystemId) ->
@@ -1915,70 +1923,70 @@ parse_doctype_decl(Bytes, State) ->
parse_doctype_decl_1(?STRING_EMPTY, State) ->
cf(?STRING_EMPTY, State, fun parse_doctype_decl_1/2);
-parse_doctype_decl_1(?STRING("E"), State) ->
- cf(?STRING("E"), State, fun parse_doctype_decl_1/2);
-parse_doctype_decl_1(?STRING("EL"), State) ->
- cf(?STRING("EL"), State, fun parse_doctype_decl_1/2);
-parse_doctype_decl_1(?STRING("ELE"), State) ->
- cf(?STRING("ELE"), State, fun parse_doctype_decl_1/2);
-parse_doctype_decl_1(?STRING("ELEM"), State) ->
- cf(?STRING("ELEM"), State, fun parse_doctype_decl_1/2);
-parse_doctype_decl_1(?STRING("ELEME"), State) ->
- cf(?STRING("ELEME"), State, fun parse_doctype_decl_1/2);
-parse_doctype_decl_1(?STRING("ELEMEN"), State) ->
- cf(?STRING("ELEMEN"), State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("E") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("EL") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("ELE") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("ELEM") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("ELEME") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("ELEMEN") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING_REST("ELEMENT", Rest), State) ->
{Rest1, State1} = parse_element_decl(Rest, State),
parse_doctype_decl(Rest1, State1);
-parse_doctype_decl_1(?STRING("A"), State) ->
- cf(?STRING("A"), State, fun parse_doctype_decl_1/2);
-parse_doctype_decl_1(?STRING("AT"), State) ->
- cf(?STRING("AT"), State, fun parse_doctype_decl_1/2);
-parse_doctype_decl_1(?STRING("ATT"), State) ->
- cf(?STRING("ATT"), State, fun parse_doctype_decl_1/2);
-parse_doctype_decl_1(?STRING("ATTL"), State) ->
- cf(?STRING("ATTL"), State, fun parse_doctype_decl_1/2);
-parse_doctype_decl_1(?STRING("ATTLI"), State) ->
- cf(?STRING("ATTLI"), State, fun parse_doctype_decl_1/2);
-parse_doctype_decl_1(?STRING("ATTLIS"), State) ->
- cf(?STRING("ATTLIS"), State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("A") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("AT") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("ATT") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("ATTL") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("ATTLI") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("ATTLIS") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING_REST("ATTLIST", Rest), State) ->
{Rest1, State1} = parse_att_list_decl(Rest, State),
parse_doctype_decl(Rest1, State1);
%% E clause not needed here because already taken care of above.
-parse_doctype_decl_1(?STRING("EN"), State) ->
- cf(?STRING("EN"), State, fun parse_doctype_decl_1/2);
-parse_doctype_decl_1(?STRING("ENT"), State) ->
- cf(?STRING("ENT"), State, fun parse_doctype_decl_1/2);
-parse_doctype_decl_1(?STRING("ENTI"), State) ->
- cf(?STRING("ENTI"), State, fun parse_doctype_decl_1/2);
-parse_doctype_decl_1(?STRING("ENTIT"), State) ->
- cf(?STRING("ENTIT"), State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("EN") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("ENT") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("ENTI") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("ENTIT") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING_REST("ENTITY", Rest), State) ->
{Rest1, State1} = parse_entity_decl(Rest, State),
parse_doctype_decl(Rest1, State1);
-parse_doctype_decl_1(?STRING("N"), State) ->
- cf(?STRING("N"), State, fun parse_doctype_decl_1/2);
-parse_doctype_decl_1(?STRING("NO"), State) ->
- cf(?STRING("NO"), State, fun parse_doctype_decl_1/2);
-parse_doctype_decl_1(?STRING("NOT"), State) ->
- cf(?STRING("NOT"), State, fun parse_doctype_decl_1/2);
-parse_doctype_decl_1(?STRING("NOTA"), State) ->
- cf(?STRING("NOTA"), State, fun parse_doctype_decl_1/2);
-parse_doctype_decl_1(?STRING("NOTAT"), State) ->
- cf(?STRING("NOTAT"), State, fun parse_doctype_decl_1/2);
-parse_doctype_decl_1(?STRING("NOTATI"), State) ->
- cf(?STRING("NOTATI"), State, fun parse_doctype_decl_1/2);
-parse_doctype_decl_1(?STRING("NOTATIO"), State) ->
- cf(?STRING("NOTATIO"), State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("N") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("NO") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("NOT") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("NOTA") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("NOTAT") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("NOTATI") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("NOTATIO") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING_REST("NOTATION", Rest), State) ->
{Rest1, State1} = parse_notation_decl(Rest, State),
parse_doctype_decl(Rest1, State1);
-parse_doctype_decl_1(?STRING("-"), State) ->
- cf(?STRING("-"), State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("-") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING_REST("--", Rest), State) ->
{Rest1, State1} = parse_comment(Rest, State, []),
parse_doctype_decl(Rest1, State1);
@@ -2049,14 +2057,18 @@ parse_element_content(Bytes, State) ->
parse_element_content_1(?STRING_EMPTY, State, Acc) ->
cf(?STRING_EMPTY, State, Acc, fun parse_element_content_1/3);
parse_element_content_1(?STRING_REST(">", Rest), State, Acc) ->
- {lists:reverse(Acc), Rest, State};
+ {lists:reverse(delete_leading_whitespace(Acc)), Rest, State};
parse_element_content_1(?STRING_UNBOUND_REST(C, Rest), State, Acc) ->
parse_element_content_1(Rest, State, [C|Acc]);
parse_element_content_1(Bytes, State, Acc) ->
unicode_incomplete_check([Bytes, State, Acc, fun parse_element_content_1/3],
undefined).
-
+delete_leading_whitespace([C |Acc]) when ?is_whitespace(C)->
+ delete_leading_whitespace(Acc);
+delete_leading_whitespace(Acc) ->
+ Acc.
+
%%----------------------------------------------------------------------
%% Function : parse_att_list_decl(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
@@ -2252,52 +2264,52 @@ parse_default_decl(Bytes, State) ->
%%----------------------------------------------------------------------
parse_default_decl_1(?STRING_EMPTY, State) ->
cf(?STRING_EMPTY, State, fun parse_default_decl_1/2);
-parse_default_decl_1(?STRING_REST("#", Rest), State) ->
- case Rest of
- ?STRING("R") ->
- cf(Rest, State, fun parse_default_decl_1/2);
- ?STRING("RE") ->
- cf(Rest, State, fun parse_default_decl_1/2);
- ?STRING("REQ") ->
- cf(Rest, State, fun parse_default_decl_1/2);
- ?STRING("REQU") ->
- cf(Rest, State, fun parse_default_decl_1/2);
- ?STRING("REQUI") ->
- cf(Rest, State, fun parse_default_decl_1/2);
- ?STRING("REQUIR") ->
- cf(Rest, State, fun parse_default_decl_1/2);
- ?STRING("REQUIRE") ->
- cf(Rest, State, fun parse_default_decl_1/2);
- ?STRING_REST("REQUIRED", Rest1) ->
+parse_default_decl_1(?STRING_REST("#", _Rest) = Bytes, State) ->
+ case Bytes of
+ ?STRING("#R") ->
+ cf(Bytes, State, fun parse_default_decl_1/2);
+ ?STRING("#RE") ->
+ cf(Bytes, State, fun parse_default_decl_1/2);
+ ?STRING("#REQ") ->
+ cf(Bytes, State, fun parse_default_decl_1/2);
+ ?STRING("#REQU") ->
+ cf(Bytes, State, fun parse_default_decl_1/2);
+ ?STRING("#REQUI") ->
+ cf(Bytes, State, fun parse_default_decl_1/2);
+ ?STRING("#REQUIR") ->
+ cf(Bytes, State, fun parse_default_decl_1/2);
+ ?STRING("#REQUIRE") ->
+ cf(Bytes, State, fun parse_default_decl_1/2);
+ ?STRING_REST("#REQUIRED", Rest1) ->
{"#REQUIRED", undefined, Rest1, State};
- ?STRING("I") ->
- cf(Rest, State, fun parse_default_decl_1/2);
- ?STRING("IM") ->
- cf(Rest, State, fun parse_default_decl_1/2);
- ?STRING("IMP") ->
- cf(Rest, State, fun parse_default_decl_1/2);
- ?STRING("IMPL") ->
- cf(Rest, State, fun parse_default_decl_1/2);
- ?STRING("IMPLI") ->
- cf(Rest, State, fun parse_default_decl_1/2);
- ?STRING("IMPLIE") ->
- cf(Rest, State, fun parse_default_decl_1/2);
- ?STRING_REST("IMPLIED", Rest1) ->
+ ?STRING("#I") ->
+ cf(Bytes, State, fun parse_default_decl_1/2);
+ ?STRING("#IM") ->
+ cf(Bytes, State, fun parse_default_decl_1/2);
+ ?STRING("#IMP") ->
+ cf(Bytes, State, fun parse_default_decl_1/2);
+ ?STRING("#IMPL") ->
+ cf(Bytes, State, fun parse_default_decl_1/2);
+ ?STRING("#IMPLI") ->
+ cf(Bytes, State, fun parse_default_decl_1/2);
+ ?STRING("#IMPLIE") ->
+ cf(Bytes, State, fun parse_default_decl_1/2);
+ ?STRING_REST("#IMPLIED", Rest1) ->
{"#IMPLIED", undefined, Rest1, State};
- ?STRING("F") ->
- cf(Rest, State, fun parse_default_decl_1/2);
- ?STRING("FI") ->
- cf(Rest, State, fun parse_default_decl_1/2);
- ?STRING("FIX") ->
- cf(Rest, State, fun parse_default_decl_1/2);
- ?STRING("FIXE") ->
- cf(Rest, State, fun parse_default_decl_1/2);
- ?STRING_REST("FIXED", Rest1) ->
+ ?STRING("#F") ->
+ cf(Bytes, State, fun parse_default_decl_1/2);
+ ?STRING("#FI") ->
+ cf(Bytes, State, fun parse_default_decl_1/2);
+ ?STRING("#FIX") ->
+ cf(Bytes, State, fun parse_default_decl_1/2);
+ ?STRING("#FIXE") ->
+ cf(Bytes, State, fun parse_default_decl_1/2);
+ ?STRING_REST("#FIXED", Rest1) ->
parse_fixed(Rest1, State);
_ ->
- ?fatal_error(State, "REQUIRED, IMPLIED or FIXED expected")
+ ?fatal_error(State, "REQUIRED, IMPLIED or FIXED expected after #")
end;
parse_default_decl_1(?STRING_UNBOUND_REST(C, Rest), State) when C == $'; C == $" ->
{DefaultValue, Rest1, State1} = parse_att_value(Rest, State, C, []),
@@ -2522,11 +2534,16 @@ parse_entity_value(?STRING_REST("%", Rest), #xmerl_sax_parser_state{file_type=Ty
end
end;
-parse_entity_value(?STRING_UNBOUND_REST(Stop, Rest), State, Stop, Acc) ->
+parse_entity_value(?STRING_UNBOUND_REST(Stop, Rest), State, Stop, Acc) ->
{lists:reverse(Acc), Rest, State};
-parse_entity_value(?STRING_UNBOUND_REST(C, Rest), State, Stop, Acc) ->
- parse_entity_value(Rest, State, Stop, [C|Acc]);
-parse_entity_value(Bytes, State, Stop, Acc) ->
+parse_entity_value(?STRING_UNBOUND_REST(C, Rest), State, Stop, Acc) ->
+ if
+ ?is_char(C) ->
+ parse_entity_value(Rest, State, Stop, [C|Acc]);
+ true ->
+ ?fatal_error(State, lists:flatten(io_lib:format("Bad character in entity value: ~p", [C])))
+ end;
+parse_entity_value(Bytes, State, Stop, Acc) ->
unicode_incomplete_check([Bytes, State, Stop, Acc, fun parse_entity_value/4],
undefined).
@@ -2543,14 +2560,14 @@ parse_ndata_decl(?STRING_EMPTY, State) ->
cf(?STRING_EMPTY, State, fun parse_ndata_decl/2);
parse_ndata_decl(?STRING_REST(">", Rest), State) ->
{undefined, Rest, State};
-parse_ndata_decl(?STRING("N") = Rest, State) ->
- cf(Rest, State, fun parse_ndata_decl/2);
-parse_ndata_decl(?STRING("ND") = Rest, State) ->
- cf(Rest, State, fun parse_ndata_decl/2);
-parse_ndata_decl(?STRING("NDA") = Rest, State) ->
- cf(Rest, State, fun parse_ndata_decl/2);
-parse_ndata_decl(?STRING("NDAT") = Rest, State) ->
- cf(Rest, State, fun parse_ndata_decl/2);
+parse_ndata_decl(?STRING("N") = Bytes, State) ->
+ cf(Bytes, State, fun parse_ndata_decl/2);
+parse_ndata_decl(?STRING("ND") = Bytes, State) ->
+ cf(Bytes, State, fun parse_ndata_decl/2);
+parse_ndata_decl(?STRING("NDA") = Bytes, State) ->
+ cf(Bytes, State, fun parse_ndata_decl/2);
+parse_ndata_decl(?STRING("NDAT") = Bytes, State) ->
+ cf(Bytes, State, fun parse_ndata_decl/2);
parse_ndata_decl(?STRING_REST("NDATA", Rest), State) ->
parse_ndata_decl_1(Rest, State);
parse_ndata_decl(Bytes, State) ->
diff --git a/lib/xmerl/src/xmerl_scan.erl b/lib/xmerl/src/xmerl_scan.erl
index 25c6547497..05431a5fd2 100644
--- a/lib/xmerl/src/xmerl_scan.erl
+++ b/lib/xmerl/src/xmerl_scan.erl
@@ -20,8 +20,8 @@
%% Description : Single-pass XML scanner. See xmerl.hrl for data defs.
%% @doc This module is the interface to the XML parser, it handles XML 1.0.
-%% The XML parser is activated through
-%% <tt>xmerl_scan:string/[1,2]</tt> or
+%% The XML parser is activated through
+%% <tt>xmerl_scan:string/[1,2]</tt> or
%% <tt>xmerl_scan:file/[1,2]</tt>.
%% It returns records of the type defined in xmerl.hrl.
%% See also <a href="xmerl_examples.html">tutorial</a> on customization
@@ -79,15 +79,15 @@
%% <dt><code>{validation, Flag}</code></dt>
%% <dd>Controls whether to process as a validating XML parser:
%% 'off' (default) no validation, or validation 'dtd' by DTD or 'schema'
-%% by XML Schema. 'false' and 'true' options are obsolete
-%% (i.e. they may be removed in a future release), if used 'false'
+%% by XML Schema. 'false' and 'true' options are obsolete
+%% (i.e. they may be removed in a future release); if used, 'false'
%% equals 'off' and 'true' equals 'dtd'.</dd>
%% <dt><code>{schemaLocation, [{Namespace,Link}|...]}</code></dt>
-%% <dd>Tells explicitly which XML Schema documents to use to validate
-%% the XML document. Used together with the
+%% <dd>Tells explicitly which XML Schema documents to use to validate
+%% the XML document. Used together with the
%% <code>{validation,schema}</code> option.</dd>
%% <dt><code>{quiet, Flag}</code></dt>
-%% <dd>Set to 'true' if xmerl should behave quietly and not output any
+%% <dd>Set to 'true' if xmerl should behave quietly and not output any
%% information to standard output (default 'false').</dd>
%% <dt><code>{doctype_DTD, DTD}</code></dt>
%% <dd>Allows to specify DTD name when it isn't available in the XML
@@ -100,7 +100,21 @@
%% <dd>Set default character set used (default UTF-8).
%% This character set is used only if not explicitly given by the XML
%% declaration. </dd>
+%% <dt><code>{document, Flag}</code></dt>
+%% <dd>Set to 'true' if xmerl should return a complete XML document
+%% as an xmlDocument record (default 'false').</dd>
+%% <dt><code>{comments, Flag}</code></dt>
+%% <dd>Set to 'false' if xmerl should skip comments; otherwise they will
+%% be returned as xmlComment records (default 'true').</dd>
+%% <dt><code>{default_attrs, Flag}</code></dt>
+%% <dd>Set to 'true' if xmerl should add, to each element, any missing
+%% attributes that have a defined default value (default 'false').</dd>
%% </dl>
+%% @type document() = xmlElement() | xmlDocument(). <p>
+%% The document returned by <tt>xmerl_scan:string/[1,2]</tt> and
+%% <tt>xmerl_scan:file/[1,2]</tt>. The type of the returned record depends on
+%% the value of the document option passed to the function.
+%% </p>
-module(xmerl_scan).
@@ -224,7 +238,7 @@ cont_state(X, S=#xmerl_scanner{fun_states = FS}) ->
file(F) ->
file(F, []).
-%% @spec file(Filename::string(), Options::option_list()) -> {xmlElement(),Rest}
+%% @spec file(Filename::string(), Options::option_list()) -> {document(),Rest}
%% Rest = list()
%%% @doc Parse file containing an XML document
file(F, Options) ->
@@ -261,10 +275,10 @@ int_file_decl(F, Options,_ExtCharset) ->
%% @spec string(Text::list()) -> {xmlElement(),Rest}
%% Rest = list()
%% @equiv string(Text, [])
-string(Str) ->
+string(Str) ->
string(Str, []).
-%% @spec string(Text::list(),Options::option_list()) -> {xmlElement(),Rest}
+%% @spec string(Text::list(),Options::option_list()) -> {document(),Rest}
%% Rest = list()
%%% @doc Parse string containing an XML document
string(Str, Options) ->
@@ -292,7 +306,7 @@ int_string(Str, Options, XMLBase, FileName) ->
scan_document(Str2, S#xmerl_scanner{encoding="iso-10646-utf-1"});
{undefined,undefined,Str2} -> %% no auto detection
scan_document(Str2, S);
- {external,ExtCharset,Str2} ->
+ {external,ExtCharset,Str2} ->
%% no auto detection, ExtCharset is an explicitly provided
%% 7 bit,8 bit or utf-8 encoding
scan_document(Str2, S#xmerl_scanner{encoding=atom_to_list(ExtCharset)})
@@ -311,7 +325,7 @@ int_string_decl(Str, Options, XMLBase, FileName) ->
{external,ExtCharset,Str2} ->
scan_decl(Str2, S#xmerl_scanner{encoding=atom_to_list(ExtCharset)})
end.
-
+
initial_state0(Options,XMLBase) ->
@@ -372,7 +386,7 @@ initial_state([{line, L}|T], S) ->
initial_state(T, S#xmerl_scanner{line = L});
initial_state([{namespace_conformant, F}|T], S) when F==true; F==false ->
initial_state(T, S#xmerl_scanner{namespace_conformant = F});
-initial_state([{validation, F}|T], S)
+initial_state([{validation, F}|T], S)
when F==off; F==dtd; F==schema; F==true; F==false ->
initial_state(T, S#xmerl_scanner{validation = validation_value(F)});
initial_state([{schemaLocation, SL}|T], S) when is_list(SL) ->
@@ -381,6 +395,12 @@ initial_state([{quiet, F}|T], S) when F==true; F==false ->
initial_state(T, S#xmerl_scanner{quiet = F});
initial_state([{doctype_DTD,DTD}|T], S) ->
initial_state(T,S#xmerl_scanner{doctype_DTD = DTD});
+initial_state([{document, F}|T], S) when is_boolean(F) ->
+ initial_state(T,S#xmerl_scanner{document = F});
+initial_state([{comments, F}|T], S) when is_boolean(F) ->
+ initial_state(T,S#xmerl_scanner{comments = F});
+initial_state([{default_attrs, F}|T], S) when is_boolean(F) ->
+ initial_state(T,S#xmerl_scanner{default_attrs = F});
initial_state([{text_decl,Bool}|T], S) ->
initial_state(T,S#xmerl_scanner{text_decl=Bool});
initial_state([{environment,Env}|T], S) ->
@@ -402,7 +422,7 @@ validation_value(false) ->
validation_value(F) ->
F.
-%% Used for compacting (some) indentations.
+%% Used for compacting (some) indentations.
%% See also fast_accumulate_whitespace().
common_data() ->
{comdata(lists:duplicate(60, $\s), []),
@@ -445,7 +465,7 @@ event(_X, S) ->
%% where Pos' can be derived from X#xmlElement.pos, X#xmlText.pos, or
%% X#xmlAttribute.pos (whichever is the current object type.)
%% The acc/3 function is not allowed to redefine the type of object
-%% being defined, but _is_ allowed to either ignore it or split it
+%% being defined, but _is_ allowed to either ignore it or split it
%% into multiple objects (in which case {Acc',Pos',S'} should be returned.)
%% If {Acc',S'} is returned, Pos will be incremented by 1 by default.
%% Below is an example of an acceptable operation
@@ -468,10 +488,10 @@ fetch_URI(URI, S) ->
%% assume URI is a filename
Split = filename:split(URI),
Filename = fun([])->[];(X)->lists:last(X) end (Split),
- Fullname =
+ Fullname =
case Split of %% how about Windows systems?
["file:"|Name]-> %% absolute path, see RFC2396 sect 3
- %% file:/dtd_name
+ %% file:/dtd_name
filename:join(["/"|Name]);
["/"|Rest] when Rest /= [] ->
%% absolute path name
@@ -518,20 +538,21 @@ scan_document(Str0, S=#xmerl_scanner{event_fun = Event,
line = L, col = C,
environment=Env,
encoding=Charset,
+ document=Document,
validation=ValidateResult}) ->
S1 = Event(#xmerl_event{event = started,
line = L,
col = C,
data = document}, S),
-
+
%% Transform to given character set.
- %% Note that if another character set is given in the encoding
+ %% Note that if another character set is given in the encoding
%% attribute in a XML declaration that one will be used later
Str=if
Charset == "utf-8" ->
Str0;
- Charset=/=undefined -> % Default character set is UTF-8
- xmerl_ucs:to_unicode(Str0,list_to_atom(Charset));
+ Charset =/= undefined -> % Default character set is UTF-8
+ xmerl_ucs:to_unicode(Str0, list_to_atom(Charset));
true -> %% Charset is undefined if no external input is
%% given, and no auto detection of character
%% encoding was made.
@@ -539,63 +560,71 @@ scan_document(Str0, S=#xmerl_scanner{event_fun = Event,
end,
%% M1 = erlang:memory(),
%% io:format("Memory status before prolog: ~p~n",[M1]),
- {T1, S2} = scan_prolog(Str, S1, _StartPos = 1),
+ {Prolog, Pos, T1, S2} = scan_prolog(Str, S1, _StartPos = 1),
%% M2 = erlang:memory(),
%% io:format("Memory status after prolog: ~p~n",[M2]),
%%io:format("scan_document 2, prolog parsed~n",[]),
- T2 = scan_mandatory("<",T1,1,S2,expected_element_start_tag),
+ T2 = scan_mandatory("<", T1, 1, S2, expected_element_start_tag),
%% M3 = erlang:memory(),
%% io:format("Memory status before element: ~p~n",[M3]),
- {Res, T3, S3} =scan_element(T2,S2,_StartPos = 1),
+ {Res, T3, S3} = scan_element(T2,S2,Pos),
%% M4 = erlang:memory(),
%% io:format("Memory status after element: ~p~n",[M4]),
- {Tail, S4}=scan_misc(T3, S3, _StartPos = 1),
+ {Misc, _Pos1, Tail, S4}=scan_misc(T3, S3, Pos + 1),
%% M5 = erlang:memory(),
%% io:format("Memory status after misc: ~p~n",[M5]),
-
+
S5 = #xmerl_scanner{} = Event(#xmerl_event{event = ended,
line = S4#xmerl_scanner.line,
col = S4#xmerl_scanner.col,
data = document}, S4),
- {Res2,S6} = case validation_mode(ValidateResult) of
+ {Res2, S6} = case validation_mode(ValidateResult) of
off ->
- {Res,cleanup(S5)};
+ {Res, cleanup(S5)};
dtd when Env == element; Env == prolog ->
check_decl2(S5),
- case xmerl_validate:validate(S5,Res) of
- {'EXIT',{error,Reason}} ->
- S5b=cleanup(S5),
- ?fatal({failed_validation,Reason}, S5b);
- {'EXIT',Reason} ->
- S5b=cleanup(S5),
- ?fatal({failed_validation,Reason}, S5b);
- {error,Reason} ->
- S5b=cleanup(S5),
- ?fatal({failed_validation,Reason}, S5b);
- {error,Reason,_Next} ->
- S5b=cleanup(S5),
- ?fatal({failed_validation,Reason}, S5b);
+ case xmerl_validate:validate(S5, Res) of
+ {'EXIT', {error, Reason}} ->
+ S5b = cleanup(S5),
+ ?fatal({failed_validation, Reason}, S5b);
+ {'EXIT', Reason} ->
+ S5b = cleanup(S5),
+ ?fatal({failed_validation, Reason}, S5b);
+ {error, Reason} ->
+ S5b = cleanup(S5),
+ ?fatal({failed_validation, Reason}, S5b);
+ {error, Reason, _Next} ->
+ S5b = cleanup(S5),
+ ?fatal({failed_validation, Reason}, S5b);
_XML ->
- {Res,cleanup(S5)}
+ {Res, cleanup(S5)}
end;
schema ->
- case schemaLocations(Res,S5) of
- {ok,Schemas} ->
+ case schemaLocations(Res, S5) of
+ {ok, Schemas} ->
cleanup(S5),
%%io:format("Schemas: ~p~nRes: ~p~ninhertih_options(S): ~p~n",
%% [Schemas,Res,inherit_options(S5)]),
- XSDRes = xmerl_xsd:process_validate(Schemas,Res,
+ XSDRes = xmerl_xsd:process_validate(Schemas, Res,
inherit_options(S5)),
- handle_schema_result(XSDRes,S5);
+ handle_schema_result(XSDRes, S5);
_ ->
- {Res,cleanup(S5)}
+ {Res, cleanup(S5)}
end;
_ ->
- {Res,cleanup(S5)}
+ {Res, cleanup(S5)}
end,
- {Res2, Tail, S6}.
+ Res3 =
+ case Document of
+ true ->
+ Content = lists:reverse(Prolog, [Res2 | lists:reverse(Misc)]),
+ #xmlDocument{content = Content};
+ false ->
+ Res2
+ end,
+ {Res3, Tail, S6}.
scan_decl(Str, S=#xmerl_scanner{event_fun = Event,
@@ -607,13 +636,13 @@ scan_decl(Str, S=#xmerl_scanner{event_fun = Event,
line = L,
col = C,
data = document}, S),
-
+
case scan_prolog(Str, S1, _StartPos = 1) of
- {T2="<"++_, S2} ->
+ {_,_,T2="<"++_, S2} ->
{{S2#xmerl_scanner.user_state,T2},[],S2};
- {[], S2}->
+ {_,_,[], S2}->
{[],[],S2};
- {T2, S2} ->
+ {_,_,T2, S2} ->
{_,_,S3} = scan_content(T2,S2,[],_Attrs=[],S2#xmerl_scanner.space,
_Lang=[],_Parents=[],#xmlNamespace{}),
{T2,[],S3}
@@ -624,28 +653,31 @@ scan_decl(Str, S=#xmerl_scanner{event_fun = Event,
%%% prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
%%%
%% empty text declarations are handled by the first function clause.
-scan_prolog([], S=#xmerl_scanner{continuation_fun = F}, Pos) ->
+scan_prolog(T, S, Pos) ->
+ scan_prolog(T, S, Pos, []).
+scan_prolog([], S=#xmerl_scanner{continuation_fun = F}, Pos, Acc) ->
?dbg("cont()...~n", []),
- F(fun(MoreBytes, S1) -> scan_prolog(MoreBytes, S1, Pos) end,
- fun(S1) -> {[], S1} end,
+ F(fun(MoreBytes, S1) -> scan_prolog(MoreBytes, S1, Pos, Acc) end,
+ fun(S1) -> {Acc, Pos, [], S1} end,
S);
-scan_prolog("<?xml"++T,S0=#xmerl_scanner{encoding=Charset0,col=Col,line=L},Pos)
- when ?whitespace(hd(T)) ->
- {Charset,T3, S3}=
+scan_prolog("<?xml"++T,
+ S0=#xmerl_scanner{encoding=Charset0,col=Col,line=L},
+ Pos,Acc) when ?whitespace(hd(T)) ->
+ {Charset, T3, S3} =
if
- Col==1,L==1,S0#xmerl_scanner.text_decl==true ->
+ Col==1,L==1,S0#xmerl_scanner.text_decl==true ->
?dbg("prolog(\"<?xml\")~n", []),
?bump_col(5),
{_,T1,S1} = mandatory_strip(T,S),
{Decl,T2, S2}=scan_text_decl(T1,S1),
Encoding=Decl#xmlDecl.encoding,
- {Encoding,T2, S2#xmerl_scanner{encoding=Encoding}};
- Col==1,L==1 ->
+ {Encoding, T2, S2#xmerl_scanner{encoding=Encoding}};
+ Col==1,L==1 ->
?dbg("prolog(\"<?xml\")~n", []),
?bump_col(5),
{Decl,T2, S2}=scan_xml_decl(T, S),
Encoding=Decl#xmlDecl.encoding,
- {Encoding,T2, S2#xmerl_scanner{encoding=Encoding}};
+ {Encoding, T2, S2#xmerl_scanner{encoding=Encoding}};
true ->
?fatal({xml_declaration_must_be_first_in_doc,Col,L},S0)
end,
@@ -659,7 +691,7 @@ scan_prolog("<?xml"++T,S0=#xmerl_scanner{encoding=Charset0,col=Col,line=L},Pos)
%% Now transform to declared character set.
if
Charset==Charset0 -> % Document already transformed to this charset!
- scan_prolog(T3, S3, Pos);
+ scan_prolog(T3, S3, Pos, Acc);
Charset0=/=undefined ->
%% For example may an external entity
%% have the BOM for utf-16 and the internal
@@ -668,17 +700,18 @@ scan_prolog("<?xml"++T,S0=#xmerl_scanner{encoding=Charset0,col=Col,line=L},Pos)
%% 'iso-10646-utf-1', and Charset will be 'utf-16', all
%% legal.
%%
- scan_prolog(T3,S3#xmerl_scanner{encoding=Charset0},Pos);
+ scan_prolog(T3,S3#xmerl_scanner{encoding=Charset0},Pos,Acc);
Charset == "utf-8" ->
- scan_prolog(T3, S3, Pos);
+ scan_prolog(T3, S3, Pos, Acc);
Charset=/=undefined -> % Document not previously transformed
T4=xmerl_ucs:to_unicode(T3,list_to_atom(Charset)),
- scan_prolog(T4, S3, Pos);
+ scan_prolog(T4, S3, Pos, Acc);
true -> % No encoding info given
- scan_prolog(T3, S3, Pos)
+ scan_prolog(T3, S3, Pos, Acc)
end;
-scan_prolog("<!DOCTYPE" ++ T, S0=#xmerl_scanner{environment=prolog,
- encoding=_Charset}, Pos) ->
+scan_prolog("<!DOCTYPE" ++ T,
+ S0=#xmerl_scanner{environment=prolog,encoding=_Charset},
+ Pos, Acc) ->
?dbg("prolog(\"<!DOCTYPE\")~n", []),
?bump_col(9),
%% If no known character set assume it is UTF-8
@@ -687,12 +720,15 @@ scan_prolog("<!DOCTYPE" ++ T, S0=#xmerl_scanner{environment=prolog,
true -> T
end,
{T2, S1} = scan_doctype(T1, S),
- scan_misc(T2, S1, Pos);
-scan_prolog(Str="%"++_T,S=#xmerl_scanner{environment={external,_}},_Pos) ->
- scan_ext_subset(Str,S);
-scan_prolog(Str, S0 = #xmerl_scanner{user_state=_US,encoding=_Charset},Pos) ->
+ scan_misc(T2, S1, Pos, Acc);
+scan_prolog(Str="%"++_T,S=#xmerl_scanner{environment={external,_}},
+ Pos,Acc) ->
+ {T, S1} = scan_ext_subset(Str,S),
+ {Acc, Pos, T, S1};
+scan_prolog(Str, S0 = #xmerl_scanner{user_state=_US,encoding=_Charset},
+ Pos,Acc) ->
?dbg("prolog(\"<\")~n", []),
-
+
%% Check for Comments, PI before possible DOCTYPE declaration
?bump_col(1),
%% If no known character set assume it is UTF-8
@@ -700,28 +736,30 @@ scan_prolog(Str, S0 = #xmerl_scanner{user_state=_US,encoding=_Charset},Pos) ->
%% Charset==undefined -> xmerl_ucs:to_unicode(Str,'utf-8');
true -> Str
end,
- {T1, S1}=scan_misc(T, S, Pos),
- scan_prolog2(T1,S1,Pos).
+ {Acc1, Pos1, T1, S1}=scan_misc(T, S, Pos, Acc),
+ scan_prolog2(T1,S1,Pos1,Acc1).
-scan_prolog2([], S=#xmerl_scanner{continuation_fun = F}, Pos) ->
+scan_prolog2([], S=#xmerl_scanner{continuation_fun = F}, Pos, Acc) ->
?dbg("cont()...~n", []),
- F(fun(MoreBytes, S1) -> scan_prolog2(MoreBytes, S1, Pos) end,
- fun(S1) -> {[], S1} end,
+ F(fun(MoreBytes, S1) -> scan_prolog2(MoreBytes, S1, Pos, Acc) end,
+ fun(S1) -> {Acc, Pos, [], S1} end,
S);
-scan_prolog2("<!DOCTYPE" ++ T, S0=#xmerl_scanner{environment=prolog}, Pos) ->
+scan_prolog2("<!DOCTYPE" ++ T, S0=#xmerl_scanner{environment=prolog},
+ Pos, Acc) ->
?dbg("prolog(\"<!DOCTYPE\")~n", []),
?bump_col(9),
{T1, S1} = scan_doctype(T, S),
- scan_misc(T1, S1, Pos);
-scan_prolog2(Str = "<!" ++ _, S, _Pos) ->
+ scan_misc(T1, S1, Pos, Acc);
+scan_prolog2(Str = "<!" ++ _, S, Pos, Acc) ->
?dbg("prolog(\"<!\")~n", []),
%% In e.g. a DTD, we jump directly to markup declarations
- scan_ext_subset(Str, S);
-scan_prolog2(Str, S0 = #xmerl_scanner{user_state=_US},Pos) ->
+ {T, S1} = scan_ext_subset(Str, S),
+ {Acc, Pos, T, S1};
+scan_prolog2(Str, S0 = #xmerl_scanner{user_state=_US},Pos,Acc) ->
?dbg("prolog(\"<\")~n", []),
-
+
%% Here we consider the DTD provided by doctype_DTD option,
S1 =
case S0 of
@@ -733,7 +771,7 @@ scan_prolog2(Str, S0 = #xmerl_scanner{user_state=_US},Pos) ->
end,
%% Check for more Comments and PI after DOCTYPE declaration
% ?bump_col(1),
- scan_misc(Str, S1, Pos).
+ scan_misc(Str, S1, Pos, Acc).
@@ -743,26 +781,46 @@ scan_prolog2(Str, S0 = #xmerl_scanner{user_state=_US},Pos) ->
%% - Comments (unless comments=false) and PIs are passed to acc_fun and
%% returned in Acc; only an xmlDocument result includes them.
%% - scan_misc/3 implements Misc* as that is how the rule is always used
-scan_misc([], S=#xmerl_scanner{continuation_fun = F}, Pos) ->
+scan_misc(T, S, Pos) ->
+ scan_misc(T, S, Pos, []).
+scan_misc([], S=#xmerl_scanner{continuation_fun = F}, Pos, Acc) ->
?dbg("cont()...~n", []),
- F(fun(MoreBytes, S1) -> scan_misc(MoreBytes, S1, Pos) end,
- fun(S1) -> {[], S1} end,
+ F(fun(MoreBytes, S1) -> scan_misc(MoreBytes, S1, Pos, Acc) end,
+ fun(S1) -> {Acc, Pos, [], S1} end,
S);
-scan_misc("<!--" ++ T, S0, Pos) -> % Comment
+scan_misc("<!--" ++ T, S0=#xmerl_scanner{acc_fun = F, comments=CF}, Pos, Acc) -> % Comment
?bump_col(4),
- {_, T1, S1} = scan_comment(T, S, Pos, _Parents = [], _Lang = []),
- scan_misc(T1,S1,Pos);
-scan_misc("<?" ++ T, S0, Pos) -> % PI
+ {C, T1, S1} = scan_comment(T, S, Pos, _Parents = [], _Lang = []),
+ case CF of
+ true ->
+ {Acc2, Pos2, S3} =
+ case F(C, Acc, S1) of
+ {Acc1, S2} ->
+ {Acc1, Pos + 1, S2};
+ {Acc1, Pos1, S2} ->
+ {Acc1, Pos1, S2}
+ end,
+ scan_misc(T1, S3, Pos2, Acc2);
+ false ->
+ scan_misc(T1, S1, Pos, Acc)
+ end;
+scan_misc("<?" ++ T, S0=#xmerl_scanner{acc_fun = F}, Pos, Acc) -> % PI
?dbg("prolog(\"<?\")~n", []),
?bump_col(2),
- {_PI, T1, S1} = scan_pi(T, S, Pos),
- scan_misc(T1,S1,Pos);
-scan_misc(T=[H|_T], S, Pos) when ?whitespace(H) ->
+ {PI, T1, S1} = scan_pi(T, S, Pos, []),
+ {Acc2, Pos2, S3} = case F(PI, Acc, S1) of
+ {Acc1, S2} ->
+ {Acc1, Pos + 1, S2};
+ {Acc1, Pos1, S2} ->
+ {Acc1, Pos1, S2}
+ end,
+ scan_misc(T1,S3,Pos2,Acc2);
+scan_misc(T=[H|_T], S, Pos, Acc) when ?whitespace(H) ->
?dbg("prolog(whitespace)~n", []),
{_,T1,S1}=strip(T,S),
- scan_misc(T1,S1,Pos);
-scan_misc(T,S,_Pos) ->
- {T,S}.
+ scan_misc(T1,S1,Pos,Acc);
+scan_misc(T,S,Pos,Acc) ->
+ {Acc,Pos,T,S}.
cleanup(S=#xmerl_scanner{keep_rules = false,
@@ -780,7 +838,7 @@ scan_xml_decl(T, S) ->
{_,T1,S1} = mandatory_strip(T,S),
{T2,S2} =
case T1 of
- "version" ++ _T2 ->
+ "version" ++ _T2 ->
{_T2,S1#xmerl_scanner{col=S1#xmerl_scanner.col+7}};
_ -> ?fatal(expected_version_attribute,S1)
end,
@@ -789,7 +847,8 @@ scan_xml_decl(T, S) ->
Attr = #xmlAttribute{name = version,
parents = [{xml, _XMLPos = 1}],
value = Vsn},
- scan_xml_decl(T4, S4, #xmlDecl{attributes = [Attr]}).
+ scan_xml_decl(T4, S4, #xmlDecl{vsn = Vsn,
+ attributes = [Attr]}).
scan_xml_decl([], S=#xmerl_scanner{continuation_fun = F}, Decl) ->
?dbg("cont()...~n", []),
@@ -820,8 +879,8 @@ scan_xml_decl2("encoding" ++ T, S0 = #xmerl_scanner{event_fun = Event},
value = LowEncName},
Decl = Decl0#xmlDecl{encoding = LowEncName,
attributes = [Attr|Attrs]},
- S3 = #xmerl_scanner{} = Event(#xmerl_event{event = ended,
- line = S0#xmerl_scanner.line,
+ S3 = #xmerl_scanner{} = Event(#xmerl_event{event = ended,
+ line = S0#xmerl_scanner.line,
col = S0#xmerl_scanner.col,
data = Attr}, S2),
case T2 of
@@ -843,7 +902,7 @@ scan_xml_decl3("?>" ++ T, S0,Decl) ->
return_xml_decl(T,S,Decl);
scan_xml_decl3("standalone" ++ T,S0 = #xmerl_scanner{event_fun = Event},
Decl0 = #xmlDecl{attributes = Attrs}) ->
- %% [32] SDDecl
+ %% [32] SDDecl
?bump_col(10),
{T1, S1} = scan_eq(T, S),
{StValue,T2,S2}=scan_standalone_value(T1,S1),
@@ -852,8 +911,8 @@ scan_xml_decl3("standalone" ++ T,S0 = #xmerl_scanner{event_fun = Event},
value = StValue},
Decl = Decl0#xmlDecl{standalone = StValue,
attributes = [Attr|Attrs]},
- S3 = #xmerl_scanner{} = Event(#xmerl_event{event = ended,
- line = S0#xmerl_scanner.line,
+ S3 = #xmerl_scanner{} = Event(#xmerl_event{event = ended,
+ line = S0#xmerl_scanner.line,
col = S0#xmerl_scanner.col,
data = Attr}, S2),
{_,T3,S4} = strip(T2,S3),
@@ -874,7 +933,7 @@ return_xml_decl(T,S=#xmerl_scanner{hook_fun = _Hook,
%% {Ret, S3} = Hook(Decl, S2),
%% {Ret, T1, S3}.
{Decl, T1, S2}.
-
+
scan_standalone_value("'yes'" ++T,S0)->
?bump_col(5),
@@ -917,7 +976,7 @@ scan_text_decl(T,S=#xmerl_scanner{event_fun = Event}) ->
scan_text_decl(T5,S6,Decl).
scan_text_decl("?>"++T,S0 = #xmerl_scanner{hook_fun = _Hook,
- event_fun = Event},
+ event_fun = Event},
Decl0 = #xmlDecl{attributes = Attrs}) ->
?bump_col(2),
?strip1,
@@ -942,7 +1001,7 @@ scan_optional_version("version"++T,S0) ->
{#xmlDecl{attributes=[Attr]},T4,S4};
scan_optional_version(T,S) ->
{#xmlDecl{attributes=[]},T,S}.
-
+
%%%%%%% [81] EncName
@@ -951,7 +1010,7 @@ scan_enc_name([], S=#xmerl_scanner{continuation_fun = F}) ->
F(fun(MoreBytes, S1) -> scan_enc_name(MoreBytes, S1) end,
fun(S1) -> ?fatal(expected_encoding_name, S1) end,
S);
-scan_enc_name([H|T], S0) when H >= $"; H =< $' ->
+scan_enc_name([H|T], S0) when H >= $"; H =< $' ->
?bump_col(1),
scan_enc_name(T, S, H, []).
@@ -1004,7 +1063,7 @@ scan_xml_vsn([H|T], S) when H==$"; H==$'->
xml_vsn([], S=#xmerl_scanner{continuation_fun = F}, Delim, Acc) ->
?dbg("cont()...~n", []),
- F(fun(MoreBytes, S1) -> xml_vsn(MoreBytes, S1, Delim, Acc) end,
+ F(fun(MoreBytes, S1) -> xml_vsn(MoreBytes, S1, Delim, Acc) end,
fun(S1) -> ?fatal(unexpected_end, S1) end,
S);
xml_vsn([H|T], S=#xmerl_scanner{col = C}, H, Acc) ->
@@ -1025,50 +1084,53 @@ xml_vsn([H|T], S=#xmerl_scanner{col = C}, Delim, Acc) ->
%%%%%%% [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
-scan_pi([], S=#xmerl_scanner{continuation_fun = F}, Pos) ->
+scan_pi([], S=#xmerl_scanner{continuation_fun = F}, Pos, Ps) ->
?dbg("cont()...~n", []),
- F(fun(MoreBytes, S1) -> scan_pi(MoreBytes, S1, Pos) end,
+ F(fun(MoreBytes, S1) -> scan_pi(MoreBytes, S1, Pos, Ps) end,
fun(S1) -> ?fatal(unexpected_end, S1) end,
S);
-scan_pi(Str = [H1,H2,H3 | T],S0=#xmerl_scanner{line = L, col = C}, Pos)
+scan_pi(Str = [H1,H2,H3 | T],S0=#xmerl_scanner{line = L, col = C}, Pos, Ps)
when H1==$x;H1==$X ->
%% names beginning with [xX][mM][lL] are reserved for future use.
?bump_col(3),
- if
+ if
((H2==$m) or (H2==$M)) and
((H3==$l) or (H3==$L)) ->
- scan_wellknown_pi(T,S,Pos);
+ scan_wellknown_pi(T,S,Pos,Ps);
true ->
{Target, _NamespaceInfo, T1, S1} = scan_name(Str, S),
- scan_pi(T1, S1, Target, L, C, Pos, [])
+ scan_pi(T1, S1, Target, L, C, Pos, Ps, [])
end;
-scan_pi(Str, S=#xmerl_scanner{line = L, col = C}, Pos) ->
+scan_pi(Str, S=#xmerl_scanner{line = L, col = C}, Pos, Ps) ->
{Target, _NamespaceInfo, T1, S1} = scan_name(Str, S),
- scan_pi(T1, S1, Target, L, C, Pos,[]).
+ scan_pi(T1, S1, Target, L, C, Pos, Ps, []).
%%% More info on xml-stylesheet can be found at:
%%% "Associating Style Sheets with XML documents", Version 1.0,
%%% W3C Recommendation 29 June 1999 (http://www.w3.org/TR/xml-stylesheet/)
-scan_wellknown_pi("-stylesheet"++T, S0=#xmerl_scanner{line=L,col=C},Pos) ->
+scan_wellknown_pi("-stylesheet"++T, S0=#xmerl_scanner{line=L,col=C},Pos,Ps) ->
?dbg("prolog(\"<?xml-stylesheet\")~n", []),
?bump_col(16),
- scan_pi(T, S, "xml-stylesheet",L,C,Pos,[]);
-scan_wellknown_pi(Str,S,_Pos) ->
+ scan_pi(T, S, "xml-stylesheet",L,C,Pos,Ps,[]);
+scan_wellknown_pi(Str,S,_Pos,_Ps) ->
?fatal({invalid_target_name, lists:sublist(Str, 1, 10)}, S).
-scan_pi([], S=#xmerl_scanner{continuation_fun = F}, Target,L, C, Pos, Acc) ->
+scan_pi([], S=#xmerl_scanner{continuation_fun = F}, Target,
+ L, C, Pos, Ps, Acc) ->
?dbg("cont()...~n", []),
- F(fun(MoreBytes, S1) -> scan_pi(MoreBytes, S1, Target, L, C, Pos, Acc) end,
+ F(fun(MoreBytes, S1) -> scan_pi(MoreBytes, S1, Target,
+ L, C, Pos, Ps, Acc) end,
fun(S1) -> ?fatal(unexpected_end, S1) end,
S);
scan_pi("?>" ++ T, S0 = #xmerl_scanner{hook_fun = Hook,
- event_fun = Event},
- Target, L, C, Pos, Acc) ->
+ event_fun = Event},
+ Target, L, C, Pos, Ps, Acc) ->
?bump_col(2),
PI = #xmlPI{name = Target,
+ parents = Ps,
pos = Pos,
value = lists:reverse(Acc)},
S1 = #xmerl_scanner{} = Event(#xmerl_event{event = ended,
@@ -1077,22 +1139,25 @@ scan_pi("?>" ++ T, S0 = #xmerl_scanner{hook_fun = Hook,
data = PI}, S),
{Ret, S2} = Hook(PI, S1),
{Ret, T, S2};
-scan_pi([H|T], S, Target, L, C, Pos, Acc) when ?whitespace(H) ->
+scan_pi([H|T], S, Target, L, C, Pos, Ps, Acc) when ?whitespace(H) ->
?strip1,
- scan_pi2(T1, S1, Target, L, C, Pos, Acc);
-scan_pi([H|_T],S,_Target, _L, _C, _Pos, _Acc) ->
+ scan_pi2(T1, S1, Target, L, C, Pos, Ps, Acc);
+scan_pi([H|_T],S,_Target, _L, _C, _Pos, _Ps, _Acc) ->
?fatal({expected_whitespace_OR_end_of_PI,{char,H}}, S).
-scan_pi2([], S=#xmerl_scanner{continuation_fun = F}, Target,L, C, Pos, Acc) ->
+scan_pi2([], S=#xmerl_scanner{continuation_fun = F}, Target,
+ L, C, Pos, Ps, Acc) ->
?dbg("cont()...~n", []),
- F(fun(MoreBytes, S1) -> scan_pi2(MoreBytes, S1, Target, L, C, Pos, Acc) end,
+ F(fun(MoreBytes, S1) -> scan_pi2(MoreBytes, S1, Target,
+ L, C, Pos, Ps, Acc) end,
fun(S1) -> ?fatal(unexpected_end, S1) end,
S);
scan_pi2("?>" ++ T, S0 = #xmerl_scanner{hook_fun = Hook,
- event_fun = Event},
- Target, L, C, Pos, Acc) ->
+ event_fun = Event},
+ Target, L, C, Pos, Ps, Acc) ->
?bump_col(2),
PI = #xmlPI{name = Target,
+ parents = Ps,
pos = Pos,
value = lists:reverse(Acc)},
S1 = #xmerl_scanner{} = Event(#xmerl_event{event = ended,
@@ -1101,14 +1166,14 @@ scan_pi2("?>" ++ T, S0 = #xmerl_scanner{hook_fun = Hook,
data = PI}, S),
{Ret, S2} = Hook(PI, S1),
{Ret, T, S2};
-scan_pi2(Str, S0, Target, L, C, Pos, Acc) ->
+scan_pi2(Str, S0, Target, L, C, Pos, Ps, Acc) ->
?bump_col(1),
{Ch,T} = wfc_legal_char(Str,S),
- scan_pi2(T, S, Target, L, C, Pos, [Ch|Acc]).
+ scan_pi2(T, S, Target, L, C, Pos, Ps, [Ch|Acc]).
-%% [28] doctypedecl ::=
+%% [28] doctypedecl ::=
%% '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
scan_doctype([], S=#xmerl_scanner{continuation_fun = F}) ->
?dbg("cont()...~n", []),
@@ -1214,7 +1279,7 @@ fetch_DTD(undefined, S) ->
S;
% fetch_DTD(_,S=#xmerl_scanner{validation=false}) ->
% S;
-fetch_DTD(DTDSpec, S)->
+fetch_DTD(DTDSpec, S)->
case fetch_and_parse(DTDSpec,S,[{text_decl,true},
{environment,{external,subset}}]) of
NewS when is_record(NewS,xmerl_scanner) ->
@@ -1229,7 +1294,7 @@ fetch_and_parse(ExtSpec,S=#xmerl_scanner{fetch_fun=Fetch,
Options0) ->
RetS =
case Fetch(ExtSpec, S) of
- {ok, NewS} ->
+ {ok, NewS} ->
%% For backward compatibility only. This will be removed later!!
NewS;
{ok, not_fetched,NewS} ->
@@ -1294,7 +1359,7 @@ fetch_not_parse(ExtSpec,S=#xmerl_scanner{fetch_fun=Fetch}) ->
{ok, DataRet, NewS} ->
{String,LocationName} =
case DataRet of
- {file,F} ->
+ {file,F} ->
{get_file(F,S),F};
{string,Str} ->
{binary_to_list(Str),file_name_unknown};
@@ -1310,7 +1375,7 @@ fetch_not_parse(ExtSpec,S=#xmerl_scanner{fetch_fun=Fetch}) ->
get_file(F,S) ->
% io:format("get_file F=~p~n",[F]),
case file:read_file(F) of
- {ok,Bin} ->
+ {ok,Bin} ->
binary_to_list(Bin);
Err ->
?fatal({error_reading_file,F,Err},S)
@@ -1325,7 +1390,7 @@ check_decl(#xmerl_scanner{rules=Tab} = S) ->
check_notations(Tab,S),
check_elements(Tab,S), %% check also attribute defs for element
check_entities(Tab,S).
-
+
check_notations(Tab,S) ->
case ets:match(Tab,{{notation,'$1'},undeclared}) of
[[]] -> ok;
@@ -1374,7 +1439,7 @@ check_attributes([{N1,'ID',_,_,_}=Attr|Rest],S) ->
check_attributes([{_,{enumeration,_},_,_,_}=Attr|T],S) ->
vc_Enumeration(Attr,S),
check_attributes(T,S);
-check_attributes([{_,Ent,_,_,_}=Attr|T],S)
+check_attributes([{_,Ent,_,_,_}=Attr|T],S)
when Ent=='ENTITY';Ent=='ENTITIES' ->
vc_Entity_Name(Attr,S),
check_attributes(T,S);
@@ -1418,7 +1483,7 @@ scan_ext_subset([], S=#xmerl_scanner{continuation_fun = F}) ->
F(fun(MoreBytes, S1) -> scan_ext_subset(MoreBytes, S1) end,
fun(S1) -> {[], S1} end,
S);
-scan_ext_subset("%" ++ T, S0) ->
+scan_ext_subset("%" ++ T, S0) ->
%% DeclSep [28a]: WFC: PE Between Declarations.
%% The replacement text of a parameter entity reference in a
%% DeclSep must match the production extSubsetDecl.
@@ -1472,7 +1537,7 @@ scan_decl_sep(T,S) ->
% {" " ++ EntV2 ++ " ",_S3};
% ExpRef ->
% {ExpRef,S1}
-% end,
+% end,
% {_, T3, S3} = strip(ExpandedRef,S2),
% {_T4,S4} = scan_ext_subset(T3,S3),
% strip(T1,S4).
@@ -1558,7 +1623,7 @@ scan_include(T, S) ->
scan_include(T1, S1).
-%%%%%%% [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
+%%%%%%% [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
%%%%%%% NotationDecl | PI |Comment
%%%%%%% [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
@@ -1575,16 +1640,16 @@ scan_markup_decl("<!--" ++ T, S0) ->
scan_comment(T, S);
scan_markup_decl("<?" ++ T, S0) ->
?bump_col(2),
- {_PI, T1, S1} = scan_pi(T, S,_Pos=markup),
+ {_PI, T1, S1} = scan_pi(T, S,_Pos=markup,[]),
strip(T1, S1);
-scan_markup_decl("<!ELEMENT" ++ T,
+scan_markup_decl("<!ELEMENT" ++ T,
#xmerl_scanner{rules_read_fun = Read,
rules_write_fun = Write,
rules_delete_fun = Delete} = S0) ->
?bump_col(9),
{_,T1,S1} = mandatory_strip(T,S),
{Ename, _NamespaceInfo, T2, S2} = scan_name(T1, S1),
- Element =
+ Element =
case Read(elem_def, Ename, S2) of
El = #xmlElement{elementdef=Decl} when Decl =/= undeclared ->
case S2#xmerl_scanner.validation of
@@ -1625,7 +1690,7 @@ scan_markup_decl("<!NOTATION" ++ T, S0) ->
{_,T1,S1} = mandatory_strip(T,S),
{T2, S2} = scan_notation_decl(T1, S1),
strip(T2,S2);
-scan_markup_decl("<!ATTLIST" ++ T,
+scan_markup_decl("<!ATTLIST" ++ T,
#xmerl_scanner{rules_read_fun = Read,
rules_write_fun = Write,
rules_delete_fun= Delete} = S0) ->
@@ -1642,7 +1707,7 @@ scan_markup_decl("<!ATTLIST" ++ T,
%% internal DTD.
{#xmlElement{},update_attributes(Attributes,[])};
Edef = #xmlElement{attributes = OldAttrs} ->
- Delete(elem_def,Ename,S4),
+ Delete(elem_def,Ename,S4),
%% the slot in rules table must be empty so that the
%% later write has the assumed effect. Read maybe
%% should empty the table slot.
@@ -1661,7 +1726,7 @@ scan_element_completion(T,S) ->
update_attributes(NewAttrs, OldAttrs) ->
update_attributes1(NewAttrs,lists:reverse(OldAttrs)).
-update_attributes1([A = {Name,_Type,_DefaultV,_DefaultD,_Env}|Attrs],
+update_attributes1([A = {Name,_Type,_DefaultV,_DefaultD,_Env}|Attrs],
OldAttrs) ->
case lists:keymember(Name, 1, OldAttrs) of
true ->
@@ -1802,7 +1867,7 @@ scan_notation_type("|" ++ T, S0, Acc) ->
?strip3,
scan_notation_type(T3, S3, [Name | Acc]).
-%%% Validity constraint for NotationType:
+%%% Validity constraint for NotationType:
%%% The used notation names must be declared in the DTD, but they may
%%% be declared later.
notation_exists(Name, #xmerl_scanner{rules_read_fun = Read,
@@ -1931,7 +1996,7 @@ scan_entity_def(Str, S, EName) ->
{environment,{external,{entity,EName}}}]) of
{{_USret,Entity},_Tail,_Sx} ->
{Entity, external,T2, S2};
- {Entity,_Tail,Sx} ->
+ {Entity,_Tail,Sx} ->
OldRef=S2#xmerl_scanner.entity_references,
NewRef=Sx#xmerl_scanner.entity_references,
{Entity,external,T2,
@@ -1981,28 +2046,28 @@ scan_element(T, S, Pos) ->
scan_element(T, S=#xmerl_scanner{line=L,col=C},
Pos, SpaceDefault,Lang, Parents, NS) ->
{Name, NamespaceInfo, T1, S1} = scan_name(T, S),
- vc_Element_valid(Name,S),
+ vc_Element_valid(Name,NamespaceInfo,S),
?strip2,
- scan_element(T2, S2, Pos, Name, L, C, _Attrs = [],
- Lang, Parents, NamespaceInfo, NS,
+ scan_element(T2, S2, Pos, Name, L, C, _Attrs = [],
+ Lang, Parents, NamespaceInfo, NS,
SpaceDefault).
scan_element("/", S=#xmerl_scanner{continuation_fun = F},
- Pos, Name, StartL, StartC, Attrs, Lang, Parents,
+ Pos, Name, StartL, StartC, Attrs, Lang, Parents,
NSI, NS, SpaceDefault) ->
?dbg("trailing / detected~n", []),
- F(fun(MoreBytes, S1) -> scan_element("/" ++ MoreBytes, S1,
- Pos, Name, StartL, StartC, Attrs,
+ F(fun(MoreBytes, S1) -> scan_element("/" ++ MoreBytes, S1,
+ Pos, Name, StartL, StartC, Attrs,
Lang,Parents,NSI,NS,SpaceDefault) end,
fun(S1) -> ?fatal(unexpected_end, S1) end,
S);
-scan_element([], S=#xmerl_scanner{continuation_fun = F},
- Pos, Name, StartL, StartC, Attrs, Lang, Parents,
+scan_element([], S=#xmerl_scanner{continuation_fun = F},
+ Pos, Name, StartL, StartC, Attrs, Lang, Parents,
NSI, NS, SpaceDefault) ->
?dbg("cont()...~n", []),
- F(fun(MoreBytes, S1) -> scan_element(MoreBytes, S1,
- Pos, Name, StartL, StartC, Attrs,
+ F(fun(MoreBytes, S1) -> scan_element(MoreBytes, S1,
+ Pos, Name, StartL, StartC, Attrs,
Lang,Parents,NSI,NS,SpaceDefault) end,
fun(S1) -> ?fatal(unexpected_end, S1) end,
S);
@@ -2010,13 +2075,14 @@ scan_element("/>" ++ T, S0 = #xmerl_scanner{hook_fun = Hook,
event_fun = Event,
line = L, col = C,
xmlbase_cache=XMLBase}, Pos,
- Name, _StartL, _StartC, Attrs0, Lang, Parents, NSI,
+ Name, _StartL, _StartC, Attrs0, Lang, Parents, NSI,
Namespace, _SpaceDefault) ->
?bump_col(2),
Attrs = lists:reverse(Attrs0),
E=processed_whole_element(S, Pos, Name, Attrs, Lang, Parents,NSI,Namespace),
-
- wfc_unique_att_spec(Attrs,S),
+
+ #xmlElement{attributes = Attrs1} = E,
+ wfc_unique_att_spec(Attrs1,S),
S1 = #xmerl_scanner{} = Event(#xmerl_event{event = ended,
line = L,
col = C,
@@ -2025,11 +2091,11 @@ scan_element("/>" ++ T, S0 = #xmerl_scanner{hook_fun = Hook,
S2b=S2#xmerl_scanner{xmlbase=XMLBase},
{Ret, T, S2b};
scan_element(">", S=#xmerl_scanner{continuation_fun = F},
- Pos, Name, StartL, StartC, Attrs, Lang, Parents,
+ Pos, Name, StartL, StartC, Attrs, Lang, Parents,
NSI, NS, SpaceDefault) ->
?dbg("trailing > detected~n", []),
- F(fun(MoreBytes, S1) -> scan_element(">" ++ MoreBytes, S1,
- Pos, Name, StartL, StartC, Attrs,
+ F(fun(MoreBytes, S1) -> scan_element(">" ++ MoreBytes, S1,
+ Pos, Name, StartL, StartC, Attrs,
Lang,Parents,NSI,NS,SpaceDefault) end,
fun(S1) -> ?fatal(unexpected_end, S1) end,
S);
@@ -2038,28 +2104,31 @@ scan_element(">" ++ T, S0 = #xmerl_scanner{event_fun = Event,
line = L, col = C,
xmlbase_cache=XMLBase,
space = SpaceOption},
- Pos, Name, StartL, StartC, Attrs0, Lang, Parents,
+ Pos, Name, StartL, StartC, Attrs0, Lang, Parents,
NSI, Namespace, SpaceDefault) ->
?bump_col(1),
Attrs = lists:reverse(Attrs0),
- wfc_unique_att_spec(Attrs,S),
- XMLSpace = case lists:keysearch('xml:space', #xmlAttribute.name, Attrs) of
+ E0=processed_whole_element(S,Pos,Name,Attrs,Lang,Parents,NSI,Namespace),
+
+ #xmlElement{attributes = Attrs1} = E0,
+ wfc_unique_att_spec(Attrs1,S),
+ XMLSpace = case lists:keysearch('xml:space', #xmlAttribute.name, Attrs1) of
false -> SpaceDefault;
{value, #xmlAttribute{value="default"}} -> SpaceOption;
{value, #xmlAttribute{value="preserve"}} -> preserve;
_ -> SpaceDefault
end,
-
- E0=processed_whole_element(S,Pos,Name,Attrs,Lang,Parents,NSI,Namespace),
+
+ E0=processed_whole_element(S,Pos,Name,Attrs1,Lang,Parents,NSI,Namespace),
S1 = #xmerl_scanner{} = Event(#xmerl_event{event = started,
line = StartL,
col = StartC,
data = E0}, S),
-
- {Content, T1, S2} = scan_content(T, S1, Name, Attrs, XMLSpace,
+
+ {Content, T1, S2} = scan_content(T, S1, Name, Attrs1, XMLSpace,
E0#xmlElement.language,
[{Name, Pos}|Parents], Namespace),
-
+
Element=E0#xmlElement{content=Content,
xmlbase=E0#xmlElement.xmlbase},
S3 = #xmerl_scanner{} = Event(#xmerl_event{event = ended,
@@ -2069,7 +2138,7 @@ scan_element(">" ++ T, S0 = #xmerl_scanner{event_fun = Event,
{Ret, S4} = Hook(Element, S3),
S4b=S4#xmerl_scanner{xmlbase=XMLBase},
{Ret, T1, S4b};
-scan_element(T, S, Pos, Name, StartL, StartC, Attrs, Lang, Parents,
+scan_element(T, S, Pos, Name, StartL, StartC, Attrs, Lang, Parents,
NSI, NS, SpaceDefault) ->
{AttName, NamespaceInfo, T1, S1} = scan_name(T, S),
{T2, S2} = scan_eq(T1, S1),
@@ -2078,26 +2147,27 @@ scan_element(T, S, Pos, Name, StartL, StartC, Attrs, Lang, Parents,
%% check_default_value(S3,DefaultDecl,AttValue),
NewNS = check_namespace(AttName, NamespaceInfo, AttValue, NS),
{T3,S3} = wfc_whitespace_betw_attrs(T3a,S3a),
- ?strip4,
+ ?strip4,
AttrPos = case Attrs of
[] ->
1;
[#xmlAttribute{pos = P}|_] ->
P+1
end,
- Attr = #xmlAttribute{name = AttName,
+ Attr = #xmlAttribute{name = AttName,
+ parents = [{Name, Pos}|Parents],
pos = AttrPos,
language = Lang,
- namespace = NamespaceInfo,
+ nsinfo = NamespaceInfo,
value = AttValue,
normalized = IsNorm},
XMLBase=if
AttName=='xml:base' ->
resolve_relative_uri(AttValue,S4#xmerl_scanner.xmlbase);
- true ->
+ true ->
S4#xmerl_scanner.xmlbase
end,
-
+
#xmerl_scanner{event_fun = Event,
line = Line,
col = Col} = S4,
@@ -2107,9 +2177,17 @@ scan_element(T, S, Pos, Name, StartL, StartC, Attrs, Lang, Parents,
data = Attr},
S4#xmerl_scanner{xmlbase=XMLBase,
xmlbase_cache=S#xmerl_scanner.xmlbase}),
- scan_element(T4, S5, Pos, Name, StartL, StartC, [Attr|Attrs],
+ scan_element(T4, S5, Pos, Name, StartL, StartC, [Attr|Attrs],
Lang, Parents, NSI, NewNS, SpaceDefault).
+get_default_attrs(S = #xmerl_scanner{rules_read_fun = Read}, ElemName) ->
+ case Read(elem_def, ElemName, S) of
+ #xmlElement{attributes = Attrs} ->
+ [ {AttName, AttValue} ||
+ {AttName, _, AttValue, _, _} <- Attrs, AttValue =/= no_value ];
+ _ -> []
+ end.
+
get_att_type(S=#xmerl_scanner{rules_read_fun=Read},AttName,ElemName) ->
case Read(elem_def,ElemName,S) of
#xmlElement{attributes = Attrs} ->
@@ -2135,11 +2213,28 @@ resolve_relative_uri(NewBase,CurrentBase) ->
processed_whole_element(S=#xmerl_scanner{hook_fun = _Hook,
xmlbase = XMLBase,
line = _L, col = _C,
- event_fun = _Event},
+ event_fun = _Event},
Pos, Name, Attrs, Lang, Parents, NSI, Namespace) ->
Language = check_language(Attrs, Lang),
- {ExpName, ExpAttrs} =
+ AllAttrs =
+ case S#xmerl_scanner.default_attrs of
+ true ->
+ [ #xmlAttribute{name = AttName,
+ parents = [{Name, Pos} | Parents],
+ language = Lang,
+ nsinfo = NSI,
+ namespace = Namespace,
+ value = AttValue,
+ normalized = true} ||
+ {AttName, AttValue} <- get_default_attrs(S, Name),
+ AttValue =/= no_value,
+ not lists:keymember(AttName, #xmlAttribute.name, Attrs) ];
+ false ->
+ Attrs
+ end,
+
+ {ExpName, ExpAttrs} =
case S#xmerl_scanner.namespace_conformant of
true ->
%% expand attribute names. We need to do this after having
@@ -2151,16 +2246,17 @@ processed_whole_element(S=#xmerl_scanner{hook_fun = _Hook,
%% should apply to those attributes as well.
%% Note that the default URI does not apply to attrbute names.
TempNamespace = Namespace#xmlNamespace{default = []},
- ExpAttrsX =
+ ExpAttrsX =
[A#xmlAttribute{
+ namespace=Namespace,
expanded_name=expanded_name(
- A#xmlAttribute.name,
- A#xmlAttribute.namespace,
+ A#xmlAttribute.name,
+ A#xmlAttribute.nsinfo,
% NSI,
- TempNamespace, S)} || A <- Attrs],
+ TempNamespace, S)} || A <- AllAttrs],
{expanded_name(Name, NSI, Namespace, S), ExpAttrsX};
false ->
- {Name, Attrs}
+ {Name, AllAttrs}
end,
#xmlElement{name = Name,
@@ -2184,7 +2280,7 @@ check_language([], Lang) ->
check_namespace(xmlns, _, Value, NS) ->
NS#xmlNamespace{default = list_to_atom(Value)};
-check_namespace(_, {"xmlns", Prefix}, Value,
+check_namespace(_, {"xmlns", Prefix}, Value,
NS = #xmlNamespace{nodes = Ns}) ->
NS#xmlNamespace{nodes = keyreplaceadd(
Prefix, 1, Ns, {Prefix, list_to_atom(Value)})};
@@ -2194,10 +2290,32 @@ check_namespace(_, _, _, NS) ->
expanded_name(Name, [], #xmlNamespace{default = []}, _S) ->
Name;
-expanded_name(Name, [], #xmlNamespace{default = URI}, _S) ->
- {URI, Name};
-expanded_name(_Name, {"xmlns", Local}, _NS, _S) -> % CHECK THIS /JB
- {"xmlns",Local};
+expanded_name(Name, [], #xmlNamespace{default = URI}, S) ->
+ case URI of
+ 'http://www.w3.org/XML/1998/namespace' ->
+ ?fatal(cannot_bind_default_namespace_to_xml_namespace_name, S);
+ 'http://www.w3.org/2000/xmlns/' ->
+ ?fatal(cannot_bind_default_namespace_to_xmlns_namespace_name, S);
+ _ ->
+ {URI, Name}
+ end;
+expanded_name(Name, N = {"xmlns", Local}, #xmlNamespace{nodes = Ns}, S) ->
+ {_, Value} = lists:keyfind(Local, 1, Ns),
+ case Name of
+ 'xmlns:xml' when Value =/= 'http://www.w3.org/XML/1998/namespace' ->
+ ?fatal({xml_prefix_cannot_be_redeclared, Value}, S);
+ 'xmlns:xmlns' ->
+ ?fatal({xmlns_prefix_cannot_be_declared, Value}, S);
+ _ ->
+ case Value of
+ 'http://www.w3.org/XML/1998/namespace' ->
+ ?fatal({cannot_bind_prefix_to_xml_namespace, Local}, S);
+ 'http://www.w3.org/2000/xmlns/' ->
+ ?fatal({cannot_bind_prefix_to_xmlns_namespace, Local}, S);
+ _ ->
+ N
+ end
+ end;
expanded_name(_Name, {Prefix, Local}, #xmlNamespace{nodes = Ns}, S) ->
case lists:keysearch(Prefix, 1, Ns) of
{value, {_, URI}} ->
@@ -2207,7 +2325,7 @@ expanded_name(_Name, {Prefix, Local}, #xmlNamespace{nodes = Ns}, S) ->
%% must be declared
?fatal({namespace_prefix_not_declared, Prefix}, S)
end.
-
+
@@ -2233,7 +2351,7 @@ scan_att_value("%"++T,S0=#xmerl_scanner{rules_read_fun=Read,
rules_delete_fun=Delete},AttType) ->
?bump_col(1),
{Name,T1,S1} = scan_pe_reference(T,S),
- {ExpandedRef,S2} =
+ {ExpandedRef,S2} =
case expand_pe_reference(Name,S1,in_literal) of
Tuple when is_tuple(Tuple) ->
%% {system,URI} or {public,URI}
@@ -2271,9 +2389,9 @@ scan_att_chars([H|T], S0, H, Acc, TmpAcc,AttType,IsNorm) -> % End quote
?bump_col(1),
check_att_default_val(S#xmerl_scanner.validation,TmpAcc,AttType,S),
{Acc2,S2,IsNorm2} =
- if
+ if
AttType == 'CDATA' -> {Acc,S,IsNorm};
- true ->
+ true ->
normalize(Acc,S,IsNorm)
end,
{lists:flatten(lists:reverse(Acc2)), T, S2,IsNorm2};
@@ -2328,7 +2446,7 @@ check_att_default_val(dtd,RevName,Ent,S) ->
check_att_default_val(_,_,_,_) ->
ok.
-check_att_default_val(Name,Ent,S=#xmerl_scanner{rules_write_fun=Write})
+check_att_default_val(Name,Ent,S=#xmerl_scanner{rules_write_fun=Write})
when Ent == 'ENTITY'; Ent == 'ENTITIES' ->
case xmerl_lib:is_letter(hd(Name)) of
true -> ok;
@@ -2389,28 +2507,28 @@ valid_Char(_,_,C,S) ->
%%%%%%% [43] content
scan_content(T, S, Name, Attrs, Space, Lang, Parents, NS) ->
- scan_content(T, S, _Pos = 1, Name, Attrs, Space,
+ scan_content(T, S, _Pos = 1, Name, Attrs, Space,
Lang, Parents, NS, _Acc = [],_MarkupDel=[]).
scan_content("<", S= #xmerl_scanner{continuation_fun = F},
Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,_) ->
?dbg("trailing < detected~n", []),
- F(fun(MoreBytes, S1) -> scan_content("<" ++ MoreBytes, S1,
- Pos, Name, Attrs,
+ F(fun(MoreBytes, S1) -> scan_content("<" ++ MoreBytes, S1,
+ Pos, Name, Attrs,
Space, Lang, Parents, NS, Acc,[]) end,
fun(S1) -> ?fatal(unexpected_end, S1) end,
S);
-scan_content([], S=#xmerl_scanner{environment={external,{entity,_}}},
+scan_content([], S=#xmerl_scanner{environment={external,{entity,_}}},
_Pos, _Name, _Attrs, _Space, _Lang, _Parents, _NS, Acc,_) ->
{lists:reverse(Acc),[],S};
-scan_content([], S=#xmerl_scanner{environment=internal_parsed_entity},
+scan_content([], S=#xmerl_scanner{environment=internal_parsed_entity},
_Pos, _Name, _Attrs, _Space, _Lang, _Parents, _NS, Acc,_) ->
{lists:reverse(Acc),[],S};
-scan_content([], S=#xmerl_scanner{continuation_fun = F},
+scan_content([], S=#xmerl_scanner{continuation_fun = F},
Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,_) ->
?dbg("cont()...~n", []),
- F(fun(MoreBytes, S1) -> scan_content(MoreBytes, S1,
- Pos, Name, Attrs,
+ F(fun(MoreBytes, S1) -> scan_content(MoreBytes, S1,
+ Pos, Name, Attrs,
Space, Lang, Parents, NS, Acc,[]) end,
fun(S1) -> ?fatal(unexpected_end, S1) end,
S);
@@ -2427,10 +2545,10 @@ scan_content("</" ++ T, S0, _Pos, Name, _Attrs, _Space, _Lang,
case T2 of
">" ++ T3 ->
{lists:reverse(Acc), T3, S2};
- _ ->
+ _ ->
?fatal({error,{unexpected_end_of_STag}},S)
end;
-scan_content([$&|_T]=Str,
+scan_content([$&|_T]=Str,
#xmerl_scanner{environment={external,{entity,EName}}} = S0,
Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,_) ->
{_EntV,T1,S1}=scan_entity_value(Str,S0 ,[],EName,general),
@@ -2449,12 +2567,26 @@ scan_content("&" ++ T, S0, Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,[]) -
_ ->
scan_content(string_to_char_set(S1#xmerl_scanner.encoding,ExpRef)++T1,S1,Pos,Name,Attrs,Space,Lang,Parents,NS,Acc,[])
end;
-scan_content("<!--" ++ T, S, Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,[]) ->
- {_, T1, S1} = scan_comment(T, S, Pos, Parents, Lang),
- scan_content(T1, S1, Pos+1, Name, Attrs, Space, Lang, Parents, NS, Acc,[]);
+scan_content("<!--" ++ T, S0=#xmerl_scanner{acc_fun = F, comments=CF}, Pos, Name, Attrs, Space,
+ Lang, Parents, NS, Acc,[]) ->
+ ?bump_col(4),
+ {C, T1, S1} = scan_comment(T, S, Pos, Parents, Lang),
+ case CF of
+ true ->
+ {Acc2, Pos2, S3} =
+ case F(C, Acc, S1) of
+ {Acc1, S2} ->
+ {Acc1, Pos + 1, S2};
+ {Acc1, Pos1, S2} ->
+ {Acc1, Pos1, S2}
+ end,
+ scan_content(T1, S3, Pos2, Name, Attrs, Space, Lang, Parents, NS, Acc2,[]);
+ false ->
+ scan_content(T1, S1, Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,[])
+ end;
scan_content("<" ++ T, S0, Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,[]) ->
?bump_col(1),
- {Markup, T1, S1} =
+ {Markup, T1, S1} =
scan_content_markup(T, S, Pos, Name, Attrs, Space, Lang, Parents, NS),
AccF = S1#xmerl_scanner.acc_fun,
{NewAcc, NewPos, NewS} = case AccF(Markup, Acc, S1) of
@@ -2470,10 +2602,10 @@ scan_content([_H|T], S= #xmerl_scanner{environment={external,{entity,_}}},
%% Guess we have to scan the content to find any internal entity
%% references.
scan_content(T,S,Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,[]);
-scan_content(T, S=#xmerl_scanner{acc_fun = F,
+scan_content(T, S=#xmerl_scanner{acc_fun = F,
event_fun = Event,
hook_fun=Hook,
- line = _L},
+ line = _L},
Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,MarkupDel) ->
Text0 = #xmlText{pos = Pos,
parents = Parents},
@@ -2496,7 +2628,7 @@ scan_content(T, S=#xmerl_scanner{acc_fun = F,
Parents, NS, NewAcc,[]).
-scan_content_markup([], S=#xmerl_scanner{continuation_fun = F},
+scan_content_markup([], S=#xmerl_scanner{continuation_fun = F},
Pos, Name, Attrs, Space, Lang, Parents, NS) ->
?dbg("cont()...~n", []),
F(fun(MoreBytes, S1) -> scan_content_markup(
@@ -2508,9 +2640,9 @@ scan_content_markup("![CDATA[" ++ T, S0, Pos, _Name, _Attrs,
_Space, _Lang, Parents, _NS) ->
?bump_col(8),
scan_cdata(T, S, Pos, Parents);
-scan_content_markup("?"++T,S0,Pos,_Name,_Attrs,_Space,_Lang,_Parents,_NS) ->
+scan_content_markup("?"++T,S0,Pos,_Name,_Attrs,_Space,_Lang,Parents,_NS) ->
?bump_col(1),
- scan_pi(T, S, Pos);
+ scan_pi(T, S, Pos, Parents);
scan_content_markup(T, S, Pos, _Name, _Attrs, Space, Lang, Parents, NS) ->
scan_element(T, S, Pos, Space, Lang, Parents, NS).
@@ -2521,21 +2653,21 @@ scan_char_data(T, S, Space,MUD) ->
scan_char_data([], S=#xmerl_scanner{environment={external,{entity,_}}},
_Space,_MUD, Acc) ->
-
+
{lists:reverse(Acc), [], S};
scan_char_data([], S=#xmerl_scanner{environment=internal_parsed_entity},
_Space, _MUD,Acc) ->
-
+
{lists:reverse(Acc), [], S};
scan_char_data([], S=#xmerl_scanner{continuation_fun = F}, Space, _MUD,Acc) ->
?dbg("cont()...~n", []),
- F(fun(MoreBytes, S1) -> scan_char_data(MoreBytes,S1,Space,_MUD,Acc) end,
+ F(fun(MoreBytes, S1) -> scan_char_data(MoreBytes,S1,Space,_MUD,Acc) end,
fun(S1) -> ?fatal(unexpected_end, S1) end,
S);
scan_char_data([$&|T], S,Space,"&",Acc) ->
scan_char_data(T, S, Space,[], [$&|Acc]);
scan_char_data(T=[$&|_], S,_Space,_MUD,Acc) ->
-
+
{lists:reverse(Acc), T, S};
scan_char_data("]]>" ++ _T, S, _Space,_MUD, _Acc) ->
%% See Section 2.4: Especially:
@@ -2547,7 +2679,7 @@ scan_char_data("]]>" ++ _T, S, _Space,_MUD, _Acc) ->
scan_char_data([$<|T],S,Space,"<", Acc) ->
scan_char_data(T, S, Space,[], [$<|Acc]);
scan_char_data(T = [$<|_], S, _Space,_MUD,Acc) ->
-
+
{lists:reverse(Acc), T, S};
scan_char_data(T = [H|R], S, Space,MUD, Acc) when ?whitespace(H) ->
if
@@ -2640,7 +2772,7 @@ scan_reference(T, S) ->
%% ampersand is not recognized as an entity-reference delimiter.)"
%%
%% How to achieve this? My current approach is to insert the *strings* "&",
-%% "<", ">", "'", and "\"" instead of the characters. The processor will
+%% "<", ">", "'", and "\"" instead of the characters. The processor will
%% ignore them when performing multiple expansions. This means, for now, that
%% the character data output by the processor is (1-2 levels) deep.
%% At some suitable point, we should flatten these, so that application-level
@@ -2669,7 +2801,7 @@ scan_entity_ref("quot;" ++ T, S0) ->
scan_entity_ref(T, S) ->
{Name, _NamespaceInfo, T1, S1} = scan_name(T, S),
T2 = scan_mandatory(";",T1,1,S1,expected_entity_reference_semicolon),
-% ";" ++ T2 = T1,
+% ";" ++ T2 = T1,
S2 = S1,
Entity = expand_reference(Name, S2),
{Entity, T2, S2}.
@@ -2680,7 +2812,7 @@ scan_entity_ref(T, S) ->
scan_pe_reference(T, S) ->
{Name, _NamespaceInfo, T1, S1} = scan_name(T, S),
T2 = scan_mandatory(";",T1,1,S1,expected_parsed_entity_reference_semicolon),
-% ";" ++ T2 = T1,
+% ";" ++ T2 = T1,
{Name, T2, S1#xmerl_scanner{col = S1#xmerl_scanner.col+1}}.
expand_pe_reference(Name, #xmerl_scanner{rules_read_fun = Read} = S,WS) ->
@@ -2707,7 +2839,7 @@ expand_pe_reference(Name, #xmerl_scanner{rules_read_fun = Read} = S,WS) ->
% Result ->
% fetch_DTD(Result,S)
% end.
-
+
%%%%%%% [68] EntityReference
@@ -2786,15 +2918,15 @@ scan_eq(T, S) ->
%% scan_name/2
%%
-%% We perform some checks here to make sure that the names conform to
+%% We perform some checks here to make sure that the names conform to
%% the "Namespaces in XML" specification. This is an option.
-%%
+%%
%% Qualified Name:
%% [6] QName ::= (Prefix ':')? LocalPart
%% [7] Prefix ::= NCName
%% [8] LocalPart ::= NCName
%% [4] NCName ::= (Letter | '_') (NCNameChar)*
-%% [5] NCNameChar ::= Letter | Digit | '.' | '-' | '_'
+%% [5] NCNameChar ::= Letter | Digit | '.' | '-' | '_'
%% | CombiningChar | Extender
@@ -2808,9 +2940,9 @@ scan_eq(T, S) ->
%%
scan_name_no_colons(Str, S) ->
NSC = S#xmerl_scanner.namespace_conformant,
- case NSC of
+ case NSC of
true ->
- {Target, NSI, T1, S1} =
+ {Target, NSI, T1, S1} =
scan_name(Str,S#xmerl_scanner{namespace_conformant=no_colons}),
{Target,NSI,T1,S1#xmerl_scanner{namespace_conformant=NSC}};
false ->
@@ -2822,7 +2954,7 @@ scan_name_no_colons(Str, S) ->
%% [5] Name ::= (Letter | '_' | ':') (NameChar)*
scan_name([], S=#xmerl_scanner{continuation_fun = F}) ->
?dbg("cont()...~n", []),
- F(fun(MoreBytes, S1) -> scan_name(MoreBytes, S1) end,
+ F(fun(MoreBytes, S1) -> scan_name(MoreBytes, S1) end,
fun(S1) -> ?fatal(unexpected_end, S1) end,
S);
scan_name(Str = [$:|T], S0 = #xmerl_scanner{namespace_conformant = NSC}) ->
@@ -2885,15 +3017,15 @@ scan_nmtoken(Str, S) ->
{Ch,T} = to_ucs(S#xmerl_scanner.encoding,Str),
case xmerl_lib:is_namechar(Ch) of
true ->
- scan_nmtoken(T, S#xmerl_scanner{col = S#xmerl_scanner.col+1},
- _Acc = [Ch], _Prefix = [], _Local = [Ch],
+ scan_nmtoken(T, S#xmerl_scanner{col = S#xmerl_scanner.col+1},
+ _Acc = [Ch], _Prefix = [], _Local = [Ch],
_NamespaceConformant = false,isLatin1(Ch,true));
false ->
?fatal({invalid_nmtoken, lists:sublist(Str, 1, 6)}, S)
end.
-scan_nmtoken([], S=#xmerl_scanner{continuation_fun = F},
+scan_nmtoken([], S=#xmerl_scanner{continuation_fun = F},
Acc, Prefix, Local, NSC,IsLatin1) ->
?dbg("cont()...~n", []),
F(fun(MoreBytes, S1) -> scan_nmtoken(MoreBytes,S1,Acc,Prefix,Local,NSC,IsLatin1) end,
@@ -2907,16 +3039,16 @@ scan_nmtoken(Str = [H|_], S, Acc, Prefix, Local, _NSC,true) when ?whitespace(H)
NmString = lists:reverse(Acc),
{list_to_atom(NmString), namespace_info(Prefix, Local), Str, S};
scan_nmtoken(Str = [$:|_], S, Acc, [], _Local, no_colons,_IsLatin1) ->
- ?fatal({invalid_NCName,
+ ?fatal({invalid_NCName,
lists:sublist(lists:reverse(Acc) ++ Str, 1, 6)}, S);
scan_nmtoken([$:|T], S0, Acc, [], Local, NSC, IsLatin1) ->
?bump_col(1),
scan_nmtoken(T, S, [$:|Acc], lists:reverse(Local), [], NSC,IsLatin1);
scan_nmtoken(Str = [$:|_T], S, Acc, _Prefix, _Local, _NSC = true,_IsLatin1) ->
%% non-empty Prefix means that we've encountered a ":" already.
- %% Conformity with "Namespaces in XML" requires
+ %% Conformity with "Namespaces in XML" requires
%% at most one colon in a name
- ?fatal({invalid_NCName,
+ ?fatal({invalid_NCName,
lists:sublist(lists:reverse(Acc) ++ Str, 1, 6)}, S);
%% non-namechar also marks the end of a name
@@ -2949,7 +3081,7 @@ isLatin1(_,_) ->
scan_system_literal([], S=#xmerl_scanner{continuation_fun = F}) ->
?dbg("cont()...~n", []),
- F(fun(MoreBytes, S1) -> scan_system_literal(MoreBytes, S1) end,
+ F(fun(MoreBytes, S1) -> scan_system_literal(MoreBytes, S1) end,
fun(S1) -> ?fatal(unexpected_end, S1) end,
S);
scan_system_literal("\"" ++ T, S) ->
@@ -2958,7 +3090,7 @@ scan_system_literal("'" ++ T, S) ->
scan_system_literal(T, S, $', []).
-scan_system_literal([], S=#xmerl_scanner{continuation_fun = F},
+scan_system_literal([], S=#xmerl_scanner{continuation_fun = F},
Delimiter, Acc) ->
?dbg("cont()...~n", []),
F(fun(MoreBytes, S1) -> scan_system_literal(MoreBytes,S1,Delimiter,Acc) end,
@@ -2971,7 +3103,7 @@ scan_system_literal("#"++_R, S, _H, _Acc) ->
?fatal(fragment_identifier_in_system_literal,S);
scan_system_literal(Str, S, Delimiter, Acc) ->
{Ch,T} = to_ucs(S#xmerl_scanner.encoding,Str),
- scan_system_literal(T, S#xmerl_scanner{col = S#xmerl_scanner.col+1},
+ scan_system_literal(T, S#xmerl_scanner{col = S#xmerl_scanner.col+1},
Delimiter, [Ch|Acc]).
@@ -2988,7 +3120,7 @@ scan_pubid_literal([H|_T], S) ->
?fatal({invalid_pubid_char, H}, S).
-scan_pubid_literal([], S=#xmerl_scanner{continuation_fun = F},
+scan_pubid_literal([], S=#xmerl_scanner{continuation_fun = F},
Delimiter, Acc) ->
?dbg("cont()...~n", []),
F(fun(MoreBytes, S1) -> scan_pubid_literal(MoreBytes,S1,Delimiter,Acc) end,
@@ -3005,7 +3137,7 @@ scan_pubid_literal([H|T], S, Delimiter, Acc) ->
case is_pubid_char(H) of
true ->
scan_pubid_literal(
- T, S#xmerl_scanner{col = S#xmerl_scanner.col+1},
+ T, S#xmerl_scanner{col = S#xmerl_scanner.col+1},
Delimiter, [H|Acc]);
false ->
?fatal({invalid_pubid_char, H}, S)
@@ -3057,7 +3189,7 @@ scan_contentspec(_Str,S) ->
scan_elem_content(T, S) ->
scan_elem_content(T, S, _Context = children, _Mode = unknown, _Acc = []).
-scan_elem_content([], S=#xmerl_scanner{continuation_fun = F},
+scan_elem_content([], S=#xmerl_scanner{continuation_fun = F},
Context, Mode, Acc) ->
?dbg("cont()...~n", []),
F(fun(MoreBytes,S1) -> scan_elem_content(MoreBytes,S1,Context,Mode,Acc) end,
@@ -3078,7 +3210,7 @@ scan_elem_content(")" ++ T, S0, Context, Mode0, Acc0) ->
% more names than '#PCDATA'
% and no '*'.
{'*', mixed,_} -> ok;
- {Other, mixed,_} ->
+ {Other, mixed,_} ->
?fatal({illegal_for_mixed_content, Other}, S1);
_ ->
ok
@@ -3087,7 +3219,7 @@ scan_elem_content(")" ++ T, S0, Context, Mode0, Acc0) ->
{format_elem_content({Occurrence, {Mode, Acc}}), T2, S2};
scan_elem_content("#PCDATA" ++ _T, S, not_mixed, _Mode, _Acc) ->
?fatal({error,{extra_set_of_parenthesis}},S);
-scan_elem_content("#PCDATA" ++ _T, S, _Cont, Mode, Acc)
+scan_elem_content("#PCDATA" ++ _T, S, _Cont, Mode, Acc)
when Mode==choice;Mode==seq;Acc/=[] ->
?fatal({error,{invalid_format_of_mixed_content}},S);
scan_elem_content("#PCDATA" ++ T, S0, _Context, Mode, Acc) ->
@@ -3130,7 +3262,7 @@ scan_elem_content2(T, S, Context, Mode, Acc) ->
{Occurrence, T2, S2} = scan_occurrence(T1, S1),
case {Occurrence, Context} of
{once, mixed} -> ok;
- {Other, mixed} ->
+ {Other, mixed} ->
?fatal({illegal_for_mixed_content, Other}, S1);
_ ->
ok
@@ -3176,17 +3308,17 @@ vc_Valid_Char(_AT,C,S) ->
-vc_ID_Attribute_Default(_,#xmerl_scanner{validation=Valid})
+vc_ID_Attribute_Default(_,#xmerl_scanner{validation=Valid})
when Valid /= dtd ->
- ok;
-vc_ID_Attribute_Default({_,'ID',_,Def,_},_S)
+ ok;
+vc_ID_Attribute_Default({_,'ID',_,Def,_},_S)
when Def=='#IMPLIED';Def=='#REQUIRED' ->
ok;
vc_ID_Attribute_Default({_,'ID',_,Def,_},S) ->
?fatal({error,{validity_constraint_error_ID_Attribute_Default,Def}},S).
-vc_Enumeration({_Name,{_,NameList},DefaultVal,_,_},S)
- when is_list(DefaultVal) ->
+vc_Enumeration({_Name,{_,NameList},DefaultVal,_,_},S)
+ when is_list(DefaultVal) ->
case lists:member(list_to_atom(DefaultVal),NameList) of
true ->
ok;
@@ -3209,12 +3341,12 @@ vc_Entity_Name({_,'ENTITIES',DefaultVal,_,_},S) when is_list(DefaultVal) ->
Read = S#xmerl_scanner.rules_read_fun,
NameListFun = fun([],Acc,_St,_Fun) ->
lists:reverse(Acc);
- (Str,Acc,St,Fun) ->
+ (Str,Acc,St,Fun) ->
{N,_,St2,Str2} = scan_name(Str,St),
Fun(Str2,[N|Acc],St2,Fun)
end,
NameList = NameListFun(DefaultVal,[],S,NameListFun),
- VcFun =
+ VcFun =
fun(X) ->
case Read(entity,X,S) of
{_,external,{_,{ndata,_}}} ->
@@ -3227,7 +3359,7 @@ vc_Entity_Name({_,'ENTITIES',_,_,_},_S) ->
ok.
vc_No_Duplicate_Types(#xmerl_scanner{validation=dtd} = S,mixed,Acc) ->
- CheckDupl =
+ CheckDupl =
fun([H|T],F) ->
case lists:member(H,T) of
true ->
@@ -3259,12 +3391,18 @@ mandatory_delimeter_wfc(T,S) ->
wfc_unique_att_spec([],_S) ->
ok;
-wfc_unique_att_spec([#xmlAttribute{name=N}|Atts],S) ->
+wfc_unique_att_spec([#xmlAttribute{name=N,expanded_name=EN}|Atts],S) ->
case lists:keymember(N,#xmlAttribute.name,Atts) of
true ->
?fatal({error,{unique_att_spec_required,N}},S);
_ ->
- wfc_unique_att_spec(Atts,S)
+ case S#xmerl_scanner.namespace_conformant andalso
+ lists:keymember(EN, #xmlAttribute.expanded_name, Atts) of
+ true ->
+ ?fatal({error,{unique_att_spec_required,EN}},S);
+ _ ->
+ wfc_unique_att_spec(Atts,S)
+ end
end.
wfc_legal_char(Chars,S) when is_list(Chars)->
@@ -3313,6 +3451,11 @@ wfc_Internal_parsed_entity(internal,Value,S) ->
wfc_Internal_parsed_entity(_,_,_) ->
ok.
+vc_Element_valid(_Name, {"xmlns", _},
+ S = #xmerl_scanner{namespace_conformant = true}) ->
+ ?fatal({error,{illegal_element_prefix,xmlns}},S);
+vc_Element_valid(Name, _, S) ->
+ vc_Element_valid(Name, S).
vc_Element_valid(_Name,#xmerl_scanner{environment=internal_parsed_entity}) ->
ok;
@@ -3379,7 +3522,7 @@ scan_notation_decl1("PUBLIC" ++ T, S0) ->
?strip3,
case T3 of
">" ++ _ ->
- {{public, PIDL}, T3,
+ {{public, PIDL}, T3,
S3#xmerl_scanner{col = S3#xmerl_scanner.col+1}};
_ ->
{SL, T4, S4} = scan_system_literal(T3, S3),
@@ -3430,7 +3573,7 @@ scan_entity_value([],S,
scan_entity_value([],S=#xmerl_scanner{validation=dtd},
no_delim,_Acc,PEName,_,_PENesting) ->
{{error,{failed_VC_Proper_Declaration_PE_Nesting,2,PEName}},[],S};
-scan_entity_value([], S=#xmerl_scanner{continuation_fun = F},
+scan_entity_value([], S=#xmerl_scanner{continuation_fun = F},
Delim, Acc, PEName,Namespace,PENesting) ->
?dbg("cont()...~n", []),
F(fun(MoreBytes, S1) ->
@@ -3449,7 +3592,7 @@ scan_entity_value([Delim|T], S0,
scan_entity_value("%" ++ _T,S=#xmerl_scanner{environment=prolog},_,_,_,_,_) ->
?fatal({error,{wfc_PEs_In_Internal_Subset}},S);
% %% This is a PEdecl in an external entity
-% scan_entity_value([$%,WS|T], S0, Delim, Acc, PEName,Namespace,PENesting)
+% scan_entity_value([$%,WS|T], S0, Delim, Acc, PEName,Namespace,PENesting)
% when ?whitespace(WS) ->
% ?bump_col(2),
% scan_entity_value(T, S, Delim, [WS,$%|Acc], PEName,Namespace,PENesting);
@@ -3459,7 +3602,7 @@ scan_entity_value("%" ++ T, S0, Delim, Acc, PEName,Namespace,PENesting) ->
if PERefName == PEName,Namespace==parameter ->
?fatal({illegal_recursion_in_PE, PEName}, S1);
true ->
- {ExpandedRef,S2} =
+ {ExpandedRef,S2} =
case expand_pe_reference(PERefName, S1, in_literal) of
%% actually should pe ref be expanded as_PE but
%% handle whitespace explicitly in this case.
@@ -3467,7 +3610,7 @@ scan_entity_value("%" ++ T, S0, Delim, Acc, PEName,Namespace,PENesting) ->
%% {system,URI} or {public,URI}
%% Included in literal.
{ExpRef,Sx}=fetch_not_parse(Tuple,S1),
- {EntV, _, S5} =
+ {EntV, _, S5} =
scan_entity_value(ExpRef, Sx, no_delim,[],
PERefName,parameter,[]),
%% should do an update Write(parameter_entity)
@@ -3587,7 +3730,7 @@ scan_entity_value(")"++ T,S0,Delim,Acc,PEName, parameter=NS,PENesting) ->
scan_entity_value(T,S,Delim,[")"|Acc],PEName,NS,
pe_pop(")",PENesting,S));
scan_entity_value("\n"++T, S, Delim, Acc, PEName,Namespace,PENesting) ->
- scan_entity_value(T, S#xmerl_scanner{line=S#xmerl_scanner.line+1},
+ scan_entity_value(T, S#xmerl_scanner{line=S#xmerl_scanner.line+1},
Delim, ["\n"|Acc], PEName,Namespace,PENesting);
scan_entity_value(Str, S0, Delim, Acc, PEName,Namespace,PENesting) ->
{Ch,T} = to_ucs(S0#xmerl_scanner.encoding,Str),
@@ -3630,7 +3773,7 @@ save_refed_entity_name1(Name,PEName,
pe_push(Tok,Stack,_S) when Tok=="<!";Tok=="<?";Tok=="<!--";Tok=="<![";
Tok=="[";Tok=="<";Tok=="</";Tok=="(" ->
[Tok|Stack];
-pe_push(Tok,Stack,#xmerl_scanner{validation=dtd})
+pe_push(Tok,Stack,#xmerl_scanner{validation=dtd})
when Tok==")";Tok==">";Tok=="?>";Tok=="]]>";Tok=="-->";Tok=="/>"->
[Tok|Stack];
pe_push(_,Stack,_S) ->
@@ -3698,10 +3841,10 @@ scan_comment(Str,S=#xmerl_scanner{col=C,event_fun=Event}, Pos, Parents, Lang) ->
col = C,
pos = Pos,
data = Comment}, S),
-
+
scan_comment1(Str, S1, Pos, Comment, _Acc = []).
-scan_comment1([], S=#xmerl_scanner{continuation_fun = F},
+scan_comment1([], S=#xmerl_scanner{continuation_fun = F},
Pos, Comment, Acc) ->
?dbg("cont()...~n", []),
F(fun(MoreBytes, S1) -> scan_comment1(MoreBytes, S1, Pos, Comment, Acc) end,
@@ -3709,7 +3852,7 @@ scan_comment1([], S=#xmerl_scanner{continuation_fun = F},
S);
scan_comment1("-->" ++ T, S0 = #xmerl_scanner{col = C,
event_fun = Event,
- hook_fun = Hook},
+ hook_fun = Hook},
_Pos, Comment, Acc) ->
?bump_col(3),
Comment1 = Comment#xmlComment{value = lists:reverse(Acc)},
@@ -3817,9 +3960,9 @@ normalize(T,S,IsNorm) ->
end.
-%% Optimization:
+%% Optimization:
%% - avoid building list of spaces or tabs;
-%% - avoid reverse;
+%% - avoid reverse;
%% - compact two common indentation patterns.
%% Note: only to be called when a \n was found.
fast_accumulate_whitespace(" " ++ T, S, _) ->
@@ -3831,7 +3974,7 @@ fast_accumulate_whitespace("<"++_=R, S, _T) ->
{done, {element(3, CD), R, S#xmerl_scanner{col = 1, line = Line + 1}}};
fast_accumulate_whitespace(_, S, T) ->
accumulate_whitespace(T, S, []).
-
+
fast_acc_spaces(" " ++ T, S, N) ->
fast_acc_spaces(T, S, N + 1);
fast_acc_spaces(T, S, N) ->
@@ -3845,18 +3988,18 @@ fast_acc_tabs(T, S, N) ->
fast_acc_end(T, S, N, Col, C, CD_I) ->
#xmerl_scanner{common_data = CD, line = Line0} = S,
Line = Line0 + 1,
- try
+ try
$< = hd(T),
- {done,{element(N, element(CD_I, CD)), T,
+ {done,{element(N, element(CD_I, CD)), T,
S#xmerl_scanner{col = Col, line = Line}}}
- catch _:_ ->
+ catch _:_ ->
accumulate_whitespace(T, S, Line, Col, lists:duplicate(N, C)++"\n")
end.
-
+
%%% @spec accumulate_whitespace(T::string(),S::global_state(),
%%% atom(),Acc::string()) -> {Acc, T1, S1}
-%%%
+%%%
%%% @doc Function to accumulate and normalize whitespace.
accumulate_whitespace(T, S, preserve, Acc) ->
accumulate_whitespace(T, S, Acc);
@@ -3915,19 +4058,19 @@ schemaLocations(El,#xmerl_scanner{schemaLocation=SL}) ->
schemaLocations(El)
end.
-schemaLocations(#xmlElement{attributes=Atts,xmlbase=_Base}) ->
+schemaLocations(#xmlElement{attributes=Atts,xmlbase=_Base}) ->
Pred = fun(#xmlAttribute{name=schemaLocation}) -> false;
- (#xmlAttribute{namespace={_,"schemaLocation"}}) -> false;
+ (#xmlAttribute{nsinfo={_,"schemaLocation"}}) -> false;
(_) -> true
end,
case lists:dropwhile(Pred,Atts) of
[#xmlAttribute{value=Paths}|_] ->
-
+
case string:tokens(Paths," \n\t\r") of
L when length(L) > 0 ->
case length(L) rem 2 of
0 ->
- PairList =
+ PairList =
fun([],_Fun) ->
[];
([SLNS,SLLoc|Rest],Fun) ->
@@ -3997,7 +4140,7 @@ to_ucs(Encoding, Chars) when Encoding=="utf-8"; Encoding == undefined ->
utf8_2_ucs(Chars);
to_ucs(_,[C|Rest]) ->
{C,Rest}.
-
+
utf8_2_ucs([A,B,C,D|Rest]) when A band 16#f8 =:= 16#f0,
B band 16#c0 =:= 16#80,
C band 16#c0 =:= 16#80,
@@ -4086,7 +4229,7 @@ string_to_char_set(_,Str) ->
%% {{_,{_,Tot}},Tot110} when Tot > Tot110 ->
%% io:format("From ~p to ~p, total memory: ~p (~p)~n",[OldLine,Line,Tot,OldTot]),
%% Tot;
-%% {{_,{_,Tot}},_} ->
+%% {{_,{_,Tot}},_} ->
%% Tot
%% end,
%% put_total({NewTot,Line}).
diff --git a/lib/xmerl/src/xmerl_uri.erl b/lib/xmerl/src/xmerl_uri.erl
index a0c6f1c2a7..ed66dd78d4 100644
--- a/lib/xmerl/src/xmerl_uri.erl
+++ b/lib/xmerl/src/xmerl_uri.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2005-2011. All Rights Reserved.
+%% Copyright Ericsson AB 2005-2012. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
@@ -358,7 +358,7 @@ scan_host(C0) ->
%% Hex3=<?HEX;
%% Hex4=<?HEX ->
%% {C1,lists:reverse(lists:append(IPv6address))};
- {C1,Hostname,[A|_HostF]} ->
+ {C1,Hostname,[_A|_HostF]} ->
{C1,lists:reverse(lists:append(Hostname))}
%% _ ->
%% {error,no_host}
diff --git a/lib/xmerl/src/xmerl_validate.erl b/lib/xmerl/src/xmerl_validate.erl
index 893e23ca34..60f228474b 100644
--- a/lib/xmerl/src/xmerl_validate.erl
+++ b/lib/xmerl/src/xmerl_validate.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2003-2009. All Rights Reserved.
+%% Copyright Ericsson AB 2003-2011. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
@@ -399,25 +399,28 @@ test_attribute_value(_Rule,Attr,_,_) ->
%% +type valid_contents([rule()],[xmlElement()])->
%% [xmlElement() | {error,???}.
-valid_contents(Rule,XMLS,Rules,S,WSActionMode)->
- case parse(Rule,XMLS,Rules,WSActionMode,S) of
- {XML_N,[]}->
- lists:flatten(XML_N);
- {_,[#xmlElement{name=Name}|_T]} ->
- exit({error,{element,Name,isnt_comprise_in_the_rule,Rule}});
- {_,[#xmlText{}=Txt|_T]} ->
- exit({error,{element,text,Txt,isnt_comprise_in_the_rule,Rule}});
- {error,Reason} ->
- {error,Reason};
- {error,Reason,N} ->
- {error,Reason,N}
+valid_contents(Rule, XMLS, Rules, S, WSActionMode)->
+ case parse(Rule, XMLS, Rules, WSActionMode, S) of
+ {error, Reason} ->
+ {error, Reason};
+ {error, Reason, N} ->
+ {error, Reason, N};
+ {XML_N, Rest} -> %The list may consist of xmlComment{} records
+ case lists:dropwhile(fun(X) when is_record(X, xmlComment) -> true; (_) -> false end, Rest) of
+ [] ->
+ lists:flatten(XML_N);
+ [#xmlElement{name=Name} |_T] ->
+ exit({error, {element, Name, isnt_comprise_in_the_rule, Rule}});
+ [#xmlText{} = Txt |_T] ->
+ exit({error, {element, text, Txt, isnt_comprise_in_the_rule, Rule}})
+ end
end.
-parse({'*',SubRule},XMLS,Rules,WSaction,S)->
- star(SubRule,XMLS,Rules,WSaction,[],S);
-parse({'+',SubRule},XMLS,Rules,WSaction,S) ->
- plus(SubRule,XMLS,Rules,WSaction,S);
-parse({choice,CHOICE},XMLS,Rules,WSaction,S)->
+parse({'*', SubRule}, XMLS, Rules, WSaction, S)->
+ star(SubRule, XMLS, Rules, WSaction, [], S);
+parse({'+',SubRule}, XMLS, Rules, WSaction, S) ->
+ plus(SubRule, XMLS, Rules, WSaction, S);
+parse({choice,CHOICE}, XMLS, Rules, WSaction, S)->
% case XMLS of
% [] ->
% io:format("~p~n",[{choice,CHOICE,[]}]);
@@ -426,47 +429,49 @@ parse({choice,CHOICE},XMLS,Rules,WSaction,S)->
% [#xmlText{value=V}|_] ->
% io:format("~p~n",[{choice,CHOICE,{text,V}}])
% end,
- choice(CHOICE,XMLS,Rules,WSaction,S);
-parse(empty,[],_Rules,_WSaction,_S) ->
- {[],[]};
-parse({'?',SubRule},XMLS,Rules,_WSaction,S)->
- question(SubRule,XMLS,Rules,S);
-parse({seq,List},XMLS,Rules,WSaction,S) ->
- seq(List,XMLS,Rules,WSaction,S);
-parse(El_Name,[#xmlElement{name=El_Name}=XML|T],Rules,_WSaction,S)
+ choice(CHOICE, XMLS, Rules, WSaction, S);
+parse(empty, [], _Rules, _WSaction, _S) ->
+ {[], []};
+parse({'?', SubRule}, XMLS, Rules, _WSaction, S)->
+ question(SubRule, XMLS, Rules, S);
+parse({seq,List}, XMLS, Rules, WSaction, S) ->
+ seq(List, XMLS, Rules, WSaction, S);
+parse(El_Name, [#xmlElement{name=El_Name} = XML |T], Rules, _WSaction, S)
when is_atom(El_Name)->
- case do_validation(read_rules(Rules,El_Name),XML,Rules,S) of
- {error,R} ->
+ case do_validation(read_rules(Rules, El_Name), XML, Rules, S) of
+ {error, R} ->
% {error,R};
exit(R);
- {error,R,_N}->
+ {error, R, _N}->
% {error,R,N};
exit(R);
XML_->
- {[XML_],T}
+ {[XML_], T}
end;
-parse(any,Cont,Rules,_WSaction,S) ->
- case catch parse_any(Cont,Rules,S) of
- Err = {error,_} -> Err;
- ValidContents -> {ValidContents,[]}
+parse(any, Cont, Rules, _WSaction, S) ->
+ case catch parse_any(Cont, Rules, S) of
+ Err = {error, _} -> Err;
+ ValidContents -> {ValidContents, []}
end;
-parse(El_Name,[#xmlElement{name=Name}|_T]=S,_Rules,_WSa,_S) when is_atom(El_Name)->
+parse(El_Name, [#xmlElement{name=Name} |_T] = XMLS, _Rules, _WSa, _S) when is_atom(El_Name) ->
{error,
- {element_seq_not_conform,{wait,El_Name},{is,Name}},
- {{next,S},{act,[]}} };
-parse(_El_Name,[#xmlPI{}=H|T],_Rules,_WSa,_S) ->
- {[H],T};
-parse('#PCDATA',XML,_Rules,_WSa,_S)->
+ {element_seq_not_conform,{wait, El_Name}, {is, Name}},
+ {{next, XMLS}, {act, []}}};
+parse(El_Name, [#xmlComment{} |T], Rules, WSa, S) ->
+ parse(El_Name, T, Rules, WSa, S);
+parse(_El_Name, [#xmlPI{} = H |T], _Rules, _WSa, _S) ->
+ {[H], T};
+parse('#PCDATA', XMLS, _Rules, _WSa, _S)->
%%% PCDATA it is 0 , 1 or more #xmlText{}.
- parse_pcdata(XML);
-parse(El_Name,[#xmlText{}|_T]=S,_Rules,_WSa,_S)->
+ parse_pcdata(XMLS);
+parse(El_Name, [#xmlText{}|_T] = XMLS, _Rules, _WSa, _S)->
{error,
- {text_in_place_of,El_Name},
- {{next,S},{act,[]}}};
-parse([],_,_,_,_) ->
- {error,no_rule};
-parse(Rule,[],_,_,_) ->
- {error,{no_xml_element,Rule}}.
+ {text_in_place_of, El_Name},
+ {{next, XMLS}, {act, []}}};
+parse([], _, _, _, _) ->
+ {error, no_rule};
+parse(Rule, [], _, _, _) ->
+ {error, {no_xml_element, Rule}}.
parse_any([],_Rules,_S) ->
[];
@@ -618,11 +623,15 @@ el_name(#xmlElement{name=Name})->
parse_pcdata([#xmlText{}=H|T])->
parse_pcdata(T,[H]);
+parse_pcdata([#xmlComment{}|T])->
+ parse_pcdata(T,[]);
parse_pcdata(H) ->
{[],H}.
parse_pcdata([#xmlText{}=H|T],Acc)->
parse_pcdata(T,Acc++[H]);
+parse_pcdata([#xmlComment{}|T],Acc)->
+ parse_pcdata(T,Acc);
parse_pcdata(H,Acc) ->
{Acc,H}.
diff --git a/lib/xmerl/src/xmerl_xpath.erl b/lib/xmerl/src/xmerl_xpath.erl
index db3d3ac2d6..b3301f2faf 100644
--- a/lib/xmerl/src/xmerl_xpath.erl
+++ b/lib/xmerl/src/xmerl_xpath.erl
@@ -41,18 +41,13 @@
% xmerl_xpath_parse:parse(xmerl_xpath_scan:tokens("parent::processing-instruction('foo')")).
%% </pre>
%%
-%% @type docEntity() =
+%% @type nodeEntity() =
%% xmlElement()
%% | xmlAttribute()
%% | xmlText()
%% | xmlPI()
%% | xmlComment()
-%% @type nodeEntity() =
-%% xmlElement()
-%% | xmlAttribute()
-%% | xmlText()
-%% | xmlPI()
-%% | xmlNamespace()
+%% | xmlNsNode()
%% | xmlDocument()
%% @type option_list(). <p>Options allows to customize the behaviour of the
%% XPath scanner.
@@ -303,6 +298,17 @@ write_node(#xmlNode{pos = Pos,
node = #xmlText{value = Txt,
parents = Ps}}) ->
{text, Pos, Txt, Ps};
+write_node(#xmlNode{pos = Pos,
+ node = #xmlComment{parents = Ps}}) ->
+ {comment, Pos, '', Ps};
+write_node(#xmlNode{pos = Pos,
+ node = #xmlPI{name = Name,
+ parents = Ps}}) ->
+ {processing_instruction, Pos, Name, Ps};
+write_node(#xmlNode{pos = Pos,
+ node = #xmlNsNode{parents = Ps,
+ prefix = Prefix}}) ->
+ {namespace, Pos, Prefix, Ps};
write_node(_) ->
other.
@@ -330,18 +336,16 @@ eval_path(rel, PathExpr, C = #xmlContext{}) ->
Context = C#xmlContext{nodeset = NodeSet},
S = #state{context = Context},
path_expr(PathExpr, S);
-eval_path(filter, {PathExpr, PredExpr}, C = #xmlContext{}) ->
+eval_path(filter, {PathExpr, {pred, Pred}}, C = #xmlContext{}) ->
S = #state{context = C},
- S1 = path_expr(PathExpr, S),
- pred_expr(PredExpr, S1).
+ S1 = match_expr(PathExpr, S),
+ eval_pred(Pred, S1).
-eval_primary_expr(FC = {function_call,_,_},S = #state{context = Context}) ->
+eval_primary_expr(PrimExpr, S = #state{context = Context}) ->
%% NewNodeSet = xmerl_xpath_pred:eval(FC, Context),
- NewNodeSet = xmerl_xpath_lib:eval(primary_expr, FC, Context),
+ NewNodeSet = xmerl_xpath_lib:eval(primary_expr, PrimExpr, Context),
NewContext = Context#xmlContext{nodeset = NewNodeSet},
- S#state{context = NewContext};
-eval_primary_expr(PrimExpr,_S) ->
- exit({primary_expression,{not_implemented, PrimExpr}}).
+ S#state{context = NewContext}.
%% axis(Axis,NodeTest,Context::xmlContext()) -> xmlContext()
@@ -384,8 +388,8 @@ axis1(preceding, Tok, N, Acc, Context) ->
match_preceding(Tok, N, Acc, Context);
axis1(attribute, Tok, N, Acc, Context) ->
match_attribute(Tok, N, Acc, Context);
-%axis1(namespace, Tok, N, Acc, Context) ->
-% match_namespace(Tok, N, Acc, Context);
+axis1(namespace, Tok, N, Acc, Context) ->
+ match_namespace(Tok, N, Acc, Context);
axis1(ancestor_or_self, Tok, N, Acc, Context) ->
match_ancestor_or_self(Tok, N, Acc, Context);
axis1(descendant_or_self, Tok, N, Acc, Context) ->
@@ -627,14 +631,58 @@ node_type(#xmlAttribute{}) -> attribute;
node_type(#xmlElement{}) -> element;
node_type(#xmlText{}) -> text;
node_type(#xmlPI{}) -> processing_instruction;
-node_type(#xmlNamespace{}) -> namespace;
+node_type(#xmlNsNode{}) -> namespace;
+node_type(#xmlComment{}) -> comment;
node_type(#xmlDocument{}) -> root_node.
%% "The namespace axis contains the namespace nodes of the context node;
%% the axis will be empty unless the context node is an element."
-%match_namespace(_Tok, _N, _Acc, _Context) ->
- %% TODO: IMPLEMENT NAMESPACE AXIS
-% erlang:fault(not_yet_implemented).
+match_namespace(Tok, N, Acc, Context) ->
+ case N#xmlNode.type of
+ element ->
+ #xmlNode{parents = Ps, node = E} = N,
+ #xmlElement{name = Name,
+ namespace = NS,
+ parents = EPs,
+ pos = Pos} = E,
+ #xmlNamespace{default = Default, nodes = NSPairs} = NS,
+ ThisEPs = [{Name, Pos}|EPs],
+ ThisPs = [N|Ps],
+ Acc0 =
+ case Default of
+ D when D =:= []; D =:= '' ->
+ {[], 1};
+ URI ->
+ DefaultNSNode = #xmlNsNode{parents = ThisEPs,
+ pos = 1,
+ prefix = [],
+ uri = URI},
+ Node = #xmlNode{type = namespace,
+ node = DefaultNSNode,
+ parents = ThisPs},
+ {[Node], 2}
+ end,
+ {Nodes, _I} =
+ lists:foldr(
+ fun ({Prefix, URI}, {AccX, I}) ->
+ NSNode = #xmlNsNode{parents = ThisEPs,
+ pos = I,
+ prefix = Prefix,
+ uri = URI},
+ ThisN = #xmlNode{pos = I,
+ type = namespace,
+ node = NSNode,
+ parents = ThisPs},
+ {[ThisN | AccX], I + 1}
+ end, Acc0, NSPairs),
+ lists:foldr(
+ fun (ThisN, AccX) ->
+ match_self(Tok, ThisN, AccX, Context)
+ end, Acc, Nodes);
+ _Other ->
+ %%[]
+ Acc
+ end.
update_nodeset(Context = #xmlContext{axis_type = AxisType}, NodeSet) ->
@@ -655,8 +703,15 @@ update_nodeset(Context = #xmlContext{axis_type = AxisType}, NodeSet) ->
node_test(F, N, Context) when is_function(F) ->
F(N, Context);
+node_test(_Test, #xmlNode{type=attribute,node=#xmlAttribute{name=xmlns}},
+ _Context) ->
+ false;
+node_test(_Test,
+ #xmlNode{type=attribute,node=#xmlAttribute{nsinfo={"xmlns",_Local}}},
+ _Context) ->
+ false;
node_test({wildcard, _}, #xmlNode{type=ElAt}, _Context)
- when ElAt==element; ElAt==attribute ->
+ when ElAt==element; ElAt==attribute; ElAt==namespace ->
true;
node_test({prefix_test, Prefix}, #xmlNode{node = N}, _Context) ->
case N of
@@ -720,6 +775,9 @@ node_test({name, {_Tag, Prefix, Local}},
[{_Tag, Prefix, Local}, write_node(NSNodes)]),
false
end;
+node_test({name, {_Tag, [], Local}},
+ #xmlNode{node = #xmlNsNode{prefix = Local}}, _Context) ->
+ true;
node_test({node_type, NT}, #xmlNode{node = N}, _Context) ->
case {NT, N} of
{text, #xmlText{}} ->
@@ -728,14 +786,18 @@ node_test({node_type, NT}, #xmlNode{node = N}, _Context) ->
true;
{attribute, #xmlAttribute{}} ->
true;
- {namespace, #xmlNamespace{}} ->
+ {namespace, #xmlNsNode{}} ->
+ true;
+ {comment, #xmlComment{}} ->
+ true;
+ {processing_instruction, #xmlPI{}} ->
true;
_ ->
false
end;
-node_test({processing_instruction, {literal, _, Name}},
- #xmlNode{node = {processing_instruction, Name, _Data}}, _Context) ->
- true;
+node_test({processing_instruction, Name1},
+ #xmlNode{node = #xmlPI{name = Name2}}, _Context) ->
+ Name1 == atom_to_list(Name2);
node_test(_Other, _N, _Context) ->
%io:format("node_test(~p, ~p) -> false.~n", [_Other, write_node(_N)]),
false.
diff --git a/lib/xmerl/src/xmerl_xpath_lib.erl b/lib/xmerl/src/xmerl_xpath_lib.erl
index cfd0e36667..b37bdc93f9 100644
--- a/lib/xmerl/src/xmerl_xpath_lib.erl
+++ b/lib/xmerl/src/xmerl_xpath_lib.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2008-2009. All Rights Reserved.
+%% Copyright Ericsson AB 2008-2011. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
@@ -49,5 +49,7 @@ primary_expr({function_call, F, Args}, C) ->
%% here, we should look up the function in the context provided
%% by the caller, but we haven't figured this out yet.
exit({not_a_core_function, F})
- end.
+ end;
+primary_expr(PrimExpr, _C) ->
+ exit({primary_expression, {not_implemented, PrimExpr}}).
diff --git a/lib/xmerl/src/xmerl_xpath_parse.yrl b/lib/xmerl/src/xmerl_xpath_parse.yrl
index 37576b9e61..381ea20193 100644
--- a/lib/xmerl/src/xmerl_xpath_parse.yrl
+++ b/lib/xmerl/src/xmerl_xpath_parse.yrl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2003-2009. All Rights Reserved.
+%% Copyright Ericsson AB 2003-2011. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
@@ -144,6 +144,7 @@ Expect 2.
%% [7]
'NodeTest' -> 'NameTest' : '$1' .
'NodeTest' -> 'node_type' '(' ')' : {node_type, value('$1')} .
+'NodeTest' -> 'processing-instruction' '(' ')' : {node_type, value('$1')} .
'NodeTest' -> 'processing-instruction' '(' 'literal' ')'
: {processing_instruction, value('$3')} .
diff --git a/lib/xmerl/src/xmerl_xpath_pred.erl b/lib/xmerl/src/xmerl_xpath_pred.erl
index 451a09bee3..b94f3bb14d 100644
--- a/lib/xmerl/src/xmerl_xpath_pred.erl
+++ b/lib/xmerl/src/xmerl_xpath_pred.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2003-2009. All Rights Reserved.
+%% Copyright Ericsson AB 2003-2011. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
@@ -337,6 +337,9 @@ local_name1([#xmlNode{type=element,node=El}|_]) ->
local_name1([#xmlNode{type=attribute,node=Att}|_]) ->
#xmlAttribute{name=Name,nsinfo=NSI} = Att,
local_name2(Name,NSI);
+local_name1([#xmlNode{type=namespace,node=N}|_]) ->
+ #xmlNsNode{prefix=Prefix} = N,
+ ?string(Prefix);
local_name1([#xmlElement{name = Name, nsinfo = NSI}|_]) ->
local_name2(Name,NSI).
local_name2(Name, NSI) ->
@@ -431,6 +434,9 @@ string_value(N=#xmlObj{}) ->
string_value(A=#xmlNode{type=attribute}) ->
#xmlAttribute{value=AttVal}=A#xmlNode.node,
?string(AttVal);
+string_value(N=#xmlNode{type=namespace}) ->
+ #xmlNsNode{uri=URI}=N#xmlNode.node,
+ ?string(atom_to_list(URI));
string_value(El=#xmlNode{type=element}) ->
#xmlElement{content=C} = El#xmlNode.node,
TextValue = fun(#xmlText{value=T},_Fun) -> T;
@@ -442,6 +448,9 @@ string_value(El=#xmlNode{type=element}) ->
string_value(T=#xmlNode{type=text}) ->
#xmlText{value=Txt} = T#xmlNode.node,
?string(Txt);
+string_value(T=#xmlNode{type=comment}) ->
+ #xmlComment{value=Txt} = T#xmlNode.node,
+ ?string(Txt);
string_value(infinity) -> ?string("Infinity");
string_value(neg_infinity) -> ?string("-Infinity");
string_value(A) when is_atom(A) ->
diff --git a/lib/xmerl/src/xmerl_xpath_scan.erl b/lib/xmerl/src/xmerl_xpath_scan.erl
index 10e2756e74..f0a5bd35a3 100644
--- a/lib/xmerl/src/xmerl_xpath_scan.erl
+++ b/lib/xmerl/src/xmerl_xpath_scan.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2003-2009. All Rights Reserved.
+%% Copyright Ericsson AB 2003-2011. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
@@ -287,6 +287,7 @@ strip_ws(T) ->
special_token('@') -> true;
special_token('::') -> true;
+special_token(',') -> true;
special_token('(') -> true;
special_token('[') -> true;
special_token('/') -> true;
diff --git a/lib/xmerl/src/xmerl_xsd.erl b/lib/xmerl/src/xmerl_xsd.erl
index dfdc6138ef..78d84d23a4 100644
--- a/lib/xmerl/src/xmerl_xsd.erl
+++ b/lib/xmerl/src/xmerl_xsd.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2006-2011. All Rights Reserved.
+%% Copyright Ericsson AB 2006-2012. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
@@ -245,21 +245,27 @@ process_validate2({SE,_},Schema,Xml,Opts) ->
S4 = validation_options(S3,Opts),
validate3(Schema,Xml,S4).
-validate3(Schema,Xml,S=#xsd_state{errors=[]}) ->
- Ret = {_,S2} =
- case catch validate_xml(Xml,S) of
- {[XML2],[],Sx} ->
- {XML2,Sx};
- {XML2,[],Sx} ->
- {XML2,Sx};
- {_,UnValidated,Sx} ->
- {Xml,acc_errs(Sx,{error_path(UnValidated,Xml#xmlElement.name),?MODULE,
- {unvalidated_rest,UnValidated}})};
- _Err = {error,Reason} ->
- {Xml,acc_errs(S,Reason)};
- {'EXIT',Reason} ->
- {Xml,acc_errs(S,{error_path(Xml,Xml#xmlElement.name),?MODULE,
- {undefined,{internal_error,Reason}}})}
+validate3(Schema, Xml,S =#xsd_state{errors=[]}) ->
+ Ret = {_, S2} =
+ case catch validate_xml(Xml, S) of
+ _Err = {error, Reason} ->
+ {Xml, acc_errs(S, Reason)};
+ {'EXIT', Reason} ->
+ {Xml, acc_errs(S, {error_path(Xml, Xml#xmlElement.name), ?MODULE,
+ {undefined, {internal_error, Reason}}})};
+ {XML2, Rest, Sx} ->
+ case lists:dropwhile(fun(X) when is_record(X, xmlComment) -> true; (_) -> false end, Rest) of
+ [] ->
+ case XML2 of
+ [XML3] ->
+ {XML3,Sx};
+ XML3 ->
+ {XML3,Sx}
+ end;
+ UnValidated ->
+ {Xml,acc_errs(Sx,{error_path(UnValidated,Xml#xmlElement.name),?MODULE,
+ {unvalidated_rest,UnValidated}})}
+ end
end,
save_to_file(S2,filename:rootname(Schema)++".tab2"),
case S2#xsd_state.errors of
@@ -1950,7 +1956,7 @@ fetch_external_schema(Path,S) when is_list(Path) ->
{EXSD,S#xsd_state{schema_name=File}}
end;
{_,{string,String},_} -> %% this is for a user defined fetch fun that returns an xml document on string format.
- ?debug("scanning string: ~p~n",[File]),
+ ?debug("scanning string: ~p~n",[String]),
case xmerl_scan:string(String,S#xsd_state.xml_options) of
{error,Reason} ->
{error,acc_errs(S,{[],?MODULE,{parsing_external_schema_failed,Path,Reason}})};
@@ -2332,6 +2338,8 @@ complexity([]) ->
undefined;
complexity([#xmlText{}|T]) ->
complexity(T);
+complexity([#xmlComment{}|T]) ->
+ complexity(T);
complexity([H|T]) ->
case kind(H) of
simpleContent ->
@@ -2520,9 +2528,9 @@ check_element_type([],#schema_complex_type{name=_Name,block=_Bl,content=C},
{error,{error_path(Checked,undefined),?MODULE,
{empty_content_not_allowed,C}}}
end;
-check_element_type(C,{anyType,_},_Env,_Block,S,_Checked) ->
+check_element_type(C, {anyType, _}, _Env, _Block, S, _Checked) ->
%% permitt anything
- {C,[],S};
+ {lists:reverse(C), [], S};
check_element_type(XML=[#xmlText{}|_],Type=#schema_simple_type{},
_Env,_Block,S,_Checked) ->
@@ -2585,7 +2593,7 @@ check_element_type(XML=[XMLEl=#xmlElement{name=Name}|RestXML],
S6 = check_form(ElName,Name,XMLEl,
actual_form_value(CMEl#schema_element.form,
S5#xsd_state.elementFormDefault),
- S5),
+ S5),
%Step into content of XML element.
{Content,_,S7} =
case
@@ -2605,12 +2613,12 @@ check_element_type(XML=[XMLEl=#xmlElement{name=Name}|RestXML],
RestXML,
set_scope(S5#xsd_state.scope,set_num_el(S7,S6))};
true ->
- {error,{error_path(XMLEl,Name),?MODULE,
- {element_not_suitable_with_schema,ElName,S}}};
+ {error,{error_path(XMLEl, Name), ?MODULE,
+ {element_not_suitable_with_schema, ElName, S}}};
_ when S#xsd_state.num_el >= Min ->
%% it may be a match error or an optional element not
%% present
- {[],XML,S#xsd_state{num_el=0}};
+ {[], XML, S#xsd_state{num_el=0}};
_ ->
{error,{error_path(XMLEl,Name),?MODULE,
{element_not_suitable_with_schema,ElName,CMName,CMEl,S}}}
@@ -2645,7 +2653,7 @@ check_element_type(XML=[#xmlElement{}|_Rest],
check_element_type(XML=[E=#xmlElement{name=Name}|Rest],
Any={any,{Namespace,_Occ={Min,_},ProcessorContents}},Env,
_Block,S,_Checked) ->
- ?debug("check any: {any,{~p,~p,~p}}~n",[Namespace,Occ,ProcessorContents]),
+ ?debug("check any: {any,{~p,~p,~p}}~n",[Namespace,_Occ,ProcessorContents]),
%% ProcessorContents any of lax | strict | skip
%% lax: may validate if schema is found
%% strict: must validate
@@ -2710,8 +2718,11 @@ check_element_type([],CM,_Env,_Block,S,Checked) ->
{error,{error_path(Checked,undefined),?MODULE,
{empty_content_not_allowed,CM}}}
end;
+check_element_type([C = #xmlComment{} |Rest],CM,Env,Block,S,Checked) ->
+ check_element_type(Rest,CM,Env,Block,S,[C |Checked]);
check_element_type(XML,CM,_Env,_Block,S,_Checked) ->
{error,{error_path(XML,undefined),?MODULE,{match_failure,XML,CM,S}}}.
+
%% single xml content object and single schema object
check_text_type(XML=[#xmlText{}|_],optional_text,S) ->
% {XMLTxt,optional_text};
@@ -2730,7 +2741,7 @@ check_text_type([XMLTxt=#xmlText{}|_],CMEl,_S) ->
{cannot_contain_text,XMLTxt,CMEl}}}.
split_xmlText(XML) ->
- splitwith(fun(#xmlText{}) -> true;(_) -> false end,XML).
+ splitwith(fun(#xmlText{}) -> true;(#xmlComment{}) -> true;(_) -> false end,XML).
%% Sequence
check_sequence([T=#xmlText{}|Rest],Els,Occ,Env,S,Checked) ->
@@ -2773,6 +2784,8 @@ check_sequence(Seq=[_InstEl=#xmlElement{}|_],[El|Els],Occ={_Min,_Max},Env,S,Chec
count_num_el(set_num_el(S3,S2)),
Ret++Checked)
end;
+check_sequence([C = #xmlComment{} |Rest], Els, Occ, Env, S, Checked) ->
+ check_sequence(Rest,Els,Occ,Env,S,[C |Checked]);
check_sequence(Rest,[],_Occ,_Env,S,Checked) ->
{Checked,Rest,set_num_el(S,0)};
check_sequence([],Els,_Occ,_Env,S,Checked) ->
@@ -2869,6 +2882,8 @@ check_all(XML=[E=#xmlElement{name=Name}|RestXML],CM,Occ,Env,S,
{element_not_in_all,ElName,E,CM}},
check_all(RestXML,CM,Occ,Env,acc_errs(S,Err),[E|Checked],PrevXML)
end;
+check_all([C=#xmlComment{} |RestXML], CM, Occ, Env, S, Checked, XML) ->
+ check_all(RestXML, CM, Occ, Env, S, [C |Checked], XML);
check_all(XML,[],_,_,S,Checked,_) ->
{Checked,XML,S};
check_all([],CM,_Occ,_,S,Checked,_PrevXML) ->
@@ -2920,7 +2935,7 @@ check_target_namespace(XMLEl,S) ->
schemaLocations(El=#xmlElement{attributes=Atts},S) ->
Pred = fun(#xmlAttribute{name=schemaLocation}) -> false;
- (#xmlAttribute{namespace={_,"schemaLocation"}}) -> false;
+ (#xmlAttribute{nsinfo={_,"schemaLocation"}}) -> false;
(_) -> true
end,
case lists:dropwhile(Pred,Atts) of