aboutsummaryrefslogtreecommitdiffstats
path: root/lib/xmerl/src
diff options
context:
space:
mode:
authorErlang/OTP <[email protected]>2009-11-20 14:54:40 +0000
committerErlang/OTP <[email protected]>2009-11-20 14:54:40 +0000
commit84adefa331c4159d432d22840663c38f155cd4c1 (patch)
treebff9a9c66adda4df2106dfd0e5c053ab182a12bd /lib/xmerl/src
downloadotp-84adefa331c4159d432d22840663c38f155cd4c1.tar.gz
otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.bz2
otp-84adefa331c4159d432d22840663c38f155cd4c1.zip
The R13B03 release.OTP_R13B03
Diffstat (limited to 'lib/xmerl/src')
-rw-r--r--lib/xmerl/src/Makefile251
-rw-r--r--lib/xmerl/src/xmerl.app.src43
-rw-r--r--lib/xmerl/src/xmerl.appup.src14
-rw-r--r--lib/xmerl/src/xmerl.erl320
-rw-r--r--lib/xmerl/src/xmerl_b64Bin.yrl68
-rw-r--r--lib/xmerl/src/xmerl_b64Bin_scan.erl59
-rw-r--r--lib/xmerl/src/xmerl_dtd.erl0
-rw-r--r--lib/xmerl/src/xmerl_eventp.erl366
-rw-r--r--lib/xmerl/src/xmerl_html.erl122
-rw-r--r--lib/xmerl/src/xmerl_internal.hrl46
-rw-r--r--lib/xmerl/src/xmerl_lib.erl1065
-rw-r--r--lib/xmerl/src/xmerl_otpsgml.erl162
-rw-r--r--lib/xmerl/src/xmerl_regexp.erl1437
-rw-r--r--lib/xmerl/src/xmerl_sax_old_dom.erl293
-rw-r--r--lib/xmerl/src/xmerl_sax_old_dom.hrl138
-rw-r--r--lib/xmerl/src/xmerl_sax_parser.erl399
-rw-r--r--lib/xmerl/src/xmerl_sax_parser.hrl93
-rw-r--r--lib/xmerl/src/xmerl_sax_parser_base.erlsrc3571
-rw-r--r--lib/xmerl/src/xmerl_sax_parser_latin1.erlsrc40
-rw-r--r--lib/xmerl/src/xmerl_sax_parser_list.erlsrc40
-rw-r--r--lib/xmerl/src/xmerl_sax_parser_utf16be.erlsrc40
-rw-r--r--lib/xmerl/src/xmerl_sax_parser_utf16le.erlsrc40
-rw-r--r--lib/xmerl/src/xmerl_sax_parser_utf8.erlsrc43
-rw-r--r--lib/xmerl/src/xmerl_sax_simple_dom.erl263
-rw-r--r--lib/xmerl/src/xmerl_scan.erl4088
-rw-r--r--lib/xmerl/src/xmerl_sgml.erl65
-rw-r--r--lib/xmerl/src/xmerl_simple.erl109
-rw-r--r--lib/xmerl/src/xmerl_text.erl50
-rw-r--r--lib/xmerl/src/xmerl_ucs.erl556
-rw-r--r--lib/xmerl/src/xmerl_uri.erl478
-rw-r--r--lib/xmerl/src/xmerl_validate.erl663
-rw-r--r--lib/xmerl/src/xmerl_xlate.erl50
-rw-r--r--lib/xmerl/src/xmerl_xml.erl62
-rw-r--r--lib/xmerl/src/xmerl_xpath.erl776
-rw-r--r--lib/xmerl/src/xmerl_xpath_lib.erl53
-rw-r--r--lib/xmerl/src/xmerl_xpath_parse.yrl311
-rw-r--r--lib/xmerl/src/xmerl_xpath_pred.erl808
-rw-r--r--lib/xmerl/src/xmerl_xpath_scan.erl308
-rw-r--r--lib/xmerl/src/xmerl_xs.erl123
-rw-r--r--lib/xmerl/src/xmerl_xsd.erl5710
-rw-r--r--lib/xmerl/src/xmerl_xsd_type.erl1558
41 files changed, 24681 insertions, 0 deletions
diff --git a/lib/xmerl/src/Makefile b/lib/xmerl/src/Makefile
new file mode 100644
index 0000000000..7009b50f6c
--- /dev/null
+++ b/lib/xmerl/src/Makefile
@@ -0,0 +1,251 @@
+#
+# %CopyrightBegin%
+#
+# Copyright Ericsson AB 2003-2009. All Rights Reserved.
+#
+# The contents of this file are subject to the Erlang Public License,
+# Version 1.1, (the "License"); you may not use this file except in
+# compliance with the License. You should have received a copy of the
+# Erlang Public License along with this software. If not, it can be
+# retrieved online at http://www.erlang.org/.
+#
+# Software distributed under the License is distributed on an "AS IS"
+# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+# the License for the specific language governing rights and limitations
+# under the License.
+#
+# %CopyrightEnd%
+#
+#
+include $(ERL_TOP)/make/target.mk
+include $(ERL_TOP)/make/$(TARGET)/otp.mk
+
+# ----------------------------------------------------
+# Application version
+# ----------------------------------------------------
+include ../vsn.mk
+VSN=$(XMERL_VSN)
+
+# ----------------------------------------------------
+# Release directory specification
+# ----------------------------------------------------
+RELSYSDIR = $(RELEASE_PATH)/lib/xmerl-$(VSN)
+
+# ----------------------------------------------------
+# Documentation directory specification
+# ----------------------------------------------------
+
+DOCDIR = ../doc
+
+
+### XMERL use EDOC for documentation, to regenerate update paths as needed!
+XMERL_APP = ..
+
+EDOC_APP = ../../edoc
+
+SYNTAX_TOOLS_APP = ../../syntax_tools
+
+
+
+# ----------------------------------------------------
+# Target Specs
+# ----------------------------------------------------
+
+EDOC_MODULES = \
+ xmerl_scan \
+ xmerl \
+ xmerl_xs \
+ xmerl_eventp \
+ xmerl_xpath \
+ xmerl_xsd
+
+
+
+MODULES = $(EDOC_MODULES) \
+ xmerl_b64Bin \
+ xmerl_b64Bin_scan \
+ xmerl_html \
+ xmerl_lib \
+ xmerl_otpsgml \
+ xmerl_regexp \
+ xmerl_sgml \
+ xmerl_simple \
+ xmerl_text \
+ xmerl_ucs \
+ xmerl_uri \
+ xmerl_validate \
+ xmerl_xlate \
+ xmerl_xml \
+ xmerl_xpath_lib \
+ xmerl_xpath_parse \
+ xmerl_xpath_pred \
+ xmerl_xpath_scan \
+ xmerl_xsd_type \
+ xmerl_sax_parser \
+ xmerl_sax_parser_list \
+ xmerl_sax_parser_latin1 \
+ xmerl_sax_parser_utf8 \
+ xmerl_sax_parser_utf16be \
+ xmerl_sax_parser_utf16le \
+ xmerl_sax_simple_dom \
+ xmerl_sax_old_dom
+
+
+
+HRL_FILES = \
+ ../include/xmerl.hrl \
+ ../include/xmerl_xlink.hrl \
+ ../include/xmerl_xpath.hrl \
+ ../include/xmerl_xsd.hrl
+
+INCLUDE_DIR = ../include
+
+INTERNAL_HRL_FILES = \
+ xmerl_internal.hrl \
+ xmerl_sax_old_dom.hrl \
+ xmerl_sax_parser.hrl
+
+ERL_FILES= $(MODULES:%=%.erl)
+
+APP_FILE= xmerl.app
+APP_SRC= $(APP_FILE).src
+APP_TARGET= $(EBIN)/$(APP_FILE)
+
+APPUP_FILE= xmerl.appup
+APPUP_SRC= $(APPUP_FILE).src
+APPUP_TARGET= $(EBIN)/$(APPUP_FILE)
+
+DOC_TARGET_FILES = $(EDOC_MODULES:%=$(DOCDIR)/%.html)
+TARGET_FILES = $(MODULES:%=$(EBIN)/%.$(EMULATOR)) $(APP_TARGET) $(APPUP_TARGET)
+
+# ----------------------------------------------------
+# FLAGS
+# ----------------------------------------------------
+ERL_COMPILE_FLAGS += \
+ -I ../include \
+ +warn_unused_vars
+
+# +bin_opt_info
+
+
+# ----------------------------------------------------
+# Targets
+# ----------------------------------------------------
+debug opt: $(TARGET_FILES)
+
+docs:
+#docs: $(DOC_TARGET_FILES)
+
+
+clean:
+ $(RM) $(TARGET_FILES)
+ $(RM) xmerl_xpath_parse.erl
+ $(RM) xmerl_b64Bin.erl
+ $(RM) xmerl_sax_parser_list.erl
+ $(RM) xmerl_sax_parser_latin1.erl
+ $(RM) xmerl_sax_parser_utf8.erl
+ $(RM) xmerl_sax_parser_utf16be.erl
+ $(RM) xmerl_sax_parser_utf16le.erl
+ $(RM) core *~
+
+info:
+ @echo "MODULES: $(MODULES)"
+ @echo "EBIN: $(EBIN)"
+ @echo "EMULATOR: $(EMULATOR)"
+ @echo "APP_TARGET: $(APP_TARGET)"
+ @echo "TARGET_FILES: $(TARGET_FILES)"
+ @echo "DOC_TARGET_FILES: $(DOC_TARGET_FILES)"
+ @echo "DOCDIR/%html: $(DOCDIR)/%.html"
+
+realclean: clean
+ $(RM) $(DOC_TARGET_FILES)
+
+# ----------------------------------------------------
+# Special Build Targets
+# ----------------------------------------------------
+EDOC_PATHS = \
+ -pa $(EDOC_APP)/ebin -pa $(XMERL_APP)/ebin -pa $(SYNTAX_TOOLS_APP)/ebin
+
+$(APP_TARGET): $(APP_SRC) ../vsn.mk
+ sed -e 's;%VSN%;$(VSN);' $< > $@
+
+$(APPUP_TARGET): $(APPUP_SRC) ../vsn.mk
+ sed -e 's;%VSN%;$(VSN);' $< > $@
+
+xmerl_xpath_parse.erl: xmerl_xpath_parse.yrl
+ $(ERLC) -o $(ESRC) $<
+
+xmerl_b64Bin.erl: xmerl_b64Bin.yrl
+ $(ERLC) -o $(ESRC) $<
+
+xmerl_sax_parser_list.erl: xmerl_sax_parser_list.erlsrc xmerl_sax_parser_base.erlsrc
+ cat xmerl_sax_parser_list.erlsrc xmerl_sax_parser_base.erlsrc >$@
+
+xmerl_sax_parser_latin1.erl: xmerl_sax_parser_latin1.erlsrc xmerl_sax_parser_base.erlsrc
+ cat xmerl_sax_parser_latin1.erlsrc xmerl_sax_parser_base.erlsrc >$@
+
+xmerl_sax_parser_utf8.erl: xmerl_sax_parser_utf8.erlsrc xmerl_sax_parser_base.erlsrc
+ cat xmerl_sax_parser_utf8.erlsrc xmerl_sax_parser_base.erlsrc >$@
+
+xmerl_sax_parser_utf16be.erl: xmerl_sax_parser_utf16be.erlsrc xmerl_sax_parser_base.erlsrc
+ cat xmerl_sax_parser_utf16be.erlsrc xmerl_sax_parser_base.erlsrc >$@
+
+xmerl_sax_parser_utf16le.erl: xmerl_sax_parser_utf16le.erlsrc xmerl_sax_parser_base.erlsrc
+ cat xmerl_sax_parser_utf16le.erlsrc xmerl_sax_parser_base.erlsrc >$@
+
+$(EBIN)/%.beam: %.erl
+ $(ERLC) $(ERL_COMPILE_FLAGS) -o $(EBIN) $<
+
+$(DOCDIR)/%.html: %.erl
+ $(ERL) -noshell $(EDOC_PATHS) \
+ -run edoc_run file '"$<"' '[{dir,"$(DOCDIR)"}]' -s erlang halt
+
+
+#$(DOCDIR)/%.html: %.erl
+# $(ERL) $(EDOC_PATHS) -s edoc file $< ['{dir,"$(DOCDIR)"}']
+
+# ----------------------------------------------------
+# Release Target
+# ----------------------------------------------------
+#ifeq "${MA_TOP}" ""
+#MA_TOP = ../../..
+#endif
+#include $(MA_TOP)/config/make.release
+include $(ERL_TOP)/make/otp_release_targets.mk
+
+
+release_spec: opt
+ $(INSTALL_DIR) $(RELSYSDIR)/ebin
+ $(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin
+ $(INSTALL_DIR) $(RELSYSDIR)/src
+ $(INSTALL_DATA) $(ERL_FILES) $(INTERNAL_HRL_FILES) $(APP_SRC) $(APPUP_SRC) $(RELSYSDIR)/src
+ $(INSTALL_DATA) xmerl_xpath_parse.yrl $(RELSYSDIR)/src
+ $(INSTALL_DATA) xmerl_b64Bin.yrl $(RELSYSDIR)/src
+ $(INSTALL_DIR) $(RELSYSDIR)/include
+ $(INSTALL_DATA) $(HRL_FILES) $(RELSYSDIR)/include
+
+release_docs_spec:
+
+
+release_tests_spec:
+
+
+#------------------------------------------------------------
+# .hrl dependencies
+$(EBIN)/xmerl.beam:../include/xmerl.hrl
+$(EBIN)/xmerl_lib.beam:../include/xmerl.hrl
+$(EBIN)/xmerl_scan.beam:../include/xmerl.hrl
+$(EBIN)/xmerl_xlate.beam:../include/xmerl.hrl
+$(EBIN)/xmerl_xml.beam:../include/xmerl.hrl
+$(EBIN)/xmerl_html.beam:../include/xmerl.hrl
+$(EBIN)/xmerl_text.beam:../include/xmerl.hrl
+$(EBIN)/xmerl_eventp.beam:../include/xmerl.hrl
+$(EBIN)/xmerl_simple.beam:../include/xmerl.hrl
+$(EBIN)/xmerl_xpath.beam:../include/xmerl.hrl
+$(EBIN)/xmerl_xpath_lib.beam:../include/xmerl.hrl
+$(EBIN)/xmerl_xpath_pred.beam:../include/xmerl.hrl
+$(EBIN)/xmerl_xpath_scan.beam:../include/xmerl.hrl
+#$(EBIN)/xmerl_xsd.beam:../include/xmerl_xsd.hrl
+#$(EBIN)/xmerl_xsd.beam:../include/xmerl.hrl
+
+
diff --git a/lib/xmerl/src/xmerl.app.src b/lib/xmerl/src/xmerl.app.src
new file mode 100644
index 0000000000..b471447bbd
--- /dev/null
+++ b/lib/xmerl/src/xmerl.app.src
@@ -0,0 +1,43 @@
+{application,xmerl,
+ [{description,"XML parser"},
+ {vsn,"%VSN%"},
+ {modules,[
+ xmerl,
+ xmerl_b64Bin,
+ xmerl_b64Bin_scan,
+ xmerl_eventp,
+ xmerl_html,
+ xmerl_lib,
+ xmerl_otpsgml,
+ xmerl_regexp,
+ xmerl_sax_parser,
+ xmerl_sax_parser_list,
+ xmerl_sax_parser_latin1,
+ xmerl_sax_parser_utf8,
+ xmerl_sax_parser_utf16be,
+ xmerl_sax_parser_utf16le,
+ xmerl_sax_simple_dom,
+ xmerl_sax_old_dom,
+ xmerl_scan,
+ xmerl_sgml,
+ xmerl_simple,
+ xmerl_text,
+ xmerl_ucs,
+ xmerl_uri,
+ xmerl_validate,
+ xmerl_xlate,
+ xmerl_xml,
+ xmerl_xpath,
+ xmerl_xpath_lib,
+ xmerl_xpath_parse,
+ xmerl_xpath_pred,
+ xmerl_xpath_scan,
+ xmerl_xs,
+ xmerl_xsd,
+ xmerl_xsd_type
+ ]},
+
+ {registered, []},
+ {env, []},
+ {applications, [kernel, stdlib]}
+ ]}.
diff --git a/lib/xmerl/src/xmerl.appup.src b/lib/xmerl/src/xmerl.appup.src
new file mode 100644
index 0000000000..0d8aa4eb04
--- /dev/null
+++ b/lib/xmerl/src/xmerl.appup.src
@@ -0,0 +1,14 @@
+{"%VSN%",
+ [
+ {"1.1.11",
+ [
+ ]
+ }
+ ],
+ [
+ {"1.1.11",
+ [
+ ]
+ }
+ ]
+}.
diff --git a/lib/xmerl/src/xmerl.erl b/lib/xmerl/src/xmerl.erl
new file mode 100644
index 0000000000..cf78f7bdf7
--- /dev/null
+++ b/lib/xmerl/src/xmerl.erl
@@ -0,0 +1,320 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2003-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+%% Description : Functions to export simple and complete XML forms
+%%
+
+%% @doc Functions for exporting XML data to an external format.
+%%
+
+-module(xmerl).
+
+%-compile(export_all).
+
+-export([export/2,
+ export/3,
+ export_content/2,
+ export_element/2,
+ export_element/3,
+ export_simple/2,
+ export_simple/3,
+ export_simple_element/2,
+ export_simple_content/2,
+ callbacks/1]).
+
+-include("xmerl.hrl").
+
+
+%% @spec export(Content, Callback) -> ExportedFormat
+%% @equiv export(Data, Callback, [])
+
+export(Content, Callback) ->
+ export(Content, Callback, []).
+
+%% @spec export(Content, Callback, RootAttributes) -> ExportedFormat
+%% Content = [Element]
+%% Callback = atom()
+%% RootAttributes = [XmlAttributes]
+%% @doc Exports normal, well-formed XML content, using the specified
+%% callback-module.
+%% <p><code>Element</code> is any of:</p>
+%% <ul>
+%% <li><code>#xmlText{}</code></li>
+%% <li><code>#xmlElement{}</code></li>
+%% <li><code>#xmlPI{}</code></li>
+%% <li><code>#xmlComment{}</code></li>
+%% <li><code>#xmlDecl{}</code></li>
+%% </ul>
+%% <p>(See <tt>xmerl.hrl</tt> for the record definitions.)
+%% Text in <code>#xmlText{}</code> elements can be deep lists of
+%% characters and/or binaries.</p>
+%%
+%% <p><code>RootAttributes</code> is a list of
+%% <code>#xmlAttribute{}</code> attributes for the <code>#root#</code>
+%% element, which implicitly becomes the parent of the given
+%% <code>Content</code>. The tag-handler function for
+%% <code>#root#</code> is thus called with the complete exported data of
+%% <code>Content</code>. Root attributes can be used to specify
+%% e.g. encoding or other metadata of an XML or HTML document.</p>
+%%
+%% <p>The <code>Callback</code> module should contain hook functions for
+%% all tags present in the data structure. A hook function must have the
+%% following format:</p>
+%% <pre> Tag(Data, Attributes, Parents, E)</pre>
+%% <p>where <code>E</code> is the corresponding <code>#xmlElement{}</code>,
+%% <code>Data</code> is the already-exported contents of <code>E</code>
+%% and <code>Attributes</code> is the list of
+%% <code>#xmlAttribute{}</code> records of <code>E</code>. Finally,
+%% <code>Parents</code> is the list of parent nodes of <code>E</code>,
+%% on the form <code>[{ParentTag::atom(),
+%% ParentPosition::integer()}]</code>.</p>
+%%
+%% <p>The hook function should return either the data to be exported, or
+%% a tuple <code>{'#xml-alias#', NewTag::atom()}</code>, or a tuple
+%% <code>{'#xml-redefine#', Content}</code>, where <code>Content</code>
+%% is a content list (which can be on simple-form; see
+%% <code>export_simple/2</code> for details).</p>
+%%
+%% <p>A callback module can inherit definitions from other callback
+%% modules, through the required function <code>'#xml-interitance#() ->
+%% [ModuleName::atom()]</code>.</p>
+%%
+%% @see export/2
+%% @see export_simple/3
+
+export(Content, Callback, RootAttributes) when is_atom(Callback) ->
+ export1(Content, callbacks(Callback), RootAttributes);
+export(Content, Callbacks, RootAttrs) when is_list(Callbacks) ->
+ export1(Content, Callbacks, RootAttrs).
+
+%% @spec export_simple(Content, Callback) -> ExportedFormat
+%% @equiv export_simple(Content, Callback, [])
+
+export_simple(Content, Callback) ->
+ export_simple(Content, Callback, []).
+
+%% @spec export_simple(Content, Callback, RootAttributes) -> ExportedFormat
+%% Content = [Element]
+%% Callback = atom()
+%% RootAttributes = [XmlAttributes]
+%% @doc Exports "simple-form" XML content, using the specified
+%% callback-module.
+%% <p><code>Element</code> is any of:</p>
+%% <ul>
+%% <li><code>{Tag, Attributes, Content}</code></li>
+%% <li><code>{Tag, Content}</code></li>
+%% <li><code>Tag</code></li>
+%% <li><code>IOString</code></li>
+%% <li><code>#xmlText{}</code></li>
+%% <li><code>#xmlElement{}</code></li>
+%% <li><code>#xmlPI{}</code></li>
+%% <li><code>#xmlComment{}</code></li>
+%% <li><code>#xmlDecl{}</code></li>
+%% </ul>
+%% <p>where</p>
+%% <ul>
+%% <li><code>Tag = atom()</code></li>
+%% <li><code>Attributes = [{Name, Value}]</code></li>
+%% <li><code>Name = atom()</code></li>
+%% <li><code>Value = IOString | atom() | integer()</code></li>
+%% </ul>
+%% <p>Normal-form XML elements can thus be included in the simple-form
+%% representation. Note that content lists must be flat. An
+%% <code>IOString</code> is a (possibly deep) list of characters and/or
+%% binaries.</p>
+%%
+%% <p><code>RootAttributes</code> is a list of:</p>
+%% <ul>
+%% <li><code>XmlAttributes = #xmlAttribute{}</code></li>
+%%</ul>
+%%
+%% <p>See <code>export/3</code> for details on the callback module and
+%% the root attributes. The XML-data is always converted to normal form
+%% before being passed to the callback module.</p>
+%%
+%% @see export/3
+%% @see export_simple/2
+
+export_simple(Content, Callback, RootAttrs) when is_atom(Callback) ->
+ export_simple1(Content, callbacks(Callback), RootAttrs);
+export_simple(Content, Callbacks, RootAttrs) when is_list(Callbacks) ->
+ export_simple1(Content, Callbacks, RootAttrs).
+
+export_simple1(Content, Callback, RootAttrs) ->
+ export1(xmerl_lib:expand_content(Content), Callback, RootAttrs).
+
+%% This exports proper XML content in root context.
+
+export1(Content, Callbacks, RootAttrs) when is_list(Content) ->
+ Result = export_content(Content, Callbacks),
+ Attrs = xmerl_lib:expand_attributes(RootAttrs, 1, [{'#root#',1}]),
+ Root = #xmlElement{name = '#root#',
+ pos = 1,
+ parents = [],
+ attributes = Attrs},
+ Args = [Result, Root#xmlElement.attributes, [], Root],
+ tagdef('#root#',1,[],Args,Callbacks).
+
+%% @doc Exports simple XML content directly, without further context.
+
+export_simple_content(Content, Callback) when is_atom(Callback) ->
+ export_content(xmerl_lib:expand_content(Content),
+ callbacks(Callback));
+export_simple_content(Content, Callbacks) when is_list(Callbacks) ->
+ export_content(xmerl_lib:expand_content(Content), Callbacks).
+
+
+%% @spec export_content(Content, Callbacks) -> term()
+%% Content = [Element]
+%% Callback = [atom()]
+%% @doc Exports normal XML content directly, without further context.
+export_content([#xmlText{value = Text} | Es], Callbacks) ->
+ [apply_text_cb(Callbacks, Text) | export_content(Es, Callbacks)];
+export_content([#xmlPI{} | Es], Callbacks) ->
+ export_content(Es, Callbacks);
+export_content([#xmlComment{} | Es], Callbacks) ->
+ export_content(Es, Callbacks);
+export_content([#xmlDecl{} | Es], Callbacks) ->
+ export_content(Es, Callbacks);
+export_content([E | Es], Callbacks) ->
+ [export_element(E, Callbacks) | export_content(Es, Callbacks)];
+export_content([], _Callbacks) ->
+ [].
+
+%% @doc Exports a simple XML element directly, without further context.
+
+export_simple_element(Content, Callback) when is_atom(Callback) ->
+ export_element(xmerl_lib:expand_element(Content),
+ callbacks(Callback));
+export_simple_element(Content, Callbacks) when is_list(Callbacks) ->
+ export_element(xmerl_lib:expand_element(Content), Callbacks).
+
+%% @doc Exports a normal XML element directly, without further context.
+
+%% This is the usual DOM style parsing.
+
+export_element(E, CB) when is_atom(CB) ->
+ export_element(E, callbacks(CB));
+export_element(#xmlText{value = Text}, CBs) ->
+ apply_text_cb(CBs, Text);
+export_element(E = #xmlElement{name = Tag,
+ pos = Pos,
+ attributes = Attributes,
+ parents = Parents,
+ content = Content}, CBs) ->
+ Data = export_content(Content, CBs),
+ Args = [Data, Attributes, Parents, E],
+ tagdef(Tag,Pos,Parents,Args,CBs);
+export_element(#xmlPI{}, _CBs) ->
+ [];
+export_element(#xmlComment{}, _CBs) ->
+ [];
+export_element(#xmlDecl{}, _CBs) ->
+ [].
+
+
+%% @spec export_element(E,CallbackModule,CallbackState) -> ExportedFormat
+%% @doc For on-the-fly exporting during parsing (SAX style) of the XML
+%% document.
+export_element(E, CallbackModule, CallbackState) when is_atom(CallbackModule) ->
+ export_element(E, callbacks(CallbackModule), CallbackState);
+export_element(#xmlText{value = Text},CallbackModule,_CallbackState) ->
+%% apply_cb(CallbackModule, '#text#', '#text#', [Text,CallbackState]);
+ apply_text_cb(CallbackModule,Text);
+export_element(E=#xmlElement{name = Tag,
+ pos = Pos,
+ parents = Parents,
+ attributes = Attributes,
+ content = Content},Callbacks,CBstate) ->
+ Args = [Content, Attributes,CBstate,E],
+ tagdef(Tag,Pos,Parents,Args,Callbacks);
+export_element(#xmlPI{}, _CallbackModule, CallbackState) ->
+ CallbackState;
+export_element(#xmlComment{},_CallbackModule, CallbackState) ->
+ CallbackState;
+export_element(#xmlDecl{},_CallbackModule, CallbackState) ->
+ CallbackState.
+
+%% A thing returned with #xml-redefine is assumed to be a content list
+%% The data may be on "simple" format.
+
+tagdef(Tag,Pos,Parents,Args,CBs) ->
+ case apply_tag_cb(CBs, Tag, Args) of
+ {'#xml-alias#', NewTag} ->
+ tagdef(NewTag,Pos,Parents,Args,CBs);
+ {'#xml-redefine#', Data} ->
+ export_content(xmerl_lib:expand_content(Data, Pos, Parents),
+ CBs);
+ Other ->
+ Other
+ end.
+
+%% @spec callbacks(Module) -> Result
+%% Module = atom()
+%% Result = [atom()]
+%% @doc Find the list of inherited callback modules for a given module.
+
+callbacks(Module) ->
+ Result = check_inheritance(Module, []),
+%%% io:format("callbacks = ~p~n", [lists:reverse(Result)]),
+ lists:reverse(Result).
+
+callbacks([M|Mods], Visited) ->
+ case lists:member(M, Visited) of
+ false ->
+ NewVisited = check_inheritance(M, Visited),
+ callbacks(Mods, NewVisited);
+ true ->
+ exit({cyclic_inheritance, {M, hd(Visited)}})
+ end;
+callbacks([], Visited) ->
+ Visited.
+
+check_inheritance(M, Visited) ->
+%%% io:format("calling ~p:'#xml-inheritance#'()~n", [M]),
+ case M:'#xml-inheritance#'() of
+ [] ->
+ [M|Visited];
+ Mods ->
+ callbacks(Mods, [M|Visited])
+ end.
+
+apply_text_cb(Ms, Text) ->
+ apply_cb(Ms, '#text#', '#text#', [Text]).
+
+apply_tag_cb(Ms, F, Args) ->
+ apply_cb(Ms, F, '#element#', Args).
+
+apply_cb(Ms, F, Df, Args) ->
+ apply_cb(Ms, F, Df, Args, Ms).
+
+apply_cb([M|Ms], F, Df, Args, Ms0) ->
+ case catch apply(M, F, Args) of
+ {'EXIT', {undef,[{M,F,_}|_]}} ->
+ apply_cb(Ms, F, Df, Args, Ms0);
+ {'EXIT', Reason} ->
+ exit(Reason);
+ Res ->
+ Res
+ end;
+apply_cb([], Df, Df, Args, _Ms0) ->
+ exit({unknown_tag, {Df, Args}});
+apply_cb([], F, Df, Args, Ms0) ->
+ apply_cb(Ms0, Df, Df, [F|Args]).
diff --git a/lib/xmerl/src/xmerl_b64Bin.yrl b/lib/xmerl/src/xmerl_b64Bin.yrl
new file mode 100644
index 0000000000..2d94ea30e2
--- /dev/null
+++ b/lib/xmerl/src/xmerl_b64Bin.yrl
@@ -0,0 +1,68 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2006-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%% The base64Binary grammar is as follows:
+%% Base64Binary ::= ((B64S B64S B64S B64S)*
+%% ((B64S B64S B64S B64) |
+%% (B64S B64S B16S '=') |
+%% (B64S B04S '=' #x20? '=')))?
+%%
+%% B64S ::= B64 #x20?
+%%
+%% B16S ::= B16 #x20?
+%%
+%% B04S ::= B04 #x20?
+%%
+%% B04 ::= [AQgw]
+%% (B16 ::= [AEIMQUYcgkosw048])
+%% (B64 ::= [A-Za-z0-9+/])
+
+%% Changes
+%% B16x ::= [EIMUYckos048]
+%% B64x ::= [B-DF-HJ-LN-PR-TV-XZabd-fh-jl-np-rt-vx-z1-35-79+/]
+
+%% B16 ::= B04 | B16x
+%% B64 ::= B04 | B16x | B64x
+%% Handle whitespace in scanner
+
+Nonterminals base64Binary base64Binary2 b64 b16.
+
+Terminals '=' b04 b16x b64x.
+
+Rootsymbol base64Binary.
+
+Endsymbol '$end'.
+
+
+
+
+base64Binary -> base64Binary2.
+base64Binary -> '$empty'.
+
+base64Binary2 -> b64 b64 b64 b64 base64Binary2.
+base64Binary2 -> b64 b64 b64 b64.
+base64Binary2 -> b64 b64 b16 '='.
+base64Binary2 -> b64 b04 '=' '='.
+
+b64 -> b04.
+b64 -> b16x.
+b64 -> b64x.
+
+b16 -> b04.
+b16 -> b16x.
diff --git a/lib/xmerl/src/xmerl_b64Bin_scan.erl b/lib/xmerl/src/xmerl_b64Bin_scan.erl
new file mode 100644
index 0000000000..6f40680547
--- /dev/null
+++ b/lib/xmerl/src/xmerl_b64Bin_scan.erl
@@ -0,0 +1,59 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2006-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+-module(xmerl_b64Bin_scan).
+
+-export([scan/1]).
+
+-define(L, 1).
+
+scan(Str) ->
+ scan(Str,[]).
+
+scan([],Acc) ->
+ lists:reverse([{'$end', ?L, '$end'}|Acc]);
+scan(Str,Acc) ->
+ case scan_token(Str) of
+ {Token,Rest} ->
+ scan(Rest,[Token|Acc])
+ end.
+
+scan_token([$ ,H|T]) ->
+ scan_token([H|T]);
+scan_token([H|T]) when H==$A;H==$Q;H==$g;H==$w ->
+ {{b04,?L,H},T};
+scan_token([H|T])
+ when H==$E;H==$I;H==$M;H==$U;H==$Y;H==$c;H==$k;H==$o;H==$s;H==$0;
+ H==$4;H==$8 ->
+ {{b16x,?L,H},T};
+scan_token([H|T])
+ when H==$B;H==$C;H==$D;H==$F;H==$G;H==$H;H==$J;H==$K;H==$L;H==$N;
+ H==$O;H==$P;H==$R;H==$S;H==$T;H==$V;H==$W;H==$X;H==$Z ->
+ {{b64x,?L,H},T};
+scan_token([H|T])
+ when H==$a;H==$b;H==$d;H==$e;H==$f;H==$h;H==$i;H==$j;H==$l;H==$m;H==$n;H==$p;
+ H==$q;H==$r;H==$t;H==$u;H==$v;H==$x;H==$y;H==$z ->
+ {{b64x,?L,H},T};
+scan_token([H|T])
+ when H==$1;H==$2;H==$3;H==$5;H==$6;H==$7;H==$9;H==$+;H==$/ ->
+ {{b64x,?L,H},T};
+scan_token("="++T) ->
+ {{'=',?L,"="},T};
+scan_token([H|_T]) ->
+ exit({error,{base64Binary_scan_illegal_char,H}}).
diff --git a/lib/xmerl/src/xmerl_dtd.erl b/lib/xmerl/src/xmerl_dtd.erl
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/lib/xmerl/src/xmerl_dtd.erl
diff --git a/lib/xmerl/src/xmerl_eventp.erl b/lib/xmerl/src/xmerl_eventp.erl
new file mode 100644
index 0000000000..ad5c3cbc47
--- /dev/null
+++ b/lib/xmerl/src/xmerl_eventp.erl
@@ -0,0 +1,366 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2003-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%% Description : Simple event-based processor (front-end to xmerl_scan)
+
+%% @doc Simple event-based front-ends to xmerl_scan for processing
+%% of XML documents in streams and for parsing in SAX style.
+%% Each contain more elaborate settings of xmerl_scan that makes usage of
+%% the customization functions.
+%%
+-module(xmerl_eventp).
+-vsn('0.19').
+-date('03-09-17').
+
+-export([stream/2,stream_sax/4, file_sax/4, string_sax/4]).
+
+% -export([cont/3, rules_read/3,rules_write/4,fetch/2,close/1]).
+
+-include("xmerl.hrl").
+-include("xmerl_internal.hrl").
+-include_lib("kernel/include/file.hrl").
+
+%% @spec stream(Fname::string(), Options::option_list()) -> xmlElement()
+%%
+%% @doc Parse file containing an XML document as a stream, DOM style.
+%% Wrapper for a call to the XML parser <code>xmerl_scan</code> with a
+%% <code>continuation_fun</code> for handling streams of XML data.
+%% Note that the <code>continuation_fun</code>, <code>acc_fun</code>,
+%% <code>fetch_fun</code>, <code>rules</code> and <code>close_fun</code>
+%% options cannot be user defined using this parser.
+stream(Fname, Options) ->
+ AccF = fun(X, Acc, S) -> acc(X,Acc,S) end,
+ case file:open(Fname, [read, raw, binary]) of
+ {ok, Fd} ->
+ B0 = list_to_binary([]),
+ ContS = [{B0, Fname, Fd}],
+ Opts=scanner_options(Options,
+ [{continuation_fun, fun cont/3, ContS},
+ {acc_fun, AccF},
+ {fetch_fun, fun fetch/2},
+ {rules,fun rules_read/3,fun rules_write/4,""},
+ {close_fun, fun close/1}]),
+ xmerl_scan:string([], Opts);
+ Error ->
+ Error
+ end.
+
+
+%% @spec stream_sax(Fname,CallBackModule,UserState,Options) -> xmlElement()
+%% Fname = string()
+%% CallBackModule = atom()
+%% Options = option_list()
+%%
+%% @doc Parse file containing an XML document as a stream, SAX style.
+%% Wrapper for a call to the XML parser <code>xmerl_scan</code> with a
+%% <code>continuation_fun</code> for handling streams of XML data.
+%% Note that the <code>continuation_fun</code>, <code>acc_fun</code>,
+%% <code>fetch_fun</code>, <code>rules</code>, <code>hook_fun</code>,
+%% <code>close_fun</code> and <code>user_state</code> options cannot be user
+%% defined using this parser.
+stream_sax(Fname, CallBack, UserState,Options) ->
+ US={xmerl:callbacks(CallBack), UserState},
+ AccF = fun(X, Acc, S) -> acc(X,Acc,S) end,
+ HookF=
+ fun(ParsedEntity, S) ->
+ {CBs,Arg}=xmerl_scan:user_state(S),
+% io:format("stream_sax Arg=~p~n",[Arg]),
+ case ParsedEntity of
+ #xmlComment{} -> % Toss away comments...
+ {[],S};
+ _ -> % Use callback module for the rest
+% io:format("stream_sax ParsedEntity=~p~n",[ParsedEntity]),
+ case xmerl:export_element(ParsedEntity,CBs,Arg) of
+ {error,Reason} ->
+ throw({error,Reason});
+ Resp ->
+% io:format("stream_sax Resp=~p~n",[Resp]),
+ {Resp,xmerl_scan:user_state({CBs,Resp},S)}
+ end
+ end
+ end,
+ case file:open(Fname, [read, raw, binary]) of
+ {ok, Fd} ->
+ B0 = list_to_binary([]),
+ ContS = [{B0, Fname, Fd}],
+ Opts=scanner_options(Options,
+ [{acc_fun, AccF},
+ {close_fun, fun close/1},
+ {continuation_fun, fun cont/3, ContS},
+ {fetch_fun, fun fetch/2},
+ {hook_fun,HookF},
+ {rules,fun rules_read/3,fun rules_write/4,""},
+ {user_state,US}]),
+ xmerl_scan:string([], Opts);
+ Error ->
+ Error
+ end.
+
+
+%% @spec file_sax(Fname::string(), CallBackModule::atom(), UserState,
+%% Options::option_list()) -> NewUserState
+%%
+%% @doc Parse file containing an XML document, SAX style.
+%% Wrapper for a call to the XML parser <code>xmerl_scan</code> with a
+%% <code>hook_fun</code> for using xmerl export functionality directly after
+%% an entity is parsed.
+file_sax(Fname,CallBack, UserState, Options) ->
+ US={xmerl:callbacks(CallBack), UserState},
+ AccF=fun(X,Acc,S) -> {[X|Acc], S} end,
+ HookF=
+ fun(ParsedEntity, S) ->
+ {CBs,Arg}=xmerl_scan:user_state(S),
+ case ParsedEntity of
+ #xmlComment{} -> % Toss away comments...
+ {[],S};
+ _ -> % Use callback module for the rest
+ case xmerl:export_element(ParsedEntity,CBs,Arg) of
+ {error,Reason} ->
+ throw({error,Reason});
+ Resp ->
+ {Resp,xmerl_scan:user_state({CBs,Resp},S)}
+ end
+ end
+ end,
+
+ Opts=scanner_options(Options,[{acc_fun, AccF},
+ {hook_fun,HookF},
+ {user_state,US}]),
+ xmerl_scan:file(Fname,Opts).
+
+
+%% @spec string_sax(String::list(), CallBackModule::atom(), UserState,
+%% Options::option_list()) ->
+%% xmlElement()
+%%
+%% @doc Parse file containing an XML document, SAX style.
+%% Wrapper for a call to the XML parser <code>xmerl_scan</code> with a
+%% <code>hook_fun</code> for using xmerl export functionality directly after
+%% an entity is parsed.
+string_sax(String,CallBack, UserState, Options) ->
+ US={xmerl:callbacks(CallBack), UserState},
+ AccF=fun(X,Acc,S) -> {[X|Acc], S} end,
+ HookF=
+ fun(ParsedEntity, S) ->
+ {CBs,Arg}=xmerl_scan:user_state(S),
+ case ParsedEntity of
+ #xmlComment{} -> % Toss away comments...
+ {[],S};
+ _ -> % Use callback module for the rest
+ case xmerl:export_element(ParsedEntity,CBs,Arg) of
+ {error,Reason} ->
+ throw({error,Reason});
+ Resp ->
+ {Resp,xmerl_scan:user_state({CBs,Resp},S)}
+ end
+ end
+ end,
+
+ Opts=scanner_options(Options,[{acc_fun, AccF},
+ {hook_fun,HookF},
+ {user_state,US}]),
+ xmerl_scan:string(String,Opts).
+
+
+
+%%% ----------------------------------------------------------------------------
+%%% Streaming support functions
+
+%%% Continuation callback function for xmerl_scan
+cont(F, Exception, S) ->
+ case xmerl_scan:cont_state(S) of
+ [{_Fname, eof}|_] ->
+ Exception(S);
+ [{Sofar, Fname, Fd}|T] ->
+ cont2(F, Exception, Sofar, Fd, Fname, T, S)
+ end.
+
+
+cont2(F, Exception, Sofar, Fd, Fname, T, S) ->
+ case catch read_chunk(Fd, Fname, Sofar) of
+ {ok, Bin} ->
+ find_good_split(list_to_binary([Sofar,Bin]),
+ F,Exception,Fd,Fname,T,S);
+ eof ->
+ file:close(Fd),
+ NewS = xmerl_scan:cont_state([{Fname, eof}|T], S),
+ F(binary_to_list(Sofar), NewS);
+ Error ->
+ exit(Error)
+ end.
+
+read_chunk(Fd, _Fname, _Sofar) ->
+ file:read(Fd, 8192).
+
+-ifndef(no_bitsyntax).
+
+find_good_split(Bin, F, Exception, Fd, Fname, T, S) ->
+ find_good_split(size(Bin)-1, Bin, F, Exception, Fd, Fname, T, S).
+
+find_good_split(0, B, F, Exception, Fd, Fname, T, S) ->
+ cont2(F, Exception, B, Fd, Fname, T, S);
+find_good_split(Size, B, F, Exception, Fd, Fname, T, S) ->
+ case B of
+ <<_Bytes:Size/binary, H/integer, Tail/binary>> when ?whitespace(H) ->
+ {SubB,_} = split_binary(B, Size+1),
+ NewS = xmerl_scan:cont_state([{Tail, Fname, Fd}|T], S),
+ F(binary_to_list(SubB), NewS);
+ _ ->
+ find_good_split(Size-1, B, F, Exception, Fd, Fname, T, S)
+ end.
+
+-else.
+
+find_good_split(Bin, F, Exception, Fd, Fname, T, S) ->
+ find_good_split(size(Bin), Bin, F, Exception, Fd, Fname, T, S).
+
+find_good_split(0, B, F, Exception, Fd, Fname, T, S) ->
+ cont2(F, Exception, B, Fd, Fname, T, S);
+find_good_split(Size, B, F, Exception, Fd, Fname, T, S) ->
+ case binary_to_list(B, Size, Size) of
+ [H] when ?whitespace(H) ->
+ {SubB,Tail} = split_binary(B, Size),
+ NewS = xmerl_scan:cont_state([{Tail, Fname, Fd}|T], S),
+ F(binary_to_list(SubB), NewS);
+ _ ->
+ find_good_split(Size-1, B, F, Exception, Fd, Fname, T, S)
+ end.
+
+-endif.
+
+
+
+%%% Accumulator callback function for xmerl_scan
+acc(X = #xmlText{value = Text}, Acc, S) ->
+ case detect_nul_text(Text) of
+ ok->
+ {[X#xmlText{value = lists:flatten(Text)}|Acc], S};
+ nok->
+ {Acc,S}
+ end;
+acc(X, Acc, S) ->
+ {[X|Acc], S}.
+
+%%% don't acc xmlText when text contains only " " , "\n" and "\t".
+detect_nul_text([H|T]) when H==10; H==32; H==9->
+ detect_nul_text(T);
+detect_nul_text([]) ->
+ nok;
+detect_nul_text(_)->
+ ok.
+
+
+
+%%% Fetch callback function for xmerl_scan
+fetch({system, URI}, S) ->
+ fetch_URI(URI, S);
+fetch({public, _PublicID, URI}, S) ->
+ fetch_URI(URI, S).
+
+fetch_URI(URI, S) ->
+ %% assume URI is a filename
+ Split = filename:split(URI),
+ Filename = lists:last(Split),
+ Fullname =
+ case Split of
+ ["/", _|_] ->
+ %% absolute path name
+ URI;
+ ["file:",Name]->
+ %% file:/dtd_name
+ filename:join(S#xmerl_scanner.xmlbase, Name);
+ _ ->
+ filename:join(S#xmerl_scanner.xmlbase, URI)
+ end,
+ File = path_locate(S#xmerl_scanner.fetch_path, Filename, Fullname),
+ ?dbg("fetch(~p) -> {file, ~p}.~n", [URI, File]),
+ case file:open(File, [read, raw, binary]) of
+ {ok, Fd} ->
+ ContS=xmerl_scan:cont_state(S),
+ NewS=xmerl_scan:cont_state([{list_to_binary([]),File,Fd}|ContS],S),
+ {ok, {string, []}, NewS};
+ _Error ->
+ ?dbg("ERROR fetch(~p) -> ~p~n", [URI, _Error]),
+ {ok, not_fetched, S}
+ end.
+
+path_locate([Dir|Dirs], FN, FullName) ->
+ F = filename:join(Dir, FN),
+ case file:read_file_info(F) of
+ {ok, #file_info{type = regular}} ->
+ F;
+ _ ->
+ path_locate(Dirs, FN, FullName)
+ end;
+path_locate([], _FN, FullName) ->
+ FullName.
+
+%%% Close callback function for xmerl_scan
+close(S) ->
+ ContS = xmerl_scan:cont_state(S),
+ case ContS of
+ [{_Fname, eof}|T] ->
+ xmerl_scan:cont_state(T, S);
+ [{_Sofar, _Fname, Fd}|T] ->
+ file:close(Fd),
+ xmerl_scan:cont_state(T, S)
+ end.
+
+
+%%% Rules callback functions for xmerl_scan
+rules_write(Context, Name, Value, #xmerl_scanner{rules = undefined}=S) ->
+ Tab = ets:new(rules, [set, public]),
+ rules_write(Context, Name, Value, S#xmerl_scanner{rules = Tab});
+rules_write(Context, Name, Value, #xmerl_scanner{rules = T} = S) ->
+ ets:insert(T, {{Context, Name}, Value}),
+ S.
+
+rules_read(_Context, _Name, #xmerl_scanner{rules = undefined}) ->
+ undefined;
+rules_read(Context, Name, #xmerl_scanner{rules = T}) ->
+ case ets:lookup(T, {Context, Name}) of
+ [] ->
+ undefined;
+ [{_K, V}] ->
+ V
+ end.
+
+
+
+%%% ----------------------------------------------------------------------------
+%%% Generic helper functions
+
+scanner_options([H|T], Opts) ->
+ case catch keyreplace(H, 1, Opts) of
+ false ->
+ scanner_options(T, [H|Opts]);
+ NewOpts ->
+ scanner_options(T, NewOpts)
+ end;
+scanner_options([], Opts) ->
+ Opts.
+
+keyreplace(X, Pos, [H|T]) when element(Pos, X) == element(Pos, H) ->
+ [X|T];
+keyreplace(X, Pos, [H|T]) ->
+ [H|keyreplace(X, Pos, T)];
+keyreplace(_, _Pos, []) ->
+ throw(false).
+
+
diff --git a/lib/xmerl/src/xmerl_html.erl b/lib/xmerl/src/xmerl_html.erl
new file mode 100644
index 0000000000..38fdeb2205
--- /dev/null
+++ b/lib/xmerl/src/xmerl_html.erl
@@ -0,0 +1,122 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2003-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%% Description : Callback module for exporting XHTML to HTML.
+
+-module(xmerl_html).
+
+-export(['#xml-inheritance#'/0]).
+
+%% Note: we assume XHTML data, so all tags are lowercase!
+
+-export(['#root#'/4,
+ '#element#'/5,
+ '#text#'/1,
+ p/4]).
+
+-import(xmerl_lib, [start_tag/2, end_tag/1, is_empty_data/1,
+ find_attribute/2, export_text/1]).
+
+-include("xmerl.hrl").
+
+
+'#xml-inheritance#'() -> [].
+
+
+%% The '#text#' function is called for every text segment.
+
+'#text#'(Text) ->
+ export_text(Text).
+
+
+%% The '#root#' tag is called when the entire structure has been
+%% exported. It does not appear in the structure itself.
+
+'#root#'(Data, Attrs, [], _E) ->
+ Ver = case find_attribute(version, Attrs) of
+ {value, V} ->
+ V;
+ false ->
+%%% "-//W3C//DTD HTML 4.01//EN" % strict
+ "-//W3C//DTD HTML 4.01 Transitional//EN" % loose
+ end,
+ URI = case find_attribute(uri, Attrs) of
+ {value, U} ->
+ [" \"", U, "\""];
+ false ->
+%%% " \"http://www.w3.org/TR/html4/strict.dtd\""
+%%% " \"http://www.w3.org/TR/html4/loose.dtd\""
+ ""
+ end,
+ ["<!DOCTYPE HTML PUBLIC \"", Ver, "\"", URI, ">\n", Data].
+
+
+%% HTML does not have the <Tag/> empty-element form of XML/XHTML.
+markup(Tag, Attrs, Data) ->
+ [start_tag(Tag, Attrs), Data, end_tag(Tag)].
+
+%% Some HTML elements must not have an end tag at all.
+markup_noend(Tag, Attrs, Data) ->
+ [start_tag(Tag, Attrs), Data].
+
+
+%% For some element types, the end tag is forbidden. (In all other
+%% cases, we always generate the end tag, to make sure that the scope of
+%% a markup is not extended by mistake.)
+
+'#element#'(Tag, Data, Attrs, _Parents, _E) ->
+ case forbid_end(Tag) of
+ false ->
+ markup(Tag, Attrs, Data);
+ true ->
+ markup_noend(Tag, Attrs, Data)
+ end.
+
+
+%% HTML tags with special handling
+
+p(Data, Attrs, _Parents, _E) ->
+ %% In general, we cannot drop the end tag for paragraph elements;
+ %% that is only allowed if we know that it is immediately followed
+ %% by some other block-level tag.
+ case is_empty_data(Data) of
+ true ->
+ %% Paragraph elements should never be completely empty.
+ markup(p, Attrs, "\s");
+ false ->
+ markup(p, Attrs, Data)
+ end.
+
+
+%% Utility functions
+
+forbid_end(area) -> true;
+forbid_end(base) -> true;
+forbid_end(basefont) -> true;
+forbid_end(br) -> true;
+forbid_end(col) -> true;
+forbid_end(frame) -> true;
+forbid_end(hr) -> true;
+forbid_end(img) -> true;
+forbid_end(input) -> true;
+forbid_end(isindex) -> true;
+forbid_end(link) -> true;
+forbid_end(meta) -> true;
+forbid_end(param) -> true;
+forbid_end(_) -> false.
diff --git a/lib/xmerl/src/xmerl_internal.hrl b/lib/xmerl/src/xmerl_internal.hrl
new file mode 100644
index 0000000000..85df757138
--- /dev/null
+++ b/lib/xmerl/src/xmerl_internal.hrl
@@ -0,0 +1,46 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+%% Contributor(s):
+%% <[email protected]>: suggested #xmlDocument{}
+%%
+%%--------------------------------------------------------------------
+%% File : xmerl_internal.hrl
+%% Description : Internal definitions
+%%
+%% Created : 11 Sep 2009
+%%--------------------------------------------------------------------
+
+%%--------------------------------------------------------------------
+%% Macros
+%%--------------------------------------------------------------------
+% -define(debug,1).
+
+-ifdef(debug).
+-define(dbg(Fmt, Args), ok=io:format("~p: " ++ Fmt, [?LINE|Args])).
+-define(DBG, ok=io:format("<<~p:~p>>~n", [?MODULE, ?LINE])).
+-else.
+-define(dbg(Fmt, Args), no_debug).
+-define(DBG, no_debug).
+-endif.
+
+
+%%--------------------------------------------------------------------
+%% Records
+%%--------------------------------------------------------------------
+
diff --git a/lib/xmerl/src/xmerl_lib.erl b/lib/xmerl/src/xmerl_lib.erl
new file mode 100644
index 0000000000..7b76a76a33
--- /dev/null
+++ b/lib/xmerl/src/xmerl_lib.erl
@@ -0,0 +1,1065 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2003-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%% Description : Utility module for handling XML trees.
+%%%----------------------------------------------------------------------
+
+-module(xmerl_lib).
+
+-export([normalize_content/1, normalize_content/3, expand_content/1,
+ expand_content/3, normalize_element/1, normalize_element/3,
+ expand_element/1, expand_element/3, expand_attributes/1,
+ expand_attributes/3, export_text/1, flatten_text/1,
+ export_attribute/1, markup/2, markup/3, simplify_element/1,
+ simplify_content/1, start_tag/1, start_tag/2, end_tag/1,
+ empty_tag/1, empty_tag/2,is_empty_data/1, find_attribute/2,
+ remove_whitespace/1,to_lower/1]).
+
+-export([is_letter/1,is_namechar/1,is_ncname/1,
+ detect_charset/1,detect_charset/2,is_name/1,is_char/1]).
+
+
+-export([mapxml/2, foldxml/3, mapfoldxml/3]).
+
+%% exports for XSD
+-export([is_facet/1,is_builtin_simple_type/1,is_xsd_string/1]).
+
+-include("xmerl.hrl").
+-include("xmerl_xsd.hrl").
+
+
+%% Escape special characters `<' and `&', flattening the text.
+%% Also escapes `>', just for symmetry.
+
+export_text(T) ->
+ export_text(T, []).
+
+export_text([$< | T], Cont) ->
+ "&lt;" ++ export_text(T, Cont);
+export_text([$> | T], Cont) ->
+ "&gt;" ++ export_text(T, Cont);
+export_text([$& | T], Cont) ->
+ "&amp;" ++ export_text(T, Cont);
+export_text([C | T], Cont) when is_integer(C) ->
+ [C | export_text(T, Cont)];
+export_text([T | T1], Cont) ->
+ export_text(T, [T1 | Cont]);
+export_text([], [T | Cont]) ->
+ export_text(T, Cont);
+export_text([], []) ->
+ [];
+export_text(Bin, Cont) ->
+ export_text(binary_to_list(Bin), Cont).
+
+
+%% Only flatten text.
+
+flatten_text(T) ->
+ flatten_text(T, []).
+
+flatten_text([C | T], Cont) when is_integer(C) ->
+ [C | flatten_text(T, Cont)];
+flatten_text([T | T1], Cont) ->
+ flatten_text(T, [T1 | Cont]);
+flatten_text([], [T | Cont]) ->
+ flatten_text(T, Cont);
+flatten_text([], []) ->
+ [];
+flatten_text(Bin, Cont) ->
+ flatten_text(binary_to_list(Bin), Cont).
+
+%% Convert attribute value to a flat string, escaping characters `"',
+%% `<' and `&'. (Note that single-quote characters are not escaped; the
+%% markup-generating functions (`start_tag', `end_tag', ...) always use
+%% `"' to delimit the attribute values.)
+
+export_attribute(I) when is_integer(I) ->
+ integer_to_list(I);
+export_attribute(A) when is_atom(A) ->
+ export_attribute(atom_to_list(A), []);
+export_attribute(S) ->
+ export_attribute(S, []).
+
+export_attribute([$< | T], Cont) ->
+ "&lt;" ++ export_attribute(T, Cont);
+export_attribute([$& | T], Cont) ->
+ "&amp;" ++ export_attribute(T, Cont);
+export_attribute([$" | T], Cont) ->
+ "&quot;" ++ export_attribute(T, Cont);
+export_attribute([C | T], Cont) when is_integer(C) ->
+ [C | export_attribute(T, Cont)];
+export_attribute([T | T1], Cont) ->
+ export_attribute(T, [T1 | Cont]);
+export_attribute([], [T | Cont]) ->
+ export_attribute(T, Cont);
+export_attribute([], []) ->
+ [];
+export_attribute(Bin, Cont) ->
+ export_attribute(binary_to_list(Bin), Cont).
+
+
+%% SimpleContent: [SimpleElement]
+%% SimpleElement: #xml...{} | String | {atom(), [Attr], SimpleContent}
+%% | {atom(), SimpleContent} | atom()
+%% Attr: {atom(), Value} | #xmlAttribute{}
+%% Value: atom() | integer() | String
+%% String: [char() | binary() | String]
+%%
+%% Because strings can be deep, we do not allow content lists to also be
+%% deep; otherwise, traversal of the simple representation becomes too
+%% complicated and expensive. Simple content lists are thus flat lists
+%% of simple elements.
+
+%% TODO: namespace-qualified tags in simple-form? /RC
+
+%% 'normalize' is like 'expand', but also turns all text elements into
+%% flat strings.
+
+normalize_element(Element) ->
+ normalize_element(Element, 1, []).
+
+normalize_element(Element, Pos, Parents) ->
+ expand_element(Element, Pos, Parents, true).
+
+%% 'expand' expands simple-form elements to normal XML elements.
+%% All attribute values (also in #xmlAttribute records) become flat
+%% strings, so that string comparisons can be made. Text elements are
+%% not flattened.
+
+expand_element(Element) ->
+ expand_element(Element, 1, []).
+
+expand_element(Element, Pos, Parents) ->
+ expand_element(Element, Pos, Parents, false).
+
+expand_element(E = #xmlElement{}, Pos, Parents, Norm) ->
+ Content = expand_content(E#xmlElement.content, 1, Parents, Norm),
+ Attrs = expand_attributes(E#xmlElement.attributes, 1, []),
+ E#xmlElement{pos = Pos,
+ parents = Parents,
+ attributes = Attrs,
+ content = Content};
+expand_element(E = #xmlText{}, Pos, Parents, Norm) ->
+ E#xmlText{pos = Pos,
+ parents = Parents,
+ value = expand_text(E#xmlText.value, Norm)};
+expand_element(E = #xmlPI{}, Pos, _Parents, Norm) ->
+ E#xmlPI{pos = Pos,
+ value = expand_text(E#xmlPI.value, Norm)};
+expand_element(E = #xmlComment{}, Pos, Parents, Norm) ->
+ E#xmlComment{pos = Pos,
+ parents = Parents,
+ value = expand_text(E#xmlComment.value, Norm)};
+expand_element(E = #xmlDecl{}, _Pos, _Parents, _Norm) ->
+ Attrs = expand_attributes(E#xmlDecl.attributes, 1, []),
+ E#xmlDecl{attributes = Attrs};
+expand_element({Tag, Attrs, Content}, Pos, Parents, Norm) when is_atom(Tag) ->
+ NewParents = [{Tag, Pos} | Parents],
+ #xmlElement{name = Tag,
+ pos = Pos,
+ parents = Parents,
+ attributes = expand_attributes(Attrs, 1, NewParents),
+ content = expand_content(Content, 1, NewParents, Norm)};
+expand_element({Tag, Content}, Pos, Parents, Norm) when is_atom(Tag) ->
+ NewParents = [{Tag, Pos} | Parents],
+ #xmlElement{name = Tag,
+ pos = Pos,
+ parents = Parents,
+ attributes = [],
+ content = expand_content(Content, 1, NewParents, Norm)};
+expand_element(Tag, Pos, Parents, _Norm) when is_atom(Tag) ->
+ #xmlElement{name = Tag,
+ pos = Pos,
+ parents = Parents,
+ attributes = [],
+ content = []};
+expand_element(String, Pos, Parents, Norm) when is_list(String) ->
+ #xmlText{pos = Pos,
+ parents = Parents,
+ value = expand_text(String, Norm)}.
+
+expand_text(S, false) -> S;
+expand_text(S, true) -> flatten_text(S).
+
+%% Content must be a flat list of elements.
+
+normalize_content(Content) ->
+ normalize_content(Content, 1, []).
+
+normalize_content(Content, Pos, Parents) ->
+ expand_content(Content, Pos, Parents, true).
+
+expand_content(Content) ->
+ expand_content(Content, 1, []).
+
+expand_content(Content, Pos, Parents) ->
+ expand_content(Content, Pos, Parents, false).
+
+expand_content([{H} | T], Pos, Parents, Norm) ->
+ expand_content(H ++ T, Pos, Parents, Norm);
+expand_content([{F,S}|T], Pos, Parents, Norm) when is_function(F) ->
+ case F(S) of
+ done -> expand_content(T, Pos, Parents, Norm);
+ {C,S2} -> expand_content([{F,S2},C|T], Pos, Parents, Norm)
+ end;
+expand_content([H | T], Pos, Parents, Norm) ->
+ [expand_element(H, Pos, Parents, Norm)
+ | expand_content(T, Pos+1, Parents, Norm)];
+expand_content([], _Pos, _Parents, _Norm) ->
+ [].
+
+expand_attributes(Attrs) ->
+ expand_attributes(Attrs, 1, []).
+
+%% Expanding always turns all attribute values into flat strings.
+
+expand_attributes([H = #xmlAttribute{} | T], Pos, Parents) ->
+ [H#xmlAttribute{pos = Pos,
+ value = expand_value(H#xmlAttribute.value)}
+ | expand_attributes(T, Pos+1, Parents)];
+expand_attributes([{P,S}|T], Pos, Parents) when is_function(P) ->
+ case P(S) of
+ done ->
+ expand_attributes(T, Pos, Parents);
+ {A,S2} ->
+ expand_attributes([{P,S2},A|T], Pos, Parents)
+ end;
+expand_attributes([{K, V} | T], Pos, Parents) ->
+ [#xmlAttribute{name = K,
+ pos = Pos,
+ parents = Parents,
+ value = expand_value(V)}
+ | expand_attributes(T, Pos+1, Parents)];
+expand_attributes([], _Pos, _Parents) ->
+ [].
+
+expand_value(S) when is_atom(S) ->
+ atom_to_list(S);
+expand_value(S) when is_integer(S) ->
+ integer_to_list(S);
+expand_value(S) ->
+ flatten_text(S).
+
+%% We want simplification to yield a normal form, so we always generate
+%% three-tuples for elements. PI, Comment and Decl elements are
+%% discarded from content lists. Attribute values become flat
+%% strings. Text elements are not flattened.
+
+simplify_element(#xmlElement{expanded_name = [], name = Tag,
+ attributes = Attrs, content = Content}) ->
+ {Tag, simplify_attributes(Attrs), simplify_content(Content)};
+simplify_element(#xmlElement{expanded_name = Name,
+ attributes = Attrs, content = Content}) ->
+ {Name, simplify_attributes(Attrs), simplify_content(Content)};
+simplify_element(#xmlText{value = Text}) ->
+ Text;
+simplify_element({Tag, Attrs, Content}) when is_atom(Tag) ->
+ {Tag, simplify_attributes(Attrs), simplify_content(Content)};
+simplify_element({Tag, Content}) when is_atom(Tag) ->
+ {Tag, [], simplify_content(Content)};
+simplify_element(Tag) when is_atom(Tag) ->
+ {Tag, [], []};
+simplify_element(Text) when is_list(Text) ->
+ Text.
+
+simplify_content([#xmlPI{} | T]) ->
+ simplify_content(T);
+simplify_content([#xmlComment{} | T]) ->
+ simplify_content(T);
+simplify_content([#xmlDecl{} | T]) ->
+ simplify_content(T);
+simplify_content([H | T]) ->
+ [simplify_element(H) | simplify_content(T)];
+simplify_content([]) ->
+ [].
+
+simplify_attributes([#xmlAttribute{name = K, value = V} | T])
+ when is_atom(K) ->
+ [{K, expand_value(V)} | simplify_attributes(T)];
+simplify_attributes([H = {K, _} | T]) when is_atom(K) ->
+ [H | simplify_attributes(T)];
+simplify_attributes([]) ->
+ [].
+
+%% Looking up an attribute value
+
+find_attribute(Name, Attrs) ->
+ case lists:keysearch(Name, #xmlAttribute.name, Attrs) of
+ {value, #xmlAttribute{value = V}} ->
+ {value, V};
+ false ->
+ false
+ end.
+
+
+markup(Tag, Data) ->
+ markup(Tag, [], Data).
+
+markup(Tag, Attrs, []) ->
+ empty_tag(Tag, Attrs);
+markup(Tag, Attrs, Data) ->
+ [start_tag(Tag, Attrs), Data, end_tag(Tag)].
+
+start_tag(TagStr) ->
+ start_tag(TagStr, []).
+
+start_tag(Tag, Attrs) when is_atom(Tag) ->
+ start_tag(atom_to_list(Tag), Attrs);
+start_tag(TagStr, []) ->
+ ["<", TagStr, ">"];
+start_tag(TagStr, Attrs) ->
+ ["<", TagStr, attributes(Attrs), ">"].
+
+empty_tag(Tag) ->
+ empty_tag(Tag, []).
+
+empty_tag(Tag, Attrs) when is_atom(Tag) ->
+ empty_tag(atom_to_list(Tag), Attrs);
+empty_tag(TagStr, []) ->
+ ["<", TagStr, "/>"];
+empty_tag(TagStr, Attrs) ->
+ ["<", TagStr, attributes(Attrs), "/>"].
+
+end_tag(Tag) when is_atom(Tag) ->
+ end_tag(atom_to_list(Tag));
+end_tag(TagStr) ->
+ ["</", TagStr, ">"].
+
+attributes(Attrs) ->
+ [attr_string(A) || A <- Attrs].
+
+attr_string(#xmlAttribute{name = K, value = V}) ->
+ [" ", atom_to_list(K), "=\"", export_attribute(V), "\""].
+
+is_empty_data([]) ->
+ true;
+is_empty_data([X | Xs]) ->
+ case is_empty_data(X) of
+ false ->
+ false;
+ true ->
+ is_empty_data(Xs)
+ end;
+is_empty_data(_) ->
+ false.
+
+
+%% Removing normalised whitespace-only text segments.
+
+remove_whitespace([#xmlText{value = " "} | Data]) ->
+ remove_whitespace(Data);
+remove_whitespace([E = #xmlElement{content = Content} | Data]) ->
+ [E#xmlElement{content = remove_whitespace(Content)}
+ | remove_whitespace(Data)];
+remove_whitespace([Other | Data]) ->
+ [Other | remove_whitespace(Data)];
+remove_whitespace([]) ->
+ [].
+
+
+%%% ----------------------------------------------------------------------------
+%%% funs traversing the xmerl tree left-right and top-down
+
+%% mapxml
+%% Fun is fun(Old#xmlElement) -> New#xmlElement
+mapxml(Fun, #xmlElement{}= E) ->
+ C1 = Fun(E),
+ C2 = mapxml(Fun,lists:flatten(C1#xmlElement.content)),
+ C1#xmlElement{content=C2};
+mapxml(Fun, List) when is_list(List) ->
+ AFun = fun(E) -> mapxml(Fun, E) end,
+ lists:map(AFun, List);
+mapxml(Fun, E) ->
+ Fun(E).
+
+
+%% foldxml
+%% Fun is fun(#xmlElement, OldAccu) -> NewAccu
+foldxml(Fun, Accu0, #xmlElement{content=C}=E) ->
+ Accu1 = Fun(E, Accu0),
+ foldxml(Fun, Accu1, C);
+foldxml(Fun, Accu, List) when is_list(List) ->
+ AFun = fun(E,A) -> foldxml(Fun, A, E) end,
+ lists:foldl(AFun, Accu, List);
+foldxml(Fun, Accu, E) ->
+ Fun(E, Accu).
+
+
+%% mapfoldxml
+%% Fun is fun(Old#xmlElement, OldAccu) -> {New#xmlElement, NewAccu}
+mapfoldxml(Fun, Accu0, #xmlElement{}=E) ->
+ {C1,Accu1} = Fun(E, Accu0),
+ {C2,Accu2} = mapfoldxml(Fun, Accu1, lists:flatten(C1#xmlElement.content)),
+ {C1#xmlElement{content=C2},Accu2};
+mapfoldxml(Fun, Accu, List) when is_list(List) ->
+ AFun = fun(E,A) -> mapfoldxml(Fun, A, E) end,
+ lists:mapfoldl(AFun, Accu, List);
+mapfoldxml(Fun, Accu, E) ->
+ Fun(E,Accu).
+
+
+%%% @spec detect_charset(T::list()) -> charset_info()
+%%% @equiv detect_charset(undefined,T)
+detect_charset(Content) ->
+ detect_charset(undefined,Content).
+
+%%% FIXME! Whatabout aliases etc? Shouldn't transforming with ucs be optional?
+%%% @spec detect_charset(ExtCharset::atom(),T::list()) -> charset_info()
+%%% @doc Automatically decides character set used in XML document.
+%%% charset_info() is
+%%% <table>
+%%% <tr><td><code>{auto,'iso-10646-utf-1',Content} |</code></td></tr>
+%%% <tr><td><code>{external,'iso-10646-utf-1',Content} |</code></td></tr>
+%%% <tr><td><code>{undefined,undefined,Content} |</code></td></tr>
+%%% <tr><td><code>{external,ExtCharset,Content}</code></td></tr>
+%%% </table>
+%%% ExtCharset is any externally declared character set (e.g. in HTTP
+%%% Content-Type header) and Content is an XML Document.
+%%%
+detect_charset(ExtCharset,Content) when is_list(ExtCharset) ->
+ %% FIXME! Don't allow both atom and list for character set names
+ detect_charset(list_to_atom(ExtCharset),Content);
+detect_charset(ExtCharset,Content) ->
+ case autodetect(ExtCharset,Content) of
+ {auto,Content1} ->
+ {auto,'iso-10646-utf-1',Content1};
+ {external,Content1} ->
+ {external,'iso-10646-utf-1',Content1};
+ {undefined,_} ->
+ {undefined,undefined,Content};
+ {ExtCharset, Content} ->
+ {external,ExtCharset,Content}
+ end.
+
+%%------------------------------------------------------------------------------
+%% Auto detect what kind of character set we are dealing with and transform
+%% to Erlang integer Unicode format if found.
+%% Appendix F, Page 56-57, XML 1.0 W3C Recommendation 6 October 2000
+%% (http://www.w3.org/TR/REC-xml)
+%% 00 00 00 3C ( "<" in UCS-4 big-endian)
+%% 3C 00 00 00 ( "<" in UCS-4 little-endian)
+%% FE FF (UTF-16 - big-endian Mark)
+%% FF FE (UTF-16 - little-endian Mark)
+%% 00 3C 00 3F ( "<?" in UTF-16 big-endian)
+%% 3C 00 3F 00 ( "<?" in UTF-16 big-endian)
+%% 3C 3F (7-bit,8-bit or mixed width encoding)
+%% 4C 6F A7 94 (EBCDIC) - Not Implemented!!!!
+
+%% Check byte-order mark and transform to Unicode, Erlang integer
+%%% --- With byte-order mark
+autodetect(undefined,[0,0,16#fe,16#ff | Input]) ->
+ {auto, xmerl_ucs:from_ucs4be(Input)};
+autodetect('iso-10646-utf-1',[0,0,16#fe,16#ff | Input]) ->
+ {external, xmerl_ucs:from_ucs4be(Input)};
+autodetect(undefined,[16#ff,16#fe,0,0 | Input]) ->
+ {auto, xmerl_ucs:from_ucs4le(Input)};
+autodetect('iso-10646-utf-1',[16#ff,16#fe,0,0 | Input]) ->
+ {external, xmerl_ucs:from_ucs4le(Input)};
+
+autodetect(undefined,[16#fe,16#ff | Input]) ->
+ {auto, xmerl_ucs:from_utf16be(Input)};
+autodetect('utf-16be',[16#fe,16#ff | Input]) ->
+ {external, xmerl_ucs:from_utf16be(Input)};
+autodetect(undefined,[16#ff,16#fe | Input]) ->
+ {auto, xmerl_ucs:from_utf16le(Input)};
+autodetect('utf-16le',[16#ff,16#fe | Input]) ->
+ {external, xmerl_ucs:from_utf16le(Input)};
+
+autodetect(undefined,[16#ef,16#bb,16#bf | Input]) ->
+ {auto, xmerl_ucs:from_utf8(Input)};
+autodetect('utf-8',[16#ef,16#bb,16#bf | Input]) ->
+ {external, xmerl_ucs:from_utf8(Input)};
+autodetect('utf-8',[16#ff,16#fe | Input]) ->
+ {external, xmerl_ucs:from_utf16le(Input)};
+autodetect('utf-8',[16#fe,16#ff | Input]) ->
+ {external, xmerl_ucs:from_utf16be(Input)};
+
+%%% --- Without byte-order mark
+autodetect(undefined,[0,0,0,16#3c|Input]) ->
+ {auto, xmerl_ucs:from_ucs4be([0,0,0,16#3c|Input])};
+autodetect('iso-10646-utf-1',[0,0,0,16#3c|Input]) ->
+ {external, xmerl_ucs:from_ucs4be([0,0,0,16#3c|Input])};
+autodetect(undefined,[16#3c,0,0,0|Input]) ->
+ {auto, xmerl_ucs:from_ucs4le([16#3c,0,0,0|Input])};
+autodetect('iso-10646-utf-1',[16#3c,0,0,0|Input]) ->
+ {external, xmerl_ucs:from_ucs4le([16#3c,0,0,0|Input])};
+
+autodetect(undefined,[0,16#3c,0,16#3f | Input]) ->
+ {auto, xmerl_ucs:from_utf16be([0,16#3c,0,16#3f|Input])};
+autodetect('utf-16be',[0,16#3c,0,16#3f | Input]) ->
+ {external, xmerl_ucs:from_utf16be([0,16#3c,0,16#3f|Input])};
+autodetect(undefined,[16#3c,0,16#3f,0 | Input]) ->
+ {auto, xmerl_ucs:from_utf16le([16#3c,0,16#3f,0|Input])};
+autodetect('utf-16le',[16#3c,0,16#3f,0 | Input]) ->
+ {external, xmerl_ucs:from_utf16le([16#3c,0,16#3f,0|Input])};
+
+autodetect(ExtCharset,Content) ->
+ {ExtCharset, Content}.
+
+
+is_ncname(A) when is_atom(A) ->
+ is_ncname(atom_to_list(A));
+is_ncname([$_|T]) ->
+ is_name1(T);
+is_ncname([H|T]) ->
+ case is_letter(H) of
+ true ->
+ is_name1(T);
+ _ -> false
+ end.
+
+is_name(A) when is_atom(A) ->
+ is_name(atom_to_list(A));
+is_name([$_|T]) ->
+ is_name1(T);
+is_name([$:|T]) ->
+ is_name1(T);
+is_name([H|T]) ->
+ case is_letter(H) of
+ true ->
+ is_name1(T);
+ _ -> false
+ end.
+
+is_name1([]) ->
+ true;
+is_name1([H|T]) ->
+ case is_namechar(H) of
+ true ->
+ is_name1(T);
+ _ -> false
+ end.
+
+
+
+% =======
+%%% UNICODE character definitions
+
+%%%%%%%% [2] Char
+
+is_char(16#09) -> true;
+is_char(16#0A) -> true;
+is_char(16#0D) -> true;
+is_char(X) when X >= 16#20, X =< 16#D7FF -> true;
+is_char(X) when X >= 16#E000, X =< 16#FFFD -> true;
+is_char(X) when X >= 16#10000, X =< 16#10FFFF -> true;
+is_char(_) -> false.
+
+%% 0 - not classified,
+%% 1 - base_char or ideographic,
+%% 2 - combining_char or digit or extender,
+%% 3 - $. or $- or $_ or $:
+-define(SMALL, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,0,2,2,2,2,2,2,2,2,2,2,3,0,
+ 0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,0,0,0,0,3,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,2,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1}).
+
+%% [4] NameChar
+is_namechar(X) ->
+ try element(X, ?SMALL) > 0
+ catch _:_ ->
+ case is_letter(X) of
+ true -> true;
+ false ->
+ case is_digit(X) of
+ true -> true;
+ false ->
+ case is_combining_char(X) of
+ true -> true;
+ false ->
+ is_extender(X)
+ end
+ end
+ end
+ end.
+
+%% [84] Letter
+is_letter(X) ->
+ try element(X, ?SMALL) =:= 1
+ catch _:_ ->
+ case is_base_char(X) of
+ false ->
+ is_ideographic(X);
+ true ->
+ true
+ end
+ end.
+
+%% [85] BaseChar
+is_base_char(X) when X >= 16#0041, X =< 16#005A -> true;
+is_base_char(X) when X >= 16#0061, X =< 16#007A -> true;
+is_base_char(X) when X >= 16#00C0, X =< 16#00D6 -> true;
+is_base_char(X) when X >= 16#00D8, X =< 16#00F6 -> true;
+is_base_char(X) when X >= 16#00F8, X =< 16#00FF -> true;
+is_base_char(X) when X >= 16#0100, X =< 16#0131 -> true;
+is_base_char(X) when X >= 16#0134, X =< 16#013E -> true;
+is_base_char(X) when X >= 16#0141, X =< 16#0148 -> true;
+is_base_char(X) when X >= 16#014A, X =< 16#017E -> true;
+is_base_char(X) when X >= 16#0180, X =< 16#01C3 -> true;
+is_base_char(X) when X >= 16#01CD, X =< 16#01F0 -> true;
+is_base_char(X) when X >= 16#01F4, X =< 16#01F5 -> true;
+is_base_char(X) when X >= 16#01FA, X =< 16#0217 -> true;
+is_base_char(X) when X >= 16#0250, X =< 16#02A8 -> true;
+is_base_char(X) when X >= 16#02BB, X =< 16#02C1 -> true;
+is_base_char(16#0386) -> true;
+is_base_char(X) when X >= 16#0388, X =< 16#038A -> true;
+is_base_char(16#038C) -> true;
+is_base_char(X) when X >= 16#038E, X =< 16#03A1 -> true;
+is_base_char(X) when X >= 16#03A3, X =< 16#03CE -> true;
+is_base_char(X) when X >= 16#03D0, X =< 16#03D6 -> true;
+is_base_char(16#03DA) -> true;
+is_base_char(16#03DC) -> true;
+is_base_char(16#03DE) -> true;
+is_base_char(16#03E0) -> true;
+is_base_char(X) when X >= 16#03E2, X =< 16#03F3 -> true;
+is_base_char(X) when X >= 16#0401, X =< 16#040C -> true;
+is_base_char(X) when X >= 16#040E, X =< 16#044F -> true;
+is_base_char(X) when X >= 16#0451, X =< 16#045C -> true;
+is_base_char(X) when X >= 16#045E, X =< 16#0481 -> true;
+is_base_char(X) when X >= 16#0490, X =< 16#04C4 -> true;
+is_base_char(X) when X >= 16#04C7, X =< 16#04C8 -> true;
+is_base_char(X) when X >= 16#04CB, X =< 16#04CC -> true;
+is_base_char(X) when X >= 16#04D0, X =< 16#04EB -> true;
+is_base_char(X) when X >= 16#04EE, X =< 16#04F5 -> true;
+is_base_char(X) when X >= 16#04F8, X =< 16#04F9 -> true;
+is_base_char(X) when X >= 16#0531, X =< 16#0556 -> true;
+is_base_char(16#0559) -> true;
+is_base_char(X) when X >= 16#0561, X =< 16#0586 -> true;
+is_base_char(X) when X >= 16#05D0, X =< 16#05EA -> true;
+is_base_char(X) when X >= 16#05F0, X =< 16#05F2 -> true;
+is_base_char(X) when X >= 16#0621, X =< 16#063A -> true;
+is_base_char(X) when X >= 16#0641, X =< 16#064A -> true;
+is_base_char(X) when X >= 16#0671, X =< 16#06B7 -> true;
+is_base_char(X) when X >= 16#06BA, X =< 16#06BE -> true;
+is_base_char(X) when X >= 16#06C0, X =< 16#06CE -> true;
+is_base_char(X) when X >= 16#06D0, X =< 16#06D3 -> true;
+is_base_char(16#06D5) -> true;
+is_base_char(X) when X >= 16#06E5, X =< 16#06E6 -> true;
+is_base_char(X) when X >= 16#0905, X =< 16#0939 -> true;
+is_base_char(16#093D) -> true;
+is_base_char(X) when X >= 16#0958, X =< 16#0961 -> true;
+is_base_char(X) when X >= 16#0985, X =< 16#098C -> true;
+is_base_char(X) when X >= 16#098F, X =< 16#0990 -> true;
+is_base_char(X) when X >= 16#0993, X =< 16#09A8 -> true;
+is_base_char(X) when X >= 16#09AA, X =< 16#09B0 -> true;
+is_base_char(16#09B2) -> true;
+is_base_char(X) when X >= 16#09B6, X =< 16#09B9 -> true;
+is_base_char(X) when X >= 16#09DC, X =< 16#09DD -> true;
+is_base_char(X) when X >= 16#09DF, X =< 16#09E1 -> true;
+is_base_char(X) when X >= 16#09F0, X =< 16#09F1 -> true;
+is_base_char(X) when X >= 16#0A05, X =< 16#0A0A -> true;
+is_base_char(X) when X >= 16#0A0F, X =< 16#0A10 -> true;
+is_base_char(X) when X >= 16#0A13, X =< 16#0A28 -> true;
+is_base_char(X) when X >= 16#0A2A, X =< 16#0A30 -> true;
+is_base_char(X) when X >= 16#0A32, X =< 16#0A33 -> true;
+is_base_char(X) when X >= 16#0A35, X =< 16#0A36 -> true;
+is_base_char(X) when X >= 16#0A38, X =< 16#0A39 -> true;
+is_base_char(X) when X >= 16#0A59, X =< 16#0A5C -> true;
+is_base_char(16#0A5E) -> true;
+is_base_char(X) when X >= 16#0A72, X =< 16#0A74 -> true;
+is_base_char(X) when X >= 16#0A85, X =< 16#0A8B -> true;
+is_base_char(16#0A8D) -> true;
+is_base_char(X) when X >= 16#0A8F, X =< 16#0A91 -> true;
+is_base_char(X) when X >= 16#0A93, X =< 16#0AA8 -> true;
+is_base_char(X) when X >= 16#0AAA, X =< 16#0AB0 -> true;
+is_base_char(X) when X >= 16#0AB2, X =< 16#0AB3 -> true;
+is_base_char(X) when X >= 16#0AB5, X =< 16#0AB9 -> true;
+is_base_char(16#0ABD) -> true;
+is_base_char(16#0AE0) -> true;
+is_base_char(X) when X >= 16#0B05, X =< 16#0B0C -> true;
+is_base_char(X) when X >= 16#0B0F, X =< 16#0B10 -> true;
+is_base_char(X) when X >= 16#0B13, X =< 16#0B28 -> true;
+is_base_char(X) when X >= 16#0B2A, X =< 16#0B30 -> true;
+is_base_char(X) when X >= 16#0B32, X =< 16#0B33 -> true;
+is_base_char(X) when X >= 16#0B36, X =< 16#0B39 -> true;
+is_base_char(16#0B3D) -> true;
+is_base_char(X) when X >= 16#0B5C, X =< 16#0B5D -> true;
+is_base_char(X) when X >= 16#0B5F, X =< 16#0B61 -> true;
+is_base_char(X) when X >= 16#0B85, X =< 16#0B8A -> true;
+is_base_char(X) when X >= 16#0B8E, X =< 16#0B90 -> true;
+is_base_char(X) when X >= 16#0B92, X =< 16#0B95 -> true;
+is_base_char(X) when X >= 16#0B99, X =< 16#0B9A -> true;
+is_base_char(16#0B9C) -> true;
+is_base_char(X) when X >= 16#0B9E, X =< 16#0B9F -> true;
+is_base_char(X) when X >= 16#0BA3, X =< 16#0BA4 -> true;
+is_base_char(X) when X >= 16#0BA8, X =< 16#0BAA -> true;
+is_base_char(X) when X >= 16#0BAE, X =< 16#0BB5 -> true;
+is_base_char(X) when X >= 16#0BB7, X =< 16#0BB9 -> true;
+is_base_char(X) when X >= 16#0C05, X =< 16#0C0C -> true;
+is_base_char(X) when X >= 16#0C0E, X =< 16#0C10 -> true;
+is_base_char(X) when X >= 16#0C12, X =< 16#0C28 -> true;
+is_base_char(X) when X >= 16#0C2A, X =< 16#0C33 -> true;
+is_base_char(X) when X >= 16#0C35, X =< 16#0C39 -> true;
+is_base_char(X) when X >= 16#0C60, X =< 16#0C61 -> true;
+is_base_char(X) when X >= 16#0C85, X =< 16#0C8C -> true;
+is_base_char(X) when X >= 16#0C8E, X =< 16#0C90 -> true;
+is_base_char(X) when X >= 16#0C92, X =< 16#0CA8 -> true;
+is_base_char(X) when X >= 16#0CAA, X =< 16#0CB3 -> true;
+is_base_char(X) when X >= 16#0CB5, X =< 16#0CB9 -> true;
+is_base_char(16#0CDE) -> true;
+is_base_char(X) when X >= 16#0CE0, X =< 16#0CE1 -> true;
+is_base_char(X) when X >= 16#0D05, X =< 16#0D0C -> true;
+is_base_char(X) when X >= 16#0D0E, X =< 16#0D10 -> true;
+is_base_char(X) when X >= 16#0D12, X =< 16#0D28 -> true;
+is_base_char(X) when X >= 16#0D2A, X =< 16#0D39 -> true;
+is_base_char(X) when X >= 16#0D60, X =< 16#0D61 -> true;
+is_base_char(X) when X >= 16#0E01, X =< 16#0E2E -> true;
+is_base_char(16#0E30) -> true;
+is_base_char(X) when X >= 16#0E32, X =< 16#0E33 -> true;
+is_base_char(X) when X >= 16#0E40, X =< 16#0E45 -> true;
+is_base_char(X) when X >= 16#0E81, X =< 16#0E82 -> true;
+is_base_char(16#0E84) -> true;
+is_base_char(X) when X >= 16#0E87, X =< 16#0E88 -> true;
+is_base_char(16#0E8A) -> true;
+is_base_char(16#0E8D) -> true;
+is_base_char(X) when X >= 16#0E94, X =< 16#0E97 -> true;
+is_base_char(X) when X >= 16#0E99, X =< 16#0E9F -> true;
+is_base_char(X) when X >= 16#0EA1, X =< 16#0EA3 -> true;
+is_base_char(16#0EA5) -> true;
+is_base_char(16#0EA7) -> true;
+is_base_char(X) when X >= 16#0EAA, X =< 16#0EAB -> true;
+is_base_char(X) when X >= 16#0EAD, X =< 16#0EAE -> true;
+is_base_char(16#0EB0) -> true;
+is_base_char(X) when X >= 16#0EB2, X =< 16#0EB3 -> true;
+is_base_char(16#0EBD) -> true;
+is_base_char(X) when X >= 16#0EC0, X =< 16#0EC4 -> true;
+is_base_char(X) when X >= 16#0F40, X =< 16#0F47 -> true;
+is_base_char(X) when X >= 16#0F49, X =< 16#0F69 -> true;
+is_base_char(X) when X >= 16#10A0, X =< 16#10C5 -> true;
+is_base_char(X) when X >= 16#10D0, X =< 16#10F6 -> true;
+is_base_char(16#1100) -> true;
+is_base_char(X) when X >= 16#1102, X =< 16#1103 -> true;
+is_base_char(X) when X >= 16#1105, X =< 16#1107 -> true;
+is_base_char(16#1109) -> true;
+is_base_char(X) when X >= 16#110B, X =< 16#110C -> true;
+is_base_char(X) when X >= 16#110E, X =< 16#1112 -> true;
+is_base_char(16#113C) -> true;
+is_base_char(16#113E) -> true;
+is_base_char(16#1140) -> true;
+is_base_char(16#114C) -> true;
+is_base_char(16#114E) -> true;
+is_base_char(16#1150) -> true;
+is_base_char(X) when X >= 16#1154, X =< 16#1155 -> true;
+is_base_char(16#1159) -> true;
+is_base_char(X) when X >= 16#115F, X =< 16#1161 -> true;
+is_base_char(16#1163) -> true;
+is_base_char(16#1165) -> true;
+is_base_char(16#1167) -> true;
+is_base_char(16#1169) -> true;
+is_base_char(X) when X >= 16#116D, X =< 16#116E -> true;
+is_base_char(X) when X >= 16#1172, X =< 16#1173 -> true;
+is_base_char(16#1175) -> true;
+is_base_char(16#119E) -> true;
+is_base_char(16#11A8) -> true;
+is_base_char(16#11AB) -> true;
+is_base_char(X) when X >= 16#11AE, X =< 16#11AF -> true;
+is_base_char(X) when X >= 16#11B7, X =< 16#11B8 -> true;
+is_base_char(16#11BA) -> true;
+is_base_char(X) when X >= 16#11BC, X =< 16#11C2 -> true;
+is_base_char(16#11EB) -> true;
+is_base_char(16#11F0) -> true;
+is_base_char(16#11F9) -> true;
+is_base_char(X) when X >= 16#1E00, X =< 16#1E9B -> true;
+is_base_char(X) when X >= 16#1EA0, X =< 16#1EF9 -> true;
+is_base_char(X) when X >= 16#1F00, X =< 16#1F15 -> true;
+is_base_char(X) when X >= 16#1F18, X =< 16#1F1D -> true;
+is_base_char(X) when X >= 16#1F20, X =< 16#1F45 -> true;
+is_base_char(X) when X >= 16#1F48, X =< 16#1F4D -> true;
+is_base_char(X) when X >= 16#1F50, X =< 16#1F57 -> true;
+is_base_char(16#1F59) -> true;
+is_base_char(16#1F5B) -> true;
+is_base_char(16#1F5D) -> true;
+is_base_char(X) when X >= 16#1F5F, X =< 16#1F7D -> true;
+is_base_char(X) when X >= 16#1F80, X =< 16#1FB4 -> true;
+is_base_char(X) when X >= 16#1FB6, X =< 16#1FBC -> true;
+is_base_char(16#1FBE) -> true;
+is_base_char(X) when X >= 16#1FC2, X =< 16#1FC4 -> true;
+is_base_char(X) when X >= 16#1FC6, X =< 16#1FCC -> true;
+is_base_char(X) when X >= 16#1FD0, X =< 16#1FD3 -> true;
+is_base_char(X) when X >= 16#1FD6, X =< 16#1FDB -> true;
+is_base_char(X) when X >= 16#1FE0, X =< 16#1FEC -> true;
+is_base_char(X) when X >= 16#1FF2, X =< 16#1FF4 -> true;
+is_base_char(X) when X >= 16#1FF6, X =< 16#1FFC -> true;
+is_base_char(16#2126) -> true;
+is_base_char(X) when X >= 16#212A, X =< 16#212B -> true;
+is_base_char(16#212E) -> true;
+is_base_char(X) when X >= 16#2180, X =< 16#2182 -> true;
+is_base_char(X) when X >= 16#3041, X =< 16#3094 -> true;
+is_base_char(X) when X >= 16#30A1, X =< 16#30FA -> true;
+is_base_char(X) when X >= 16#3105, X =< 16#312C -> true;
+is_base_char(X) when X >= 16#ac00, X =< 16#d7a3 -> true;
+is_base_char(_) ->
+ false.
+
+%% [86] Ideographic
+is_ideographic(X) when X >= 16#4e00, X =< 16#9fa5 -> true;
+is_ideographic(16#3007) -> true;
+is_ideographic(X) when X >= 16#3021, X =< 16#3029 -> true;
+is_ideographic(_) ->
+ false.
+
+%% [87] CombiningChar
+is_combining_char(X) when X >= 16#0300, X =< 16#0345 -> true;
+is_combining_char(X) when X >= 16#0360, X =< 16#0361 -> true;
+is_combining_char(X) when X >= 16#0483, X =< 16#0486 -> true;
+is_combining_char(X) when X >= 16#0591, X =< 16#05a1 -> true;
+is_combining_char(X) when X >= 16#05a3, X =< 16#05b9 -> true;
+is_combining_char(X) when X >= 16#05bb, X =< 16#05bd -> true;
+is_combining_char(16#05bf) -> true;
+is_combining_char(X) when X >= 16#05c1, X =< 16#05c2 -> true;
+is_combining_char(16#05c4) -> true;
+is_combining_char(X) when X >= 16#064b, X =< 16#0652 -> true;
+is_combining_char(16#0670) -> true;
+is_combining_char(X) when X >= 16#06d6, X =< 16#06dc -> true;
+is_combining_char(X) when X >= 16#06dd, X =< 16#06df -> true;
+is_combining_char(X) when X >= 16#06e0, X =< 16#06e4 -> true;
+is_combining_char(X) when X >= 16#06e7, X =< 16#06e8 -> true;
+is_combining_char(X) when X >= 16#06ea, X =< 16#06ed -> true;
+is_combining_char(X) when X >= 16#0901, X =< 16#0903 -> true;
+is_combining_char(16#093c) -> true;
+is_combining_char(X) when X >= 16#093e, X =< 16#094c -> true;
+is_combining_char(16#094d) -> true;
+is_combining_char(X) when X >= 16#0951, X =< 16#0954 -> true;
+is_combining_char(X) when X >= 16#0962, X =< 16#0963 -> true;
+is_combining_char(X) when X >= 16#0981, X =< 16#0983 -> true;
+is_combining_char(16#09bc) -> true;
+is_combining_char(16#09be) -> true;
+is_combining_char(16#09bf) -> true;
+is_combining_char(X) when X >= 16#09c0, X =< 16#09c4 -> true;
+is_combining_char(X) when X >= 16#09c7, X =< 16#09c8 -> true;
+is_combining_char(X) when X >= 16#09cb, X =< 16#09cd -> true;
+is_combining_char(16#09d7) -> true;
+is_combining_char(X) when X >= 16#09e2, X =< 16#09e3 -> true;
+is_combining_char(16#0a02) -> true;
+is_combining_char(16#0a3c) -> true;
+is_combining_char(16#0a3e) -> true;
+is_combining_char(16#0a3f) -> true;
+is_combining_char(X) when X >= 16#0a40, X =< 16#0a42 -> true;
+is_combining_char(X) when X >= 16#0a47, X =< 16#0a48 -> true;
+is_combining_char(X) when X >= 16#0a4b, X =< 16#0a4d -> true;
+is_combining_char(X) when X >= 16#0a70, X =< 16#0a71 -> true;
+is_combining_char(X) when X >= 16#0a81, X =< 16#0a83 -> true;
+is_combining_char(16#0abc) -> true;
+is_combining_char(X) when X >= 16#0abe, X =< 16#0ac5 -> true;
+is_combining_char(X) when X >= 16#0ac7, X =< 16#0ac9 -> true;
+is_combining_char(X) when X >= 16#0acb, X =< 16#0acd -> true;
+is_combining_char(X) when X >= 16#0b01, X =< 16#0b03 -> true;
+is_combining_char(16#0b3c) -> true;
+is_combining_char(X) when X >= 16#0b3e, X =< 16#0b43 -> true;
+is_combining_char(X) when X >= 16#0b47, X =< 16#0b48 -> true;
+is_combining_char(X) when X >= 16#0b4b, X =< 16#0b4d -> true;
+is_combining_char(X) when X >= 16#0b56, X =< 16#0b57 -> true;
+is_combining_char(X) when X >= 16#0b82, X =< 16#0b83 -> true;
+is_combining_char(X) when X >= 16#0bbe, X =< 16#0bc2 -> true;
+is_combining_char(X) when X >= 16#0bc6, X =< 16#0bc8 -> true;
+is_combining_char(X) when X >= 16#0bca, X =< 16#0bcd -> true;
+is_combining_char(16#0bd7) -> true;
+is_combining_char(X) when X >= 16#0c01, X =< 16#0c03 -> true;
+is_combining_char(X) when X >= 16#0c3e, X =< 16#0c44 -> true;
+is_combining_char(X) when X >= 16#0c46, X =< 16#0c48 -> true;
+is_combining_char(X) when X >= 16#0c4a, X =< 16#0c4d -> true;
+is_combining_char(X) when X >= 16#0c55, X =< 16#0c56 -> true;
+is_combining_char(X) when X >= 16#0c82, X =< 16#0c83 -> true;
+is_combining_char(X) when X >= 16#0cbe, X =< 16#0cc4 -> true;
+is_combining_char(X) when X >= 16#0cc6, X =< 16#0cc8 -> true;
+is_combining_char(X) when X >= 16#0cca, X =< 16#0ccd -> true;
+is_combining_char(X) when X >= 16#0cd5, X =< 16#0cd6 -> true;
+is_combining_char(X) when X >= 16#0d02, X =< 16#0d03 -> true;
+is_combining_char(X) when X >= 16#0d3e, X =< 16#0d43 -> true;
+is_combining_char(X) when X >= 16#0d46, X =< 16#0d48 -> true;
+is_combining_char(X) when X >= 16#0d4a, X =< 16#0d4d -> true;
+is_combining_char(16#0d57) -> true;
+is_combining_char(16#0e31) -> true;
+is_combining_char(X) when X >= 16#0e34, X =< 16#0e3a -> true;
+is_combining_char(X) when X >= 16#0e47, X =< 16#0e4e -> true;
+is_combining_char(16#0eb1) -> true;
+is_combining_char(X) when X >= 16#0eb4, X =< 16#0eb9 -> true;
+is_combining_char(X) when X >= 16#0ebb, X =< 16#0ebc -> true;
+is_combining_char(X) when X >= 16#0ec8, X =< 16#0ecd -> true;
+is_combining_char(X) when X >= 16#0f18, X =< 16#0f19 -> true;
+is_combining_char(16#0f35) -> true;
+is_combining_char(16#0f37) -> true;
+is_combining_char(16#0f39) -> true;
+is_combining_char(16#0f3e) -> true;
+is_combining_char(16#0f3f) -> true;
+is_combining_char(X) when X >= 16#0f71, X =< 16#0f84 -> true;
+is_combining_char(X) when X >= 16#0f86, X =< 16#0f8b -> true;
+is_combining_char(X) when X >= 16#0f90, X =< 16#0f95 -> true;
+is_combining_char(16#0f97) -> true;
+is_combining_char(X) when X >= 16#0f99, X =< 16#0fad -> true;
+is_combining_char(X) when X >= 16#0fb1, X =< 16#0fb7 -> true;
+is_combining_char(16#0fb9) -> true;
+is_combining_char(X) when X >= 16#20d0, X =< 16#20dc -> true;
+is_combining_char(16#20e1) -> true;
+is_combining_char(X) when X >= 16#302a, X =< 16#302f -> true;
+is_combining_char(16#3099) -> true;
+is_combining_char(16#309a) -> true;
+is_combining_char(_) -> false.
+
+%% [88] Digit
+is_digit(X) when X >= 16#0030, X =< 16#0039 -> true;
+is_digit(X) when X >= 16#0660, X =< 16#0669 -> true;
+is_digit(X) when X >= 16#06F0, X =< 16#06F9 -> true;
+is_digit(X) when X >= 16#0966, X =< 16#096f -> true;
+is_digit(X) when X >= 16#09e6, X =< 16#09ef -> true;
+is_digit(X) when X >= 16#0a66, X =< 16#0a6f -> true;
+is_digit(X) when X >= 16#0ae6, X =< 16#0aef -> true;
+is_digit(X) when X >= 16#0b66, X =< 16#0b6f -> true;
+is_digit(X) when X >= 16#0be7, X =< 16#0bef -> true;
+is_digit(X) when X >= 16#0c66, X =< 16#0c6f -> true;
+is_digit(X) when X >= 16#0ce6, X =< 16#0cef -> true;
+is_digit(X) when X >= 16#0d66, X =< 16#0d6f -> true;
+is_digit(X) when X >= 16#0e50, X =< 16#0e59 -> true;
+is_digit(X) when X >= 16#0ed0, X =< 16#0ed9 -> true;
+is_digit(X) when X >= 16#0f20, X =< 16#0f29 -> true;
+is_digit(_) -> false.
+
+%% [89] Extender
+is_extender(16#00b7) -> true;
+is_extender(16#02d0) -> true;
+is_extender(16#02d1) -> true;
+is_extender(16#0387) -> true;
+is_extender(16#0640) -> true;
+is_extender(16#0e46) -> true;
+is_extender(16#0ec6) -> true;
+is_extender(16#3005) -> true;
+is_extender(X) when X >= 16#3031, X =< 16#3035 -> true;
+is_extender(X) when X >= 16#309d, X =< 16#309e -> true;
+is_extender(X) when X >= 16#30fc, X =< 16#30fe -> true;
+is_extender(_) -> false.
+
+to_lower(Str) ->
+ to_lower(Str, []).
+to_lower([C|Cs], Acc) when C >= $A, C =< $Z ->
+ to_lower(Cs, [C+($a-$A)| Acc]);
+to_lower([C|Cs], Acc) ->
+ to_lower(Cs, [C| Acc]);
+to_lower([], Acc) ->
+ lists:reverse(Acc).
+
+%%% XSD helpers
+
+is_facet(length) -> true;
+is_facet(minLength) -> true;
+is_facet(maxLength) -> true;
+is_facet(pattern) -> true;
+is_facet(enumeration) -> true;
+is_facet(whiteSpace) -> true;
+is_facet(maxInclusive) -> true;
+is_facet(maxExclusive) -> true;
+is_facet(minInclusive) -> true;
+is_facet(minExclusive) -> true;
+is_facet(totalDigits) -> true;
+is_facet(fractionDigits) -> true;
+is_facet(_) -> false.
+
+
+is_builtin_simple_type({Type,_,?XSD_NAMESPACE}) when is_atom(Type) ->
+ is_builtin_simple_type(atom_to_list(Type));
+is_builtin_simple_type({Type,_,?XSD_NAMESPACE}) ->
+ is_builtin_simple_type(Type);
+is_builtin_simple_type({_,_,_}) ->
+ false;
+is_builtin_simple_type("string") -> true;
+is_builtin_simple_type("normalizedString") -> true;
+is_builtin_simple_type("token") -> true;
+is_builtin_simple_type("base64Binary") -> true;
+is_builtin_simple_type("hexBinary") -> true;
+is_builtin_simple_type("integer") -> true;
+is_builtin_simple_type("positiveInteger") -> true;
+is_builtin_simple_type("negativeInteger") -> true;
+is_builtin_simple_type("nonNegativeInteger") -> true;
+is_builtin_simple_type("nonPositiveInteger") -> true;
+is_builtin_simple_type("long") -> true;
+is_builtin_simple_type("unsignedLong") -> true;
+is_builtin_simple_type("int") -> true;
+is_builtin_simple_type("unsignedInt") -> true;
+is_builtin_simple_type("short") -> true;
+is_builtin_simple_type("unsignedShort") -> true;
+is_builtin_simple_type("decimal") -> true;
+is_builtin_simple_type("float") -> true;
+is_builtin_simple_type("double") -> true;
+is_builtin_simple_type("boolean") -> true;
+is_builtin_simple_type("duration") -> true;
+is_builtin_simple_type("dateTime") -> true;
+is_builtin_simple_type("date") -> true;
+is_builtin_simple_type("time") -> true;
+is_builtin_simple_type("gYear") -> true;
+is_builtin_simple_type("gYearMonth") -> true;
+is_builtin_simple_type("gMonth") -> true;
+is_builtin_simple_type("gMonthDay") -> true;
+is_builtin_simple_type("gDay") -> true;
+is_builtin_simple_type("Name") -> true;
+is_builtin_simple_type("QName") -> true;
+is_builtin_simple_type("NCName") -> true;
+is_builtin_simple_type("anyURI") -> true;
+is_builtin_simple_type("language") -> true;
+is_builtin_simple_type("ID") -> true;
+is_builtin_simple_type("IDREF") -> true;
+is_builtin_simple_type("IDREFS") -> true;
+is_builtin_simple_type("ENTITY") -> true;
+is_builtin_simple_type("ENTITIES") ->true;
+is_builtin_simple_type("NOTATION") -> true;
+is_builtin_simple_type("NMTOKEN") -> true;
+is_builtin_simple_type("NMTOKENS") -> true;
+is_builtin_simple_type("byte") -> true;
+is_builtin_simple_type("unsignedByte") -> true;
+is_builtin_simple_type(_) -> false.
+
+is_xsd_string({Type,_,?XSD_NAMESPACE}) when is_atom(Type) ->
+ is_xsd_string(Type);
+is_xsd_string({Type,_,?XSD_NAMESPACE}) ->
+ is_xsd_string(Type);
+is_xsd_string({_,_,_}) ->
+ false;
+is_xsd_string(Atom) when is_atom(Atom) ->
+ is_xsd_string(atom_to_list(Atom));
+is_xsd_string("string") ->
+ true;
+is_xsd_string("normalizedString") ->
+ true;
+is_xsd_string("token") ->
+ true;
+is_xsd_string("language") ->
+ true;
+is_xsd_string("Name") ->
+ true;
+is_xsd_string("NMTOKEN") ->
+ true;
+is_xsd_string("NMTOKENS") ->
+ true;
+is_xsd_string("NCName") ->
+ true;
+is_xsd_string("ID") ->
+ true;
+is_xsd_string("IDREF") ->
+ true;
+is_xsd_string("IDREFS") ->
+ true;
+is_xsd_string("ENTITY") ->
+ true;
+is_xsd_string("ENTITIES") ->
+ true;
+is_xsd_string(_) ->
+ false.
diff --git a/lib/xmerl/src/xmerl_otpsgml.erl b/lib/xmerl/src/xmerl_otpsgml.erl
new file mode 100644
index 0000000000..38688e788f
--- /dev/null
+++ b/lib/xmerl/src/xmerl_otpsgml.erl
@@ -0,0 +1,162 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%% Description : Callback module for exporting XHTML to OTP-SGML.
+
+-module(xmerl_otpsgml).
+
+-export(['#xml-inheritance#'/0]).
+
+%% Note: we assume XML data, so all tags are lowercase!
+
+-export(['#root#'/4,
+ '#element#'/5,
+ '#text#'/1,
+ p/4]).
+
+-import(xmerl_lib, [markup/3, start_tag/2, is_empty_data/1,
+ export_text/1]).
+
+-include("xmerl.hrl").
+
+
+'#xml-inheritance#'() -> [xmerl_sgml].
+
+
+%% The '#text#' function is called for every text segment.
+
+'#text#'(Text) ->
+ export_text(Text).
+
+
+%% The '#root#' tag is called when the entire structure has been
+%% exported. It does not appear in the structure itself.
+
+'#root#'(Data, _Attrs, [], _E) ->
+ ["<!doctype erlref PUBLIC \"-//Stork//DTD erlref//EN\">\n",Data].
+
+
+%% Note that SGML does not have the <Tag/> empty-element form.
+%% Furthermore, for some element types, the end tag is forbidden. (In
+%% all other cases, we always generate the end tag, to make sure that
+%% the scope of a markup is not extended by mistake.)
+
+'#element#'(Tag, Data, Attrs, _Parents, _E) ->
+% io:format("parents:\n~p\n",[_Parents]),
+ case convert_tag(Tag,Attrs) of
+ {false,NewTag,NewAttrs} ->
+ markup(NewTag, NewAttrs, Data);
+ {true,NewTag,NewAttrs} ->
+ [start_tag(NewTag, NewAttrs), Data]
+ end.
+
+
+%% HTML tags with special handling
+
+p(Data, Attrs, _Parents, _E) ->
+ %% In general, we cannot drop the end tag for paragraph elements;
+ %% that is only allowed if we know that it is immediately followed
+ %% by some other block-level tag.
+ case is_empty_data(Data) of
+ true ->
+ %% Paragraph elements should never be completely empty.
+ markup(p, Attrs, "\s");
+ false ->
+ markup(p, Attrs, Data)
+ end.
+
+
+%% Utility functions
+
+convert_tag(code,Attrs) -> convert_tag(c,Attrs);
+convert_tag(strong,Attrs) -> convert_tag(em,Attrs);
+convert_tag(b,Attrs) -> convert_tag(em,Attrs);
+convert_tag(underline,Attrs) -> convert_tag(em,Attrs); % what is underline in sgml???
+convert_tag(dl,Attrs) -> convert_tag(taglist,Attrs);
+convert_tag(dt,Attrs) -> convert_tag(tag,Attrs);
+convert_tag(dd,Attrs) -> convert_tag(item,Attrs);
+convert_tag(ul,Attrs) -> convert_tag(list,Attrs);
+convert_tag(li,Attrs) -> convert_tag(item,Attrs);
+convert_tag(tt,Attrs) -> convert_tag(c,Attrs);
+%convert_tag(a, Attrs) -> convert_tag(seealso,convert_seealso_attrs(Attrs));
+convert_tag(a, Attrs) -> convert_tag(convert_aref(Attrs),convert_aref_attrs(convert_aref(Attrs),Attrs));
+convert_tag(Tag,Attrs) -> {forbid_end(Tag),Tag,Attrs}.
+
+convert_aref([#xmlAttribute{name = href, value = V}|_Rest]) ->
+ %% search if it is a html link, thus make it a 'url' ref otherwise
+ %% a 'seealso'.
+ case html_content(V) of
+ true ->
+ url;
+ _ ->
+ seealso
+ end;
+convert_aref([#xmlAttribute{name = K}|Rest]) ->
+ io:format("Warning: ignoring attribute \'~p\' for tag \'a\'\n",[K]),
+ convert_aref(Rest).
+convert_aref_attrs(url,Attrs) ->
+ Attrs;
+convert_aref_attrs(SA,[#xmlAttribute{name = href, value = V}=A|Rest]) ->
+ [A#xmlAttribute{name=marker,value=V}|convert_aref_attrs(SA,Rest)];
+convert_aref_attrs(_,[])->
+ [].
+html_content([]) ->
+ false;
+html_content([$.|Rest]) ->
+ case Rest of
+ "htm"++_EmaNfeR ->
+ true;
+ _ -> html_content(Rest)
+ end;
+html_content([_H|T]) ->
+ html_content(T).
+
+% convert_seealso_attrs([#xmlAttribute{name = href, value = V} = A|Rest]) ->
+% [A#xmlAttribute{name=marker,value=normalize_web_ref(V)}|convert_seealso_attrs(Rest)];
+% convert_seealso_attrs([#xmlAttribute{name = K}|Rest]) ->
+% io:format("Warning: ignoring attribute \'~p\' for tag \'a\'\n",[K]),
+% convert_seealso_attrs(Rest);
+% convert_seealso_attrs([]) ->
+% [].
+
+% normalize_web_ref(RefName) ->
+% normalize_web_ref1(lists:reverse(RefName)).
+
+% normalize_web_ref1("lmth."++EmaNfeR) ->
+% lists:reverse(EmaNfeR);
+% normalize_web_ref1("mth"++EmaNfeR) ->
+% lists:reverse(EmaNfeR);
+% normalize_web_ref1(RefName) ->
+% RefName.
+
+forbid_end(area) -> true;
+forbid_end(base) -> true;
+forbid_end(basefont) -> true;
+forbid_end(br) -> true;
+forbid_end(col) -> true;
+forbid_end(frame) -> true;
+forbid_end(hr) -> true;
+forbid_end(img) -> true;
+forbid_end(input) -> true;
+forbid_end(isindex) -> true;
+forbid_end(link) -> true;
+forbid_end(marker) -> true;
+forbid_end(meta) -> true;
+forbid_end(param) -> true;
+forbid_end(_) -> false.
diff --git a/lib/xmerl/src/xmerl_regexp.erl b/lib/xmerl/src/xmerl_regexp.erl
new file mode 100644
index 0000000000..0c53e6f34a
--- /dev/null
+++ b/lib/xmerl/src/xmerl_regexp.erl
@@ -0,0 +1,1437 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2006-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(xmerl_regexp).
+
+%% This module provides a basic set of regular expression functions
+%% for strings. The functions provided are taken from AWK.
+%%
+%% Note that we interpret the syntax tree of a regular expression
+%% directly instead of converting it to an NFA and then interpreting
+%% that. This method seems to go significantly faster.
+
+-export([sh_to_awk/1,parse/1,format_error/1,match/2,first_match/2,matches/2]).
+-export([sub/3,gsub/3,split/2,sub_match/2,sub_first_match/2]).
+
+-export([make_nfa/1,make_dfa/1,make_dfa/2,compile/1]).
+
+-import(string, [substr/2,substr/3]).
+-import(lists, [reverse/1,reverse/2,last/1,duplicate/2,seq/2]).
+-import(lists, [member/2,keysearch/3,keysort/2,map/2,foldl/3]).
+-import(ordsets, [is_element/2,add_element/2,union/2,subtract/2]).
+
+%%-compile([export_all]).
+
+-export([setup/1,compile_proc/2]).
+
+setup(RE0) ->
+ RE = setup(RE0, [$^]),
+ Pid = spawn(?MODULE,compile_proc,[self(),RE]),
+ receive
+ {ok,Result} ->
+ Result
+ after 2000 ->
+ exit(Pid,force),
+ parse(RE)
+ end.
+ %% compile(RE).
+%%RE.
+compile_proc(From,RE) ->
+ Res = compile(RE),
+ From ! {ok,Res}.
+
+
+setup([$\\,$d|S],Acc) -> setup(S,"]9-0[" ++Acc);
+setup([$\\,$D|S],Acc) -> setup(S,"]9-0^[" ++Acc);
+setup([$\\,$s|S],Acc) -> setup(S,"]s\\t\\n\\r\\[" ++Acc);
+setup([$\\,$S|S],Acc) -> setup(S,"]\\s\\t\\n\\r^[" ++Acc);
+setup([$\\,$i|S],Acc) -> setup(S,"]z-aZ-A_:[" ++Acc); %% Only Latin-1 now
+setup([$\\,$I|S],Acc) -> setup(S,"]z-aZ-A_:^[" ++Acc);
+setup([$\\,$c|S],Acc) -> setup(S,"]9-0z-aZ-A_:."++[183]++"-[" ++Acc);
+setup([$\\,$C|S],Acc) -> setup(S,"]9-0z-aZ-A_:."++[183]++"-^[" ++Acc);
+%% fixme setup([$\\,$w|S]) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup([$\\,$W|S]) -> {{comp_class,"\s\t\n\r"},S};
+%% Letter, Any
+%% fixme setup(["\\p{L}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{L}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Letter, Uppercase
+%% fixme setup(["\\p{Lu}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{Lu}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Letter, Lowercase
+%% fixme setup(["\\p{Ll}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{Ll}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Letter, Titlecase
+%% fixme setup(["\\p{Lt}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{Lt}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Letter, Modifier
+%% fixme setup(["\\p{Lm}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{Lm}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Letter, Other
+%% fixme setup(["\\p{Lo}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{Lo}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Mark, Any
+%% fixme setup(["\\p{M}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{M}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Mark, Nonspacing
+%% fixme setup(["\\p{Mn}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{Mn}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Mark, Spacing Combining
+%% fixme setup(["\\p{Mc}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{Mc}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Mark, Enclosing
+%% fixme setup(["\\p{Me}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{Me}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Number, Any
+%% fixme setup(["\\p{N}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{N}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Number, Decimal Digit
+%% fixme setup(["\\p{Nd}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{Nd}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Number, Letter
+%% fixme setup(["\\p{Nl}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{Nl}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Number, Other
+%% fixme setup(["\\p{No}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{No}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Punctuation, Any
+%% fixme setup(["\\p{P}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{P}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Punctuation, Connector
+%% fixme setup(["\\p{Pc}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{Pc}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Punctuation, Dash
+%% fixme setup(["\\p{Pd}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{Pd}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Punctuation, Open
+%% fixme setup(["\\p{Ps}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{Ps}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Punctuation, Close
+%% fixme setup(["\\p{Pe}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{Pe}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Punctuation, Initial quote (may behave like Ps or Pe, depending on usage)
+%% fixme setup(["\\p{Pi}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{Pi}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Punctuation, Final quote (may behave like Ps or Pe, depending on usage)
+%% fixme setup(["\\p{Pf}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{Pf}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Punctuation, Other
+%% fixme setup(["\\p{Po}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{Po}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Symbol, Any
+%% fixme setup(["\\p{S}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{S}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Symbol, Math
+%% fixme setup(["\\p{Sm}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{Sm}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Symbol, Currency
+%% fixme setup(["\\p{Sc}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{Sc}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Symbol, Modifier
+%% fixme setup(["\\p{Sk}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{Sk}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Symbol, Other
+%% fixme setup(["\\p{So}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{So}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Separator, Any
+%% fixme setup(["\\p{Z}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{Z}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Separator, Space
+%% fixme setup(["\\p{Zs}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{Zs}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Separator, Line
+%% fixme setup(["\\p{Zl}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{Zl}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Separator, Paragraph
+%% fixme setup(["\\p{Zp}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{Zp}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Other, Any
+%% fixme setup(["\\p{C}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{C}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Other, Control
+%% fixme setup(["\\p{Cc}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{Cc}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Other, Format
+%% fixme setup(["\\p{Cf}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{Cf}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Other, Surrogate not supported by schema recommendation
+%% fixme setup(["\\p{Cs}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{Cs}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Other, Private Use
+%% fixme setup(["\\p{Co}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{Co}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+%% Other, Not assigned (no characters in the file have this property)
+%% fixme setup(["\\p{Cn}" ++ S) -> {{char_class,"\s\t\n\r"},S};
+%% fixme setup(["\\P{Cn}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
+setup([A|S], Acc) -> setup(S, [A|Acc]);
+setup([],Acc) -> reverse([$$|Acc]).
+
+%% sh_to_awk(ShellRegExp)
+%% Convert a sh style regexp into a full AWK one. The main difficulty is
+%% getting character sets right as the conventions are different.
+
+sh_to_awk(Sh) -> "^(" ++ sh_to_awk_1(Sh). %Fix the beginning
+
+sh_to_awk_1([$*|Sh]) -> %This matches any string
+ ".*" ++ sh_to_awk_1(Sh);
+sh_to_awk_1([$?|Sh]) -> %This matches any character
+ [$.|sh_to_awk_1(Sh)];
+sh_to_awk_1([$[,$^,$]|Sh]) -> %This takes careful handling
+ "\\^" ++ sh_to_awk_1(Sh);
+%% Must move '^' to end.
+sh_to_awk_1("[^" ++ Sh) -> [$[|sh_to_awk_2(Sh, true)];
+sh_to_awk_1("[!" ++ Sh) -> "[^" ++ sh_to_awk_2(Sh, false);
+sh_to_awk_1([$[|Sh]) -> [$[|sh_to_awk_2(Sh, false)];
+sh_to_awk_1([C|Sh]) ->
+ %% Unspecialise everything else which is not an escape character.
+ case sh_special_char(C) of
+ true -> [$\\,C|sh_to_awk_1(Sh)];
+ false -> [C|sh_to_awk_1(Sh)]
+ end;
+sh_to_awk_1([]) -> ")$". %Fix the end
+
+sh_to_awk_2([$]|Sh], UpArrow) -> [$]|sh_to_awk_3(Sh, UpArrow)];
+sh_to_awk_2(Sh, UpArrow) -> sh_to_awk_3(Sh, UpArrow).
+
+sh_to_awk_3([$]|Sh], true) -> "^]" ++ sh_to_awk_1(Sh);
+sh_to_awk_3([$]|Sh], false) -> [$]|sh_to_awk_1(Sh)];
+sh_to_awk_3([C|Sh], UpArrow) -> [C|sh_to_awk_3(Sh, UpArrow)];
+sh_to_awk_3([], true) -> [$^|sh_to_awk_1([])];
+sh_to_awk_3([], false) -> sh_to_awk_1([]).
+
+%% -type sh_special_char(char()) -> bool().
+%% Test if a character is a special character.
+
+sh_special_char($|) -> true;
+sh_special_char($*) -> true;
+sh_special_char($+) -> true;
+sh_special_char($?) -> true;
+sh_special_char($() -> true;
+sh_special_char($)) -> true;
+sh_special_char($\\) -> true;
+sh_special_char($^) -> true;
+sh_special_char($$) -> true;
+sh_special_char($.) -> true;
+sh_special_char($[) -> true;
+sh_special_char($]) -> true;
+sh_special_char($") -> true;
+sh_special_char(_C) -> false.
+
+%% parse(RegExp) -> {ok,RE} | {error,E}.
+%% Parse the regexp described in the string RegExp.
+
+parse(S) ->
+ case catch reg(S, 0) of
+ {R,Sc,[]} -> {ok,{regexp,{R,Sc}}};
+ {_R,_Sc,[C|_]} -> {error,{illegal,[C]}};
+ {error,E} -> {error,E}
+ end.
+
+%% format_error(Error) -> String.
+
+format_error({interval_range,What}) ->
+ ["illegal interval range",io_lib:write_string(What)];
+format_error({illegal,What}) -> ["illegal character `",What,"'"];
+format_error({unterminated,What}) -> ["unterminated `",What,"'"];
+format_error({posix_cc,What}) ->
+ ["illegal POSIX character class ",io_lib:write_string(What)];
+format_error({char_class,What}) ->
+ ["illegal character class ",io_lib:write_string(What)].
+
+%% match(String, RegExp) -> {match,Start,Length} | nomatch | {error,E}.
+%% Find the longest match of RegExp in String.
+
+match(S, RegExp) when is_list(RegExp) ->
+ case parse(RegExp) of
+ {ok,RE} -> match(S, RE);
+ {error,E} -> {error,E}
+ end;
+match(S, {regexp,RE}) ->
+ case match_re(RE, S, 1, 0, -1) of
+ {Start,Len} when Len >= 0 ->
+ {match,Start,Len};
+ {_Start,_Len} -> nomatch
+ end;
+match(S, {comp_regexp,RE}) ->
+ case match_comp(RE, S, 1, 0, -1) of
+ {Start,Len} when Len >= 0 ->
+ {match,Start,Len};
+ {_Start,_Len} -> nomatch
+ end.
+
+match_re(RE, [_|Cs]=S0, P0, Mst, Mlen) ->
+ case re_apply(S0, P0, RE) of
+ {match,P1,_S1,_Subs} ->
+ Len = P1-P0,
+ if Len > Mlen -> match_re(RE, Cs, P0+1, P0, Len);
+ true -> match_re(RE, Cs, P0+1, Mst, Mlen)
+ end;
+ nomatch -> match_re(RE, Cs, P0+1, Mst, Mlen);
+ never_match -> {Mst,Mlen} %No need to go on
+ end;
+match_re(_RE, _S, _P, Mst, Mlen) -> {Mst,Mlen}.
+
+match_comp(RE, [_|Cs]=S0, P0, Mst, Mlen) ->
+ case comp_apply(S0, P0, RE) of
+ {match,P1,_S1} ->
+ Len = P1-P0,
+ if Len > Mlen -> match_comp(RE, Cs, P0+1, P0, Len);
+ true -> match_comp(RE, Cs, P0+1, Mst, Mlen)
+ end;
+ nomatch -> match_comp(RE, Cs, P0+1, Mst, Mlen)
+ end;
+match_comp(_RE, _S, _P, Mst, Mlen) -> {Mst,Mlen}.
+
+%% match_re(RE, S0, Pos0, Mst, Mlen) ->
+%% case first_match_re(RE, S0, Pos0) of
+%% {St,Len,_} -> %Found a match
+%% Pos1 = St + 1, %Where to start next match
+%% S1 = lists:nthtail(Pos1-Pos0, S0),
+%% if Len > Mlen -> match_re(RE, S1, Pos1, St, Len);
+%% true -> match_re(RE, S1, Pos1, Mst, Mlen)
+%% end;
+%% nomatch -> {Mst,Mlen}
+%% end.
+
+%% match_comp(RE, S0, Pos0, Mst, Mlen) ->
+%% case first_match_comp(RE, S0, Pos0) of
+%% {St,Len} -> %Found a match
+%% Pos1 = St + 1, %Where to start next match
+%% S1 = lists:nthtail(Pos1-Pos0, S0),
+%% if Len > Mlen -> match_comp(RE, S1, Pos1, St, Len);
+%% true -> match_comp(RE, S1, Pos1, Mst, Mlen)
+%% end;
+%% nomatch -> {Mst,Mlen}
+%% end.
+
+%% first_match(String, RegExp) -> {match,Start,Length} | nomatch | {error,E}.
+%% Find the first match of RegExp in String.
+
+first_match(S, RegExp) when is_list(RegExp) ->
+ case parse(RegExp) of
+ {ok,RE} -> first_match(S, RE);
+ {error,E} -> {error,E}
+ end;
+first_match(S, {regexp,RE}) ->
+ case first_match_re(RE, S, 1) of
+ {Start,Len,_} -> {match,Start,Len};
+ nomatch -> nomatch
+ end;
+first_match(S, {comp_regexp,RE}) ->
+ case first_match_comp(RE, S, 1) of
+ {Start,Len} -> {match,Start,Len};
+ nomatch -> nomatch
+ end.
+
+first_match_re(RE, S, St) when S /= [] ->
+ case re_apply(S, St, RE) of
+ {match,P,_Rest,Subs} -> {St,P-St,Subs};
+ nomatch -> first_match_re(RE, tl(S), St+1);
+ never_match -> nomatch
+ end;
+first_match_re(_RE, [], _St) -> nomatch.
+
+first_match_comp(RE, S, St) when S /= [] ->
+ case comp_apply(S, St, RE) of
+ {match,P,_Rest} -> {St,P-St};
+ nomatch -> first_match_comp(RE, tl(S), St+1)
+ end;
+first_match_comp(_RE, [], _St) -> nomatch.
+
+%% matches(String, RegExp) -> {match,[{Start,Length}]} | {error,E}.
+%% Return the all the non-overlapping matches of RegExp in String.
+
+matches(S, RegExp) when is_list(RegExp) ->
+ case parse(RegExp) of
+ {ok,RE} -> matches(S, RE);
+ {error,E} -> {error,E}
+ end;
+matches(S, {regexp,RE}) -> {match,matches_re(S, RE, 1)};
+matches(S, {comp_regexp,RE}) -> {match,matches_comp(S, RE, 1)}.
+
+matches_re([_|Cs]=S0, RE, P0) ->
+ case re_apply(S0, P0, RE) of
+ {match,P0,S1,_Subs} -> %0 length match
+ [{P0,0}|matches_re(tl(S1), RE, P0+1)];
+ {match,P1,S1,_Subs} ->
+ [{P0,P1-P0}|matches_re(S1, RE, P1)];
+ nomatch -> matches_re(Cs, RE, P0+1);
+ never_match -> []
+ end;
+matches_re([], _RE, _P) -> [].
+
+matches_comp([_|Cs]=S0, RE, P0) ->
+ case comp_apply(S0, P0, RE) of
+ {match,P0,S1} -> %0 length match
+ [{P0,0}|matches_comp(tl(S1), RE, P0+1)];
+ {match,P1,S1} ->
+ [{P0,P1-P0}|matches_comp(S1, RE, P1)];
+ nomatch -> matches_comp(Cs, RE, P0+1)
+ end;
+matches_comp([], _RE, _P) -> [].
+
+%% sub(String, RegExp, Replace) -> {ok,RepString,RepCount} | {error,E}.
+%% Substitute the first match of the regular expression RegExp with
+%% the string Replace in String. Accept pre-parsed regular
+%% expressions.
+
+sub(String, RegExp, Rep) when is_list(RegExp) ->
+ case parse(RegExp) of
+ {ok,RE} -> sub(String, RE, Rep);
+ {error,E} -> {error,E}
+ end;
+sub(String, {regexp,RE}, Rep) ->
+ case sub_re(String, 1, RE, [], Rep) of
+ {yes,NewStr} -> {ok,NewStr,1};
+ no -> {ok,String,0}
+ end;
+sub(String, {comp_regexp,RE}, Rep) ->
+ case sub_comp(String, 1, RE, [], Rep) of
+ {yes,NewStr} -> {ok,NewStr,1};
+ no -> {ok,String,0}
+ end.
+
+%% sub_re(String, Position, Regexp, Before, Replacement) ->
+%% {NewString,Count}.
+%% sub_comp(String, Position, Regexp, Before, Replacement) ->
+%% {NewString,Count}.
+%% Step forward over String until a match is found saving stepped over
+%% chars in Before. Return reversed Before prepended to replacement
+%% and rest of string.
+
+sub_re([C|Cs]=S0, P0, RE, Bef, Rep) ->
+ case re_apply(S0, P0, RE) of
+ {match,P0,_S1,_} -> %Ignore 0 length match
+ sub_re(Cs, P0+1, RE, [C|Bef], Rep);
+ {match,P1,Rest,_Gps} ->
+ {yes,reverse(Bef, sub_repl(Rep, substr(S0, 1, P1-P0), Rest))};
+ nomatch -> sub_re(Cs, P0+1, RE, [C|Bef], Rep);
+ never_match -> no %No need to go on
+ end;
+sub_re([], _P, _RE, _Bef, _Rep) -> no.
+
+sub_comp([C|Cs]=S0, P0, RE, Bef, Rep) ->
+ case comp_apply(S0, P0, RE) of
+ {match,P0,_S1} -> %Ignore 0 length match
+ sub_comp(Cs, P0+1, RE, [C|Bef], Rep);
+ {match,P1,Rest} ->
+ {yes,reverse(Bef, sub_repl(Rep, substr(S0, 1, P1-P0), Rest))};
+ nomatch -> sub_comp(Cs, P0+1, RE, [C|Bef], Rep)
+ end;
+sub_comp([], _P, _RE, _Bef, _Rep) -> no.
+
+sub_repl([$&|Rep], M, Rest) -> M ++ sub_repl(Rep, M, Rest);
+sub_repl("\\&" ++ Rep, M, Rest) -> [$&|sub_repl(Rep, M, Rest)];
+sub_repl([C|Rep], M, Rest) -> [C|sub_repl(Rep, M, Rest)];
+sub_repl([], _M, Rest) -> Rest.
+
+%% gsub(String, RegExp, Replace) -> {ok,RepString,RepCount} | {error,E}.
+%% Substitute every match of the regular expression RegExp with the
+%% string New in String. Accept pre-parsed regular expressions.
+
+gsub(String, RegExp, Rep) when is_list(RegExp) ->
+ case parse(RegExp) of
+ {ok,RE} -> gsub(String, RE, Rep);
+ {error,E} -> {error,E}
+ end;
+gsub(String, {regexp,RE}, Rep) ->
+ case gsub_re(String, 1, RE, [], Rep) of
+ {NewStr,N} -> {ok,NewStr,N};
+ no -> {ok,String,0} %No substitutions
+ end;
+gsub(String, {comp_regexp,RE}, Rep) ->
+ case gsub_comp(String, 1, RE, [], Rep) of
+ {NewStr,N} -> {ok,NewStr,N};
+ no -> {ok,String,0} %No substitutions
+ end.
+
+%% gsub_re(String, Position, Regexp, Before, Replacement) ->
+%% {NewString,Count}.
+%% gsub_comp(String, Position, Regexp, Before, Replacement) ->
+%% {NewString,Count}.
+%% Step forward over String until a match is found saving stepped over
+%% chars in Before. Call recursively to do rest of string after
+%% match. Return reversed Before prepended to return from recursive
+%% call.
+
+gsub_re([C|Cs]=S0, P0, RE, Bef, Rep) ->
+ case re_apply(S0, P0, RE) of
+ {match,P0,_S1,_} -> %Ignore 0 length match
+ gsub_re(Cs, P0+1, RE, [C|Bef], Rep);
+ {match,P1,S1,_Gps} ->
+ case gsub_re(S1, P1, RE, [], Rep) of
+ {NewStr,N0} -> %Substituitions
+ {reverse(Bef, sub_repl(Rep, substr(S0, 1, P1-P0), NewStr)),
+ N0+1};
+ no -> %No substituitions.
+ {reverse(Bef, sub_repl(Rep, substr(S0, 1, P1-P0), S1)),1}
+ end;
+ %%No match so step forward saving C on Bef.
+ nomatch -> gsub_re(Cs, P0+1, RE, [C|Bef], Rep);
+ never_match -> no %No need to go on
+ end;
+gsub_re([], _P, _RE, _Bef, _Rep) -> no.
+
+gsub_comp([C|Cs]=S0, P0, RE, Bef, Rep) ->
+ case comp_apply(S0, P0, RE) of
+ {match,P0,_S1} -> %Ignore 0 length match
+ gsub_comp(Cs, P0+1, RE, [C|Bef], Rep);
+ {match,P1,S1} ->
+ case gsub_comp(S1, P1, RE, [], Rep) of
+ {NewStr,N0} -> %Substituitions
+ {reverse(Bef, sub_repl(Rep, substr(S0, 1, P1-P0), NewStr)),
+ N0+1};
+ no -> %No substituitions.
+ {reverse(Bef, sub_repl(Rep, substr(S0, 1, P1-P0), S1)),1}
+ end;
+ %%No match so step forward saving C on Bef.
+ nomatch -> gsub_comp(Cs, P0+1, RE, [C|Bef], Rep)
+ end;
+gsub_comp([], _P, _RE, _Bef, _Rep) -> no.
+
+%% split(String, RegExp) -> {ok,[SubString]} | {error,E}.
+%% Split a string into substrings where the RegExp describes the
+%% field seperator. The RegExp " " is specially treated.
+
+split(String, " ") -> %This is really special
+ {ok,{regexp,RE}} = parse("[ \t]+"),
+ case split_apply_re(String, RE, true) of
+ [[]|Ss] -> {ok,Ss};
+ Ss -> {ok,Ss}
+ end;
+split(String, RegExp) when is_list(RegExp) ->
+ case parse(RegExp) of
+ {ok,{regexp,RE}} -> {ok,split_apply_re(String, RE, false)};
+ {error,E} -> {error,E}
+ end;
+split(String, {regexp,RE}) -> {ok,split_apply_re(String, RE, false)};
+split(String, {comp_regexp,RE}) -> {ok,split_apply_comp(String, RE, false)}.
+
+split_apply_re(S, RE, Trim) -> split_apply_re(S, 1, RE, Trim, []).
+
+split_apply_re([], _P, _RE, true, []) -> [];
+split_apply_re([], _P, _RE, _T, Sub) -> [reverse(Sub)];
+split_apply_re([C|Cs]=S, P0, RE, T, Sub) ->
+ case re_apply(S, P0, RE) of
+ {match,P0,_S1,_} -> %Ignore 0 length match
+ split_apply_re(Cs, P0+1, RE, T, [C|Sub]);
+ {match,P1,S1,_} ->
+ [reverse(Sub)|split_apply_re(S1, P1, RE, T, [])];
+ nomatch ->
+ split_apply_re(Cs, P0+1, RE, T, [C|Sub]);
+ never_match -> [reverse(Sub, S)] %No need to go on
+ end.
+
+split_apply_comp(S, RE, Trim) -> split_apply_comp(S, 1, RE, Trim, []).
+
+%%split_apply_comp([], _P, _RE, true, []) -> [];
+split_apply_comp([], _P, _RE, _T, Sub) -> [reverse(Sub)];
+split_apply_comp([C|Cs]=S, P0, RE, T, Sub) ->
+ case comp_apply(S, P0, RE) of
+ {match,P0,_S1} -> %Ignore 0 length match
+ split_apply_comp(Cs, P0+1, RE, T, [C|Sub]);
+ {match,P1,S1} ->
+ [reverse(Sub)|split_apply_comp(S1, P1, RE, T, [])];
+ nomatch ->
+ split_apply_comp(Cs, P0+1, RE, T, [C|Sub])
+ end.
+
+%% sub_match(String, RegExp) ->
+%% {match,Start,Length,SubExprs} | nomatch | {error,E}.
+%% Find the longest match of RegExp in String.
+
+sub_match(S, RegExp) when is_list(RegExp) ->
+ case parse(RegExp) of
+ {ok,RE} -> sub_match(S, RE);
+ {error,E} -> {error,E}
+ end;
+sub_match(S, {regexp,RE}) ->
+ case sub_match_re(RE, S, 1, 0, -1, none) of
+ {Start,Len,Subs} when Len >= 0 ->
+ {match,Start,Len,Subs};
+ {_Start,_Len,_Subs} -> nomatch
+ end.
+
+sub_match_re(RE, S0, Pos0, Mst, Mlen, Msubs) ->
+ case first_match_re(RE, S0, Pos0) of
+ {St,Len,Subs} -> %Found a match
+ Pos1 = St + 1, %Where to start next match
+ S1 = lists:nthtail(Pos1-Pos0, S0),
+ if Len > Mlen -> sub_match_re(RE, S1, Pos1, St, Len, Subs);
+ true -> sub_match_re(RE, S1, Pos1, Mst, Mlen, Msubs)
+ end;
+ nomatch -> {Mst,Mlen,Msubs}
+ end.
+
+%% sub_first_match(String, RegExp) ->
+%% {match,Start,Length,SubExprs} | nomatch | {error,E}.
+%% Find the longest match of RegExp in String, return Start and Length
+%% as well as tuple of sub-expression matches.
+
+sub_first_match(S, RegExp) when is_list(RegExp) ->
+ {ok,RE} = parse(RegExp),
+ sub_first_match(S, RE);
+sub_first_match(S, {regexp,RE}) ->
+ case first_match_re(RE, S, 1) of
+ {St,Len,Subs} -> {match,St,Len,Subs};
+ nomatch -> nomatch
+ end.
+
+
+%% This is the regular expression grammar used. It is equivalent to the
+%% one used in AWK, except that we allow ^ $ to be used anywhere and fail
+%% in the matching.
+%%
+%% reg -> reg1 : '$1'.
+%% reg1 -> reg1 "|" reg2 : {'or','$1','$2'}.
+%% reg1 -> reg2 : '$1'.
+%% reg2 -> reg2 reg3 : {concat,'$1','$2'}.
+%% reg2 -> reg3 : '$1'.
+%% reg3 -> reg3 "*" : {kclosure,'$1'}.
+%% reg3 -> reg3 "+" : {pclosure,'$1'}.
+%% reg3 -> reg3 "?" : {optional,'$1'}.
+%% reg3 -> reg3 "{" [Min],[Max] "}" : {closure_range, Num, '$1'} see below
+%% reg3 -> reg4 : '$1'.
+%% reg4 -> "(" reg ")" : '$2'.
+%% reg4 -> "\\" char : '$2'.
+%% reg4 -> "^" : bos.
+%% reg4 -> "$" : eos.
+%% reg4 -> "." : char.
+%% reg4 -> "[" class "]" : {char_class,char_class('$2')}
+%% reg4 -> "[" "^" class "]" : {comp_class,char_class('$3')}
+%% reg4 -> "\"" chars "\"" : char_string('$2')
+%% reg4 -> char : '$1'.
+%% reg4 -> empty : epsilon.
+%% The grammar of the current regular expressions. The actual parser
+%% is a recursive descent implementation of the grammar.
+
+reg(S, Sc) -> reg1(S, Sc).
+
+%% reg1 -> reg2 reg1'
+%% reg1' -> "|" reg2
+%% reg1' -> empty
+
+reg1(S0, Sc0) ->
+ {L,Sc1,S1} = reg2(S0, Sc0),
+ reg1p(S1, L, Sc1).
+
+reg1p([$||S0], L, Sc0) ->
+ {R,Sc1,S1} = reg2(S0, Sc0),
+ reg1p(S1, {'or',L,R}, Sc1);
+reg1p(S, L, Sc) -> {L,Sc,S}.
+
+%% reg2 -> reg3 reg2'
+%% reg2' -> reg3
+%% reg2' -> empty
+
+reg2(S0, Sc0) ->
+ {L,Sc1,S1} = reg3(S0, Sc0),
+ reg2p(S1, L, Sc1).
+
+reg2p([C|S0], L, Sc0) when C /= $|, C /= $) ->
+ {R,Sc1,S1} = reg3([C|S0], Sc0),
+ %% reg2p(S1, {concat,L,R}, Sc1);
+ case is_integer(R) of
+ true ->
+ case L of
+ {literal,Lit} ->
+ reg2p(S1, {literal,Lit ++[R]}, Sc1);
+ {concat,S2,Char} when is_integer(Char) ->
+ reg2p(S1, {concat,S2,{literal,[Char,R]}}, Sc1);
+ {concat,S2,{literal,Lit}} ->
+ reg2p(S1, {concat,S2,{literal,Lit ++ [R]}}, Sc1);
+ Char when is_integer(Char) ->
+ reg2p(S1, {literal,[Char,R]}, Sc1);
+ _ ->
+ reg2p(S1, {concat,L,R}, Sc1)
+ end;
+ false ->
+ reg2p(S1, {concat,L,R}, Sc1)
+ end;
+reg2p(S, L, Sc) -> {L,Sc,S}.
+
+%% reg3 -> reg4 reg3'
+%% reg3' -> "*" reg3'
+%% reg3' -> "+" reg3'
+%% reg3' -> "?" reg3'
+%% reg3' -> "{" [Min],[Max] "}" reg3'
+%% reg3' -> empty
+
+reg3(S0, Sc0) ->
+ {L,Sc1,S1} = reg4(S0, Sc0),
+ reg3p(S1, L, Sc1).
+
+reg3p([$*|S], L, Sc) -> reg3p(S, {kclosure,L}, Sc);
+reg3p([$+|S], L, Sc) -> reg3p(S, {pclosure,L}, Sc);
+reg3p([$?|S], L, Sc) -> reg3p(S, {optional,L}, Sc);
+reg3p([${|Cs0], L, Sc) -> % $}
+ case interval_range(Cs0) of
+ {none,none,_Cs1} -> parse_error({interval_range,[${|Cs0]});
+ {N,M,[$}|Cs1]} -> reg3p(Cs1, {iclosure,L,N,M}, Sc);
+ {_N,_M,_Cs1} -> parse_error({unterminated,"{"})
+ end;
+reg3p(S, L, Sc) -> {L,Sc,S}.
+
+reg4([$(|S0], Sc0) ->
+ Sc1 = Sc0+1,
+ case reg(S0, Sc1) of
+ {R,Sc2,[$)|S1]} -> {{subexpr,Sc1,R},Sc2,S1};
+ {_R,_Sc,_S} -> parse_error({unterminated,"("})
+ end;
+reg4([$^|S], Sc) -> {bos,Sc,S};
+reg4([$$|S], Sc) -> {eos,Sc,S};
+reg4([$.|S], Sc) -> {{comp_class,"\n"},Sc,S};
+reg4("[^" ++ S0, Sc) ->
+ case char_class(S0) of
+ {Cc,[$]|S1]} -> {{comp_class,Cc},Sc,S1};
+ {_Cc,_S} -> parse_error({unterminated,"["})
+ end;
+reg4([$[|S0], Sc) ->
+ case char_class(S0) of
+ {Cc,[$]|S1]} -> {{char_class,Cc},Sc,S1};
+ {_Cc,_S1} -> parse_error({unterminated,"["})
+ end;
+%reg4([$"|S0], Sc) ->
+% case char_string(S0) of
+% {St,[$"|S1]} -> {St,Sc,S1};
+% {St,S1} -> parse_error({unterminated,"\""})
+% end;
+reg4([C0|S0], Sc) when
+ is_integer(C0), C0 /= $*, C0 /= $+, C0 /= $?, C0 /= $], C0 /= $), C0 /= $} ->
+ %% Handle \ quoted characters as well, at least those we see.
+ {C1,S1} = char(C0, S0),
+ {C1,Sc,S1};
+reg4(S=[$)|_], Sc) -> {epsilon,Sc,S};
+reg4([C|_S], _Sc) -> parse_error({illegal,[C]});
+reg4([], Sc) -> {epsilon,Sc,[]}.
+
+char($\\, [O1,O2,O3|S]) when
+ O1 >= $0, O1 =< $7, O2 >= $0, O2 =< $7, O3 >= $0, O3 =< $7 ->
+ {(O1*8 + O2)*8 + O3 - 73*$0,S};
+char($\\, [C|S]) -> {escape_char(C),S};
+char($\\, []) -> parse_error({unterminated,"\\"});
+char(C, S) -> {C,S}.
+
+escape_char($n) -> $\n; %\n = LF
+escape_char($r) -> $\r; %\r = CR
+escape_char($t) -> $\t; %\t = TAB
+escape_char($v) -> $\v; %\v = VT
+escape_char($b) -> $\b; %\b = BS
+escape_char($f) -> $\f; %\f = FF
+escape_char($e) -> $\e; %\e = ESC
+escape_char($s) -> $\s; %\s = SPACE
+escape_char($d) -> $\d; %\d = DEL
+escape_char(C) -> C.
+
+char_class([$]|S0]) ->
+ {Cc,S1} = char_class(S0, [$]]),
+ {pack_cc(Cc),S1};
+char_class(S0) ->
+ {Cc,S1} = char_class(S0, []),
+ {pack_cc(Cc),S1}.
+
+pack_cc(Cc0) ->
+ %% First sort the list.
+ Cc1 = lists:usort(fun ({Cf1,_}, {Cf2,_}) -> Cf1 < Cf2;
+ ({Cf1,_}, C) -> Cf1 < C;
+ (C, {Cf,_}) -> C < Cf;
+ (C1, C2) -> C1 =< C2
+ end, Cc0),
+ pack_cc1(Cc1).
+
+pack_cc1([{Cf1,Cl1},{Cf2,Cl2}|Cc]) when Cl1 >= Cf2, Cl1 =< Cl2 ->
+ pack_cc1([{Cf1,Cl2}|Cc]);
+pack_cc1([{Cf1,Cl1},{Cf2,Cl2}|Cc]) when Cl1 >= Cf2, Cl1 >= Cl2 ->
+ pack_cc1([{Cf1,Cl1}|Cc]);
+pack_cc1([{Cf1,Cl1},{Cf2,Cl2}|Cc]) when Cl1+1 == Cf2 ->
+ pack_cc1([{Cf1,Cl2}|Cc]);
+pack_cc1([{Cf,Cl},C|Cc]) when Cl >= C -> pack_cc1([{Cf,Cl}|Cc]);
+pack_cc1([{Cf,Cl},C|Cc]) when Cl+1 == C -> pack_cc1([{Cf,C}|Cc]);
+pack_cc1([C,{Cf,Cl}|Cc]) when C == Cf-1 -> pack_cc1([{C,Cl}|Cc]);
+pack_cc1([C1,C2|Cc]) when C1+1 == C2 -> pack_cc1([{C1,C2}|Cc]);
+pack_cc1([C|Cc]) -> [C|pack_cc1(Cc)];
+pack_cc1([]) -> [].
+
+char_class("[:" ++ S0, Cc0) -> %Start of POSIX char class
+ case posix_cc(S0, Cc0) of
+ {Cc1,":]" ++ S1} -> char_class(S1, Cc1);
+ {_,_S1} -> parse_error({posix_cc,"[:" ++ S0})
+ end;
+char_class([C1|S0], Cc) when C1 /= $] ->
+ case char(C1, S0) of
+ {Cf,[$-,C2|S1]} when C2 /= $] ->
+ case char(C2, S1) of
+ {Cl,S2} when Cf < Cl -> char_class(S2, [{Cf,Cl}|Cc]);
+ {_Cl,_S2} -> parse_error({char_class,[C1|S0]})
+ end;
+ {C,S1} -> char_class(S1, [C|Cc])
+ end;
+char_class(S, Cc) -> {Cc,S}.
+
+%% posix_cc(String, CharClass) -> {NewCharClass,RestString}.
+%% Handle POSIX character classes, use Latin-1 character set.
+
+posix_cc("alnum" ++ S, Cc) ->
+ {[{$0,$9},{$A,$Z},{192,214},{216,223},{$a,$z},{224,246},{248,255}|Cc],S};
+posix_cc("alpha" ++ S, Cc) ->
+ {[{$A,$Z},{192,214},{216,223},{$a,$z},{224,246},{248,255}|Cc],S};
+posix_cc("blank" ++ S, Cc) -> {[$\s,$\t,160|Cc],S};
+posix_cc("cntrl" ++ S, Cc) -> {[{0,31},{127,159}|Cc],S};
+posix_cc("digit" ++ S, Cc) -> {[{$0,$9}|Cc],S};
+posix_cc("graph" ++ S, Cc) -> {[{33,126},{161,255}|Cc],S};
+posix_cc("lower" ++ S, Cc) -> {[{$a,$z},{224,246},{248,255}|Cc],S};
+posix_cc("print" ++ S, Cc) -> {[{32,126},{160,255}|Cc],S};
+posix_cc("punct" ++ S, Cc) -> {[{$!,$/},{$:,$?},{${,$~},{161,191}|Cc],S};
+posix_cc("space" ++ S, Cc) -> {[$\s,$\t,$\f,$\r,$\v,160|Cc],S};
+posix_cc("upper" ++ S, Cc) -> {[{$A,$Z},{192,214},{216,223}|Cc],S};
+posix_cc("xdigit" ++ S, Cc) -> {[{$a,$f},{$A,$F},{$0,$9}|Cc],S};
+posix_cc(S, _Cc) -> parse_error({posix_cc,"[:" ++ S}).
+
+interval_range(Cs0) ->
+ case number(Cs0) of
+ {none,Cs1} -> {none,none,Cs1};
+ {N,[$,|Cs1]} ->
+ case number(Cs1) of
+ {none,Cs2} -> {N,any,Cs2};
+ {M,Cs2} -> {N,M,Cs2}
+ end;
+ {N,Cs1} -> {N,none,Cs1}
+ end.
+
+number([C|Cs]) when C >= $0, C =< $9 ->
+ number(Cs, C - $0);
+number(Cs) -> {none,Cs}.
+
+number([C|Cs], Acc) when C >= $0, C =< $9 ->
+ number(Cs, 10*Acc + (C - $0));
+number(Cs, Acc) -> {Acc,Cs}.
+
+parse_error(E) -> throw({error,E}).
+
+%char_string([C|S]) when C /= $" -> char_string(S, C);
+%char_string(S) -> {epsilon,S}.
+
+%char_string([C|S0], L) when C /= $" ->
+% char_string(S0, {concat,L,C});
+%char_string(S, L) -> {L,S}.
+
+%% re_apply(String, StartPos, RegExp) ->
+%% {match,RestPos,Rest,SubExprs} | nomatch.
+%%
+%% Apply the (parse of the) regular expression RegExp to String. If
+%% there is a match return the position of the remaining string and
+%% the string if else return 'nomatch'.
+%%
+%% StartPos should be the real start position as it is used to decide
+%% if we are at the beginning of the string.
+
+re_apply(S, St, {RE,Sc}) ->
+ Subs = erlang:make_tuple(Sc, none), %Make a sub-regexp table.
+ Res = re_apply(RE, [], S, St, Subs),
+ %% io:format("~p x ~p -> ~p\n", [RE,S,Res]),
+ Res.
+
+re_apply(epsilon, More, S, P, Subs) -> %This always matches
+ re_apply_more(More, S, P, Subs);
+re_apply({'or',RE1,RE2}, More, S, P, Subs) ->
+ re_apply_or(re_apply(RE1, More, S, P, Subs),
+ re_apply(RE2, More, S, P, Subs));
+re_apply({concat,RE1,RE2}, More, S0, P, Subs) ->
+ re_apply(RE1, [RE2|More], S0, P, Subs);
+re_apply({literal,[C|Lcs]}, More, [C|S], P, Subs) ->
+ re_apply_lit(Lcs, More, S, P+1, Subs); %Have matched first char
+re_apply({kclosure,RE}, More, S0, P0, Subs0) ->
+ %% Greedy so try RE first, no difference here actually.
+ Loop = case re_apply(RE, [], S0, P0, Subs0) of
+ {match,P0,_S1,_Subs1} -> %0 length match, don't loop!
+ nomatch;
+ {match,P1,S1,Subs1} ->
+ re_apply_more([{kclosure,RE}|More], S1, P1, Subs1);
+ nomatch -> nomatch;
+ never_match -> never_match
+ end,
+ re_apply_or(Loop, re_apply_more(More, S0, P0, Subs0));
+re_apply({pclosure,RE}, More, S, P, Subs) ->
+ re_apply(RE, [{kclosure,RE}|More], S, P, Subs);
+re_apply({optional,RE}, More, S, P, Subs) ->
+ %% Greedy so try RE first, no difference here actually.
+ re_apply_or(re_apply(RE, More, S, P, Subs),
+ re_apply_more(More, S, P, Subs));
+re_apply({iclosure,RE,N,M}, More, S, P, Subs) when N > 0 ->
+ re_apply(RE, [{iclosure,RE,N-1,M}|More], S, P, Subs);
+re_apply({iclosure,RE,0,M}, More, S, P, Subs) ->
+ Exp = expand_opt(RE, M),
+ re_apply(Exp, More, S, P, Subs);
+re_apply({subexpr,N,RE}, More, S, P, Subs) ->
+ re_apply(RE, [{endsub,N,P}|More], S, P, Subs);
+re_apply({endsub,N,St}, More, S, P, Subs0) ->
+ Subs1 = setelement(N, Subs0, {St,P-St}), %Record sub-expr
+ re_apply_more(More, S, P, Subs1);
+re_apply(bos, More, S, 1, Subs) -> re_apply_more(More, S, 1, Subs);
+re_apply(bos, _More, _S, _, _) -> never_match;
+re_apply(eos, More, [$\n], P, Subs) -> re_apply_more(More, [], P, Subs);
+re_apply(eos, More, [], P, Subs) -> re_apply_more(More, [], P, Subs);
+re_apply({char_class,Cc}, More, [C|S], P, Subs) ->
+ case in_char_class(C, Cc) of
+ true -> re_apply_more(More, S, P+1, Subs);
+ false -> nomatch
+ end;
+re_apply({comp_class,Cc}, More, [C|S], P, Subs) ->
+ case in_char_class(C, Cc) of
+ true -> nomatch;
+ false -> re_apply_more(More, S, P+1, Subs)
+ end;
+re_apply(C, More, [C|S], P, Subs) when is_integer(C) ->
+ re_apply_more(More, S, P+1, Subs);
+re_apply(_RE, _More, _S, _P, _Subs) ->
+ %% io:format("~p : ~p\n", [_RE,_S]),
+ nomatch.
+
+%% re_apply_more([RegExp], String, Length, SubsExprs) ->
+%% {match,RestPos,Rest,SubExprs} | nomatch.
+
+re_apply_more([RE|More], S, P, Subs) -> re_apply(RE, More, S, P, Subs);
+re_apply_more([], S, P, Subs) -> {match,P,S,Subs}.
+
+%% re_apply_lit(Literal, More, String, Position, SubExprs) ->
+%% {match,RestPos,Rest,SubExprs} | nomatch.
+re_apply_lit([C|Lit], More, [C|Cs], P, Subs) ->
+ re_apply_lit(Lit, More, Cs, P+1, Subs);
+re_apply_lit([], More, Cs, P, Subs) ->
+ re_apply_more(More, Cs, P, Subs);
+re_apply_lit(_Lit, _More, _Cs, _P, _Subs) ->
+ nomatch.
+
+%% expand_iclosure(RE, N, M) -> RE.
+
+expand_iclosure(RE, 0, M) -> expand_opt(RE, M);
+expand_iclosure(RE, N, M) ->
+ {concat,RE,expand_iclosure(RE, N-1, M)}.
+
+%% expand_opt(RegExp, Count) -> RE.
+%% Handle all the cases.
+
+expand_opt(_RE, none) -> epsilon;
+expand_opt(RE, any) -> {kclosure,RE};
+expand_opt(_RE, 0) -> epsilon;
+expand_opt(RE, 1) -> {optional,RE};
+expand_opt(RE, N) ->
+ {optional,{concat,RE,expand_opt(RE, N-1)}}.
+
+%% find_prefix(PrefixStr, SourceStr)
+%% if PrefixStr is a prefix of Str then return {ok,RemainingStr}
+%% otherwise return false
+
+%% find_prefix([C|Prest], [C|Rest]) ->
+%% find_prefix(Prest, Rest);
+%% find_prefix([], Rest) -> {yes,Rest};
+%% find_prefix(_, _) -> no.
+
+%% in_char_class(Char, Class) -> bool().
+
+in_char_class(C, [{C1,C2}|_Cc]) when C >= C1, C =< C2 -> true;
+in_char_class(C, [C|_Cc]) -> true;
+in_char_class(C, [_|Cc]) -> in_char_class(C, Cc);
+in_char_class(_C, []) -> false.
+
+%% re_apply_or(Match1, Match2, SubExprs) ->
+%% {match,RestPos,Rest,SubExprs} | nomatch.
+%% If we want the best match then choose the longest match, else just
+%% choose one by trying sequentially.
+
+re_apply_or(M1={match,P1,_,_},{match,P2,_,_}) when P1 >= P2 -> M1;
+re_apply_or({match,_,_,_}, M2={match,_,_,_}) -> M2;
+re_apply_or(never_match, R2) -> R2;
+re_apply_or(R1, never_match) -> R1;
+re_apply_or(nomatch, R2) -> R2;
+re_apply_or(R1, nomatch) -> R1.
+
+%% Record definitions for the NFA, DFA and compiler.
+
+-record(nfa_state, {no,edges=[],accept=no}).
+-record(dfa_state, {no,nfa=[],trans=[],accept=no}).
+
+-record(c_state, {no,trans=[],tmin=0,smin=none,tmax=0,smax=none,
+ accept=false,spec=[]}).
+
+%% We use standard methods, Thompson's construction and subset
+%% construction, to create first an NFA and then a DFA from the
+%% regexps. A non-standard feature is that we work with sets of
+%% character ranges (crs) instead sets of characters. This is most
+%% noticeable when constructing DFAs. The major benefit is that we can
+%% handle characters from any set, not just limited ASCII or 8859,
+%% even 16/32 bit unicode.
+%%
+%% The whole range of characters is 0-maxchar, where maxchar is a BIG
+%% number. We don't make any assumptions about the size of maxchar, it
+%% is just bigger than any character.
+%%
+%% Using character ranges makes describing many regexps very simple,
+%% for example the regexp "." just becomes the range
+%% [{0-9},{11-maxchar}].
+
+%% make_nfa(RegExpActions) -> {ok,{NFA,StartState}} | {error,E}.
+%% Build a complete nfa from a list of {RegExp,Action}. The NFA field
+%% accept has values {yes,Action}|no. The NFA is a list of states.
+
+make_nfa(REAs0) ->
+ case parse_reas(REAs0) of
+ {ok,REAs1} ->
+ {NFA,Start} = build_combined_nfa(REAs1),
+ {ok,{NFA,Start}};
+ {error,E} -> {error,E}
+ end.
+
+%% make_dfa(RegExpActions) -> {ok,{DFA,StartState}} | {error,E}.
+%% make_dfa(RegExpActions, LowestState) -> {ok,{DFA,StartState}} | {error,E}.
+%% Build a complete dfa from a list of {RegExp,Action}. The DFA field
+%% accept has values {yes,Action}|no. If multiple Regexps can result
+%% in same match string then RegExpActions list define priority.
+
+make_dfa(REAs) -> make_dfa(REAs, 0).
+
+make_dfa(REAs0, Low) ->
+ case parse_reas(REAs0) of
+ {ok,REAs1} ->
+ {NFA,Start0} = build_combined_nfa(REAs1),
+ {DFA0,Start1} = build_dfa(NFA, Start0),
+ {DFA,Start} = minimise_dfa(DFA0, Start1, Low),
+ {ok,{DFA,Start}};
+ {error,E} -> {error,E}
+ end.
+
+parse_reas(REAs) -> parse_reas(REAs, []).
+
+parse_reas([{{regexp,{R,_Sc}},A}|REAs], S) -> %Already parsed
+ parse_reas(REAs, [{R,A}|S]);
+parse_reas([{RegExp,A}|REAs], S) ->
+ case parse(RegExp) of
+ {ok,{regexp,{R,_Sc}}} -> parse_reas(REAs, [{R,A}|S]);
+ {error,E} -> {error,E}
+ end;
+parse_reas([], Stack) -> {ok,reverse(Stack)}.
+
+%% build_combined_nfa(RegExpActionList) -> {NFA,StartState}.
+%% Build the combined NFA using Thompson's construction straight out
+%% of the book. Build the separate NFAs in the same order as the
+%% rules so that the accepting have ascending states have ascending
+%% state numbers. Start numbering the states from 1 as we put the
+%% states in a tuple with the state number as the index.
+
+build_combined_nfa(REAs) ->
+ {NFA,Starts,Next} = build_nfa_list(REAs, [], [], 1),
+ F = #nfa_state{no=Next,edges=epsilon_trans(Starts),accept=no},
+ {[F|NFA],Next}.
+
+build_nfa_list([{RE,Action}|REAs], NFA0, Starts, Next0) ->
+ {NFA1,Next1,Start} = build_nfa(RE, Next0, Action),
+ build_nfa_list(REAs, NFA1 ++ NFA0, [Start|Starts], Next1);
+build_nfa_list([], NFA, Starts, Next) ->
+ {NFA,reverse(Starts),Next}.
+
+epsilon_trans(Sts) -> [ {epsilon,S} || S <- Sts ].
+
+%% build_nfa(RegExp, NextState, Action) -> {NFA,NextFreeState,StartState}.
+%% When building the NFA states for a ??? we don't build the end
+%% state, just allocate a State for it and return this state
+%% number. This allows us to avoid building unnecessary states for
+%% concatenation which would then have to be removed by overwriting
+%% an existing state.
+
+build_nfa(RE, Next, Action) ->
+ {NFA,N,E} = build_nfa(RE, Next+1, Next, []),
+ {[#nfa_state{no=E,accept={yes,Action}}|NFA],N,Next}.
+
+%% build_nfa(RegExp, NextState, StartState, NFA) -> {NFA,NextState,EndState}.
+%% The NFA is a list of nfa_state is no predefined order. The state
+%% number of the returned EndState is already allocated!
+
+build_nfa({'or',RE1,RE2}, N0, S, NFA0) ->
+ {NFA1,N1,E1} = build_nfa(RE1, N0+1, N0, NFA0),
+ {NFA2,N2,E2} = build_nfa(RE2, N1+1, N1, NFA1),
+ E = N2,
+ {[#nfa_state{no=S,edges=[{epsilon,N0},{epsilon,N1}]},
+ #nfa_state{no=E1,edges=[{epsilon,E}]},
+ #nfa_state{no=E2,edges=[{epsilon,E}]}|NFA2],
+ N2+1,E};
+build_nfa({literal,[]}, N, S, NFA) ->
+ {NFA,N,S};
+build_nfa({literal,[C|Cs]}, N0, S, NFA0) ->
+ {NFA1,N1,E1} = build_nfa(C, N0, S, NFA0),
+ build_nfa({literal,Cs}, N1, E1, NFA1);
+build_nfa({concat,RE1,RE2}, N0, S, NFA0) ->
+ {NFA1,N1,E1} = build_nfa(RE1, N0, S, NFA0),
+ {NFA2,N2,E2} = build_nfa(RE2, N1, E1, NFA1),
+ {NFA2,N2,E2};
+build_nfa({kclosure,RE}, N0, S, NFA0) ->
+ {NFA1,N1,E1} = build_nfa(RE, N0+1, N0, NFA0),
+ E = N1,
+ {[#nfa_state{no=S,edges=[{epsilon,N0},{epsilon,E}]},
+ #nfa_state{no=E1,edges=[{epsilon,N0},{epsilon,E}]}|NFA1],
+ N1+1,E};
+build_nfa({pclosure,RE}, N0, S, NFA0) ->
+ {NFA1,N1,E1} = build_nfa(RE, N0+1, N0, NFA0),
+ E = N1,
+ {[#nfa_state{no=S,edges=[{epsilon,N0}]},
+ #nfa_state{no=E1,edges=[{epsilon,N0},{epsilon,E}]}|NFA1],
+ N1+1,E};
+build_nfa({optional,RE}, N0, S, NFA0) ->
+ {NFA1,N1,E1} = build_nfa(RE, N0+1, N0, NFA0),
+ E = N1,
+ {[#nfa_state{no=S,edges=[{epsilon,N0},{epsilon,E}]},
+ #nfa_state{no=E1,edges=[{epsilon,E}]}|NFA1],
+ N1+1,E};
+build_nfa({iclosure,RE,I1,I2}, N, S, NFA) ->
+ Exp = expand_iclosure(RE, I1, I2),
+ build_nfa(Exp, N, S, NFA);
+build_nfa({char_class,Cc}, N, S, NFA) ->
+ {[#nfa_state{no=S,edges=[{nfa_char_class(Cc),N}]}|NFA],N+1,N};
+build_nfa({comp_class,Cc}, N, S, NFA) ->
+ {[#nfa_state{no=S,edges=[{nfa_comp_class(Cc),N}]}|NFA],N+1,N};
+build_nfa(epsilon, N, S, NFA) ->
+ {NFA,N,S};
+build_nfa({group,RE}, N, S, NFA) -> %%% FIXME %%%%%%%
+ build_nfa(RE, N, S, NFA);
+build_nfa({subexpr,_N,RE}, N, S, NFA) -> %%% FIXME %%%%%%%
+ build_nfa(RE, N, S, NFA);
+build_nfa(bos, N, S, NFA) ->
+ {[#nfa_state{no=S,edges=[{[bos],N}]}|NFA],N+1,N};
+build_nfa(eos, N, S, NFA) ->
+ {[#nfa_state{no=S,edges=[{[eos],N}]}|NFA],N+1,N};
+%%{[#nfa_state{no=S,edges=[{[eos],N}]}|NFA],N+1,N};
+build_nfa(C, N, S, NFA) when is_integer(C) ->
+ {[#nfa_state{no=S,edges=[{[{C,C}],N}]}|NFA],N+1,N}.
+
+nfa_char_class(Cc) ->
+ Crs = lists:foldl(fun({C1,C2}, Set) -> add_element({C1,C2}, Set);
+ (C, Set) -> add_element({C,C}, Set) end, [], Cc),
+ %% io:fwrite("cc: ~p\n", [Crs]),
+ pack_crs(Crs).
+
+pack_crs([{C1,C2}=Cr,{C3,C4}|Crs]) when C1 =< C3, C2 >= C4 ->
+ %% C1 C2
+ %% C3 C4
+ pack_crs([Cr|Crs]);
+pack_crs([{C1,C2},{C3,C4}|Crs]) when C2 >= C3, C2 < C4 ->
+ %% C1 C2
+ %% C3 C4
+ pack_crs([{C1,C4}|Crs]);
+pack_crs([{C1,C2},{C3,C4}|Crs]) when C2 + 1 == C3 ->
+ %% C1 C2
+ %% C3 C4
+ pack_crs([{C1,C4}|Crs]);
+pack_crs([Cr|Crs]) -> [Cr|pack_crs(Crs)];
+pack_crs([]) -> [].
+
+nfa_comp_class(Cc) ->
+ Crs = nfa_char_class(Cc),
+ %% io:fwrite("comp: ~p\n", [Crs]),
+ comp_crs(Crs, 0).
+
+comp_crs([{C1,C2}|Crs], Last) ->
+ [{Last,C1-1}|comp_crs(Crs, C2+1)];
+comp_crs([], Last) -> [{Last,maxchar}].
+
+%% build_dfa(NFA, NfaStartState) -> {DFA,DfaStartState}.
+%% Build a DFA from an NFA using "subset construction". The major
+%% difference from the book is that we keep the marked and unmarked
+%% DFA states in seperate lists. New DFA states are added to the
+%% unmarked list and states are marked by moving them to the marked
+%% list. We assume that the NFA accepting state numbers are in
+%% ascending order for the rules and use ordsets to keep this order.
+
+build_dfa(NFA0, Start) ->
+ %% We want NFA as sorted tuple for fast access, assume lowest state 1.
+ NFA1 = list_to_tuple(keysort(#nfa_state.no, NFA0)),
+ D = #dfa_state{no=0,nfa=eclosure([Start], NFA1),accept=no},
+ {build_dfa([D], 1, [], NFA1),0}.
+
+%% build_dfa([UnMarked], NextState, [Marked], NFA) -> DFA.
+%% Traverse the unmarked states. Temporarily add the current unmarked
+%% state to the marked list before calculating translation, this is
+%% to avoid adding too many duplicate states. Add it properly to the
+%% marked list afterwards with correct translations.
+
+build_dfa([U|Us0], N0, Ms, NFA) ->
+ {Ts,Us1,N1} = build_dfa(U#dfa_state.nfa, Us0, N0, [], [U|Ms], NFA),
+ M = U#dfa_state{trans=Ts,accept=accept(U#dfa_state.nfa, NFA)},
+ build_dfa(Us1, N1, [M|Ms], NFA);
+build_dfa([], _N, Ms, _NFA) -> Ms.
+
+%% build_dfa([NfaState], [Unmarked], NextState, [Transition], [Marked], NFA) ->
+%% {Transitions,UnmarkedStates,NextState}.
+%% Foreach NFA state set calculate the legal translations. N.B. must
+%% search *BOTH* the unmarked and marked lists to check if DFA state
+%% already exists. As the range of characters is potentially VERY
+%% large we cannot explicitly test all characters. Instead we first
+%% calculate the set of all disjoint character ranges which are
+%% possible candidates to the set of NFA states.
+
+build_dfa(Set, Us, N, Ts, Ms, NFA) ->
+ %% List of all transition sets.
+ Crs0 = [Cr || S <- Set,
+ {Crs,_St} <- (element(S, NFA))#nfa_state.edges,
+ is_list(Crs),
+ Cr <- Crs ],
+ Crs1 = lists:usort(Crs0), %Must remove duplicates!
+ %% Build list of disjoint test ranges.
+ Test = disjoint_crs(Crs1),
+ %% io:fwrite("bd: ~p\n ~p\n ~p\n ~p\n", [Set,Crs0,Crs1,Test]),
+ build_dfa(Test, Set, Us, N, Ts, Ms, NFA).
+
+%% disjoint_crs([CharRange]) -> [CharRange].
+%% Take a sorted list of char ranges and make a sorted list of
+%% disjoint char ranges. No new char range extends past an existing
+%% char range.
+
+disjoint_crs([{_C1,C2}=Cr1,{C3,_C4}=Cr2|Crs]) when C2 < C3 ->
+ %% C1 C2
+ %% C3 C4
+ [Cr1|disjoint_crs([Cr2|Crs])];
+disjoint_crs([{C1,C2},{C3,C4}|Crs]) when C1 == C3 ->
+ %% C1 C2
+ %% C3 C4
+ [{C1,C2}|disjoint_crs(add_element({C2+1,C4}, Crs))];
+disjoint_crs([{C1,C2},{C3,C4}|Crs]) when C1 < C3, C2 >= C3, C2 < C4 ->
+ %% C1 C2
+ %% C3 C4
+ [{C1,C3-1}|disjoint_crs(union([{C3,C2},{C2+1,C4}], Crs))];
+disjoint_crs([{C1,C2},{C3,C4}|Crs]) when C1 < C3, C2 == C4 ->
+ %% C1 C2
+ %% C3 C4
+ [{C1,C3-1}|disjoint_crs(add_element({C3,C4}, Crs))];
+disjoint_crs([{C1,C2},{C3,C4}|Crs]) when C1 < C3, C2 > C4 ->
+ %% C1 C2
+ %% C3 C4
+ [{C1,C3-1}|disjoint_crs(union([{C3,C4},{C4+1,C2}], Crs))];
+disjoint_crs([Cr|Crs]) -> [Cr|disjoint_crs(Crs)];
+disjoint_crs([]) -> [].
+
+build_dfa([Cr|Crs], Set, Us, N, Ts, Ms, NFA) ->
+ case eclosure(move(Set, Cr, NFA), NFA) of
+ S when S /= [] ->
+ case keysearch(S, #dfa_state.nfa, Us) of
+ {value,#dfa_state{no=T}} ->
+ build_dfa(Crs, Set, Us, N, [{Cr,T}|Ts], Ms, NFA);
+ false ->
+ case keysearch(S, #dfa_state.nfa, Ms) of
+ {value,#dfa_state{no=T}} ->
+ build_dfa(Crs, Set, Us, N, [{Cr,T}|Ts], Ms, NFA);
+ false ->
+ U = #dfa_state{no=N,nfa=S},
+ build_dfa(Crs, Set, [U|Us], N+1, [{Cr,N}|Ts], Ms, NFA)
+ end
+ end;
+ [] ->
+ build_dfa(Crs, Set, Us, N, Ts, Ms, NFA)
+ end;
+build_dfa([], _Set, Us, N, Ts, _Ms, _NFA) ->
+ {Ts,Us,N}.
+
+%% eclosure([State], NFA) -> [State].
+%% move([State], Char, NFA) -> [State].
+%% These are straight out of the book. As eclosure uses ordsets then
+%% the generated state sets are in ascending order.
+
+eclosure(Sts, NFA) -> eclosure(Sts, NFA, []).
+
+eclosure([St|Sts], NFA, Ec) ->
+ #nfa_state{edges=Es} = element(St, NFA),
+ eclosure([ N || {epsilon,N} <- Es,
+ not is_element(N, Ec) ] ++ Sts,
+ NFA, add_element(St, Ec));
+eclosure([], _NFA, Ec) -> Ec.
+
+move(Sts, Cr, NFA) ->
+ [ St || N <- Sts,
+ {Crs,St} <- (element(N, NFA))#nfa_state.edges,
+ is_list(Crs),
+%% begin
+%% io:fwrite("move1: ~p\n", [{Sts,Cr,Crs,in_crs(Cr,Crs)}]),
+%% true
+%% end,
+ in_crs(Cr, Crs) ].
+
+in_crs({C1,C2}, [{C3,C4}|_Crs]) when C1 >= C3, C2 =< C4 -> true;
+in_crs(Cr, [Cr|_Crs]) -> true; %Catch bos and eos.
+in_crs(Cr, [_|Crs]) -> in_crs(Cr, Crs);
+in_crs(_Cr, []) -> false.
+
+%% accept([State], NFA) -> true | false.
+%% Scan down the state list until we find an accepting state.
+
+accept([St|Sts], NFA) ->
+ case element(St, NFA) of
+ #nfa_state{accept={yes,A}} -> {yes,A};
+ #nfa_state{accept=no} -> accept(Sts, NFA)
+ end;
+accept([], _NFA) -> no.
+
+%% minimise_dfa(DFA, StartState, FirstState) -> {DFA,StartState}.
+%% Minimise the DFA by removing equivalent states. We consider a
+%% state if both the transitions and the their accept state is the
+%% same. First repeatedly run throught the DFA state list removing
+%% equivalent states and updating remaining transitions with
+%% remaining equivalent state numbers. When no more reductions are
+%% possible then pack the remaining state numbers to get consecutive
+%% states.
+
+minimise_dfa(DFA0, Start, N) ->
+ case min_dfa(DFA0) of
+ {DFA1,[]} -> %No reduction!
+ {DFA2,Rs} = pack_dfa(DFA1, N),
+ {min_update(DFA2, Rs),min_new_state(Start, Rs)};
+ {DFA1,Rs} ->
+ minimise_dfa(min_update(DFA1, Rs), min_new_state(Start, Rs), N)
+ end.
+
+min_dfa(DFA) -> min_dfa(DFA, [], []).
+
+min_dfa([D|DFA0], Rs0, MDFA) ->
+ {DFA1,Rs1} = min_delete(DFA0, D#dfa_state.trans, D#dfa_state.accept,
+ D#dfa_state.no, Rs0, []),
+ min_dfa(DFA1, Rs1, [D|MDFA]);
+min_dfa([], Rs, MDFA) -> {MDFA,Rs}.
+
+min_delete([#dfa_state{no=N,trans=T,accept=A}|DFA], T, A, NewN, Rs, MDFA) ->
+ min_delete(DFA, T, A, NewN, [{N,NewN}|Rs], MDFA);
+min_delete([D|DFA], T, A, NewN, Rs, MDFA) ->
+ min_delete(DFA, T, A, NewN, Rs, [D|MDFA]);
+min_delete([], _T, _A, _NewN, Rs, MDFA) -> {MDFA,Rs}.
+
+min_update(DFA, Rs) ->
+ [ D#dfa_state{trans=min_update_trans(D#dfa_state.trans, Rs)} || D <- DFA ].
+
+min_update_trans(Tr, Rs) ->
+ [ {C,min_new_state(S, Rs)} || {C,S} <- Tr ].
+
+min_new_state(Old, [{Old,New}|_Reds]) -> New;
+min_new_state(Old, [_R|Reds]) -> min_new_state(Old, Reds);
+min_new_state(Old, []) -> Old.
+
+pack_dfa(DFA, N) -> pack_dfa(DFA, N, [], []).
+
+pack_dfa([D|DFA], NewN, Rs, PDFA) ->
+ pack_dfa(DFA, NewN+1, [{D#dfa_state.no,NewN}|Rs],
+ [D#dfa_state{no=NewN}|PDFA]);
+pack_dfa([], _NewN, Rs, PDFA) -> {PDFA,Rs}.
+
+%% comp_apply(String, StartPos, DFAReg) -> {match,RestPos,Rest} | nomatch.
+%% Apply the DFA of a regular expression to a string. If
+%% there is a match return the position of the remaining string and
+%% the string if else return 'nomatch'.
+%%
+%% StartPos should be the real start position as it is used to decide
+%% if we are at the beginning of the string.
+
+comp_apply(Cs, P, {DFA,Start,_Fail}) ->
+ comp_apply(element(Start, DFA), Cs, P, DFA, nomatch).
+
+comp_apply(#c_state{spec=[]}=St, Cs, P, DFA, Accept) ->
+ comp_apply_tr(St, Cs, P, DFA, Accept);
+comp_apply(#c_state{spec=Sp}=St, Cs, P, DFA, Accept) ->
+ comp_apply_sp(St, Cs, P, DFA, Accept, Sp).
+
+comp_apply_tr(#c_state{trans=none,accept=A}, Cs, P, _DFA, Accept) ->
+ %% End state.
+ accept_value(A, Cs, P, Accept);
+comp_apply_tr(#c_state{trans=Tr,tmin=Tmin,smin=Smin,tmax=Tmax,smax=Smax,accept=A},
+ [C|Cs]=Cs0, P, DFA, Accept) ->
+ %% Get the next state number to go to.
+ NextSt = if C =< Tmin -> Smin; %Below transition table
+ C >= Tmax -> Smax; %Above transition table
+ true -> %Otherwise use table
+ element(C - Tmin, Tr)
+ end,
+ comp_apply(element(NextSt, DFA), Cs, P+1, DFA,
+ accept_value(A, Cs0, P, Accept));
+comp_apply_tr(#c_state{trans=_Tr,accept=A}, [], P, _DFA, Accept) ->
+ accept_value(A, [], P, Accept).
+
+comp_apply_sp(_St, Cs, 1, DFA, Accept, [{bos,S}|_]) ->
+ comp_apply(element(S, DFA), Cs, 1, DFA, Accept);
+comp_apply_sp(_St, [$\n], P, DFA, Accept, [{eos,S}|_]) ->
+ comp_apply(element(S, DFA), [], P, DFA, Accept);
+comp_apply_sp(_St, [], P, DFA, Accept, [{eos,S}|_]) ->
+ comp_apply(element(S, DFA), [], P, DFA, Accept);
+comp_apply_sp(St, Cs, P, DFA, Accept, [_|Sp]) ->
+ comp_apply_sp(St, Cs, P, DFA, Accept, Sp);
+comp_apply_sp(St, Cs, P, DFA, Accept, []) ->
+ comp_apply_tr(St, Cs, P, DFA, Accept).
+
+accept_value(true, Cs, P, _Accept) -> {match,P,Cs};
+accept_value(false, _Cs, _P, Accept) -> Accept.
+
+%% compile(RegExp) -> {ok,RE} | {error,E}.
+%% Parse the regexp described in the string RegExp.
+
+compile(RegExp) ->
+ case make_dfa([{RegExp,yes}], 2) of
+ {ok,{DFA0,Start}} ->
+ Fail = 1,
+ DFA1 = [#dfa_state{no=Fail,accept=no,trans=[]}|DFA0],
+ DFA = tuplelise_dfa(DFA1, 1),
+ {ok,{comp_regexp,{DFA,Start,Fail}}};
+ {error,E} -> {error,E}
+ end.
+
+%% tuplelise_dfa(DFAstates, NoAcceptState) -> {{CompState},FirstState}.
+
+tuplelise_dfa(DFA0, NoAccept) ->
+ DFA1 = map(fun (#dfa_state{no=N,trans=Ts,accept=A}) ->
+ {Tr,Tmin,Smin,Tmax,Smax,Sp} = build_trans(Ts, NoAccept),
+ #c_state{no=N,trans=Tr,tmin=Tmin,smin=Smin,
+ tmax=Tmax,smax=Smax,
+ accept=fix_accept(A),spec=Sp}
+ end, DFA0),
+ list_to_tuple(keysort(#dfa_state.no, DFA1)).
+
+build_trans(Ts0, NoAccept) ->
+ %% Split transitions into character ranges and specials.
+ {Ts1,Sp1} = foldl(fun ({{_,_},_}=T, {Ts,Sp}) -> {[T|Ts],Sp};
+ ({_,_}=T, {Ts,Sp}) -> {Ts,[T|Sp]}
+ end, {[],[]}, Ts0),
+ if Ts1 == [] ->
+ {none,none,none,none,none,Sp1};
+ true ->
+ %% Have transitions, convert to tuple.
+ Ts2 = keysort(1, Ts1),
+ {Tmin,Smin,Ts3} = min_trans(Ts2, NoAccept),
+ %% io:fwrite("exptr: ~p\n", [{Ts3,Tmin}]),
+ {Trans,Tmax,Smax} = expand_trans(Ts3, Tmin, NoAccept),
+ {list_to_tuple(Trans),Tmin,Smin,Tmax,Smax,Sp1}
+ end.
+
+min_trans([{{0,C2},S}|Crs], _Def) -> {C2,S,Crs};
+min_trans([{{C1,_C2},_S}|_]=Crs, Def) -> {C1-1,Def,Crs}.
+
+expand_trans([{{C1,maxchar},S}], Last, Def) ->
+ Trs = duplicate(C1-(Last+1), Def),
+ {Trs,C1,S};
+expand_trans([{{C1,C2},S}], Last, Def) ->
+ Trs = duplicate(C1-(Last+1), Def) ++ duplicate(C2-C1+1, S),
+ {Trs,C2+1,Def};
+expand_trans([{{C1,C2},S}|Crs], Last, Def) ->
+ {Trs0,Tmax,Smax} = expand_trans(Crs, C2, Def),
+ Trs1 = duplicate(C1-(Last+1), Def) ++ duplicate(C2-C1+1, S) ++ Trs0,
+ {Trs1,Tmax,Smax}.
+
+fix_accept({yes,_}) -> true;
+fix_accept(no) -> false.
+
diff --git a/lib/xmerl/src/xmerl_sax_old_dom.erl b/lib/xmerl/src/xmerl_sax_old_dom.erl
new file mode 100644
index 0000000000..c357816a1e
--- /dev/null
+++ b/lib/xmerl/src/xmerl_sax_old_dom.erl
@@ -0,0 +1,293 @@
+%%-*-erlang-*-
+%%--------------------------------------------------------------------
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%----------------------------------------------------------------------
+%% File : xmerl_sax_old_dom.erl
+%% Description :
+%%
+%% Created : 02 Oct 2008
+%%----------------------------------------------------------------------
+-module(xmerl_sax_old_dom).
+
+%%----------------------------------------------------------------------
+%% Include files
+%%----------------------------------------------------------------------
+-include("xmerl_sax_old_dom.hrl").
+
+%%----------------------------------------------------------------------
+%% External exports
+%%----------------------------------------------------------------------
+-export([
+ initial_state/0,
+ get_dom/1,
+ event/3
+ ]).
+
+%%----------------------------------------------------------------------
+%% Internal exports
+%%----------------------------------------------------------------------
+-export([
+ ]).
+
+%%======================================================================
+%% Macros
+%%======================================================================
+%%----------------------------------------------------------------------
+%% Error handling
+%%----------------------------------------------------------------------
+-define(error(Reason),
+ throw({xmerl_sax_old_dom_error, Reason})).
+
+%%======================================================================
+%% Records
+%%======================================================================
+
+%%----------------------------------------------------------------------
+%% State record for the validator
+%%----------------------------------------------------------------------
+-record(xmerl_sax_old_dom_state, {
+ tags=[], %% Tag stack
+ cno=[], %% Current node number
+ namespaces = [], %% NameSpace stack
+ dom=[] %% DOM structure
+ }).
+
+%%======================================================================
+%% External functions
+%%======================================================================
+%%----------------------------------------------------------------------
+%% Function: initial_state() -> Result
+%% Parameters:
+%% Result:
+%% Description:
+%%----------------------------------------------------------------------
+initial_state() ->
+ #xmerl_sax_old_dom_state{}.
+
+%%----------------------------------------------------------------------
+%% Function: get_dom(State) -> Result
+%% Parameters:
+%% Result:
+%% Description:
+%%----------------------------------------------------------------------
+get_dom(#xmerl_sax_old_dom_state{dom=Dom}) ->
+ Dom.
+
+%%----------------------------------------------------------------------
+%% Function: event(Event, LineNo, State) -> Result
+%% Parameters:
+%% Result:
+%% Description:
+%%----------------------------------------------------------------------
+event(Event, _LineNo, State) ->
+ build_dom(Event, State).
+
+
+%%======================================================================
+%% Internal functions
+%%======================================================================
+
+%%----------------------------------------------------------------------
+%% Function : build_dom(Event, State) -> Result
+%% Parameters: Event = term()
+%% State = #xmerl_sax_old_dom_state{}
+%% Result : #xmerl_sax_old_dom_state{} |
+%% Description:
+%%----------------------------------------------------------------------
+
+%% Document
+%%----------------------------------------------------------------------
+build_dom(startDocument, State) ->
+ State#xmerl_sax_old_dom_state{dom=[startDocument]};
+build_dom(endDocument,
+ #xmerl_sax_old_dom_state{dom=[#xmlElement{content=C} = Current |D]} = State) ->
+ case D of
+ [startDocument] ->
+ State#xmerl_sax_old_dom_state{dom=[Current#xmlElement{
+ content=lists:reverse(C)
+ }]};
+ [#xmlDecl{} = Decl, startDocument] ->
+ State#xmerl_sax_old_dom_state{dom=[Decl, Current#xmlElement{
+ content=lists:reverse(C)
+ }]};
+ _ ->
+ io:format("~p\n", [D]),
+ ?error("we're not at end the document when endDocument event is encountered.")
+ end;
+
+%% Element
+%%----------------------------------------------------------------------
+build_dom({startElement, Uri, LocalName, QName, Attributes},
+ #xmerl_sax_old_dom_state{tags=T, cno=CN, namespaces=NS, dom=D} = State) ->
+
+ A = parse_attributes(LocalName, Attributes),
+ {Num, NewCN} =
+ case CN of
+ [] ->
+ {1, [1]};
+ [ N |CNs] ->
+ {N, [1, N+1 |CNs]}
+ end,
+
+ NsInfo =
+ case QName of
+ {[], _} -> [];
+ QN -> QN
+ end,
+ NameAsAtom = convert_qname_to_atom(QName),
+
+ State#xmerl_sax_old_dom_state{tags=[{NameAsAtom, Num} |T],
+ cno=NewCN,
+ dom=[#xmlElement{name=NameAsAtom,
+ expanded_name=NameAsAtom,
+ nsinfo=NsInfo,
+ namespace=#xmlNamespace{default=list_to_atom(Uri),
+ nodes=NS},
+ pos=Num,
+ parents=T,
+ attributes=lists:reverse(A),
+ xmlbase="."
+ } | D]};
+build_dom({endElement, _Uri, LocalName, QName},
+ #xmerl_sax_old_dom_state{tags=[_ |T],
+ cno=[_ |CN],
+ dom=[#xmlElement{name=CName, content=C} = Current,
+ #xmlElement{content=PC} = Parent | D]} = State) ->
+ case convert_qname_to_atom(QName) of
+ CName ->
+ State#xmerl_sax_old_dom_state{tags=T,
+ cno=CN,
+ dom=[Parent#xmlElement{
+ content=[Current#xmlElement{
+ content=lists:reverse(C)
+ }
+ |PC]
+ } | D]};
+ _ ->
+ ?error("Got end of element: " ++ LocalName ++ " but expected: " ++
+ Current#xmlElement.name)
+ end;
+
+%% Text
+%%----------------------------------------------------------------------
+build_dom({characters, String},
+ #xmerl_sax_old_dom_state{tags=T,
+ cno=[Num |CN],
+ dom=[#xmlElement{content=C} = Current| D]} = State) ->
+ State#xmerl_sax_old_dom_state{cno=[Num+1 |CN],
+ dom=[Current#xmlElement{content=[#xmlText{value=String, parents=T, pos=Num, type=text}
+ |C]} | D]};
+build_dom({ignorableWhitespace, String},
+ #xmerl_sax_old_dom_state{tags=T,
+ cno=[Num |CN],
+ dom=[#xmlElement{content=C} = Current| D]} = State) ->
+ State#xmerl_sax_old_dom_state{cno=[Num+1 |CN],
+ dom=[Current#xmlElement{content=[#xmlText{value=String,
+ parents=T, pos=Num,
+ type=text}
+ |C]} | D]};
+
+%% Comments
+%%----------------------------------------------------------------------
+build_dom({comment, String},
+ #xmerl_sax_old_dom_state{tags=T,
+ cno=[Num |CN],
+ dom=[#xmlElement{content=C} = Current| D]} = State) ->
+ State#xmerl_sax_old_dom_state{cno=[Num+1 |CN],
+ dom=[Current#xmlElement{content=[#xmlComment{parents=T, pos=Num, value=String}|C]} | D]};
+
+%% NameSpaces
+%%----------------------------------------------------------------------
+build_dom({startPrefixMapping, [], _Uri}, State) ->
+ State;
+build_dom({startPrefixMapping, Prefix, Uri},
+ #xmerl_sax_old_dom_state{namespaces=NS} = State) ->
+ State#xmerl_sax_old_dom_state{namespaces=[{Prefix, list_to_atom(Uri)} |NS]};
+build_dom({endPrefixMapping, Prefix},
+ #xmerl_sax_old_dom_state{namespaces=[{Prefix, _} |NS]} = State) ->
+ State#xmerl_sax_old_dom_state{namespaces=NS};
+
+%% Processing instructions
+%%----------------------------------------------------------------------
+build_dom({processingInstruction,"xml", PiData},
+ #xmerl_sax_old_dom_state{dom=D} = State) ->
+ {Vsn, PiData1} = find_and_remove_attribute("version", PiData, []),
+ {Enc, PiData2} = find_and_remove_attribute("encoding", PiData1, []),
+ {Standalone, PiData3} = find_and_remove_attribute("standalone", PiData2, yes),
+ State#xmerl_sax_old_dom_state{dom=[#xmlDecl{vsn=Vsn, encoding=Enc, standalone=Standalone, attributes=PiData3}| D]};
+build_dom({processingInstruction, PiTarget, PiData},
+ #xmerl_sax_old_dom_state{cno=[Num |CN],
+ dom=[#xmlElement{content=C} = Current| D]} = State) ->
+ State#xmerl_sax_old_dom_state{cno=[Num+1 |CN],
+ dom=[Current#xmlElement{content=[#xmlPI{name=PiTarget,pos=Num, value=PiData}
+ |C]} | D]};
+%% Default
+%%----------------------------------------------------------------------
+build_dom(_E, State) ->
+ State.
+
+
+%%----------------------------------------------------------------------
+%% Function : parse_attributes(ElName, Attributes) -> Result
+%% Parameters:
+%% Result :
+%% Description:
+%%----------------------------------------------------------------------
+parse_attributes(ElName, Attributes) ->
+ parse_attributes(ElName, Attributes, 1, []).
+
+parse_attributes(_, [], _, Acc) ->
+ Acc;
+parse_attributes(ElName, [{_Uri, Prefix, LocalName, AttrValue} |As], N, Acc) ->
+ Name = convert_qname_to_atom({Prefix,LocalName}),
+ NsInfo =
+ case Prefix of
+ [] -> [];
+ P -> {P,LocalName}
+ end,
+ parse_attributes(ElName, As, N+1, [#xmlAttribute{name=Name,
+ pos=N,
+ nsinfo=NsInfo,
+ value=AttrValue,
+ normalized=false} |Acc]).
+
+%%----------------------------------------------------------------------
+%% Function : convert_qname_to_atom(QName) -> Result
+%% Parameters:
+%% Result :
+%% Description:
+%%----------------------------------------------------------------------
+convert_qname_to_atom({[], N}) ->
+ list_to_atom(N);
+convert_qname_to_atom({P,N}) ->
+ list_to_atom(P ++ ":" ++ N).
+
+%%----------------------------------------------------------------------
+%% Function : find_and_remove_attribute(Key, Data, Default) -> Result
+%% Parameters:
+%% Result :
+%% Description:
+%%----------------------------------------------------------------------
+find_and_remove_attribute(Key, Data, Default) ->
+ case lists:keysearch(Key, 1, Data) of
+ {value, {Key, Value}} ->
+ Data2 = lists:keydelete(Key, 1, Data),
+ {Value, Data2};
+ false ->
+ {Default, Data}
+ end.
diff --git a/lib/xmerl/src/xmerl_sax_old_dom.hrl b/lib/xmerl/src/xmerl_sax_old_dom.hrl
new file mode 100644
index 0000000000..92000f8d7f
--- /dev/null
+++ b/lib/xmerl/src/xmerl_sax_old_dom.hrl
@@ -0,0 +1,138 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%% Contributor(s):
+%% <[email protected]>: suggested #xmlDocument{}
+%%
+%%----------------------------------------------------------------------
+%% #0. BASIC INFORMATION
+%%----------------------------------------------------------------------
+%% File: xmerl.hrl
+%% Author : Ulf Wiger <[email protected]>
+%% Date : 00-09-22
+%% Description : Record and macro definitions for xmerl
+%%----------------------------------------------------------------------
+
+
+
+%% records generated by the scanner
+%% --------------------------------
+
+%% XML declaration
+-record(xmlDecl,{
+ vsn, % string() XML version
+ encoding, % string() Character encoding
+ standalone, % (yes | no)
+ attributes % [#xmlAttribute()] Other attributes than above
+ }).
+
+%% Attribute
+-record(xmlAttribute,{
+ name, % atom()
+ expanded_name=[],% atom() | {string(),atom()}
+ nsinfo = [], % {Prefix, Local} | []
+ namespace = [], % inherits the element's namespace
+ parents = [], % [{atom(),integer()}]
+ pos, % integer()
+ language = [], % inherits the element's language
+ value, % IOlist() | atom() | integer()
+ normalized % atom() one of (true | false)
+ }).
+
+%% namespace record
+-record(xmlNamespace,{
+ default = [],
+ nodes = []
+ }).
+
+%% namespace node - i.e. a {Prefix, URI} pair
+%% TODO: these are not currently used?? /RC
+-record(xmlNsNode,{
+ prefix,
+ uri = []
+ }).
+
+%% XML Element
+%% content = [#xmlElement()|#xmlText()|#xmlPI()|#xmlComment()|#xmlDecl()]
+-record(xmlElement,{
+ name, % atom()
+ expanded_name = [], % string() | {URI,Local} | {"xmlns",Local}
+ nsinfo = [], % {Prefix, Local} | []
+ namespace=#xmlNamespace{},
+ parents = [], % [{atom(),integer()}]
+ pos, % integer()
+ attributes = [], % [#xmlAttribute()]
+ content = [],
+ language = "", % string()
+ xmlbase="", % string() XML Base path, for relative URI:s
+ elementdef=undeclared % atom(), one of [undeclared | prolog | external | element]
+ }).
+
+%% plain text
+%% IOlist = [char() | binary () | IOlist]
+-record(xmlText,{
+ parents = [], % [{atom(),integer()}]
+ pos, % integer()
+ language = [],% inherits the element's language
+ value, % IOlist()
+ type = text % atom() one of (text|cdata)
+ }).
+
+%% plain text
+-record(xmlComment,{
+ parents = [], % [{atom(),integer()}]
+ pos, % integer()
+ language = [], % inherits the element's language
+ value % IOlist()
+ }).
+
+%% processing instruction
+-record(xmlPI,{
+ name, % atom()
+ pos, % integer()
+ value % IOlist()
+ }).
+
+-record(xmlDocument,{
+ content
+ }).
+
+
+%% XPATH (xmerl_xpath, xmerl_pred_funcs) records
+
+-record(xmlContext, {
+ axis_type = forward,
+ context_node,
+ context_position = 1,
+ nodeset = [],
+ bindings = [],
+ functions = [],
+ namespace = [],
+ whole_document
+ }).
+
+-record(xmlNode, {
+ type = element,
+ node,
+ parents = [],
+ pos = 1
+ }).
+
+-record(xmlObj, {
+ type,
+ value
+ }).
diff --git a/lib/xmerl/src/xmerl_sax_parser.erl b/lib/xmerl/src/xmerl_sax_parser.erl
new file mode 100644
index 0000000000..eb9f8deec6
--- /dev/null
+++ b/lib/xmerl/src/xmerl_sax_parser.erl
@@ -0,0 +1,399 @@
+%%--------------------------------------------------------------------
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2008-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%----------------------------------------------------------------------
+%% File : xmerl_sax_parser.erl
+%% Description : XML SAX parse API module.
+%%
+%% Created : 4 Jun 2008
+%%----------------------------------------------------------------------
+-module(xmerl_sax_parser).
+
+%%----------------------------------------------------------------------
+%% Include files
+%%----------------------------------------------------------------------
+-include("xmerl_sax_parser.hrl").
+
+%%----------------------------------------------------------------------
+%% External exports
+%%----------------------------------------------------------------------
+-export([
+ file/2,
+ stream/2
+ ]).
+
+%%----------------------------------------------------------------------
+%% Internal exports
+%%----------------------------------------------------------------------
+-export([
+ default_continuation_cb/1
+ ]).
+
+%%----------------------------------------------------------------------
+%% Macros
+%%----------------------------------------------------------------------
+
+%%----------------------------------------------------------------------
+%% Records
+%%----------------------------------------------------------------------
+
+%%======================================================================
+%% External functions
+%%======================================================================
+%%----------------------------------------------------------------------
+%% Function: file(Filename, Options) -> Result
+%% Input: Filename = string()
+%% Options = [{OptTag, term()}]
+%% OptTag = event_state | event_fun | continuation_state |
+%% continuation_fun | ....
+%% Output: Result = {ok, EventState, Rest}
+%% Rest = unicode_binary() | latin1_binary()
+%% EventState = term()
+%% Description: Parse file containing an XML document.
+%%----------------------------------------------------------------------
+file(Name,Options) ->
+ case file:open(Name, [raw, read,binary]) of
+ {error, Reason} ->
+ {error,{Name, file:format_error(Reason)}};
+ {ok, FD} ->
+ Dir = filename:dirname(Name),
+ CL = filename:absname(Dir),
+ File = filename:basename(Name),
+ ContinuationFun = fun default_continuation_cb/1,
+ Res = stream(<<>>, [{continuation_fun, ContinuationFun},
+ {continuation_state, FD},
+ {current_location, CL},
+ {entity, File}
+ |Options]),
+ file:close(FD),
+ Res
+ end.
+
+%%----------------------------------------------------------------------
+%% Function: stream(Xml, Options) -> Result
+%% Input: Xml = string() | binary()
+%% Options = [{OptTag, term()}]
+%% OptTag = event_state | event_fun | continuation_state |
+%% continuation_fun | ....
+%% Output: Result = {ok, EventState, Rest}
+%% Rest = unicode_binary() | latin1_binary() | [unicode_char()]
+%% EventState = term()
+%% Description: Parse a stream containing an XML document.
+%%----------------------------------------------------------------------
+stream(Xml, Options) when is_list(Xml), is_list(Options) ->
+ State = parse_options(Options, initial_state()),
+ case State#xmerl_sax_parser_state.file_type of
+ dtd ->
+ xmerl_sax_parser_list:parse_dtd(Xml, State#xmerl_sax_parser_state{encoding = list});
+ normal ->
+ xmerl_sax_parser_list:parse(Xml, State#xmerl_sax_parser_state{encoding = list})
+ end;
+stream(Xml, Options) when is_binary(Xml), is_list(Options) ->
+ case parse_options(Options, initial_state()) of
+ {error, Reason} -> {error, Reason};
+ State ->
+ ParseFunction =
+ case State#xmerl_sax_parser_state.file_type of
+ dtd ->
+ parse_dtd;
+ normal ->
+ parse
+ end,
+ case detect_charset(Xml, State) of
+ {error, Reason} -> {fatal_error,
+ {
+ State#xmerl_sax_parser_state.current_location,
+ State#xmerl_sax_parser_state.entity,
+ 1
+ },
+ Reason,
+ [],
+ State#xmerl_sax_parser_state.event_state};
+ {Xml1, State1} ->
+ parse(Xml1, State1, ParseFunction)
+ end
+ end.
+
+
+%%======================================================================
+%% Internal functions
+%%======================================================================
+
+%%----------------------------------------------------------------------
+%% Function: parse(Encoding, Xml, State, F) -> Result
+%% Input: Encoding = atom()
+%% Xml = [integer()] | binary()
+%% State = #xmerl_sax_parser_state
+%% F = atom()
+%% Output: Result = {ok, Rest, EventState}
+%% Rest = list() | binary()
+%% EventState = term()
+%% Description: Chooses the correct parser depending on the encoding.
+%%----------------------------------------------------------------------
+parse(Xml, #xmerl_sax_parser_state{encoding=utf8}=State, F) ->
+ xmerl_sax_parser_utf8:F(Xml, State);
+parse(Xml, #xmerl_sax_parser_state{encoding={utf16,little}}=State, F) ->
+ xmerl_sax_parser_utf16le:F(Xml, State);
+parse(Xml, #xmerl_sax_parser_state{encoding={utf16,big}}=State, F) ->
+ xmerl_sax_parser_utf16be:F(Xml, State);
+parse(Xml, #xmerl_sax_parser_state{encoding=latin1}=State, F) ->
+ xmerl_sax_parser_latin1:F(Xml, State);
+parse(_, #xmerl_sax_parser_state{encoding=Enc}, _) ->
+ {error, lists:flatten(io_lib:format("Charcter set ~p not supported", [Enc]))}.
+
+%%----------------------------------------------------------------------
+%% Function: initial_state/0
+%% Input: -
+%% Output: #xmerl_sax_parser_state{}
+%% Description: Creates the initial state record.
+%%----------------------------------------------------------------------
+initial_state() ->
+ #xmerl_sax_parser_state{
+ event_fun = fun default_event_cb/3,
+ ns = [{"xml", "http://www.w3.org/XML/1998/namespace"}],
+ current_location = ".",
+ entity = ""
+ }.
+
+%%----------------------------------------------------------------------
+%% Function: parse_options(Options, State)
+%% Input: Options = [Option]
+%% Option = {event_state, term()} | {event_fun, fun()} |
+%% {continuation_state, term()} | {continuation_fun, fun()} |
+%% {encoding, Encoding} | {file_type, FT}
+%% FT = normal | dtd
+%% Encoding = utf8 | utf16le | utf16be | list | iso8859
+%% State = #xmerl_sax_parser_state{}
+%% Output: #xmerl_sax_parser_state{}
+%% Description: Checks the parser options.
+%%----------------------------------------------------------------------
+parse_options([], State) ->
+ State;
+parse_options([{event_state, CbState} |Options], State) ->
+ parse_options(Options, State#xmerl_sax_parser_state{event_state = CbState});
+parse_options([{event_fun, CbF} |Options], State) ->
+ parse_options(Options, State#xmerl_sax_parser_state{event_fun = CbF});
+parse_options([{continuation_state, CState} |Options], State) ->
+ parse_options(Options, State#xmerl_sax_parser_state{continuation_state = CState});
+parse_options([{continuation_fun, CF} |Options], State) ->
+ parse_options(Options, State#xmerl_sax_parser_state{continuation_fun = CF});
+parse_options([{file_type, FT} |Options], State) when FT==normal; FT==dtd ->
+ parse_options(Options, State#xmerl_sax_parser_state{file_type = FT});
+parse_options([{encoding, E} |Options], State) ->
+ case check_encoding_option(E) of
+ {error, Reason} ->
+ {error, Reason};
+ Enc ->
+ parse_options(Options, State#xmerl_sax_parser_state{encoding = Enc})
+ end;
+parse_options([{current_location, CL} |Options], State) ->
+ parse_options(Options, State#xmerl_sax_parser_state{current_location = CL});
+parse_options([{entity, Entity} |Options], State) ->
+ parse_options(Options, State#xmerl_sax_parser_state{entity = Entity});
+parse_options([skip_external_dtd |Options], State) ->
+ parse_options(Options, State#xmerl_sax_parser_state{skip_external_dtd = true});
+parse_options([O |_], _State) ->
+ {error,
+ lists:flatten(io_lib:format("Option: ~p not supported", [O]))}.
+
+
+check_encoding_option(E) when E==utf8; E=={utf16,little}; E=={utf16,big};
+ E==latin1; E==list ->
+ E;
+check_encoding_option(utf16) ->
+ {utf16,big};
+check_encoding_option(E) ->
+ {error, io_lib:format("Charcter set ~p not supported", [E])}.
+
+%%----------------------------------------------------------------------
+%% Function: detect_charset(Xml, State)
+%% Input: Xml = list() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Output: {utf8|utf16le|utf16be|iso8859, Xml, State}
+%% Description: Detects which character set is used in a binary stream.
+%%----------------------------------------------------------------------
+detect_charset(<<>>, #xmerl_sax_parser_state{continuation_fun = undefined} = _) ->
+ throw({error, "Can't detect character encoding due to no indata"});
+detect_charset(<<>>, #xmerl_sax_parser_state{continuation_fun = CFun,
+ continuation_state = CState} = State) ->
+ case CFun(CState) of
+ {<<>>, _} ->
+ throw({error, "Can't detect character encoding due to lack of indata"});
+ {NewBytes, NewContState} ->
+ detect_charset(NewBytes, State#xmerl_sax_parser_state{continuation_state = NewContState})
+ end;
+detect_charset(Bytes, State) ->
+ case unicode:bom_to_encoding(Bytes) of
+ {latin1, 0} ->
+ detect_charset_1(Bytes, State);
+ {Enc, Length} ->
+ <<_:Length/binary, RealBytes/binary>> = Bytes,
+ {RealBytes, State#xmerl_sax_parser_state{encoding=Enc}}
+ end.
+
+detect_charset_1(<<16#00, 16#3C, 16#00, 16#3F, _/binary>> = Xml, State) ->
+ {Xml, State#xmerl_sax_parser_state{encoding={utf16, big}}};
+detect_charset_1(<<16#3C, 16#00, 16#3F, 16#00, _/binary>> = Xml, State) ->
+ {Xml, State#xmerl_sax_parser_state{encoding={utf16, little}}};
+detect_charset_1(<<16#3C, 16#3F, 16#78, 16#6D, 16#6C, Xml2/binary>> = Xml, State) ->
+ case parse_xml_directive(Xml2) of
+ {error, Reason} ->
+ {error, Reason};
+ AttrList ->
+ case lists:keysearch("encoding", 1, AttrList) of
+ {value, {_, E}} ->
+ case convert_encoding(E) of
+ {error, Reason} ->
+ {error, Reason};
+ Enc ->
+ {Xml, State#xmerl_sax_parser_state{encoding=Enc}}
+ end;
+ _ ->
+ {Xml, State#xmerl_sax_parser_state{encoding=utf8}}
+ end
+ end;
+detect_charset_1(Xml, State) ->
+ {Xml, State#xmerl_sax_parser_state{encoding=utf8}}.
+
+%%----------------------------------------------------------------------
+%% Function: convert_encoding(Enc)
+%% Input: Enc = string()
+%% Output: utf8 | iso8859
+%% Description: Converting 7,8 bit and utf8 encoding strings to internal format.
+%%----------------------------------------------------------------------
+convert_encoding(Enc) -> %% Just for 7,8 bit + utf8
+ case string:to_lower(Enc) of
+ "utf-8" -> utf8;
+ "iso-8859-1" -> latin1; % Handle all iso-8859 as latin1
+ "iso-8859-2" -> latin1;
+ "iso-8859-3" -> latin1;
+ "iso-8859-4" -> latin1;
+ "iso-8859-5" -> latin1;
+ "iso-8859-6" -> latin1;
+ "iso-8859-7" -> latin1;
+ "iso-8859-8" -> latin1;
+ "iso-8859-9" -> latin1;
+ _ -> {error, "Unknown encoding: " ++ Enc}
+ end.
+
+%%----------------------------------------------------------------------
+%% Function: parse_xml_directive(Xml)
+%% Input: Xml = binary()
+%% Acc = list()
+%% Output:
+%% Description: Parsing the xml declaration from the input stream.
+%%----------------------------------------------------------------------
+parse_xml_directive(<<C, Rest/binary>>) when ?is_whitespace(C) ->
+ parse_xml_directive_1(Rest, []).
+
+%%----------------------------------------------------------------------
+%% Function: parse_xml_directive_1(Xml, Acc) -> [{Name, Value}]
+%% Input: Xml = binary()
+%% Acc = [{Name, Value}]
+%% Name = string()
+%% Value = string()
+%% Output: see above
+%% Description: Parsing the xml declaration from the input stream.
+%%----------------------------------------------------------------------
+parse_xml_directive_1(<<C, Rest/binary>>, Acc) when ?is_whitespace(C) ->
+ parse_xml_directive_1(Rest, Acc);
+parse_xml_directive_1(<<"?>", _/binary>>, Acc) ->
+ Acc;
+parse_xml_directive_1(<<C, Rest/binary>>, Acc) when 97 =< C, C =< 122 ->
+ {Name, Rest1} = parse_name(Rest, [C]),
+ Rest2 = parse_eq(Rest1),
+ {Value, Rest3} = parse_value(Rest2),
+ parse_xml_directive_1(Rest3, [{Name, Value} |Acc]);
+parse_xml_directive_1(_, _) ->
+ {error, "Unknown attribute in xml directive"}.
+
+%%----------------------------------------------------------------------
+%% Function: parse_xml_directive_1(Xml, Acc) -> Name
+%% Input: Xml = binary()
+%% Acc = string()
+%% Output: Name = string()
+%% Description: Parsing an attribute name from the stream.
+%%----------------------------------------------------------------------
+parse_name(<<C, Rest/binary>>, Acc) when 97 =< C, C =< 122 ->
+ parse_name(Rest, [C |Acc]);
+parse_name(Rest, Acc) ->
+ {lists:reverse(Acc), Rest}.
+
+%%----------------------------------------------------------------------
+%% Function: parse_eq(Xml) -> Rest
+%% Input: Xml = binary()
+%% Output: Rest = binary()
+%% Description: Reads an '=' from the stream.
+%%----------------------------------------------------------------------
+parse_eq(<<C, Rest/binary>>) when ?is_whitespace(C) ->
+ parse_eq(Rest);
+parse_eq(<<"=", Rest/binary>>) ->
+ Rest.
+
+%%----------------------------------------------------------------------
+%% Function: parse_value(Xml) -> {Value, Rest}
+%% Input: Xml = binary()
+%% Output: Value = string()
+%% Rest = binary()
+%% Description: Parsing an attribute value from the stream.
+%%----------------------------------------------------------------------
+parse_value(<<C, Rest/binary>>) when ?is_whitespace(C) ->
+ parse_value(Rest);
+parse_value(<<C, Rest/binary>>) when C == $'; C == $" ->
+ parse_value_1(Rest, C, []).
+
+%%----------------------------------------------------------------------
+%% Function: parse_value_1(Xml, Stop, Acc) -> {Value, Rest}
+%% Input: Xml = binary()
+%% Stop = $' | $"
+%% Acc = list()
+%% Output: Value = string()
+%% Rest = binary()
+%% Description: Parsing an attribute value from the stream.
+%%----------------------------------------------------------------------
+parse_value_1(<<Stop, Rest/binary>>, Stop, Acc) ->
+ {lists:reverse(Acc), Rest};
+parse_value_1(<<C, Rest/binary>>, Stop, Acc) ->
+ parse_value_1(Rest, Stop, [C |Acc]).
+
+%%======================================================================
+%%Default functions
+%%======================================================================
+%%----------------------------------------------------------------------
+%% Function: default_event_cb(Event, LineNo, State) -> Result
+%% Input: Event = tuple()
+%% LineNo = integer()
+%% State = term()
+%% Output: Result = {ok, State}
+%% Description: Default event callback printing event.
+%%----------------------------------------------------------------------
+default_event_cb(_Event, _LineNo, State) ->
+ State.
+
+%%----------------------------------------------------------------------
+%% Function: default_continuation_cb(IoDevice) -> Result
+%% IoDevice = iodevice()
+%% Output: Result = {[char()], State}
+%% Description: Default continuation callback reading blocks.
+%%----------------------------------------------------------------------
+default_continuation_cb(IoDevice) ->
+ case file:read(IoDevice, 1024) of
+ eof ->
+ {<<>>, IoDevice};
+ {ok, FileBin} ->
+ {FileBin, IoDevice}
+ end.
diff --git a/lib/xmerl/src/xmerl_sax_parser.hrl b/lib/xmerl/src/xmerl_sax_parser.hrl
new file mode 100644
index 0000000000..736316e069
--- /dev/null
+++ b/lib/xmerl/src/xmerl_sax_parser.hrl
@@ -0,0 +1,93 @@
+%%--------------------------------------------------------------------
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2008-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%----------------------------------------------------------------------
+%% File : xmerl_sax_parser.hrl
+%% Description :
+%%
+%% Created : 25 Jun 2008
+%%----------------------------------------------------------------------
+%%======================================================================
+%% Include files
+%%======================================================================
+
+
+%%======================================================================
+%% Macros
+%%======================================================================
+
+%%----------------------------------------------------------------------
+%% Definition of XML whitespace characters. These are 'space',
+%% 'carriage return', 'line feed' and 'tab'
+%%----------------------------------------------------------------------
+-define(is_whitespace(C), C=:=?space ; C=:=?cr ; C=:=?lf ; C=:=?tab).
+-define(space, 32).
+-define(cr, 13).
+-define(lf, 10).
+-define(tab, 9).
+
+%%----------------------------------------------------------------------
+%% Definition of hexadecimal digits
+%%----------------------------------------------------------------------
+-define(is_hex_digit(C), $0 =< C, C =< $9; $a =< C, C =< $f; $A =< C, C =< $F).
+
+%%----------------------------------------------------------------------
+%% Definition of XML charcters
+%%
+%% [2] Char #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
+%%----------------------------------------------------------------------
+-define(is_char(C), ?space =< C, C =< 55295; C=:=?cr ; C=:=?lf ; C=:=?tab;
+ 57344 =< C, C =< 65533; 65536 =< C, C =< 1114111).
+
+%% non-characters according to Unicode: 16#ffff and 16#fffe
+%% -define(non_character(H1,H2), H1==16#ff,H2==16#fe;H1==16#ff,H2==16#ff).
+%% -define(non_ascii(H), list(H),hd(H)>=128;integer(H),H>=128).
+
+%%----------------------------------------------------------------------
+%% Error handling
+%%----------------------------------------------------------------------
+-define(fatal_error(State, Reason),
+ throw({fatal_error, {State, Reason}})).
+
+%%======================================================================
+%% Records
+%%======================================================================
+
+%%----------------------------------------------------------------------
+%% State record for the SAX parser
+%%----------------------------------------------------------------------
+-record(xmerl_sax_parser_state, {
+ event_state, % User state for events
+ event_fun, % Fun used for each event
+ continuation_state, % User state for continuation calls
+ continuation_fun, % Fun used to fetch more input
+ encoding=utf8, % Which encoding is used
+ line_no = 1, % Current line number
+ ns = [], % List of current namespaces
+ current_tag = [], % Current tag
+ end_tags = [], % Stack of tags used for end tag matching
+ match_end_tags = true, % Flag which defines if the parser should match on end tags
+ ref_table, % Table containing entitity definitions
+ standalone = no, % yes if the document is standalone and don't need an external DTD.
+ file_type = normal, % Can be normal, dtd and entity
+ current_location, % Location of the currently parsed XML entity
+ entity, % Parsed XML entity
+ skip_external_dtd = false % If true the external DTD is skipped during parsing
+ }).
+
+
+
diff --git a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc
new file mode 100644
index 0000000000..9d184152d1
--- /dev/null
+++ b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc
@@ -0,0 +1,3571 @@
+%%-*-erlang-*-
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2008-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%----------------------------------------------------------------------
+%% Start of common source
+%%----------------------------------------------------------------------
+%-compile(export_all).
+
+%%----------------------------------------------------------------------
+%% Include files
+%%----------------------------------------------------------------------
+-include("xmerl_sax_parser.hrl").
+
+%%----------------------------------------------------------------------
+%% External exports
+%%----------------------------------------------------------------------
+-export([
+ parse/2,
+ parse_dtd/2,
+ is_name_char/1,
+ is_name_start/1
+ ]).
+
+%%----------------------------------------------------------------------
+%% Internal exports
+%%----------------------------------------------------------------------
+-export([
+ ]).
+
+%%----------------------------------------------------------------------
+%% Records
+%%----------------------------------------------------------------------
+
+%%----------------------------------------------------------------------
+%% Macros
+%%----------------------------------------------------------------------
+-define(HTTP_DEF_PORT, 80).
+
+%%======================================================================
+%% External functions
+%%======================================================================
+%%----------------------------------------------------------------------
+%% Function: parse(Xml, State) -> Result
+%% Input: Xml = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Output: Result = {ok, Rest, EventState} |
+%% EventState = term()
+%% Description: Parsing XML from input stream.
+%%----------------------------------------------------------------------
+parse(Xml, State) ->
+ RefTable = ets:new(xmerl_sax_entity_refs, [private]),
+
+ State1 = event_callback(startDocument, State),
+
+ case catch parse_document(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}) of
+ {ok, Rest, State2} ->
+ State3 = event_callback(endDocument, State2),
+ ets:delete(RefTable),
+ {ok, State3#xmerl_sax_parser_state.event_state, Rest};
+ {fatal_error, {State2, Reason}} ->
+ State3 = event_callback(endDocument, State2),
+ ets:delete(RefTable),
+ format_error(fatal_error, State3, Reason);
+ {event_receiver_error, State2, {Tag, Reason}} ->
+ State3 = event_callback(endDocument, State2),
+ ets:delete(RefTable),
+ format_error(Tag, State3, Reason);
+ Other ->
+ _State2 = event_callback(endDocument, State1),
+ ets:delete(RefTable),
+ throw(Other)
+ end.
+
+%%----------------------------------------------------------------------
+%% Function: parse_dtd(Xml, State) -> Result
+%% Input: Xml = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Output: Result = {ok, Rest, EventState} |
+%% EventState = term()
+%% Description: Parsing XML DTD from input stream.
+%%----------------------------------------------------------------------
+parse_dtd(Xml, State) ->
+ RefTable = ets:new(xmerl_sax_entity_refs, [private]),
+
+ State1 = event_callback(startDocument, State),
+
+ case catch parse_external_entity_1(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}) of
+ {fatal_error, {State2, Reason}} ->
+ State3 = event_callback(endDocument, State2),
+ ets:delete(RefTable),
+ format_error(fatal_error, State3, Reason);
+ {event_receiver_error, State2, {Tag, Reason}} ->
+ State3 = event_callback(endDocument, State2),
+ format_error(Tag, State3, Reason);
+ {Rest, State2} when is_record(State2, xmerl_sax_parser_state) ->
+ State3 = event_callback(endDocument, State2),
+ ets:delete(RefTable),
+ {ok, State3#xmerl_sax_parser_state.event_state, Rest};
+ Other ->
+ _State2 = event_callback(endDocument, State1),
+ ets:delete(RefTable),
+ throw(Other)
+ end.
+
+
+%%======================================================================
+%% Internal functions
+%%======================================================================
+
+%%----------------------------------------------------------------------
+%% Function: parse_document(Rest, State) -> Result
+%% Input: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Output: Result = {ok, Rest, State}
+%% Description: Parsing an XML document
+%% [1] document ::= prolog element Misc*
+%%----------------------------------------------------------------------
+parse_document(Rest, State) when is_record(State, xmerl_sax_parser_state) ->
+ {Rest1, State1} = parse_xml_decl(Rest, State),
+ {Rest2, State2} = parse_misc(Rest1, State1, true),
+ {ok, Rest2, State2}.
+
+
+%%----------------------------------------------------------------------
+%% Function: parse_xml_decl(Rest, State) -> Result
+%% Input: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Output: Result = {Rest, State}
+%% Description: Parsing the xml directive in the prolog.
+%% [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
+%% [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
+%%----------------------------------------------------------------------
+parse_xml_decl(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_xml_decl/2);
+parse_xml_decl(?BYTE_ORDER_MARK_1, State) ->
+ cf(?BYTE_ORDER_MARK_1, State, fun parse_xml_decl/2);
+parse_xml_decl(?BYTE_ORDER_MARK_2, State) ->
+ cf(?BYTE_ORDER_MARK_2, State, fun parse_xml_decl/2);
+parse_xml_decl(?BYTE_ORDER_MARK_REST(Rest), State) ->
+ cf(Rest, State, fun parse_xml_decl/2);
+parse_xml_decl(?STRING("<"), State) ->
+ cf(?STRING("<"), State, fun parse_xml_decl/2);
+parse_xml_decl(?STRING("<?"), State) ->
+ cf(?STRING("<?"), State, fun parse_xml_decl/2);
+parse_xml_decl(?STRING("<?x"), State) ->
+ cf(?STRING("<?x"), State, fun parse_xml_decl/2);
+parse_xml_decl(?STRING("<?xm"), State) ->
+ cf(?STRING("<?xm"), State, fun parse_xml_decl/2);
+parse_xml_decl(?STRING("<?xml"), State) ->
+ cf(?STRING("<?xml"), State, fun parse_xml_decl/2);
+parse_xml_decl(?STRING_REST("<?xml", Rest1), State) ->
+ parse_xml_decl_1(Rest1, State);
+parse_xml_decl(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State) when is_binary(Bytes) ->
+ case unicode:characters_to_list(Bytes, Enc) of
+ {incomplete, _, _} ->
+ cf(Bytes, State, fun parse_xml_decl/2);
+ {error, _Encoded, _Rest} ->
+ ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc])));
+ _ ->
+ parse_prolog(Bytes, State)
+ end;
+parse_xml_decl(Bytes, State) ->
+ parse_prolog(Bytes, State).
+
+
+parse_xml_decl_1(?STRING_UNBOUND_REST(C, Rest) = Bytes, State) ->
+ if
+ ?is_whitespace(C) ->
+ {_XmlAttributes, Rest1, State1} = parse_version_info(Rest, State, []),
+ %State2 = event_callback({processingInstruction, "xml", XmlAttributes}, State1),% The XML decl. should not be reported as a PI
+ parse_prolog(Rest1, State1);
+ true ->
+ parse_prolog(?STRING_REST("<?xml", Bytes), State)
+ end;
+parse_xml_decl_1(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_xml_decl_1/2], undefined).
+
+
+
+%%----------------------------------------------------------------------
+%% Function: parse_prolog(Rest, State) -> Result
+%% Input: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Output: Result = {Rest, State}
+%% Description: Parsing XML prolog
+%% [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
+%%----------------------------------------------------------------------
+parse_prolog(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_prolog/2);
+parse_prolog(?STRING("<"), State) ->
+ cf(?STRING("<"), State, fun parse_prolog/2);
+parse_prolog(?STRING_REST("<?", Rest), State) ->
+ {Rest1, State1} = parse_pi(Rest, State),
+ parse_prolog(Rest1, State1);
+parse_prolog(?STRING_REST("<!", Rest), State) ->
+ parse_prolog_1(Rest, State);
+parse_prolog(?STRING_REST("<", Rest), State) ->
+ parse_stag(Rest, State);
+parse_prolog(?STRING_UNBOUND_REST(C, _) = Rest, State) when ?is_whitespace(C) ->
+ {_WS, Rest1, State1} = whitespace(Rest, State, []),
+ parse_prolog(Rest1, State1);
+parse_prolog(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_prolog/2],
+ "expecting < or whitespace").
+
+
+parse_prolog_1(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_prolog_1/2);
+parse_prolog_1(?STRING("D"), State) ->
+ cf(?STRING("D"), State, fun parse_prolog_1/2);
+parse_prolog_1(?STRING("DO"), State) ->
+ cf(?STRING("DO"), State, fun parse_prolog_1/2);
+parse_prolog_1(?STRING("DOC"), State) ->
+ cf(?STRING("DOC"), State, fun parse_prolog_1/2);
+parse_prolog_1(?STRING("DOCT"), State) ->
+ cf(?STRING("DOCT"), State, fun parse_prolog_1/2);
+parse_prolog_1(?STRING("DOCTY"), State) ->
+ cf(?STRING("DOCTY"), State, fun parse_prolog_1/2);
+parse_prolog_1(?STRING("DOCTYP"), State) ->
+ cf(?STRING("DOCTYP"), State, fun parse_prolog_1/2);
+parse_prolog_1(?STRING_REST("DOCTYPE", Rest), State) ->
+ {Rest1, State1} = parse_doctype(Rest, State),
+ State2 = event_callback(endDTD, State1),
+ parse_prolog(Rest1, State2);
+parse_prolog_1(?STRING("-"), State) ->
+ cf(?STRING("-"), State, fun parse_prolog_1/2);
+parse_prolog_1(?STRING_REST("--", Rest), State) ->
+ {Rest1, State1} = parse_comment(Rest, State, []),
+ parse_prolog(Rest1, State1);
+parse_prolog_1(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_prolog_1/2],
+ "expecting comment or DOCTYPE").
+
+
+
+%%----------------------------------------------------------------------
+%% Function: parse_version_info(Rest, State, Acc) -> Result
+%% Input: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Acc = [{Name, Value}]
+%% Name = string()
+%% Value = string()
+%% Output: Result = {[{Name, Value}], Rest, State}
+%% Description: Parsing the version number in the XML directive.
+%% [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
+%%----------------------------------------------------------------------
+parse_version_info(?STRING_EMPTY, State, Acc) ->
+ cf(?STRING_EMPTY, State, Acc, fun parse_version_info/3);
+parse_version_info(?STRING_UNBOUND_REST(C, _) = Rest, State, Acc) when ?is_whitespace(C) ->
+ {_WS, Rest1, State1} = whitespace(Rest, State, []),
+ parse_version_info(Rest1, State1, Acc);
+parse_version_info(?STRING_UNBOUND_REST(C,Rest), State, Acc) ->
+ case is_name_start(C) of
+ true ->
+ case parse_name(Rest, State, [C]) of
+ {"version", Rest1, State1} ->
+ {Rest2, State2} = parse_eq(Rest1, State1),
+ {Version, Rest3, State3} = parse_att_value(Rest2, State2),
+ parse_xml_decl_rest(Rest3, State3, [{"version",Version}|Acc]);
+ {_, _, State1} ->
+ ?fatal_error(State1, "expecting attribute version")
+ end;
+ false ->
+ ?fatal_error(State, "expecting attribute version")
+ end;
+parse_version_info(Bytes, State, Acc) ->
+ unicode_incomplete_check([Bytes, State, Acc, fun parse_version_info/3],
+ undefined).
+
+
+
+%%----------------------------------------------------------------------
+%% Function: parse_xml_decl_rest(Rest, State, Acc) -> Result
+%% Input: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Acc = [{Name, Value}]
+%% Name = string()
+%% Value = string()
+%% Output: Result = {[{Name, Value}], Rest, State}
+%% Description: Checks if there is more to parse in the XML directive.
+%%----------------------------------------------------------------------
+parse_xml_decl_rest(?STRING_EMPTY, State, Acc) ->
+ cf(?STRING_EMPTY, State, Acc, fun parse_xml_decl_rest/3);
+parse_xml_decl_rest(?STRING("?") = Rest, State, Acc) ->
+ cf(Rest, State, Acc, fun parse_xml_decl_rest/3);
+parse_xml_decl_rest(?STRING_REST("?>", Rest), State, Acc) ->
+ {lists:reverse(Acc), Rest, State};
+parse_xml_decl_rest(?STRING_UNBOUND_REST(C, _) = Rest, State, Acc) when ?is_whitespace(C) ->
+ {_WS, Rest1, State1} = whitespace(Rest, State, []),
+ parse_xml_decl_encoding(Rest1, State1, Acc);
+parse_xml_decl_rest(Bytes, State, Acc) ->
+ unicode_incomplete_check([Bytes, State, Acc, fun parse_xml_decl_rest/3],
+ "expecting encoding, standalone, whitespace or ?>").
+
+
+%%----------------------------------------------------------------------
+%% Function: parse_xml_decl_encoding(Rest, State, Acc) -> Result
+%% Input: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Acc = [{Name, Value}]
+%% Name = string()
+%% Value = string()
+%% Output: Result = {[{Name, Value}], Rest, State}
+%% Description: Parse the encoding attribute in the XML directive.
+%% [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
+% [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
+%%----------------------------------------------------------------------
+parse_xml_decl_encoding(?STRING_EMPTY, State, Acc) ->
+ cf(?STRING_EMPTY, State, Acc, fun parse_xml_decl_encoding/3);
+parse_xml_decl_encoding(?STRING_REST("e", Rest), State, Acc) ->
+ case parse_name(Rest, State,[$e]) of
+ {"encoding", Rest1, State1} ->
+ {Rest2, State2} = parse_eq(Rest1, State1),
+ {Enc, Rest3, State3} = parse_att_value(Rest2, State2),
+ parse_xml_decl_encoding_1(Rest3, State3, [{"encoding",Enc} |Acc]);
+ {Name, _Rest1, State1} ->
+ ?fatal_error(State1, "Attribute " ++ Name ++
+ " not allowed in xml declaration")
+ end;
+parse_xml_decl_encoding(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) ->
+ parse_xml_decl_standalone(Bytes, State, Acc);
+parse_xml_decl_encoding(Bytes, State, Acc) ->
+ unicode_incomplete_check([Bytes, State, Acc, fun parse_xml_decl_encoding/3],
+ undefined).
+
+
+parse_xml_decl_encoding_1(?STRING_UNBOUND_REST(C, _) = Bytes, State, Acc) when ?is_whitespace(C) ->
+ {_WS, Rest1, State1} = whitespace(Bytes, State, []),
+ parse_xml_decl_standalone(Rest1, State1, Acc);
+parse_xml_decl_encoding_1(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) ->
+ parse_xml_decl_rest(Bytes, State, Acc);
+parse_xml_decl_encoding_1(Bytes, State, Acc) ->
+ unicode_incomplete_check([Bytes, State, Acc, fun parse_xml_decl_encoding_1/3],
+ undefined).
+
+
+%%----------------------------------------------------------------------
+%% Function: parse_xml_decl_standalone(Rest, State, Acc) -> Result
+%% Input: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Acc = [{Name, Value}]
+%% Name = string()
+%% Value = string()
+%% Output: Result = {[{Name, Value}], Rest, State}
+%% Description: Parse the standalone attribute in the XML directive.
+%% [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") |
+%% ('"' ('yes' | 'no') '"'))
+%%----------------------------------------------------------------------
+parse_xml_decl_standalone(?STRING_EMPTY, State, Acc) ->
+ cf(?STRING_EMPTY, State, Acc, fun parse_xml_decl_standalone/3);
+parse_xml_decl_standalone(?STRING_REST("s", Rest), State, Acc) ->
+ case parse_name(Rest, State,[$s]) of
+ {"standalone", Rest1, State1} ->
+ {Rest2, State2} = parse_eq(Rest1, State1),
+ {Standalone, Rest3, State3} = parse_att_value(Rest2, State2),
+ case Standalone of
+ "yes" -> ok;
+ "no" -> ok;
+ _ ->
+ ?fatal_error(State3, "Wrong value of attribute standalone in xml declaration, must be yes or no")
+ end,
+ {_WS, Rest4, State4} = whitespace(Rest3, State3, []),
+ parse_xml_decl_rest(Rest4, State4#xmerl_sax_parser_state{standalone=list_to_atom(Standalone)},
+ [{"standalone",Standalone} |Acc]);
+ {Name, _Rest1, State1} ->
+ ?fatal_error(State1, "Attribute " ++ Name ++
+ " not allowed in xml declaration")
+ end;
+parse_xml_decl_standalone(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) ->
+ parse_xml_decl_rest(Bytes, State, Acc);
+parse_xml_decl_standalone(Bytes, State, Acc) ->
+ unicode_incomplete_check([Bytes, State, Acc, fun parse_xml_decl_standalone/3],
+ undefined).
+
+
+
+%%----------------------------------------------------------------------
+%% Function: parse_pi(Rest, State) -> Result
+%% Input: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Output: Result = {Rest, State}
+%% Description: Parse processing instructions.
+%% [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
+%% [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
+%%----------------------------------------------------------------------
+parse_pi(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_pi/2);
+parse_pi(?STRING_UNBOUND_REST(C, Rest) = Bytes, State) ->
+ case is_name_start(C) of
+ true ->
+ {PiTarget, Rest1, State1} =
+ parse_name(Rest, State, [C]),
+ case string:to_lower(PiTarget) of
+ "xml" ->
+ case State#xmerl_sax_parser_state.end_tags of
+ [] ->
+ {Bytes, State};
+ _ ->
+ ?fatal_error(State1, "<?xml ...?> not first in document")
+ end;
+ _ ->
+ {PiData, Rest2, State2} = parse_pi_1(Rest1, State1),
+ State3 = event_callback({processingInstruction, PiTarget, PiData}, State2),
+ {Rest2, State3}
+ end;
+ false ->
+ ?fatal_error(State, "expecting name")
+ end;
+parse_pi(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_pi/2], undefined).
+
+%%----------------------------------------------------------------------
+%% Function: parse_pi_1(Rest, State) -> Result
+%% Input: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Output: Result = {Rest, State}
+%% Description: Parse processing instructions.
+%%----------------------------------------------------------------------
+parse_pi_1(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_pi_1/2);
+parse_pi_1(?STRING_UNBOUND_REST(C,_) = Rest, State) when ?is_whitespace(C) ->
+ {_WS, Rest1, State1} =
+ whitespace(Rest, State, []),
+ parse_pi_data(Rest1, State1, []);
+parse_pi_1(?STRING_REST("?>", Rest), State) ->
+ {[], Rest, State};
+parse_pi_1(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_pi/2],
+ "expecting whitespace or '?>'").
+
+
+%%----------------------------------------------------------------------
+%% Function: parse_name(Rest, State, Acc) -> Result
+%% Input: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Acc = string()
+%% Output: Result = {Name, Rest, State}
+%% Name = string()
+%% Description: Parse a name. Next character is put in the accumulator
+%% if it's a valid name character.
+%% [5] Name ::= (Letter | '_' | ':') (NameChar)*
+%%----------------------------------------------------------------------
+parse_name(?STRING_EMPTY, State, Acc) ->
+ cf(?STRING_EMPTY, State, Acc, fun parse_name/3);
+parse_name(?STRING_UNBOUND_REST(C, Rest) = Bytes, State, Acc) ->
+ case is_name_char(C) of
+ true ->
+ parse_name(Rest, State, [C|Acc]);
+ false ->
+ {lists:reverse(Acc), Bytes, State}
+ end;
+parse_name(Bytes, State, Acc) ->
+ unicode_incomplete_check([Bytes, State, Acc, fun parse_name/3], undefined).
+
+
+%%----------------------------------------------------------------------
+%% Function: parse_ns_name(Rest, State, Prefix, Name) -> Result
+%% Input: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Prefix = string()
+%% Name = string()
+%% Output: Result = {{Prefix, Name}, Rest, State}
+%% Name = string()
+%% Description: Parse a namespace name. Next character is put in the
+%% accumulator if it's a valid name character.
+%% The difference between this function and parse_name/3 is
+%% that a colon is interpreted as a separator between the
+%% namespace prefix and the name.
+%%----------------------------------------------------------------------
+parse_ns_name(?STRING_EMPTY, State, Prefix, Name) ->
+ cf(?STRING_EMPTY, State, Prefix, Name, fun parse_ns_name/4);
+parse_ns_name(?STRING_UNBOUND_REST($:, Rest), State, [], Name) ->
+ parse_ns_name(Rest, State, lists:reverse(Name), []);
+parse_ns_name(?STRING_UNBOUND_REST(C, Rest) = Bytes, State, Prefix, Name) ->
+ case is_name_char(C) of
+ true ->
+ parse_ns_name(Rest, State, Prefix, [C|Name]);
+ false ->
+ {{Prefix,lists:reverse(Name)}, Bytes, State}
+ end;
+parse_ns_name(Bytes, State, Prefix, Name) ->
+ unicode_incomplete_check([Bytes, State, Prefix, Name, fun parse_ns_name/4],
+ undefined).
+
+
+%%----------------------------------------------------------------------
+%% Function: parse_pi_data(Rest, State, Acc) -> Result
+%% Input: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Acc = string()
+%% Output: Result = {PiData, Rest, State}
+%% PiData = string()
+%% Description: Parse the data part of the processing instruction.
+%% If next character is valid it's put in the accumulator.
+%%----------------------------------------------------------------------
+parse_pi_data(?STRING_EMPTY, State, Acc) ->
+ cf(?STRING_EMPTY, State, Acc, fun parse_pi_data/3);
+parse_pi_data(?STRING("?"), State, Acc) ->
+ cf(?STRING("?"), State, Acc, fun parse_pi_data/3);
+parse_pi_data(?STRING("\r"), State, Acc) ->
+ cf(?STRING("\r"), State, Acc, fun parse_pi_data/3);
+parse_pi_data(?STRING_REST("?>", Rest), State, Acc) ->
+ {lists:reverse(Acc), Rest, State};
+parse_pi_data(?STRING_REST("\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Acc) ->
+ parse_pi_data(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf |Acc]);
+parse_pi_data(?STRING_REST("\r\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Acc) ->
+ parse_pi_data(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf |Acc]);
+parse_pi_data(?STRING_REST("\r", Rest), #xmerl_sax_parser_state{line_no=N} = State, Acc) ->
+ parse_pi_data(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf |Acc]);
+parse_pi_data(?STRING_UNBOUND_REST(C, Rest), State, Acc) when ?is_char(C)->
+ parse_pi_data(Rest, State, [C|Acc]);
+parse_pi_data(Bytes, State, Acc) ->
+ unicode_incomplete_check([Bytes, State, Acc, fun parse_pi_data/3],
+ "not an character").
+
+
+%%----------------------------------------------------------------------
+%% Function: parse_cdata(Rest, State) -> Result
+%% Input: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Output: Result = {Rest, State}
+%% Description: Start the parsing of a CDATA block.
+%% [18] CDSect ::= CDStart CData CDEnd
+%% [19] CDStart ::= '<![CDATA['
+%% [20] CData ::= (Char* - (Char* ']]>' Char*))
+%% [21] CDEnd ::= ']]>'
+%%----------------------------------------------------------------------
+parse_cdata(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_cdata/2);
+parse_cdata(?STRING("["), State) ->
+ cf(?STRING("["), State, fun parse_cdata/2);
+parse_cdata(?STRING("[C"), State) ->
+ cf(?STRING("[C"), State, fun parse_cdata/2);
+parse_cdata(?STRING("[CD"), State) ->
+ cf(?STRING("[CD"), State, fun parse_cdata/2);
+parse_cdata(?STRING("[CDA"), State) ->
+ cf(?STRING("[CDA"), State, fun parse_cdata/2);
+parse_cdata(?STRING("[CDAT"), State) ->
+ cf(?STRING("[CDAT"), State, fun parse_cdata/2);
+parse_cdata(?STRING("[CDATA"), State) ->
+ cf(?STRING("[CDATA"), State, fun parse_cdata/2);
+parse_cdata(?STRING_REST("[CDATA[", Rest), State) ->
+ State1 = event_callback(startCDATA, State),
+ parse_cdata(Rest, State1, []);
+parse_cdata(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_cdata/2],
+ "expecting comment or CDATA").
+
+
+%%----------------------------------------------------------------------
+%% Function: parse_cdata(Rest, State, Acc) -> Result
+%% Input: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Acc = string()
+%% Output: Result = {Rest, State}
+%% Description: Parse a CDATA block.
+%%----------------------------------------------------------------------
+parse_cdata(?STRING_EMPTY, State, Acc) ->
+ cf(?STRING_EMPTY, State, Acc, fun parse_cdata/3);
+parse_cdata(?STRING("\r"), State, Acc) ->
+ cf(?STRING("\r"), State, Acc, fun parse_cdata/3);
+parse_cdata(?STRING("]"), State, Acc) ->
+ cf(?STRING("]"), State, Acc, fun parse_cdata/3);
+parse_cdata(?STRING("]]"), State, Acc) ->
+ cf(?STRING("]]"), State, Acc, fun parse_cdata/3);
+parse_cdata(?STRING_REST("]]>", Rest), State, Acc) ->
+ State1 = event_callback({characters, lists:reverse(Acc)}, State),
+ State2 = event_callback(endCDATA, State1),
+ parse_content(Rest, State2, [], true);
+parse_cdata(?STRING_REST("\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Acc) ->
+ parse_cdata(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf |Acc]);
+parse_cdata(?STRING_REST("\r\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Acc) ->
+ parse_cdata(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf |Acc]);
+parse_cdata(?STRING_REST("\r", Rest), #xmerl_sax_parser_state{line_no=N} = State, Acc) ->
+ parse_cdata(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf |Acc]);
+parse_cdata(?STRING_UNBOUND_REST(C, Rest), State, Acc) when ?is_char(C) ->
+ parse_cdata(Rest, State, [C|Acc]);
+parse_cdata(?STRING_UNBOUND_REST(C, _), State, _) ->
+ ?fatal_error(State, "CDATA contains bad character value: " ++ [C]);
+parse_cdata(Bytes, State, Acc) ->
+ unicode_incomplete_check([Bytes, State, Acc, fun parse_cdata/3],
+ undefined).
+
+
+%%----------------------------------------------------------------------
+%% Function: parse_comment(Rest, State, Acc) -> Result
+%% Input: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Acc = string()
+%% Output: Result = {Rest, State}
+%% Description: Parse a comment.
+%% [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
+%%----------------------------------------------------------------------
+parse_comment(?STRING_EMPTY, State, Acc) ->
+ cf(?STRING_EMPTY, State, Acc, fun parse_comment/3);
+parse_comment(?STRING("\r"), State, Acc) ->
+ cf(?STRING("\r"), State, Acc, fun parse_comment/3);
+parse_comment(?STRING("-"), State, Acc) ->
+ cf(?STRING("-"), State, Acc, fun parse_comment/3);
+parse_comment(?STRING("--"), State, Acc) ->
+ cf(?STRING("--"), State, Acc, fun parse_comment/3);
+parse_comment(?STRING_REST("-->", Rest), State, Acc) ->
+ State1 = event_callback({comment, lists:reverse(Acc)}, State),
+ {Rest, State1};
+parse_comment(?STRING_REST("--", _), State, _) ->
+ ?fatal_error(State, "comment contains '--'");
+parse_comment(?STRING_REST("\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Acc) ->
+ parse_comment(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf|Acc]);
+parse_comment(?STRING_REST("\r\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Acc) ->
+ parse_comment(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf|Acc]);
+parse_comment(?STRING_REST("\r", Rest), #xmerl_sax_parser_state{line_no=N} = State, Acc) ->
+ parse_comment(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf|Acc]);
+parse_comment(?STRING_UNBOUND_REST(C, Rest), State, Acc) ->
+ if
+ ?is_char(C) ->
+ parse_comment(Rest, State, [C|Acc]);
+ true ->
+ ?fatal_error(State, "Bad character in comment: " ++ C)
+ end;
+parse_comment(Bytes, State, Acc) ->
+ unicode_incomplete_check([Bytes, State, Acc, fun parse_comment/3],
+ undefined).
+
+
+%%----------------------------------------------------------------------
+%% Function: parse_misc(Rest, State, Eod) -> Result
+%% Input: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Eod = true |false
+%% Output: Result = {Rest, State}
+%% Description: Parse a misc clause, could be a comment, a processing
+%% instruction or whitespace. If the input stream is empty
+%% (Eod parameter true) then we return current state and quit.
+%% [27] Misc ::= Comment | PI | S
+%%----------------------------------------------------------------------
+parse_misc(?STRING_EMPTY, State, true) ->
+ {?STRING_EMPTY, State};
+parse_misc(?STRING_EMPTY, State, Eod) ->
+ cf(?STRING_EMPTY, State, Eod, fun parse_misc/3);
+parse_misc(?STRING("<") = Rest, State, Eod) ->
+ cf(Rest, State, Eod, fun parse_misc/3);
+parse_misc(?STRING_REST("<?", Rest), State, Eod) ->
+ {Rest1, State1} = parse_pi(Rest, State),
+ parse_misc(Rest1, State1, Eod);
+parse_misc(?STRING("<!") = Rest, State, Eod) ->
+ cf(Rest, State, Eod, fun parse_misc/3);
+parse_misc(?STRING("<!-") = Rest, State, Eod) ->
+ cf(Rest, State, Eod, fun parse_misc/3);
+parse_misc(?STRING_REST("<!--", Rest), State, Eod) ->
+ {Rest1, State1} = parse_comment(Rest, State, []),
+ parse_misc(Rest1, State1, Eod);
+parse_misc(?STRING_UNBOUND_REST(C, _) = Rest, State, Eod) when ?is_whitespace(C) ->
+ {_WS, Rest1, State1} = whitespace(Rest, State, []),
+ parse_misc(Rest1, State1, Eod);
+parse_misc(Rest, State, _Eod) ->
+ {Rest, State}.
+%% unicode_incomplete_check([Bytes, State, Eod, fun parse_misc/3],
+%% "expecting comment or PI").
+
+%%----------------------------------------------------------------------
+%% Function: parse_stag(Rest, State) -> Result
+%% Input: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Output: Result = {Rest, State}
+%% Description: Parsing a start tag.
+%% [40] STag ::= '<' Name (S Attribute)* S? '>'
+%%----------------------------------------------------------------------
+parse_stag(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_stag/2);
+parse_stag(?STRING_UNBOUND_REST(C, Rest), State) ->
+ case is_name_start(C) of
+ true ->
+ {TagName, Rest1, State1} =
+ parse_ns_name(Rest, State, [], [C]),
+ parse_attributes(Rest1, State1, {TagName, [], []});
+ false ->
+ ?fatal_error(State, "expecting name")
+ end;
+parse_stag(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_stag/2],
+ undefined).
+
+%%----------------------------------------------------------------------
+%% Function: parse_attributes(Rest, State, CurrentTag) -> Result
+%% Input: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% CurrentTag = {Name, AttList, NewNsList}
+%% Name = string()
+%% AttList = [{Name, Value}]
+%% NewNsList = [{Name, Value}]
+%% Output: Result = {Rest, State}
+%% Description: Parsing the attribute list in the start tag. The current
+%% tag tuple contains the tag name, a list of attributes
+%% (exclusive NS attributes) and a list of new NS attributes.
+%% [41] Attribute ::= Name Eq AttValue
+%%----------------------------------------------------------------------
+parse_attributes(?STRING_EMPTY, State, CurrentTag) ->
+ cf(?STRING_EMPTY, State, CurrentTag, fun parse_attributes/3);
+parse_attributes(?STRING("/"), State, CurrentTag) ->
+ cf(?STRING("/"), State, CurrentTag, fun parse_attributes/3);
+parse_attributes(?STRING_REST("/>", Rest), State, {Tag, AttList, NewNsList}) ->
+ CompleteNsList = NewNsList ++ State#xmerl_sax_parser_state.ns,
+ {Uri, LocalName, QName, Attributes} = fix_ns(Tag, AttList, CompleteNsList),
+ State1 = send_start_prefix_mapping_event(lists:reverse(NewNsList), State),
+ State2 = event_callback({startElement, Uri, LocalName, QName, Attributes}, State1),
+ State3 = event_callback({endElement, Uri, LocalName, QName}, State2),
+ State4 = send_end_prefix_mapping_event(NewNsList, State3),
+ parse_content(Rest, State4, [], true);
+parse_attributes(?STRING_REST(">", Rest), #xmerl_sax_parser_state{end_tags=ETags, ns = OldNsList} = State,
+ {Tag, AttList, NewNsList}) ->
+ CompleteNsList = NewNsList ++ OldNsList,
+ {Uri, LocalName, QName, Attributes} = fix_ns(Tag, AttList, CompleteNsList),
+ State1 = send_start_prefix_mapping_event(lists:reverse(NewNsList), State),
+ State2 = event_callback({startElement, Uri, LocalName, QName, Attributes}, State1),
+ parse_content(Rest, State2#xmerl_sax_parser_state{end_tags=[{Tag, Uri, LocalName, QName,
+ OldNsList, NewNsList} |ETags],
+ ns = CompleteNsList},
+ [], true);
+parse_attributes(?STRING_UNBOUND_REST(C, _) = Rest, State, CurrentTag) when ?is_whitespace(C) ->
+ {_WS, Rest1, State1} = whitespace(Rest, State, []),
+ parse_attributes(Rest1, State1, CurrentTag);
+parse_attributes(?STRING_UNBOUND_REST(C, Rest), State, {Tag, AttList, NsList}) ->
+ case is_name_start(C) of
+ true ->
+ {AttrName, Rest1, State1} =
+ parse_ns_name(Rest, State, [], [C]),
+ {Rest2, State2} = parse_eq(Rest1, State1),
+ {AttValue, Rest3, State3} = parse_att_value(Rest2, State2),
+ case AttrName of
+ {"xmlns", NsName} ->
+ parse_attributes(Rest3, State3, {Tag, AttList, [{NsName, AttValue} |NsList]});
+ {"", "xmlns"} ->
+ parse_attributes(Rest3, State3, {Tag, AttList, [{"", AttValue} |NsList]});
+ {_Prefix, _LocalName} ->
+ case lists:keyfind(AttrName, 1, AttList) of
+ false ->
+ parse_attributes(Rest3, State3, {Tag, [{AttrName, AttValue}|AttList], NsList});
+ _ ->
+ ElName =
+ case Tag of
+ {"", N} -> N;
+ {Ns, N} -> Ns ++ ":" ++ N
+ end,
+ ?fatal_error(State, "Attribute exist more than once in element: " ++ ElName)
+ end
+ end;
+ false ->
+ ?fatal_error(State, "Invalid start character in attribute name: " ++ C)
+ end;
+parse_attributes(Bytes, State, CurrentTag) ->
+ unicode_incomplete_check([Bytes, State, CurrentTag, fun parse_attributes/3],
+ "expecting name, whitespace, /> or >").
+
+
+
+%%----------------------------------------------------------------------
+%% Function: fix_ns({Prefix, Name}, Attributes, Ns) -> Result
+%% Input: Prefix = string()
+%% Name = string()
+%% Attributes = [{Name, Value}]
+%% Ns = [{Prefix, Uri}]
+%% Uri = string()
+%% Output: Result = {Uri, Name, QualifiedName, Attributes}
+%% QualifiedName = string()
+%% Description: Fix the name space prefixing for the attributes and start tag.
+%%----------------------------------------------------------------------
+% fix_ns({"", Name}, Attributes, Ns) ->
+% Attributes2 = fix_attributes_ns(Attributes, Ns, []),
+% {"", Name, Name, Attributes2};
+fix_ns({Prefix, Name}, Attributes, Ns) ->
+ Uri =
+ case lists:keysearch(Prefix, 1, Ns) of
+ {value, {Prefix, U}} ->
+ U;
+ false ->
+ ""
+ end,
+ Attributes2 = fix_attributes_ns(Attributes, Ns, []),
+
+ {Uri, Name, {Prefix, Name}, Attributes2}.
+
+%%----------------------------------------------------------------------
+%% Function: fix_attributes_ns(Attributes, Ns, Acc) -> Result
+%% Input: Attributes = [{{Prefix, Name}, Value}]
+%% Prefix = string()
+%% Name = string()
+%% Value = string()
+%% Ns = [{Prefix, Uri}]
+%% Uri = string()
+%% Output: Result = [{Uri, Name, Value}]
+%% Description: Fix the name spaces for the attributes.
+%%----------------------------------------------------------------------
+fix_attributes_ns([], _, Acc) ->
+ Acc;
+fix_attributes_ns([{{"", Name}, AttrValue} | Attrs], Ns, Acc) ->
+ fix_attributes_ns(Attrs, Ns, [{"", "", Name, AttrValue} |Acc]);
+fix_attributes_ns([{{Prefix, Name}, AttrValue} | Attrs], Ns, Acc) ->
+ Uri =
+ case lists:keysearch(Prefix, 1, Ns) of
+ {value, {Prefix, U}} ->
+ U;
+ false ->
+ ""
+ end,
+ fix_attributes_ns(Attrs, Ns, [{Uri, Prefix, Name, AttrValue} |Acc]).
+
+
+%%----------------------------------------------------------------------
+%% Function: send_start_prefix_mapping_event(Ns, State) -> Result
+%% Input: Ns = [{Prefix, Uri}]
+%% Prefix = string()
+%% Uri = string()
+%% State = #xmerl_sax_parser_state{}
+%% Output: Result = #xmerl_sax_parser_state{}
+%% Description: Loops over a name space list and sends startPrefixMapping events.
+%%----------------------------------------------------------------------
+send_start_prefix_mapping_event([], State) ->
+ State;
+send_start_prefix_mapping_event([{Prefix, Uri} |Ns], State) ->
+ State1 = event_callback({startPrefixMapping, Prefix, Uri}, State),
+ send_start_prefix_mapping_event(Ns, State1).
+
+
+%%----------------------------------------------------------------------
+%% Function: send_end_prefix_mapping_event(Ns, State) -> Result
+%% Input: Ns = [{Prefix, Uri}]
+%% Prefix = string()
+%% Uri = string()
+%% State = #xmerl_sax_parser_state{}
+%% Output: Result = #xmerl_sax_parser_state{}
+%% Description: Loops over a name space list and sends endPrefixMapping events.
+%%----------------------------------------------------------------------
+send_end_prefix_mapping_event([], State) ->
+ State;
+send_end_prefix_mapping_event([{Prefix, _Uri} |Ns], State) ->
+ State1 = event_callback({endPrefixMapping, Prefix}, State),
+ send_end_prefix_mapping_event(Ns, State1).
+
+
+%%----------------------------------------------------------------------
+%% Function: parse_eq(Rest, State) -> Result
+%% Input: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Output: Result = {Rest, State}
+%% Description: Parsing an '=' from the stream.
+%% [25] Eq ::= S? '=' S?
+%%----------------------------------------------------------------------
+parse_eq(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_eq/2);
+parse_eq(?STRING_REST("=", Rest), State) ->
+ {Rest, State};
+parse_eq(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
+ {_WS, Rest, State1} =
+ whitespace(Bytes, State, []),
+ parse_eq(Rest, State1);
+parse_eq(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_eq/2],
+ "expecting = or whitespace").
+
+
+%%----------------------------------------------------------------------
+%% Function: parse_att_value(Rest, State) -> Result
+%% Input: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Output: Result = {Rest, State}
+%% Description: Start the parsing of an attribute value by checking the delimiter
+%% [10] AttValue ::= '"' ([^<&"] | Reference)* '"'
+%% | "'" ([^<&'] | Reference)* "'"
+%%----------------------------------------------------------------------
+parse_att_value(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_att_value/2);
+parse_att_value(?STRING_UNBOUND_REST(C, Rest), State) when C == $'; C == $" ->
+ parse_att_value(Rest, State, C, []);
+parse_att_value(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
+ {_WS, Rest, State1} =
+ whitespace(Bytes, State, []),
+ parse_att_value(Rest, State1);
+parse_att_value(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_att_value/2],
+ "\', \" or whitespace expected").
+
+
+%%----------------------------------------------------------------------
+%% Function : parse_att_value(Rest, State, Stop, Acc) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Stop = $' | $"
+%% Acc = string()
+%% Result : {Value, Rest, State}
+%% Value = string()
+%% Description: Parse an attribute value
+%%----------------------------------------------------------------------
+parse_att_value(?STRING_EMPTY, State, undefined, Acc) ->
+ {Acc, [], State}; %% stop clause when parsing references
+parse_att_value(?STRING_EMPTY, State, Stop, Acc) ->
+ cf(?STRING_EMPTY, State, Stop, Acc, fun parse_att_value/4);
+parse_att_value(?STRING("\r"), State, Stop, Acc) ->
+ cf(?STRING("\r"), State, Stop, Acc, fun parse_att_value/4);
+parse_att_value(?STRING_REST("\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc) ->
+ parse_att_value(Rest,
+ State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
+parse_att_value(?STRING_REST("\r\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc) ->
+ parse_att_value(Rest,
+ State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
+parse_att_value(?STRING_REST("\r", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc) ->
+ parse_att_value(Rest,
+ State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
+parse_att_value(?STRING_REST("\t", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc) ->
+ parse_att_value(Rest,
+ State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
+parse_att_value(?STRING_REST("&", Rest), State, Stop, Acc) ->
+ {Ref, Rest1, State1} = parse_reference(Rest, State, true),
+ case Ref of
+ {character, _, CharValue} ->
+ parse_att_value(Rest1, State1, Stop, [CharValue | Acc]);
+ {internal_general, true, _, Value} ->
+ parse_att_value(Rest1, State1, Stop, Value ++ Acc);
+ {internal_general, false, _, Value} ->
+ {ParsedValue, [], State2} = parse_att_value(?TO_INPUT_FORMAT(Value), State1, undefined, []),
+ parse_att_value(Rest1, State2, Stop, ParsedValue ++ Acc);
+ {external_general, Name, _} ->
+ ?fatal_error(State1, "External parsed entity reference in attribute value: " ++ Name);
+ {unparsed, Name, _} ->
+ ?fatal_error(State1, "Unparsed entity reference in attribute value: " ++ Name)
+ end;
+parse_att_value(?STRING_UNBOUND_REST(Stop, Rest), State, Stop, Acc) ->
+ {lists:reverse(Acc), Rest, State};
+parse_att_value(?STRING_UNBOUND_REST($<, _Rest), State, _Stop, _Acc) ->
+ ?fatal_error(State, "< not allowed in attribute value");
+parse_att_value(?STRING_UNBOUND_REST(C, Rest), State, Stop, Acc) ->
+ parse_att_value(Rest, State, Stop, [C|Acc]);
+parse_att_value(Bytes, State, Stop, Acc) ->
+ unicode_incomplete_check([Bytes, State, Stop, Acc, fun parse_att_value/4],
+ undefined).
+
+
+%%----------------------------------------------------------------------
+%% Function : parse_etag(Rest, State) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Result : {Rest, State}
+%% Description: Parse the end tag
+%% [42] ETag ::= '</' Name S? '>'
+%%----------------------------------------------------------------------
+parse_etag(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_etag/2);
+parse_etag(?STRING_UNBOUND_REST(C, Rest),
+ #xmerl_sax_parser_state{end_tags=[{ETag, _Uri, _LocalName, _QName, _OldNsList, _NewNsList}
+ |_RestOfETags]} = State) ->
+ case is_name_start(C) of
+ true ->
+ {Tag, Rest1, State1} = parse_ns_name(Rest, State, [], [C]),
+ case Tag == ETag of
+ true ->
+ {_WS, Rest2, State2} = whitespace(Rest1, State1, []),
+ parse_etag_1(Rest2, State2, Tag);
+ false ->
+ case State1#xmerl_sax_parser_state.match_end_tags of
+ true ->
+ {P,TN} = Tag,
+ ?fatal_error(State1, "EndTag: " ++ P ++ ":" ++ TN ++
+ ", does not match StartTag");
+ false ->
+ {_WS, Rest2, State2} = whitespace(Rest1, State1, []),
+ parse_etag_1(Rest2, State2, Tag)
+ end
+ end;
+ false ->
+ ?fatal_error(State, "Name expected")
+ end;
+parse_etag(?STRING_UNBOUND_REST(_C, _) = Rest, #xmerl_sax_parser_state{end_tags=[]}= State) ->
+ {Rest, State};
+parse_etag(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_etag/2],
+ undefined).
+
+
+parse_etag_1(?STRING_REST(">", Rest),
+ #xmerl_sax_parser_state{end_tags=[{_ETag, Uri, LocalName, QName, OldNsList, NewNsList}
+ |RestOfETags]} = State, _Tag) ->
+ State1 = event_callback({endElement, Uri, LocalName, QName}, State),
+ State2 = send_end_prefix_mapping_event(NewNsList, State1),
+ parse_content(Rest,
+ State2#xmerl_sax_parser_state{end_tags=RestOfETags,
+ ns = OldNsList},
+ [], true);
+parse_etag_1(?STRING_UNBOUND_REST(_C, _), State, Tag) ->
+ {P,TN} = Tag,
+ ?fatal_error(State, "Bad EndTag: " ++ P ++ ":" ++ TN);
+parse_etag_1(Bytes, State, Tag) ->
+ unicode_incomplete_check([Bytes, State, Tag, fun parse_etag_1/3],
+ undefined).
+
+%%----------------------------------------------------------------------
+%% Function: parse_content(Rest, State, Acc, IgnorableWS) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Acc = string()
+%% IgnorableWS = true | false
+%% Result : {Rest, State}
+%% Description: Parsing the content part of tags
+%% [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
+%%----------------------------------------------------------------------
+
+parse_content(?STRING_EMPTY, State, Acc, IgnorableWS) ->
+ case catch cf(?STRING_EMPTY, State, Acc, IgnorableWS, fun parse_content/4) of
+ {Rest, State1} when is_record(State1, xmerl_sax_parser_state) ->
+ {Rest, State1};
+ {fatal_error, {State1, Msg}} ->
+ case check_if_document_complete(State1, Msg) of
+ true ->
+ State2 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State1),
+ {?STRING_EMPTY, State2};
+ false ->
+ ?fatal_error(State1, Msg)
+ end;
+ Other ->
+ throw(Other)
+ end;
+parse_content(?STRING("\r"), State, Acc, IgnorableWS) ->
+ cf(?STRING("\r"), State, Acc, IgnorableWS, fun parse_content/4);
+parse_content(?STRING("<"), State, Acc, IgnorableWS) ->
+ cf(?STRING("<"), State, Acc, IgnorableWS, fun parse_content/4);
+parse_content(?STRING_REST("</", Rest), State, Acc, IgnorableWS) ->
+ State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
+ parse_etag(Rest, State1);
+parse_content(?STRING("<!"), State, _Acc, IgnorableWS) ->
+ cf(?STRING("<!"), State, [], IgnorableWS, fun parse_content/4);
+parse_content(?STRING("<!-"), State, _Acc, IgnorableWS) ->
+ cf(?STRING("<!-"), State, [], IgnorableWS, fun parse_content/4);
+parse_content(?STRING_REST("<!--", Rest), State, Acc, IgnorableWS) ->
+ State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
+ {Rest1, State2} = parse_comment(Rest, State1, []),
+ parse_content(Rest1, State2, [], true);
+parse_content(?STRING_REST("<?", Rest), State, Acc, IgnorableWS) ->
+ State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
+ {Rest1, State2} = parse_pi(Rest, State1),
+ parse_content(Rest1, State2, [], true);
+parse_content(?STRING_REST("<!", Rest1) = Rest, #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) ->
+ case ET of
+ [] ->
+ {Rest, State}; %%LATH : Skicka ignorable WS ???
+ _ ->
+ State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
+ parse_cdata(Rest1, State1)
+ end;
+parse_content(?STRING_REST("<", Rest1) = Rest, #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) ->
+ case ET of
+ [] ->
+ {Rest, State}; %%LATH : Skicka ignorable WS ???
+ _ ->
+ State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
+ parse_stag(Rest1, State1)
+ end;
+parse_content(?STRING_REST("\n", Rest), State, Acc, IgnorableWS) ->
+ N = State#xmerl_sax_parser_state.line_no,
+ parse_content(Rest, State#xmerl_sax_parser_state{line_no=N+1},[?lf |Acc], IgnorableWS);
+parse_content(?STRING_REST("\r\n", Rest), State, Acc, IgnorableWS) ->
+ N = State#xmerl_sax_parser_state.line_no,
+ parse_content(Rest, State#xmerl_sax_parser_state{line_no=N+1},[?lf |Acc], IgnorableWS);
+parse_content(?STRING_REST("\r", Rest), State, Acc, IgnorableWS) ->
+ N = State#xmerl_sax_parser_state.line_no,
+ parse_content(Rest, State#xmerl_sax_parser_state{line_no=N+1},[?lf |Acc], IgnorableWS);
+parse_content(?STRING_REST(" ", Rest), State, Acc, IgnorableWS) ->
+ parse_content(Rest, State,[?space |Acc], IgnorableWS);
+parse_content(?STRING_REST("\t", Rest), State, Acc, IgnorableWS) ->
+ parse_content(Rest, State,[?tab |Acc], IgnorableWS);
+parse_content(?STRING_REST("]]>", _Rest), State, _Acc, _IgnorableWS) ->
+ ?fatal_error(State, "\"]]>\" is not allowed in content");
+parse_content(?STRING_UNBOUND_REST(_C, _) = Rest,
+ #xmerl_sax_parser_state{end_tags = []} = State,
+ _Acc, _IgnorableWS) ->
+ {Rest, State};
+parse_content(?STRING_REST("&", Rest), State, Acc, _IgnorableWS) ->
+ {Ref, Rest1, State1} = parse_reference(Rest, State, true),
+ case Ref of
+ {character, _, CharValue} ->
+ parse_content(Rest1, State1, [CharValue | Acc], false);
+ {internal_general, true, _, Value} ->
+ parse_content(Rest1, State1, Value ++ Acc, false);
+ {internal_general, false, _, Value} ->
+ IValue = ?TO_INPUT_FORMAT(Value),
+ parse_content(?APPEND_STRING(IValue, Rest1), State1, Acc, false);
+ {external_general, _, {PubId, SysId}} ->
+ State2 = parse_external_entity(State1, PubId, SysId),
+ parse_content(Rest1, State2, Acc, false);
+ {unparsed, Name, _} ->
+ ?fatal_error(State1, "Unparsed entity reference in content: " ++ Name)
+ end;
+parse_content(?STRING_UNBOUND_REST(C, Rest), State, Acc, _IgnorableWS) ->
+ if
+ ?is_char(C) ->
+ parse_content(Rest, State, [C|Acc], false);
+ true ->
+ ?fatal_error(State, "Bad character in content: " ++ C)
+ end;
+parse_content(Bytes, State, Acc, IgnorableWS) ->
+ unicode_incomplete_check([Bytes, State, Acc, IgnorableWS, fun parse_content/4],
+ undefined).
+
+
+%%----------------------------------------------------------------------
+%% Function: check_if_document_complete(State, ErrorMsg) -> Result
+%% Parameters: State = #xmerl_sax_parser_state{}
+%% ErrorMsg = string()
+%% Result : boolean()
+%% Description: Checks that the document is complete if we don't have more data..
+%%----------------------------------------------------------------------
+check_if_document_complete(#xmerl_sax_parser_state{end_tags = []},
+ "No more bytes") ->
+ true;
+check_if_document_complete(#xmerl_sax_parser_state{end_tags = []},
+ "Continuation function undefined") ->
+ true;
+check_if_document_complete(_, _) ->
+ false.
+
+%%----------------------------------------------------------------------
+%% Function: send_character_event(Length, IgnorableWS, String, State) -> Result
+%% Parameters: Length = integer()
+%% IgnorableWS = true | false
+%% String = string()
+%% State = #xmerl_sax_parser_state{}
+%% Result : #xmerl_sax_parser_state{}
+%% Description: Sends the correct type of character event depending on if
+%% it's whitespaces that can be ignored or not.
+%%----------------------------------------------------------------------
+send_character_event(0, _, _, State) ->
+ State;
+send_character_event(_, false, String, State) ->
+ event_callback({characters, String}, State);
+send_character_event(_, true, String, State) ->
+ event_callback({ignorableWhitespace, String}, State).
+
+
+%%----------------------------------------------------------------------
+%% Function: whitespace(Rest, State, Acc) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Acc = string()
+%% Result : {Rest, State}
+%% Description: Parse whitespaces.
+%% [3] S ::= (#x20 | #x9 | #xD | #xA)+
+%%----------------------------------------------------------------------
+whitespace(?STRING_EMPTY, State, Acc) ->
+ case cf(?STRING_EMPTY, State, Acc, fun whitespace/3) of
+ {?STRING_EMPTY, State} ->
+ {lists:reverse(Acc), ?STRING_EMPTY, State};
+ Ret ->
+ Ret
+ end;
+whitespace(?STRING("\r") = Bytes, State, Acc) ->
+ case cf(Bytes, State, Acc, fun whitespace/3) of
+ {?STRING("\r") = Bytes, State} ->
+ {lists:reverse(Acc), Bytes, State};
+ Ret ->
+ Ret
+ end;
+whitespace(?STRING_REST("\n", Rest), State, Acc) ->
+ N = State#xmerl_sax_parser_state.line_no,
+ whitespace(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf |Acc]);
+whitespace(?STRING_REST("\r\n", Rest), State, Acc) ->
+ N = State#xmerl_sax_parser_state.line_no,
+ whitespace(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf |Acc]);
+whitespace(?STRING_REST("\r", Rest), State, Acc) ->
+ N = State#xmerl_sax_parser_state.line_no,
+ whitespace(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf |Acc]);
+whitespace(?STRING_UNBOUND_REST(C, Rest), State, Acc) when ?is_whitespace(C) ->
+ whitespace(Rest, State, [C|Acc]);
+whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) ->
+ {lists:reverse(Acc), Bytes, State};
+whitespace(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State, Acc) when is_binary(Bytes) ->
+ case unicode:characters_to_list(Bytes, Enc) of
+ {incomplete, _, _} ->
+ cf(Bytes, State, Acc, fun whitespace/3);
+ {error, _Encoded, _Rest} ->
+ ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc])))
+ end.
+
+
+%%----------------------------------------------------------------------
+%% Function: parse_reference(Rest, State, HaveToExist) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Result : {Value, Rest, State}
+%% Description: Parse entity references.
+%% [66] CharRef ::= '&#' [0-9]+ ';'
+%% | '&#x' [0-9a-fA-F]+ ';'
+%% [67] Reference ::= EntityRef | CharRef
+%% [68] EntityRef ::= '&' Name ';'
+%%----------------------------------------------------------------------
+parse_reference(?STRING_EMPTY, State, HaveToExist) ->
+ cf(?STRING_EMPTY, State, HaveToExist, fun parse_reference/3);
+parse_reference(?STRING("#"), State, HaveToExist) ->
+ cf(?STRING("#"), State, HaveToExist, fun parse_reference/3);
+parse_reference(?STRING_REST("#x", Rest), State, _HaveToExist) ->
+ {CharValue, RefString, Rest1, State1} = parse_hex(Rest, State, []),
+ if
+ ?is_char(CharValue) ->
+ {{character, is_delimiter(CharValue), CharValue},
+ Rest1, State1};
+ true ->
+ ?fatal_error(State1, "Not a legal character: #x" ++ RefString) %%WFC: Legal Character
+ end;
+parse_reference(?STRING_REST("#", Rest), State, _HaveToExist) ->
+ {CharValue, RefString, Rest1, State1} = parse_digit(Rest, State, []),
+ if
+ ?is_char(CharValue) ->
+ {{character, is_delimiter(CharValue), CharValue},
+ Rest1, State1};
+ true ->
+ ?fatal_error(State1, "Not a legal character: #" ++ RefString)%%WFC: Legal Character
+ end;
+parse_reference(?STRING_UNBOUND_REST(C, Rest), State, HaveToExist) ->
+ case is_name_start(C) of
+ true ->
+ {Name, Rest1, State1} = parse_name(Rest, State, [C]),
+ parse_reference_1(Rest1, State1, HaveToExist, Name);
+ false ->
+ ?fatal_error(State, "name expected")
+ end;
+parse_reference(Bytes, State, HaveToExist) ->
+ unicode_incomplete_check([Bytes, State, HaveToExist, fun parse_reference/3],
+ underfined).
+
+
+parse_reference_1(?STRING_REST(";", Rest), State, HaveToExist, Name) ->
+ case look_up_reference(Name, HaveToExist, State) of
+ {internal_general, Name, RefValue} ->
+ {{internal_general, is_delimiter(RefValue), Name, RefValue},
+ Rest, State};
+ Result ->
+ {Result, Rest, State}
+ end;
+parse_reference_1(Bytes, State, HaveToExist, Name) ->
+ unicode_incomplete_check([Bytes, State, HaveToExist, Name, fun parse_reference_1/4],
+ "Missing semicolon after reference: " ++ Name).
+
+
+
+%%----------------------------------------------------------------------
+%% Function: is_delimiter(Character) -> Result
+%% Parameters: Character
+%% Result :
+%%----------------------------------------------------------------------
+is_delimiter(38) ->
+ true;
+is_delimiter(60) ->
+ true;
+is_delimiter(62) ->
+ true;
+is_delimiter(39) ->
+ true;
+is_delimiter(34) ->
+ true;
+is_delimiter("&") ->
+ true;
+is_delimiter("<") ->
+ true;
+is_delimiter(">") ->
+ true;
+is_delimiter("'") ->
+ true;
+is_delimiter("\"") ->
+ true;
+is_delimiter(_) ->
+ false.
+
+%%----------------------------------------------------------------------
+%% Function: parse_pe_reference(Rest, State) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Acc = string()
+%% Result : {Result, Rest, State}
+%% Description: Parse a parameter entity reference.
+%% [69] PEReference ::= '%' Name ';'
+%%----------------------------------------------------------------------
+parse_pe_reference(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_pe_reference/2);
+parse_pe_reference(?STRING_UNBOUND_REST(C, Rest), State) ->
+ case is_name_start(C) of
+ true ->
+ {Name, Rest1, State1} = parse_name(Rest, State, [C]),
+ parse_pe_reference_1(Rest1, State1, Name);
+ false ->
+ ?fatal_error(State, "Name expected")
+ end;
+parse_pe_reference(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_pe_reference/2],
+ underfined).
+
+
+parse_pe_reference_1(?STRING_REST(";", Rest), State, Name) ->
+ Name1 = "%" ++ Name,
+ Result = look_up_reference(Name1, true, State),
+ {Result, Rest, State};
+parse_pe_reference_1(Bytes, State, Name) ->
+ unicode_incomplete_check([Bytes, State, Name, fun parse_pe_reference_1/3],
+ "missing ; after reference " ++ Name).
+
+
+
+%%----------------------------------------------------------------------
+%% Function: insert_reference(Reference, State) -> Result
+%% Parameters: Reference = string()
+%% State = #xmerl_sax_parser_state{}
+%% Result :
+%%----------------------------------------------------------------------
+insert_reference({Name, Type, Value}, Table) ->
+ case ets:lookup(Table, Name) of
+ [{Name, _, _}] ->
+ ok;
+ _ ->
+ ets:insert(Table, {Name, Type, Value})
+ end.
+
+
+
+%%----------------------------------------------------------------------
+%% Function: look_up_reference(Reference, State) -> Result
+%% Parameters: Reference = string()
+%% State = #xmerl_sax_parser_state{}
+%% Result :
+%%----------------------------------------------------------------------
+look_up_reference("amp", _, _) ->
+ {internal_general, "amp", "&"};
+look_up_reference("lt", _, _) ->
+ {internal_general, "lt", "<"};
+look_up_reference("gt", _, _) ->
+ {internal_general, "gt", ">"};
+look_up_reference("apos", _, _) ->
+ {internal_general, "apos", "'"};
+look_up_reference("quot", _, _) ->
+ {internal_general, "quot", "\""};
+look_up_reference(Name, HaveToExist, State) ->
+ case ets:lookup(State#xmerl_sax_parser_state.ref_table, Name) of
+ [{Name, Type, Value}] ->
+ {Type, Name, Value};
+ _ ->
+ case HaveToExist of
+ true ->
+ case State#xmerl_sax_parser_state.standalone of
+ yes ->
+ ?fatal_error(State, "Entity not declared: " ++ Name); %%WFC: Entity Declared
+ no ->
+ ?fatal_error(State, "Entity not declared: " ++ Name) %%VC: Entity Declared
+ end;
+ false ->
+ {not_found, Name}
+ end
+ end.
+
+
+%%----------------------------------------------------------------------
+%% Function: parse_hex(Rest, State, Acc) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Acc = string()
+%% Result : {Value, Reference, Rest, State}
+%% Value = integer()
+%% Reference = string()
+%% Description: Parse a hex reference.
+%%----------------------------------------------------------------------
+parse_hex(?STRING_EMPTY, State, Acc) ->
+ cf(?STRING_EMPTY, State, Acc, fun parse_hex/3);
+parse_hex(?STRING_REST(";", Rest), State, Acc) ->
+ RefString = lists:reverse(Acc),
+ {erlang:list_to_integer(RefString, 16), RefString, Rest, State};
+parse_hex(?STRING_UNBOUND_REST(C, Rest), State, Acc) when ?is_hex_digit(C) ->
+ parse_hex(Rest, State, [C |Acc]);
+parse_hex(Bytes, State, Acc) ->
+ unicode_incomplete_check([Bytes, State, Acc, fun parse_hex/3],
+ "Bad hex value in reference: ").
+
+
+%%----------------------------------------------------------------------
+%% Function: parse_digit(Rest, State, Acc) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Acc = string()
+%% Result : {Value, Reference, Rest, State}
+%% Value = integer()
+%% Reference = string()
+%% Description: Parse a decimal reference.
+%%----------------------------------------------------------------------
+parse_digit(?STRING_EMPTY, State, Acc) ->
+ cf(?STRING_EMPTY, State, Acc, fun parse_digit/3);
+parse_digit(?STRING_REST(";", Rest), State, Acc) ->
+ RefString = lists:reverse(Acc),
+ {list_to_integer(RefString), RefString, Rest, State};
+parse_digit(?STRING_UNBOUND_REST(C, Rest), State, Acc) ->
+ case is_digit(C) of
+ true ->
+ parse_digit(Rest, State, [C |Acc]);
+ false ->
+ ?fatal_error(State, "Character in reference not a digit: " ++ [C])
+ end;
+parse_digit(Bytes, State, Acc) ->
+ unicode_incomplete_check([Bytes, State, Acc, fun parse_digit/3],
+ undefined).
+
+%%----------------------------------------------------------------------
+%% Function: parse_system_litteral(Rest, State, Stop, Acc) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Stop = $' | $"
+%% Acc = string()
+%% Result : {Value, Reference, Rest, State}
+%% Value = integer()
+%% Reference = string()
+%% Description: Parse a system litteral.
+%% [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
+%%----------------------------------------------------------------------
+parse_system_litteral(?STRING_EMPTY, State, Stop, Acc) ->
+ cf(?STRING_EMPTY, State, Stop, Acc, fun parse_system_litteral/4);
+parse_system_litteral(?STRING_UNBOUND_REST(Stop, Rest), State, Stop, Acc) ->
+ {lists:reverse(Acc), Rest, State};
+parse_system_litteral(?STRING_UNBOUND_REST(C, Rest), State, Stop, Acc) ->
+ parse_system_litteral(Rest, State, Stop, [C |Acc]);
+parse_system_litteral(Bytes, State, Stop, Acc) ->
+ unicode_incomplete_check([Bytes, State, Stop, Acc, fun parse_system_litteral/4],
+ undefined).
+
+%%----------------------------------------------------------------------
+%% Function: parse_pubid_litteral(Rest, State, Stop, Acc) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Stop = $' | $"
+%% Acc = string()
+%% Result : {Value, Reference, Rest, State}
+%% Value = integer()
+%% Reference = string()
+%% Description: Parse a public idlitteral.
+%% [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
+%%----------------------------------------------------------------------
+parse_pubid_litteral(?STRING_EMPTY, State, Stop, Acc) ->
+ cf(?STRING_EMPTY, State, Stop, Acc, fun parse_pubid_litteral/4);
+parse_pubid_litteral(?STRING_UNBOUND_REST(Stop, Rest), State, Stop, Acc) ->
+ {lists:reverse(Acc), Rest, State};
+parse_pubid_litteral(?STRING_UNBOUND_REST(C, Rest), State, Stop, Acc) ->
+ case is_pubid_char(C) of
+ true ->
+ parse_pubid_litteral(Rest, State, Stop, [C |Acc]);
+ false ->
+ ?fatal_error(State, "Character not allowed in pubid litteral: " ++ [C])
+ end;
+parse_pubid_litteral(Bytes, State, Stop, Acc) ->
+ unicode_incomplete_check([Bytes, State, Stop, Acc, fun parse_pubid_litteral/4],
+ undefined).
+
+%%======================================================================
+%% DTD Parsing
+%%======================================================================
+
+%%----------------------------------------------------------------------
+%% Function : parse_doctype(Rest, State, Level, Acc) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Level = integer()
+%% Acc = string()
+%% Result : {string(), Rest, State}
+%% Description: This function is just searching the end of the doctype
+%% declaration and doesn't parse it. It's used when the
+%% parse_dtd option is set to skip.
+%%----------------------------------------------------------------------
+%% Just returns doctype as string
+%% parse_doctype(?STRING_EMPTY, State, Level, Acc) ->
+%% cf(?STRING_EMPTY, State, Level, Acc, fun parse_doctype/4);
+%% parse_doctype(?STRING("\r"), State, Level, Acc) ->
+%% cf(?STRING("\r"), State, Level, Acc, fun parse_doctype/4);
+%% parse_doctype(?STRING_REST(">", Rest), State, 0, Acc) ->
+%% {Acc, Rest, State};
+%% parse_doctype(?STRING_REST(">", Rest), State, Level, Acc) ->
+%% parse_doctype(Rest, State, Level-1, Acc);
+%% parse_doctype(?STRING_REST("<", Rest), State, Level, Acc) ->
+%% parse_doctype(Rest, State, Level+1, [$<|Acc]);
+%% parse_doctype(?STRING_REST("\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Level, Acc) ->
+%% parse_doctype(Rest, State#xmerl_sax_parser_state{line_no=N+1}, Level, [?lf |Acc]);
+%% parse_doctype(?STRING_REST("\r\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Level, Acc) ->
+%% parse_doctype(Rest, State#xmerl_sax_parser_state{line_no=N+1}, Level, [?lf |Acc]);
+%% parse_doctype(?STRING_REST("\r", Rest), #xmerl_sax_parser_state{line_no=N} = State, Level, Acc) ->
+%% parse_doctype(Rest, State#xmerl_sax_parser_state{line_no=N+1}, Level, [?lf |Acc]);
+%% parse_doctype(?STRING_UNBOUND_REST(C, Rest), State, Level, Acc) ->
+%% parse_doctype(Rest, State, Level, [C|Acc]);
+%% parse_doctype(Bytes, State, Level, Acc) ->
+%% unicode_incomplete_check([Bytes, State, Level, Acc, fun parse_doctype/4],
+%% undefined).
+
+
+%%----------------------------------------------------------------------
+%% Function : parse_doctype(Rest, State) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Result : {Rest, State}
+%% Description: This function starts an parsing of the DTD
+%% that sends apropriate events.
+%% [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
+%% ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
+%%----------------------------------------------------------------------
+parse_doctype(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_doctype/2);
+parse_doctype(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
+ {_WS, Rest, State1} = whitespace(Bytes, State, []),
+ parse_doctype(Rest, State1);
+parse_doctype(?STRING_UNBOUND_REST(C, Rest), State) ->
+ case is_name_start(C) of
+ true ->
+ {Name, Rest1, State1} = parse_name(Rest, State, [C]),
+ parse_doctype_1(Rest1, State1, Name, false);
+ false ->
+ ?fatal_error(State, "expecting name or whitespace")
+ end;
+parse_doctype(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_doctype/2],
+ undefined).
+
+
+%%----------------------------------------------------------------------
+%% Function : parse_doctype_1(Rest, State, Name, Definition) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Name = string()
+%% Definition = true |false
+%% Result : {Rest, State}
+%% Description: Gets the DTD name as a parameter and contine parse the DOCTYPE
+%% directive
+%%----------------------------------------------------------------------
+parse_doctype_1(?STRING_EMPTY, State, Name, Definition) ->
+ cf(?STRING_EMPTY, State, Name, Definition, fun parse_doctype_1/4);
+parse_doctype_1(?STRING_REST(">", Rest), State, _, _) ->
+ {Rest, State};
+parse_doctype_1(?STRING_REST("[", Rest), State, Name, Definition) ->
+ State1 =
+ case Definition of
+ false ->
+ event_callback({startDTD, Name, "", ""}, State);
+ true ->
+ State
+ end,
+ {Rest1, State2} = parse_doctype_decl(Rest, State1),
+ {_WS, Rest2, State3} = whitespace(Rest1, State2, []),
+ parse_doctype_2(Rest2, State3);
+parse_doctype_1(?STRING_UNBOUND_REST(C, _) = Rest, State, Name, Definition) when ?is_whitespace(C) ->
+ {_WS, Rest1, State1} = whitespace(Rest, State, []),
+ parse_doctype_1(Rest1, State1, Name, Definition);
+parse_doctype_1(?STRING_UNBOUND_REST(C, _) = Rest, State, Name, _Definition) when C == $S; C == $P ->
+ {PubId, SysId, Rest1, State1} = parse_external_id(Rest, State, false),
+ State2 = event_callback({startDTD, Name, PubId, SysId}, State1),
+ State3 =
+ case State2#xmerl_sax_parser_state.skip_external_dtd of
+ false ->
+ parse_external_entity(State2#xmerl_sax_parser_state{file_type=dtd}, PubId, SysId);
+ true ->
+ State2
+ end,
+ parse_doctype_1(Rest1, State3, Name, true);
+parse_doctype_1(Bytes, State, Name, Definition) ->
+ unicode_incomplete_check([Bytes, State, Name, Definition, fun parse_doctype_1/4],
+ "expecting >, external id or declaration part").
+
+
+parse_doctype_2(?STRING_REST(">", Rest), State) ->
+ {Rest, State};
+parse_doctype_2(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_doctype_2/2],
+ "expecting >").
+
+
+%%----------------------------------------------------------------------
+%% Function : parse_external_entity(State, PubId, SysId) -> Result
+%% Parameters: State = #xmerl_sax_parser_state{}
+%% PubId = string()
+%% SysId = string()
+%% Result : {Rest, State}
+%% Description: Starts the parsing of an external entity by calling the resolver and
+%% then sends the input to the parsing function.
+%%----------------------------------------------------------------------
+%% The public id is not handled
+parse_external_entity(State, _PubId, SysId) ->
+
+ ExtRef = check_uri(SysId, State#xmerl_sax_parser_state.current_location),
+
+ SaveState = event_callback({startEntity, SysId}, State),
+
+ State1 = State#xmerl_sax_parser_state{line_no=1,
+ continuation_state=undefined,
+ continuation_fun=fun xmerl_sax_parser:default_continuation_cb/1,
+ end_tags = []},
+
+
+ EventState = handle_external_entity(ExtRef, State1),
+
+ NewState = event_callback({endEntity, SysId}, SaveState#xmerl_sax_parser_state{event_state=EventState}),
+ NewState#xmerl_sax_parser_state{file_type=normal}.
+
+
+
+%%----------------------------------------------------------------------
+%% Function : handle_external_entity(ExtRef, State) -> Result
+%% Parameters: ExtRef = {file, string()} | {http, string()}
+%% State = #xmerl_sax_parser_state{}
+%% Result : string() | binary()
+%% Description: Returns working directory, entity and the opened
+%% filedescriptor.
+%%----------------------------------------------------------------------
+handle_external_entity({file, FileToOpen}, State) ->
+
+ case file:open(FileToOpen, [raw, read, binary]) of
+ {error, Reason} ->
+ ?fatal_error(State, "Couldn't open external entity "++ FileToOpen ++ " : "
+ ++ file:format_error(Reason));
+ {ok, FD} ->
+ {?STRING_EMPTY, EntityState} =
+ parse_external_entity_1(<<>>,
+ State#xmerl_sax_parser_state{continuation_state=FD,
+ current_location=filename:dirname(FileToOpen),
+ entity=filename:basename(FileToOpen)}),
+ file:close(FD),
+ EntityState#xmerl_sax_parser_state.event_state
+ end;
+handle_external_entity({http, Url}, State) ->
+
+ try
+ {Host, Port, Key} = http(Url),
+ TmpFile = http_get_file(Host, Port, Key),
+ case file:open(TmpFile, [raw, read, binary]) of
+ {error, Reason} ->
+ ?fatal_error(State, "Couldn't open temporary file " ++ TmpFile ++ " : "
+ ++ file:format_error(Reason));
+ {ok, FD} ->
+ {?STRING_EMPTY, EntityState} =
+ parse_external_entity_1(<<>>,
+ State#xmerl_sax_parser_state{continuation_state=FD,
+ current_location=filename:dirname(Url),
+ entity=filename:basename(Url)}),
+ file:close(FD),
+ file:delete(TmpFile),
+ EntityState#xmerl_sax_parser_state.event_state
+ end
+ catch
+ throw:{error, Error} ->
+ ?fatal_error(State, Error)
+ end;
+handle_external_entity({Tag, _Url}, State) ->
+ ?fatal_error(State, "Unsupported URI type: " ++ atom_to_list(Tag)).
+
+%%----------------------------------------------------------------------
+%% Function : parse_external_entity_1(Rest, State) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Result : {Rest, State}
+%% Description: Parse the external entity.
+%%----------------------------------------------------------------------
+parse_external_entity_1(?STRING_EMPTY, #xmerl_sax_parser_state{file_type=Type} = State) ->
+ case catch cf(?STRING_EMPTY, State, fun parse_external_entity_1/2) of
+ {Rest, State1} when is_record(State1, xmerl_sax_parser_state) ->
+ {Rest, State};
+ {fatal_error, {State1, "No more bytes"}} when Type == dtd; Type == entity ->
+ {?STRING_EMPTY, State1};
+ Other ->
+ throw(Other)
+ end;
+parse_external_entity_1(?BYTE_ORDER_MARK_1, State) ->
+ cf(?BYTE_ORDER_MARK_1, State, fun parse_external_entity_1/2);
+parse_external_entity_1(?BYTE_ORDER_MARK_2, State) ->
+ cf(?BYTE_ORDER_MARK_2, State, fun parse_external_entity_1/2);
+parse_external_entity_1(?BYTE_ORDER_MARK_REST(Rest), State) ->
+ parse_external_entity_1(Rest, State);
+parse_external_entity_1(?STRING("<"), State) ->
+ cf(?STRING("<"), State, fun parse_external_entity_1/2);
+parse_external_entity_1(?STRING("<?"), State) ->
+ cf(?STRING("<?"), State, fun parse_external_entity_1/2);
+parse_external_entity_1(?STRING("<?x"), State) ->
+ cf(?STRING("<?x"), State, fun parse_external_entity_1/2);
+parse_external_entity_1(?STRING("<?xm"), State) ->
+ cf(?STRING("<?xm"), State, fun parse_external_entity_1/2);
+parse_external_entity_1(?STRING("<?xml"), State) ->
+ cf(?STRING("<?xml"), State, fun parse_external_entity_1/2);
+parse_external_entity_1(?STRING_REST("<?xml", Rest) = Bytes,
+ #xmerl_sax_parser_state{file_type=Type} = State) ->
+ {Rest1, State1} =
+ case is_next_char_whitespace(Rest, State) of
+ false ->
+ {Bytes, State};
+ true ->
+ {_XmlAttributes, R, S} = parse_version_info(Rest, State, []),
+ %S1 = event_callback({processingInstruction, "xml", XmlAttributes}, S),% The XML decl. should not be reported as a PI
+ {R, S}
+ end,
+ case Type of
+ dtd ->
+ case catch parse_doctype_decl(Rest1, State1) of
+ {Rest2, State2} when is_record(State2, xmerl_sax_parser_state) ->
+ {Rest2, State2};
+ {fatal_error, {State2, "No more bytes"}} ->
+ {?STRING_EMPTY, State2};
+ Other ->
+ throw(Other)
+ end;
+
+ _ -> % Type is normal or entity
+ parse_content(Rest1, State1, [], true)
+ end;
+parse_external_entity_1(?STRING_UNBOUND_REST(_C, _) = Bytes,
+ #xmerl_sax_parser_state{file_type=Type} = State) ->
+ case Type of
+ normal ->
+ parse_content(Bytes, State, [], true);
+ dtd ->
+ parse_doctype_decl(Bytes, State);
+ entity ->
+ parse_doctype_decl(Bytes, State) end;
+parse_external_entity_1(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_external_entity_1/2],
+ undefined).
+
+%%----------------------------------------------------------------------
+%% Function : is_next_char_whitespace(Bytes, State) -> Result
+%% Parameters: Bytes = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Result : true | false
+%% Description: Checks if first character is whitespace.
+%%----------------------------------------------------------------------
+is_next_char_whitespace(?STRING_UNBOUND_REST(C, _), _) when ?is_whitespace(C) ->
+ true;
+is_next_char_whitespace(?STRING_UNBOUND_REST(_C, _), _) ->
+ false;
+is_next_char_whitespace(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun is_next_char_whitespace/2],
+ undefined).
+
+%%----------------------------------------------------------------------
+%% Function : parse_external_id(Rest, State, OptionalSystemId) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% OptionalSystemId = true | false
+%% Result : {PubId, SysId, Rest, State}
+%% PubId = string()
+%% SysId = string()
+%% Description: Parse an external id. The function is used in two cases one
+%% where the system is optional and one where it's required
+%% after a public id.
+%% [75] ExternalID ::= 'SYSTEM' S SystemLiteral
+%% | 'PUBLIC' S PubidLiteral S SystemLiteral
+%%----------------------------------------------------------------------
+parse_external_id(?STRING_EMPTY, State, OptionalSystemId) ->
+ cf(?STRING_EMPTY, State, OptionalSystemId, fun parse_external_id/3);
+parse_external_id(?STRING("S"), State,OptionalSystemId) ->
+ cf(?STRING("S"), State, OptionalSystemId, fun parse_external_id/3);
+parse_external_id(?STRING("SY"), State, OptionalSystemId) ->
+ cf(?STRING("SY"), State, OptionalSystemId, fun parse_external_id/3);
+parse_external_id(?STRING("SYS"), State, OptionalSystemId) ->
+ cf(?STRING("SYS"), State, OptionalSystemId, fun parse_external_id/3);
+parse_external_id(?STRING("SYST"), State, OptionalSystemId) ->
+ cf(?STRING("SYST"), State, OptionalSystemId, fun parse_external_id/3);
+parse_external_id(?STRING("SYSTE"), State, OptionalSystemId) ->
+ cf(?STRING("SYSTE"), State, OptionalSystemId, fun parse_external_id/3);
+parse_external_id(?STRING_REST("SYSTEM", Rest), State, _) ->
+ {SysId, Rest1, State1} = parse_system_id(Rest, State, false),
+ {"", SysId, Rest1, State1};
+parse_external_id(?STRING("P"), State, OptionalSystemId) ->
+ cf(?STRING("P"), State, OptionalSystemId, fun parse_external_id/3);
+parse_external_id(?STRING("PU"), State, OptionalSystemId) ->
+ cf(?STRING("PU"), State, OptionalSystemId, fun parse_external_id/3);
+parse_external_id(?STRING("PUB"), State, OptionalSystemId) ->
+ cf(?STRING("PUB"), State, OptionalSystemId, fun parse_external_id/3);
+parse_external_id(?STRING("PUBL"), State, OptionalSystemId) ->
+ cf(?STRING("PUBL"), State, OptionalSystemId, fun parse_external_id/3);
+parse_external_id(?STRING("PUBLI"), State, OptionalSystemId) ->
+ cf(?STRING("PUBLI"), State, OptionalSystemId, fun parse_external_id/3);
+parse_external_id(?STRING_REST("PUBLIC", Rest), State, OptionalSystemId) ->
+ parse_public_id(Rest, State, OptionalSystemId);
+parse_external_id(Bytes, State, OptionalSystemId) ->
+ unicode_incomplete_check([Bytes, State, OptionalSystemId, fun parse_external_id/3],
+ "expecting SYSTEM or PUBLIC").
+
+
+%%----------------------------------------------------------------------
+%% Function : parse_system_id(Rest, State, OptionalSystemId) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% OptionalSystemId = true | false
+%% Result : {SysId, Rest, State}
+%% SysId = string()
+%% Description: Parse a system id. The function is used in two cases one
+%% where the system is optional and one where it's required.
+%%----------------------------------------------------------------------
+parse_system_id(?STRING_UNBOUND_REST(C, _) = Bytes, State, OptionalSystemId) when ?is_whitespace(C) ->
+ {_WS, Rest, State1} = whitespace(Bytes, State, []),
+ check_system_litteral(Rest, State1, OptionalSystemId);
+parse_system_id(?STRING_UNBOUND_REST(_C, _) = Bytes, State, true) ->
+ {"", Bytes, State};
+parse_system_id(Bytes, State, OptionalSystemId) ->
+ unicode_incomplete_check([Bytes, State, OptionalSystemId, fun parse_system_id/3],
+ "whitespace expected").
+
+check_system_litteral(?STRING_UNBOUND_REST(C, Rest), State, _OptionalSystemId) when C == $'; C == $" ->
+ parse_system_litteral(Rest, State, C, []);
+check_system_litteral(?STRING_UNBOUND_REST(_C, _) = Bytes, State, true) ->
+ {"", Bytes, State};
+check_system_litteral(Bytes, State, OptionalSystemId) ->
+ unicode_incomplete_check([Bytes, State, OptionalSystemId, fun check_system_litteral/3],
+ "\" or \' expected").
+
+
+%%----------------------------------------------------------------------
+%% Function : parse_public_id(Rest, State, OptionalSystemId) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% OptionalSystemId = true | false
+%% Result : {PubId, SysId, Rest, State}
+%% PubId = string()
+%% SysId = string()
+%% Description: Parse a public id. The function is used in two cases one
+%% where the following system is optional and one where it's required.
+%%----------------------------------------------------------------------
+parse_public_id(?STRING_UNBOUND_REST(C, _) = Bytes, State, OptionalSystemId) when ?is_whitespace(C) ->
+ {_WS, Rest, State1} = whitespace(Bytes, State, []),
+ check_public_litteral(Rest, State1, OptionalSystemId);
+parse_public_id(Bytes, State,OptionalSystemId) ->
+ unicode_incomplete_check([Bytes, State, OptionalSystemId, fun parse_public_id/3],
+ "whitespace expected").
+
+
+check_public_litteral(?STRING_UNBOUND_REST(C, Rest), State, OptionalSystemId) when C == $'; C == $" ->
+ {PubId, Rest1, State1} = parse_pubid_litteral(Rest, State, C, []),
+ {SysId, Rest2, State2} = parse_system_id(Rest1, State1, OptionalSystemId),
+ {PubId, SysId, Rest2, State2};
+check_public_litteral(Bytes, State, OptionalSystemId) ->
+ unicode_incomplete_check([Bytes, State, OptionalSystemId, fun check_public_litteral/3],
+ "\" or \' expected").
+
+
+%%----------------------------------------------------------------------
+%% Function : parse_doctype_decl(Rest, State) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Result : {Rest, State}
+%% Description: Parse the DOCTYPE declaration part
+%% [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl
+%% | NotationDecl | PI | Comment
+%%----------------------------------------------------------------------
+parse_doctype_decl(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_doctype_decl/2);
+parse_doctype_decl(?STRING("<"), State) ->
+ cf(?STRING("<"), State, fun parse_doctype_decl/2);
+parse_doctype_decl(?STRING_REST("<?", Rest), State) ->
+ {Rest1, State1} = parse_pi(Rest, State),
+ parse_doctype_decl(Rest1, State1);
+parse_doctype_decl(?STRING_REST("%", Rest), State) ->
+ {Ref, Rest1, State1} = parse_pe_reference(Rest, State),
+ case Ref of
+ {internal_parameter, _, RefValue} ->
+ IValue = ?TO_INPUT_FORMAT(" " ++ RefValue ++ " "),
+ parse_doctype_decl(?APPEND_STRING(IValue, Rest1), State1);
+ {external_parameter, _, {PubId, SysId}} ->
+ State2 = parse_external_entity(State1#xmerl_sax_parser_state{file_type = entity}, PubId, SysId),
+ parse_doctype_decl(Rest1, State2)
+ end;
+parse_doctype_decl(?STRING_REST("<!", Rest1), State) ->
+ parse_doctype_decl_1(Rest1, State);
+parse_doctype_decl(?STRING_REST("]", Rest), State) ->
+ {Rest, State};
+parse_doctype_decl(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
+ {_WS, Rest, State1} = whitespace(Bytes, State, []),
+ parse_doctype_decl(Rest, State1);
+parse_doctype_decl(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_doctype_decl/2],
+ "expecting ELEMENT, ATTLIST, ENTITY, NOTATION or comment").
+
+
+%%----------------------------------------------------------------------
+%% Function : parse_doctype_decl_1(Rest, State) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Result : {Rest, State}
+%% Description: Main switching function for the different markup declarations
+%% of the DOCTYPE.
+%%----------------------------------------------------------------------
+parse_doctype_decl_1(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_doctype_decl_1/2);
+
+parse_doctype_decl_1(?STRING("E"), State) ->
+ cf(?STRING("E"), State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("EL"), State) ->
+ cf(?STRING("EL"), State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("ELE"), State) ->
+ cf(?STRING("ELE"), State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("ELEM"), State) ->
+ cf(?STRING("ELEM"), State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("ELEME"), State) ->
+ cf(?STRING("ELEME"), State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("ELEMEN"), State) ->
+ cf(?STRING("ELEMEN"), State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING_REST("ELEMENT", Rest), State) ->
+ {Rest1, State1} = parse_element_decl(Rest, State),
+ parse_doctype_decl(Rest1, State1);
+
+parse_doctype_decl_1(?STRING("A"), State) ->
+ cf(?STRING("A"), State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("AT"), State) ->
+ cf(?STRING("AT"), State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("ATT"), State) ->
+ cf(?STRING("ATT"), State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("ATTL"), State) ->
+ cf(?STRING("ATTL"), State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("ATTLI"), State) ->
+ cf(?STRING("ATTLI"), State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("ATTLIS"), State) ->
+ cf(?STRING("ATTLIS"), State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING_REST("ATTLIST", Rest), State) ->
+ {Rest1, State1} = parse_att_list_decl(Rest, State),
+ parse_doctype_decl(Rest1, State1);
+
+%% E clause not needed here because already taken care of above.
+parse_doctype_decl_1(?STRING("EN"), State) ->
+ cf(?STRING("EN"), State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("ENT"), State) ->
+ cf(?STRING("ENT"), State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("ENTI"), State) ->
+ cf(?STRING("ENTI"), State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("ENTIT"), State) ->
+ cf(?STRING("ENTIT"), State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING_REST("ENTITY", Rest), State) ->
+ {Rest1, State1} = parse_entity_decl(Rest, State),
+ parse_doctype_decl(Rest1, State1);
+
+parse_doctype_decl_1(?STRING("N"), State) ->
+ cf(?STRING("N"), State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("NO"), State) ->
+ cf(?STRING("NO"), State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("NOT"), State) ->
+ cf(?STRING("NOT"), State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("NOTA"), State) ->
+ cf(?STRING("NOTA"), State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("NOTAT"), State) ->
+ cf(?STRING("NOTAT"), State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("NOTATI"), State) ->
+ cf(?STRING("NOTATI"), State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING("NOTATIO"), State) ->
+ cf(?STRING("NOTATIO"), State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING_REST("NOTATION", Rest), State) ->
+ {Rest1, State1} = parse_notation_decl(Rest, State),
+ parse_doctype_decl(Rest1, State1);
+parse_doctype_decl_1(?STRING("-"), State) ->
+ cf(?STRING("-"), State, fun parse_doctype_decl_1/2);
+parse_doctype_decl_1(?STRING_REST("--", Rest), State) ->
+ {Rest1, State1} = parse_comment(Rest, State, []),
+ parse_doctype_decl(Rest1, State1);
+parse_doctype_decl_1(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_doctype_decl_1/2],
+ "expecting ELEMENT, ATTLIST, ENTITY, NOTATION or comment").
+
+
+%%----------------------------------------------------------------------
+%% Function : parse_element_decl(Rest, State) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Result : {Rest, State}
+%% Description: Parse element declarations.
+%% [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
+%%----------------------------------------------------------------------
+parse_element_decl(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_element_decl/2);
+parse_element_decl(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
+ {_WS, Rest, State1} = whitespace(Bytes, State, []),
+ parse_element_decl_1(Rest, State1);
+parse_element_decl(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_element_decl/2],
+ "whitespace expected").
+
+parse_element_decl_1(?STRING_UNBOUND_REST(C, Rest), State) ->
+ case is_name_start(C) of
+ true ->
+ {Name, Rest1, State1} = parse_name(Rest, State, [C]),
+ {Model, Rest2, State2} = parse_element_content(Rest1, State1),
+ State3 = event_callback({elementDecl, Name, Model}, State2),
+ {Rest2, State3};
+ false ->
+ ?fatal_error(State, "name expected")
+ end;
+parse_element_decl_1(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_element_decl_1/2],
+ undefined).
+
+
+%%----------------------------------------------------------------------
+%% Function : parse_element_content(Rest, State) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Result : {Rest, State}
+%% Description: Parse contents of an element declaration.
+%% [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
+%%----------------------------------------------------------------------
+parse_element_content(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_element_content/2);
+parse_element_content(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
+ {_WS, Rest, State1} = whitespace(Bytes, State, []),
+ parse_element_content_1(Rest, State1, []);
+parse_element_content(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_element_content/2],
+ "whitespace expected").
+
+
+%%----------------------------------------------------------------------
+%% Function : parse_element_content_1(Rest, State, Acc) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Acc = string()
+%% Result : {Content, Rest, State}
+%% Content = string()
+%% Description: Parse contents of an element declaration.
+%%----------------------------------------------------------------------
+parse_element_content_1(?STRING_EMPTY, State, Acc) ->
+ cf(?STRING_EMPTY, State, Acc, fun parse_element_content_1/3);
+parse_element_content_1(?STRING_REST(">", Rest), State, Acc) ->
+ {lists:reverse(Acc), Rest, State};
+parse_element_content_1(?STRING_UNBOUND_REST(C, Rest), State, Acc) ->
+ parse_element_content_1(Rest, State, [C|Acc]);
+parse_element_content_1(Bytes, State, Acc) ->
+ unicode_incomplete_check([Bytes, State, Acc, fun parse_element_content_1/3],
+ undefined).
+
+
+%%----------------------------------------------------------------------
+%% Function : parse_att_list_decl(Rest, State) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Result : {Rest, State}
+%% Description: Parse an attribute list declaration.
+%% [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
+%%----------------------------------------------------------------------
+parse_att_list_decl(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_att_list_decl/2);
+parse_att_list_decl(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
+ {_WS, Rest, State1} = whitespace(Bytes, State, []),
+ parse_att_list_decl_1(Rest, State1);
+parse_att_list_decl(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_att_list_decl/2],
+ "whitespace expected").
+
+
+parse_att_list_decl_1(?STRING_UNBOUND_REST(C, Rest), State) ->
+ case is_name_start(C) of
+ true ->
+ {ElementName, Rest1, State1} = parse_name(Rest, State, [C]),
+ parse_att_defs(Rest1, State1, ElementName);
+ false ->
+ ?fatal_error(State, "name expected")
+ end;
+parse_att_list_decl_1(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_att_list_decl_1/2],
+ undefined).
+
+
+%%----------------------------------------------------------------------
+%% Function : parse_att_defs(Rest, State, ElementName) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% ElementName = string()
+%% Result : {Rest, State}
+%% Description: Parse an attribute definition.
+%% [53] AttDef ::= S Name S AttType S DefaultDecl
+%%----------------------------------------------------------------------
+parse_att_defs(?STRING_EMPTY, State, ElementName) ->
+ cf(?STRING_EMPTY, State, ElementName, fun parse_att_defs/3);
+parse_att_defs(?STRING_REST(">", Rest), State, _ElementName) ->
+ {Rest, State};
+parse_att_defs(?STRING_UNBOUND_REST(C, _) = Rest, State, ElementName) when ?is_whitespace(C) ->
+ {_WS, Rest1, State1} = whitespace(Rest, State, []),
+ parse_att_defs(Rest1, State1, ElementName);
+parse_att_defs(?STRING_UNBOUND_REST(C, Rest), State, ElementName) ->
+ case is_name_start(C) of
+ true ->
+ {AttrName, Rest1, State1} = parse_name(Rest, State, [C]),
+ {Type, Rest2, State2} = parse_att_type(Rest1, State1),
+ {Mode, Value, Rest3, State3} = parse_default_decl(Rest2, State2),
+ State4 = event_callback({attributeDecl, ElementName, AttrName, Type, Mode, Value}, State3),
+ parse_att_defs(Rest3, State4, ElementName);
+ false ->
+ ?fatal_error(State, "whitespace or name expected")
+ end;
+parse_att_defs(Bytes, State, ElementName) ->
+ unicode_incomplete_check([Bytes, State, ElementName, fun parse_att_defs/3],
+ undefined).
+
+
+%%----------------------------------------------------------------------
+%% Function : parse_att_type(Rest, State) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Result : {Type, Rest, State}
+%% Type = string()
+%% Description: Parse an attribute type.
+%% [54] AttType ::= StringType | TokenizedType | EnumeratedType
+%% [55] StringType ::= 'CDATA'
+%% [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY'
+%% | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
+%% [57] EnumeratedType ::= NotationType | Enumeration
+%% [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
+%% [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
+%%----------------------------------------------------------------------
+parse_att_type(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_att_type/2);
+parse_att_type(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
+ {_WS, Rest, State1} = whitespace(Bytes, State, []),
+ case parse_att_type_1(Rest, State1, []) of
+ {Type, Rest1, State2} when Type == "("; Type == "NOTATION" ->
+ {T, Rest2, State3} = parse_until_right_paren(Rest1, State2, []),
+ {Type ++ T, Rest2, State3};
+ {Type, Rest1, State2} ->
+ case check_att_type(Type) of
+ true ->
+ {Type, Rest1, State2};
+ false ->
+ ?fatal_error(State2, "wrong attribute type")
+ end
+ end;
+parse_att_type(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_att_type/2],
+ "whitespace expected").
+
+
+%%----------------------------------------------------------------------
+%% Function : parse_att_type_1(Rest, State, Acc) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Acc = string()
+%% Result : {Type, Rest, State}
+%% Type = string()
+%% Description: Parse an attribute type.
+%%----------------------------------------------------------------------
+parse_att_type_1(?STRING_EMPTY, State, Acc) ->
+ cf(?STRING_EMPTY, State, Acc, fun parse_att_type_1/3);
+parse_att_type_1(?STRING_UNBOUND_REST(C, _) = Bytes, State, Acc) when ?is_whitespace(C) ->
+ {lists:reverse(Acc), Bytes, State};
+parse_att_type_1(?STRING_REST("(", Rest), State, []) ->
+ {"(", Rest, State};
+parse_att_type_1(?STRING_UNBOUND_REST(C, Rest), State, Acc) ->
+ parse_att_type_1(Rest, State, [C|Acc]);
+parse_att_type_1(Bytes, State, Acc) ->
+ unicode_incomplete_check([Bytes, State, Acc, fun parse_att_type_1/3],
+ undefined).
+
+%%----------------------------------------------------------------------
+%% Function : check_att_type(Type) -> Result
+%% Parameters: Type = string()
+%% Result : true | false
+%% Description:Check if an attribute type is valid.
+%%----------------------------------------------------------------------
+check_att_type("CDATA") ->
+ true;
+check_att_type("ID") ->
+ true;
+check_att_type("IDREF") ->
+ true;
+check_att_type("IDREFS") ->
+ true;
+check_att_type("ENTITY") ->
+ true;
+check_att_type("ENTITIES") ->
+ true;
+check_att_type("NMTOKEN") ->
+ true;
+check_att_type("NMTOKENS") ->
+ true;
+check_att_type(_) ->
+ false.
+
+
+%%----------------------------------------------------------------------
+%% Function : parse_until_right_paren(Rest, State, Acc) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Acc = string()
+%% Result : {Type, Rest, State}
+%% Type = string()
+%% Description: Parse an enumurated type until ')'.
+%%----------------------------------------------------------------------
+parse_until_right_paren(?STRING_EMPTY, State, Acc) ->
+ cf(?STRING_EMPTY, State, Acc, fun parse_until_right_paren/3);
+parse_until_right_paren(?STRING_REST(")", Rest), State, Acc) ->
+ {lists:reverse(")" ++ Acc), Rest, State};
+parse_until_right_paren(?STRING_UNBOUND_REST(C, Rest), State, Acc) ->
+ parse_until_right_paren(Rest, State, [C|Acc]);
+parse_until_right_paren(Bytes, State, Acc) ->
+ unicode_incomplete_check([Bytes, State, Acc, fun parse_until_right_paren/3],
+ undefined).
+
+
+%%----------------------------------------------------------------------
+%% Function : parse_default_decl(Rest, State) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Result : {Default, Rest, State}
+%% Default = string()
+%% Description: Parse a default declaration.
+%% [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
+%%----------------------------------------------------------------------
+parse_default_decl(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_default_decl/2);
+parse_default_decl(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
+ {_WS, Rest, State1} = whitespace(Bytes, State, []),
+ parse_default_decl_1(Rest, State1);
+parse_default_decl(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_default_decl/2],
+ "whitespace expected").
+
+
+%%----------------------------------------------------------------------
+%% Function : parse_default_decl_1(Rest, State) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Result : {Default, Rest, State}
+%% Default = string()
+%% Description: Parse a default declaration.
+%%----------------------------------------------------------------------
+parse_default_decl_1(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_default_decl_1/2);
+parse_default_decl_1(?STRING_REST("#", Rest), State) ->
+ case Rest of
+ ?STRING("R") ->
+ cf(Rest, State, fun parse_default_decl_1/2);
+ ?STRING("RE") ->
+ cf(Rest, State, fun parse_default_decl_1/2);
+ ?STRING("REQ") ->
+ cf(Rest, State, fun parse_default_decl_1/2);
+ ?STRING("REQU") ->
+ cf(Rest, State, fun parse_default_decl_1/2);
+ ?STRING("REQUI") ->
+ cf(Rest, State, fun parse_default_decl_1/2);
+ ?STRING("REQUIR") ->
+ cf(Rest, State, fun parse_default_decl_1/2);
+ ?STRING("REQUIRE") ->
+ cf(Rest, State, fun parse_default_decl_1/2);
+ ?STRING_REST("REQUIRED", Rest1) ->
+ {"#REQUIRED", undefined, Rest1, State};
+
+ ?STRING("I") ->
+ cf(Rest, State, fun parse_default_decl_1/2);
+ ?STRING("IM") ->
+ cf(Rest, State, fun parse_default_decl_1/2);
+ ?STRING("IMP") ->
+ cf(Rest, State, fun parse_default_decl_1/2);
+ ?STRING("IMPL") ->
+ cf(Rest, State, fun parse_default_decl_1/2);
+ ?STRING("IMPLI") ->
+ cf(Rest, State, fun parse_default_decl_1/2);
+ ?STRING("IMPLIE") ->
+ cf(Rest, State, fun parse_default_decl_1/2);
+ ?STRING_REST("IMPLIED", Rest1) ->
+ {"#IMPLIED", undefined, Rest1, State};
+
+ ?STRING("F") ->
+ cf(Rest, State, fun parse_default_decl_1/2);
+ ?STRING("FI") ->
+ cf(Rest, State, fun parse_default_decl_1/2);
+ ?STRING("FIX") ->
+ cf(Rest, State, fun parse_default_decl_1/2);
+ ?STRING("FIXE") ->
+ cf(Rest, State, fun parse_default_decl_1/2);
+ ?STRING_REST("FIXED", Rest1) ->
+ parse_fixed(Rest1, State);
+ _ ->
+ ?fatal_error(State, "REQUIRED, IMPLIED or FIXED expected")
+ end;
+parse_default_decl_1(?STRING_UNBOUND_REST(C, Rest), State) when C == $'; C == $" ->
+ {DefaultValue, Rest1, State1} = parse_att_value(Rest, State, C, []),
+ {"", DefaultValue, Rest1, State1};
+parse_default_decl_1(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_default_decl_1/2],
+ "bad default declaration").
+
+
+parse_fixed(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
+ {DefaultValue, Rest, State1} = parse_att_value(Bytes, State), % parse_att_value removes leading WS
+ {"#FIXED", DefaultValue, Rest, State1};
+parse_fixed(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_fixed/2],
+ "whitespace expected").
+
+%%----------------------------------------------------------------------
+%% Function : parse_entity_decl(Rest, State) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Result : {Rest, State}
+%% Description: Parse an entity declaration.
+%% [70] EntityDecl ::= GEDecl | PEDecl
+%% [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
+%% [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
+%%----------------------------------------------------------------------
+parse_entity_decl(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_entity_decl/2);
+parse_entity_decl(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
+ {_WS, Rest, State1} = whitespace(Bytes, State, []),
+ parse_entity_decl_1(Rest, State1);
+parse_entity_decl(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_entity_decl/2],
+ "whitespace expected").
+
+
+%%----------------------------------------------------------------------
+%% Function : parse_entity_decl_1(Rest, State) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Result : {Rest, State}
+%% Description: Parse an entity declaration.
+%%----------------------------------------------------------------------
+parse_entity_decl_1(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_entity_decl_1/2);
+parse_entity_decl_1(?STRING_REST("%", Rest), State) ->
+ case is_next_char_whitespace(Rest, State) of
+ true ->
+ {_WS, Rest1, State1} = whitespace(Rest, State, []),
+ parse_pe_name(Rest1, State1);
+ false ->
+ ?fatal_error(State, "whitespace expected")
+ end;
+parse_entity_decl_1(?STRING_UNBOUND_REST(C, Rest), State) ->
+ case is_name_start(C) of
+ true ->
+ {Name, Rest1, State1} = parse_name(Rest, State, [C]),
+ case is_next_char_whitespace(Rest1, State1) of
+ true ->
+ {_WS, Rest2, State2} = whitespace(Rest1, State1, []),
+ parse_entity_def(Rest2, State2, Name);
+ false ->
+ ?fatal_error(State1, "whitespace expected")
+ end;
+ false ->
+ ?fatal_error(State, "name or % expected")
+ end;
+parse_entity_decl_1(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_entity_decl_1/2],
+ undefined).
+
+
+
+
+parse_pe_name(?STRING_UNBOUND_REST(C, Rest), State) ->
+ case is_name_start(C) of
+ true ->
+ {Name, Rest1, State1} = parse_name(Rest, State, [C]),
+ case is_next_char_whitespace(Rest1, State1) of
+ true ->
+ {_WS, Rest2, State2} = whitespace(Rest1, State1, []),
+ parse_pe_def(Rest2, State2, Name);
+ false ->
+ ?fatal_error(State1, "whitespace expected")
+ end;
+ false ->
+ ?fatal_error(State, "name expected")
+ end;
+parse_pe_name(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_pe_name/2],
+ undefined).
+
+
+
+%%----------------------------------------------------------------------
+%% Function : parse_entity_def(Rest, State, Name) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Name = string()
+%% Result : {Rest, State}
+%% Description: Parse an entity definition.
+%% [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
+%%----------------------------------------------------------------------
+parse_entity_def(?STRING_EMPTY, State, Name) ->
+ cf(?STRING_EMPTY, State, Name, fun parse_entity_def/3);
+parse_entity_def(?STRING_UNBOUND_REST(C, Rest), State, Name) when C == $'; C == $" ->
+ {Value, Rest1, State1} = parse_entity_value(Rest, State, C, []),
+ insert_reference({Name, internal_general, Value}, State1#xmerl_sax_parser_state.ref_table),
+ State2 = event_callback({internalEntityDecl, Name, Value}, State1),
+ {_WS, Rest2, State3} = whitespace(Rest1, State2, []),
+ parse_def_end(Rest2, State3);
+parse_entity_def(?STRING_UNBOUND_REST(C, _) = Rest, State, Name) when C == $S; C == $P ->
+ {PubId, SysId, Rest1, State1} = parse_external_id(Rest, State, false),
+ {Ndata, Rest2, State2} = parse_ndata(Rest1, State1),
+ case Ndata of
+ undefined ->
+ insert_reference({Name, external_general, {PubId, SysId}},
+ State2#xmerl_sax_parser_state.ref_table),
+ State3 = event_callback({externalEntityDecl, Name, PubId, SysId}, State2),
+ {Rest2, State3};
+ _ ->
+ insert_reference({Name, unparsed, {PubId, SysId, Ndata}},
+ State2#xmerl_sax_parser_state.ref_table),
+ State3 = event_callback({unparsedEntityDecl, Name, PubId, SysId, Ndata}, State2),
+ {Rest2, State3}
+ end;
+parse_entity_def(Bytes, State, Name) ->
+ unicode_incomplete_check([Bytes, State, Name, fun parse_entity_def/3],
+ "\", \', SYSTEM or PUBLIC expected").
+
+
+parse_def_end(?STRING_REST(">", Rest), State) ->
+ {Rest, State};
+parse_def_end(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_def_end/2],
+ "> expected").
+
+
+
+%%----------------------------------------------------------------------
+%% Function : parse_ndata(Rest, State) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Result : {Rest, State}
+%% Description: Parse an NDATA declaration.
+%% [76] NDataDecl ::= S 'NDATA' S Name
+%%----------------------------------------------------------------------
+parse_ndata(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_ndata/2);
+parse_ndata(?STRING_REST(">", Rest), State) ->
+ {undefined, Rest, State};
+parse_ndata(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
+ {_WS, Rest1, State1} = whitespace(Bytes, State, []),
+ parse_ndata_decl(Rest1, State1);
+parse_ndata(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_ndata/2],
+ "Space before NDATA or > expected").
+
+%%----------------------------------------------------------------------
+%% Function : parse_entity_value(Rest, State, Stop, Acc) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Stop = $' | $"
+%% Acc = string()
+%% Result : {Value, Rest, State}
+%% Value = string()
+%% Description: Parse an attribute value
+%%----------------------------------------------------------------------
+parse_entity_value(?STRING_EMPTY, State, undefined, Acc) ->
+ {Acc, [], State}; %% stop clause when parsing references
+parse_entity_value(?STRING_EMPTY, State, Stop, Acc) ->
+ cf(?STRING_EMPTY, State, Stop, Acc, fun parse_entity_value/4);
+parse_entity_value(?STRING("\r"), State, Stop, Acc) ->
+ cf(?STRING("\r"), State, Stop, Acc, fun parse_entity_value/4);
+parse_entity_value(?STRING_REST("\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc) ->
+ parse_entity_value(Rest,
+ State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
+parse_entity_value(?STRING_REST("\r\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc) ->
+ parse_entity_value(Rest,
+ State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
+parse_entity_value(?STRING_REST("\r", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc) ->
+ parse_entity_value(Rest,
+ State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
+parse_entity_value(?STRING_REST("\t", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc) ->
+ parse_entity_value(Rest,
+ State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
+parse_entity_value(?STRING_REST("&", Rest), State, Stop, Acc) ->
+ {Ref, Rest1, State1} = parse_reference(Rest, State, false),
+ case Ref of
+ {character, _, CharValue} ->
+ parse_entity_value(Rest1, State1, Stop, [CharValue | Acc]);
+ {internal_general, _, Name, _} ->
+ parse_entity_value(Rest1, State1, Stop, ";" ++ lists:reverse(Name) ++ "&" ++ Acc);
+ {external_general, Name, _} ->
+ parse_entity_value(Rest1, State1, Stop, ";" ++ lists:reverse(Name) ++ "&" ++ Acc);
+ {not_found, Name} ->
+ parse_entity_value(Rest1, State1, Stop, ";" ++ lists:reverse(Name) ++ "&" ++ Acc);
+ {unparsed, Name, _} ->
+ ?fatal_error(State1, "Unparsed entity reference in entity value: " ++ Name)
+ end;
+parse_entity_value(?STRING_REST("%", Rest), #xmerl_sax_parser_state{file_type=Type} = State, Stop, Acc) ->
+ {Ref, Rest1, State1} = parse_pe_reference(Rest, State),
+ case Type of
+ normal -> %WFC: PEs in Internal Subset
+ {_, Name, _} = Ref,
+ ?fatal_error(State1, "A parameter reference may not occur not within "
+ "markup declarations in the internal DTD subset: " ++ Name);
+ _ ->
+ case Ref of
+ {internal_parameter, _, RefValue} ->
+ IValue = ?TO_INPUT_FORMAT(" " ++ RefValue ++ " "),
+ parse_entity_value(?APPEND_STRING(IValue, Rest1), State1, Stop, Acc);
+ {external_parameter, _, {_PubId, _SysId}} ->
+ ?fatal_error(State1, "Parameter references in entity value not supported yet.")
+ end
+ end;
+parse_entity_value(?STRING_UNBOUND_REST(Stop, Rest), State, Stop, Acc) ->
+ {lists:reverse(Acc), Rest, State};
+parse_entity_value(?STRING_UNBOUND_REST(C, Rest), State, Stop, Acc) ->
+ parse_entity_value(Rest, State, Stop, [C|Acc]);
+parse_entity_value(Bytes, State, Stop, Acc) ->
+ unicode_incomplete_check([Bytes, State, Stop, Acc, fun parse_entity_value/4],
+ undefined).
+
+%%----------------------------------------------------------------------
+%% Function : parse_ndata_decl(Rest, State) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Result : {Name, Rest, State}
+%% Name = string()
+%% Description: Parse an NDATA declaration.
+%% [76] NDataDecl ::= S 'NDATA' S Name
+%%----------------------------------------------------------------------
+parse_ndata_decl(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_ndata_decl/2);
+parse_ndata_decl(?STRING_REST(">", Rest), State) ->
+ {undefined, Rest, State};
+parse_ndata_decl(?STRING("N") = Rest, State) ->
+ cf(Rest, State, fun parse_ndata_decl/2);
+parse_ndata_decl(?STRING("ND") = Rest, State) ->
+ cf(Rest, State, fun parse_ndata_decl/2);
+parse_ndata_decl(?STRING("NDA") = Rest, State) ->
+ cf(Rest, State, fun parse_ndata_decl/2);
+parse_ndata_decl(?STRING("NDAT") = Rest, State) ->
+ cf(Rest, State, fun parse_ndata_decl/2);
+parse_ndata_decl(?STRING_REST("NDATA", Rest), State) ->
+ parse_ndata_decl_1(Rest, State);
+parse_ndata_decl(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_ndata_decl/2],
+ "NDATA or > expected").
+
+
+parse_ndata_decl_1(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
+ {_WS, Rest, State1} = whitespace(Bytes, State, []),
+ parse_ndecl_name(Rest, State1);
+parse_ndata_decl_1(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_ndata_decl/2],
+ "whitespace expected").
+
+
+parse_ndecl_name(?STRING_UNBOUND_REST(C, Rest), State) ->
+ case is_name_start(C) of
+ true ->
+ {Name, Rest1, State1} = parse_name(Rest, State, [C]),
+ {_WS, Rest2, State2} = whitespace(Rest1, State1, []),
+ {Rest3, State3} = parse_def_end(Rest2, State2),
+ {Name, Rest3, State3};
+ false ->
+ ?fatal_error(State, "name expected")
+ end;
+parse_ndecl_name(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_ndecl_name/2],
+ undefined).
+
+%%----------------------------------------------------------------------
+%% Function : parse_pe_def(Rest, State, Name) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Name = string()
+%% Result : {Rest, State}
+%% Description: Parse an parameter entity definition.
+%% [74] PEDef ::= EntityValue | ExternalID
+%%----------------------------------------------------------------------
+parse_pe_def(?STRING_EMPTY, State, Name) ->
+ cf(?STRING_EMPTY, State, Name, fun parse_pe_def/3);
+parse_pe_def(?STRING_UNBOUND_REST(C, Rest), State, Name) when C == $'; C == $" ->
+ {Value, Rest1, State1} = parse_entity_value(Rest, State, C, []),
+ Name1 = "%" ++ Name,
+ insert_reference({Name1, internal_parameter, Value},
+ State1#xmerl_sax_parser_state.ref_table),
+ State2 = event_callback({internalEntityDecl, Name1, Value}, State1),
+ {_WS, Rest2, State3} = whitespace(Rest1, State2, []),
+ parse_def_end(Rest2, State3);
+parse_pe_def(?STRING_UNBOUND_REST(C, _) = Bytes, State, Name) when C == $S; C == $P ->
+ {PubId, SysId, Rest1, State1} = parse_external_id(Bytes, State, false),
+ Name1 = "%" ++ Name,
+ insert_reference({Name1, external_parameter, {PubId, SysId}},
+ State1#xmerl_sax_parser_state.ref_table),
+ State2 = event_callback({externalEntityDecl, Name1, PubId, SysId}, State1),
+ {_WS, Rest2, State3} = whitespace(Rest1, State2, []),
+ parse_def_end(Rest2, State3);
+parse_pe_def(Bytes, State, Name) ->
+ unicode_incomplete_check([Bytes, State, Name, fun parse_pe_def/3],
+ "\", \', SYSTEM or PUBLIC expected").
+
+
+%%----------------------------------------------------------------------
+%% Function : parse_notation_decl(Rest, State) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Result : {Rest, State}
+%% Description: Parse a NOTATION declaration.
+%% [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
+%%----------------------------------------------------------------------
+parse_notation_decl(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_notation_decl/2);
+parse_notation_decl(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
+ {_WS, Rest, State1} = whitespace(Bytes, State, []),
+ parse_notation_decl_1(Rest, State1);
+parse_notation_decl(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_notation_decl/2],
+ "whitespace expected").
+
+
+parse_notation_decl_1(?STRING_UNBOUND_REST(C, Rest), State) ->
+ case is_name_start(C) of
+ true ->
+ {Name, Rest1, State1} = parse_name(Rest, State, [C]),
+ {PubId, SysId, Rest2, State2} = parse_notation_id(Rest1, State1),
+ State3 = event_callback({notationDecl, Name, PubId, SysId}, State2),
+ {Rest2, State3};
+ false ->
+ ?fatal_error(State, "name expected")
+ end;
+parse_notation_decl_1(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_notation_decl_1/2],
+ undefined).
+
+%%----------------------------------------------------------------------
+%% Function : parse_notation_id(Rest, State) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Result : {PubId, SysId, Rest, State}
+%% PubId = string()
+%% SysId = string()
+%% Description: Parse a NOTATION identity. The public id case is a special
+%% variant of extenal id where just the public part is allowed.
+%% This is allowed if the third parameter in parse_external_id/3
+%% is true.
+%% [83] PublicID ::= 'PUBLIC' S PubidLiteral
+%%----------------------------------------------------------------------
+parse_notation_id(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_notation_id/2);
+%parse_notation_id(?STRING_REST(">", Rest), State) ->
+% {"", "", Rest, State};
+parse_notation_id(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
+ {_WS, Rest, State1} = whitespace(Bytes, State, []),
+ parse_notation_id_1(Rest, State1);
+parse_notation_id(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_notation_id/2],
+ "whitespace expected").
+
+%%----------------------------------------------------------------------
+%% Function : parse_notation_id_1(Rest, State) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Result : {PubId, SysId, Rest, State}
+%% PubId = string()
+%% SysId = string()
+%% Description: Parse a NOTATION identity.
+%%----------------------------------------------------------------------
+parse_notation_id_1(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_notation_id_1/2);
+parse_notation_id_1(?STRING_UNBOUND_REST(C, _) = Bytes, State) when C == $S; C == $P ->
+ {PubId, SysId, Rest1, State1} = parse_external_id(Bytes, State, true),
+ {_WS, Rest2, State2} = whitespace(Rest1, State1, []),
+ {Rest3, State3} = parse_def_end(Rest2, State2),
+ {PubId, SysId, Rest3, State3};
+%parse_notation_id_1(?STRING_REST(">", Rest), State) ->
+% {"", "", Rest, State};
+parse_notation_id_1(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_notation_id_1/2],
+ "external id or public id expected").
+
+
+%%======================================================================
+%% Character checks and definitions
+%%======================================================================
+
+%%----------------------------------------------------------------------
+%% Definitions of the first 256 characters
+%% 0 - not classified,
+%% 1 - base_char or ideographic,
+%% 2 - combining_char or digit or extender,
+%% 3 - $. or $- or $_ or $:
+%%----------------------------------------------------------------------
+-define(SMALL, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,0,2,2,2,2,2,2,2,2,2,2,3,0,
+ 0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,0,0,0,0,3,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,2,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1}).
+
+
+%%----------------------------------------------------------------------
+%% Function : is_name_start(Char) -> Result
+%% Parameters: Char = char()
+%% Result : true | false
+%% Description: Check if character is a valid start of a name.
+%% [5] Name ::= (Letter | '_' | ':') (NameChar)*
+%%----------------------------------------------------------------------
+is_name_start($_) ->
+ true;
+is_name_start($:) ->
+ true;
+is_name_start(C) ->
+ is_letter(C).
+
+
+%%----------------------------------------------------------------------
+%% Function : is_name_start(Char) -> Result
+%% Parameters: Char = char()
+%% Result : true | false
+%% Description: Check if character is a valid name character.
+%% [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':'
+%% | CombiningChar | Extender
+%%----------------------------------------------------------------------
+is_name_char(C) ->
+ try element(C, ?SMALL) > 0
+ catch _:_ ->
+ case is_letter(C) of
+ true ->
+ true;
+ false ->
+ case is_digit(C) of
+ true -> true;
+ false ->
+ case is_combining_char(C) of
+ true -> true;
+ false ->
+ is_extender(C)
+ end
+ end
+ end
+ end.
+
+
+%%----------------------------------------------------------------------
+%% Function : is_pubid_char(Char) -> Result
+%% Parameters: Char = char()
+%% Result : true | false
+%% Description: Check if character is a public identity character.
+%% [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9]
+%% | [-'()+,./:=?;!*#@$_%]
+%%----------------------------------------------------------------------
+is_pubid_char(?space) ->
+ true;
+is_pubid_char(?cr) ->
+ true;
+is_pubid_char(?lf) ->
+ true;
+is_pubid_char($!) ->
+ true;
+is_pubid_char($:) ->
+ true;
+is_pubid_char($;) ->
+ true;
+is_pubid_char($=) ->
+ true;
+is_pubid_char($@) ->
+ true;
+is_pubid_char($_) ->
+ true;
+is_pubid_char(C) when $# =< C, C =< $% ->
+ true;
+is_pubid_char(C) when $' =< C, C =< $/ ->
+ true;
+is_pubid_char(C) ->
+ case is_letter(C) of
+ true ->
+ true;
+ false ->
+ is_digit(C)
+ end.
+
+
+%%----------------------------------------------------------------------
+%% Function : is_letter(Char) -> Result
+%% Parameters: Char = char()
+%% Result : true | false
+%% Description: Check if character is a letter.
+%% [84] Letter ::= BaseChar | Ideographic
+%%----------------------------------------------------------------------
+is_letter(C) ->
+ try element(C, ?SMALL) =:= 1
+ catch _:_ ->
+ case is_base_char(C) of
+ false ->
+ is_ideographic(C);
+ true ->
+ true
+ end
+ end.
+
+
+%%----------------------------------------------------------------------
+%% Function : is_letter(Char) -> Result
+%% Parameters: Char = char()
+%% Result : true | false
+%% Description: Check if character is a basic character.
+%% [85] BaseChar
+%%----------------------------------------------------------------------
+is_base_char(C) when C >= 16#0041, C =< 16#005A -> true; %% ASCII Latin
+is_base_char(C) when C >= 16#0061, C =< 16#007A -> true;
+is_base_char(C) when C >= 16#00C0, C =< 16#00D6 -> true; %% ISO Latin
+is_base_char(C) when C >= 16#00D8, C =< 16#00F6 -> true;
+is_base_char(C) when C >= 16#00F8, C =< 16#00FF -> true;
+is_base_char(C) when C >= 16#0100, C =< 16#0131 -> true; %% Accented Latin
+is_base_char(C) when C >= 16#0134, C =< 16#013E -> true;
+is_base_char(C) when C >= 16#0141, C =< 16#0148 -> true;
+is_base_char(C) when C >= 16#014A, C =< 16#017E -> true;
+is_base_char(C) when C >= 16#0180, C =< 16#01C3 -> true;
+is_base_char(C) when C >= 16#01CD, C =< 16#01F0 -> true;
+is_base_char(C) when C >= 16#01F4, C =< 16#01F5 -> true;
+is_base_char(C) when C >= 16#01FA, C =< 16#0217 -> true;
+is_base_char(C) when C >= 16#0250, C =< 16#02A8 -> true; %% IPA
+is_base_char(C) when C >= 16#02BB, C =< 16#02C1 -> true; %% Spacing Modifiers
+is_base_char(16#0386) -> true; %% Greek
+is_base_char(C) when C >= 16#0388, C =< 16#038A -> true;
+is_base_char(16#038C) -> true;
+is_base_char(C) when C >= 16#038E, C =< 16#03A1 -> true;
+is_base_char(C) when C >= 16#03A3, C =< 16#03CE -> true;
+is_base_char(C) when C >= 16#03D0, C =< 16#03D6 -> true;
+is_base_char(16#03DA) -> true;
+is_base_char(16#03DC) -> true;
+is_base_char(16#03DE) -> true;
+is_base_char(16#03E0) -> true;
+is_base_char(C) when C >= 16#03E2, C =< 16#03F3 -> true;
+is_base_char(C) when C >= 16#0401, C =< 16#040C -> true; %% Cyrillic
+is_base_char(C) when C >= 16#040E, C =< 16#044F -> true;
+is_base_char(C) when C >= 16#0451, C =< 16#045C -> true;
+is_base_char(C) when C >= 16#045E, C =< 16#0481 -> true;
+is_base_char(C) when C >= 16#0490, C =< 16#04C4 -> true;
+is_base_char(C) when C >= 16#04C7, C =< 16#04C8 -> true;
+is_base_char(C) when C >= 16#04CB, C =< 16#04CC -> true;
+is_base_char(C) when C >= 16#04D0, C =< 16#04EB -> true;
+is_base_char(C) when C >= 16#04EE, C =< 16#04F5 -> true;
+is_base_char(C) when C >= 16#04F8, C =< 16#04F9 -> true;
+is_base_char(C) when C >= 16#0531, C =< 16#0556 -> true; %% Armenian
+is_base_char(16#0559) -> true;
+is_base_char(C) when C >= 16#0561, C =< 16#0586 -> true;
+is_base_char(C) when C >= 16#05D0, C =< 16#05EA -> true; %% Hebrew
+is_base_char(C) when C >= 16#05F0, C =< 16#05F2 -> true;
+is_base_char(C) when C >= 16#0621, C =< 16#063A -> true; %% Arabic
+is_base_char(C) when C >= 16#0641, C =< 16#064A -> true;
+is_base_char(C) when C >= 16#0671, C =< 16#06B7 -> true;
+is_base_char(C) when C >= 16#06BA, C =< 16#06BE -> true;
+is_base_char(C) when C >= 16#06C0, C =< 16#06CE -> true;
+is_base_char(C) when C >= 16#06D0, C =< 16#06D3 -> true;
+is_base_char(16#06D5) -> true;
+is_base_char(C) when C >= 16#06E5, C =< 16#06E6 -> true;
+is_base_char(C) when C >= 16#0905, C =< 16#0939 -> true; %% Devanagari
+is_base_char(16#093D) -> true;
+is_base_char(C) when C >= 16#0958, C =< 16#0961 -> true;
+is_base_char(C) when C >= 16#0985, C =< 16#098C -> true; %% Bengali
+is_base_char(C) when C >= 16#098F, C =< 16#0990 -> true;
+is_base_char(C) when C >= 16#0993, C =< 16#09A8 -> true;
+is_base_char(C) when C >= 16#09AA, C =< 16#09B0 -> true;
+is_base_char(16#09B2) -> true;
+is_base_char(C) when C >= 16#09B6, C =< 16#09B9 -> true;
+is_base_char(C) when C >= 16#09DC, C =< 16#09DD -> true;
+is_base_char(C) when C >= 16#09DF, C =< 16#09E1 -> true;
+is_base_char(C) when C >= 16#09F0, C =< 16#09F1 -> true;
+is_base_char(C) when C >= 16#0A05, C =< 16#0A0A -> true; %% Gurmukhi
+is_base_char(C) when C >= 16#0A0F, C =< 16#0A10 -> true;
+is_base_char(C) when C >= 16#0A13, C =< 16#0A28 -> true;
+is_base_char(C) when C >= 16#0A2A, C =< 16#0A30 -> true;
+is_base_char(C) when C >= 16#0A32, C =< 16#0A33 -> true;
+is_base_char(C) when C >= 16#0A35, C =< 16#0A36 -> true;
+is_base_char(C) when C >= 16#0A38, C =< 16#0A39 -> true;
+is_base_char(C) when C >= 16#0A59, C =< 16#0A5C -> true;
+is_base_char(16#0A5E) -> true;
+is_base_char(C) when C >= 16#0A72, C =< 16#0A74 -> true;
+is_base_char(C) when C >= 16#0A85, C =< 16#0A8B -> true; %% Gujarati
+is_base_char(16#0A8D) -> true;
+is_base_char(C) when C >= 16#0A8F, C =< 16#0A91 -> true;
+is_base_char(C) when C >= 16#0A93, C =< 16#0AA8 -> true;
+is_base_char(C) when C >= 16#0AAA, C =< 16#0AB0 -> true;
+is_base_char(C) when C >= 16#0AB2, C =< 16#0AB3 -> true;
+is_base_char(C) when C >= 16#0AB5, C =< 16#0AB9 -> true;
+is_base_char(16#0ABD) -> true;
+is_base_char(16#0AE0) -> true;
+is_base_char(C) when C >= 16#0B05, C =< 16#0B0C -> true; %% Oriya
+is_base_char(C) when C >= 16#0B0F, C =< 16#0B10 -> true;
+is_base_char(C) when C >= 16#0B13, C =< 16#0B28 -> true;
+is_base_char(C) when C >= 16#0B2A, C =< 16#0B30 -> true;
+is_base_char(C) when C >= 16#0B32, C =< 16#0B33 -> true;
+is_base_char(C) when C >= 16#0B36, C =< 16#0B39 -> true;
+is_base_char(16#0B3D) -> true;
+is_base_char(C) when C >= 16#0B5C, C =< 16#0B5D -> true;
+is_base_char(C) when C >= 16#0B5F, C =< 16#0B61 -> true;
+is_base_char(C) when C >= 16#0B85, C =< 16#0B8A -> true; %% Tamil
+is_base_char(C) when C >= 16#0B8E, C =< 16#0B90 -> true;
+is_base_char(C) when C >= 16#0B92, C =< 16#0B95 -> true;
+is_base_char(C) when C >= 16#0B99, C =< 16#0B9A -> true;
+is_base_char(16#0B9C) -> true;
+is_base_char(C) when C >= 16#0B9E, C =< 16#0B9F -> true;
+is_base_char(C) when C >= 16#0BA3, C =< 16#0BA4 -> true;
+is_base_char(C) when C >= 16#0BA8, C =< 16#0BAA -> true;
+is_base_char(C) when C >= 16#0BAE, C =< 16#0BB5 -> true;
+is_base_char(C) when C >= 16#0BB7, C =< 16#0BB9 -> true;
+is_base_char(C) when C >= 16#0C05, C =< 16#0C0C -> true; %% Telugu
+is_base_char(C) when C >= 16#0C0E, C =< 16#0C10 -> true;
+is_base_char(C) when C >= 16#0C12, C =< 16#0C28 -> true;
+is_base_char(C) when C >= 16#0C2A, C =< 16#0C33 -> true;
+is_base_char(C) when C >= 16#0C35, C =< 16#0C39 -> true;
+is_base_char(C) when C >= 16#0C60, C =< 16#0C61 -> true;
+is_base_char(C) when C >= 16#0C85, C =< 16#0C8C -> true; %% Kannada
+is_base_char(C) when C >= 16#0C8E, C =< 16#0C90 -> true;
+is_base_char(C) when C >= 16#0C92, C =< 16#0CA8 -> true;
+is_base_char(C) when C >= 16#0CAA, C =< 16#0CB3 -> true;
+is_base_char(C) when C >= 16#0CB5, C =< 16#0CB9 -> true;
+is_base_char(16#0CDE) -> true;
+is_base_char(C) when C >= 16#0CE0, C =< 16#0CE1 -> true;
+is_base_char(C) when C >= 16#0D05, C =< 16#0D0C -> true; %% Malayalam
+is_base_char(C) when C >= 16#0D0E, C =< 16#0D10 -> true;
+is_base_char(C) when C >= 16#0D12, C =< 16#0D28 -> true;
+is_base_char(C) when C >= 16#0D2A, C =< 16#0D39 -> true;
+is_base_char(C) when C >= 16#0D60, C =< 16#0D61 -> true;
+is_base_char(C) when C >= 16#0E01, C =< 16#0E2E -> true; %% Thai
+is_base_char(16#0E30) -> true;
+is_base_char(C) when C >= 16#0E32, C =< 16#0E33 -> true;
+is_base_char(C) when C >= 16#0E40, C =< 16#0E45 -> true;
+is_base_char(C) when C >= 16#0E81, C =< 16#0E82 -> true; %% Lao
+is_base_char(16#0E84) -> true;
+is_base_char(C) when C >= 16#0E87, C =< 16#0E88 -> true;
+is_base_char(16#0E8A) -> true;
+is_base_char(16#0E8D) -> true;
+is_base_char(C) when C >= 16#0E94, C =< 16#0E97 -> true;
+is_base_char(C) when C >= 16#0E99, C =< 16#0E9F -> true;
+is_base_char(C) when C >= 16#0EA1, C =< 16#0EA3 -> true;
+is_base_char(16#0EA5) -> true;
+is_base_char(16#0EA7) -> true;
+is_base_char(C) when C >= 16#0EAA, C =< 16#0EAB -> true;
+is_base_char(C) when C >= 16#0EAD, C =< 16#0EAE -> true;
+is_base_char(16#0EB0) -> true;
+is_base_char(C) when C >= 16#0EB2, C =< 16#0EB3 -> true;
+is_base_char(16#0EBD) -> true;
+is_base_char(C) when C >= 16#0EC0, C =< 16#0EC4 -> true;
+is_base_char(C) when C >= 16#0F40, C =< 16#0F47 -> true; %% Tibetan
+is_base_char(C) when C >= 16#0F49, C =< 16#0F69 -> true;
+is_base_char(C) when C >= 16#10A0, C =< 16#10C5 -> true; %% Hangul Jamo
+is_base_char(C) when C >= 16#10D0, C =< 16#10F6 -> true;
+is_base_char(16#1100) -> true;
+is_base_char(C) when C >= 16#1102, C =< 16#1103 -> true;
+is_base_char(C) when C >= 16#1105, C =< 16#1107 -> true;
+is_base_char(16#1109) -> true;
+is_base_char(C) when C >= 16#110B, C =< 16#110C -> true;
+is_base_char(C) when C >= 16#110E, C =< 16#1112 -> true;
+is_base_char(16#113C) -> true;
+is_base_char(16#113E) -> true;
+is_base_char(16#1140) -> true;
+is_base_char(16#114C) -> true;
+is_base_char(16#114E) -> true;
+is_base_char(16#1150) -> true;
+is_base_char(C) when C >= 16#1154, C =< 16#1155 -> true;
+is_base_char(16#1159) -> true;
+is_base_char(C) when C >= 16#115F, C =< 16#1161 -> true;
+is_base_char(16#1163) -> true;
+is_base_char(16#1165) -> true;
+is_base_char(16#1167) -> true;
+is_base_char(16#1169) -> true;
+is_base_char(C) when C >= 16#116D, C =< 16#116E -> true;
+is_base_char(C) when C >= 16#1172, C =< 16#1173 -> true;
+is_base_char(16#1175) -> true;
+is_base_char(16#119E) -> true;
+is_base_char(16#11A8) -> true;
+is_base_char(16#11AB) -> true;
+is_base_char(C) when C >= 16#11AE, C =< 16#11AF -> true;
+is_base_char(C) when C >= 16#11B7, C =< 16#11B8 -> true;
+is_base_char(16#11BA) -> true;
+is_base_char(C) when C >= 16#11BC, C =< 16#11C2 -> true;
+is_base_char(16#11EB) -> true;
+is_base_char(16#11F0) -> true;
+is_base_char(16#11F9) -> true;
+is_base_char(C) when C >= 16#1E00, C =< 16#1E9B -> true; %% Latin Extended Additional
+is_base_char(C) when C >= 16#1EA0, C =< 16#1EF9 -> true;
+is_base_char(C) when C >= 16#1F00, C =< 16#1F15 -> true; %% Greek Extended
+is_base_char(C) when C >= 16#1F18, C =< 16#1F1D -> true;
+is_base_char(C) when C >= 16#1F20, C =< 16#1F45 -> true;
+is_base_char(C) when C >= 16#1F48, C =< 16#1F4D -> true;
+is_base_char(C) when C >= 16#1F50, C =< 16#1F57 -> true;
+is_base_char(16#1F59) -> true;
+is_base_char(16#1F5B) -> true;
+is_base_char(16#1F5D) -> true;
+is_base_char(C) when C >= 16#1F5F, C =< 16#1F7D -> true;
+is_base_char(C) when C >= 16#1F80, C =< 16#1FB4 -> true;
+is_base_char(C) when C >= 16#1FB6, C =< 16#1FBC -> true;
+is_base_char(16#1FBE) -> true;
+is_base_char(C) when C >= 16#1FC2, C =< 16#1FC4 -> true;
+is_base_char(C) when C >= 16#1FC6, C =< 16#1FCC -> true;
+is_base_char(C) when C >= 16#1FD0, C =< 16#1FD3 -> true;
+is_base_char(C) when C >= 16#1FD6, C =< 16#1FDB -> true;
+is_base_char(C) when C >= 16#1FE0, C =< 16#1FEC -> true;
+is_base_char(C) when C >= 16#1FF2, C =< 16#1FF4 -> true;
+is_base_char(C) when C >= 16#1FF6, C =< 16#1FFC -> true;
+is_base_char(16#2126) -> true; %% Letterlike Symbols
+is_base_char(C) when C >= 16#212A, C =< 16#212B -> true;
+is_base_char(16#212E) -> true;
+is_base_char(C) when C >= 16#2180, C =< 16#2182 -> true; %% Number Forms
+is_base_char(C) when C >= 16#3041, C =< 16#3094 -> true; %% Hiragana
+is_base_char(C) when C >= 16#30A1, C =< 16#30FA -> true; %% Katakana
+is_base_char(C) when C >= 16#3105, C =< 16#312C -> true; %% Bopomofo
+is_base_char(C) when C >= 16#ac00, C =< 16#d7a3 -> true; %% Hangul Syllables
+is_base_char(_) ->
+ false.
+
+%%----------------------------------------------------------------------
+%% Function : is_ideographic(Char) -> Result
+%% Parameters: Char = char()
+%% Result : true | false
+%% Description: Check if character is an ideographic letter.
+%% [86] Ideographic
+%%----------------------------------------------------------------------
+is_ideographic(C) when C >= 16#4e00, C =< 16#9fa5 -> true; %% Unified CJK Ideographs
+is_ideographic(16#3007) -> true; %% CJK Symbols and Punctuation
+is_ideographic(C) when C >= 16#3021, C =< 16#3029 -> true;
+is_ideographic(_) ->
+ false.
+
+%%----------------------------------------------------------------------
+%% Function : is_ideographic(Char) -> Result
+%% Parameters: Char = char()
+%% Result : true | false
+%% Description: Check if character is a combining character.
+%% [87] CombiningChar
+%%----------------------------------------------------------------------
+is_combining_char(C) when C >= 16#0300, C =< 16#0345 -> true; %% Combining Diacritics
+is_combining_char(C) when C >= 16#0360, C =< 16#0361 -> true;
+is_combining_char(C) when C >= 16#0483, C =< 16#0486 -> true; %% Cyrillic Combining Diacritics
+is_combining_char(C) when C >= 16#0591, C =< 16#05a1 -> true; %% Hebrew Combining Diacritics
+is_combining_char(C) when C >= 16#05a3, C =< 16#05b9 -> true;
+is_combining_char(C) when C >= 16#05bb, C =< 16#05bd -> true;
+is_combining_char(16#05bf) -> true;
+is_combining_char(C) when C >= 16#05c1, C =< 16#05c2 -> true;
+is_combining_char(16#05c4) -> true;
+is_combining_char(C) when C >= 16#064b, C =< 16#0652 -> true; %% Arabic Combining Diacritics
+is_combining_char(16#0670) -> true;
+is_combining_char(C) when C >= 16#06d6, C =< 16#06dc -> true;
+is_combining_char(C) when C >= 16#06dd, C =< 16#06df -> true;
+is_combining_char(C) when C >= 16#06e0, C =< 16#06e4 -> true;
+is_combining_char(C) when C >= 16#06e7, C =< 16#06e8 -> true;
+is_combining_char(C) when C >= 16#06ea, C =< 16#06ed -> true;
+is_combining_char(C) when C >= 16#0901, C =< 16#0903 -> true; %% Devanagari Combining Diacritics
+is_combining_char(16#093c) -> true;
+is_combining_char(C) when C >= 16#093e, C =< 16#094c -> true;
+is_combining_char(16#094d) -> true;
+is_combining_char(C) when C >= 16#0951, C =< 16#0954 -> true;
+is_combining_char(C) when C >= 16#0962, C =< 16#0963 -> true;
+is_combining_char(C) when C >= 16#0981, C =< 16#0983 -> true; %% Bengali Combining Diacritics
+is_combining_char(16#09bc) -> true;
+is_combining_char(16#09be) -> true;
+is_combining_char(16#09bf) -> true;
+is_combining_char(C) when C >= 16#09c0, C =< 16#09c4 -> true;
+is_combining_char(C) when C >= 16#09c7, C =< 16#09c8 -> true;
+is_combining_char(C) when C >= 16#09cb, C =< 16#09cd -> true;
+is_combining_char(16#09d7) -> true;
+is_combining_char(C) when C >= 16#09e2, C =< 16#09e3 -> true;
+is_combining_char(16#0a02) -> true; %% Gurmukhi Combining Diacritics
+is_combining_char(16#0a3c) -> true;
+is_combining_char(16#0a3e) -> true;
+is_combining_char(16#0a3f) -> true;
+is_combining_char(C) when C >= 16#0a40, C =< 16#0a42 -> true;
+is_combining_char(C) when C >= 16#0a47, C =< 16#0a48 -> true;
+is_combining_char(C) when C >= 16#0a4b, C =< 16#0a4d -> true;
+is_combining_char(C) when C >= 16#0a70, C =< 16#0a71 -> true;
+is_combining_char(C) when C >= 16#0a81, C =< 16#0a83 -> true; %% Gujarati Combining Diacritics
+is_combining_char(16#0abc) -> true;
+is_combining_char(C) when C >= 16#0abe, C =< 16#0ac5 -> true;
+is_combining_char(C) when C >= 16#0ac7, C =< 16#0ac9 -> true;
+is_combining_char(C) when C >= 16#0acb, C =< 16#0acd -> true;
+is_combining_char(C) when C >= 16#0b01, C =< 16#0b03 -> true; %% Oriya Combining Diacritics
+is_combining_char(16#0b3c) -> true;
+is_combining_char(C) when C >= 16#0b3e, C =< 16#0b43 -> true;
+is_combining_char(C) when C >= 16#0b47, C =< 16#0b48 -> true;
+is_combining_char(C) when C >= 16#0b4b, C =< 16#0b4d -> true;
+is_combining_char(C) when C >= 16#0b56, C =< 16#0b57 -> true;
+is_combining_char(C) when C >= 16#0b82, C =< 16#0b83 -> true; %% Tamil Combining Diacritics
+is_combining_char(C) when C >= 16#0bbe, C =< 16#0bc2 -> true;
+is_combining_char(C) when C >= 16#0bc6, C =< 16#0bc8 -> true;
+is_combining_char(C) when C >= 16#0bca, C =< 16#0bcd -> true;
+is_combining_char(16#0bd7) -> true;
+is_combining_char(C) when C >= 16#0c01, C =< 16#0c03 -> true; %% Telugu Combining Diacritics
+is_combining_char(C) when C >= 16#0c3e, C =< 16#0c44 -> true;
+is_combining_char(C) when C >= 16#0c46, C =< 16#0c48 -> true;
+is_combining_char(C) when C >= 16#0c4a, C =< 16#0c4d -> true;
+is_combining_char(C) when C >= 16#0c55, C =< 16#0c56 -> true;
+is_combining_char(C) when C >= 16#0c82, C =< 16#0c83 -> true; %% Kannada Combining Diacritics
+is_combining_char(C) when C >= 16#0cbe, C =< 16#0cc4 -> true;
+is_combining_char(C) when C >= 16#0cc6, C =< 16#0cc8 -> true;
+is_combining_char(C) when C >= 16#0cca, C =< 16#0ccd -> true;
+is_combining_char(C) when C >= 16#0cd5, C =< 16#0cd6 -> true;
+is_combining_char(C) when C >= 16#0d02, C =< 16#0d03 -> true; %% Malayalam Combining Diacritics
+is_combining_char(C) when C >= 16#0d3e, C =< 16#0d43 -> true;
+is_combining_char(C) when C >= 16#0d46, C =< 16#0d48 -> true;
+is_combining_char(C) when C >= 16#0d4a, C =< 16#0d4d -> true;
+is_combining_char(16#0d57) -> true;
+is_combining_char(16#0e31) -> true; %% Thai Combining Diacritics
+is_combining_char(C) when C >= 16#0e34, C =< 16#0e3a -> true;
+is_combining_char(C) when C >= 16#0e47, C =< 16#0e4e -> true;
+is_combining_char(16#0eb1) -> true; %% Lao Combining Diacritics
+is_combining_char(C) when C >= 16#0eb4, C =< 16#0eb9 -> true;
+is_combining_char(C) when C >= 16#0ebb, C =< 16#0ebc -> true;
+is_combining_char(C) when C >= 16#0ec8, C =< 16#0ecd -> true;
+is_combining_char(C) when C >= 16#0f18, C =< 16#0f19 -> true; %% Tibetan Combining Diacritics
+is_combining_char(16#0f35) -> true;
+is_combining_char(16#0f37) -> true;
+is_combining_char(16#0f39) -> true;
+is_combining_char(16#0f3e) -> true;
+is_combining_char(16#0f3f) -> true;
+is_combining_char(C) when C >= 16#0f71, C =< 16#0f84 -> true;
+is_combining_char(C) when C >= 16#0f86, C =< 16#0f8b -> true;
+is_combining_char(C) when C >= 16#0f90, C =< 16#0f95 -> true;
+is_combining_char(16#0f97) -> true;
+is_combining_char(C) when C >= 16#0f99, C =< 16#0fad -> true;
+is_combining_char(C) when C >= 16#0fb1, C =< 16#0fb7 -> true;
+is_combining_char(16#0fb9) -> true;
+is_combining_char(C) when C >= 16#20d0, C =< 16#20dc -> true; %% Math/Technical Combining Diacritics
+is_combining_char(16#20e1) -> true;
+is_combining_char(C) when C >= 16#302a, C =< 16#302f -> true; %% Ideographic Diacritics
+is_combining_char(16#3099) -> true; %% Hiragana/Katakana Combining Diacritics
+is_combining_char(16#309a) -> true;
+is_combining_char(_) -> false.
+
+
+%%----------------------------------------------------------------------
+%% Function : is_digit(Char) -> Result
+%% Parameters: Char = char()
+%% Result : true | false
+%% Description: Check if character is a digit.
+%% [88] Digit
+%%----------------------------------------------------------------------
+is_digit(C) when C >= 16#0030, C =< 16#0039 -> true; %% Basic ASCII digits 0-9
+is_digit(C) when C >= 16#0660, C =< 16#0669 -> true; %% Arabic Digits 0-9
+is_digit(C) when C >= 16#06F0, C =< 16#06F9 -> true; %% Eastern Arabic-Indic Digits 0-9
+is_digit(C) when C >= 16#0966, C =< 16#096f -> true; %% Devanagari Digits 0-9
+is_digit(C) when C >= 16#09e6, C =< 16#09ef -> true; %% Bengali Digits 0-9
+is_digit(C) when C >= 16#0a66, C =< 16#0a6f -> true; %% Gurmukhi Digits 0-9
+is_digit(C) when C >= 16#0ae6, C =< 16#0aef -> true; %% Gujarati Digits 0-9
+is_digit(C) when C >= 16#0b66, C =< 16#0b6f -> true; %% Oriya Digits 0-9
+is_digit(C) when C >= 16#0be7, C =< 16#0bef -> true; %% Tamil Digits 0-9
+is_digit(C) when C >= 16#0c66, C =< 16#0c6f -> true; %% Telugu Digits 0-9
+is_digit(C) when C >= 16#0ce6, C =< 16#0cef -> true; %% Kannada Digits 0-9
+is_digit(C) when C >= 16#0d66, C =< 16#0d6f -> true; %% Malayalam Digits 0-9
+is_digit(C) when C >= 16#0e50, C =< 16#0e59 -> true; %% Thai Digits 0-9
+is_digit(C) when C >= 16#0ed0, C =< 16#0ed9 -> true; %% Lao Digits 0-9
+is_digit(C) when C >= 16#0f20, C =< 16#0f29 -> true; %% Tibetan Digits 0-9
+is_digit(_) -> false.
+
+
+%%----------------------------------------------------------------------
+%% Function : is_extender(Char) -> Result
+%% Parameters: Char = char()
+%% Result : true | false
+%% Description: Check if character is an extender character.
+%% [89] Extender
+%%----------------------------------------------------------------------
+is_extender(16#00b7) -> true; %% Middle Dot
+is_extender(16#02d0) -> true; %% Triangular Colon and Half Colon
+is_extender(16#02d1) -> true;
+is_extender(16#0387) -> true; %% Greek Ano Teleia
+is_extender(16#0640) -> true; %% Arabic Tatweel
+is_extender(16#0e46) -> true; %% Thai Maiyamok
+is_extender(16#0ec6) -> true; %% Lao Ko La
+is_extender(16#3005) -> true; %% Ideographic Iteration Mark
+is_extender(C) when C >= 16#3031, C =< 16#3035 -> true; %% Japanese Kana Repetition Marks
+is_extender(C) when C >= 16#309d, C =< 16#309e -> true; %% Japanese Hiragana Iteration Marks
+is_extender(C) when C >= 16#30fc, C =< 16#30fe -> true; %% Japanese Kana Iteration Marks
+is_extender(_) -> false.
+
+
+
+%%======================================================================
+%% Callback and Continuation function handling
+%%======================================================================
+%%----------------------------------------------------------------------
+%% Function : event_callback(Event, State) -> Result
+%% Parameters: Event = term()
+%% State = #xmerl_sax_parser_state{}
+%% Result : #xmerl_sax_parser_state{}
+%% Description: Function that uses provided fun to send parser events.
+%%----------------------------------------------------------------------
+event_callback(Event,
+ #xmerl_sax_parser_state{
+ event_fun=CbFun,
+ event_state=EventState,
+ line_no=N,
+ entity=E,
+ current_location=L
+ } = State) ->
+ try
+ NewEventState = CbFun(Event, {L, E, N}, EventState),
+ State#xmerl_sax_parser_state{event_state=NewEventState}
+ catch
+ throw:ErrorTerm ->
+ throw({event_receiver_error, State, ErrorTerm});
+ exit:Reason ->
+ throw({event_receiver_error, State, {'EXIT', Reason}})
+ end.
+
+%%----------------------------------------------------------------------
+%% Function : cf(Rest, State, NextCall) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% NextCall = fun()
+%% Result : {Rest, State}
+%% Description: Function that uses provided fun to read another chunk from
+%% input stream and calls the fun in NextCall.
+%%----------------------------------------------------------------------
+cf(_Rest, #xmerl_sax_parser_state{continuation_fun = undefined} = State, _) ->
+ ?fatal_error(State, "Continuation function undefined");
+cf(Rest, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = CState} = State,
+ NextCall) ->
+ Result =
+ try
+ CFun(CState)
+ catch
+ throw:ErrorTerm ->
+ ?fatal_error(State, ErrorTerm);
+ exit:Reason ->
+ ?fatal_error(State, {'EXIT', Reason})
+ end,
+ case Result of
+ {?STRING_EMPTY, _} ->
+ ?fatal_error(State, "No more bytes");
+ {NewBytes, NewContState} ->
+ NextCall(?APPEND_STRING(Rest, NewBytes),
+ State#xmerl_sax_parser_state{continuation_state = NewContState})
+ end.
+
+%%----------------------------------------------------------------------
+%% Function : cf(Rest, State, NextCall, P) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% NextCall = fun()
+%% P = term()
+%% Result : {Rest, State}
+%% Description: Function that uses provided fun to read another chunk from
+%% input stream and calls the fun in NextCall with P as last parameter.
+%%----------------------------------------------------------------------
+cf(_Rest, #xmerl_sax_parser_state{continuation_fun = undefined} = State, _P, _) ->
+ ?fatal_error(State, "Continuation function undefined");
+cf(Rest, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = CState} = State,
+ P, NextCall) ->
+ Result =
+ try
+ CFun(CState)
+ catch
+ throw:ErrorTerm ->
+ ?fatal_error(State, ErrorTerm);
+ exit:Reason ->
+ ?fatal_error(State, {'EXIT', Reason})
+ end,
+ case Result of
+ {?STRING_EMPTY, _} ->
+ ?fatal_error(State, "No more bytes");
+ {NewBytes, NewContState} ->
+ NextCall(?APPEND_STRING(Rest, NewBytes),
+ State#xmerl_sax_parser_state{continuation_state = NewContState},
+ P)
+ end.
+
+
+%%----------------------------------------------------------------------
+%% Function : cf(Rest, State, P1, P2, NextCall) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% NextCall = fun()
+%% P1 = term()
+%% P2 = term()
+%% Result : {Rest, State}
+%% Description: Function that uses provided fun to read another chunk from
+%% input stream and calls the fun in NextCall with P1 and
+%% P2 as last parameters.
+%%----------------------------------------------------------------------
+cf(_Rest, #xmerl_sax_parser_state{continuation_fun = undefined} = State, _P1, _P2, _) ->
+ ?fatal_error(State, "Continuation function undefined");
+cf(Rest, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = CState} = State,
+ P1, P2, NextCall) ->
+ Result =
+ try
+ CFun(CState)
+ catch
+ throw:ErrorTerm ->
+ ?fatal_error(State, ErrorTerm);
+ exit:Reason ->
+ ?fatal_error(State, {'EXIT', Reason})
+ end,
+ case Result of
+ {?STRING_EMPTY, _} ->
+ ?fatal_error(State, "No more bytes");
+ {NewBytes, NewContState} ->
+ NextCall(?APPEND_STRING(Rest, NewBytes),
+ State#xmerl_sax_parser_state{continuation_state = NewContState},
+ P1, P2)
+ end.
+
+
+
+%%----------------------------------------------------------------------
+%% Function : unicode_incomplete_check(Args, ErrString) -> Result
+%% Parameters: Args = [Bytes, State | RestOfArgs]
+%% Bytes = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% RestOfArgs =
+%% ErrString = string()
+%% Result : {Rest, State}
+%% Description:
+%%----------------------------------------------------------------------
+unicode_incomplete_check([Bytes, #xmerl_sax_parser_state{encoding=Enc} = State | _] = Args, ErrString) when is_binary(Bytes) ->
+ case unicode:characters_to_list(Bytes, Enc) of
+ {incomplete, _, _} ->
+ apply(?MODULE, cf, Args);
+ {error, _Encoded, _Rest} ->
+ ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc])));
+ _ when ErrString =/= undefined ->
+ ?fatal_error(State, ErrString)
+ end;
+unicode_incomplete_check([Bytes,State | _], ErrString) when is_list(Bytes), ErrString =/= undefined ->
+ ?fatal_error(State, ErrString).
+
+
+%%----------------------------------------------------------------------
+%% Function : check_uri(Uri, CL) -> Result
+%% Parameters: Uri = string()
+%% CL = string()
+%% Result : {atom(), string()}
+%% Description:
+%%----------------------------------------------------------------------
+check_uri("http://" ++ _ = Url, _CL) ->
+ {http, Url};
+check_uri("file://" ++ Path, _CL) ->
+ {file, Path};
+check_uri(Path, CL) -> % ordinary filepath other URI's not supported yet
+ %% "file://" already removed when current_location set
+ Tag = get_uri_tag(CL),
+ case filename:pathtype(Path) of
+ relative ->
+ case Tag of
+ false ->
+ {file, filename:join(CL, Path)};
+ T ->
+ {T, CL ++ "/" ++ Path}
+ end;
+ absolute ->
+ case Tag of
+ false ->
+ {file, filename:absname(Path)};
+ T ->
+ {T, CL ++ "/" ++ Path}
+ end;
+ volumerelative -> % only windows
+ case Tag of
+ false ->
+ [Vol | _] = re:split(CL, ":", [{return,list}]),
+ {file, filename:join(Vol ++ ":", Path)};
+ T ->
+ {T, CL ++ "/" ++ Path}
+ end
+ end.
+
+%%----------------------------------------------------------------------
+%% Function : get_uri_tag(Uri) -> Result
+%% Parameters: Uri = string()
+%% Result : true |false
+%% Description: http / file is the only supported URI for the moment
+%%----------------------------------------------------------------------
+get_uri_tag(Uri) ->
+ case re:split(Uri, "://", [{return,list}]) of
+ [Tag, _] ->
+ list_to_atom(Tag);
+ [_] ->
+ false
+ end.
+
+%%----------------------------------------------------------------------
+%% Function : http_get_file(Host, Port, Key) -> Result
+%% Parameters: Host = string()
+%% Port = integer()
+%% Key = string()
+%% Result : string()
+%% Description:
+%%----------------------------------------------------------------------
+http_get_file(Host, Port, Key) ->
+ ConnectTimeOut = 10000,
+ SendTimeout = 10000,
+ FilenameTempl = filename:basename(Key),
+
+ {Filename, FD} = create_tempfile(FilenameTempl),
+ Socket = create_connection(Host, Port, ConnectTimeOut),
+ Request = "GET " ++ Key ++ " HTTP/1.0\r\n\r\n",
+
+ case gen_tcp:send(Socket, Request) of
+ ok ->
+ try
+ receive_msg(Socket, FD, true, SendTimeout)
+ catch
+ throw:{error, Error} ->
+ file:close(FD),
+ file:delete(Filename),
+ throw({error, Error})
+ end;
+ {error, _Reason} ->
+ file:close(FD),
+ file:delete(Filename),
+ throw({error, lists:flatten(io_lib:format("Couldn't fetch http://~s:~p/~s",
+ [Host, Port, Key]))})
+ end,
+ file:close(FD),
+ Filename.
+
+%%----------------------------------------------------------------------
+%% Function : receive_msg(Socket, FD, WaitForHeader, Timeout) -> Result
+%% Parameters: Socket = io_device()
+%% FD = io_device()
+%% WaitForHeader = boolean()
+%% Timeout = integer()
+%% Result : ok
+%% Description:
+%%----------------------------------------------------------------------
+receive_msg(Socket, FD, WaitForHeader, Timeout) ->
+ receive
+ {tcp_closed, Socket} ->
+ ok;
+ {tcp, Socket, Response} when WaitForHeader == false ->
+ file:write(FD, Response),
+ receive_msg(Socket, FD, WaitForHeader, Timeout);
+ {tcp, Socket, Response} ->
+ MsgBody = remove_header(Response),
+ file:write(FD, MsgBody),
+ receive_msg(Socket, FD, false, Timeout);
+ {tcp_error, Socket, _Reason} ->
+ gen_tcp:close(Socket),
+ throw({error, "http connection failed"})
+ after Timeout ->
+ gen_tcp:close(Socket),
+ throw({error, "http connection timedout"})
+ end.
+
+
+remove_header(<<"\r\n\r\n", MsgBody/binary>>) ->
+ MsgBody;
+remove_header(<<_C, Rest/binary>>) ->
+ remove_header(Rest).
+
+%%----------------------------------------------------------------------
+%% Function : create_connection(Host, Port, Timeout) -> Result
+%% Parameters: Host = string()
+%% Port = integer()
+%% Timeout = integer()
+%% Result : io_device()
+%% Description:
+%%----------------------------------------------------------------------
+create_connection(Host, Port, Timeout) ->
+ case gen_tcp:connect(Host, Port,[{packet,0}, binary, {reuseaddr,true}], Timeout) of
+ {ok,Socket} ->
+ Socket;
+ {error, Reason} ->
+ throw({error, lists:flatten(io_lib:format("Can't connect to ~s:~p ~p\n",
+ [Host, Port, Reason]))})
+ end.
+
+%%----------------------------------------------------------------------
+%% Function : http(Url) -> Result
+%% Parameters: Url = string()
+%% Result : {Host, PortInt, Key}
+%% Description:
+%%----------------------------------------------------------------------
+http("http://" ++ Address) ->
+ case string:tokens(Address, ":") of
+ [Host, Rest] ->
+ %% At his stage we know that address contains a Port number.
+ {Port, Key} = split_to_slash(Rest, []),
+ case catch list_to_integer(Port) of
+ PortInt when is_integer(PortInt) ->
+ {Host, PortInt, Key};
+ _ ->
+ throw({error, "Malformed key; port not an integer, should be http://Host:Port/path or http://Host/path"})
+ end;
+ [Address] ->
+ %% Use default port
+ {Host, Key} = split_to_slash(Address, []),
+ {Host, ?HTTP_DEF_PORT, Key};
+ _What ->
+ throw({error, "Malformed key; should be http://Host:Port/path or http://Host/path"})
+ end.
+
+%%----------------------------------------------------------------------
+%% Function : split_to_slash(String, Acc) -> Result
+%% Parameters: String = string()
+%% Acc = string()
+%% Result : {string(), string()}
+%% Description:
+%%----------------------------------------------------------------------
+split_to_slash([], _Acc) ->
+ throw({error, "No Key given Host:Port/Key"});
+split_to_slash([$/|Rest], Acc) ->
+ {lists:reverse(Acc), [$/|Rest]};
+split_to_slash([H|T], Acc) ->
+ split_to_slash(T, [H|Acc]).
+
+
+%%----------------------------------------------------------------------
+%% Function : create_tempfile(Template) -> Result
+%% Parameters: Template = string()
+%% Result : string()
+%% Description:
+%%----------------------------------------------------------------------
+create_tempfile(Template) ->
+ TmpDir =
+ case os:type() of
+ {unix, _} ->
+ case file:read_file_info("/tmp") of
+ {ok, _} ->
+ "/tmp";
+ {error,enoent} ->
+ throw({error, "/tmp doesn't exist"})
+ end;
+ {win32, _} ->
+ case os:getenv("TMP") of
+ false ->
+ case os:getenv("TEMP") of
+ false ->
+ throw({error, "Variabel TMP or TEMP doesn't exist"});
+ P2 ->
+ P2
+ end;
+ P1 ->
+ P1
+ end
+ end,
+ TmpNameBase = filename:join([TmpDir, os:getpid() ++ Template ++ "."]),
+ create_tempfile_1(TmpNameBase, 1).
+
+create_tempfile_1(TmpNameBase, N) ->
+ FileName = TmpNameBase ++ integer_to_list(N),
+ case file:open(FileName, [write, binary]) of
+ {error, _Reason} ->
+ create_tempfile_1(TmpNameBase, N+1);
+ {ok, FD} ->
+ {FileName, FD}
+ end.
+
+
+%%----------------------------------------------------------------------
+%% Function : filter_endtag_stack(EndTagStack) -> Result
+%% Parameters: EndTagStack = [{term(), string(), string(),
+%% term(), nslist(), nslist()}]
+%% Result : [string()]
+%% Description: Returns a stack with just local names.
+%%----------------------------------------------------------------------
+filter_endtag_stack(EndTagStack) ->
+ filter_endtag_stack(EndTagStack,[]).
+
+filter_endtag_stack([], Acc) ->
+ lists:reverse(Acc);
+filter_endtag_stack([{_,_,N,_,_,_}| Ts], Acc) ->
+ filter_endtag_stack(Ts, [N |Acc]).
+
+
+%%----------------------------------------------------------------------
+%% Function : format_error(Tag, State, Reason) -> Result
+%% Parameters: Tag = atom(),
+%% State = xmerl_sax_parser_state()
+%% Reason = string()
+%% Result : {atom(), {string(), string(), integer()}, string(), [string()], event_state()}
+%% Description: Format the resulting error tuple
+%%----------------------------------------------------------------------
+format_error(Tag, State, Reason) ->
+ {Tag,
+ {
+ State#xmerl_sax_parser_state.current_location,
+ State#xmerl_sax_parser_state.entity,
+ State#xmerl_sax_parser_state.line_no
+ },
+ Reason,
+ filter_endtag_stack(State#xmerl_sax_parser_state.end_tags),
+ State#xmerl_sax_parser_state.event_state}.
+
diff --git a/lib/xmerl/src/xmerl_sax_parser_latin1.erlsrc b/lib/xmerl/src/xmerl_sax_parser_latin1.erlsrc
new file mode 100644
index 0000000000..f27758069d
--- /dev/null
+++ b/lib/xmerl/src/xmerl_sax_parser_latin1.erlsrc
@@ -0,0 +1,40 @@
+%%-*-erlang-*-
+%%--------------------------------------------------------------------
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2008-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%----------------------------------------------------------------------
+%% File : xmerl_sax_parser_latin1.erl
+%% Description :
+%%
+%% Created : 26 May 2008
+%%----------------------------------------------------------------------
+-module(xmerl_sax_parser_latin1).
+
+%%----------------------------------------------------------------------
+%% Macros
+%%----------------------------------------------------------------------
+-define(STRING_EMPTY, <<>>).
+-define(STRING(MatchStr), <<MatchStr>>).
+-define(STRING_REST(MatchStr, Rest), <<MatchStr, Rest/binary>>).
+-define(APPEND_STRING(Rest, New), <<Rest/binary, New/binary>>).
+-define(TO_INPUT_FORMAT(Val), unicode:characters_to_binary(Val, unicode, latin1)).
+
+%% STRING_REST and STRING_UNBOUND_REST is only different in the list case
+-define(STRING_UNBOUND_REST(MatchChar, Rest), <<MatchChar, Rest/binary>>).
+-define(BYTE_ORDER_MARK_1, undefined_bom1).
+-define(BYTE_ORDER_MARK_2, undefined_bom2).
+-define(BYTE_ORDER_MARK_REST(Rest), <<undefined, Rest/binary>>).
diff --git a/lib/xmerl/src/xmerl_sax_parser_list.erlsrc b/lib/xmerl/src/xmerl_sax_parser_list.erlsrc
new file mode 100644
index 0000000000..26fec41cb9
--- /dev/null
+++ b/lib/xmerl/src/xmerl_sax_parser_list.erlsrc
@@ -0,0 +1,40 @@
+%%-*-erlang-*-
+%%--------------------------------------------------------------------
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2008-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%----------------------------------------------------------------------
+%% File : xmerl_sax_parser_list.erl
+%% Description :
+%%
+%% Created : 25 Apr 2008
+%%----------------------------------------------------------------------
+-module(xmerl_sax_parser_list).
+
+%%----------------------------------------------------------------------
+%% Macros
+%%----------------------------------------------------------------------
+-define(STRING_EMPTY, []).
+-define(STRING(MatchStr), MatchStr).
+-define(STRING_REST(MatchStr, Rest), MatchStr ++ Rest).
+-define(APPEND_STRING(Rest, New), Rest ++ New).
+-define(TO_INPUT_FORMAT(Val), Val).
+
+%% In the list case we can't use a '++' when matchin against an unbound variable
+-define(STRING_UNBOUND_REST(MatchChar, Rest), [MatchChar | Rest]).
+-define(BYTE_ORDER_MARK_1, undefined_bom1).
+-define(BYTE_ORDER_MARK_2, undefined_bom2).
+-define(BYTE_ORDER_MARK_REST(Rest), [undefined|Rest]).
diff --git a/lib/xmerl/src/xmerl_sax_parser_utf16be.erlsrc b/lib/xmerl/src/xmerl_sax_parser_utf16be.erlsrc
new file mode 100644
index 0000000000..fae5346e6a
--- /dev/null
+++ b/lib/xmerl/src/xmerl_sax_parser_utf16be.erlsrc
@@ -0,0 +1,40 @@
+%%-*-erlang-*-
+%%--------------------------------------------------------------------
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2008-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%----------------------------------------------------------------------
+%% File : xmerl_sax_parser_utf16be.erl
+%% Description :
+%%
+%% Created : 26 May 2008
+%%----------------------------------------------------------------------
+-module(xmerl_sax_parser_utf16be).
+
+%%----------------------------------------------------------------------
+%% Macros
+%%----------------------------------------------------------------------
+-define(STRING_EMPTY, <<>>).
+-define(STRING(MatchStr), <<MatchStr/big-utf16>>).
+-define(STRING_REST(MatchStr, Rest), <<MatchStr/big-utf16, Rest/binary>>).
+-define(APPEND_STRING(Rest, New), <<Rest/binary, New/binary>>).
+-define(TO_INPUT_FORMAT(Val), unicode:characters_to_binary(Val, unicode, {utf16, big})).
+
+%% STRING_REST and STRING_UNBOUND_REST is only different in the list case
+-define(STRING_UNBOUND_REST(MatchChar, Rest), <<MatchChar/big-utf16, Rest/binary>>).
+-define(BYTE_ORDER_MARK_1, undefined).
+-define(BYTE_ORDER_MARK_2, <<16#FE>>).
+-define(BYTE_ORDER_MARK_REST(Rest), <<16#FE, 16#FF, Rest/binary>>).
diff --git a/lib/xmerl/src/xmerl_sax_parser_utf16le.erlsrc b/lib/xmerl/src/xmerl_sax_parser_utf16le.erlsrc
new file mode 100644
index 0000000000..5e1f0a217c
--- /dev/null
+++ b/lib/xmerl/src/xmerl_sax_parser_utf16le.erlsrc
@@ -0,0 +1,40 @@
+%%-*-erlang-*-
+%%--------------------------------------------------------------------
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2008-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%----------------------------------------------------------------------
+%% File : xmerl_sax_parser_utf16le.erl
+%% Description :
+%%
+%% Created : 26 May 2008
+%%----------------------------------------------------------------------
+-module(xmerl_sax_parser_utf16le).
+
+%%----------------------------------------------------------------------
+%% Macros
+%%----------------------------------------------------------------------
+-define(STRING_EMPTY, <<>>).
+-define(STRING(MatchStr), <<MatchStr/little-utf16>>).
+-define(STRING_REST(MatchStr, Rest), <<MatchStr/little-utf16, Rest/binary>>).
+-define(APPEND_STRING(Rest, New), <<Rest/binary, New/binary>>).
+-define(TO_INPUT_FORMAT(Val), unicode:characters_to_binary(Val, unicode, {utf16, little})).
+
+%% STRING_REST and STRING_UNBOUND_REST is only different in the list case
+-define(STRING_UNBOUND_REST(MatchChar, Rest), <<MatchChar/little-utf16, Rest/binary>>).
+-define(BYTE_ORDER_MARK_1, undefined).
+-define(BYTE_ORDER_MARK_2, <<16#FF>>).
+-define(BYTE_ORDER_MARK_REST(Rest), <<16#FF, 16#FE, Rest/binary>>).
diff --git a/lib/xmerl/src/xmerl_sax_parser_utf8.erlsrc b/lib/xmerl/src/xmerl_sax_parser_utf8.erlsrc
new file mode 100644
index 0000000000..542aca5a3a
--- /dev/null
+++ b/lib/xmerl/src/xmerl_sax_parser_utf8.erlsrc
@@ -0,0 +1,43 @@
+%%-*-erlang-*-
+%%--------------------------------------------------------------------
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2008-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%----------------------------------------------------------------------
+%% File : xmerl_sax_parser_utf8.erl
+%% Description :
+%%
+%% Created : 27 May 2008
+%%----------------------------------------------------------------------
+-module(xmerl_sax_parser_utf8).
+
+%%----------------------------------------------------------------------
+%% Macros
+%%----------------------------------------------------------------------
+-define(STRING_EMPTY, <<>>).
+-define(STRING(MatchStr), <<MatchStr/utf8>>).
+-define(STRING_REST(MatchStr, Rest), <<MatchStr/utf8, Rest/binary>>).
+-define(APPEND_STRING(Rest, New), <<Rest/binary, New/binary>>).
+-define(TO_INPUT_FORMAT(Val), unicode:characters_to_binary(Val, unicode, utf8)).
+
+
+%% STRING_REST and STRING_UNBOUND_REST is only different in the list case
+-define(STRING_UNBOUND_REST(MatchChar, Rest), <<MatchChar/utf8, Rest/binary>>).
+-define(BYTE_ORDER_MARK_1, <<16#EF>>).
+-define(BYTE_ORDER_MARK_2, <<16#EF, 16#BB>>).
+-define(BYTE_ORDER_MARK_REST(Rest), <<16#EF, 16#BB, 16#BF, Rest/binary>>).
+
+
diff --git a/lib/xmerl/src/xmerl_sax_simple_dom.erl b/lib/xmerl/src/xmerl_sax_simple_dom.erl
new file mode 100644
index 0000000000..58a11f70fe
--- /dev/null
+++ b/lib/xmerl/src/xmerl_sax_simple_dom.erl
@@ -0,0 +1,263 @@
+%%-*-erlang-*-
+%%--------------------------------------------------------------------
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%----------------------------------------------------------------------
+%% File : xmerl_sax_simple_dom.erl
+%% Description :
+%%
+%% Created : 17 Apr 2009
+%%----------------------------------------------------------------------
+-module(xmerl_sax_simple_dom).
+
+%%----------------------------------------------------------------------
+%% Include files
+%%----------------------------------------------------------------------
+-include("xmerl_sax_old_dom.hrl").
+
+%%----------------------------------------------------------------------
+%% External exports
+%%----------------------------------------------------------------------
+-export([
+ initial_state/0,
+ get_dom/1,
+ event/3
+ ]).
+
+%%----------------------------------------------------------------------
+%% Internal exports
+%%----------------------------------------------------------------------
+-export([
+ ]).
+
+%%======================================================================
+%% Macros
+%%======================================================================
+%%----------------------------------------------------------------------
+%% Error handling
+%%----------------------------------------------------------------------
+-define(error(Reason),
+ throw({sax_simple_dom_error, Reason})).
+
+%%----------------------------------------------------------------------
+
+%%======================================================================
+%% Records
+%%======================================================================
+
+%%----------------------------------------------------------------------
+%% State record for the validator
+%%----------------------------------------------------------------------
+-record(xmerl_sax_simple_dom_state, {
+ tags=[], %% Tag stack
+ cno=[], %% Current node number
+ namespaces = [], %% NameSpace stack
+ dom=[] %% DOM structure
+ }).
+
+%%======================================================================
+%% External functions
+%%======================================================================
+
+%%----------------------------------------------------------------------
+%% Function: initial_state() -> Result
+%% Parameters:
+%% Result:
+%% Description:
+%%----------------------------------------------------------------------
+initial_state() ->
+ #xmerl_sax_simple_dom_state{}.
+
+%%----------------------------------------------------------------------
+%% Function: get_dom(State) -> Result
+%% Parameters:
+%% Result:
+%% Description:
+%%----------------------------------------------------------------------
+get_dom(#xmerl_sax_simple_dom_state{dom=Dom}) ->
+ Dom.
+
+%%----------------------------------------------------------------------
+%% Function: event(Event, LineNo, State) -> Result
+%% Parameters:
+%% Result:
+%% Description:
+%%----------------------------------------------------------------------
+event(Event, _LineNo, State) ->
+ build_dom(Event, State).
+
+
+%%======================================================================
+%% Internal functions
+%%======================================================================
+
+%%----------------------------------------------------------------------
+%% Function : build_dom(Event, State) -> Result
+%% Parameters: Event = term()
+%% State = #xmerl_sax_simple_dom_state{}
+%% Result : #xmerl_sax_simple_dom_state{} |
+%% Description:
+%%----------------------------------------------------------------------
+
+%% Document
+%%----------------------------------------------------------------------
+build_dom(startDocument, State) ->
+ State#xmerl_sax_simple_dom_state{dom=[startDocument]};
+build_dom(endDocument,
+ #xmerl_sax_simple_dom_state{dom=[{Tag, Attributes, Content} |D]} = State) ->
+ case D of
+ [startDocument] ->
+ State#xmerl_sax_simple_dom_state{dom=[{Tag, Attributes,
+ lists:reverse(Content)}]};
+ [#xmlDecl{} = Decl, startDocument] ->
+ State#xmerl_sax_simple_dom_state{dom=[Decl, {Tag, Attributes,
+ lists:reverse(Content)}]};
+ _ ->
+ io:format("~p\n", [D]),
+ ?error("we're not at end the document when endDocument event is encountered.")
+ end;
+
+%% Element
+%%----------------------------------------------------------------------
+build_dom({startElement, _Uri, LocalName, _QName, Attributes},
+ #xmerl_sax_simple_dom_state{tags=T, cno=CN, dom=D} = State) ->
+
+ A = parse_attributes(LocalName, Attributes),
+ {Num, NewCN} =
+ case CN of
+ [] ->
+ {1, [1]};
+ [ N |CNs] ->
+ {N, [1, N+1 |CNs]}
+ end,
+
+ State#xmerl_sax_simple_dom_state{tags=[{list_to_atom(LocalName), Num} |T],
+ cno=NewCN,
+ dom=[{list_to_atom(LocalName),
+ lists:reverse(A),
+ []
+ } | D]};
+build_dom({endElement, _Uri, LocalName, _QName},
+ #xmerl_sax_simple_dom_state{tags=[_ |T],
+ cno=[_ |CN],
+ dom=[{CName, CAttributes, CContent},
+ {PName, PAttributes, PContent} | D]} = State) ->
+ case list_to_atom(LocalName) of
+ CName ->
+ State#xmerl_sax_simple_dom_state{tags=T,
+ cno=CN,
+ dom=[{PName, PAttributes,
+ [{CName, CAttributes,
+ lists:reverse(CContent)}
+ |PContent]
+ } | D]};
+ _ ->
+ ?error("Got end of element: " ++ LocalName ++ " but expected: " ++
+ CName)
+ end;
+
+%% Text
+%%----------------------------------------------------------------------
+build_dom({characters, String},
+ #xmerl_sax_simple_dom_state{tags=_T,
+ cno=[Num |CN],
+ dom=[{Name, Attributes, Content}| D]} = State) ->
+ State#xmerl_sax_simple_dom_state{cno=[Num+1 |CN],
+ dom=[{Name, Attributes, [String|Content]} | D]};
+
+build_dom({ignorableWhitespace, String},
+ #xmerl_sax_simple_dom_state{tags=T,
+ cno=[Num |CN],
+ dom=[{Name, Attributes, Content}| D]} = State) ->
+ State#xmerl_sax_simple_dom_state{cno=[Num+1 |CN],
+ dom=[{Name, Attributes, [#xmlText{value=String, parents=T, pos=Num, type=text}|Content]} | D]};
+
+%% Comments
+%%----------------------------------------------------------------------
+build_dom({comment, String},
+ #xmerl_sax_simple_dom_state{tags=T,
+ cno=[Num |CN],
+ dom=[#xmlElement{content=C} = Current| D]} = State) ->
+ State#xmerl_sax_simple_dom_state{cno=[Num+1 |CN],
+ dom=[Current#xmlElement{content=[#xmlComment{parents=T, pos=Num, value=String}|C]} | D]};
+
+%% NameSpaces
+%%----------------------------------------------------------------------
+build_dom({startPrefixMapping, [], _Uri}, State) ->
+ State;
+build_dom({startPrefixMapping, Prefix, Uri},
+ #xmerl_sax_simple_dom_state{namespaces=NS} = State) ->
+ State#xmerl_sax_simple_dom_state{namespaces=[{Prefix, list_to_atom(Uri)} |NS]};
+build_dom({endPrefixMapping, Prefix},
+ #xmerl_sax_simple_dom_state{namespaces=[{Prefix, _} |NS]} = State) ->
+ State#xmerl_sax_simple_dom_state{namespaces=NS};
+
+%% Processing instructions
+%%----------------------------------------------------------------------
+build_dom({processingInstruction,"xml", PiData},
+ #xmerl_sax_simple_dom_state{dom=D} = State) ->
+ {Vsn, PiData1} = find_and_remove_attribute("version", PiData, []),
+ {Enc, PiData2} = find_and_remove_attribute("encoding", PiData1, []),
+ {Standalone, PiData3} = find_and_remove_attribute("standalone", PiData2, yes),
+ State#xmerl_sax_simple_dom_state{dom=[#xmlDecl{vsn=Vsn, encoding=Enc, standalone=Standalone, attributes=PiData3}| D]};
+build_dom({processingInstruction, PiTarget, PiData},
+ #xmerl_sax_simple_dom_state{cno=[Num |CN],
+ dom=[#xmlElement{content=C} = Current| D]} = State) ->
+ State#xmerl_sax_simple_dom_state{cno=[Num+1 |CN],
+ dom=[Current#xmlElement{content=[#xmlPI{name=PiTarget,pos=Num, value=PiData}
+ |C]} | D]};
+
+
+%% Default
+%%----------------------------------------------------------------------
+build_dom(_E, State) ->
+ State.
+
+
+
+
+
+%%----------------------------------------------------------------------
+%% Function : parse_attributes(ElName, Attributes) -> Result
+%% Parameters:
+%% Result :
+%% Description:
+%%----------------------------------------------------------------------
+parse_attributes(ElName, Attributes) ->
+ parse_attributes(ElName, Attributes, 1, []).
+
+parse_attributes(_, [], _, Acc) ->
+ Acc;
+parse_attributes(ElName, [{_Uri, _Prefix, LocalName, AttrValue} |As], N, Acc) ->
+ parse_attributes(ElName, As, N+1, [{list_to_atom(LocalName), AttrValue} |Acc]).
+
+
+
+%%----------------------------------------------------------------------
+%% Function : find_and_remove_attribute(Key, Data, Default) -> Result
+%% Parameters:
+%% Result :
+%% Description:
+%%----------------------------------------------------------------------
+find_and_remove_attribute(Key, Data, Default) ->
+ case lists:keysearch(Key, 1, Data) of
+ {value, {Key, Value}} ->
+ Data2 = lists:keydelete(Key, 1, Data),
+ {Value, Data2};
+ false ->
+ {Default, Data}
+ end.
diff --git a/lib/xmerl/src/xmerl_scan.erl b/lib/xmerl/src/xmerl_scan.erl
new file mode 100644
index 0000000000..4e5cc59d8f
--- /dev/null
+++ b/lib/xmerl/src/xmerl_scan.erl
@@ -0,0 +1,4088 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2003-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%% Description : Simgle-pass XML scanner. See xmerl.hrl for data defs.
+
+%% @doc This module is the interface to the XML parser, it handles XML 1.0.
+%% The XML parser is activated through
+%% <tt>xmerl_scan:string/[1,2]</tt> or
+%% <tt>xmerl_scan:file/[1,2]</tt>.
+%% It returns records of the type defined in xmerl.hrl.
+%% See also <a href="xmerl_examples.html">tutorial</a> on customization
+%% functions.
+%% @type global_state(). <p>
+%% The global state of the scanner, represented by the #xmerl_scanner{} record.
+%% </p>
+%% @type option_list(). <p>Options allow to customize the behaviour of the
+%% scanner.
+%% See also <a href="xmerl_examples.html">tutorial</a> on customization
+%% functions.
+%% </p>
+%% Possible options are:
+%% <dl>
+%% <dt><code>{acc_fun, Fun}</code></dt>
+%% <dd>Call back function to accumulate contents of entity.</dd>
+%% <dt><code>{continuation_fun, Fun} |
+%% {continuation_fun, Fun, ContinuationState}</code></dt>
+%% <dd>Call back function to decide what to do if the scanner runs into EOF
+%% before the document is complete.</dd>
+%% <dt><code>{event_fun, Fun} |
+%% {event_fun, Fun, EventState}</code></dt>
+%% <dd>Call back function to handle scanner events.</dd>
+%% <dt><code>{fetch_fun, Fun} |
+%% {fetch_fun, Fun, FetchState}</code></dt>
+%% <dd>Call back function to fetch an external resource.</dd>
+%% <dt><code>{hook_fun, Fun} |
+%% {hook_fun, Fun, HookState}</code></dt>
+%% <dd>Call back function to process the document entities once
+%% identified.</dd>
+%% <dt><code>{close_fun, Fun}</code></dt>
+%% <dd>Called when document has been completely parsed.</dd>
+%% <dt><code>{rules, ReadFun, WriteFun, RulesState} |
+%% {rules, Rules}</code></dt>
+%% <dd>Handles storing of scanner information when parsing.</dd>
+%% <dt><code>{user_state, UserState}</code></dt>
+%% <dd>Global state variable accessible from all customization functions</dd>
+%%
+%% <dt><code>{fetch_path, PathList}</code></dt>
+%% <dd>PathList is a list of
+%% directories to search when fetching files. If the file in question
+%% is not in the fetch_path, the URI will be used as a file
+%% name.</dd>
+%% <dt><code>{space, Flag}</code></dt>
+%% <dd>'preserve' (default) to preserve spaces, 'normalize' to
+%% accumulate consecutive whitespace and replace it with one space.</dd>
+%% <dt><code>{line, Line}</code></dt>
+%% <dd>To specify starting line for scanning in document which contains
+%% fragments of XML.</dd>
+%% <dt><code>{namespace_conformant, Flag}</code></dt>
+%% <dd>Controls whether to behave as a namespace conformant XML parser,
+%% 'false' (default) to not otherwise 'true'.</dd>
+%% <dt><code>{validation, Flag}</code></dt>
+%% <dd>Controls whether to process as a validating XML parser:
+%% 'off' (default) no validation, or validation 'dtd' by DTD or 'schema'
+%% by XML Schema. 'false' and 'true' options are obsolete
+%% (i.e. they may be removed in a future release), if used 'false'
+%% equals 'off' and 'true' equals 'dtd'.</dd>
+%% <dt><code>{schemaLocation, [{Namespace,Link}|...]}</code></dt>
+%% <dd>Tells explicitly which XML Schema documents to use to validate
+%% the XML document. Used together with the
+%% <code>{validation,schema}</code> option.</dd>
+%% <dt><code>{quiet, Flag}</code></dt>
+%% <dd>Set to 'true' if xmerl should behave quietly and not output any
+%% information to standard output (default 'false').</dd>
+%% <dt><code>{doctype_DTD, DTD}</code></dt>
+%% <dd>Allows to specify DTD name when it isn't available in the XML
+%% document. This option has effect only together with
+%% <code>{validation,'dtd'</code> option.</dd>
+%% <dt><code>{xmlbase, Dir}</code></dt>
+%% <dd>XML Base directory. If using string/1 default is current directory.
+%% If using file/1 default is directory of given file.</dd>
+%% <dt><code>{encoding, Enc}</code></dt>
+%% <dd>Set default character set used (default UTF-8).
+%% This character set is used only if not explicitly given by the XML
+%% declaration. </dd>
+%% </dl>
+
+-module(xmerl_scan).
+-vsn('0.20').
+-date('03-09-16').
+
+%% main API
+-export([string/1, string/2,
+ file/1, file/2]).
+
+%% access functions for various states
+-export([user_state/1, user_state/2,
+ event_state/1, event_state/2,
+ hook_state/1, hook_state/2,
+ rules_state/1, rules_state/2,
+ fetch_state/1, fetch_state/2,
+ cont_state/1, cont_state/2]).
+
+%% helper functions. To xmerl_lib ??
+-export([accumulate_whitespace/4]).
+
+%-define(debug, 1).
+-include("xmerl.hrl"). % record def, macros
+-include("xmerl_internal.hrl").
+-include_lib("kernel/include/file.hrl").
+
+
+-define(fatal(Reason, S),
+ if
+ S#xmerl_scanner.quiet ->
+ ok;
+ true ->
+ ok=io:format("~p- fatal: ~p~n", [?LINE, Reason])
+ end,
+ fatal(Reason, S)).
+
+
+-define(ustate(U, S), S#xmerl_scanner{user_state = U}).
+
+
+%% Functions to access the various states
+
+%%% @spec user_state(S::global_state()) -> global_state()
+%%% @equiv user_state(UserState,S)
+user_state(#xmerl_scanner{user_state = S}) -> S.
+
+%%% @spec event_state(S::global_state()) -> global_state()
+%%% @equiv event_state(EventState,S)
+event_state(#xmerl_scanner{fun_states = #xmerl_fun_states{event = S}}) -> S.
+
+%%% @spec hook_state(S::global_state()) -> global_state()
+%%% @equiv hook_state(HookState,S)
+hook_state(#xmerl_scanner{fun_states = #xmerl_fun_states{hook = S}}) -> S.
+
+%%% @spec rules_state(S::global_state()) -> global_state()
+%%% @equiv rules_state(RulesState,S)
+rules_state(#xmerl_scanner{fun_states = #xmerl_fun_states{rules = S}}) -> S.
+
+%%% @spec fetch_state(S::global_state()) -> global_state()
+%%% @equiv fetch_state(FetchState,S)
+fetch_state(#xmerl_scanner{fun_states = #xmerl_fun_states{fetch = S}}) -> S.
+
+%%% @spec cont_state(S::global_state()) -> global_state()
+%%% @equiv cont_state(ContinuationState,S)
+cont_state(#xmerl_scanner{fun_states = #xmerl_fun_states{cont = S}}) -> S.
+
+
+%%%% Functions to modify the various states
+
+%%% @spec user_state(UserState, S::global_state()) -> global_state()
+%%% @doc For controlling the UserState, to be used in a user function.
+%%% See <a href="xmerl_examples.html">tutorial</a> on customization functions.
+user_state(X, S) ->
+ S#xmerl_scanner{user_state = X}.
+
+%%% @spec event_state(EventState, S::global_state()) -> global_state()
+%%% @doc For controlling the EventState, to be used in an event
+%%% function, and called at the beginning and at the end of a parsed entity.
+%%% See <a href="xmerl_examples.html">tutorial</a> on customization functions.
+event_state(X, S=#xmerl_scanner{fun_states = FS}) ->
+ FS1 = FS#xmerl_fun_states{event = X},
+ S#xmerl_scanner{fun_states = FS1}.
+
+%%% @spec hook_state(HookState, S::global_state()) -> global_state()
+%%% @doc For controlling the HookState, to be used in a hook
+%%% function, and called when the parser has parsed a complete entity.
+%%% See <a href="xmerl_examples.html">tutorial</a> on customization functions.
+hook_state(X, S=#xmerl_scanner{fun_states = FS}) ->
+ FS1 = FS#xmerl_fun_states{hook = X},
+ S#xmerl_scanner{fun_states = FS1}.
+
+%%% @spec rules_state(RulesState, S::global_state()) -> global_state()
+%%% @doc For controlling the RulesState, to be used in a rules
+%%% function, and called when the parser store scanner information in a rules
+%%% database.
+%%% See <a href="xmerl_examples.html">tutorial</a> on customization functions.
+rules_state(X, S=#xmerl_scanner{fun_states = FS}) ->
+ FS1 = FS#xmerl_fun_states{rules = X},
+ S#xmerl_scanner{fun_states = FS1}.
+
+%%% @spec fetch_state(FetchState, S::global_state()) -> global_state()
+%%% @doc For controlling the FetchState, to be used in a fetch
+%%% function, and called when the parser fetch an external resource (eg. a DTD).
+%%% See <a href="xmerl_examples.html">tutorial</a> on customization functions.
+fetch_state(X, S=#xmerl_scanner{fun_states = FS}) ->
+ FS1 = FS#xmerl_fun_states{fetch = X},
+ S#xmerl_scanner{fun_states = FS1}.
+
+%%% @spec cont_state(ContinuationState, S::global_state()) -> global_state()
+%%% @doc For controlling the ContinuationState, to be used in a continuation
+%%% function, and called when the parser encounters the end of the byte stream.
+%%% See <a href="xmerl_examples.html">tutorial</a> on customization functions.
+cont_state(X, S=#xmerl_scanner{fun_states = FS}) ->
+ FS1 = FS#xmerl_fun_states{cont = X},
+ S#xmerl_scanner{fun_states = FS1}.
+
+
+%% @spec file(Filename::string()) -> {xmlElement(),Rest}
+%% Rest = list()
+%% @equiv file(Filename, [])
+file(F) ->
+ file(F, []).
+
+%% @spec file(Filename::string(), Options::option_list()) -> {xmlElement(),Rest}
+%% Rest = list()
+%%% @doc Parse file containing an XML document
+file(F, Options) ->
+ ExtCharset=case lists:keysearch(encoding,1,Options) of
+ {value,{_,Val}} -> Val;
+ false -> undefined
+ end,
+ case int_file(F,Options,ExtCharset) of
+ {Res, Tail,S=#xmerl_scanner{close_fun=Close}} ->
+ Close(S), % for side effects only - final state is dropped
+ {Res,Tail};
+ {error, Reason} ->
+ {error, Reason}
+ end.
+
+int_file(F, Options,_ExtCharset) ->
+ %%io:format("int_file F=~p~n",[F]),
+ case file:read_file(F) of
+ {ok, Bin} ->
+ int_string(binary_to_list(Bin), Options, filename:dirname(F),F);
+ Error ->
+ Error
+ end.
+
+int_file_decl(F, Options,_ExtCharset) ->
+% io:format("int_file_decl F=~p~n",[F]),
+ case file:read_file(F) of
+ {ok, Bin} ->
+ int_string_decl(binary_to_list(Bin), Options, filename:dirname(F),F);
+ Error ->
+ Error
+ end.
+
+%% @spec string(Text::list()) -> {xmlElement(),Rest}
+%% Rest = list()
+%% @equiv string(Test, [])
+string(Str) ->
+ string(Str, []).
+
+%% @spec string(Text::list(),Options::option_list()) -> {xmlElement(),Rest}
+%% Rest = list()
+%%% @doc Parse string containing an XML document
+string(Str, Options) ->
+ {Res, Tail, S=#xmerl_scanner{close_fun = Close}} =
+ int_string(Str, Options,file_name_unknown),
+ Close(S), % for side effects only - final state is dropped
+ {Res,Tail}.
+
+int_string(Str, Options,FileName) ->
+ {ok, XMLBase} = file:get_cwd(),
+ int_string(Str, Options, XMLBase, FileName).
+
+int_string(Str, Options, XMLBase, FileName) ->
+ S0=initial_state0(Options,XMLBase),
+ S = S0#xmerl_scanner{filename=FileName},
+ %%io:format("int_string1, calling xmerl_lib:detect_charset~n",[]),
+
+ %% In case of no encoding attribute in document utf-8 is default, but
+ %% another character set may be detected with help of Byte Order Marker or
+ %% with help of the encoding of the first 4 bytes.
+ case xmerl_lib:detect_charset(S#xmerl_scanner.encoding,Str) of
+ {auto,'iso-10646-utf-1',Str2} ->
+ scan_document(Str2, S#xmerl_scanner{encoding="iso-10646-utf-1"});
+ {external,'iso-10646-utf-1',Str2} ->
+ scan_document(Str2, S#xmerl_scanner{encoding="iso-10646-utf-1"});
+ {undefined,undefined,Str2} -> %% no auto detection
+ scan_document(Str2, S);
+ {external,ExtCharset,Str2} ->
+ %% no auto detection, ExtCharset is an explicitly provided
+ %% 7 bit,8 bit or utf-8 encoding
+ scan_document(Str2, S#xmerl_scanner{encoding=atom_to_list(ExtCharset)})
+ end.
+
+int_string_decl(Str, Options, XMLBase, FileName) ->
+ S0=initial_state0(Options,XMLBase),
+ S = S0#xmerl_scanner{filename=FileName},
+ case xmerl_lib:detect_charset(S#xmerl_scanner.encoding,Str) of
+ {auto,'iso-10646-utf-1',Str2} ->
+ scan_decl(Str2, S#xmerl_scanner{encoding="iso-10646-utf-1"});
+ {external,'iso-10646-utf-1',Str2} ->
+ scan_decl(Str2, S#xmerl_scanner{encoding="iso-10646-utf-1"});
+ {undefined,undefined,Str2} ->
+ scan_decl(Str2, S);
+ {external,ExtCharset,Str2} ->
+ scan_decl(Str2, S#xmerl_scanner{encoding=atom_to_list(ExtCharset)})
+ end.
+
+
+
+initial_state0(Options,XMLBase) ->
+ CommonData = common_data(),
+ initial_state(Options, #xmerl_scanner{
+ event_fun = fun event/2,
+ hook_fun = fun hook/2,
+ acc_fun = fun acc/3,
+ fetch_fun = fun fetch/2,
+ close_fun = fun close/1,
+ continuation_fun = fun cont/3,
+ rules_read_fun = fun rules_read/3,
+ rules_write_fun = fun rules_write/4,
+ rules_delete_fun= fun rules_delete/3,
+ xmlbase = XMLBase,
+ common_data = CommonData
+ }).
+
+initial_state([{event_fun, F}|T], S) ->
+ initial_state(T, S#xmerl_scanner{event_fun = F});
+initial_state([{event_fun, F, ES}|T], S) ->
+ S1 = event_state(ES, S#xmerl_scanner{event_fun = F}),
+ initial_state(T, S1);
+initial_state([{acc_fun, F}|T], S) ->
+ initial_state(T, S#xmerl_scanner{acc_fun = F});
+initial_state([{hook_fun, F}|T], S) ->
+ initial_state(T, S#xmerl_scanner{hook_fun = F});
+initial_state([{hook_fun, F, HS}|T], S) ->
+ S1 = hook_state(HS, S#xmerl_scanner{hook_fun = F}),
+ initial_state(T, S1);
+initial_state([{close_fun, F}|T], S) ->
+ initial_state(T, S#xmerl_scanner{close_fun = F});
+initial_state([{fetch_fun, F}|T], S) ->
+ initial_state(T, S#xmerl_scanner{fetch_fun = F});
+initial_state([{fetch_fun, F, FS}|T], S) ->
+ S1 = fetch_state(FS, S#xmerl_scanner{fetch_fun = F}),
+ initial_state(T, S1);
+initial_state([{fetch_path, P}|T], S) ->
+ initial_state(T, S#xmerl_scanner{fetch_path = P});
+initial_state([{continuation_fun, F}|T], S) ->
+ initial_state(T, S#xmerl_scanner{continuation_fun = F});
+initial_state([{continuation_fun, F, CS}|T], S) ->
+ S1 = cont_state(CS, S#xmerl_scanner{continuation_fun = F}),
+ initial_state(T, S1);
+initial_state([{rules, R}|T], S) ->
+ initial_state(T, S#xmerl_scanner{rules = R,
+ keep_rules = true});
+initial_state([{rules, Read, Write, RS}|T], S) ->
+ S1 = rules_state(RS, S#xmerl_scanner{rules_read_fun = Read,
+ rules_write_fun = Write,
+ keep_rules = true}),
+ initial_state(T, S1);
+initial_state([{user_state, F}|T], S) ->
+ initial_state(T, S#xmerl_scanner{user_state = F});
+initial_state([{space, L}|T], S) ->
+ initial_state(T, S#xmerl_scanner{space = L});
+initial_state([{line, L}|T], S) ->
+ initial_state(T, S#xmerl_scanner{line = L});
+initial_state([{namespace_conformant, F}|T], S) when F==true; F==false ->
+ initial_state(T, S#xmerl_scanner{namespace_conformant = F});
+initial_state([{validation, F}|T], S)
+ when F==off; F==dtd; F==schema; F==true; F==false ->
+ initial_state(T, S#xmerl_scanner{validation = validation_value(F)});
+initial_state([{schemaLocation, SL}|T], S) when is_list(SL) ->
+ initial_state(T, S#xmerl_scanner{schemaLocation=SL});
+initial_state([{quiet, F}|T], S) when F==true; F==false ->
+ initial_state(T, S#xmerl_scanner{quiet = F});
+initial_state([{doctype_DTD,DTD}|T], S) ->
+ initial_state(T,S#xmerl_scanner{doctype_DTD = DTD});
+initial_state([{text_decl,Bool}|T], S) ->
+ initial_state(T,S#xmerl_scanner{text_decl=Bool});
+initial_state([{environment,Env}|T], S) ->
+ initial_state(T,S#xmerl_scanner{environment=Env});
+initial_state([{xmlbase, D}|T], S) ->
+ initial_state(T, S#xmerl_scanner{xmlbase = D});
+initial_state([{encoding, Enc}|T], S) ->
+ initial_state(T, S#xmerl_scanner{encoding = Enc});
+initial_state([], S=#xmerl_scanner{rules = undefined}) ->
+ Tab = ets:new(rules, [set, public]),
+ S#xmerl_scanner{rules = Tab};
+initial_state([], S) ->
+ S.
+
+validation_value(true) ->
+ dtd;
+validation_value(false) ->
+ off;
+validation_value(F) ->
+ F.
+
+%% Used for compacting (some) indentations.
+%% See also fast_accumulate_whitespace().
+common_data() ->
+ {comdata(lists:duplicate(60, $\s), []),
+ comdata(lists:duplicate(15, $\t), []),
+ "\n"}.
+
+comdata([], CD)->
+ list_to_tuple(CD);
+comdata([_ | T]=L, CD) ->
+ comdata(T, [[$\n | L] | CD]).
+
+%%% -----------------------------------------------------
+%%% Default modifier functions
+
+%%% Hooks:
+%%% - {element, Line, Name, Attrs, Content}
+%%% - {processing_instruction, Line, Data}
+
+hook(X, State) ->
+ {X, State}.
+
+%%% Events:
+%%%
+%%% #xmerl_event{event : started | ended,
+%%% line : integer(),
+%%% col : integer(),
+%%% data}
+%%%
+%%% Data Events
+%%% document started, ended
+%%% #xmlElement started, ended
+%%% #xmlAttribute ended
+%%% #xmlPI ended
+%%% #xmlComment ended
+%%% #xmlText ended
+event(_X, S) ->
+ S.
+
+%% The acc/3 function can return either {Acc�, S'} or {Acc', Pos', S'},
+%% where Pos' can be derived from X#xmlElement.pos, X#xmlText.pos, or
+%% X#xmlAttribute.pos (whichever is the current object type.)
+%% The acc/3 function is not allowed to redefine the type of object
+%% being defined, but _is_ allowed to either ignore it or split it
+%% into multiple objects (in which case {Acc',Pos',S'} should be returned.)
+%% If {Acc',S'} is returned, Pos will be incremented by 1 by default.
+%% Below is an example of an acceptable operation
+acc(X = #xmlText{value = Text}, Acc, S) ->
+ {[X#xmlText{value = Text}|Acc], S};
+acc(X, Acc, S) ->
+ {[X|Acc], S}.
+
+fetch({system, URI}, S) ->
+ fetch_URI(URI, S);
+fetch({public, _PublicID, URI}, S) ->
+ fetch_URI(URI, S).
+
+%%% Always assume an external resource can be found locally! Thus
+%%% don't bother fetching with e.g. HTTP. Returns the path where the
+%%% resource is found. The path to the external resource is given by
+%%% URI directly or the option fetch_path (additional paths) or
+%%% directory (base path to external resource)
+fetch_URI(URI, S) ->
+ %% assume URI is a filename
+ Split = filename:split(URI),
+ Filename = fun([])->[];(X)->lists:last(X) end (Split),
+ Fullname =
+ case Split of %% how about Windows systems?
+ ["file:"|Name]-> %% absolute path, see RFC2396 sect 3
+ %% file:/dtd_name
+ filename:join(["/"|Name]);
+ ["/"|Rest] when Rest /= [] ->
+ %% absolute path name
+ URI;
+ ["http:"|_Rest] ->
+ {http,URI};
+ [] -> %% empty systemliteral
+ [];
+ _ ->
+ filename:join(S#xmerl_scanner.xmlbase, URI)
+ end,
+ Path = path_locate(S#xmerl_scanner.fetch_path, Filename, Fullname),
+ ?dbg("fetch(~p) -> {file, ~p}.~n", [URI, Path]),
+ {ok, Path, S}.
+
+path_locate(_, _, {http,_}=URI) ->
+ URI;
+path_locate(_, _, []) ->
+ [];
+path_locate([Dir|Dirs], FN, FullName) ->
+ F = filename:join(Dir, FN),
+ case file:read_file_info(F) of
+ {ok, #file_info{type = regular}} ->
+ {file,F};
+ _ ->
+ path_locate(Dirs, FN, FullName)
+ end;
+path_locate([], _FN, FullName) ->
+ {file,FullName}.
+
+
+cont(_F, Exception, US) ->
+ Exception(US).
+
+close(S) ->
+ S.
+
+
+%%% -----------------------------------------------------
+%%% Scanner
+
+%%% [1] document ::= prolog element Misc*
+scan_document(Str0, S=#xmerl_scanner{event_fun = Event,
+ line = L, col = C,
+ environment=Env,
+ encoding=Charset,
+ validation=ValidateResult}) ->
+ S1 = Event(#xmerl_event{event = started,
+ line = L,
+ col = C,
+ data = document}, S),
+
+ %% Transform to given character set.
+ %% Note that if another character set is given in the encoding
+ %% attribute in a XML declaration that one will be used later
+ Str=if
+ Charset == "utf-8" ->
+ Str0;
+ Charset=/=undefined -> % Default character set is UTF-8
+ xmerl_ucs:to_unicode(Str0,list_to_atom(Charset));
+ true -> %% Charset is undefined if no external input is
+ %% given, and no auto detection of character
+ %% encoding was made.
+ Str0
+ end,
+%% M1 = erlang:memory(),
+%% io:format("Memory status before prolog: ~p~n",[M1]),
+ {T1, S2} = scan_prolog(Str, S1, _StartPos = 1),
+%% M2 = erlang:memory(),
+%% io:format("Memory status after prolog: ~p~n",[M2]),
+ %%io:format("scan_document 2, prolog parsed~n",[]),
+ T2 = scan_mandatory("<",T1,1,S2,expected_element_start_tag),
+%% M3 = erlang:memory(),
+%% io:format("Memory status before element: ~p~n",[M3]),
+ {Res, T3, S3} =scan_element(T2,S2,_StartPos = 1),
+%% M4 = erlang:memory(),
+%% io:format("Memory status after element: ~p~n",[M4]),
+ {Tail, S4}=scan_misc(T3, S3, _StartPos = 1),
+%% M5 = erlang:memory(),
+%% io:format("Memory status after misc: ~p~n",[M5]),
+
+ S5 = #xmerl_scanner{} = Event(#xmerl_event{event = ended,
+ line = S4#xmerl_scanner.line,
+ col = S4#xmerl_scanner.col,
+ data = document}, S4),
+
+ {Res2,S6} = case validation_mode(ValidateResult) of
+ off ->
+ {Res,cleanup(S5)};
+ dtd when Env == element; Env == prolog ->
+ check_decl2(S5),
+ case xmerl_validate:validate(S5,Res) of
+ {'EXIT',{error,Reason}} ->
+ S5b=cleanup(S5),
+ ?fatal({failed_validation,Reason}, S5b);
+ {'EXIT',Reason} ->
+ S5b=cleanup(S5),
+ ?fatal({failed_validation,Reason}, S5b);
+ {error,Reason} ->
+ S5b=cleanup(S5),
+ ?fatal({failed_validation,Reason}, S5b);
+ {error,Reason,_Next} ->
+ S5b=cleanup(S5),
+ ?fatal({failed_validation,Reason}, S5b);
+ _XML ->
+ {Res,cleanup(S5)}
+ end;
+ schema ->
+ case schemaLocations(Res,S5) of
+ {ok,Schemas} ->
+ cleanup(S5),
+ %%io:format("Schemas: ~p~nRes: ~p~ninhertih_options(S): ~p~n",
+ %% [Schemas,Res,inherit_options(S5)]),
+ XSDRes = xmerl_xsd:process_validate(Schemas,Res,
+ inherit_options(S5)),
+ handle_schema_result(XSDRes,S5);
+ _ ->
+ {Res,cleanup(S5)}
+ end;
+ _ ->
+ {Res,cleanup(S5)}
+ end,
+
+ {Res2, Tail, S6}.
+
+
+scan_decl(Str, S=#xmerl_scanner{event_fun = Event,
+ line = L, col = C,
+ environment=_Env,
+ encoding=_Charset,
+ validation=_ValidateResult}) ->
+ S1 = Event(#xmerl_event{event = started,
+ line = L,
+ col = C,
+ data = document}, S),
+
+ case scan_prolog(Str, S1, _StartPos = 1) of
+ {T2="<"++_, S2} ->
+ {{S2#xmerl_scanner.user_state,T2},[],S2};
+ {[], S2}->
+ {[],[],S2};
+ {T2, S2} ->
+ {_,_,S3} = scan_content(T2,S2,[],_Attrs=[],S2#xmerl_scanner.space,
+ _Lang=[],_Parents=[],#xmlNamespace{}),
+ {T2,[],S3}
+ end.
+
+
+%%% [22] Prolog
+%%% prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
+%%%
+%% empty text declarations are handled by the first function clause.
+scan_prolog([], S=#xmerl_scanner{continuation_fun = F}, Pos) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_prolog(MoreBytes, S1, Pos) end,
+ fun(S1) -> {[], S1} end,
+ S);
+scan_prolog("<?xml"++T,S0=#xmerl_scanner{encoding=Charset0,col=Col,line=L},Pos)
+ when ?whitespace(hd(T)) ->
+ {Charset,T3, S3}=
+ if
+ Col==1,L==1,S0#xmerl_scanner.text_decl==true ->
+ ?dbg("prolog(\"<?xml\")~n", []),
+ ?bump_col(5),
+ {_,T1,S1} = mandatory_strip(T,S),
+ {Decl,T2, S2}=scan_text_decl(T1,S1),
+ Encoding=Decl#xmlDecl.encoding,
+ {Encoding,T2, S2#xmerl_scanner{encoding=Encoding}};
+ Col==1,L==1 ->
+ ?dbg("prolog(\"<?xml\")~n", []),
+ ?bump_col(5),
+ {Decl,T2, S2}=scan_xml_decl(T, S),
+ Encoding=Decl#xmlDecl.encoding,
+ {Encoding,T2, S2#xmerl_scanner{encoding=Encoding}};
+ true ->
+ ?fatal({xml_declaration_must_be_first_in_doc,Col,L},S0)
+ end,
+ %% Charset0 is either (1) 'iso-10646-utf-1' (transformation by
+ %% auto detection), (2) undefined (no auto detection and no
+ %% external encoding), (3) any other encoding format that must be
+ %% conformant to the internal explicitly given encoding. The two
+ %% former cases implies that the explicit internal encoding
+ %% (Charset) may be different from Charset0.
+
+ %% Now transform to declared character set.
+ if
+ Charset==Charset0 -> % Document already transformed to this charset!
+ scan_prolog(T3, S3, Pos);
+ Charset0=/=undefined ->
+ %% For example may an external entity
+ %% have the BOM for utf-16 and the internal
+ %% explicit encoding='utf-16', then it will be auto
+ %% detected and transformed, Charset0 will be
+ %% 'iso-10646-utf-1', and Charset will be 'utf-16', all
+ %% legal.
+ %%
+ scan_prolog(T3,S3#xmerl_scanner{encoding=Charset0},Pos);
+ Charset == "utf-8" ->
+ scan_prolog(T3, S3, Pos);
+ Charset=/=undefined -> % Document not previously transformed
+ T4=xmerl_ucs:to_unicode(T3,list_to_atom(Charset)),
+ scan_prolog(T4, S3, Pos);
+ true -> % No encoding info given
+ scan_prolog(T3, S3, Pos)
+ end;
+scan_prolog("<!DOCTYPE" ++ T, S0=#xmerl_scanner{environment=prolog,
+ encoding=_Charset}, Pos) ->
+ ?dbg("prolog(\"<!DOCTYPE\")~n", []),
+ ?bump_col(9),
+ %% If no known character set assume it is UTF-8
+ T1=if
+ %% Charset==undefined -> xmerl_ucs:to_unicode(T,'utf-8');
+ true -> T
+ end,
+ {T2, S1} = scan_doctype(T1, S),
+ scan_misc(T2, S1, Pos);
+scan_prolog(Str="%"++_T,S=#xmerl_scanner{environment={external,_}},_Pos) ->
+ scan_ext_subset(Str,S);
+scan_prolog(Str, S0 = #xmerl_scanner{user_state=_US,encoding=_Charset},Pos) ->
+ ?dbg("prolog(\"<\")~n", []),
+
+ %% Check for Comments, PI before possible DOCTYPE declaration
+ ?bump_col(1),
+ %% If no known character set assume it is UTF-8
+ T=if
+%% Charset==undefined -> xmerl_ucs:to_unicode(Str,'utf-8');
+ true -> Str
+ end,
+ {T1, S1}=scan_misc(T, S, Pos),
+ scan_prolog2(T1,S1,Pos).
+
+
+
+scan_prolog2([], S=#xmerl_scanner{continuation_fun = F}, Pos) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_prolog2(MoreBytes, S1, Pos) end,
+ fun(S1) -> {[], S1} end,
+ S);
+scan_prolog2("<!DOCTYPE" ++ T, S0=#xmerl_scanner{environment=prolog}, Pos) ->
+ ?dbg("prolog(\"<!DOCTYPE\")~n", []),
+ ?bump_col(9),
+ {T1, S1} = scan_doctype(T, S),
+ scan_misc(T1, S1, Pos);
+scan_prolog2(Str = "<!" ++ _, S, _Pos) ->
+ ?dbg("prolog(\"<!\")~n", []),
+ %% In e.g. a DTD, we jump directly to markup declarations
+ scan_ext_subset(Str, S);
+scan_prolog2(Str, S0 = #xmerl_scanner{user_state=_US},Pos) ->
+ ?dbg("prolog(\"<\")~n", []),
+
+ %% Here we consider the DTD provided by doctype_DTD option,
+ S1 =
+ case S0 of
+ #xmerl_scanner{validation=dtd,doctype_DTD=DTD} when is_list(DTD) ->
+ S=fetch_DTD(undefined,S0),
+ check_decl(S),
+ S;
+ _ -> S0
+ end,
+ %% Check for more Comments and PI after DOCTYPE declaration
+% ?bump_col(1),
+ scan_misc(Str, S1, Pos).
+
+
+
+
+%%% [27] Misc ::= Comment | PI | S
+%% Note:
+%% - Neither of Comment and PI are returned in the resulting parsed
+%% structure.
+%% - scan_misc/3 implements Misc* as that is how the rule is always used
+scan_misc([], S=#xmerl_scanner{continuation_fun = F}, Pos) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_misc(MoreBytes, S1, Pos) end,
+ fun(S1) -> {[], S1} end,
+ S);
+scan_misc("<!--" ++ T, S0, Pos) -> % Comment
+ ?bump_col(4),
+ {_, T1, S1} = scan_comment(T, S, Pos, _Parents = [], _Lang = []),
+ scan_misc(T1,S1,Pos);
+scan_misc("<?" ++ T, S0, Pos) -> % PI
+ ?dbg("prolog(\"<?\")~n", []),
+ ?bump_col(2),
+ {_PI, T1, S1} = scan_pi(T, S, Pos),
+ scan_misc(T1,S1,Pos);
+scan_misc(T=[H|_T], S, Pos) when ?whitespace(H) ->
+ ?dbg("prolog(whitespace)~n", []),
+ {_,T1,S1}=strip(T,S),
+ scan_misc(T1,S1,Pos);
+scan_misc(T,S,_Pos) ->
+ {T,S}.
+
+
+cleanup(S=#xmerl_scanner{keep_rules = false,
+ rules = Rules}) ->
+ ets:delete(Rules),
+ S#xmerl_scanner{rules = undefined};
+cleanup(S) ->
+ S.
+
+%%% Prolog and Document Type Declaration XML 1.0 Section 2.8
+%% [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
+%% [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
+scan_xml_decl(T, S) ->
+ %% VersionInfo [24] is mandatory
+ {_,T1,S1} = mandatory_strip(T,S),
+ {T2,S2} =
+ case T1 of
+ "version" ++ _T2 ->
+ {_T2,S1#xmerl_scanner{col=S1#xmerl_scanner.col+7}};
+ _ -> ?fatal(expected_version_attribute,S1)
+ end,
+ {T3, S3} = scan_eq(T2, S2),
+ {Vsn, T4, S4} = scan_xml_vsn(T3, S3),
+ Attr = #xmlAttribute{name = version,
+ parents = [{xml, _XMLPos = 1}],
+ value = Vsn},
+ scan_xml_decl(T4, S4, #xmlDecl{attributes = [Attr]}).
+
+scan_xml_decl([], S=#xmerl_scanner{continuation_fun = F}, Decl) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_xml_decl(MoreBytes, S1, Decl) end,
+ fun(S1) -> {[], [], S1} end,
+ S);
+scan_xml_decl("?>" ++ T, S0, Decl) ->
+ ?bump_col(2),
+ return_xml_decl(T,S,Decl);
+scan_xml_decl(T,S=#xmerl_scanner{event_fun = _Event},Decl) when ?whitespace(hd(T)) ->
+ {_,T1,S1}=mandatory_strip(T,S),
+ scan_xml_decl2(T1,S1,Decl);
+scan_xml_decl(_T,S=#xmerl_scanner{event_fun = _Event},_Decl) ->
+ ?fatal(preformat([expected,one,'of:'],['?>',whitespace_character],","),S).
+
+scan_xml_decl2("?>" ++ T, S0,Decl) ->
+ ?bump_col(2),
+ return_xml_decl(T,S,Decl);
+scan_xml_decl2("encoding" ++ T, S0 = #xmerl_scanner{event_fun = Event},
+ Decl0 = #xmlDecl{attributes = Attrs}) ->
+ %% [80] EncodingDecl
+ ?bump_col(8),
+ {T1, S1} = scan_eq(T, S),
+ {EncName, T2, S2} = scan_enc_name(T1, S1),
+ LowEncName=xmerl_lib:to_lower(EncName),
+ Attr = #xmlAttribute{name = encoding,
+ parents = [{xml, _XMLPos = 1}],
+ value = LowEncName},
+ Decl = Decl0#xmlDecl{encoding = LowEncName,
+ attributes = [Attr|Attrs]},
+ S3 = #xmerl_scanner{} = Event(#xmerl_event{event = ended,
+ line = S0#xmerl_scanner.line,
+ col = S0#xmerl_scanner.col,
+ data = Attr}, S2),
+ case T2 of
+ "?>" ++ _T3 ->
+ scan_xml_decl3(T2,S3,Decl);
+ _ ->
+ {_,T3,S4} = mandatory_strip(T2,S3),
+ scan_xml_decl3(T3, S4, Decl)
+ end;
+scan_xml_decl2(T="standalone" ++ _T,S,Decl) ->
+ scan_xml_decl3(T,S,Decl);
+scan_xml_decl2(_BadString,S,_Decl) ->
+ ?fatal(preformat([expected,one,'of:'],['?>',standalone,encoding],","),S).
+% ?fatal(lists:flatten(io_lib:format("~s ~s ~s: ~s, ~s, ~s",[expected,one,'of','?>',standalone,encoding])),S).
+% ?fatal({expected_one_of,"?>",standalone,encoding},S).
+
+scan_xml_decl3("?>" ++ T, S0,Decl) ->
+ ?bump_col(2),
+ return_xml_decl(T,S,Decl);
+scan_xml_decl3("standalone" ++ T,S0 = #xmerl_scanner{event_fun = Event},
+ Decl0 = #xmlDecl{attributes = Attrs}) ->
+ %% [32] SDDecl
+ ?bump_col(10),
+ {T1, S1} = scan_eq(T, S),
+ {StValue,T2,S2}=scan_standalone_value(T1,S1),
+ Attr = #xmlAttribute{name = standalone,
+ parents = [{xml, _XMLPos = 1}],
+ value = StValue},
+ Decl = Decl0#xmlDecl{standalone = StValue,
+ attributes = [Attr|Attrs]},
+ S3 = #xmerl_scanner{} = Event(#xmerl_event{event = ended,
+ line = S0#xmerl_scanner.line,
+ col = S0#xmerl_scanner.col,
+ data = Attr}, S2),
+ {_,T3,S4} = strip(T2,S3),
+ T4 = scan_mandatory("?>",T3,2,S4,expected_xml_decl_endtag),
+%% "?>" ++ T4 = T3,
+ return_xml_decl(T4, S4#xmerl_scanner{col=S4#xmerl_scanner.col+2}, Decl).
+
+
+return_xml_decl(T,S=#xmerl_scanner{hook_fun = _Hook,
+ event_fun = Event},
+ Decl0 = #xmlDecl{attributes = Attrs}) ->
+ ?strip1,
+ Decl = Decl0#xmlDecl{attributes = lists:reverse(Attrs)},
+ S2 = #xmerl_scanner{} = Event(#xmerl_event{event = ended,
+ line = S#xmerl_scanner.line,
+ col = S#xmerl_scanner.col,
+ data = Decl}, S1),
+%% {Ret, S3} = Hook(Decl, S2),
+%% {Ret, T1, S3}.
+ {Decl, T1, S2}.
+
+
+scan_standalone_value("'yes'" ++T,S0)->
+ ?bump_col(5),
+ {'yes',T,S#xmerl_scanner{standalone=yes}};
+scan_standalone_value("\"yes\"" ++T,S0)->
+ ?bump_col(5),
+ {'yes',T,S#xmerl_scanner{standalone=yes}};
+scan_standalone_value("'no'" ++T,S0) ->
+ ?bump_col(4),
+ {'no',T,S};
+scan_standalone_value("\"no\"" ++T,S0) ->
+ ?bump_col(4),
+ {'no',T,S}.
+
+%%%
+%%% Text declaration XML 1.0 section 4.3.1
+%%% [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
+scan_text_decl(T,S=#xmerl_scanner{event_fun = Event}) ->
+ {#xmlDecl{attributes=Attrs}=Decl0,T1,S1} = scan_optional_version(T,S),
+ T2 =
+ case T1 of
+ "encoding" ++ _T2 -> _T2;
+ _ ->
+ ?fatal(expected_encoding_attribute,S1)
+ end,
+ S2 = S1#xmerl_scanner{col = S1#xmerl_scanner.col + 8},
+ {T3, S3} = scan_eq(T2, S2),
+ {EncName, T4, S4} = scan_enc_name(T3, S3),
+ LowEncName=xmerl_lib:to_lower(EncName),
+ ?strip5,
+ Attr = #xmlAttribute{name = encoding,
+ parents = [{xml,1}],
+ value = LowEncName},
+ Decl = Decl0#xmlDecl{encoding = LowEncName,
+ attributes = [Attr|Attrs]},
+ S6=#xmerl_scanner{} = Event(#xmerl_event{event = ended,
+ line = S5#xmerl_scanner.line,
+ col = S5#xmerl_scanner.col,
+ data = Attr}, S5),
+ scan_text_decl(T5,S6,Decl).
+
+scan_text_decl("?>"++T,S0 = #xmerl_scanner{hook_fun = _Hook,
+ event_fun = Event},
+ Decl0 = #xmlDecl{attributes = Attrs}) ->
+ ?bump_col(2),
+ ?strip1,
+ Decl = Decl0#xmlDecl{attributes = lists:reverse(Attrs)},
+ S2 = #xmerl_scanner{} = Event(#xmerl_event{event = ended,
+ line = S0#xmerl_scanner.line,
+ col = S0#xmerl_scanner.col,
+ data = Decl}, S1),
+%% {Ret, S3} = Hook(Decl, S2),
+%% {Ret, T1, S3};
+ {Decl, T1, S2};
+scan_text_decl([H|_T],S,_) ->
+ ?fatal({unexpected_character_in_text_declaration,H},S).
+
+scan_optional_version("version"++T,S0) ->
+ ?bump_col(7),
+ ?strip1,
+ {T2, S2} = scan_eq(T1, S1),
+ {Vsn, T3, S3} = scan_xml_vsn(T2, S2),
+ {_,T4,S4} = mandatory_strip(T3,S3),
+ Attr = #xmlAttribute{name = version,parents = [{xml,1}],value = Vsn},
+ {#xmlDecl{attributes=[Attr]},T4,S4};
+scan_optional_version(T,S) ->
+ {#xmlDecl{attributes=[]},T,S}.
+
+
+
+%%%%%%% [81] EncName
+scan_enc_name([], S=#xmerl_scanner{continuation_fun = F}) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_enc_name(MoreBytes, S1) end,
+ fun(S1) -> ?fatal(expected_encoding_name, S1) end,
+ S);
+scan_enc_name([H|T], S0) when H >= $"; H =< $' ->
+ ?bump_col(1),
+ scan_enc_name(T, S, H, []).
+
+
+scan_enc_name([], S=#xmerl_scanner{continuation_fun = F}, Delim, Acc) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_enc_name(MoreBytes, S1, Delim, Acc) end,
+ fun(S1) -> ?fatal(expected_encoding_name, S1) end,
+ S);
+scan_enc_name([H|T], S0, Delim, Acc) when H >= $a, H =< $z ->
+ ?bump_col(1),
+ scan_enc_name2(T, S, Delim, [H|Acc]);
+scan_enc_name([H|T], S0, Delim, Acc) when H >= $A, H =< $Z ->
+ ?bump_col(1),
+ scan_enc_name2(T, S, Delim, [H|Acc]);
+scan_enc_name([H|_T],S,_Delim,_Acc) ->
+ ?fatal({error,{unexpected_character_in_Enc_Name,H}},S).
+
+scan_enc_name2([], S=#xmerl_scanner{continuation_fun = F}, Delim, Acc) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_enc_name2(MoreBytes, S1, Delim, Acc) end,
+ fun(S1) -> ?fatal(expected_encoding_name, S1) end,
+ S);
+scan_enc_name2([H|T], S0, H, Acc) ->
+ ?bump_col(1),
+ {lists:reverse(Acc), T, S};
+scan_enc_name2([H|T], S0, Delim, Acc) when H >= $a, H =< $z ->
+ ?bump_col(1),
+ scan_enc_name2(T, S, Delim, [H|Acc]);
+scan_enc_name2([H|T], S0, Delim, Acc) when H >= $A, H =< $Z ->
+ ?bump_col(1),
+ scan_enc_name2(T, S, Delim, [H|Acc]);
+scan_enc_name2([H|T], S0, Delim, Acc) when H >= $0, H =< $9 ->
+ ?bump_col(1),
+ scan_enc_name2(T, S, Delim, [H|Acc]);
+scan_enc_name2([H|T], S0, Delim, Acc) when H == $.; H == $_; H == $- ->
+ ?bump_col(1),
+ scan_enc_name2(T, S, Delim, [H|Acc]).
+
+
+%%%%%%% [26] VersionNum
+%%% VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
+scan_xml_vsn([], S=#xmerl_scanner{continuation_fun = F}) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_xml_vsn(MoreBytes, S1) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_xml_vsn([H|T], S) when H==$"; H==$'->
+ xml_vsn(T, S#xmerl_scanner{col = S#xmerl_scanner.col+1}, H, []).
+
+xml_vsn([], S=#xmerl_scanner{continuation_fun = F}, Delim, Acc) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> xml_vsn(MoreBytes, S1, Delim, Acc) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+xml_vsn([H|T], S=#xmerl_scanner{col = C}, H, Acc) ->
+ {lists:reverse(Acc), T, S#xmerl_scanner{col = C+1}};
+xml_vsn([H|T], S=#xmerl_scanner{col = C},Delim, Acc) when H >= $a, H =< $z ->
+ xml_vsn(T, S#xmerl_scanner{col = C+1}, Delim, [H|Acc]);
+xml_vsn([H|T], S=#xmerl_scanner{col = C},Delim, Acc) when H >= $A, H =< $Z ->
+ xml_vsn(T, S#xmerl_scanner{col = C+1}, Delim, [H|Acc]);
+xml_vsn([H|T], S=#xmerl_scanner{col = C},Delim, Acc) when H >= $0, H =< $9 ->
+ xml_vsn(T, S#xmerl_scanner{col = C+1}, Delim, [H|Acc]);
+xml_vsn([H|T], S=#xmerl_scanner{col = C}, Delim, Acc) ->
+ case lists:member(H, "_.:-") of
+ true ->
+ xml_vsn(T, S#xmerl_scanner{col = C+1}, Delim, [H|Acc]);
+ false ->
+ ?fatal({invalid_vsn_char, H}, S)
+ end.
+
+%%%%%%% [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
+
+scan_pi([], S=#xmerl_scanner{continuation_fun = F}, Pos) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_pi(MoreBytes, S1, Pos) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_pi(Str = [H1,H2,H3 | T],S0=#xmerl_scanner{line = L, col = C}, Pos)
+ when H1==$x;H1==$X ->
+ %% names beginning with [xX][mM][lL] are reserved for future use.
+ ?bump_col(3),
+ if
+ ((H2==$m) or (H2==$M)) and
+ ((H3==$l) or (H3==$L)) ->
+ scan_wellknown_pi(T,S,Pos);
+ true ->
+ {Target, _NamespaceInfo, T1, S1} = scan_name(Str, S),
+ scan_pi(T1, S1, Target, L, C, Pos, [])
+ end;
+scan_pi(Str, S=#xmerl_scanner{line = L, col = C}, Pos) ->
+ {Target, _NamespaceInfo, T1, S1} = scan_name(Str, S),
+ scan_pi(T1, S1, Target, L, C, Pos,[]).
+
+
+%%% More info on xml-stylesheet can be found at:
+%%% "Associating Style Sheets with XML documents", Version 1.0,
+%%% W3C Recommendation 29 June 1999 (http://www.w3.org/TR/xml-stylesheet/)
+scan_wellknown_pi("-stylesheet"++T, S0=#xmerl_scanner{line=L,col=C},Pos) ->
+ ?dbg("prolog(\"<?xml-stylesheet\")~n", []),
+ ?bump_col(16),
+ scan_pi(T, S, "xml-stylesheet",L,C,Pos,[]);
+scan_wellknown_pi(Str,S,_Pos) ->
+ ?fatal({invalid_target_name, lists:sublist(Str, 1, 10)}, S).
+
+
+
+scan_pi([], S=#xmerl_scanner{continuation_fun = F}, Target,L, C, Pos, Acc) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_pi(MoreBytes, S1, Target, L, C, Pos, Acc) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_pi("?>" ++ T, S0 = #xmerl_scanner{hook_fun = Hook,
+ event_fun = Event},
+ Target, L, C, Pos, Acc) ->
+ ?bump_col(2),
+ PI = #xmlPI{name = Target,
+ pos = Pos,
+ value = lists:reverse(Acc)},
+ S1 = #xmerl_scanner{} = Event(#xmerl_event{event = ended,
+ line = L,
+ col = C,
+ data = PI}, S),
+ {Ret, S2} = Hook(PI, S1),
+ {Ret, T, S2};
+scan_pi([H|T], S, Target, L, C, Pos, Acc) when ?whitespace(H) ->
+ ?strip1,
+ scan_pi2(T1, S1, Target, L, C, Pos, Acc);
+scan_pi([H|_T],S,_Target, _L, _C, _Pos, _Acc) ->
+ ?fatal({expected_whitespace_OR_end_of_PI,{char,H}}, S).
+
+scan_pi2([], S=#xmerl_scanner{continuation_fun = F}, Target,L, C, Pos, Acc) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_pi2(MoreBytes, S1, Target, L, C, Pos, Acc) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_pi2("?>" ++ T, S0 = #xmerl_scanner{hook_fun = Hook,
+ event_fun = Event},
+ Target, L, C, Pos, Acc) ->
+ ?bump_col(2),
+ PI = #xmlPI{name = Target,
+ pos = Pos,
+ value = lists:reverse(Acc)},
+ S1 = #xmerl_scanner{} = Event(#xmerl_event{event = ended,
+ line = L,
+ col = C,
+ data = PI}, S),
+ {Ret, S2} = Hook(PI, S1),
+ {Ret, T, S2};
+scan_pi2(Str, S0, Target, L, C, Pos, Acc) ->
+ ?bump_col(1),
+ {Ch,T} = wfc_legal_char(Str,S),
+ scan_pi2(T, S, Target, L, C, Pos, [Ch|Acc]).
+
+
+
+%% [28] doctypedecl ::=
+%% '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
+scan_doctype([], S=#xmerl_scanner{continuation_fun = F}) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_doctype(MoreBytes, S1) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_doctype(T, S) ->
+ {_,T1,S1} = mandatory_strip(T,S),
+ {DTName, _NamespaceInfo, T2, S2} = scan_name(T1, S1),
+ ?strip3,
+ scan_doctype1(T3, S3#xmerl_scanner{doctype_name = DTName}).
+
+
+%% [75] ExternalID ::= 'SYSTEM' S SystemLiteral
+%% | 'PUBLIC' S PubidLiteral S SystemLiteral
+scan_doctype1([], S=#xmerl_scanner{continuation_fun = F}) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_doctype1(MoreBytes, S1) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_doctype1("PUBLIC" ++ T, S0) ->
+ ?bump_col(6),
+ {_,T1,S1} = mandatory_strip(T,S),
+ {PIDL, T2, S2} = scan_pubid_literal(T1, S1),
+ {_,T3,S3} = mandatory_strip(T2,S2),
+ {SL, T4, S4} = scan_system_literal(T3, S3),
+ ?strip5,
+ scan_doctype2(T5, S5, {public, PIDL, SL});
+scan_doctype1("SYSTEM" ++ T, S0) ->
+ ?bump_col(6),
+ {_,T1,S1} = mandatory_strip(T,S),
+ {SL, T2, S2} = scan_system_literal(T1, S1),
+ ?strip3,
+ scan_doctype2(T3, S3, {system, SL});
+scan_doctype1(T, S) ->
+ scan_doctype2(T, S, undefined).
+
+
+scan_doctype2([], S=#xmerl_scanner{continuation_fun = F},DTD) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_doctype2(MoreBytes, S1, DTD) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_doctype2("[" ++ T, S0, DTD) ->
+ ?bump_col(1),
+ ?strip1,
+ scan_doctype3(T1, S1, DTD);
+scan_doctype2(">" ++ T, S0, DTD) ->
+ ?bump_col(1),
+ ?strip1,
+ S2 = fetch_DTD(DTD, S1),
+ check_decl(S2),
+ {T1, S2};
+scan_doctype2(_T,S,_DTD) ->
+ ?fatal(expected_end_of_DOCTYPE_declaration, S).
+
+%% [28a] DeclSep ::= PEReference | S
+%% [28b] intSubset ::= (markupdecl | DeclSep)*
+scan_doctype3([], S=#xmerl_scanner{continuation_fun = F},DTD) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_doctype3(MoreBytes, S1,DTD) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_doctype3("%" ++ T, S0, DTD) ->
+ ?bump_col(1),
+ {PERefName, T1, S1} = scan_pe_reference(T, S),
+ ?strip2,
+ case expand_pe_reference(PERefName, S2,as_PE) of
+ {system, _} = Name ->
+ S3 = fetch_DTD(Name, S2),
+ check_decl(S3),
+ scan_doctype3(T2, S3, DTD);
+ {public, _} = Name ->
+ S3 = fetch_DTD(Name, S2),
+ check_decl(S3),
+ scan_doctype3(T2, S3, DTD);
+ {public, _, _} = Name ->
+ S3 = fetch_DTD(Name, S2),
+ check_decl(S3),
+ scan_doctype3(T2, S3, DTD);
+ ExpRef when is_list(ExpRef) -> % Space added, see Section 4.4.8
+ {_,T3,S3} = strip(ExpRef++T2,S2),
+ scan_doctype3(T3,S3,DTD)
+ end;
+scan_doctype3("]" ++ T, S0, DTD) ->
+ ?bump_col(1),
+ ?strip1,
+ S2 = fetch_DTD(DTD, S1),
+ check_decl(S2),
+ T2 = scan_mandatory(">",T1,1,S2,expected_doctype_end_tag),
+%% ">" ++ T2 = T1,
+ {T2, S2};
+scan_doctype3(T, S, DTD) ->
+ {_, T1, S1} = scan_markup_decl(T, S),
+ scan_doctype3(T1, S1, DTD).
+
+
+
+fetch_DTD(undefined, S=#xmerl_scanner{doctype_DTD=URI}) when is_list(URI)->
+ %% allow to specify DTD name when it isn't available in xml stream
+ fetch_DTD({system,URI},S#xmerl_scanner{doctype_DTD=option_provided});
+fetch_DTD(undefined, S) ->
+ S;
+% fetch_DTD(_,S=#xmerl_scanner{validation=false}) ->
+% S;
+fetch_DTD(DTDSpec, S)->
+ case fetch_and_parse(DTDSpec,S,[{text_decl,true},
+ {environment,{external,subset}}]) of
+ NewS when is_record(NewS,xmerl_scanner) ->
+ NewS;
+ {_Res,_Tail,_Sx} -> % Continue with old scanner data, result in Rules
+ S
+ end.
+
+fetch_and_parse(ExtSpec,S=#xmerl_scanner{fetch_fun=Fetch,
+ rules=Rules,
+ xmlbase = XMLBase},
+ Options0) ->
+ RetS =
+ case Fetch(ExtSpec, S) of
+ {ok, NewS} ->
+ %% For backward compatibility only. This will be removed later!!
+ NewS;
+ {ok, not_fetched,NewS} ->
+ NewS;
+ {ok, DataRet, NewS = #xmerl_scanner{
+ fetch_path=FetchPath,
+ user_state = UState,
+ event_fun = Event,
+ hook_fun = Hook,
+ fetch_fun = Fetch1,
+ close_fun = Close1,
+ continuation_fun = Cont,
+ acc_fun = Acc,
+ rules_read_fun = Read,
+ rules_write_fun = Write,
+ validation = Valid,
+ quiet = Quiet,
+ encoding = Charset
+ }} ->
+ EvS = event_state(NewS),
+ HoS = hook_state(NewS),
+ FeS = fetch_state(NewS),
+ CoS = cont_state(NewS),
+ Options = Options0++[{fetch_path,FetchPath},
+ {user_state, UState},
+ {rules, Rules},
+ {event_fun, Event, EvS},
+ {hook_fun, Hook, HoS},
+ {fetch_fun, Fetch1, FeS},
+ {close_fun, Close1},
+ {continuation_fun, Cont, CoS},
+ {rules, Read, Write, ""},
+ {acc_fun, Acc},
+ {validation,Valid},
+ {quiet,Quiet},
+ {encoding,Charset}],
+
+ case DataRet of
+ {file, F} ->
+ int_file_decl(F, Options,Charset);
+ {string, String} ->
+ int_string_decl(String, Options,XMLBase,file_name_unknown);
+ _ ->
+ %% other scheme
+ {DataRet,[],NewS}
+ end;
+ Error ->
+ ?fatal({error_fetching_DTD, {ExtSpec, Error}}, S)
+ end,
+ case RetS of
+ #xmerl_scanner{} ->
+ RetS#xmerl_scanner{text_decl=false,
+ environment=S#xmerl_scanner.environment};
+ _ -> RetS
+ end.
+
+
+fetch_not_parse(ExtSpec,S=#xmerl_scanner{fetch_fun=Fetch}) ->
+ case Fetch(ExtSpec,S) of
+ {ok, not_fetched,_NewS} ->
+ ?fatal({error_fetching_external_source,ExtSpec},S);
+ {ok, DataRet, NewS} ->
+ {String,LocationName} =
+ case DataRet of
+ {file,F} ->
+ {get_file(F,S),F};
+ {string,Str} ->
+ {binary_to_list(Str),file_name_unknown};
+ {http,URI} ->
+ {{http,URI},URI};
+ _ -> DataRet
+ end,
+ {String, NewS#xmerl_scanner{filename=LocationName}};
+ _ ->
+ ?fatal({error_fetching_external_resource,ExtSpec},S)
+ end.
+
+get_file(F,S) ->
+% io:format("get_file F=~p~n",[F]),
+ case file:read_file(F) of
+ {ok,Bin} ->
+ binary_to_list(Bin);
+ Err ->
+ ?fatal({error_reading_file,F,Err},S)
+ end.
+%% check_decl/1
+%% Now it is necessary to check that all referenced types is declared,
+%% since it is legal to reference some xml types before they are
+%% declared.
+check_decl(#xmerl_scanner{validation=V}) when V =/= dtd ->
+ ok;
+check_decl(#xmerl_scanner{rules=Tab} = S) ->
+ check_notations(Tab,S),
+ check_elements(Tab,S), %% check also attribute defs for element
+ check_entities(Tab,S).
+
+check_notations(Tab,S) ->
+ case ets:match(Tab,{{notation,'$1'},undeclared}) of
+ [[]] -> ok;
+ [] -> ok;
+ [L] when is_list(L) ->
+ ?fatal({error_missing_declaration_in_DTD,hd(L)},S);
+ Err ->
+ ?fatal({error_missing_declaration_in_DTD,Err},S)
+ end.
+
+check_elements(Tab,S) ->
+ case catch ets:match(Tab,{{elem_def,'_'},'$2'},10) of
+ {_,_}=M ->
+ Fun = fun({Match,'$end_of_table'},_F) ->
+ lists:foreach(fun(X)->check_elements2(X,S) end,
+ Match),
+ ok;
+ ('$end_of_table',_) ->
+ ok;
+ ({Match,Cont},F) ->
+ lists:foreach(fun(X)->check_elements2(X,S) end,
+ Match),
+ F(ets:match(Cont),F)
+ end,
+ Fun(M,Fun);
+ '$end_of_table' -> ok;
+ Err -> ?fatal({error_missing_declaration_in_DTD,Err},S)
+ end.
+
+% it is not an error to declare attributes for an element that is not
+% declared.
+check_elements2([#xmlElement{attributes=Attrs}],S) ->
+ check_attributes(Attrs,S);
+check_elements2(_,_) ->
+ ok.
+
+check_attributes([{N1,'ID',_,_,_}=Attr|Rest],S) ->
+ case lists:keysearch('ID',2,Rest) of
+ {value,Att2} ->
+ ?fatal({error_more_than_one_ID_def,N1,element(1,Att2)},S);
+ _ ->
+ ok
+ end,
+ vc_ID_Attribute_Default(Attr,S),
+ check_attributes(Rest,S);
+check_attributes([{_,{enumeration,_},_,_,_}=Attr|T],S) ->
+ vc_Enumeration(Attr,S),
+ check_attributes(T,S);
+check_attributes([{_,Ent,_,_,_}=Attr|T],S)
+ when Ent=='ENTITY';Ent=='ENTITIES' ->
+ vc_Entity_Name(Attr,S),
+ check_attributes(T,S);
+check_attributes([_|T],S) ->
+ check_attributes(T,S);
+check_attributes([],_S) ->
+ ok.
+
+check_entities(Tab,S=#xmerl_scanner{validation=dtd}) ->
+ case ets:match(Tab,{{entity,'$1'},undeclared}) of
+ [[]] -> ok;
+ [] -> ok;
+ [L] when is_list(L) ->
+ ?fatal({error_missing_declaration_in_DTD,hd(L)},S);
+ Err ->
+ ?fatal({error_missing_declaration_in_DTD,Err},S)
+ end;
+check_entities(_,_) ->
+ ok.
+
+
+%% check_decl2/1: checks that all referenced ID attributes are declared
+check_decl2(S=#xmerl_scanner{rules=Tab}) ->
+ check_referenced_ids(Tab,S).
+
+
+check_referenced_ids(Tab,S) ->
+ case ets:match(Tab,{{id,'$1'},undeclared}) of
+ [[]] -> ok;
+ [] -> ok;
+ [L] when is_list(L) ->
+ ?fatal({error_missing_declaration_in_DTD,hd(L)},S);
+ Err ->
+ ?fatal({error_missing_declaration_in_DTD,Err},S)
+ end.
+
+%%%%%%% [30] extSubSet ::= TextDecl? extSubsetDecl
+
+scan_ext_subset([], S=#xmerl_scanner{continuation_fun = F}) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_ext_subset(MoreBytes, S1) end,
+ fun(S1) -> {[], S1} end,
+ S);
+scan_ext_subset("%" ++ T, S0) ->
+ %% DeclSep [28a]: WFC: PE Between Declarations.
+ %% The replacement text of a parameter entity reference in a
+ %% DeclSep must match the production extSubsetDecl.
+ ?bump_col(1),
+ {T1,S1} = scan_decl_sep(T,S),
+ scan_ext_subset(T1, S1);
+scan_ext_subset("<![" ++ T, S0) ->
+ ?bump_col(3),
+ ?strip1,
+ {_, T2, S2} = scan_conditional_sect(T1, S1),
+ scan_ext_subset(T2,S2);
+scan_ext_subset(T, S) when ?whitespace(hd(T)) ->
+ {_,T1,S1} = strip(T,S),
+ scan_ext_subset(T1, S1);
+scan_ext_subset(T, S) ->
+ {_, T1, S1} = scan_markup_decl(T, S),
+ scan_ext_subset(T1, S1).
+
+
+%%%%%%% [28a] DeclSep ::= PEReference | S
+scan_decl_sep(T,S) ->
+ {PERefName, T1, S1} = scan_pe_reference(T, S),
+ {ExpandedRef,S2} =
+ case expand_pe_reference(PERefName,S1,as_PE) of
+ Tuple when is_tuple(Tuple) ->
+ %% {system,URI} or {public,URI}
+ {ExpRef,_Sx}=fetch_not_parse(Tuple,S1),
+ {ExpRef,S1};
+ ExpRef ->
+ {ExpRef,S1}
+ end,
+ {_,TRef,S3} = strip(ExpandedRef,S2),
+ {_,S4}=scan_ext_subset(TRef,S3),
+ {T1,S4}.
+% scan_decl_sep(T,S=#xmerl_scanner{rules_read_fun=Read,
+% rules_write_fun=Write,
+% rules_delete_fun=Delete}) ->
+% {PERefName, T1, S1} = scan_pe_reference(T, S),
+% {ExpandedRef,S2} =
+% case expand_pe_reference(PERefName,S1,as_PE) of
+% Tuple when tuple(Tuple) ->
+% %% {system,URI} or {public,URI}
+% {ExpRef,Sx}=fetch_not_parse(Tuple,S1),
+% {EntV,_,_S2} = scan_entity_value(ExpRef, Sx, no_delim,
+% PERefName,parameter),
+% %% should do an update Write(parameter_entity) so next
+% %% expand_pe_reference is faster
+% Delete(parameter_entity,PERefName,_S2),
+% _S3 = Write(parameter_entity,PERefName,EntV,_S2),
+% EntV2 = Read(parameter_entity,PERefName,_S3),
+% {" " ++ EntV2 ++ " ",_S3};
+% ExpRef ->
+% {ExpRef,S1}
+% end,
+% {_, T3, S3} = strip(ExpandedRef,S2),
+% {_T4,S4} = scan_ext_subset(T3,S3),
+% strip(T1,S4).
+
+%%%%%%% [61] ConditionalSect ::= includeSect | ignoreSect
+
+scan_conditional_sect([], S=#xmerl_scanner{continuation_fun = F}) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_conditional_sect(MoreBytes, S1) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_conditional_sect("IGNORE" ++ T, S0) ->
+ ?bump_col(6),
+ ?strip1,
+ T2 = scan_mandatory("[",T1,1,S,expected_IGNORE_bracket),
+% "[" ++ T2 = T1,
+ {_,T3,S3} = strip(T2,S1),
+ scan_ignore(T3,S3);
+scan_conditional_sect("INCLUDE" ++ T, S0) ->
+ ?bump_col(7),
+ ?strip1,
+ T2 = scan_mandatory("[",T1,1,S,expected_INCLUDE_bracket),
+% "[" ++ T2 = T1,
+ {_,T3,S3} = strip(T2,S1),
+ scan_include(T3, S3);
+scan_conditional_sect("%"++T,S0) ->
+ ?bump_col(1),
+ {PERefName, T1, S1} = scan_pe_reference(T, S),
+ ExpRef = expand_pe_reference(PERefName, S1,as_PE),
+ {_,T2,S2} = strip(ExpRef ++ T1,S1),
+ scan_conditional_sect(T2,S2).
+
+
+%%%% [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
+%%%% [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
+%%%% [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
+scan_ignore(Str,S) ->
+ scan_ignore(Str,S,0).
+
+scan_ignore([], S=#xmerl_scanner{continuation_fun = F},Level) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_ignore(MoreBytes, S1,Level) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_ignore("<![" ++ T, S0,Level) ->
+ %% nested conditional section. Topmost condition is ignore, though
+ ?bump_col(3),
+ scan_ignore(T, S,Level+1);
+scan_ignore("]]>" ++ T, S0,0) ->
+ ?bump_col(3),
+ {[], T, S};
+scan_ignore("]]>" ++ T, S0,Level) ->
+ ?bump_col(3),
+ scan_ignore(T, S,Level-1);
+scan_ignore([_H|T],S0,Level) ->
+ ?bump_col(1),
+ scan_ignore(T,S,Level).
+
+
+%%%%%%% [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
+scan_include([], S=#xmerl_scanner{continuation_fun = F}) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_include(MoreBytes, S1) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_include("]]>" ++ T, S0) ->
+ ?bump_col(3),
+ {[], T, S};
+scan_include("%" ++ T, S0) ->
+ ?bump_col(1),
+ {PERefName, T1, S1} = scan_pe_reference(T, S),
+ ExpRef = expand_pe_reference(PERefName, S1,as_PE),
+ {_,T2,S2} = strip(ExpRef ++ T1,S1),
+ scan_include(T2, S2);
+scan_include("<![" ++ T, S0) ->
+ ?bump_col(3),
+ ?strip1,
+ {_, T2, S2} = scan_conditional_sect(T1, S1),
+ ?strip3,
+ scan_include(T3,S3);
+scan_include(T, S) ->
+ {_, T1, S1} = scan_markup_decl(T, S),
+ scan_include(T1, S1).
+
+
+%%%%%%% [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
+%%%%%%% NotationDecl | PI |Comment
+%%%%%%% [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
+
+%% Validity constraint: Unique Type Declaration: No element type may be
+%% declared more than once.
+%%
+scan_markup_decl([], S=#xmerl_scanner{continuation_fun = F}) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_markup_decl(MoreBytes, S1) end,
+ fun(S1) -> {[], [], S1} end,
+ S);
+scan_markup_decl("<!--" ++ T, S0) ->
+ ?bump_col(4),
+ scan_comment(T, S);
+scan_markup_decl("<?" ++ T, S0) ->
+ ?bump_col(2),
+ {_PI, T1, S1} = scan_pi(T, S,_Pos=markup),
+ strip(T1, S1);
+scan_markup_decl("<!ELEMENT" ++ T,
+ #xmerl_scanner{rules_read_fun = Read,
+ rules_write_fun = Write,
+ rules_delete_fun = Delete} = S0) ->
+ ?bump_col(9),
+ {_,T1,S1} = mandatory_strip(T,S),
+ {Ename, _NamespaceInfo, T2, S2} = scan_name(T1, S1),
+ Element =
+ case Read(elem_def, Ename, S2) of
+ El = #xmlElement{elementdef=Decl} when Decl =/= undeclared ->
+ case S2#xmerl_scanner.validation of
+ dtd ->
+ ?fatal({already_defined, Ename}, S2);
+ _ ->
+ Delete(elem_def,Ename,S2),
+ El
+ end;
+ El = #xmlElement{} ->
+ Delete(elem_def,Ename,S2),
+ El;
+ undefined ->
+ #xmlElement{}
+ end,
+ {_,T3,S3} = mandatory_strip(T2,S2),
+ {Edef, T4, S4} = scan_contentspec(T3, S3),
+ ?strip5,
+ {">" ++ T6,S6} = scan_element_completion(T5,S5),
+ S7 = Write(elem_def, Ename,
+ Element#xmlElement{name = Ename,
+ content = Edef,
+ elementdef=S6#xmerl_scanner.environment},
+ S6#xmerl_scanner{col=S6#xmerl_scanner.col+1}),
+ strip(T6,S7);
+scan_markup_decl("<!ENTITY" ++ T, S0) ->
+ %% <!ENTITY [%] entity.name NDATA notation.name>
+ %% <!ENTITY [%] entity.name "replacement text">
+ %% <!ENTITY [%] entity.name SYSTEM "system.identifier">
+ %% <!ENTITY [%] entity.name PUBLIC public.identifier "system.identifier">
+ ?bump_col(8),
+ {_,T1,S1} = mandatory_strip(T,S),
+ {T2, S2} = scan_entity(T1, S1),
+ strip(T2,S2);
+scan_markup_decl("<!NOTATION" ++ T, S0) ->
+ %% <!NOTATION notation.name "public.identifier" "helper.application">
+ ?bump_col(10),
+ {_,T1,S1} = mandatory_strip(T,S),
+ {T2, S2} = scan_notation_decl(T1, S1),
+ strip(T2,S2);
+scan_markup_decl("<!ATTLIST" ++ T,
+ #xmerl_scanner{rules_read_fun = Read,
+ rules_write_fun = Write,
+ rules_delete_fun= Delete} = S0) ->
+ %% <!ATTLIST Ename ( AttrName Type Value )*>
+ ?bump_col(9),
+ {_,T1,S1} = mandatory_strip(T,S),
+ {Ename, _NamespaceInfo, T2, S2} = scan_name(T1, S1),
+% ?strip3,
+ {Attributes, T4, S4} = scan_attdef(T2, S2),
+ {EDEF,MergedAttrs} =
+ case Read(elem_def, Ename, S4) of
+ undefined -> %% this may happen when the ELEMENT is declared in
+ %% the external DTD but the ATTLIST in the
+ %% internal DTD.
+ {#xmlElement{},update_attributes(Attributes,[])};
+ Edef = #xmlElement{attributes = OldAttrs} ->
+ Delete(elem_def,Ename,S4),
+ %% the slot in rules table must be empty so that the
+ %% later write has the assumed effect. Read maybe
+ %% should empty the table slot.
+ {Edef,update_attributes(Attributes, OldAttrs)}
+ end,
+ NewEdef = EDEF#xmlElement{name=Ename,attributes = MergedAttrs},
+ S5 = Write(elem_def, Ename, NewEdef, S4),
+ T5 = T4,
+ strip(T5,S5);
+scan_markup_decl(_Str,S) ->
+ ?fatal(expected_markup,S).
+
+scan_element_completion(T,S) ->
+ scan_markup_completion_gt(T,S).
+
+update_attributes(NewAttrs, OldAttrs) ->
+ update_attributes1(NewAttrs,lists:reverse(OldAttrs)).
+
+update_attributes1([A = {Name,_Type,_DefaultV,_DefaultD,_Env}|Attrs],
+ OldAttrs) ->
+ case lists:keymember(Name, 1, OldAttrs) of
+ true ->
+ update_attributes1(Attrs, OldAttrs);
+ false ->
+ update_attributes1(Attrs, [A|OldAttrs])
+ end;
+update_attributes1([],Acc) ->
+ lists:reverse(Acc).
+
+
+%%%%%%% [53] AttDef
+
+scan_attdef([], S=#xmerl_scanner{continuation_fun = F}) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_attdef(MoreBytes, S1) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_attdef(T, S) ->
+ scan_attdef(T, S, _AttrAcc = []).
+
+
+scan_attdef([], S=#xmerl_scanner{continuation_fun = F}, Attrs) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_attdef(MoreBytes, S1, Attrs) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_attdef(">" ++ T, S0, Attrs) ->
+ ?bump_col(1),
+ {lists:reverse(Attrs), T, S};
+scan_attdef("%" ++ _T, S=#xmerl_scanner{environment=prolog}, _Attrs) ->
+ ?fatal({error,{wfc_PEs_In_Internal_Subset}},S);
+scan_attdef("%" ++ T, S0, Attrs) ->
+ ?bump_col(1),
+ {PERefName, T1, S1} = scan_pe_reference(T, S),
+ ExpRef = expand_pe_reference(PERefName, S1,as_PE),
+ {_,T2,S2} = strip(ExpRef ++ T1,S1),
+ scan_attdef(T2, S2, Attrs);
+scan_attdef(T,S,Attrs) ->
+ {_,T1,S1} = mandatory_strip(T,S),
+ scan_attdef2(T1,S1,Attrs).
+
+scan_attdef2(">" ++ T, S0, Attrs) ->
+ ?bump_col(1),
+ {lists:reverse(Attrs), T, S};
+scan_attdef2("%" ++ _T, S=#xmerl_scanner{environment=prolog}, _Attrs) ->
+ ?fatal({error,{wfc_PEs_In_Internal_Subset}},S);
+scan_attdef2("%" ++ T, S0, Attrs) ->
+ ?bump_col(1),
+ {PERefName, T1, S1} = scan_pe_reference(T, S),
+ ExpRef = expand_pe_reference(PERefName, S1,as_PE),
+ {_,T2,S2} = strip(ExpRef ++ T1,S1),
+ scan_attdef2(T2, S2, Attrs);
+scan_attdef2(T, S, Attrs) ->
+ {AttName, _NamespaceInfo, T1, S1} = scan_name(T, S),
+ {_,T2,S2} = mandatory_strip(T1,S1),
+ {AttType, T3, S3} = scan_att_type(T2, S2),
+ {_,T4,S4} = mandatory_strip(T3,S3),
+ {{DefaultDecl,DefaultValue}, T5, S5} = scan_default_decl(T4, S4, AttType),
+ ?strip6,
+ Attr = {AttName, AttType,DefaultValue,DefaultDecl,
+ S#xmerl_scanner.environment},
+ scan_attdef2(T6, S6, [Attr|Attrs]).
+
+
+%%% [54] StringType
+scan_att_type([], S=#xmerl_scanner{continuation_fun = F}) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_att_type(MoreBytes, S1) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_att_type("CDATA" ++ T, S0) ->
+ ?bump_col(5),
+ {'CDATA', T, S};
+%%% [55] TokenizedType
+scan_att_type("IDREFS" ++ T, S0) ->
+ ?bump_col(6),
+ {'IDREFS', T, S};
+scan_att_type("IDREF" ++ T, S0) ->
+ ?bump_col(5),
+ {'IDREF', T, S};
+scan_att_type("ID" ++ T, S0) ->
+ ?bump_col(2),
+ {'ID', T, S};
+scan_att_type("ENTITY" ++ T, S0) ->
+ ?bump_col(6),
+ {'ENTITY', T, S};
+scan_att_type("ENTITIES" ++ T, S0) ->
+ ?bump_col(8),
+ {'ENTITIES', T, S};
+scan_att_type("NMTOKENS" ++ T, S0) ->
+ ?bump_col(8),
+ {'NMTOKENS', T, S};
+scan_att_type("NMTOKEN" ++ T, S0) ->
+ ?bump_col(7),
+ {'NMTOKEN', T, S};
+%%% [57] EnumeratedType
+scan_att_type("NOTATION" ++ T, S0) ->
+ ?bump_col(8),
+ {_,T1,S1} = mandatory_strip(T,S),
+ T2 = scan_mandatory("(",T1,1,S1,expected_parenthesis_after_NOTATION),
+% "(" ++ T2 = T1,
+ S2 = S1,
+ ?strip3,
+ {Name, _NamespaceInfo, T4, S4} = scan_name(T3, S3),
+ notation_exists(Name, S4),
+ ?strip5,
+ scan_notation_type(T5, S5, [Name]);
+scan_att_type("(" ++ T, S0) ->
+ ?bump_col(1),
+ ?strip1,
+ {NmToken, _NamespaceInfo, T2, S2} = scan_nmtoken(T1, S1),
+ ?strip3,
+ scan_enumeration(T3, S3, [NmToken]);
+scan_att_type("%" ++ _T, S=#xmerl_scanner{environment=prolog}) ->
+ ?fatal({error,{wfc_PEs_In_Internal_Subset}},S);
+scan_att_type("%" ++ T, S0) ->
+ ?bump_col(1),
+ {PERefName, T1, S1} = scan_pe_reference(T, S),
+ ExpRef = expand_pe_reference(PERefName, S1,in_literal),
+ {ExpRef,T1,S1}.
+
+%%% [58] NotationType
+
+scan_notation_type([], S=#xmerl_scanner{continuation_fun = F}, Acc) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_notation_type(MoreBytes, S1, Acc) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_notation_type(")" ++ T, S0, Acc) ->
+ ?bump_col(1),
+ {{notation, lists:reverse(Acc)}, T, S};
+scan_notation_type("|" ++ T, S0, Acc) ->
+ ?bump_col(1),
+ ?strip1,
+ {Name, _NamespaceInfo, T2, S2} = scan_name(T1, S1),
+ notation_exists(Name, S2),
+ ?strip3,
+ scan_notation_type(T3, S3, [Name | Acc]).
+
+%%% Validity constraint for NotationType:
+%%% The used notation names must be declared in the DTD, but they may
+%%% be declared later.
+notation_exists(Name, #xmerl_scanner{rules_read_fun = Read,
+ rules_write_fun = Write } = S) ->
+ case Read(notation, Name, S) of
+ undefined ->
+ %% this is legal, since the referenced NOTATION
+ %% may be declared later in internal or external
+ %% subset.
+ Write(notation,Name,undeclared,S);
+ _Value ->
+ ok
+ end.
+
+%%% [59] Enumeration
+
+scan_enumeration([], S=#xmerl_scanner{continuation_fun = F}, Acc) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_enumeration(MoreBytes, S1, Acc) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_enumeration(")" ++ T, S0, Acc) ->
+ ?bump_col(1),
+ {{enumeration, lists:reverse(Acc)}, T, S};
+scan_enumeration("|" ++ T, S0, Acc) ->
+ ?bump_col(1),
+ ?strip1,
+ {NmToken, _NamespaceInfo, T2, S2} = scan_nmtoken(T1, S1),
+ ?strip3,
+ scan_enumeration(T3, S3, [NmToken|Acc]).
+
+
+%%%%%%% [60] DefaultDecl
+
+scan_default_decl([], S=#xmerl_scanner{continuation_fun = F}, Type) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_default_decl(MoreBytes, S1, Type) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_default_decl("#REQUIRED" ++ T, S0, _Type) ->
+ ?bump_col(9),
+ {{'#REQUIRED',no_value}, T, S};
+scan_default_decl("#IMPLIED" ++ T, S0, _Type) ->
+ ?bump_col(8),
+ {{'#IMPLIED',no_value}, T, S};
+scan_default_decl("#FIXED" ++ T, S0, Type) ->
+ ?bump_col(6),
+ {_,T1,S1} = mandatory_strip(T,S),
+ {Value,T2,S2,_} = default_value(T1, S1, Type),
+ {{'#FIXED',Value},T2,S2};
+scan_default_decl(Str, S, Type) ->
+ {Value,T1,S1,_} = default_value(Str, S, Type),
+ {{no_decl,Value},T1,S1}.
+
+
+%% There is room here to validate against Type, but we don't do it at
+%% the moment.
+default_value(T, S, Type) ->
+ {_Val, _T1, _S1,_} = scan_att_value(T, S, Type).
+
+
+%%%%%%% [71] EntityDef
+
+scan_entity([], S=#xmerl_scanner{continuation_fun = F}) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_entity(MoreBytes, S1) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_entity("%" ++ T, #xmerl_scanner{rules_write_fun = Write} = S0) ->
+ %% parameter entity
+ ?bump_col(1),
+ {_,T1,S1} = mandatory_strip(T,S),
+ {PEName, _NamespaceInfo, T2, S2} = scan_name_no_colons(T1, S1),
+ {_,T3,S3} = mandatory_strip(T2,S2),
+ {PEDef, T4, S4} = scan_pe_def(T3, S3, PEName),
+ ?strip5,
+ {">" ++ T6,S6} = scan_entity_completion(T5,S5),
+ S7 = Write(parameter_entity, PEName, PEDef, S6),
+ {T6, S7};
+scan_entity(T, #xmerl_scanner{rules_write_fun = Write,
+ rules_read_fun = Read,
+ rules_delete_fun = Delete} = S) ->
+ %% generic entity
+ {EName, _NamespaceInfo, T1, S1} = scan_name_no_colons(T, S),
+ {_,T2,S2} = mandatory_strip(T1,S1),
+ {EDef, EntType, T3, S3} = scan_entity_def(T2, S2, EName),
+ check_entity_recursion(EName,S3),
+ ?strip4,
+ {">" ++ T5,S5} = scan_entity_completion(T4,S4),
+ case Read(entity,EName,S5) of
+ undeclared -> Delete(entity,EName,S5);
+ _ -> ok
+ end,
+ S6 = Write(entity, EName, {S5#xmerl_scanner.environment,EntType,EDef}, S5),
+ {T5, S6}.
+
+scan_entity_completion(T,S) ->
+ scan_markup_completion_gt(T,S).
+
+%%%%%%% [73] EntityDef
+
+scan_entity_def([], S=#xmerl_scanner{continuation_fun = F}, EName) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_entity_def(MoreBytes, S1, EName) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_entity_def("'" ++ T, S0, EName) ->
+ ?bump_col(1),
+ {EVal,T1,S1}=scan_entity_value(T, S, $', EName,general),
+ {EVal,internal,T1,S1};
+scan_entity_def("\"" ++ T, S0, EName) ->
+ ?bump_col(1),
+ {EVal,T1,S1}=scan_entity_value(T, S, $", EName,general),
+ {EVal,internal,T1,S1};
+%% external general entity, parsed or unparsed.
+scan_entity_def(Str, S, EName) ->
+ {ExtID, T1, S1} = scan_external_id(Str, S),
+ {NData, T2, S2} = scan_ndata_decl(T1, S1),
+ case NData of
+ {ndata,_} ->
+ %% if NDATA exists it is an unparsed ENTITY
+ {{ExtID,NData},external,T2,S2};
+ _ ->
+ case fetch_and_parse(ExtID,S2,
+ [{text_decl,true},
+ {environment,{external,{entity,EName}}}]) of
+ {{_USret,Entity},_Tail,_Sx} ->
+ {Entity, external,T2, S2};
+ {Entity,_Tail,Sx} ->
+ OldRef=S2#xmerl_scanner.entity_references,
+ NewRef=Sx#xmerl_scanner.entity_references,
+ {Entity,external,T2,
+ S2#xmerl_scanner{entity_references=OldRef++NewRef}};
+ {error,enoent} -> % this bad entity is declared,
+ % but it may not be referenced,
+ % then it would not be an
+ % error.
+ {{error,enoent},external,T2,S2}
+ end
+ end.
+
+
+scan_ndata_decl([], S=#xmerl_scanner{continuation_fun = F}) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_ndata_decl(MoreBytes, S1) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_ndata_decl(Str = ">"++_T, S) ->
+ {[], Str, S};
+scan_ndata_decl(T, S) ->
+ {_,T1,S1} = mandatory_strip(T,S),
+ scan_ndata_decl2(T1,S1).
+scan_ndata_decl2(Str = ">"++_T,S) ->
+ {[], Str, S};
+scan_ndata_decl2("NDATA" ++ T,S0 = #xmerl_scanner{rules_read_fun = Read,
+ rules_write_fun = Write}) ->
+ ?bump_col(5),
+ {_,T1,S1} = mandatory_strip(T,S),
+ {Name, _NamespaceInfo, T2, S2} = scan_name(T1, S1),
+ case Read(notation, Name, S2) of
+ undefined -> %% this is legal, since the referenced NOTATION
+ %% may be declared later in internal or external
+ %% subset.
+ Write(notation,Name,undeclared,S2),
+ {{ndata,Name},T2,S2};
+ _Value ->
+ {{ndata, Name}, T2, S2}
+ end.
+
+%%%%%%% [39] element
+
+scan_element(T, S, Pos) ->
+ scan_element(T, S, Pos, S#xmerl_scanner.space,
+ _Lang = [], _Parents = [], #xmlNamespace{}).
+
+scan_element(T, S=#xmerl_scanner{line=L,col=C},
+ Pos, SpaceDefault,Lang, Parents, NS) ->
+ {Name, NamespaceInfo, T1, S1} = scan_name(T, S),
+ vc_Element_valid(Name,S),
+ ?strip2,
+ scan_element(T2, S2, Pos, Name, L, C, _Attrs = [],
+ Lang, Parents, NamespaceInfo, NS,
+ SpaceDefault).
+
+
+scan_element("/", S=#xmerl_scanner{continuation_fun = F},
+ Pos, Name, StartL, StartC, Attrs, Lang, Parents,
+ NSI, NS, SpaceDefault) ->
+ ?dbg("trailing / detected~n", []),
+ F(fun(MoreBytes, S1) -> scan_element("/" ++ MoreBytes, S1,
+ Pos, Name, StartL, StartC, Attrs,
+ Lang,Parents,NSI,NS,SpaceDefault) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_element([], S=#xmerl_scanner{continuation_fun = F},
+ Pos, Name, StartL, StartC, Attrs, Lang, Parents,
+ NSI, NS, SpaceDefault) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_element(MoreBytes, S1,
+ Pos, Name, StartL, StartC, Attrs,
+ Lang,Parents,NSI,NS,SpaceDefault) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_element("/>" ++ T, S0 = #xmerl_scanner{hook_fun = Hook,
+ event_fun = Event,
+ line = L, col = C,
+ xmlbase_cache=XMLBase}, Pos,
+ Name, _StartL, _StartC, Attrs0, Lang, Parents, NSI,
+ Namespace, _SpaceDefault) ->
+ ?bump_col(2),
+ Attrs = lists:reverse(Attrs0),
+ E=processed_whole_element(S, Pos, Name, Attrs, Lang, Parents,NSI,Namespace),
+
+ wfc_unique_att_spec(Attrs,S),
+ S1 = #xmerl_scanner{} = Event(#xmerl_event{event = ended,
+ line = L,
+ col = C,
+ data = E}, S0),
+ {Ret, S2} = Hook(E, S1),
+ S2b=S2#xmerl_scanner{xmlbase=XMLBase},
+ {Ret, T, S2b};
+scan_element(">", S=#xmerl_scanner{continuation_fun = F},
+ Pos, Name, StartL, StartC, Attrs, Lang, Parents,
+ NSI, NS, SpaceDefault) ->
+ ?dbg("trailing > detected~n", []),
+ F(fun(MoreBytes, S1) -> scan_element(">" ++ MoreBytes, S1,
+ Pos, Name, StartL, StartC, Attrs,
+ Lang,Parents,NSI,NS,SpaceDefault) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_element(">" ++ T, S0 = #xmerl_scanner{event_fun = Event,
+ hook_fun = Hook,
+ line = L, col = C,
+ xmlbase_cache=XMLBase,
+ space = SpaceOption},
+ Pos, Name, StartL, StartC, Attrs0, Lang, Parents,
+ NSI, Namespace, SpaceDefault) ->
+ ?bump_col(1),
+ Attrs = lists:reverse(Attrs0),
+ wfc_unique_att_spec(Attrs,S),
+ XMLSpace = case lists:keysearch('xml:space', #xmlAttribute.name, Attrs) of
+ false -> SpaceDefault;
+ {value, #xmlAttribute{value="default"}} -> SpaceOption;
+ {value, #xmlAttribute{value="preserve"}} -> preserve;
+ _ -> SpaceDefault
+ end,
+
+ E0=processed_whole_element(S,Pos,Name,Attrs,Lang,Parents,NSI,Namespace),
+ S1 = #xmerl_scanner{} = Event(#xmerl_event{event = started,
+ line = StartL,
+ col = StartC,
+ data = E0}, S),
+
+ {Content, T1, S2} = scan_content(T, S1, Name, Attrs, XMLSpace,
+ E0#xmlElement.language,
+ [{Name, Pos}|Parents], Namespace),
+
+ Element=E0#xmlElement{content=Content,
+ xmlbase=E0#xmlElement.xmlbase},
+ S3 = #xmerl_scanner{} = Event(#xmerl_event{event = ended,
+ line = L,
+ col = C,
+ data = Element}, S2),
+ {Ret, S4} = Hook(Element, S3),
+ S4b=S4#xmerl_scanner{xmlbase=XMLBase},
+ {Ret, T1, S4b};
+scan_element(T, S, Pos, Name, StartL, StartC, Attrs, Lang, Parents,
+ NSI, NS, SpaceDefault) ->
+ {AttName, NamespaceInfo, T1, S1} = scan_name(T, S),
+ {T2, S2} = scan_eq(T1, S1),
+ {AttType,_DefaultDecl} = get_att_type(S2,AttName,Name),
+ {AttValue, T3, S3,IsNorm} = scan_att_value(T2, S2, AttType),
+%% check_default_value(S3,DefaultDecl,AttValue),
+ NewNS = check_namespace(AttName, NamespaceInfo, AttValue, NS),
+ wfc_whitespace_betw_attrs(hd(T3),S3),
+ ?strip4,
+ AttrPos = case Attrs of
+ [] ->
+ 1;
+ [#xmlAttribute{pos = P}|_] ->
+ P+1
+ end,
+ Attr = #xmlAttribute{name = AttName,
+ pos = AttrPos,
+ language = Lang,
+ namespace = NamespaceInfo,
+ value = AttValue,
+ normalized = IsNorm},
+ XMLBase=if
+ AttName=='xml:base' ->
+ resolve_relative_uri(AttValue,S4#xmerl_scanner.xmlbase);
+ true ->
+ S4#xmerl_scanner.xmlbase
+ end,
+
+ #xmerl_scanner{event_fun = Event,
+ line = Line,
+ col = Col} = S4,
+ S5 = Event(#xmerl_event{event = ended,
+ line = Line,
+ col = Col,
+ data = Attr},
+ S4#xmerl_scanner{xmlbase=XMLBase,
+ xmlbase_cache=S#xmerl_scanner.xmlbase}),
+ scan_element(T4, S5, Pos, Name, StartL, StartC, [Attr|Attrs],
+ Lang, Parents, NSI, NewNS, SpaceDefault).
+
+get_att_type(S=#xmerl_scanner{rules_read_fun=Read},AttName,ElemName) ->
+ case Read(elem_def,ElemName,S) of
+ #xmlElement{attributes = Attrs} ->
+ case lists:keysearch(AttName,1,Attrs) of
+ {value,{_,AttType,_,DefaultDecl,_}} ->
+ {AttType,DefaultDecl};
+ _ -> {'CDATA',no_value} %% undefined attribute shall be treated as CDATA
+ end;
+ _ -> {'CDATA',no_value}
+ end.
+
+resolve_relative_uri(NewBase="/"++_,CurrentBase) ->
+ case xmerl_uri:parse(CurrentBase) of
+ {error,_Reason} ->
+ NewBase;
+ {Scheme,Host,Port,_Path,_Query} ->
+ atom_to_list(Scheme)++Host++":"++integer_to_list(Port)++NewBase
+ end;
+resolve_relative_uri(NewBase,CurrentBase) ->
+ filename:join(CurrentBase,NewBase).
+
+
+processed_whole_element(S=#xmerl_scanner{hook_fun = _Hook,
+ xmlbase = XMLBase,
+ line = _L, col = _C,
+ event_fun = _Event},
+ Pos, Name, Attrs, Lang, Parents, NSI, Namespace) ->
+ Language = check_language(Attrs, Lang),
+
+ {ExpName, ExpAttrs} =
+ case S#xmerl_scanner.namespace_conformant of
+ true ->
+ %% expand attribute names. We need to do this after having
+ %% scanned all attributes of the element, since (as far as
+ %% I can tell), XML Names only specifies that namespace attrs
+ %% are valid within the whole scope of the element in which
+ %% they are declared, which should also mean that even if they
+ %% are declared after some other attributes, the namespace
+ %% should apply to those attributes as well.
+ %% Note that the default URI does not apply to attrbute names.
+ TempNamespace = Namespace#xmlNamespace{default = []},
+ ExpAttrsX =
+ [A#xmlAttribute{
+ expanded_name=expanded_name(
+ A#xmlAttribute.name,
+ A#xmlAttribute.namespace,
+ % NSI,
+ TempNamespace, S)} || A <- Attrs],
+ {expanded_name(Name, NSI, Namespace, S), ExpAttrsX};
+ false ->
+ {Name, Attrs}
+ end,
+
+ #xmlElement{name = Name,
+ xmlbase = XMLBase,
+ pos = Pos,
+ parents = Parents,
+ attributes = ExpAttrs,
+ language = Language,
+ expanded_name = ExpName,
+ nsinfo = NSI,
+ namespace = Namespace}.
+
+
+check_language([#xmlAttribute{name='xml:lang',value=Lang}|_], _) ->
+ Lang;
+check_language([_|T], Lang) ->
+ check_language(T, Lang);
+check_language([], Lang) ->
+ Lang.
+
+
+check_namespace(xmlns, _, Value, NS) ->
+ NS#xmlNamespace{default = list_to_atom(Value)};
+check_namespace(_, {"xmlns", Prefix}, Value,
+ NS = #xmlNamespace{nodes = Ns}) ->
+ NS#xmlNamespace{nodes = keyreplaceadd(
+ Prefix, 1, Ns, {Prefix, list_to_atom(Value)})};
+check_namespace(_, _, _, NS) ->
+ NS.
+
+
+expanded_name(Name, [], #xmlNamespace{default = []}, _S) ->
+ Name;
+expanded_name(Name, [], #xmlNamespace{default = URI}, _S) ->
+ {URI, Name};
+expanded_name(_Name, {"xmlns", Local}, _NS, _S) -> % CHECK THIS /JB
+ {"xmlns",Local};
+expanded_name(_Name, {Prefix, Local}, #xmlNamespace{nodes = Ns}, S) ->
+ case lists:keysearch(Prefix, 1, Ns) of
+ {value, {_, URI}} ->
+ {URI, list_to_atom(Local)};
+ false ->
+ %% A namespace constraint of XML Names is that the prefix
+ %% must be declared
+ ?fatal({namespace_prefix_not_declared, Prefix}, S)
+ end.
+
+
+
+
+keyreplaceadd(K, Pos, [H|T], Obj) when K == element(Pos, H) ->
+ [Obj|T];
+keyreplaceadd(K, Pos, [H|T], Obj) ->
+ [H|keyreplaceadd(K, Pos, T, Obj)];
+keyreplaceadd(_K, _Pos, [], Obj) ->
+ [Obj].
+
+%%%%%%% [10] AttValue
+%% normalize the attribute value according to XML 1.0 section 3.3.3
+
+scan_att_value([], S=#xmerl_scanner{continuation_fun = F},AT) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_att_value(MoreBytes, S1, AT) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_att_value("%"++_T,S=#xmerl_scanner{environment=prolog},_AttType) ->
+ ?fatal({error,{wfc_PEs_In_Internal_Subset}},S);
+scan_att_value("%"++T,S0=#xmerl_scanner{rules_read_fun=Read,
+ rules_write_fun=Write,
+ rules_delete_fun=Delete},AttType) ->
+ ?bump_col(1),
+ {Name,T1,S1} = scan_pe_reference(T,S),
+ {ExpandedRef,S2} =
+ case expand_pe_reference(Name,S1,in_literal) of
+ Tuple when is_tuple(Tuple) ->
+ %% {system,URI} or {public,URI}
+ %% Included in literal, just get external file.
+ {ExpRef,Sx}=fetch_not_parse(Tuple,S1),
+ {EntV,_,_S2} = scan_entity_value(ExpRef, Sx, no_delim,
+ Name,parameter),
+ %% should do an update Write(parameter_entity) so next
+ %% expand_pe_reference is faster
+ Delete(parameter_entity,Name,_S2),
+ _S3 = Write(parameter_entity,Name,EntV,_S2),
+ EntV2 = Read(parameter_entity,Name,_S3),
+ {EntV2,_S3};
+ ExpRef ->
+ {ExpRef,S1}
+ end,
+ {_,T2,S3} = strip(ExpandedRef ++ T1,S2),
+ scan_att_value(T2,S3,AttType);
+scan_att_value([H|T], S0,'CDATA'=AT) when H == $"; H == $' ->
+ ?bump_col(1),
+ scan_att_chars(T, S, H, [],[], AT,false);
+scan_att_value([H|T], S0,AttType) when H == $"; H == $' ->
+ ?bump_col(1),
+ {T1,S1,IsNorm} = normalize(T,S,false),
+ scan_att_chars(T1, S1, H, [],[], AttType,IsNorm).
+
+scan_att_chars([],S=#xmerl_scanner{continuation_fun=F},H,Acc,TmpAcc,AT,IsNorm)->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) ->
+ scan_att_chars(MoreBytes, S1, H, Acc,TmpAcc,AT,IsNorm)
+ end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_att_chars([H|T], S0, H, Acc, TmpAcc,AttType,IsNorm) -> % End quote
+ ?bump_col(1),
+ check_att_default_val(S#xmerl_scanner.validation,TmpAcc,AttType,S),
+ {Acc2,S2,IsNorm2} =
+ if
+ AttType == 'CDATA' -> {Acc,S,IsNorm};
+ true ->
+ normalize(Acc,S,IsNorm)
+ end,
+ {lists:reverse(Acc2), T, S2,IsNorm2};
+scan_att_chars("&" ++ T, S0, Delim, Acc, TmpAcc,AT,IsNorm) -> % Reference
+ ?bump_col(1),
+ {ExpRef, T1, S1} = scan_reference(T, S),
+ case markup_delimeter(ExpRef) of
+ true ->
+ scan_att_chars(T1,S1,Delim,[ExpRef|Acc],[ExpRef|TmpAcc],AT,IsNorm);
+ _ ->
+ scan_att_chars(string_to_char_set(S#xmerl_scanner.encoding,ExpRef)
+ ++ T1, S1, Delim, Acc,TmpAcc, AT,IsNorm)
+ end;
+scan_att_chars("<" ++ _T, S0, _Delim, _Acc,_, _,_) -> % Tags not allowed here
+ ?fatal(unexpected_char, S0);
+scan_att_chars([H|T], S0, Delim, Acc, _TmpAcc,'CDATA',IsNorm)
+ when ?whitespace(H) ->
+ ?bump_col(1),
+ scan_att_chars(T, S, Delim, [$\s|Acc], [],'CDATA',IsNorm);
+scan_att_chars([H|T], S0, Delim, Acc, TmpAcc,AT,IsNorm)
+ when ?whitespace(H) ->
+ ?bump_col(1),
+ {T1,S1,IsNorm2} = normalize(T,S,IsNorm),
+ check_att_default_val(S#xmerl_scanner.validation,TmpAcc,AT,S1),
+ scan_att_chars(T1, S1, Delim, [$\s|Acc],[], AT,IsNorm2);
+scan_att_chars(Str, S0, Delim, Acc, TmpAcc,AT,IsNorm) ->
+ ?bump_col(1),
+ {Ch,T} = to_ucs(S#xmerl_scanner.encoding,Str),
+ valid_Char(S#xmerl_scanner.validation,AT,Ch,S),
+ scan_att_chars(T, S, Delim, [Ch|Acc], [Ch|TmpAcc],AT,IsNorm).
+
+markup_delimeter("&")-> true;
+markup_delimeter("\"") -> true;
+markup_delimeter("\'") -> true;
+markup_delimeter("<") -> true;
+markup_delimeter(">") -> true;
+markup_delimeter("%") -> true;
+markup_delimeter(_) -> false.
+
+check_att_default_val(dtd,[],_Ent,_S) ->
+ ok;
+check_att_default_val(dtd,RevName,Ent,S) ->
+ check_att_default_val(lists:reverse(RevName),Ent,S);
+check_att_default_val(_,_,_,_) ->
+ ok.
+
+check_att_default_val(Name,Ent,S=#xmerl_scanner{rules_write_fun=Write})
+ when Ent == 'ENTITY'; Ent == 'ENTITIES' ->
+ case xmerl_lib:is_letter(hd(Name)) of
+ true -> ok;
+ _ -> ?fatal({illegal_first_character,Ent,Name},S)
+ end,
+ SName = list_to_atom(Name),
+ Write(entity,SName,undeclared,S);
+check_att_default_val(Name,IDR,S=#xmerl_scanner{rules_write_fun=Write})
+ when IDR == 'IDREF'; IDR == 'IDREFS' ->
+ case xmerl_lib:is_letter(hd(Name)) of
+ true -> ok;
+ _ -> ?fatal({illegal_first_character,IDR,Name},S)
+ end,
+ SName = list_to_atom(Name),
+ Write(id,SName,undeclared,S);
+check_att_default_val(Name,'ID',S=#xmerl_scanner{rules_write_fun=Write,
+ rules_read_fun=Read,
+ rules_delete_fun=Delete}) ->
+ case xmerl_lib:is_name(Name) of
+ false ->
+ ?fatal({'ID_names_must_be_Name_production',Name},S);
+ _ ->
+ ok
+ end,
+ SName = if
+ is_list(Name) -> list_to_atom(Name);
+ true -> Name
+ end,
+ case Read(id,SName,S) of
+ undeclared -> %% was referenced in IDREF/IDREFS before defined
+ Delete(id,SName,S);
+ SName -> ?fatal({values_must_be_unique,'ID',SName},S);
+ undefined -> ok
+ end,
+ Write(id,SName,SName,S);
+check_att_default_val(_,_,_) ->
+ ok.
+
+valid_Char(dtd,AT,C,S) when AT=='NMTOKEN';AT=='NMTOKENS' ->
+ vc_Valid_Char(AT,C,S);
+valid_Char(_,_,[C],S) ->
+ case xmerl_lib:is_char(C) of
+ true ->
+ ok;
+ false ->
+ ?fatal({unexpected_char,C}, S)
+ end;
+valid_Char(_,_,C,S) ->
+ case xmerl_lib:is_char(C) of
+ true ->
+ ok;
+ false ->
+ ?fatal({unexpected_char,C}, S)
+ end.
+
+
+
+%%%%%%% [43] content
+
+scan_content(T, S, Name, Attrs, Space, Lang, Parents, NS) ->
+ scan_content(T, S, _Pos = 1, Name, Attrs, Space,
+ Lang, Parents, NS, _Acc = [],_MarkupDel=[]).
+
+scan_content("<", S= #xmerl_scanner{continuation_fun = F},
+ Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,_) ->
+ ?dbg("trailing < detected~n", []),
+ F(fun(MoreBytes, S1) -> scan_content("<" ++ MoreBytes, S1,
+ Pos, Name, Attrs,
+ Space, Lang, Parents, NS, Acc,[]) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_content([], S=#xmerl_scanner{environment={external,{entity,_}}},
+ _Pos, _Name, _Attrs, _Space, _Lang, _Parents, _NS, Acc,_) ->
+ {lists:reverse(Acc),[],S};
+scan_content([], S=#xmerl_scanner{environment=internal_parsed_entity},
+ _Pos, _Name, _Attrs, _Space, _Lang, _Parents, _NS, Acc,_) ->
+ {lists:reverse(Acc),[],S};
+scan_content([], S=#xmerl_scanner{continuation_fun = F},
+ Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,_) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_content(MoreBytes, S1,
+ Pos, Name, Attrs,
+ Space, Lang, Parents, NS, Acc,[]) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_content("</" ++ T, S0, _Pos, Name, _Attrs, _Space, _Lang,
+ _Parents, _NS, Acc,[]) ->
+ ?bump_col(2),
+ {ETagName, _NamespaceInfo, T1, S1} = scan_name(T, S),
+ if ETagName == Name ->
+ ok;
+ true ->
+ ?fatal({endtag_does_not_match, {was,ETagName,should_have_been, Name}}, S)
+ end,
+ ?strip2,
+ case T2 of
+ ">" ++ T3 ->
+ {lists:reverse(Acc), T3, S2};
+ _ ->
+ ?fatal({error,{unexpected_end_of_STag}},S)
+ end;
+scan_content([$&|_T]=Str,
+ #xmerl_scanner{environment={external,{entity,EName}}} = S0,
+ Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,_) ->
+ {_EntV,T1,S1}=scan_entity_value(Str,S0 ,[],EName,general),
+ %%This is a problem. All referenced entities in the external entity must be checked for recursion, thus parse the contentbut,skip result.
+ scan_content(T1,S1,Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,[]);
+scan_content("&"++T,
+ #xmerl_scanner{environment=internal_parsed_entity} = S,
+ Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,_) ->
+ {_, T1, S1} = scan_reference(T, S),
+ scan_content(T1,S1,Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,[]);
+scan_content("&" ++ T, S0, Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,[]) ->
+ ?bump_col(1),
+ {ExpRef, T1, S1} = scan_reference(T, S),
+ case markup_delimeter(ExpRef) of
+ true -> scan_content(ExpRef++T1,S1,Pos,Name,Attrs,Space,Lang,Parents,NS,Acc,ExpRef);
+ _ ->
+ scan_content(string_to_char_set(S1#xmerl_scanner.encoding,ExpRef)++T1,S1,Pos,Name,Attrs,Space,Lang,Parents,NS,Acc,[])
+ end;
+scan_content("<!--" ++ T, S, Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,[]) ->
+ {_, T1, S1} = scan_comment(T, S, Pos, Parents, Lang),
+ scan_content(T1, S1, Pos+1, Name, Attrs, Space, Lang, Parents, NS, Acc,[]);
+scan_content("<" ++ T, S0, Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,[]) ->
+ ?bump_col(1),
+ {Markup, T1, S1} =
+ scan_content_markup(T, S, Pos, Name, Attrs, Space, Lang, Parents, NS),
+ AccF = S1#xmerl_scanner.acc_fun,
+ {NewAcc, NewPos, NewS} = case AccF(Markup, Acc, S1) of
+ {Acc2, S2} ->
+ {Acc2, Pos+1, S2};
+ {Acc2, Pos2, S2} ->
+ {Acc2, Pos2, S2}
+ end,
+ scan_content(T1, NewS, NewPos, Name, Attrs, Space, Lang,
+ Parents, NS, NewAcc,[]);
+scan_content([_H|T], S= #xmerl_scanner{environment={external,{entity,_}}},
+ Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,_) ->
+ %% Guess we have to scan the content to find any internal entity
+ %% references.
+ scan_content(T,S,Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,[]);
+scan_content(T, S=#xmerl_scanner{acc_fun = F,
+ event_fun = Event,
+ hook_fun=Hook,
+ line = _L},
+ Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,MarkupDel) ->
+ Text0 = #xmlText{pos = Pos,
+ parents = Parents},
+ S1 = #xmerl_scanner{} = Event(#xmerl_event{event = started,
+ line = S#xmerl_scanner.line,
+ data = Text0}, S),
+ {Data, T1, S2} = scan_char_data(T, S1, Space,MarkupDel),
+ Text = Text0#xmlText{value = Data},
+ {Ret,S2b} = Hook(Text,S2),
+ S3 = #xmerl_scanner{} = Event(#xmerl_event{event = ended,
+ line = S2b#xmerl_scanner.line,
+ data = Ret}, S2b),
+ {NewAcc, NewPos, NewS} = case F(Ret, Acc, S3) of
+ {Acc4, S4} ->
+ {Acc4, Pos+1, S4};
+ {Acc4, Pos4, S4} ->
+ {Acc4, Pos4, S4}
+ end,
+ scan_content(T1, NewS, NewPos, Name, Attrs, Space, Lang,
+ Parents, NS, NewAcc,[]).
+
+
+scan_content_markup([], S=#xmerl_scanner{continuation_fun = F},
+ Pos, Name, Attrs, Space, Lang, Parents, NS) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_content_markup(
+ MoreBytes,S1,Pos,Name,
+ Attrs,Space,Lang,Parents,NS) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_content_markup("![CDATA[" ++ T, S0, Pos, _Name, _Attrs,
+ _Space, _Lang, Parents, _NS) ->
+ ?bump_col(8),
+ scan_cdata(T, S, Pos, Parents);
+scan_content_markup("?"++T,S0,Pos,_Name,_Attrs,_Space,_Lang,_Parents,_NS) ->
+ ?bump_col(1),
+ scan_pi(T, S, Pos);
+scan_content_markup(T, S, Pos, _Name, _Attrs, Space, Lang, Parents, NS) ->
+ scan_element(T, S, Pos, Space, Lang, Parents, NS).
+
+scan_char_data(T, S, Space,MUD) ->
+ scan_char_data(T, S, Space,MUD, _Acc = []).
+
+%%%%%%% [14] CharData
+
+scan_char_data([], S=#xmerl_scanner{environment={external,{entity,_}}},
+ _Space,_MUD, Acc) ->
+
+ {lists:reverse(Acc), [], S};
+scan_char_data([], S=#xmerl_scanner{environment=internal_parsed_entity},
+ _Space, _MUD,Acc) ->
+
+ {lists:reverse(Acc), [], S};
+scan_char_data([], S=#xmerl_scanner{continuation_fun = F}, Space, _MUD,Acc) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_char_data(MoreBytes,S1,Space,_MUD,Acc) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_char_data([$&|T], S,Space,"&",Acc) ->
+ scan_char_data(T, S, Space,[], [$&|Acc]);
+scan_char_data(T=[$&|_], S,_Space,_MUD,Acc) ->
+
+ {lists:reverse(Acc), T, S};
+scan_char_data("]]>" ++ _T, S, _Space,_MUD, _Acc) ->
+ %% See Section 2.4: Especially:
+ %% "The right angle bracket (>) MAY be represented using the string "&gt;",
+ %% and MUST, for compatibility, be escaped using either "&gt;" or a
+ %% character reference when it appears in the string "]]>" in content, when
+ %% that string is not marking the end of a CDATA section.
+ ?fatal(unexpected_cdata_end, S);
+scan_char_data([$<|T],S,Space,"<", Acc) ->
+ scan_char_data(T, S, Space,[], [$<|Acc]);
+scan_char_data(T = [$<|_], S, _Space,_MUD,Acc) ->
+
+ {lists:reverse(Acc), T, S};
+scan_char_data(T = [H|R], S, Space,MUD, Acc) when ?whitespace(H) ->
+ if
+ MUD =:= [], Acc =:= [], H =:= $\n, Space =:= preserve ->
+ case fast_accumulate_whitespace(R, S, T) of
+ {done, Reply} ->
+ Reply;
+ {NewAcc, T1, S1} ->
+ scan_char_data(T1, S1, Space, MUD, NewAcc)
+ end;
+ true ->
+ {NewAcc, T1, S1} = accumulate_whitespace(T, S, Space, Acc),
+ scan_char_data(T1, S1, Space,MUD,NewAcc)
+ end;
+scan_char_data([H1,H2|_T],S,_Space,_MUD,_Acc) when ?non_character(H1,H2) ->
+ ?fatal({error,{not_allowed_to_use_Unicode_noncharacters}},S);
+scan_char_data("]]>"++_T,S,_Space,_MUD,_Acc) ->
+ ?fatal({error,{illegal_character_in_content,"]]>"}},S);
+scan_char_data(Str,S0,Space,MUD,Acc) ->
+ ?bump_col(1),
+ {Ch,T} = wfc_legal_char(Str,S),
+ scan_char_data(T,S,Space,MUD,[Ch|Acc]).
+
+
+
+%%%%%%% [18]-[21] CDATA
+
+scan_cdata(Str, S, Pos, Parents) ->
+ scan_cdata(Str, S, Pos, Parents, _Acc = []).
+
+
+scan_cdata([], S=#xmerl_scanner{continuation_fun = F}, Pos, Parents, Acc) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_cdata(MoreBytes, S1, Pos, Parents, Acc) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_cdata("]]>" ++ T, S0, Pos, Parents, Acc) ->
+ ?bump_col(3),
+ {#xmlText{pos = Pos,
+ parents = Parents,
+ value = lists:reverse(Acc),
+ type = cdata}, T, S};
+scan_cdata(Str, S0, Pos, Parents, Acc) ->
+ {Ch,T} = to_ucs(S0#xmerl_scanner.encoding,Str),
+ case xmerl_lib:is_char(Ch) of
+ true ->
+ ?bump_col(1),
+ scan_cdata(T, S, Pos, Parents, [Ch|Acc]);
+ false ->
+ ?fatal({unexpected_char,Ch}, S0)
+ end.
+
+
+%%%%%%% [67] Reference
+%% returns a three tuple {Result,RestBuf,State}
+
+scan_reference([], S=#xmerl_scanner{continuation_fun = F}) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_reference(MoreBytes, S1) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_reference("#x" ++ T, S0) ->
+ %% [66] CharRef
+ ?bump_col(1),
+ if hd(T) /= $; ->
+ {[Ch], T2, S2} = scan_char_ref_hex(T, S, 0),
+ {to_char_set(S2#xmerl_scanner.encoding,Ch),T2,S2};
+ true ->
+ ?fatal(invalid_char_ref, S)
+ end;
+scan_reference("#" ++ T, S0) ->
+ %% [66] CharRef
+ ?bump_col(1),
+ if hd(T) /= $; ->
+ scan_char_ref_dec(T, S, []);
+ true ->
+ ?fatal(invalid_char_ref, S)
+ end;
+scan_reference(T, S) ->
+ case catch scan_entity_ref(T, S) of
+ {'EXIT', _} ->
+ ?fatal(error_scanning_entity_ref,S);
+ Other ->
+ Other
+ end.
+
+
+%% Chapter 4.4.2: ... the replacement text of entities used to escape
+%% markup delimiters (the entities amp, lt, gt, apos, quot) is always treated
+%% as data. (The string "AT&amp;T;" expands to "AT&T;" and the remaining
+%% ampersand is not recognized as an entity-reference delimiter.)"
+%%
+%% How to achieve this? My current approach is to insert the *strings* "&",
+%% "<", ">", "'", and "\"" instead of the characters. The processor will
+%% ignore them when performing multiple expansions. This means, for now, that
+%% the character data output by the processor is (1-2 levels) deep.
+%% At some suitable point, we should flatten these, so that application-level
+%% processors should not have to be aware of this detail.
+
+scan_entity_ref([], S=#xmerl_scanner{continuation_fun = F}) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_entity_ref(MoreBytes, S1) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_entity_ref("amp;" ++ T, S0) ->
+ ?bump_col(4),
+ {"&", T, S};
+scan_entity_ref("lt;" ++ T, S0) ->
+ ?bump_col(3),
+ {"<", T, S};
+scan_entity_ref("gt;" ++ T, S0) ->
+ ?bump_col(3),
+ {">", T, S};
+scan_entity_ref("apos;" ++ T, S0) ->
+ ?bump_col(5),
+ {"'", T, S};
+scan_entity_ref("quot;" ++ T, S0) ->
+ ?bump_col(5),
+ {"\"", T, S};
+scan_entity_ref(T, S) ->
+ {Name, _NamespaceInfo, T1, S1} = scan_name(T, S),
+ T2 = scan_mandatory(";",T1,1,S1,expected_entity_reference_semicolon),
+% ";" ++ T2 = T1,
+ S2 = S1,
+ Entity = expand_reference(Name, S2),
+ {Entity, T2, S2}.
+
+
+%%%%%%% [69] PEReference
+
+scan_pe_reference(T, S) ->
+ {Name, _NamespaceInfo, T1, S1} = scan_name(T, S),
+ T2 = scan_mandatory(";",T1,1,S1,expected_parsed_entity_reference_semicolon),
+% ";" ++ T2 = T1,
+ {Name, T2, S1#xmerl_scanner{col = S1#xmerl_scanner.col+1}}.
+
+expand_pe_reference(Name, #xmerl_scanner{rules_read_fun = Read} = S,WS) ->
+ case Read(parameter_entity, Name, S) of
+ undefined ->
+ ?fatal({unknown_parameter_entity, Name}, S); % WFC or VC failure
+ Err={error,_Reason} ->
+ ?fatal(Err,S);
+ Tuple when is_tuple(Tuple) ->
+ Tuple;
+ Result ->
+ if
+ WS == in_literal -> Result;
+ true -> " "++Result++" "
+ end
+ end.
+
+% Currently unused
+%
+% expand_external_pe_reference(Name, #xmerl_scanner{rules_read_fun = Read} = S) ->
+% case Read(parameter_entity, Name, S) of
+% undefined ->
+% ?fatal({unknown_parameter_entity, Name}, S);
+% Result ->
+% fetch_DTD(Result,S)
+% end.
+
+
+%%%%%%% [68] EntityReference
+
+expand_reference(Name, #xmerl_scanner{environment={external,{entity,_}}}) ->
+ atom_to_list(Name);
+expand_reference(Name, #xmerl_scanner{environment=internal_parsed_entity}) ->
+ atom_to_list(Name);
+expand_reference(Name, #xmerl_scanner{rules_read_fun = Read} = S) ->
+ case Read(entity, Name, S) of
+ undefined ->
+ ?fatal({unknown_entity_ref, Name}, S);
+ {_,external,{error,enoent}} ->
+ ?fatal({error,{entity_target_not_found,{error,enoent},Name}},S);
+ {DefEnv,EntType,Value} ->
+ wfc_Entity_Declared(DefEnv,S,Name),
+ Value2 = string_to_char_set(S#xmerl_scanner.encoding,Value),
+ wfc_Internal_parsed_entity(EntType,Value2,S),
+ Value
+ end.
+
+
+%%%%%%% [66] CharRef
+
+scan_char_ref_dec([], S=#xmerl_scanner{continuation_fun = F}, Acc) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_char_ref_dec(MoreBytes, S1, Acc) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_char_ref_dec([H|T], S0, Acc) when H >= $0, H =< $9 ->
+ ?bump_col(1),
+ scan_char_ref_dec(T, S, [H|Acc]);
+scan_char_ref_dec(";" ++ T, S0, Acc) ->
+ ?bump_col(1),
+ Ref = list_to_integer(lists:reverse(Acc)),
+ {Ch,_} = wfc_legal_char(Ref,S),
+ {[Ch], T, S}. %% changed return value from [[Ref]]
+
+
+scan_char_ref_hex([], S=#xmerl_scanner{continuation_fun = F}, Acc) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_char_ref_hex(MoreBytes, S1, Acc) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_char_ref_hex([H|T], S0, Acc) when H >= $0, H =< $9 ->
+ ?bump_col(1),
+ Dec = H - $0,
+ scan_char_ref_hex(T, S, (Dec bor (Acc bsl 4)));
+scan_char_ref_hex([H|T], S0, Acc) when H >= $a, H =< $f ->
+ ?bump_col(1),
+ Dec = (H - $a) + 10,
+ scan_char_ref_hex(T, S, (Dec bor (Acc bsl 4)));
+scan_char_ref_hex([H|T], S0, Acc) when H >= $A, H =< $F ->
+ ?bump_col(1),
+ Dec = (H - $A) + 10,
+ scan_char_ref_hex(T, S, (Dec bor (Acc bsl 4)));
+scan_char_ref_hex(";" ++ T, S0, Acc) ->
+ ?bump_col(1),
+ {Ch,_} = wfc_legal_char(Acc,S),
+ {[Ch], T, S}. %% changed return value from [[Acc]]
+
+
+
+%%%%%%% [25] Eq
+%%% Eq ::= S? '=' S?
+scan_eq(T, S) ->
+ ?strip1,
+ case T1 of
+ [$=|T2] ->
+ S2 = S1#xmerl_scanner{col=S1#xmerl_scanner.col+1},
+ ?strip3,
+ {T3, S3};
+ _ ->
+ ?fatal(assignment_expected,S)
+ end.
+
+
+%% scan_name/2
+%%
+%% We perform some checks here to make sure that the names conform to
+%% the "Namespaces in XML" specification. This is an option.
+%%
+%% Qualified Name:
+%% [6] QName ::= (Prefix ':')? LocalPart
+%% [7] Prefix ::= NCName
+%% [8] LocalPart ::= NCName
+%% [4] NCName ::= (Letter | '_') (NCNameChar)*
+%% [5] NCNameChar ::= Letter | Digit | '.' | '-' | '_'
+%% | CombiningChar | Extender
+
+
+%% The effect of XML Names (namespace) conformance is that:
+%% - All element types and attribute names contain either zero or one colon
+%% - No entity names, PI targets, or notation names contain any colons.
+%%
+%% scan_name_no_colons/2 will ensure that the name contains no colons iff
+%% the scanner has been told to be namespace conformant. Otherwise, it will
+%% behave exactly like scan_name/2.
+%%
+scan_name_no_colons(Str, S) ->
+ NSC = S#xmerl_scanner.namespace_conformant,
+ case NSC of
+ true ->
+ {Target, NSI, T1, S1} =
+ scan_name(Str,S#xmerl_scanner{namespace_conformant=no_colons}),
+ {Target,NSI,T1,S1#xmerl_scanner{namespace_conformant=NSC}};
+ false ->
+ scan_name(Str, S)
+ end.
+
+
+
+%% [5] Name ::= (Letter | '_' | ':') (NameChar)*
+scan_name([], S=#xmerl_scanner{continuation_fun = F}) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_name(MoreBytes, S1) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_name(Str = [$:|T], S0 = #xmerl_scanner{namespace_conformant = NSC}) ->
+ if NSC == false ->
+ ?bump_col(1),
+ scan_nmtoken(T, S, [$:], NSC);
+ NSC == no_colons ->
+ ?fatal({invalid_NCName, lists:sublist(Str, 1, 6)}, S0);
+ true ->
+ %% In order to conform with the "Namespaces in XML" spec,
+ %% we cannot allow names to begin with ":"
+ ?fatal({invalid_NCName, lists:sublist(Str, 1, 6)}, S0)
+ end;
+scan_name([$_|T], S0 = #xmerl_scanner{namespace_conformant = NSC}) ->
+ ?bump_col(1),
+ scan_nmtoken(T, S, [$_], NSC);
+scan_name("%"++_T,S=#xmerl_scanner{environment=prolog}) ->
+ ?fatal({error,{wfc_PEs_In_Internal_Subset}},S);
+scan_name("%"++T,S0=#xmerl_scanner{environment={external,_}}) ->
+ %% parameter entity that expands to a name
+ ?bump_col(1),
+ {PERefName, T1, S1} = scan_pe_reference(T, S),
+ ExpRef = expand_pe_reference(PERefName, S1,as_PE),
+ {_,T2,S2} = strip(ExpRef ++ T1,S1),
+ scan_name(T2,S2);
+scan_name(Str, S0 = #xmerl_scanner{namespace_conformant = NSC}) ->
+ {Ch,T} = to_ucs(S0#xmerl_scanner.encoding,Str),
+ case xmerl_lib:is_letter(Ch) of
+ true ->
+ ?bump_col(1),
+ scan_nmtoken(T, S, [Ch], NSC);
+ false ->
+ ?fatal({invalid_name, lists:sublist(Str, 1, 6)}, S0)
+ end;
+scan_name(Str, S) ->
+ ?fatal({invalid_name, Str}, S).
+
+
+
+
+
+
+scan_nmtoken(Str, S, Acc, NSC) ->
+ scan_nmtoken(Str, S, Acc, _Prefix = [], _Local = Acc, NSC,isLatin1(hd(Acc),true)).
+
+%% scan_nmtoken/2
+%% [7] NmToken ::= (NameChar)+
+scan_nmtoken([], S=#xmerl_scanner{continuation_fun = F}) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_nmtoken(MoreBytes, S1) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_nmtoken("%"++T, S0=#xmerl_scanner{environment={external,_}}) ->
+ ?bump_col(1),
+ {PERefName, T1, S1} = scan_pe_reference(T, S),
+ ExpRef = expand_pe_reference(PERefName, S1,as_PE),
+ {_,T2,S2} = strip(ExpRef ++ T1,S1),
+ scan_nmtoken(T2,S2);
+scan_nmtoken(Str, S) ->
+ {Ch,T} = to_ucs(S#xmerl_scanner.encoding,Str),
+ case xmerl_lib:is_namechar(Ch) of
+ true ->
+ scan_nmtoken(T, S#xmerl_scanner{col = S#xmerl_scanner.col+1},
+ _Acc = [Ch], _Prefix = [], _Local = [Ch],
+ _NamespaceConformant = false,isLatin1(Ch,true));
+ false ->
+ ?fatal({invalid_nmtoken, lists:sublist(Str, 1, 6)}, S)
+ end.
+
+
+scan_nmtoken([], S=#xmerl_scanner{continuation_fun = F},
+ Acc, Prefix, Local, NSC,IsLatin1) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_nmtoken(MoreBytes,S1,Acc,Prefix,Local,NSC,IsLatin1) end,
+ fun(S1) -> {list_to_atom(lists:reverse(Acc)),
+ namespace_info(Prefix, Local),[],S1} end,
+ S);
+%% whitespace marks the end of a name
+scan_nmtoken(Str = [H|_], S, Acc, Prefix, Local, _NSC,true) when ?whitespace(H) ->
+ %% we don't strip here because the occurrence of whitespace may be an error
+ %% e.g. <!ELEMENT spec (front, body, back ?)>
+ NmString = lists:reverse(Acc),
+ {list_to_atom(NmString), namespace_info(Prefix, Local), Str, S};
+scan_nmtoken(Str = [$:|_], S, Acc, [], _Local, no_colons,_IsLatin1) ->
+ ?fatal({invalid_NCName,
+ lists:sublist(lists:reverse(Acc) ++ Str, 1, 6)}, S);
+scan_nmtoken([$:|T], S0, Acc, [], Local, NSC, IsLatin1) ->
+ ?bump_col(1),
+ scan_nmtoken(T, S, [$:|Acc], lists:reverse(Local), [], NSC,IsLatin1);
+scan_nmtoken(Str = [$:|_T], S, Acc, _Prefix, _Local, _NSC = true,_IsLatin1) ->
+ %% non-empty Prefix means that we've encountered a ":" already.
+ %% Conformity with "Namespaces in XML" requires
+ %% at most one colon in a name
+ ?fatal({invalid_NCName,
+ lists:sublist(lists:reverse(Acc) ++ Str, 1, 6)}, S);
+
+%% non-namechar also marks the end of a name
+scan_nmtoken(Str, S0, Acc, Prefix, Local, NSC,IsLatin1) ->
+ ?bump_col(1),
+ {Ch,T} = to_ucs(S#xmerl_scanner.encoding,Str),
+ case {xmerl_lib:is_namechar(Ch),IsLatin1} of
+ {true,_} ->
+ scan_nmtoken(T, S, [Ch|Acc], Prefix, [Ch|Local], NSC,isLatin1(Ch,IsLatin1));
+ {_,true} ->
+ NmStr = lists:reverse(Acc),
+ {list_to_atom(NmStr), namespace_info(Prefix, Local), Str, S};
+ _ ->
+ {lists:reverse(Acc), namespace_info(Prefix, Local), Str, S}
+ end.
+
+namespace_info([], _) ->
+ [];
+namespace_info(Prefix, Local) ->
+ {Prefix, lists:reverse(Local)}.
+
+isLatin1(_Ch,false) ->
+ false;
+isLatin1(Ch,_) when Ch > 255 ->
+ false;
+isLatin1(_,_) ->
+ true.
+
+%%%%%%% [11] SystemLiteral
+
+scan_system_literal([], S=#xmerl_scanner{continuation_fun = F}) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_system_literal(MoreBytes, S1) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_system_literal("\"" ++ T, S) ->
+ scan_system_literal(T, S, $", []);
+scan_system_literal("'" ++ T, S) ->
+ scan_system_literal(T, S, $', []).
+
+
+scan_system_literal([], S=#xmerl_scanner{continuation_fun = F},
+ Delimiter, Acc) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_system_literal(MoreBytes,S1,Delimiter,Acc) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_system_literal([H|T], S, H, Acc) ->
+ {lists:reverse(Acc), T, S#xmerl_scanner{col = S#xmerl_scanner.col+1}};
+scan_system_literal("#"++_R, S, _H, _Acc) ->
+ %% actually not a fatal error
+ ?fatal(fragment_identifier_in_system_literal,S);
+scan_system_literal(Str, S, Delimiter, Acc) ->
+ {Ch,T} = to_ucs(S#xmerl_scanner.encoding,Str),
+ scan_system_literal(T, S#xmerl_scanner{col = S#xmerl_scanner.col+1},
+ Delimiter, [Ch|Acc]).
+
+
+%%%%%%% [12] PubidLiteral
+
+scan_pubid_literal([], S=#xmerl_scanner{continuation_fun = F}) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_pubid_literal(MoreBytes, S1) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_pubid_literal([H|T], S) when H == $"; H == $' ->
+ scan_pubid_literal(T, S#xmerl_scanner{col = S#xmerl_scanner.col+1}, H, []);
+scan_pubid_literal([H|_T], S) ->
+ ?fatal({invalid_pubid_char, H}, S).
+
+
+scan_pubid_literal([], S=#xmerl_scanner{continuation_fun = F},
+ Delimiter, Acc) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_pubid_literal(MoreBytes,S1,Delimiter,Acc) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_pubid_literal([H|T], S, H, Acc) ->
+ {lists:reverse(Acc), T, S#xmerl_scanner{col = S#xmerl_scanner.col+1}};
+scan_pubid_literal(Str = [H|_], S, Delimiter, Acc) when ?whitespace(H) ->
+ %% Before matching public identifiers, all whitespace must be normalized,
+ %% so we do that here
+ {_, T, S1} = pub_id_strip(Str, S),
+ scan_pubid_literal(T, S1, Delimiter, [16#20|Acc]);
+scan_pubid_literal([H|T], S, Delimiter, Acc) ->
+ case is_pubid_char(H) of
+ true ->
+ scan_pubid_literal(
+ T, S#xmerl_scanner{col = S#xmerl_scanner.col+1},
+ Delimiter, [H|Acc]);
+ false ->
+ ?fatal({invalid_pubid_char, H}, S)
+ end.
+
+%% We do not match whitespace here, even though they're allowed in public
+%% identifiers. This is because we normalize this whitespace as we scan
+%% (see above in scan_pubid_literal())
+%%
+is_pubid_char(X) when X >= $a, X =< $z -> true;
+is_pubid_char(X) when X >= $A, X =< $Z -> true;
+is_pubid_char(X) when X >= $0, X =< $9 -> true;
+is_pubid_char(X) ->
+ lists:member(X, "-'()+,./:=?;!*#@$_%").
+
+
+%%%%%%% [46] contentspec
+
+scan_contentspec([], S=#xmerl_scanner{continuation_fun = F}) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_contentspec(MoreBytes, S1) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_contentspec("EMPTY" ++ T, S0) ->
+ ?bump_col(5),
+ {empty, T, S};
+scan_contentspec("ANY" ++ T, S0) ->
+ ?bump_col(3),
+ {any, T, S};
+scan_contentspec("%" ++ _T, S=#xmerl_scanner{environment=prolog}) ->
+ ?fatal({error,{wfc_PEs_In_Internal_Subset}},S);
+scan_contentspec("%" ++ T, S0) ->
+ ?bump_col(1),
+ {PERefName, T1, S1} = scan_pe_reference(T, S),
+ ExpRef = expand_pe_reference(PERefName, S1,as_PE),
+ {_,T2,S2} = strip(ExpRef ++ T1,S1),
+ scan_contentspec(T2, S2);
+scan_contentspec("(" ++ T, S0) ->
+ ?bump_col(1),
+ ?strip1,
+ scan_elem_content(T1, S1);
+scan_contentspec(_Str,S) ->
+ ?fatal(unexpected_character,S).
+
+
+%%%%%%% [47] children
+%%%%%%% [51] Mixed
+
+scan_elem_content(T, S) ->
+ scan_elem_content(T, S, _Context = children, _Mode = unknown, _Acc = []).
+
+scan_elem_content([], S=#xmerl_scanner{continuation_fun = F},
+ Context, Mode, Acc) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes,S1) -> scan_elem_content(MoreBytes,S1,Context,Mode,Acc) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_elem_content(")" ++ T, S0, Context, Mode0, Acc0) ->
+ ?bump_col(1),
+ {Mode, Acc} = case {Mode0, Acc0} of
+ {unknown, [_X]} ->
+ {seq, Acc0};
+ {M, _L} when M == seq; M == choice ->
+ {Mode0, lists:reverse(Acc0)}
+ end,
+ {Occurrence, T1, S1} = scan_occurrence(T, S),
+ vc_No_Duplicate_Types(S,Context,Acc),
+ case {Occurrence, Context,Acc} of
+ {once, mixed,['#PCDATA']} -> ok; % It is not ok when there are
+ % more names than '#PCDATA'
+ % and no '*'.
+ {'*', mixed,_} -> ok;
+ {Other, mixed,_} ->
+ ?fatal({illegal_for_mixed_content, Other}, S1);
+ _ ->
+ ok
+ end,
+ ?strip2,
+ {format_elem_content({Occurrence, {Mode, Acc}}), T2, S2};
+scan_elem_content("#PCDATA" ++ _T, S, not_mixed, _Mode, _Acc) ->
+ ?fatal({error,{extra_set_of_parenthesis}},S);
+scan_elem_content("#PCDATA" ++ _T, S, _Cont, Mode, Acc)
+ when Mode==choice;Mode==seq;Acc/=[] ->
+ ?fatal({error,{invalid_format_of_mixed_content}},S);
+scan_elem_content("#PCDATA" ++ T, S0, _Context, Mode, Acc) ->
+ ?bump_col(7),
+ ?strip1,
+ scan_elem_content(T1, S1, mixed, Mode, ['#PCDATA'|Acc]);
+scan_elem_content("," ++ _T, S, _Context, choice, _Acc) ->
+ ?fatal({mixing_comma_and_vertical_bar_in_content_model},S);
+scan_elem_content("," ++ T, S0, Context, _Mode, Acc) ->
+ ?bump_col(1),
+ ?strip1,
+ scan_elem_content2(T1, S1, Context, seq, Acc);
+scan_elem_content("|" ++ _T, S, _Context, seq, _Acc) ->
+ ?fatal({mixing_comma_and_vertical_bar_in_content_model},S);
+scan_elem_content("|" ++ T, S0, Context, _Mode, Acc) ->
+ ?bump_col(1),
+ ?strip1,
+ scan_elem_content2(T1, S1, Context, choice, Acc);
+scan_elem_content(T, S, Context, Mode, Acc) ->
+ scan_elem_content2(T, S, Context, Mode, Acc).
+
+scan_elem_content2("(" ++ _T, S, mixed, _Mode, _Acc) ->
+ ?fatal({error,
+ {element_names_must_not_be_parenthesized_in_mixed_content}},S);
+scan_elem_content2("(" ++ T, S0, Context, Mode, Acc) ->
+ ?bump_col(1),
+ ?strip1,
+ {Inner, T2, S2} = scan_elem_content(T1, S1, not_mixed, unknown, []),
+ scan_elem_content(T2, S2, Context, Mode, [Inner|Acc]);
+scan_elem_content2("%" ++ _T,S=#xmerl_scanner{environment=prolog},_Context,_Mode,_Acc) ->
+ ?fatal({error,{wfc_PEs_In_Internal_Subset}},S);
+scan_elem_content2("%" ++ T, S0, Context, Mode, Acc) ->
+ ?bump_col(1),
+ {PERefName, T1, S1} = scan_pe_reference(T, S),
+ ExpRef = expand_pe_reference(PERefName, S1,as_PE),
+ {_,T2,S2}=strip(ExpRef++T1,S1),
+ scan_elem_content(T2, S2, Context, Mode, Acc);
+scan_elem_content2(T, S, Context, Mode, Acc) ->
+ {Name, _NameStr, T1, S1} = scan_name(T, S),
+ {Occurrence, T2, S2} = scan_occurrence(T1, S1),
+ case {Occurrence, Context} of
+ {once, mixed} -> ok;
+ {Other, mixed} ->
+ ?fatal({illegal_for_mixed_content, Other}, S1);
+ _ ->
+ ok
+ end,
+ ?strip3,
+ mandatory_delimeter_wfc(T3,S3),
+ NewAcc = [format_elem_content({Occurrence, Name}) | Acc],
+ scan_elem_content(T3, S3, Context, Mode, NewAcc).
+
+
+format_elem_content({once, What}) -> What;
+format_elem_content(Other) -> Other.
+
+
+scan_occurrence([], S=#xmerl_scanner{continuation_fun = F}) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_occurrence(MoreBytes, S1) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_occurrence([$?|T], S0) ->
+ ?bump_col(1),
+ {'?', T, S};
+scan_occurrence([$+|T], S0) ->
+ ?bump_col(1),
+ {'+', T, S};
+scan_occurrence([$*|T], S0) ->
+ ?bump_col(1),
+ {'*', T, S};
+scan_occurrence(T, S) ->
+ {once, T , S}.
+
+%%% Tests of Validity Constraints
+
+
+%% first part of VC: Name Token
+vc_Valid_Char(_AT,C,S) ->
+ case xmerl_lib:is_namechar(C) of
+ true ->
+ ok;
+ _ ->
+ ?fatal({error,{validity_constraint_Name_Token,C}},S)
+ end.
+
+
+
+vc_ID_Attribute_Default(_,#xmerl_scanner{validation=Valid})
+ when Valid /= dtd ->
+ ok;
+vc_ID_Attribute_Default({_,'ID',_,Def,_},_S)
+ when Def=='#IMPLIED';Def=='#REQUIRED' ->
+ ok;
+vc_ID_Attribute_Default({_,'ID',_,Def,_},S) ->
+ ?fatal({error,{validity_constraint_error_ID_Attribute_Default,Def}},S).
+
+vc_Enumeration({_Name,{_,NameList},DefaultVal,_,_},S)
+ when is_list(DefaultVal) ->
+ case lists:member(list_to_atom(DefaultVal),NameList) of
+ true ->
+ ok;
+ _ ->
+ ?fatal({error,{vc_enumeration,list_to_atom(DefaultVal),NameList}},S)
+ end;
+vc_Enumeration({_Name,{_,_NameList},_DefaultVal,_,_},_S) ->
+ ok.
+
+vc_Entity_Name({_Name,'ENTITY',DefaultVal,_,_},S) when is_list(DefaultVal) ->
+ Read = S#xmerl_scanner.rules_read_fun,
+ case Read(entity,list_to_atom(DefaultVal),S) of
+ {_,external,{_,{ndata,_}}} ->
+ ok;
+ _ -> ?fatal({error,{vc_Entity_Name,list_to_atom(DefaultVal)}},S)
+ end;
+vc_Entity_Name({_Name,'ENTITY',_,_,_},_S) ->
+ ok;
+vc_Entity_Name({_,'ENTITIES',DefaultVal,_,_},S) when is_list(DefaultVal) ->
+ Read = S#xmerl_scanner.rules_read_fun,
+ NameListFun = fun([],Acc,_St,_Fun) ->
+ lists:reverse(Acc);
+ (Str,Acc,St,Fun) ->
+ {N,_,St2,Str2} = scan_name(Str,St),
+ Fun(Str2,[N|Acc],St2,Fun)
+ end,
+ NameList = NameListFun(DefaultVal,[],S,NameListFun),
+ VcFun =
+ fun(X) ->
+ case Read(entity,X,S) of
+ {_,external,{_,{ndata,_}}} ->
+ ok;
+ _ -> ?fatal({error,{vc_Entity_Name,X}},S)
+ end
+ end,
+ lists:foreach(VcFun,NameList);
+vc_Entity_Name({_,'ENTITIES',_,_,_},_S) ->
+ ok.
+
+vc_No_Duplicate_Types(#xmerl_scanner{validation=dtd} = S,mixed,Acc) ->
+ CheckDupl =
+ fun([H|T],F) ->
+ case lists:member(H,T) of
+ true ->
+ ?fatal({no_duplicate_types_allowed,H},S);
+ _ -> F(T,F)
+ end;
+ ([],_) -> ok
+ end,
+ CheckDupl(Acc,CheckDupl);
+vc_No_Duplicate_Types(_,_,_) ->
+ ok.
+
+
+%%% Tests of Well-Formededness Constraints
+
+
+mandatory_delimeter_wfc(","++_T,_S) ->
+ ok;
+mandatory_delimeter_wfc("|"++_T,_S) ->
+ ok;
+mandatory_delimeter_wfc(")"++_T,_S) ->
+ ok;
+mandatory_delimeter_wfc("%"++_T,_S) ->
+ %% a parameter reference is ok
+ ok;
+mandatory_delimeter_wfc(T,S) ->
+ ?fatal({comma_or_vertical_bar_mandatory_between_names_in_content_model,T},S).
+
+
+wfc_unique_att_spec([],_S) ->
+ ok;
+wfc_unique_att_spec([#xmlAttribute{name=N}|Atts],S) ->
+ case lists:keymember(N,#xmlAttribute.name,Atts) of
+ true ->
+ ?fatal({error,{unique_att_spec_required,N}},S);
+ _ ->
+ wfc_unique_att_spec(Atts,S)
+ end.
+
+wfc_legal_char(Chars,S) when is_list(Chars)->
+ {Ch,Rest} = to_ucs(S#xmerl_scanner.encoding,Chars),
+ case xmerl_lib:is_char(Ch) of
+ true ->
+ {Ch,Rest};
+ _ ->
+ ?fatal({error,{wfc_Legal_Character,Ch}},S)
+ end;
+wfc_legal_char(Ch,S) ->
+ case xmerl_lib:is_char(Ch) of
+ true ->
+ {Ch,[]};
+ _ ->
+ ?fatal({error,{wfc_Legal_Character,Ch}},S)
+ end.
+
+
+wfc_whitespace_betw_attrs(WS,_S) when ?whitespace(WS) ->
+ ok;
+wfc_whitespace_betw_attrs($/,_S) ->
+ ok;
+wfc_whitespace_betw_attrs($>,_S) ->
+ ok;
+wfc_whitespace_betw_attrs(_,S) ->
+ ?fatal({whitespace_required_between_attributes},S).
+
+wfc_Entity_Declared({external,_},S=#xmerl_scanner{standalone=yes},Name) ->
+ ?fatal({reference_to_externally_defed_entity_standalone_doc,Name},S);
+wfc_Entity_Declared({external,_},_S,_) ->
+ ok;
+wfc_Entity_Declared(_Env,_S,_) ->
+ ok.
+
+wfc_Internal_parsed_entity(internal,Value,S) ->
+ %% WFC test that replacement text matches production content
+ scan_content(Value,S#xmerl_scanner{environment=internal_parsed_entity},
+ _Name=[],[],S#xmerl_scanner.space,_Lang=[],_Prnt=[],
+ #xmlNamespace{});
+wfc_Internal_parsed_entity(_,_,_) ->
+ ok.
+
+
+vc_Element_valid(_Name,#xmerl_scanner{environment=internal_parsed_entity}) ->
+ ok;
+vc_Element_valid(Name,S=#xmerl_scanner{rules_read_fun=Read,
+ validation=dtd}) ->
+ case Read(elem_def,Name,S) of
+ #xmlElement{elementdef=undeclared} ->
+ ?fatal({error,{error_missing_element_declaration_in_DTD,Name}},S); undefined ->
+ ?fatal({error,{error_missing_element_declaration_in_DTD,Name}},S); _ -> ok
+ end;
+vc_Element_valid(_,_) ->
+ ok.
+
+%%%%%%% [74] PEDef
+
+
+scan_pe_def([], S=#xmerl_scanner{continuation_fun = F}, PEName) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_pe_def(MoreBytes, S1, PEName) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_pe_def("'" ++ T, S0, PEName) ->
+ ?bump_col(1),
+ scan_entity_value(T, S, $', PEName,parameter);
+scan_pe_def("\"" ++ T, S0, PEName) ->
+ ?bump_col(1),
+ scan_entity_value(T, S, $", PEName,parameter);
+scan_pe_def(Str, S, _PEName) ->
+ scan_external_id(Str, S).
+
+
+%%%%%%% [82] NotationDecl
+
+scan_notation_decl(T, #xmerl_scanner{rules_write_fun = Write,
+ rules_read_fun=Read,
+ rules_delete_fun=Delete} = S) ->
+ {Name, _NameStr, T1, S1} = scan_name_no_colons(T, S),
+ {_,T2,S2} = mandatory_strip(T1,S1),
+ {Def, T3, S3} = scan_notation_decl1(T2, S2),
+ ?strip4,
+ T5 = scan_mandatory(">",T4,1,S4,expected_end_tag_notation_declaration),
+% ">" ++ T5 = T4,
+ case Read(notation,Name,S) of
+ undeclared -> Delete(notation,Name,S4);
+ _ -> ok
+ end,
+ S5 = Write(notation, Name, Def, S4),
+ {T5, S5}.
+
+scan_notation_decl1([], S=#xmerl_scanner{continuation_fun = F}) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_notation_decl1(MoreBytes, S1) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_notation_decl1("SYSTEM" ++ T, S0) ->
+ ?bump_col(6),
+ {_,T1,S1} = mandatory_strip(T,S),
+ {SL, T2, S2} = scan_system_literal(T1, S1),
+ {{system, SL}, T2, S2};
+scan_notation_decl1("PUBLIC" ++ T, S0) ->
+ ?bump_col(6),
+ {_,T1,S1} = mandatory_strip(T,S),
+ {PIDL, T2, S2} = scan_pubid_literal(T1, S1),
+ ?strip3,
+ case T3 of
+ ">" ++ _ ->
+ {{public, PIDL}, T3,
+ S3#xmerl_scanner{col = S3#xmerl_scanner.col+1}};
+ _ ->
+ {SL, T4, S4} = scan_system_literal(T3, S3),
+ {{public, PIDL, SL}, T4, S4}
+ end.
+
+%%%%%%% [75] ExternalID
+
+scan_external_id([], S=#xmerl_scanner{continuation_fun = F}) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_external_id(MoreBytes, S1) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_external_id("SYSTEM" ++ T, S0) ->
+ ?bump_col(6),
+ {_,T1,S1} = mandatory_strip(T,S),
+ {SL, T2, S2} = scan_system_literal(T1, S1),
+ {{system, SL}, T2, S2};
+scan_external_id("PUBLIC" ++ T, S0) ->
+ ?bump_col(6),
+ {_,T1,S1} = mandatory_strip(T,S),
+ {PIDL, T2, S2} = scan_pubid_literal(T1, S1),
+ {_,T3,S3} = mandatory_strip(T2,S2),
+ {SL, T4, S4} = scan_system_literal(T3, S3),
+ {{public, PIDL, SL}, T4, S4}.
+
+
+%%%%%%% [9] EntityValue
+
+%% Note that we have two different scan functions for EntityValue
+%% They differ in that this one checks for recursive calls to the same
+%% parameter entity.
+
+scan_entity_value(Str, S, Delim, Name, Namespace) ->
+ scan_entity_value(Str, S, Delim, _Acc = [], Name, Namespace,[]).
+
+
+scan_entity_value([], S=#xmerl_scanner{environment={external,{entity,_}}},
+ _Delim,Acc,_,_,[]) ->
+ {lists:flatten(lists:reverse(Acc)), [], S};
+scan_entity_value([], S=#xmerl_scanner{environment={external,{entity,_}},
+ validation=dtd},
+ _Delim,_Acc,PEName,_,_) ->
+ {{error,{failed_VC_Proper_Declaration_PE_Nesting,1,PEName}},[],S};
+scan_entity_value([],S,
+ no_delim,Acc,_,_,[]) ->
+ {lists:flatten(lists:reverse(Acc)),[],S};
+scan_entity_value([],S=#xmerl_scanner{validation=dtd},
+ no_delim,_Acc,PEName,_,_PENesting) ->
+ {{error,{failed_VC_Proper_Declaration_PE_Nesting,2,PEName}},[],S};
+scan_entity_value([], S=#xmerl_scanner{continuation_fun = F},
+ Delim, Acc, PEName,Namespace,PENesting) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) ->
+ scan_entity_value(MoreBytes,S1,
+ Delim,Acc,PEName,Namespace,PENesting)
+ end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_entity_value([Delim|T], S=#xmerl_scanner{validation=dtd},
+ Delim,_Acc,PEName,_NS,PENesting) when length(PENesting) /= 0 ->
+ {{error,{failed_VC_Proper_Declaration_PE_Nesting,3,PEName}},T,S};
+scan_entity_value([Delim|T], S0,
+ Delim, Acc, _PEName,_NS,_PENesting) ->
+ ?bump_col(1),
+ {lists:flatten(lists:reverse(Acc)), T, S};
+scan_entity_value("%" ++ _T,S=#xmerl_scanner{environment=prolog},_,_,_,_,_) ->
+ ?fatal({error,{wfc_PEs_In_Internal_Subset}},S);
+% %% This is a PEdecl in an external entity
+% scan_entity_value([$%,WS|T], S0, Delim, Acc, PEName,Namespace,PENesting)
+% when ?whitespace(WS) ->
+% ?bump_col(2),
+% scan_entity_value(T, S, Delim, [WS,$%|Acc], PEName,Namespace,PENesting);
+scan_entity_value("%" ++ T, S0, Delim, Acc, PEName,Namespace,PENesting) ->
+ ?bump_col(1),
+ {PERefName, T1, S1} = scan_pe_reference(T, S),
+ if PERefName == PEName,Namespace==parameter ->
+ ?fatal({illegal_recursion_in_PE, PEName}, S1);
+ true ->
+ {ExpandedRef,S2} =
+ case expand_pe_reference(PERefName, S1, in_literal) of
+ %% actually should pe ref be expanded as_PE but
+ %% handle whitespace explicitly in this case.
+ Tuple when is_tuple(Tuple) ->
+ %% {system,URI} or {public,URI}
+ %% Included in literal.
+ {ExpRef,Sx}=fetch_not_parse(Tuple,S1),
+ {EntV,_,_S2} =
+ scan_entity_value(ExpRef, Sx, no_delim,[],
+ PERefName,parameter,[]),
+ %% should do an update Write(parameter_entity)
+ %% so next expand_pe_reference is faster
+ {EntV,_S2};
+ ExpRef ->
+ {ExpRef,S1}
+ end,
+ %% single or duoble qoutes are not treated as delimeters
+ %% in passages "included in literal"
+ S3 = S2#xmerl_scanner{col=S2#xmerl_scanner.col+1},
+ {Acc2,_,S4} = scan_entity_value(ExpandedRef,S3,no_delim,Acc,
+ PEName,Namespace,[]),
+% {_,T2,S5} = strip(" "++T1,S4),
+ scan_entity_value(T1,S4#xmerl_scanner{line=S3#xmerl_scanner.line,
+ col=S3#xmerl_scanner.col},
+ Delim,lists:reverse(Acc2),
+ PEName,Namespace,PENesting)
+% scan_entity_value(T1,S4,Delim,lists:reverse(Acc2),
+% PEName,Namespace,PENesting)
+ end;
+scan_entity_value("&" ++ T, S0, Delim, Acc, PEName,Namespace,PENesting) ->
+ %% This is either a character entity or a general entity (internal
+ %% or external) reference. An internal general entity shall not be
+ %% expanded in an entity def XML1.0 section 4.5.
+ ?bump_col(1),
+ case T of
+ "#"++_T ->
+ {ExpRef, T1, S1} = scan_reference(T, S),
+ Tok = pe_nesting_token(ExpRef++T1,Namespace,S1#xmerl_scanner.validation),
+ case markup_delimeter(ExpRef) of
+ true ->
+ scan_entity_value(T1, S1, Delim, [ExpRef|Acc], PEName,
+ Namespace,pe_push(Tok,PENesting,S1));
+ _ ->
+ ExpRef2 = string_to_char_set(S#xmerl_scanner.encoding,ExpRef),
+ scan_entity_value(ExpRef2 ++ T1, S1, Delim, Acc, PEName,
+ Namespace,pe_push(Tok,PENesting,S1))
+ end;
+ _ -> %% General Entity is bypassed, though must check for
+ %% recursion: save referenced name now and check for
+ %% recursive reference after the whole entity definition is
+ %% completed.
+ {Name, _NamespaceInfo, T1, S1} = scan_name(T,S),
+ T2=scan_mandatory(";",T1,1,S1,expected_entity_reference_semicolon),
+ S2=save_refed_entity_name(Name,PEName,S1),
+ scan_entity_value(T2,S2,Delim,[";",atom_to_list(Name),"&"|Acc],PEName,Namespace,PENesting)
+ end;
+%% The following clauses is for PE Nesting VC constraint
+%% Start delimeter for ConditionalSection
+scan_entity_value("<!["++T,S0,Delim,Acc,PEName,parameter=NS,PENesting)->
+ ?bump_col(3),
+ scan_entity_value(T,S,Delim,["<!["|Acc],PEName,NS,
+ pe_push("<![",PENesting,S));
+%% Start delimeter for ConditionalSection (2)
+scan_entity_value("["++T,S0,Delim,Acc,PEName,parameter=NS,PENesting)->
+ ?bump_col(1),
+ scan_entity_value(T,S,Delim,["["|Acc],PEName,NS,
+ pe_push("[",PENesting,S));
+%% Start delimeter for comment
+scan_entity_value("<!--"++T,S0,Delim,Acc,PEName,parameter=NS,PENesting)->
+ ?bump_col(4),
+ scan_entity_value(T,S,Delim,["<!--"|Acc],PEName,NS,
+ pe_push("<!--",PENesting,S));
+%% Start delimeter for ElementDecl, AttListDecl,EntityDecl,NotationDecl
+scan_entity_value("<!"++ T,S0,Delim,Acc,PEName, parameter=NS,PENesting) ->
+ ?bump_col(2),
+ scan_entity_value(T,S,Delim,["<!"|Acc],PEName,NS,
+ pe_push("<!",PENesting,S));
+%% Start delimeter for PI
+scan_entity_value("<?"++T,S0,Delim,Acc,PEName, parameter=NS,PENesting) ->
+ ?bump_col(2),
+ scan_entity_value(T,S,Delim,["<?"|Acc],PEName,NS,
+ pe_push("<?",PENesting,S));
+%% Start delimeter for elements that matches the proper stop delimeter
+%% for a markupdecl
+scan_entity_value("</"++T,S0,Delim,Acc,PEName,parameter=NS,PENesting)->
+ ?bump_col(2),
+ scan_entity_value(T,S,Delim,["</"|Acc],PEName,NS,
+ pe_push("</",PENesting,S));
+scan_entity_value("<"++T,S0,Delim,Acc,PEName,parameter=NS,PENesting)->
+ ?bump_col(1),
+ scan_entity_value(T,S,Delim,["<"|Acc],PEName,NS,
+ pe_push("<",PENesting,S));
+%% Delimeter for contentspecs
+scan_entity_value("("++T,S0,Delim,Acc,PEName,parameter=NS,PENesting)->
+ ?bump_col(1),
+ scan_entity_value(T,S,Delim,["("|Acc],PEName,NS,
+ pe_push("(",PENesting,S));
+%% Stop delimeter for ElementDecl, AttListDecl,EntityDecl,NotationDecl
+scan_entity_value(">"++ T,S0,Delim,Acc,PEName, parameter=NS,PENesting) ->
+ ?bump_col(1),
+ scan_entity_value(T,S,Delim,[">"|Acc],PEName,NS,
+ pe_pop(">",PENesting,S));
+%% Stop delimeter for PI
+scan_entity_value("?>"++ T,S0,Delim,Acc,PEName, parameter=NS,PENesting) ->
+ ?bump_col(2),
+ scan_entity_value(T,S,Delim,["?>"|Acc],PEName,NS,
+ pe_pop("?>",PENesting,S));
+%% Stop delimeter for comment
+scan_entity_value("-->"++ T,S0,Delim,Acc,PEName, parameter=NS,PENesting) ->
+ ?bump_col(3),
+ scan_entity_value(T,S,Delim,["-->"|Acc],PEName,NS,
+ pe_pop("-->",PENesting,S));
+%% Stop delimeter for ConditionalSection
+scan_entity_value("]]>"++ T,S0,Delim,Acc,PEName, parameter=NS,PENesting) ->
+ ?bump_col(3),
+ scan_entity_value(T,S,Delim,["]]>"|Acc],PEName,NS,
+ pe_pop("]]>",PENesting,S));
+%% Stop delimeter added to match a content start delimeter included
+scan_entity_value("/>"++ T,S0,Delim,Acc,PEName, parameter=NS,PENesting) ->
+ ?bump_col(2),
+ scan_entity_value(T,S,Delim,["/>"|Acc],PEName,NS,
+ pe_pop("/>",PENesting,S));
+scan_entity_value(")"++ T,S0,Delim,Acc,PEName, parameter=NS,PENesting) ->
+ ?bump_col(1),
+ scan_entity_value(T,S,Delim,[")"|Acc],PEName,NS,
+ pe_pop(")",PENesting,S));
+scan_entity_value("\n"++T, S, Delim, Acc, PEName,Namespace,PENesting) ->
+ scan_entity_value(T, S#xmerl_scanner{line=S#xmerl_scanner.line+1},
+ Delim, ["\n"|Acc], PEName,Namespace,PENesting);
+scan_entity_value(Str, S0, Delim, Acc, PEName,Namespace,PENesting) ->
+ {Ch,T} = to_ucs(S0#xmerl_scanner.encoding,Str),
+ case xmerl_lib:is_char(Ch) of
+ true ->
+ ?bump_col(1),
+ scan_entity_value(T, S, Delim, [Ch|Acc], PEName,Namespace,PENesting);
+ false ->
+ ?fatal({unexpected_char,Ch}, S0)
+ end.
+
+
+
+save_refed_entity_name(Name,PEName,S) ->
+ case predefined_entity(Name) of
+ true ->
+ S;
+ _ ->
+ save_refed_entity_name1(Name,PEName,S)
+ end.
+
+save_refed_entity_name1(Name,PEName,
+ S=#xmerl_scanner{entity_references=ERefs}) ->
+ case lists:keysearch(PEName,1,ERefs) of
+ {value,{_,Refs}} ->
+ NewRefs =
+ case lists:member(Name,Refs) of
+ true ->Refs;
+ _ -> [Name|Refs]
+ end,
+ S#xmerl_scanner{entity_references=lists:keyreplace(PEName,1,ERefs,
+ {PEName,NewRefs})
+ };
+ _ ->
+ S#xmerl_scanner{entity_references=[{PEName,[Name]}|ERefs]}
+ end.
+
+
+
+pe_push(Tok,Stack,_S) when Tok=="<!";Tok=="<?";Tok=="<!--";Tok=="<![";
+ Tok=="[";Tok=="<";Tok=="</";Tok=="(" ->
+ [Tok|Stack];
+pe_push(Tok,Stack,#xmerl_scanner{validation=dtd})
+ when Tok==")";Tok==">";Tok=="?>";Tok=="]]>";Tok=="-->";Tok=="/>"->
+ [Tok|Stack];
+pe_push(_,Stack,_S) ->
+ Stack.
+
+pe_pop(">",["<!"|Rest],_S) -> Rest;
+pe_pop("?>",["<?"|Rest],_S) -> Rest;
+pe_pop("-->",["<!--"|Rest],_S) -> Rest;
+pe_pop("]]>",["[","<!["|Rest],_S) -> Rest;
+pe_pop("/>",["<"|Rest],_S) -> Rest;
+pe_pop(">",["<"|Rest],_S) -> Rest;
+pe_pop(">",["</"|Rest],_S) -> Rest;
+pe_pop(")",["("|Rest],_S) -> Rest;
+pe_pop(Token,_Stack,S=#xmerl_scanner{validation=dtd}) ->
+ ?fatal({error,{failed_VC_Proper_Declaration_PE_Nesting,5,Token}},S);
+pe_pop(_,Rest,_) ->
+ Rest.
+
+pe_nesting_token("<!"++_T,parameter,dtd) -> "<!";
+pe_nesting_token("<?"++_T,parameter,dtd) -> "<?";
+pe_nesting_token("<!--"++_T,parameter,dtd) -> "<!--";
+pe_nesting_token("<!["++_T,parameter,dtd) -> "<![";
+pe_nesting_token("["++_T,parameter,dtd) -> "[";
+pe_nesting_token("("++_T,parameter,dtd) -> "(";
+pe_nesting_token(">"++_T,parameter,dtd) -> ">";
+pe_nesting_token("?>"++_T,parameter,dtd) -> "?>";
+pe_nesting_token("-->"++_T,parameter,dtd) -> "-->";
+pe_nesting_token("]]>"++_T,parameter,dtd) -> "]]>";
+pe_nesting_token(")"++_T,parameter,dtd) -> ")";
+pe_nesting_token("/>"++_T,parameter,dtd) -> "/>";
+pe_nesting_token(_,_,_) -> false.
+
+predefined_entity(amp) -> true;
+predefined_entity(lt) -> true;
+predefined_entity(gt) -> true;
+predefined_entity(apos) -> true;
+predefined_entity(quot) -> true;
+predefined_entity(_) -> false.
+
+check_entity_recursion(EName,
+ S=#xmerl_scanner{entity_references=EntityRefList}) ->
+ Set = sofs:family(EntityRefList),
+ case catch sofs:family_to_digraph(Set, [acyclic]) of
+ {'EXIT',{cyclic,_}} ->
+ ?fatal({illegal_recursion_in_Entity, EName}, S);
+ DG ->
+ digraph:delete(DG),
+ ok
+ end.
+
+
+
+
+%%%%%%% [15] Comment
+scan_comment(Str, S) ->
+ scan_comment(Str, S, _Pos = undefined, _Parents = [], _Lang = []).
+
+scan_comment(Str,S=#xmerl_scanner{col=C,event_fun=Event}, Pos, Parents, Lang) ->
+ Comment = #xmlComment{pos = Pos,
+ parents = Parents,
+ language = Lang,
+ value = undefined},
+ S1 = #xmerl_scanner{} = Event(#xmerl_event{event = started,
+ line = S#xmerl_scanner.line,
+ col = C,
+ pos = Pos,
+ data = Comment}, S),
+
+ scan_comment1(Str, S1, Pos, Comment, _Acc = []).
+
+scan_comment1([], S=#xmerl_scanner{continuation_fun = F},
+ Pos, Comment, Acc) ->
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> scan_comment1(MoreBytes, S1, Pos, Comment, Acc) end,
+ fun(S1) -> ?fatal(unexpected_end, S1) end,
+ S);
+scan_comment1("-->" ++ T, S0 = #xmerl_scanner{col = C,
+ event_fun = Event,
+ hook_fun = Hook},
+ _Pos, Comment, Acc) ->
+ ?bump_col(3),
+ Comment1 = Comment#xmlComment{value = lists:reverse(Acc)},
+ S1=#xmerl_scanner{}=Event(#xmerl_event{event = ended,
+ line=S#xmerl_scanner.line,
+ col = C,
+ data = Comment1}, S),
+ {Ret, S2} = Hook(Comment1, S1),
+ {_,T3,S3}=strip(T,S2),
+ {Ret,T3,S3};
+scan_comment1("--"++T,S,_Pos,_Comment,_Acc) ->
+ ?fatal({invalid_comment,"--"++[hd(T)]}, S);
+scan_comment1("\n" ++ T, S=#xmerl_scanner{line = L}, Pos, Cmt, Acc) ->
+ scan_comment1(T, S#xmerl_scanner{line=L+1,col=1},Pos, Cmt, "\n" ++ Acc);
+scan_comment1("\r\n" ++ T, S=#xmerl_scanner{line = L}, Pos, Cmt, Acc) ->
+ %% CR followed by LF is read as a single LF
+ scan_comment1(T, S#xmerl_scanner{line=L+1,col=1}, Pos, Cmt, "\n" ++ Acc);
+scan_comment1("\r" ++ T, S=#xmerl_scanner{line = L}, Pos, Cmt, Acc) ->
+ %% CR not followed by LF is read as a LF
+ scan_comment1(T, S#xmerl_scanner{line=L+1,col=1}, Pos, Cmt, "\n" ++ Acc);
+scan_comment1(Str, S=#xmerl_scanner{col = C}, Pos, Cmt, Acc) ->
+ {Ch,T} = wfc_legal_char(Str,S),
+ scan_comment1(T, S#xmerl_scanner{col=C+1}, Pos, Cmt, [Ch|Acc]).
+
+%%%%%%%
+
+scan_markup_completion_gt([$>|_R]=T,S) ->
+ {T,S};
+scan_markup_completion_gt([$%|T],S0) ->
+ ?bump_col(1),
+ {Name,T1,S1} = scan_pe_reference(T,S),
+ ExpandedRef = expand_pe_reference(Name,S1,as_PE),
+ {_,T2,S2} = strip(ExpandedRef++T1,S1),
+ scan_markup_completion_gt(T2,S2);
+scan_markup_completion_gt(T,S) ->
+ ?fatal({error,{malformed_syntax_entity_completion,T}},S).
+
+
+scan_mandatory(Pattern,T,N,S,ErrorMsg) ->
+ case lists:prefix(Pattern,T) of
+ true ->
+ lists:nthtail(N,T);
+ _ ->
+ ?fatal(ErrorMsg,S)
+ end.
+
+
+strip(Str,S) ->
+ strip(Str,S,all).
+
+strip([], S=#xmerl_scanner{continuation_fun = F},_) ->
+ ?dbg("cont()... stripping whitespace~n", []),
+ F(fun(MoreBytes, S1) -> strip(MoreBytes, S1) end,
+ fun(S1) -> {[], [], S1} end,
+ S);
+strip("\s" ++ T, S=#xmerl_scanner{col = C},Lim) ->
+ strip(T, S#xmerl_scanner{col = C+1},Lim);
+strip("\t" ++ _T, S ,no_tab) ->
+ ?fatal({error,{no_tab_allowed}},S);
+strip("\t" ++ T, S=#xmerl_scanner{col = C},Lim) ->
+ strip(T, S#xmerl_scanner{col = expand_tab(C)},Lim);
+strip("\n" ++ T, S=#xmerl_scanner{line = L},Lim) ->
+ strip(T, S#xmerl_scanner{line = L+1, col = 1},Lim);
+strip("\r\n" ++ T, S=#xmerl_scanner{line = L},Lim) ->
+ %% CR followed by LF is read as a single LF
+ strip(T, S#xmerl_scanner{line = L+1, col = 1},Lim);
+strip("\r" ++ T, S=#xmerl_scanner{line = L},Lim) ->
+ %% CR not followed by LF is read as a LF
+ strip(T, S#xmerl_scanner{line = L+1, col = 1},Lim);
+strip(Str, S,_Lim) ->
+ {[], Str, S}.
+
+%% demands a whitespace, though a parameter entity is ok, it will
+%% expand with a whitespace on each side.
+mandatory_strip([],S) ->
+ ?fatal({error,{whitespace_was_expected}},S);
+mandatory_strip(T,S) when ?whitespace(hd(T)) ->
+ strip(T,S,all);
+mandatory_strip([$%|T],S) when ?whitespace(hd(T)) -> %this is not a PERefence, but an PEDeclaration
+ ?fatal({error,{whitespace_was_expected}},S);
+mandatory_strip([$%|_T]=T,S) ->
+ {[],T,S};
+mandatory_strip(_T,S) ->
+ ?fatal({error,{whitespace_was_expected}},S).
+
+%% strip but don't accept tab
+pub_id_strip(Str, S) ->
+ strip(Str,S,no_tab).
+
+
+normalize("&"++T,S,IsNorm) ->
+ case scan_reference(T, S) of
+ {ExpRef, T1, S1} when ?whitespace(hd(ExpRef)) ->
+ ExpRef2 = string_to_char_set(S#xmerl_scanner.encoding,ExpRef),
+ normalize(ExpRef2++T1,S1,IsNorm);
+ _ ->
+ {"&"++T,S,IsNorm}
+ end;
+normalize(T,S,IsNorm) ->
+ case strip(T,S) of
+ {_,T,S} ->
+ {T,S,IsNorm};
+ {_,T1,S1} ->
+ {T1,S1,true}
+ end.
+
+
+%% Optimization:
+%% - avoid building list of spaces or tabs;
+%% - avoid reverse;
+%% - compact two common indentation patterns.
+%% Note: only to be called when a \n was found.
+fast_accumulate_whitespace(" " ++ T, S, _) ->
+ fast_acc_spaces(T, S, 1);
+fast_accumulate_whitespace("\t"++T, S, _) ->
+ fast_acc_tabs(T, S, 1);
+fast_accumulate_whitespace("<"++_=R, S, _T) ->
+ #xmerl_scanner{common_data = CD, line = Line} = S,
+ {done, {element(3, CD), R, S#xmerl_scanner{col = 1, line = Line + 1}}};
+fast_accumulate_whitespace(_, S, T) ->
+ accumulate_whitespace(T, S, []).
+
+fast_acc_spaces(" " ++ T, S, N) ->
+ fast_acc_spaces(T, S, N + 1);
+fast_acc_spaces(T, S, N) ->
+ fast_acc_end(T, S, N, N, $\s, 1).
+
+fast_acc_tabs("\t" ++ T, S, N) ->
+ fast_acc_tabs(T, S, N + 1);
+fast_acc_tabs(T, S, N) ->
+ fast_acc_end(T, S, N, N * 8 + 1, $\t, 2).
+
+fast_acc_end(T, S, N, Col, C, CD_I) ->
+ #xmerl_scanner{common_data = CD, line = Line0} = S,
+ Line = Line0 + 1,
+ try
+ $< = hd(T),
+ {done,{element(N, element(CD_I, CD)), T,
+ S#xmerl_scanner{col = Col, line = Line}}}
+ catch _:_ ->
+ accumulate_whitespace(T, S, Line, Col, lists:duplicate(N, C)++"\n")
+ end.
+
+
+%%% @spec accumulate_whitespace(T::string(),S::global_state(),
+%%% atom(),Acc::string()) -> {Acc, T1, S1}
+%%%
+%%% @doc Function to accumulate and normalize whitespace.
+accumulate_whitespace(T, S, preserve, Acc) ->
+ accumulate_whitespace(T, S, Acc);
+accumulate_whitespace(T, S, normalize, Acc) ->
+ {_WsAcc, T1, S1} = accumulate_whitespace(T, S, []),
+ {[$\s|Acc], T1, S1}.
+
+accumulate_whitespace(T, S, Acc) ->
+ #xmerl_scanner{line = Line, col = Col} = S,
+ accumulate_whitespace(T, S, Line, Col, Acc).
+
+accumulate_whitespace([], S0, Line, Col, Acc) ->
+ #xmerl_scanner{continuation_fun = F} = S0,
+ S = S0#xmerl_scanner{line = Line, col = Col},
+ ?dbg("cont()...~n", []),
+ F(fun(MoreBytes, S1) -> accumulate_whitespace(MoreBytes, S1, Acc) end,
+ fun(S1) -> {Acc, [], S1} end,
+ S);
+accumulate_whitespace("\s" ++ T, S, Line, Col, Acc) ->
+ accumulate_whitespace(T, S, Line, Col+1, [$\s|Acc]);
+accumulate_whitespace("\t" ++ T, S, Line, Col, Acc) ->
+ accumulate_whitespace(T, S, Line, expand_tab(Col), [$\t|Acc]);
+accumulate_whitespace("\n" ++ T, S, Line, _Col, Acc) ->
+ accumulate_whitespace(T, S, Line+1, 1, [$\n|Acc]);
+accumulate_whitespace("\r\n" ++ T, S, Line, _Col, Acc) ->
+ %% CR followed by LF is read as a single LF
+ accumulate_whitespace(T, S, Line+1, 1, [$\n|Acc]);
+accumulate_whitespace("\r" ++ T, S, Line, _Col, Acc) ->
+ %% CR not followed by LF is read as a LF
+ accumulate_whitespace(T, S, Line+1, 1, [$\n|Acc]);
+accumulate_whitespace(Str, S, Line, Col, Acc) ->
+ {Acc, Str, S#xmerl_scanner{line = Line, col = Col}}.
+
+expand_tab(Col) ->
+ Rem = (Col-1) rem 8,
+ _NewCol = Col + 8 - Rem.
+
+%% validation_mode(Validation)
+%% Validation = off | dtd | schema | true | false
+%% true and false are obsolete
+validation_mode(false) ->
+ off;
+validation_mode(true) ->
+ dtd;
+validation_mode(Other) ->
+ Other.
+
+
+schemaLocations(El,#xmerl_scanner{schemaLocation=[]}) ->
+ schemaLocations(El);
+schemaLocations(El,#xmerl_scanner{schemaLocation=SL}) ->
+ case SL of
+ [{_,_}|_] ->
+ {ok,SL};
+ _ ->
+ schemaLocations(El)
+ end.
+
+schemaLocations(#xmlElement{attributes=Atts,xmlbase=_Base}) ->
+ Pred = fun(#xmlAttribute{name=schemaLocation}) -> false;
+ (#xmlAttribute{namespace={_,"schemaLocation"}}) -> false;
+ (_) -> true
+ end,
+ case lists:dropwhile(Pred,Atts) of
+ [#xmlAttribute{value=Paths}|_] ->
+
+ case string:tokens(Paths," ") of
+ L when length(L) > 0 ->
+ case length(L) rem 2 of
+ 0 ->
+ PairList =
+ fun([],_Fun) ->
+ [];
+ ([SLNS,SLLoc|Rest],Fun) ->
+ [{SLNS,SLLoc}|Fun(Rest,Fun)]
+ end,
+ {ok,PairList(L,PairList)};
+ _ ->
+ {error,{schemaLocation_attribute,namespace_location_not_in_pair}}
+ end;
+ _ ->
+ {error,{missing_schemaLocation}}
+ end;
+ [] ->
+ {error,{missing_schemaLocation}}
+ end.
+
+inherit_options(S) ->
+ %%io:format("xsdbase: ~p~n",[S#xmerl_scanner.xmlbase]),
+ [{xsdbase,S#xmerl_scanner.xmlbase}].
+
+handle_schema_result({XSDRes=#xmlElement{},_},S5) ->
+ {XSDRes,S5};
+handle_schema_result({error,Reason},S5) ->
+ ?fatal({failed_schema_validation,Reason},S5).
+
+%%% Helper functions
+
+fatal(Reason, S) ->
+ exit({fatal, {Reason,
+ {file,S#xmerl_scanner.filename},
+ {line,S#xmerl_scanner.line},
+ {col,S#xmerl_scanner.col}}}).
+
+%% preformat formats tokens in L1 and L2, L2 separated by Sep into a
+%% list
+preformat(L1,L2,Sep) ->
+ Format1= lists:flatten(lists:duplicate(length(L1)-1,"~s ")++"~s"),
+ Format2 = lists:flatten(lists:duplicate(length(L2)-1,
+ " ~s"++Sep)++" ~s"),
+
+ lists:flatten(io_lib:format(Format1++Format2,L1++L2)).
+
+
+%% BUG when we are many <!ATTLIST ..> balise none attributes has save in rules
+rules_write(Context, Name, Value, #xmerl_scanner{rules = T} = S) ->
+ case ets:lookup(T, {Context, Name}) of
+ [] ->
+ ets:insert(T, {{Context, Name}, Value});
+ _ ->
+ ok
+ end,
+ S.
+
+
+rules_read(Context, Name, #xmerl_scanner{rules = T}) ->
+ case ets:lookup(T, {Context, Name}) of
+ [] ->
+ undefined;
+ [{_, V}] ->
+ V
+ end.
+
+rules_delete(Context,Name,#xmerl_scanner{rules = T}) ->
+ ets:delete(T,{Context,Name}).
+
+to_ucs(Encoding, Chars) when Encoding=="utf-8"; Encoding == undefined ->
+ utf8_2_ucs(Chars);
+to_ucs(_,[C|Rest]) ->
+ {C,Rest}.
+
+utf8_2_ucs([A,B,C,D|Rest]) when A band 16#f8 =:= 16#f0,
+ B band 16#c0 =:= 16#80,
+ C band 16#c0 =:= 16#80,
+ D band 16#c0 =:= 16#80 ->
+ %% 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv
+ case ((D band 16#3f) bor ((C band 16#3f) bsl 6) bor
+ ((B band 16#3f) bsl 12) bor ((A band 16#07) bsl 18)) of
+ Ch when Ch >= 16#10000 ->
+ {Ch,Rest};
+ Ch ->
+ {{error,{bad_character,Ch}},Rest}
+ end;
+utf8_2_ucs([A,B,C|Rest]) when A band 16#f0 =:= 16#e0,
+ B band 16#c0 =:= 16#80,
+ C band 16#c0 =:= 16#80 ->
+ %% 1110vvvv 10vvvvvv 10vvvvvv
+ case ((C band 16#3f) bor ((B band 16#3f) bsl 6) bor
+ ((A band 16#0f) bsl 12)) of
+ Ch when Ch >= 16#800 ->
+ {Ch,Rest};
+ Ch ->
+ {{error,{bad_character,Ch}},Rest}
+ end;
+utf8_2_ucs([A,B|Rest]) when A band 16#e0 =:= 16#c0,
+ B band 16#c0 =:= 16#80 ->
+ %% 110vvvvv 10vvvvvv
+ case ((B band 16#3f) bor ((A band 16#1f) bsl 6)) of
+ Ch when Ch >= 16#80 ->
+ {Ch,Rest};
+ Ch ->
+ {{error,{bad_character,Ch}},Rest}
+ end;
+utf8_2_ucs([A|Rest]) when A < 16#80 ->
+ {A,Rest};
+utf8_2_ucs([A|Rest]) ->
+ {{error,{bad_character,A}},Rest}.
+
+to_char_set("iso-10646-utf-1",Ch) ->
+ [Ch];
+to_char_set(UTF8,Ch) when UTF8 =:= "utf-8"; UTF8 =:= undefined ->
+ ucs_2_utf8(Ch);
+to_char_set(_,Ch) ->
+ [Ch].
+
+ucs_2_utf8(Ch) when Ch < 128 ->
+ %% 0vvvvvvv
+ [Ch];
+ucs_2_utf8(Ch) when Ch < 16#0800 ->
+ %% Ch: -----vvv vvvvvvvv
+ %% 110vvvvv 10vvvvvv
+ %% O1 = (Ch band 16#07c0) bsr 6,
+ %% O2 = (Ch band 16#003f),
+ [((Ch band 16#07c0) bsr 6) bor 16#c0,(Ch band 16#003f) bor 16#80];
+ucs_2_utf8(Ch) when Ch < 16#10000 ->
+ %% Ch: vvvvvvvv vvvvvvvv
+ %% 1110vvvv 10vvvvvv 10vvvvvv
+ %% O1 = (Ch band 16#f000) bsr 12
+ %% O2 = (Ch band 16#0fc0) bsr 6
+ %% O3 = (Ch band 16#003f)
+ [((Ch band 16#f000) bsr 12) bor 16#e0,
+ ((Ch band 16#0fc0) bsr 6) bor 16#80,
+ (Ch band 16#003f) bor 16#80];
+ucs_2_utf8(Ch) when Ch < 16#200000 ->
+ %% Ch: ---vvvvv vvvvvvvv vvvvvvvv
+ %% 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv
+ %% O1 = (Ch band 16#1c0000) bsr 18
+ %% O2 = (Ch band 16#03f000) bsr 12
+ %% O3 = (Ch band 16#000fc0) bsr 6
+ %% O4 = (Ch band 16#00003f)
+ [((Ch band 16#1c0000) bsr 18) bor 16#f0,
+ ((Ch band 16#03f000) bsr 12) bor 16#80,
+ ((Ch band 16#000fc0) bsr 6) bor 16#80,
+ (Ch band 16#00003f) bor 16#80].
+
+
+string_to_char_set(Enc,Str) when Enc =:= "utf-8"; Enc =:= undefined ->
+ lists:flatten([ucs_2_utf8(X)||X <- Str]);
+string_to_char_set(_,Str) ->
+ Str.
+
+%% diagnose(Line) ->
+%% Mem=erlang:memory(),
+%% {OldTot,OldLine} = get_total(),
+%% NewTot =
+%% case {lists:keysearch(total,1,Mem),OldTot*1.1} of
+%% {{_,{_,Tot}},Tot110} when Tot > Tot110 ->
+%% io:format("From ~p to ~p, total memory: ~p (~p)~n",[OldLine,Line,Tot,OldTot]),
+%% Tot;
+%% {{_,{_,Tot}},_} ->
+%% Tot
+%% end,
+%% put_total({NewTot,Line}).
+
+%% get_total() ->
+%% case get(xmerl_mem) of
+%% undefined ->
+%% put(xmerl_mem,{0,0}),
+%% {0,0};
+%% M -> M
+%% end.
+
+%% put_total(M) ->
+%% put(xmerl_mem,M).
diff --git a/lib/xmerl/src/xmerl_sgml.erl b/lib/xmerl/src/xmerl_sgml.erl
new file mode 100644
index 0000000000..1fe6453e7f
--- /dev/null
+++ b/lib/xmerl/src/xmerl_sgml.erl
@@ -0,0 +1,65 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+ %% Description : Callback module for exporting XML to SGML.
+
+-module(xmerl_sgml).
+
+-export(['#xml-inheritance#'/0]).
+
+%% Note: we assume XML data, so all tags are lowercase!
+
+-export(['#root#'/4,
+ '#element#'/5,
+ '#text#'/1]).
+
+-import(xmerl_lib, [markup/3, find_attribute/2, export_text/1]).
+
+-include("xmerl.hrl").
+
+
+'#xml-inheritance#'() -> [].
+
+
+%% The '#text#' function is called for every text segment.
+
+'#text#'(Text) ->
+ export_text(Text).
+
+
+%% The '#root#' tag is called when the entire structure has been
+%% exported. It does not appear in the structure itself.
+
+'#root#'(Data, Attrs, [], _E) ->
+ case find_attribute(header, Attrs) of
+ {value, Hdr} ->
+ [Hdr, Data];
+ false ->
+ Data
+ end.
+
+
+%% Note that SGML does not have the <Tag/> empty-element form.
+%% Furthermore, for some element types, the end tag may be forbidden -
+%% this can be handled by extending this module - see xmerl_otpsgml.erl
+%% for an example. (By default, we always generate the end tag, to make
+%% sure that the scope of a markup is not extended by mistake.)
+
+'#element#'(Tag, Data, Attrs, _Parents, _E) ->
+ markup(Tag, Attrs, Data).
diff --git a/lib/xmerl/src/xmerl_simple.erl b/lib/xmerl/src/xmerl_simple.erl
new file mode 100644
index 0000000000..573dcd2a62
--- /dev/null
+++ b/lib/xmerl/src/xmerl_simple.erl
@@ -0,0 +1,109 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2003-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+
+%% Description : Simple event-based processor (front-end to xmerl_scanner)
+
+
+-module(xmerl_simple).
+
+-export([file/2,
+ string/2]).
+
+-include("xmerl.hrl").
+
+-record(state, {content_acc = [],
+ attr_acc = [],
+ content_stack = [],
+ attr_stack = []}).
+
+file(Fname, Opts) ->
+ Opts1 = scanner_options(Opts),
+ xmerl_scan:file(Fname, Opts1).
+
+string(Str, Opts) ->
+ Opts1 = scanner_options(Opts),
+ xmerl_scan:string(Str, Opts1).
+
+scanner_options(Opts) ->
+ EventS = #state{},
+ scanner_options(Opts,
+ [{event_fun, fun event/2, EventS},
+ {acc_fun, fun(_, Acc, S) -> {Acc,S} end},
+ {close_fun, fun close/1}]).
+
+scanner_options([H|T], Opts) ->
+ case catch keyreplace(H, 1, Opts) of
+ false ->
+ scanner_options(T, [H|Opts]);
+ NewOpts ->
+ scanner_options(T, NewOpts)
+ end;
+scanner_options([], Opts) ->
+ Opts.
+
+keyreplace(X, Pos, [H|T]) when element(Pos, X) == element(Pos, H) ->
+ [X|T];
+keyreplace(X, Pos, [H|T]) ->
+ [H|keyreplace(X, Pos, T)];
+keyreplace(_X, _Pos, []) ->
+ throw(false).
+
+
+close(S) ->
+ ES = xmerl_scan:event_state(S),
+ #state{attr_stack = [],
+ content_stack = [],
+ %% attr_acc may contain document attributes
+ content_acc = Content} = ES,
+ lists:reverse(Content).
+
+event(#xmerl_event{event = started, data = #xmlElement{}}, S) ->
+ #state{content_acc = CAcc,
+ attr_acc = AAcc,
+ content_stack = CSt,
+ attr_stack = ASt} = ES = xmerl_scan:event_state(S),
+ xmerl_scan:event_state(ES#state{content_acc = [],
+ attr_acc = [],
+ content_stack = [CAcc | CSt],
+ attr_stack = [AAcc | ASt]}, S);
+
+event(#xmerl_event{event = ended, data = #xmlElement{name = Name}}, S) ->
+ #state{content_acc = CAcc,
+ attr_acc = AAcc,
+ content_stack = [PrevCAcc | CSt],
+ attr_stack = [PrevAAcc | ASt]} = ES = xmerl_scan:event_state(S),
+ Simple = {Name, lists:reverse(AAcc), lists:reverse(CAcc)},
+ xmerl_scan:event_state(ES#state{content_acc = [Simple|PrevCAcc],
+ attr_acc = PrevAAcc,
+ content_stack = CSt,
+ attr_stack = ASt}, S);
+
+event(#xmerl_event{event = ended, data = #xmlAttribute{name = Name,
+ value = Value}}, S) ->
+ #state{attr_acc = AAcc} = ES = xmerl_scan:event_state(S),
+ Simple = {Name, Value},
+ xmerl_scan:event_state(ES#state{attr_acc = [Simple|AAcc]}, S);
+
+event(#xmerl_event{event = ended, data = #xmlText{value = Text}}, S) ->
+ #state{content_acc = CAcc} = ES = xmerl_scan:event_state(S),
+ xmerl_scan:event_state(ES#state{content_acc = [Text|CAcc]}, S);
+
+event(_E, S) ->
+ S.
diff --git a/lib/xmerl/src/xmerl_text.erl b/lib/xmerl/src/xmerl_text.erl
new file mode 100644
index 0000000000..70df8369ff
--- /dev/null
+++ b/lib/xmerl/src/xmerl_text.erl
@@ -0,0 +1,50 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2003-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%% Description : Callback module for exporting as raw text.
+
+
+-module(xmerl_text).
+
+-export(['#xml-inheritance#'/0]).
+
+-export(['#root#'/4,
+ '#element#'/5,
+ '#text#'/1]).
+
+-include("xmerl.hrl").
+
+
+'#xml-inheritance#'() -> [].
+
+
+%% The '#text#' function is called for every text segment.
+
+'#text#'(Text) -> Text.
+
+
+%% The '#root#' tag is called when the entire structure has been
+%% exported. It does not appear in the structure itself.
+
+'#root#'(Data, _Attrs, [], _E) -> Data.
+
+
+%% The '#element#' function is the default handler for XML elements.
+
+'#element#'(_Tag, Data, _Attrs, _Parents, _E) -> Data.
diff --git a/lib/xmerl/src/xmerl_ucs.erl b/lib/xmerl/src/xmerl_ucs.erl
new file mode 100644
index 0000000000..7c45c838ab
--- /dev/null
+++ b/lib/xmerl/src/xmerl_ucs.erl
@@ -0,0 +1,556 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2005-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+-module(xmerl_ucs).
+
+-compile([verbose,report_warnings,warn_unused_vars]).
+
+
+
+%%% Conversion to/from IANA recognised character sets
+-export([to_unicode/2]).
+
+%%% Micellaneous predicates
+-export([is_iso10646/1, is_unicode/1, is_bmpchar/1, is_latin1/1, is_ascii/1,
+ is_visible_latin1/1, is_visible_ascii/1, is_iso646_basic/1,
+ is_incharset/2]).
+
+%%% Conversion to/from RFC-1345 style mnemonic strings consisting
+%%% of subsets of ISO-10646 with "escape" sequences.
+%-export([from_mnemonic/1, from_mnemonic/2]).
+
+%%% UCS-2, UCS-4, UTF-16, and UTF-8 encoding and decoding
+-export([to_ucs2be/1,from_ucs2be/1, from_ucs2be/2]).
+-export([to_ucs2le/1,from_ucs2le/1, from_ucs2le/2]).
+-export([to_ucs4be/1,from_ucs4be/1, from_ucs4be/2]).
+-export([to_ucs4le/1,from_ucs4le/1, from_ucs4le/2]).
+-export([to_utf16be/1, from_utf16be/1, from_utf16be/2]).
+-export([to_utf16le/1, from_utf16le/1, from_utf16le/2]).
+-export([to_utf8/1, from_utf8/1]).
+
+%%% NB: Non-canonical UTF-8 encodings and incorrectly used
+%%% surrogate-pair codes are disallowed by this code. There are
+%%% important security implications concerning them. DO NOT REMOVE
+%%% THE VARIOUS GUARDS AND TESTS THAT ENFORCE THIS POLICY.
+
+%%% Test if Ch is a legitimate ISO-10646 character code
+is_iso10646(Ch) when is_integer(Ch), Ch >= 0 ->
+ if Ch < 16#D800 -> true;
+ Ch < 16#E000 -> false; % Surrogates
+ Ch < 16#FFFE -> true;
+ Ch =< 16#FFFF -> false; % FFFE and FFFF (not characters)
+ Ch =< 16#7FFFFFFF -> true;
+ true -> false
+ end;
+is_iso10646(_) -> false.
+
+%%% Test if Ch is a legitimate ISO-10646 character code capable of
+%%% being encoded in a UTF-16 string.
+is_unicode(Ch) when Ch < 16#110000 -> is_iso10646(Ch);
+is_unicode(_) -> false.
+
+%%% Test if Ch is a legitimate ISO-10646 character code belonging to
+%%% the basic multi-lingual plane (BMP).
+is_bmpchar(Ch) when is_integer(Ch), Ch >= 0 ->
+ if Ch < 16#D800 -> true;
+ Ch < 16#E000 -> false; % Surrogates
+ Ch < 16#FFFE -> true;
+ true -> false
+ end;
+is_bmpchar(_) -> false.
+
+%%% Test for legitimate Latin-1 code
+is_latin1(Ch) when is_integer(Ch), Ch >= 0, Ch =< 255 -> true;
+is_latin1(_) -> false.
+
+%%% Test for legitimate ASCII code
+is_ascii(Ch) when is_integer(Ch), Ch >= 0, Ch =< 127 -> true;
+is_ascii(_) -> false.
+
+%%% Test for char an element of ISO-646.basic set
+is_iso646_basic(Ch) when is_integer(Ch), Ch >= $\s ->
+ if Ch =< $Z ->
+ %% Everything in this range except $# $$ and $@
+ if Ch > $$ -> Ch =/= $@;
+ true -> Ch < $#
+ end;
+ %% Only $_ and $a .. $z in range above $Z
+ Ch > $z -> false;
+ Ch >= $a -> true;
+ true -> Ch =:= $_
+ end;
+is_iso646_basic(_) ->
+ false.
+
+%%% Test for char a visible Latin-1 char, i.e. a non-control Latin-1 char,
+%%% excepting non-break space (but including space).
+is_visible_latin1(Ch) when is_integer(Ch), Ch >= $\s ->
+ if Ch =< $~ -> true;
+ Ch >= 161 -> Ch =< 255
+ end;
+is_visible_latin1(_) ->
+ false.
+
+%%% Test for char a visible ASCII char, i.e. a non-control ASCII char
+%%% (including space).
+is_visible_ascii(Ch) when is_integer(Ch), Ch >= $\s -> Ch =< $~;
+is_visible_ascii(_) -> false.
+
+
+%%% UCS-4, big and little endian versions, encoding and decoding
+to_ucs4be(List) when is_list(List) -> lists:flatmap(fun to_ucs4be/1, List);
+to_ucs4be(Ch) -> char_to_ucs4be(Ch).
+
+from_ucs4be(Bin) when is_binary(Bin) -> from_ucs4be(Bin,[],[]);
+from_ucs4be(List) -> from_ucs4be(list_to_binary(List),[],[]).
+
+from_ucs4be(Bin,Tail) when is_binary(Bin) -> from_ucs4be(Bin,[],Tail);
+from_ucs4be(List,Tail) -> from_ucs4be(list_to_binary(List),[],Tail).
+
+to_ucs4le(List) when is_list(List) -> lists:flatmap(fun to_ucs4le/1, List);
+to_ucs4le(Ch) -> char_to_ucs4le(Ch).
+
+from_ucs4le(Bin) when is_binary(Bin) -> from_ucs4le(Bin,[],[]);
+from_ucs4le(List) -> from_ucs4le(list_to_binary(List),[],[]).
+
+from_ucs4le(Bin,Tail) when is_binary(Bin) -> from_ucs4le(Bin,[],Tail);
+from_ucs4le(List,Tail) -> from_ucs4le(list_to_binary(List),[],Tail).
+
+%%% UCS-2, big and little endian versions, encoding and decoding
+to_ucs2be(List) when is_list(List) -> lists:flatmap(fun to_ucs2be/1, List);
+to_ucs2be(Ch) -> char_to_ucs2be(Ch).
+
+from_ucs2be(Bin) when is_binary(Bin) -> from_ucs2be(Bin,[],[]);
+from_ucs2be(List) -> from_ucs2be(list_to_binary(List),[],[]).
+
+from_ucs2be(Bin,Tail) when is_binary(Bin) -> from_ucs2be(Bin,[],Tail);
+from_ucs2be(List,Tail) -> from_ucs2be(list_to_binary(List),[],Tail).
+
+to_ucs2le(List) when is_list(List) -> lists:flatmap(fun to_ucs2le/1, List);
+to_ucs2le(Ch) -> char_to_ucs2le(Ch).
+
+from_ucs2le(Bin) when is_binary(Bin) -> from_ucs2le(Bin,[],[]);
+from_ucs2le(List) -> from_ucs2le(list_to_binary(List),[],[]).
+
+from_ucs2le(Bin,Tail) when is_binary(Bin) -> from_ucs2le(Bin,[],Tail);
+from_ucs2le(List,Tail) -> from_ucs2le(list_to_binary(List),[],Tail).
+
+
+%%% UTF-16, big and little endian versions, encoding and decoding
+to_utf16be(List) when is_list(List) -> lists:flatmap(fun to_utf16be/1, List);
+to_utf16be(Ch) -> char_to_utf16be(Ch).
+
+from_utf16be(Bin) when is_binary(Bin) -> from_utf16be(Bin,[],[]);
+from_utf16be(List) -> from_utf16be(list_to_binary(List),[],[]).
+
+from_utf16be(Bin,Tail) when is_binary(Bin) -> from_utf16be(Bin,[],Tail);
+from_utf16be(List,Tail) -> from_utf16be(list_to_binary(List),[],Tail).
+
+to_utf16le(List) when is_list(List) -> lists:flatmap(fun to_utf16le/1, List);
+to_utf16le(Ch) -> char_to_utf16le(Ch).
+
+from_utf16le(Bin) when is_binary(Bin) -> from_utf16le(Bin,[],[]);
+from_utf16le(List) -> from_utf16le(list_to_binary(List),[],[]).
+
+from_utf16le(Bin,Tail) when is_binary(Bin) -> from_utf16le(Bin,[],Tail);
+from_utf16le(List,Tail) -> from_utf16le(list_to_binary(List),[],Tail).
+
+
+%%% UTF-8 encoding and decoding
+to_utf8(List) when is_list(List) -> lists:flatmap(fun to_utf8/1, List);
+to_utf8(Ch) -> char_to_utf8(Ch).
+
+from_utf8(Bin) when is_binary(Bin) -> from_utf8(binary_to_list(Bin));
+from_utf8(List) ->
+ case expand_utf8(List) of
+ {Result,0} -> Result;
+ {_Res,_NumBadChar} ->
+ exit({ucs,{bad_utf8_character_code}})
+ end.
+
+
+
+
+%%% UCS-4 support
+%%% Possible errors encoding UCS-4:
+%%% - Non-character values (something other than 0 .. 2^31-1)
+%%% - Surrogate-pair code in string.
+%%% - 16#FFFE or 16#FFFF character in string.
+%%% Possible errors decoding UCS-4:
+%%% - Element out of range (i.e. the "sign" bit is set).
+%%% - Surrogate-pair code in string.
+%%% - 16#FFFE or 16#FFFF character in string.
+char_to_ucs4be(Ch) ->
+ true = is_iso10646(Ch),
+ [(Ch bsr 24),
+ (Ch bsr 16) band 16#FF,
+ (Ch bsr 8) band 16#FF,
+ Ch band 16#FF].
+
+from_ucs4be(<<Ch:32/big-signed-integer, Rest/binary>>,Acc,Tail) ->
+ if Ch < 0; Ch >= 16#D800, Ch < 16#E000; Ch =:= 16#FFFE; Ch =:= 16#FFFF ->
+ exit({bad_character_code,Ch});
+ true ->
+ from_ucs4be(Rest,[Ch|Acc],Tail)
+ end;
+from_ucs4be(<<>>,Acc,Tail) ->
+ lists:reverse(Acc,Tail);
+from_ucs4be(Bin,Acc,Tail) ->
+ io:format("ucs Error: Bin=~p~n Acc=~p~n Tail=~p~n",[Bin,Acc,Tail]),
+ {error,not_ucs4be}.
+
+char_to_ucs4le(Ch) ->
+ true = is_iso10646(Ch),
+ [Ch band 16#FF,
+ (Ch bsr 8) band 16#FF,
+ (Ch bsr 16) band 16#FF,
+ (Ch bsr 24)].
+
+
+from_ucs4le(<<Ch:32/little-signed-integer, Rest/binary>>,Acc,Tail) ->
+ if Ch < 0; Ch >= 16#D800, Ch < 16#E000; Ch =:= 16#FFFE; Ch =:= 16#FFFF ->
+ exit({bad_character_code,Ch});
+ true ->
+ from_ucs4le(Rest,[Ch|Acc],Tail)
+ end;
+from_ucs4le(<<>>,Acc,Tail) ->
+ lists:reverse(Acc,Tail);
+from_ucs4le(Bin,Acc,Tail) ->
+ io:format("ucs Error: Bin=~p~n Acc=~p~n Tail=~p~n",[Bin,Acc,Tail]),
+ {error,not_ucs4le}.
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%% UCS-2 support
+%%% FIXME! Don't know how to encode UCS-2!!
+%%% Currently I just encode as UCS-4, but strips the 16 higher bits.
+char_to_ucs2be(Ch) ->
+ true = is_iso10646(Ch),
+ [(Ch bsr 8) band 16#FF,
+ Ch band 16#FF].
+
+from_ucs2be(<<Ch:16/big-signed-integer, Rest/binary>>,Acc,Tail) ->
+ if Ch < 0; Ch >= 16#D800, Ch < 16#E000; Ch =:= 16#FFFE; Ch =:= 16#FFFF ->
+ exit({bad_character_code,Ch});
+ true ->
+ from_ucs2be(Rest,[Ch|Acc],Tail)
+ end;
+from_ucs2be(<<>>,Acc,Tail) ->
+ lists:reverse(Acc,Tail);
+from_ucs2be(Bin,Acc,Tail) ->
+ io:format("ucs Error: Bin=~p~n Acc=~p~n Tail=~p~n",[Bin,Acc,Tail]),
+ {error,not_ucs2be}.
+
+char_to_ucs2le(Ch) ->
+ true = is_iso10646(Ch),
+ [(Ch bsr 16) band 16#FF,
+ (Ch bsr 24)].
+
+
+from_ucs2le(<<Ch:16/little-signed-integer, Rest/binary>>,Acc,Tail) ->
+ if Ch < 0; Ch >= 16#D800, Ch < 16#E000; Ch =:= 16#FFFE; Ch =:= 16#FFFF ->
+ exit({bad_character_code,Ch});
+ true ->
+ from_ucs4le(Rest,[Ch|Acc],Tail)
+ end;
+from_ucs2le(<<>>,Acc,Tail) ->
+ lists:reverse(Acc,Tail);
+from_ucs2le(Bin,Acc,Tail) ->
+ io:format("ucs Error: Bin=~p~n Acc=~p~n Tail=~p~n",[Bin,Acc,Tail]),
+ {error,not_ucs2le}.
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%% UTF-16 support
+%%% Possible errors encoding UTF-16
+%%% - Non-character values (something other than 0 .. 2^31-1)
+%%% - Surrogate-pair code in string.
+%%% - 16#FFFE or 16#FFFF character in string.
+%%% NB: the UCS replacement char (U+FFFD) will be quietly substituted
+%%% for unrepresentable chars (i.e. those geq to 2^20+2^16).
+%%% Possible errors decoding UTF-16:
+%%% - Unmatched surrogate-pair code in string.
+%%% - 16#FFFE or 16#FFFF character in string.
+char_to_utf16be(Ch) when is_integer(Ch), Ch >= 0 ->
+ if Ch =< 16#FFFF ->
+ if Ch < 16#D800; Ch >= 16#E000, Ch < 16#FFFE ->
+ [Ch bsr 8, Ch band 16#FF]
+ end;
+ Ch < 16#110000 ->
+ %% Encode with surrogate pair
+ X = Ch - 16#10000,
+ [16#D8 + (X bsr 18),
+ (X bsr 10) band 16#FF,
+ 16#DC + ((X bsr 8) band 3),
+ X band 16#FF];
+ Ch =< 16#7FFFFFFF ->
+ %% Unrepresentable char: use REPLACEMENT CHARACTER (U+FFFD)
+ [16#FF, 16#FD]
+ end.
+
+from_utf16be(<<Ch:16/big-unsigned-integer, Rest/binary>>, Acc, Tail)
+ when Ch < 16#D800; Ch > 16#DFFF ->
+ if Ch < 16#FFFE -> from_utf16be(Rest,[Ch|Acc],Tail) end;
+from_utf16be(<<Hi:16/big-unsigned-integer, Lo:16/big-unsigned-integer,
+ Rest/binary>>, Acc, Tail)
+ when Hi >= 16#D800, Hi < 16#DC00, Lo >= 16#DC00, Lo =< 16#DFFF ->
+ %% Surrogate pair
+ Ch = ((Hi band 16#3FF) bsl 10) + (Lo band 16#3FF) + 16#10000,
+ from_utf16be(Rest, [Ch|Acc], Tail);
+from_utf16be(<<>>,Acc,Tail) ->
+ lists:reverse(Acc,Tail);
+from_utf16be(Bin,Acc,Tail) ->
+ io:format("ucs Error: Bin=~p~n Acc=~p~n Tail=~p~n",[Bin,Acc,Tail]),
+ {error,not_utf16be}.
+
+char_to_utf16le(Ch) when is_integer(Ch), Ch >= 0 ->
+ if Ch =< 16#FFFF ->
+ if Ch < 16#D800; Ch >= 16#E000, Ch < 16#FFFE ->
+ [Ch band 16#FF, Ch bsr 8]
+ end;
+ Ch < 16#110000 ->
+ %% Encode with surrogate pair
+ X = Ch - 16#10000,
+ [(X bsr 10) band 16#FF,
+ 16#D8 + (X bsr 18),
+ X band 16#FF,
+ 16#DC + ((X bsr 8) band 3)];
+ Ch =< 16#7FFFFFFF ->
+ %% Unrepresentable char: use REPLACEMENT CHARACTER (U+FFFD)
+ [16#FD, 16#FF]
+ end.
+
+from_utf16le(<<Ch:16/little-unsigned-integer, Rest/binary>>, Acc, Tail)
+ when Ch < 16#D800; Ch > 16#DFFF ->
+ if Ch < 16#FFFE -> from_utf16le(Rest, [Ch|Acc], Tail) end;
+from_utf16le(<<Hi:16/little-unsigned-integer, Lo:16/little-unsigned-integer,
+ Rest/binary>>, Acc, Tail)
+ when Hi >= 16#D800, Hi < 16#DC00, Lo >= 16#DC00, Lo =< 16#DFFF ->
+ %% Surrogate pair
+ Ch = ((Hi band 16#3FF) bsl 10) + (Lo band 16#3FF) + 16#10000,
+ from_utf16le(Rest, [Ch|Acc], Tail);
+from_utf16le(<<>>,Acc,Tail) ->
+ lists:reverse(Acc,Tail);
+from_utf16le(Bin,Acc,Tail) ->
+ io:format("ucs Error: Bin=~p~n Acc=~p~n Tail=~p~n",[Bin,Acc,Tail]),
+ {error,not_utf16le}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%% UTF-8 support
+%%% Possible errors encoding UTF-8:
+%%% - Non-character values (something other than 0 .. 2^31-1).
+%%% - Surrogate pair code in string.
+%%% - 16#FFFE or 16#FFFF character in string.
+%%% Possible errors decoding UTF-8:
+%%% - 10xxxxxx or 1111111x as initial byte.
+%%% - Insufficient number of 10xxxxxx octets following an initial octet of
+%%% multi-octet sequence.
+%%% - Non-canonical encoding used.
+%%% - Surrogate-pair code encoded as UTF-8.
+%%% - 16#FFFE or 16#FFFF character in string.
+char_to_utf8(Ch) when is_integer(Ch), Ch >= 0 ->
+ if Ch < 128 ->
+ %% 0yyyyyyy
+ [Ch];
+ Ch < 16#800 ->
+ %% 110xxxxy 10yyyyyy
+ [16#C0 + (Ch bsr 6),
+ 128+(Ch band 16#3F)];
+ Ch < 16#10000 ->
+ %% 1110xxxx 10xyyyyy 10yyyyyy
+ if Ch < 16#D800; Ch > 16#DFFF, Ch < 16#FFFE ->
+ [16#E0 + (Ch bsr 12),
+ 128+((Ch bsr 6) band 16#3F),
+ 128+(Ch band 16#3F)]
+ end;
+ Ch < 16#200000 ->
+ %% 11110xxx 10xxyyyy 10yyyyyy 10yyyyyy
+ [16#F0+(Ch bsr 18),
+ 128+((Ch bsr 12) band 16#3F),
+ 128+((Ch bsr 6) band 16#3F),
+ 128+(Ch band 16#3F)];
+ Ch < 16#4000000 ->
+ %% 111110xx 10xxxyyy 10yyyyyy 10yyyyyy 10yyyyyy
+ [16#F8+(Ch bsr 24),
+ 128+((Ch bsr 18) band 16#3F),
+ 128+((Ch bsr 12) band 16#3F),
+ 128+((Ch bsr 6) band 16#3F),
+ 128+(Ch band 16#3F)];
+ Ch < 16#80000000 ->
+ %% 1111110x 10xxxxyy 10yyyyyy 10yyyyyy 10yyyyyy 10yyyyyy
+ [16#FC+(Ch bsr 30),
+ 128+((Ch bsr 24) band 16#3F),
+ 128+((Ch bsr 18) band 16#3F),
+ 128+((Ch bsr 12) band 16#3F),
+ 128+((Ch bsr 6) band 16#3F),
+ 128+(Ch band 16#3F)]
+ end.
+
+
+
+
+%% expand_utf8([Byte]) -> {[UnicodeChar],NumberOfBadBytes}
+%% Expand UTF8 byte sequences to ISO 10646/Unicode
+%% charactes. Any illegal bytes are removed and the number of
+%% bad bytes are returned.
+%%
+%% Reference:
+%% RFC 3629: "UTF-8, a transformation format of ISO 10646".
+
+expand_utf8(Str) ->
+ expand_utf8_1(Str, [], 0).
+
+expand_utf8_1([C|Cs], Acc, Bad) when C < 16#80 ->
+ %% Plain Ascii character.
+ expand_utf8_1(Cs, [C|Acc], Bad);
+expand_utf8_1([C1,C2|Cs], Acc, Bad) when C1 band 16#E0 =:= 16#C0,
+ C2 band 16#C0 =:= 16#80 ->
+ case ((C1 band 16#1F) bsl 6) bor (C2 band 16#3F) of
+ C when 16#80 =< C ->
+ expand_utf8_1(Cs, [C|Acc], Bad);
+ _ ->
+ %% Bad range.
+ expand_utf8_1(Cs, Acc, Bad+1)
+ end;
+expand_utf8_1([C1,C2,C3|Cs], Acc, Bad) when C1 band 16#F0 =:= 16#E0,
+ C2 band 16#C0 =:= 16#80,
+ C3 band 16#C0 =:= 16#80 ->
+ case ((((C1 band 16#0F) bsl 6) bor (C2 band 16#3F)) bsl 6) bor
+ (C3 band 16#3F) of
+ C when 16#800 =< C ->
+ expand_utf8_1(Cs, [C|Acc], Bad);
+ _ ->
+ %% Bad range.
+ expand_utf8_1(Cs, Acc, Bad+1)
+ end;
+expand_utf8_1([C1,C2,C3,C4|Cs], Acc, Bad) when C1 band 16#F8 =:= 16#F0,
+ C2 band 16#C0 =:= 16#80,
+ C3 band 16#C0 =:= 16#80,
+ C4 band 16#C0 =:= 16#80 ->
+ case ((((((C1 band 16#0F) bsl 6) bor (C2 band 16#3F)) bsl 6) bor
+ (C3 band 16#3F)) bsl 6) bor (C4 band 16#3F) of
+ C when 16#10000 =< C ->
+ expand_utf8_1(Cs, [C|Acc], Bad);
+ _ ->
+ %% Bad range.
+ expand_utf8_1(Cs, Acc, Bad+1)
+ end;
+expand_utf8_1([_|Cs], Acc, Bad) ->
+ %% Ignore bad character.
+ expand_utf8_1(Cs, Acc, Bad+1);
+expand_utf8_1([], Acc, Bad) -> {lists:reverse(Acc),Bad}.
+
+
+
+%%% ----------------------------------------------------------------------------
+%%% Translation to/from any IANA defined character set, given that a mapping
+%%% exists. Don't care about validating valid subsets of Unicode
+to_unicode(Input,Cs) when Cs=='ansi_x3.4-1968';Cs=='iso-ir-6';
+ Cs=='ansi_x3.4-1986';Cs=='iso_646.irv:1991';
+ Cs=='ascii';Cs=='iso646-us';Cs=='us-ascii';Cs=='us';
+ Cs=='ibm367';Cs=='cp367';Cs=='csascii' -> % US-ASCII
+ Input;
+to_unicode(Input,Cs) when Cs=='iso-10646-utf-1';Cs=='csiso10646utf1' ->
+ Input;
+to_unicode(Input,Cs) when Cs=='iso_646.basic:1983';Cs=='ref';
+ Cs=='csiso646basic1983' ->
+ Input;
+to_unicode(Input,Cs) when Cs=='iso_8859-1:1987';Cs=='iso-ir-100';
+ Cs=='iso_8859-1';Cs=='iso-8859-1';Cs=='latin1';
+ Cs=='l1';Cs=='ibm819';
+ Cs=='cp819';Cs=='csisolatin1' ->
+ Input;
+% to_unicode(Input,Cs) when Cs=='mnemonic';Cs=='"mnemonic+ascii+38';
+% Cs=='mnem';Cs=='"mnemonic+ascii+8200' ->
+% from_mnemonic(Input);
+to_unicode(Input,Cs) when Cs=='iso-10646-ucs-2';Cs=='csunicode' ->
+ from_ucs2be(Input); % Guess byteorder
+to_unicode(Input,Cs) when Cs=='iso-10646-ucs-4';Cs=='csucs4' ->
+ from_ucs4be(Input); % Guess byteorder
+to_unicode(Input,Cs) when Cs=='utf-16be';Cs=='utf-16' ->
+ from_utf16be(Input);
+to_unicode(Input,'utf-16le') ->
+ from_utf16le(Input);
+to_unicode(Input,'utf-8') ->
+ from_utf8(Input);
+to_unicode(Input,Charset) ->
+ exit({bad_character_code,Input,Charset}).
+ %ucs_data:to_unicode(Input,Charset).
+
+
+
+
+%%% Tests if Char is in Charset.
+%%% Do this by trying to convert it into unicode, if possible a mapping was
+%%% found and we are ok.
+is_incharset(In,Cs) when Cs=='ansi_x3.4-1968';Cs=='iso-ir-6';
+ Cs=='ansi_x3.4-1986';Cs=='iso_646.irv:1991';
+ Cs=='ascii';Cs=='iso646-us';Cs=='us-ascii';Cs=='us';
+ Cs=='ibm367';Cs=='cp367';Cs=='csascii' -> % US-ASCII
+ if
+ is_integer(In) -> is_ascii(In);
+ is_list(In) -> test_charset(fun is_ascii/1,In)
+ end;
+is_incharset(In,Cs) when Cs=='iso-10646-utf-1';Cs=='csiso10646utf1' ->
+ if
+ is_integer(In) -> is_unicode(In);
+ is_list(In) -> test_charset(fun is_unicode/1, In)
+ end;
+is_incharset(In,Cs) when Cs=='iso_646.basic:1983';Cs=='ref';
+ Cs=='csiso646basic1983' ->
+ if
+ is_integer(In) -> is_iso646_basic(In);
+ is_list(In) -> test_charset(fun is_iso646_basic/1, In)
+ end;
+is_incharset(In,Cs) when Cs=='iso_8859-1:1987';Cs=='iso-ir-100';
+ Cs=='iso_8859-1';Cs=='iso-8859-1';
+ Cs=='latin1';Cs=='l1';Cs=='ibm819';
+ Cs=='cp819';Cs=='csisolatin1' ->
+ if
+ is_integer(In) -> is_latin1(In);
+ is_list(In) -> test_charset(fun is_latin1/1, In)
+ end;
+is_incharset(In,Charset) when is_integer(In) ->
+ case to_unicode([In],Charset) of
+ {error,unsupported_charset} ->
+ {error,unsupported_charset};
+ {error,_} ->
+ false;
+ [Int] when is_integer(Int) ->
+ true
+ end;
+is_incharset(In,Charset) when is_list(In) ->
+ case to_unicode(In,Charset) of
+ {error,unsupported_charset} ->
+ {error,unsupported_charset};
+ {error,_} ->
+ false;
+ [Int] when is_integer(Int) ->
+ true
+ end.
+
+
+test_charset(Fun,Input) ->
+ case lists:all(Fun, Input) of
+ true ->
+ true;
+ _ ->
+ false
+ end.
+
diff --git a/lib/xmerl/src/xmerl_uri.erl b/lib/xmerl/src/xmerl_uri.erl
new file mode 100644
index 0000000000..d8edb2e6e1
--- /dev/null
+++ b/lib/xmerl/src/xmerl_uri.erl
@@ -0,0 +1,478 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2005-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+
+-module(xmerl_uri).
+
+
+-export([parse/1,resolve/2
+% scan_abspath/1
+ ]).
+
+
+%%% Parse URI and return {Scheme,Path}
+%%% Note that Scheme specific parsing/validation is not handled here!
+resolve(_Root,_Rel) ->
+ ok.
+
+%%% See "http://www.isi.edu/in-notes/iana/assignments/url-schemes" for a list of
+%%% defined URL schemes and references to its sources.
+
+parse(URI) ->
+ case parse_scheme(URI) of
+ {http,Cont} -> parse_http(Cont,http);
+ {https,Cont} -> parse_http(Cont,https);
+ {ftp,Cont} -> parse_ftp(Cont,ftp);
+ {sip,Cont} -> parse_sip(Cont,sip);
+ {sips,Cont} -> parse_sip(Cont,sips);
+ {sms,Cont} -> parse_sms(Cont,sms); % Note: From old draft
+ {error,Error} -> {error,Error};
+ {Scheme,Cont} -> {Scheme,Cont}
+ end.
+
+
+%%% Parse the scheme.
+parse_scheme(URI) ->
+ parse_scheme(URI,[]).
+
+parse_scheme([H|URI],Acc) when $a=<H,H=<$z; $A=<H,H=<$Z ->
+ parse_scheme2(URI,[H|Acc]);
+parse_scheme(_,_) ->
+ {error,no_scheme}.
+
+parse_scheme2([H|URI],Acc)
+ when $a=<H,H=<$z; $A=<H,H=<$Z; $0=<H,H=<$9; H==$-;H==$+;H==$. ->
+ parse_scheme2(URI,[H|Acc]);
+parse_scheme2([$:|URI],Acc) ->
+ {list_to_atom(lists:reverse(Acc)),URI};
+parse_scheme2(_,_) ->
+ {error,no_scheme}.
+
+
+%%% ............................................................................
+-define(HTTP_DEFAULT_PORT, 80).
+-define(HTTPS_DEFAULT_PORT, 443).
+
+%%% HTTP (Source RFC 2396, RFC 2616)
+%%% http_URL = "*" | absoluteURI | abs_path [ "?" query ] | authority
+
+%%% http_URL = "http:" "//" host [ ":" port ] [ abs_path [ "?" query ]]
+%%% Returns a tuple {http,Host,Port,Path,Query} where
+%%% Host = string() Host value
+%%% Port = string() Port value
+%%% Path = string() Absolute path
+%%% Query = string() Query or Fragment value
+parse_http("//"++C0,Scheme) ->
+ case scan_hostport(C0,Scheme) of
+ {C1,Host,Port} ->
+ case scan_pathquery(C1) of
+ {error,Error} ->
+ {error,Error};
+ {Path,Query} ->
+ {Scheme,Host,Port,Path,Query}
+ end;
+ {error,Error} ->
+ {error,Error}
+ end;
+parse_http(_,_) ->
+ {error,invalid_url}.
+
+scan_pathquery(C0) ->
+ case scan_abspath(C0) of
+ {error,Error} ->
+ {error,Error};
+ {[],[]} -> % Add implicit path
+ {"/",""};
+ {"?"++C1,Path} ->
+ case scan_query(C1,[]) of
+ {error,Error} ->
+ {error,Error};
+ Query ->
+ {Path,"?"++Query}
+ end;
+ {"#"++C1,Path} -> % 'query' and 'fragment' are both defined as '*uric'
+ case scan_query(C1,[]) of
+ {error,Error} ->
+ {error,Error};
+ Fragment ->
+ {Path,"#"++Fragment}
+ end;
+ {[],Path} ->
+ {Path,""}
+ end.
+
+
+%%% ............................................................................
+-define(FTP_DEFAULT_PORT, 21).
+
+%%% FTP (Source RFC 2396, RFC 1738, RFC 959)
+%%% Encoded :, @, or / characters appearing within the username or
+%%% password fields (as required by RFC 1738) are not handled.
+%%%
+%%% Note: This BNF has been modified to better fit with RFC 2396
+%%% ftp_URL = "ftp:" "//" [ ftp_userinfo "@"] host [ ":" port ] ftp_abs_path
+%%% ftp_userinfo = ftp_user [ ":" ftp_password ]
+%%% ftp_abs_path = "/" ftp_path_segments [ ";type=" ftp_type ]
+%%% ftp_path_segments = ftp_segment *( "/" ftp_segment)
+%%% ftp_segment = *[ ftp_uchar | "?" | ":" | "@" | "&" | "=" ]
+%%% ftp_type = "A" | "I" | "D" | "a" | "i" | "d"
+%%% ftp_user = *[ ftp_uchar | ";" | "?" | "&" | "=" ]
+%%% ftp_password = *[ ftp_uchar | ";" | "?" | "&" | "=" ]
+%%% ftp_uchar = ftp_unreserved | escaped
+%%% ftp_unreserved = alphanum | mark | "$" | "+" | ","
+parse_ftp("//"++C0,Scheme) ->
+ case ftp_userinfo(C0) of
+ {error, Error} ->
+ {error,Error};
+ {C1,Creds} ->
+ case scan_hostport(C1,Scheme) of
+ {C2,Host,Port} ->
+ case scan_abspath(C2) of
+ {error,Error} ->
+ {error,Error};
+ {[],[]} -> % Add implicit path
+ {Scheme,Creds,Host,Port,"/"};
+ {[],Path} ->
+ {Scheme,Creds,Host,Port,Path}
+ end;
+ {error,Error} ->
+ {error,Error}
+ end
+ end.
+
+ftp_userinfo(C0) ->
+ ftp_userinfo(C0, []).
+
+ftp_userinfo([], Acc) ->
+ {lists:reverse(Acc), {"",""}};
+ftp_userinfo(C0=[$/ |_], Acc) ->
+ {lists:reverse(Acc)++C0, {"",""}};
+ftp_userinfo([$@ |C0], Acc) ->
+ {C0, ftp_userinfo_1(lists:reverse(Acc), 0, "", "")};
+ftp_userinfo([C |C0], Acc) ->
+ ftp_userinfo(C0, [C |Acc]).
+
+
+ftp_userinfo_1([], 0, Acc, []) ->
+ { lists:reverse(Acc), ""};
+ftp_userinfo_1([], 1, Acc, User) ->
+ {User, lists:reverse(Acc)};
+ftp_userinfo_1([$:|_], 0, [], []) ->
+ {error,no_user};
+ftp_userinfo_1([$:|C0], 0, Acc,[]) ->
+ ftp_userinfo_1(C0, 1, [], lists:reverse(Acc));
+
+ftp_userinfo_1([C|C0],Stage, Acc, User) ->
+ ftp_userinfo_1(C0,Stage, [C|Acc], User).
+
+
+%%% .........................................................................
+-define(SIP_DEFAULT_PORT, 5060).
+-define(SIPTLS_DEFAULT_PORT, 5061).
+
+%%% SIP (Source RFC 2396, RFC 3261)
+%%% sip_URL = "sip:" [ sip_userinfo "@" ] host [ ":" port ]
+%%% sip_uri-parameters [ sip_headers ]
+%%% sip_userinfo = (sip_user | sip_telephone-subscriber)
+%%% [ ":" sip_password ]
+%%% sip_user = *( unreserved | escaped |
+%%% "&" | "=" | "+" | "$" | "," | ";" | "?" | "/")
+%%% sip_telephone-subscriber = See RFC2806
+%%% sip_password = *( unreserved | escaped |
+%%% "&" | "=" | "+" | "$" | "," )
+%%% sip_uri-parameters = *( ";" sip_uri-parameter )
+%%% sip_uri-parameter = sip_transport-param | sip_user-param |
+%%% sip_method-param | sip_ttl-param |
+%%% sip_maddr-param | sip_lr-param | sip_other-param
+%%% sip_transport-param = "transport=" ( "udp" | "tcp" | "sctp" | "tls" | token)
+%%% sip_user-param = "user=" ( "phone" | "ip" | token)
+%%% sip_method-param = "method=" sip_Method
+%%% sip_ttl-param = "ttl=" sip_ttl
+%%% sip_maddr-param = "maddr=" host
+%%% sip_lr-param = "lr"
+%%% sip_other-param = 1*sip_paramchar [ "=" 1*sip_paramchar ]
+%%% sip_Method = "INVITE" | "ACK" | "OPTIONS" | "BYE" |
+%%% "CANCEL" | "REGISTER" | token
+%%% sip_ttl = 1*3DIGIT ; 0 to 255
+%%% sip_paramchar = sip_param-unreserved | unreserved | escaped
+%%% sip_param-unreserved = "[" | "]" | "/" | ":" | "&" | "+" | "$"
+%%% sip_headers = "?" sip_header *( "&" sip_header )
+%%% sip_header = sip_hname "=" sip_hvalue
+%%% sip_hname = 1*( sip_hnv-unreserved | unreserved | escaped )
+%%% sip_hvalue = *( sip_hnv-unreserved / unreserved / escaped )
+%%% sip_hnv-unreserved = "[" | "]" | "/" | "?" | ":" | "+" | "$"
+
+%%% Note:
+%%% - FIXME: Headers not parsed
+parse_sip(C0,Scheme) ->
+ case string:tokens(C0,"@") of
+ [Userinfo,Hostport] ->
+ {User,Pass}=sip_userinfo(Userinfo),
+ {C1,Host,Port}=scan_hostport(Hostport,Scheme),
+ {C2,Parameters}=scan_parameters(C1),
+ Headers=scan_headers(C2),
+ {Scheme,User,Pass,Host,Port,Parameters,Headers};
+ [Hostport] ->
+ {C1,Host,Port}=scan_hostport(Hostport,Scheme),
+ {C2,Parameters}=scan_parameters(C1),
+ Headers=scan_headers(C2),
+ {Scheme,none,none,Host,Port,Parameters,Headers}
+ end.
+
+%%% FIXME! User can be telephone subscriber
+sip_userinfo(Userinfo) ->
+ case string:tokens(Userinfo,":") of
+ [User,Pass] -> {User,Pass};
+ [User] -> {User,none}
+ end.
+
+scan_parameters(C1) ->
+ ParList=string:tokens(C1,";"),
+ scan_parameters2(ParList,[], []).
+
+%% Is Foo the way to go? This code needs further investigation. (As
+%% does most of this module.) If we decide to keep it!
+scan_parameters2([],Out, Foo) ->
+ {lists:reverse(Foo), lists:reverse(Out)};
+scan_parameters2(["transport"++Val|Rest],Out, Foo) ->
+ scan_parameters2(Rest,[{transport,Val}|Out], Foo);
+scan_parameters2(["user"++Val|Rest],Out, Foo) ->
+ scan_parameters2(Rest,[{user,Val}|Out], Foo);
+scan_parameters2(["method"++Val|Rest],Out, Foo) ->
+ scan_parameters2(Rest,[{method,Val}|Out], Foo);
+scan_parameters2(["ttl"++Val|Rest],Out, Foo) ->
+ scan_parameters2(Rest,[{ttl,Val}|Out], Foo);
+scan_parameters2(["maddr"++Val|Rest],Out, Foo) ->
+ scan_parameters2(Rest,[{maddr,Val}|Out], Foo);
+scan_parameters2(["lr"|Rest],Out, Foo) ->
+ scan_parameters2(Rest,[{lr,""}|Out], Foo);
+scan_parameters2([Other|Rest],Out, Foo) ->
+ scan_parameters2(Rest,[Out], [Other |Foo]).
+
+%%% FIXME!
+scan_headers(C2) ->
+ C2.
+
+%%% ............................................................................
+%%% SMS (Source draft-wilde-sms-uri-01, January 24 2002 and
+%%% draft-allocchio-gstn-01, November 2001)
+%%% The syntax definition for "gstn-phone" is taken from
+%%% [draft-allocchio-gstn-01], allowing global as well as local telephone
+%%% numbers.
+%%% Note: This BNF has been modified to better fit with RFC 2396
+%%% sms_URI = sms ":" 1*( sms-recipient ) [ sms-body ]
+%%% sms-recipient = gstn-phone sms-qualifier
+%%% [ "," sms-recipient ]
+%%% sms-qualifier = *( smsc-qualifier / pid-qualifier )
+%%% smsc-qualifier = ";smsc=" SMSC-sub-addr
+%%% pid-qualifier = ";pid=" PID-sub-addr
+%%% sms-body = ";body=" *urlc
+%%% gstn-phone = ( global-phone / local-phone )
+%%% global-phone = "+" 1*( DIGIT / written-sep )
+%%% local-phone = [ exit-code ] dial-number / exit-code [ dial-number ]
+%%% exit-code = phone-string
+%%% dial-number = phone-string
+%%% subaddr-string = phone-string
+%%% post-dial = phone-string
+%%% phone-string = 1*( DTMF / pause / tonewait / written-sep )
+%%% DTMF = ( DIGIT / "#" / "*" / "A" / "B" / "C" / "D" )
+%%% written-sep = ( "-" / "." )
+%%% pause = "p"
+%%% tonewait = "w"
+parse_sms(Cont,Scheme) ->
+ {Scheme,Cont}.
+
+
+%%% ==========================================================================
+%%% Generic URI parsing. BNF rules from RFC 2396
+
+%%% hostport = host [ ":" port ]
+scan_hostport(C0,Scheme) ->
+ case scan_host(C0) of
+ {error,Error} ->
+ {error,Error};
+ {":"++C1,Host} ->
+ {C2,Port}=scan_port(C1,[]),
+ {C2,Host,list_to_integer(Port)};
+ {C1,Host} when Scheme==http ->
+ {C1,Host,?HTTP_DEFAULT_PORT};
+ {C1,Host} when Scheme==https ->
+ {C1,Host,?HTTPS_DEFAULT_PORT};
+ {C1,Host} when Scheme==ftp ->
+ {C1,Host,?FTP_DEFAULT_PORT};
+ {C1,Host} when Scheme==sip ->
+ {C1,Host,?SIP_DEFAULT_PORT}
+ end.
+
+
+%%% host = hostname | IPv4address | IPv6reference
+%%% hostname = *( domainlabel "." ) toplabel [ "." ]
+%%% domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
+%%% toplabel = alpha | alpha *( alphanum | "-" ) alphanum
+%%% IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
+%%% ipv6reference = "[" IPv6address "]"
+%%% IPv6address = hexpart [ ":" IPv4address ]
+%%% hexpart = hexseq | hexseq "::" [ hexseq ] | "::" [ hexseq ]
+%%% hexseq = hex4 *( ":" hex4)
+%%% hex4 = 1*4HEXDIG
+
+%%% Note:
+%%% Bitfields are set as follows:
+%%% Bit 0 = 0-9
+%%% Bit 1 = a-f
+%%% Bit 2 = g-z
+-define(BIT1, 1).
+-define(BIT2, 2).
+-define(BIT3, 4).
+
+%%% 1 = DIGIT are only digits
+%%% 3 = HEX are DIGITS + a-f
+%%% 6 = ALPHA are HEX - DIGITS + g-z
+-define(DIGIT, 1).
+-define(HEX, 3).
+-define(ALPHA, 6).
+
+
+scan_host(C0) ->
+ case scan_host2(C0,[],0,[],[]) of
+ {C1,IPv4address,[?DIGIT,?DIGIT,?DIGIT,?DIGIT]} ->
+ {C1,lists:reverse(lists:append(IPv4address))};
+%% {C1,IPv6address,[$[,Hex1,Hex2,Hex3,Hex4,$]]} when Hex1=<?HEX;
+%% Hex2=<?HEX;
+%% Hex3=<?HEX;
+%% Hex4=<?HEX ->
+%% {C1,lists:reverse(lists:append(IPv6address))};
+ {C1,Hostname,[A|_HostF]} ->
+ {C1,lists:reverse(lists:append(Hostname))};
+ _ ->
+ {error,no_host}
+ end.
+
+scan_host2([H|C0],Acc,CurF,Host,HostF) when $0=<H,H=<$9 ->
+ scan_host2(C0,[H|Acc],CurF bor ?BIT1,Host,HostF);
+scan_host2([H|C0],Acc,CurF,Host,HostF) when $a=<H,H=<$z; $A=<H,H=<$Z ->
+ scan_host2(C0,[H|Acc],CurF bor ?ALPHA,Host,HostF);
+scan_host2([$-|C0],Acc,CurF,Host,HostF) when CurF=/=0 ->
+ scan_host2(C0,[$-|Acc],CurF,Host,HostF);
+scan_host2([$.|C0],Acc,CurF,Host,HostF) when CurF=/=0 ->
+ scan_host2(C0,[],0,[".",Acc|Host],[CurF|HostF]);
+scan_host2(C0,Acc,CurF,Host,HostF) ->
+ {C0,[Acc|Host],[CurF|HostF]}.
+
+
+%%% port = *digit
+scan_port([H|C0],Acc) when $0=<H,H=<$9 ->
+ scan_port(C0,[H|Acc]);
+scan_port(C0,Acc) ->
+ {C0,lists:reverse(Acc)}.
+
+%%% abs_path = "/" path_segments
+scan_abspath([]) ->
+ {[],[]};
+scan_abspath("/"++C0) ->
+ scan_pathsegments(C0,["/"]);
+scan_abspath(_) ->
+ {error,no_abspath}.
+
+%%% path_segments = segment *( "/" segment )
+scan_pathsegments(C0,Acc) ->
+ case scan_segment(C0,[]) of
+ {"/"++C1,Segment} ->
+ scan_pathsegments(C1,["/",Segment|Acc]);
+ {C1,Segment} ->
+ {C1,lists:reverse(lists:append([Segment|Acc]))}
+ end.
+
+
+%%% segment = *pchar *( ";" param )
+%%% param = *pchar
+scan_segment(";"++C0,Acc) ->
+ {C1,ParamAcc}=scan_pchars(C0,";"++Acc),
+ scan_segment(C1,ParamAcc);
+scan_segment(C0,Acc) ->
+ case scan_pchars(C0,Acc) of
+ {";"++C1,Segment} ->
+ {C2,ParamAcc}=scan_pchars(C1,";"++Segment),
+ scan_segment(C2,ParamAcc);
+ {C1,Segment} ->
+ {C1,Segment}
+ end.
+
+%%% query = *uric
+%%% uric = reserved | unreserved | escaped
+%%% reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
+%%% "$" | "," | "[" | "]"
+%%% unreserved = alphanum | mark
+%%% mark = "-" | "_" | "." | "!" | "~" | "*" | "'" |
+%%% "(" | ")"
+%%% escaped = "%" hex hex
+scan_query([],Acc) ->
+ lists:reverse(Acc);
+scan_query([$%,H1,H2|C0],Acc) -> % escaped
+ scan_query([hex2dec(H1)*16+hex2dec(H2)|C0],Acc);
+scan_query([H|C0],Acc) when $a=<H,H=<$z;$A=<H,H=<$Z;$0=<H,H=<$9 -> % alphanum
+ scan_query(C0,[H|Acc]);
+scan_query([H|C0],Acc) when H==$;; H==$/; H==$?; H==$:; H==$@; H==$[; H==$];
+ H==$&; H==$=; H==$+; H==$$; H==$, -> % reserved
+ scan_query(C0,[H|Acc]);
+scan_query([H|C0],Acc) when H==$-; H==$_; H==$.; H==$!; H==$~;
+ H==$*; H==$'; H==$(; H==$) -> % mark
+ scan_query(C0,[H|Acc]);
+scan_query([H|C0],Acc) when 0=<H,H=<127 -> % US ASCII
+ {H1,H2}=dec2hex(H),
+ scan_query(C0,[H2,H1,$%|Acc]);
+scan_query([_H|_C0],_Acc) ->
+ {error,no_query}.
+
+
+%%% pchar = unreserved | escaped |
+%%% ":" | "@" | "&" | "=" | "+" | "$" | ","
+scan_pchars([],Acc) ->
+ {[],Acc};
+scan_pchars([$%,H1,H2|C0],Acc) -> % escaped
+ scan_pchars([hex2dec(H1)*16+hex2dec(H2)|C0],Acc);
+scan_pchars([H|C0],Acc) when $a=<H,H=<$z;$A=<H,H=<$Z;$0=<H,H=<$9 -> % alphanum
+ scan_pchars(C0,[H|Acc]);
+scan_pchars([H|C0],Acc) when H==$-; H==$_; H==$.; H==$!; H==$~;
+ H==$*; H==$'; H==$(; H==$) -> % mark
+ scan_pchars(C0,[H|Acc]);
+scan_pchars([H|C0],Acc) when H==$:; H==$@; H==$&; H==$=; H==$+; H==$$; H==$, ->
+ scan_pchars(C0,[H|Acc]);
+scan_pchars([H|C0],Acc) when 0=<H,H=<127, % US ASCII
+ H=/=$?,H=/=$;,H=/=$/,H=/=$# ->
+ {H1,H2}=dec2hex(H),
+ scan_pchars(C0,[H2,H1,$%|Acc]);
+scan_pchars(C0,Acc) ->
+ {C0,Acc}.
+
+hex2dec(X) when X>=$0,X=<$9 -> X-$0;
+hex2dec(X) when X>=$A,X=<$F -> X-$A+10;
+hex2dec(X) when X>=$a,X=<$f -> X-$a+10.
+
+dec2hex(H) when H<256 ->
+ <<H1:4,H2:4>> = <<H>>,
+ {nibble2hex(H1),nibble2hex(H2)}.
+
+nibble2hex(X) when 0=<X,X=<9 -> X+$0;
+nibble2hex(10) -> $a;
+nibble2hex(11) -> $b;
+nibble2hex(12) -> $c;
+nibble2hex(13) -> $d;
+nibble2hex(14) -> $e;
+nibble2hex(15) -> $f.
diff --git a/lib/xmerl/src/xmerl_validate.erl b/lib/xmerl/src/xmerl_validate.erl
new file mode 100644
index 0000000000..893e23ca34
--- /dev/null
+++ b/lib/xmerl/src/xmerl_validate.erl
@@ -0,0 +1,663 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2003-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+-module(xmerl_validate).
+
+-export([validate/2]).
+
+
+-include("xmerl.hrl"). % record def, macros
+
+
+
+%% +type validate(xmerl_scanner(),xmlElement())->
+%% xmlElment() | {error,tuple()}.
+validate(#xmerl_scanner{doctype_name=DTName,doctype_DTD=OpProv},
+ #xmlElement{name=Name})
+ when DTName=/=Name,OpProv=/=option_provided->
+ {error, {mismatched_root_element,Name,DTName}};
+validate(#xmerl_scanner{rules=Rules}=S,
+ XML=#xmlElement{name=Name})->
+ catch do_validation(read_rules(Rules,Name),XML,Rules,S);
+validate(_, XML) ->
+ {error, {no_xml_element, XML}}.
+
+
+
+%% +type validate(rules(),xmlElement())->
+%% {ok,xmlElement()} | {error,tuple()}.
+do_validation(undefined,#xmlElement{name=Name}, _Rules,_S) ->
+ {error,{unknown_element,Name}};
+do_validation(El_Rule,XML,Rules,S)->
+ case catch valid_attributes(El_Rule#xmlElement.attributes,
+ XML#xmlElement.attributes,S) of
+ {'EXIT',Reason} ->
+ {error,Reason};
+ {error,Reason} ->
+ {error,Reason};
+ Attr_2->
+% XML_=XML#xmlElement{attributes=Attr_2},
+ El_Rule_Cont = El_Rule#xmlElement.content,
+ WSActionMode = ws_action_mode(El_Rule#xmlElement.elementdef,
+ El_Rule_Cont,S),
+ XML_Cont = XML#xmlElement.content,
+ check_direct_ws_SDD(XML_Cont,WSActionMode),
+ case valid_contents(El_Rule_Cont,
+ XML_Cont,Rules,S,WSActionMode) of
+ {error,Reason}->
+ {error,Reason};
+ {error,Reason,N}->
+ {error,Reason,N};
+ XMLS ->
+ XML#xmlElement{attributes=Attr_2,content=XMLS}
+ end
+ end.
+
+check_direct_ws_SDD(XML,always_preserve) ->
+ case XML of
+ [#xmlText{}|_Rest] ->
+ exit({error,{illegal_whitespace_standalone_doc,XML}});
+ _ -> ok
+ end,
+ case lists:reverse(XML) of
+ [#xmlText{}|_Rest2] ->
+ exit({error,{illegal_whitespace_standalone_doc,XML}});
+ _ -> ok
+ end;
+check_direct_ws_SDD(_,_) -> ok.
+
+ws_action_mode({external,_},Content,#xmerl_scanner{standalone=yes}) ->
+ case element_content(Content) of
+ children ->
+ always_preserve;
+ _ ->
+ preserve
+ end;
+ws_action_mode(_,_,_) ->
+ preserve.
+
+element_content(A) when is_atom(A),A /= any, A /= empty ->
+ children;
+element_content({choice,L}) when is_list(L) ->
+ element_content(L);
+element_content({seq,L}) when is_list(L) ->
+ element_content(L);
+element_content(['#PCDATA'|_T]) ->
+ mixed;
+element_content('#PCDATA') ->
+ mixed;
+element_content({'*',Rest}) ->
+ element_content(Rest);
+element_content(_) -> children.
+
+%% +type read_rules(DTD::atom(),Element_Name::atom())->
+%% undefined | xmlElement().
+read_rules(_, pcdata) ->
+ pcdata;
+read_rules(T, Name) ->
+ case ets:lookup(T, {elem_def, Name}) of
+ [] ->
+ undefined;
+ [{_K, V}] ->
+ V
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%% Attributes Validation %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% +deftype attribute_rule() = {Attr_Name::atom(),attribute_type(),
+%% attribute_priority()}.
+
+%% +type valid_attributes([attribute_rule()],[xmlAttribute()])->
+%% [xmlAttribute()] | {error,attribute_unknow}.
+valid_attributes(All_Attr,[#xmlAttribute{}|_T]=Attr,S)->
+ single_ID_definition(All_Attr),
+ vc_Name_Token_IDREFS(All_Attr,Attr),
+ lists:foreach(fun(#xmlAttribute{name=Name})->
+ case is_attribute_exist(Name,All_Attr) of
+ true ->
+ ok;
+ false ->
+ exit({error,{attribute_unknown,Name}})
+ end
+ end,
+ Attr),
+ lists:flatten(lists:foldl(fun({Name,DataType,IF,DefDecl,Env},Attr_2)->
+ Attr_2++
+ [valid_attribute(Name,DataType,IF,
+ DefDecl,Attr,Env,S)]
+ end,[],All_Attr));
+valid_attributes([],[],_) ->
+ [];
+valid_attributes(All_Attr,[],S) ->
+ single_ID_definition(All_Attr),
+ lists:flatten(lists:foldl(fun({Name,DataType,IF,DefDecl,Env},Attr_2)->
+ Attr_2++[valid_attribute(Name,
+ DataType,IF,
+ DefDecl,
+ [],
+ Env,S)]
+ end,[],All_Attr)).
+
+%%%% [60] DefaultDecl::=
+%%%% '#REQUIRED' | '#IMPLIED'
+%%%% | (('#FIXED' S)? AttValue)
+%% +deftype attribute_priority = '#REQUIRED'|'#FIXED'|'#IMPLIED'.
+
+%% +type valid_attribute(Name::atom(),DataType::attribute_value(),
+%% IF::attribute_priority(),[xmlAttribute()])->
+%% [xmlAttribute()] | exit().
+valid_attribute(Name,DataType,IF,DefaultDecl,List_of_Attributes,Env,S)->
+ SA = S#xmerl_scanner.standalone,
+ Attr=search_attr(Name,List_of_Attributes),
+ check_SDD_validity(SA,Env,Attr,IF),
+ case {DefaultDecl,IF,Attr} of
+ {'#REQUIRED',_,no_attribute}->
+ exit({error,{Name,is_required}});
+ {'#IMPLIED',_,no_attribute}->
+ []; %% and no default value
+ {'#FIXED',DefVal,#xmlAttribute{value=DefVal}=Attr} ->
+ Attr;
+ {'#FIXED',A,no_attribute} ->
+ #xmlAttribute{name=Name,value=A}; % FIXED declare value becomes default.
+ {'#FIXED',A,B} ->
+ exit({error,{fixed_default_value_missmatch,A,B}});
+ {_,Value,no_attribute} when is_list(Value)->
+ #xmlAttribute{name=Name,value=Value};
+ {_,_,#xmlAttribute{}=Attr}->
+ %% do test data value, and default_value
+ test_attribute_value(DataType,Attr,IF,S);
+ {DefDecl,Else,XML} ->
+ exit({error,{unknow_attribute_type,DefDecl,Else,XML}})
+ end.
+
+vc_Name_Token_IDREFS([{Name,Type,_,_,_}|Rest],Attrs)
+ when Type=='NMTOKEN';Type=='NMTOKENS'->
+ case lists:keysearch(Name,#xmlAttribute.name,Attrs) of
+ {value,A} ->
+ valid_nmtoken_value(A#xmlAttribute.value,Type);
+ _ -> ok
+ end,
+ vc_Name_Token_IDREFS(Rest,Attrs);
+vc_Name_Token_IDREFS([{Name,Type,_,_,_}|Rest],Attrs)
+ when Type=='IDREFS'->
+ case lists:keysearch(Name,#xmlAttribute.name,Attrs) of
+ {value,A} ->
+ valid_IDREFS(A#xmlAttribute.value,Type);
+ _ -> ok
+ end,
+ vc_Name_Token_IDREFS(Rest,Attrs);
+vc_Name_Token_IDREFS([_H|Rest],Attrs) ->
+ vc_Name_Token_IDREFS(Rest,Attrs);
+vc_Name_Token_IDREFS([],_) -> ok.
+
+valid_nmtoken_value([],'NMTOKENS') ->
+ exit({error,{at_least_one_Nmtoken_required}});
+% valid_nmtoken_value([H|_T] = L,'NMTOKENS') when is_list(H) ->
+% ValidChar =
+% fun(X) ->
+% case xmerl_lib:is_namechar(X) of
+% false ->
+% exit({error,{invalid_character_in_Nmtoken,X}});
+% _ -> ok
+% end
+% end,
+% ValidCharList =
+% fun([Nmtok|T],F) ->
+% lists:foreach(ValidChar,Nmtok),
+% F(T,F);
+% ([],_) -> ok
+% end,
+% ValidCharList(L,ValidChar);
+valid_nmtoken_value(Nmtok,_) ->
+ ValidChar =
+ fun(X) when ?whitespace(X),Nmtok=='NMTOKENS' ->
+ ok;
+ (X) ->
+ case xmerl_lib:is_namechar(X) of
+ false ->
+ exit({error,{invalid_character_in_Nmtoken,X}});
+ _ -> ok
+ end
+ end,
+ lists:foreach(ValidChar,Nmtok).
+
+valid_IDREFS([],'IDREFS') ->
+ exit({error,{at_least_one_IDREF_Name_required}});
+valid_IDREFS(_Str,'IDREFS') ->
+ ok.
+
+single_ID_definition([{_,'ID',_,_,_}=Att1|Rest]) ->
+ case lists:keysearch('ID',2,Rest) of
+ {value,Att2} ->
+ exit({error,{just_one_ID_definition_allowed,Att1,Att2}});
+ _ -> ok
+ end;
+single_ID_definition([_H|T]) ->
+ single_ID_definition(T);
+single_ID_definition([]) ->
+ ok.
+
+check_SDD_validity(yes,{external,_},#xmlAttribute{name=Name,normalized=true},_) ->
+ exit({error,{externally_defed_attribute_normalized_in_standalone_doc,Name}});
+check_SDD_validity(yes,{external,_},no_attribute,V) when V /= no_value->
+ exit({error,{externally_defed_attribute_with_default_value_missing_in_standalone_doc}});
+check_SDD_validity(_,_,_,_) ->
+ ok.
+
+search_attr(Name,[#xmlAttribute{name=Name}=H|_T])->
+ H;
+search_attr(Name,[#xmlAttribute{}|T])->
+ search_attr(Name,T);
+search_attr(_Name,_T) ->
+ no_attribute.
+
+is_attribute_exist(Name,[{Name,_,_,_,_}|_T])->
+ true;
+is_attribute_exist(Name,[{_Attr,_,_,_,_}|T]) ->
+ is_attribute_exist(Name,T);
+is_attribute_exist(_Name,[]) ->
+ false.
+
+%%%%[54] AttType::= StringType | TokenizedType | EnumeratedType
+%%%%[55] StringType::= 'CDATA'
+%%%%[56] TokenizedType::= 'ID'|'IDREF'| 'IDREFS'|'ENTITY'| 'ENTITIES'
+%%%% | 'NMTOKEN'| 'NMTOKENS'
+%%%%[57] EnumeratedType::= NotationType | Enumeration
+%%%%[58] NotationType::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
+%%%%[59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
+
+%% +deftype attribute_type()-> 'CDATA' | 'ID'|'IDREF'| 'IDREFS'|'ENTITY'|
+%% 'ENTITIES'| 'NMTOKEN'| 'NMTOKENS'
+%% {enumeration,[List_of_value::atom()]}.
+
+%% +type test_attribute_value(attribute_type(),xmlAttribute())->
+%% xmlAttribute()| exit.
+%%%% test the constraint validity of Attribute value.
+test_attribute_value('CDATA',#xmlAttribute{}=Attr,_,_) ->
+ Attr;
+test_attribute_value('NMTOKEN',#xmlAttribute{name=Name,value=V}=Attr,
+ Default,_S) ->
+ Fun =
+ fun (X)->
+ case xmerl_lib:is_namechar(X) of
+ true->
+ ok;
+ false->
+ %%io:format("Warning*** nmtoken,value_incorrect: ~p~n",[V]),
+ exit({error,{invalid_value_nmtoken,Name,V}})
+ end
+ end,
+ lists:foreach(Fun,V),
+ if
+ is_list(Default) ->
+ lists:foreach(Fun,Default);
+ true -> ok
+ end,
+ Attr;
+test_attribute_value('NMTOKENS',#xmlAttribute{name=Name,value=V}=Attr,
+ Default,_S) ->
+ Fun =
+ fun (X)->
+ case xmerl_lib:is_namechar(X) of
+ true->
+ ok;
+ false when ?whitespace(X)->
+ ok;
+ false ->
+ exit({error,{invalid_value_nmtokens,Name,V}})
+ end
+ end,
+ lists:foreach(Fun,V),
+ if
+ is_list(Default) ->
+ lists:foreach(Fun,Default);
+ true -> ok
+ end,
+ Attr;
+test_attribute_value(Ent,#xmlAttribute{name=_Name,value=V}=Attr,_Default,
+ S=#xmerl_scanner{rules_read_fun=Read})
+ when Ent == 'ENTITY'; Ent == 'ENTITIES'->
+ %% The default value is already checked
+ NameListFun =
+ fun([],Acc,_) ->
+ lists:reverse(Acc);
+ (Str,Acc,Fun) ->
+ {N,Str2} = scan_name(Str,[]),
+ Fun(Str2,[N|Acc],Fun)
+ end,
+ NameList = NameListFun(V,[],NameListFun),
+ VC_Entity_Name =
+ fun(X) ->
+ case Read(entity,X,S) of
+ {_,external,{_,{ndata,_}}} ->
+ ok;
+ _ -> exit({error,{vc_Entity_Name,X,V}})
+ end
+ end,
+ lists:foreach(VC_Entity_Name,NameList),
+ Attr;
+test_attribute_value({Type,L},#xmlAttribute{value=Value}=Attr,Default,_S)
+ when Type == enumeration; Type == notation ->
+ ValidDefault =
+ if
+ is_atom(Default) -> true;
+ true -> lists:member(list_to_atom(Default),L)
+ end,
+ NoDuplicatesFun =
+ fun(_,_,notation) -> true;
+ ([],_,_) -> true;
+ ([H|T],F,Enum) ->
+ case lists:member(H,T) of
+ true -> false;
+ _ -> F(T,F,Enum)
+ end
+ end,
+ NoDuplicates = NoDuplicatesFun(L,NoDuplicatesFun,Type),
+ case {lists:member(list_to_atom(Value),L),ValidDefault,NoDuplicates} of
+ {true,true,true}->
+ Attr;
+ {false,_,_} ->
+ exit({error,{attribute_value_unknow,Value,{list,L}}});
+ {_,false,_} ->
+ exit({error,{attribute_default_value_unknow,Default,{list,L}}});
+ {_,_,false} ->
+ exit({error,{duplicate_tokens_not_allowed,{list,L}}})
+ end;
+test_attribute_value(_Rule,Attr,_,_) ->
+% io:format("Attr Value*****~nRule~p~nValue~p~n",[Rule,Attr]),
+ Attr.
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%% Contents Validation %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%Element-content Models
+%%%%[47] children::= (choice | seq) ('?' | '*' | '+')?
+%%%%[48] cp::= (Name | choice | seq) ('?' | '*' | '+')?
+%%%%[49] choice::= '(' S? cp ( S? '|' S? cp )+ S? ')'
+%%%%[50] seq::= '(' S? cp ( S? ',' S? cp )* S? ')'
+%%%%[51] Mixed::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
+%%%% | '(' S? '#PCDATA' S? ')'
+
+
+%% +type valid_contents([rule()],[xmlElement()])->
+%% [xmlElement() | {error,???}.
+valid_contents(Rule,XMLS,Rules,S,WSActionMode)->
+ case parse(Rule,XMLS,Rules,WSActionMode,S) of
+ {XML_N,[]}->
+ lists:flatten(XML_N);
+ {_,[#xmlElement{name=Name}|_T]} ->
+ exit({error,{element,Name,isnt_comprise_in_the_rule,Rule}});
+ {_,[#xmlText{}=Txt|_T]} ->
+ exit({error,{element,text,Txt,isnt_comprise_in_the_rule,Rule}});
+ {error,Reason} ->
+ {error,Reason};
+ {error,Reason,N} ->
+ {error,Reason,N}
+ end.
+
+parse({'*',SubRule},XMLS,Rules,WSaction,S)->
+ star(SubRule,XMLS,Rules,WSaction,[],S);
+parse({'+',SubRule},XMLS,Rules,WSaction,S) ->
+ plus(SubRule,XMLS,Rules,WSaction,S);
+parse({choice,CHOICE},XMLS,Rules,WSaction,S)->
+% case XMLS of
+% [] ->
+% io:format("~p~n",[{choice,CHOICE,[]}]);
+% [#xmlElement{name=Name,pos=Pos}|_] ->
+% io:format("~p~n",[{choice,CHOICE,{Name,Pos}}]);
+% [#xmlText{value=V}|_] ->
+% io:format("~p~n",[{choice,CHOICE,{text,V}}])
+% end,
+ choice(CHOICE,XMLS,Rules,WSaction,S);
+parse(empty,[],_Rules,_WSaction,_S) ->
+ {[],[]};
+parse({'?',SubRule},XMLS,Rules,_WSaction,S)->
+ question(SubRule,XMLS,Rules,S);
+parse({seq,List},XMLS,Rules,WSaction,S) ->
+ seq(List,XMLS,Rules,WSaction,S);
+parse(El_Name,[#xmlElement{name=El_Name}=XML|T],Rules,_WSaction,S)
+ when is_atom(El_Name)->
+ case do_validation(read_rules(Rules,El_Name),XML,Rules,S) of
+ {error,R} ->
+% {error,R};
+ exit(R);
+ {error,R,_N}->
+% {error,R,N};
+ exit(R);
+ XML_->
+ {[XML_],T}
+ end;
+parse(any,Cont,Rules,_WSaction,S) ->
+ case catch parse_any(Cont,Rules,S) of
+ Err = {error,_} -> Err;
+ ValidContents -> {ValidContents,[]}
+ end;
+parse(El_Name,[#xmlElement{name=Name}|_T]=S,_Rules,_WSa,_S) when is_atom(El_Name)->
+ {error,
+ {element_seq_not_conform,{wait,El_Name},{is,Name}},
+ {{next,S},{act,[]}} };
+parse(_El_Name,[#xmlPI{}=H|T],_Rules,_WSa,_S) ->
+ {[H],T};
+parse('#PCDATA',XML,_Rules,_WSa,_S)->
+ %%% PCDATA it is 0 , 1 or more #xmlText{}.
+ parse_pcdata(XML);
+parse(El_Name,[#xmlText{}|_T]=S,_Rules,_WSa,_S)->
+ {error,
+ {text_in_place_of,El_Name},
+ {{next,S},{act,[]}}};
+parse([],_,_,_,_) ->
+ {error,no_rule};
+parse(Rule,[],_,_,_) ->
+ {error,{no_xml_element,Rule}}.
+
+parse_any([],_Rules,_S) ->
+ [];
+parse_any([H|T],Rules,S) ->
+ case parse_any(H,Rules,S) of
+ [Cont] ->
+ [Cont|parse_any(T,Rules,S)];
+ Err -> throw(Err)
+ end;
+parse_any(#xmlElement{}=XML,Rules,S) ->
+ case do_validation(read_rules(Rules,el_name(XML)),XML,Rules,S) of
+ {error,R} ->
+ {error,R};
+ {error,R,N}->
+ {error,R,N};
+ XML_->
+ [XML_]
+ end;
+parse_any(El,_Rules,_S) ->
+ [El].
+
+
+
+%% XXX remove first function clause
+% choice(_Choice,[#xmlText{}=T|R],_Rules) ->
+% {[T],R};
+choice([CH|CHS],[_XML|_T]=XMLS,Rules,WSaction,S)->
+ {WS,XMLS1} = whitespace_action(XMLS,ws_action(WSaction,remove)),
+ case parse(CH,XMLS1,Rules,ws_action(WSaction,remove),S) of
+ {error,_R} ->
+ choice(CHS,XMLS,Rules,WSaction,S);
+ {error,_R,_N} ->
+ choice(CHS,XMLS,Rules,WSaction,S); %% XXX add a case {[],XML}
+ {[],XMLS1} -> %% Maybe a sequence with * or ? elements that
+ %% didn't match
+ case CHS of
+ [] -> % choice has succeded but without matching XMLS1
+ {[],XMLS1};
+ _ -> % there are more choice alternatives to try with
+ choice(CHS,XMLS1,Rules,WSaction,S)
+ end;
+%% choice(CHS,XMLS1,Rules,WSaction,S);
+ {Tree,XMLS2}->
+ {WS2,XMLS3} = whitespace_action(XMLS2,ws_action(WSaction,remove)),
+ {WS2++[Tree]++WS,XMLS3}
+ end;
+choice([],XMLS,_,WSaction,_S)->
+ case whitespace_action(XMLS,ws_action(WSaction,remove)) of
+ Res={_,[]} -> Res;
+ _ ->
+ {error,element_unauthorize_in_choice,{{next,XMLS},{act,[]}}}
+ end;
+choice(_,[],_,_,_S) ->
+ {[],[]}.
+
+plus(Rule,XMLS,Rules,WSaction,S) ->
+ %% 1 or more
+ {WS,XMLS1}=whitespace_action(XMLS,WSaction),
+ case parse(Rule,XMLS1,Rules,WSaction,S) of
+ {error, Reason,_XML} ->
+ {error, Reason};
+ {error, X} ->
+ {error, X};
+ {Tree, XMLS2} ->
+ case star(Rule, XMLS2,Rules,WSaction,[],S) of
+ {[], _} ->
+ {WS++[Tree], XMLS2};
+ {Tree_1, XMLS3} ->
+ {WS++[Tree]++Tree_1, XMLS3}
+ end
+ end.
+
+star(_Rule,XML,_Rules,_WSa,Tree,_S) when length(XML)==0->
+ {[Tree],[]};
+star(Rule,XMLS,Rules,WSaction,Tree,S) ->
+ {WS,XMLS1} = whitespace_action(XMLS,WSaction),
+ case parse(Rule,XMLS1,Rules,WSaction,S) of
+ {error, _E, {{next,N},{act,A}}}->
+ %%io:format("Error~p~n",[_E]),
+ {WS++Tree++A,N};
+ {error, _E}->
+ %%io:format("Error~p~n",[_E]),
+% {WS++[Tree],[]};
+ case whitespace_action(XMLS,ws_action(WSaction,remove)) of
+ {[],_} ->
+ {WS++[Tree],XMLS};
+ {WS2,XMLS2} ->
+ {WS2++[Tree],XMLS2}
+ end;
+ {Tree1,XMLS2}->
+ star(Rule,XMLS2,Rules,WSaction,Tree++WS++[Tree1],S)
+ end.
+
+question(_Rule, [],_Rules,_S) ->
+ {[],[]};
+question(Rule, Toks,Rules,S) ->
+ %% 0 or 1
+ case parse(Rule, Toks,Rules,preserve,S) of
+ {error, _E, _Next}->
+ {[],Toks};
+ {error, _E} ->
+ {[], Toks};
+ {T,Toks1} ->
+ {T, Toks1}
+ end.
+
+seq(H,Toks,Rules,WSaction,S)->
+ case seq2(H,Toks,Rules,[],WSaction,S) of
+ {error,E}->
+ {error,E};
+ {error,R,N}->
+ {error,R,N};
+ {Tree,Toks2}->
+ {Tree,Toks2}
+ end.
+
+seq2([],[],_,Tree,_WSa,_S)->
+ {Tree,[]};
+% seq2([],[#xmlElement{name=Name}|_T]=XMLS,_,Tree,_WSa,_S)->
+% {error,{sequence_finish,Name,isnt_in_the_right_place},
+% {{next,XMLS},{act,Tree}}};
+seq2([],[#xmlText{}]=XML,_,Tree,_WSa,_S)->
+ case whitespace_action(XML,remove) of
+ {[],_} ->
+ {error,sequence_finish,{{next,XML},{act,Tree}}};
+ {WS,Rest} ->
+ {WS++Tree,Rest}
+ end;
+seq2([],Rest,_,Tree,_WSa,_S) ->
+ {WS,Rest2}=whitespace_action(Rest,remove),
+ {WS++Tree,Rest2};
+seq2([H|T],Toks,Rules,Tree,WSaction,S) ->
+ {WS,Toks1} = whitespace_action(Toks,ws_action(WSaction,remove)),
+ case parse(H,Toks1,Rules,remove,S) of %% H maybe only match parts of Toks
+ {error,Reason,_XML}->
+ {error,Reason};
+ {error,E}->
+ {error,E};
+ {[],Toks2}->
+ seq2(T,Toks2,Rules,Tree,WSaction,S);
+ {Tree1,Toks2} when is_list(Tree1)->
+ seq2(T,Toks2,Rules,Tree++WS++Tree1,WSaction,S);
+ {Tree1,Toks2}->
+ seq2(T,Toks2,Rules,Tree++WS++[Tree1],WSaction,S)
+ end.
+
+el_name(#xmlElement{name=Name})->
+ Name.
+
+parse_pcdata([#xmlText{}=H|T])->
+ parse_pcdata(T,[H]);
+parse_pcdata(H) ->
+ {[],H}.
+
+parse_pcdata([#xmlText{}=H|T],Acc)->
+ parse_pcdata(T,Acc++[H]);
+parse_pcdata(H,Acc) ->
+ {Acc,H}.
+
+whitespace([]) ->
+ true;
+whitespace([H|T]) when ?whitespace(H) ->
+ whitespace(T);
+whitespace(_) ->
+ false.
+
+whitespace_action(XML,remove) ->
+ whitespace_remove(XML,[]);
+whitespace_action(XML,_) ->
+ {[],XML}.
+
+whitespace_remove([#xmlText{value=V,type=text}=T|R]=L,Acc) ->
+ case whitespace(V) of
+ true ->
+ whitespace_remove(R,[T|Acc]);
+ _ ->
+ {lists:reverse(Acc),L}
+ end;
+whitespace_remove(L,Acc) ->
+ {lists:reverse(Acc),L}.
+
+ws_action(always_preserve=A,_) ->
+ A;
+ws_action(_,B) ->
+ B.
+
+scan_name(N,_) when is_atom(N) ->
+ N;
+scan_name([$\s|T],Acc) ->
+ {list_to_atom(lists:reverse(Acc)),T};
+scan_name([H|T],Acc) ->
+ scan_name(T,[H|Acc]);
+scan_name("",Acc) ->
+ {list_to_atom(lists:reverse(Acc)),[]}.
diff --git a/lib/xmerl/src/xmerl_xlate.erl b/lib/xmerl/src/xmerl_xlate.erl
new file mode 100644
index 0000000000..5c4d9d2540
--- /dev/null
+++ b/lib/xmerl/src/xmerl_xlate.erl
@@ -0,0 +1,50 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2003-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%% Description : Wrapper to xmerl_scan, which reads an XML document
+%%% from disk (alt. consumes a string), processes it, and
+%%% exports is using the specified Callback module.
+-module(xmerl_xlate).
+
+
+-export([file/3,
+ string/3]).
+
+
+-include("xmerl.hrl").
+
+file(F, Title, Callback) ->
+ case file:read_file(F) of
+ {ok, Bin} ->
+ string(binary_to_list(Bin), Title, Callback);
+ Error ->
+ Error
+ end.
+
+string(Str, Title, Callback) ->
+ xmerl_scan:string(Str, [{hook_fun, fun hook/2, {Title, Callback}}]).
+
+
+hook(E = #xmlElement{parents = []}, S) ->
+ {Title, Callback} = xmerl_scan:hook_state(S),
+ Data = xmerl:export([E], Callback, [{title, Title}]),
+ {Data, S};
+hook(X, S) ->
+ {X, S}.
+
diff --git a/lib/xmerl/src/xmerl_xml.erl b/lib/xmerl/src/xmerl_xml.erl
new file mode 100644
index 0000000000..702a654629
--- /dev/null
+++ b/lib/xmerl/src/xmerl_xml.erl
@@ -0,0 +1,62 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2003-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%% Description : Callback module for exporting
+%% complete or simple forms to XML.
+
+-module(xmerl_xml).
+
+-export(['#xml-inheritance#'/0]).
+
+-export(['#root#'/4,
+ '#element#'/5,
+ '#text#'/1]).
+
+-import(xmerl_lib, [markup/3, empty_tag/2, export_text/1]).
+
+-include("xmerl.hrl").
+
+
+'#xml-inheritance#'() -> [].
+
+
+%% The '#text#' function is called for every text segment.
+
+'#text#'(Text) ->
+%io:format("Text=~p~n",[Text]),
+ export_text(Text).
+
+
+%% The '#root#' tag is called when the entire structure has been
+%% exported. It does not appear in the structure itself.
+
+'#root#'(Data, [#xmlAttribute{name=prolog,value=V}], [], _E) ->
+ [V,Data];
+'#root#'(Data, _Attrs, [], _E) ->
+ ["<?xml version=\"1.0\"?>", Data].
+
+
+%% The '#element#' function is the default handler for XML elements.
+
+'#element#'(Tag, [], Attrs, _Parents, _E) ->
+%io:format("Empty Tag=~p~n",[Tag]),
+ empty_tag(Tag, Attrs);
+'#element#'(Tag, Data, Attrs, _Parents, _E) ->
+%io:format("Tag=~p~n",[Tag]),
+ markup(Tag, Attrs, Data).
diff --git a/lib/xmerl/src/xmerl_xpath.erl b/lib/xmerl/src/xmerl_xpath.erl
new file mode 100644
index 0000000000..182a186d2c
--- /dev/null
+++ b/lib/xmerl/src/xmerl_xpath.erl
@@ -0,0 +1,776 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2003-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%% Description : Implements a search engine based on XPath
+
+%% @doc The xmerl_xpath module handles the entire XPath 1.0 spec
+%% XPath expressions typically occurs in XML attributes and are used to addres
+%% parts of an XML document.
+% The grammar is defined in <code>xmerl_xpath_parse.yrl</code>.
+% The core functions are defined in <code>xmerl_xpath_pred.erl</code>.
+%
+% <p>Some useful shell commands for debugging the XPath parser</p>
+% <pre>
+% c(xmerl_xpath_scan).
+% yecc:yecc("xmerl_xpath_parse.yrl", "xmerl_xpath_parse", true, []).
+% c(xmerl_xpath_parse).
+%
+% xmerl_xpath_parse:parse(xmerl_xpath_scan:tokens("position() > -1")).
+% xmerl_xpath_parse:parse(xmerl_xpath_scan:tokens("5 * 6 div 2")).
+% xmerl_xpath_parse:parse(xmerl_xpath_scan:tokens("5 + 6 mod 2")).
+% xmerl_xpath_parse:parse(xmerl_xpath_scan:tokens("5 * 6")).
+% xmerl_xpath_parse:parse(xmerl_xpath_scan:tokens("-----6")).
+% xmerl_xpath_parse:parse(xmerl_xpath_scan:tokens("parent::node()")).
+% xmerl_xpath_parse:parse(xmerl_xpath_scan:tokens("descendant-or-self::node()")).
+% xmerl_xpath_parse:parse(xmerl_xpath_scan:tokens("parent::processing-instruction('foo')")).
+%% </pre>
+%%
+%% @type docEntity() =
+%% xmlElement()
+%% | xmlAttribute()
+%% | xmlText()
+%% | xmlPI()
+%% | xmlComment()
+%% @type nodeEntity() =
+%% xmlElement()
+%% | xmlAttribute()
+%% | xmlText()
+%% | xmlPI()
+%% | xmlNamespace()
+%% | xmlDocument()
+%% @type option_list(). <p>Options allows to customize the behaviour of the
+%% XPath scanner.
+%% </p>
+%% Possible options are:
+%% <dl>
+%% <dt><code>{namespace, #xmlNamespace}</code></dt>
+%% <dd>Set namespace nodes, from XmlNamspace, in xmlContext</dd>
+%% <dt><code>{namespace, Nodes}</code></dt>
+%% <dd>Set namespace nodes in xmlContext.</dd>
+%% </dl>
+
+%% <dt><code>{bindings, Bs}</code></dt>
+%% <dd></dd>
+%% <dt><code>{functions, Fs}</code></dt>
+%% <dd></dd>
+-module(xmerl_xpath).
+
+
+%% main API
+-export([string/2,
+ string/3,
+ string/5]).
+
+%% exported helper functions, internal for the XPath support
+-export([eval_path/3,
+ axis/3, axis/4]).
+
+%% debug function
+-export([write_node/1]).
+
+
+-include("xmerl.hrl").
+-include("xmerl_internal.hrl").
+
+
+-record(state, {context = #xmlContext{},
+ acc = []}).
+
+
+-define(nodeset(NS), #state{context = #xmlContext{nodeset = NS}}).
+-define(context(C), #state{context = C}).
+
+
+
+
+%% @spec string(Str, Doc) -> [docEntity()] | Scalar
+%% @equiv string(Str,Doc, [])
+string(Str, Doc) ->
+ string(Str, Doc, []).
+
+%% @spec string(Str,Doc,Options) ->
+%% [docEntity()] | Scalar
+%% @equiv string(Str,Doc, [],Doc,Options)
+string(Str, Doc, Options) ->
+ string(Str, Doc, [], Doc, Options).
+
+%% @spec string(Str,Node,Parents,Doc,Options) ->
+%% [docEntity()] | Scalar
+%% Str = xPathString()
+%% Node = nodeEntity()
+%% Parents = parentList()
+%% Doc = nodeEntity()
+%% Options = option_list()
+%% Scalar = xmlObj
+%% @doc Extracts the nodes from the parsed XML tree according to XPath.
+%% xmlObj is a record with fields type and value,
+%% where type is boolean | number | string
+string(Str, Node, Parents, Doc, Options) ->
+%% record with fields type and value,
+%% where type is boolean | number | string
+ FullParents =
+ case Parents of
+ [] ->
+ [];
+ [{H, P}|_] when is_atom(H), is_integer(P) ->
+ full_parents(Parents, Doc)
+ end,
+%io:format("string FullParents=~p~n",[FullParents]),
+ ContextNode=#xmlNode{type = node_type(Node),
+ node = Node,
+ parents = FullParents},
+%io:format("string ContextNode=~p~n",[ContextNode]),
+ WholeDoc = whole_document(Doc),
+%io:format("string WholeDoc=~p~n",[WholeDoc]),
+ Context=(new_context(Options))#xmlContext{context_node = ContextNode,
+ whole_document = WholeDoc},
+%io:format("string Context=~p~n",[Context]),
+ #state{context = NewContext} = match(Str, #state{context = Context}),
+%io:format("string NewContext=~p~n",[NewContext]),
+ case NewContext#xmlContext.nodeset of
+ ScalObj = #xmlObj{type=Scalar}
+ when Scalar == boolean; Scalar == number; Scalar == string ->
+ ScalObj;
+ #xmlObj{type=nodeset,value=NodeSet} ->
+ NodeSet;
+ _ ->
+ [N || #xmlNode{node = N} <- NewContext#xmlContext.nodeset]
+ end.
+
+
+whole_document(#xmlDocument{} = Doc) ->
+ #xmlNode{type = root_node,
+ node = Doc,
+ parents = []};
+whole_document(Other) ->
+ #xmlNode{type = root_node,
+ node = #xmlDocument{content = Other},
+ parents = []}.
+
+
+new_context(Options) ->
+ new_context(Options, #xmlContext{}).
+
+new_context([{namespace, #xmlNamespace{nodes = Nodes}}|T], C) ->
+ new_context(T, C#xmlContext{namespace = ns_nodes(Nodes)});
+new_context([{namespace, Nodes}|T], C) ->
+ new_context(T, C#xmlContext{namespace = ns_nodes(Nodes)});
+new_context([{bindings, Bs}|T], C) ->
+ new_context(T, C#xmlContext{bindings = Bs});
+new_context([{functions, Fs}|T], C) ->
+ new_context(T, C#xmlContext{functions = Fs});
+new_context([], C) ->
+ C.
+
+
+ns_nodes([{Prefix, URI}|T]) ->
+ [{to_string(Prefix), to_atom(URI)}|ns_nodes(T)];
+ns_nodes([]) ->
+ [].
+
+full_parents(Ps, Doc) ->
+ full_parents1(lists:reverse(Ps), [Doc], []).
+
+full_parents1([{Name, Pos}|Ns], Content, Parents) ->
+ E = locate_element(Name, Pos, Content),
+ PN = #xmlNode{type = element,
+ node = E,
+ parents = Parents},
+ full_parents1(Ns, get_content(E), [PN|Parents]);
+full_parents1([], _E, Parents) ->
+ Parents.
+
+
+locate_element(Name, Pos, [E = #xmlElement{name = Name, pos = Pos}|_]) ->
+ E;
+locate_element(_Name, Pos, [#xmlElement{pos = P}|_]) when P >= Pos ->
+ %% we've passed Pos (P > Pos) or the name is wrong (P == Pos)
+ exit(invalid_parents);
+locate_element(_Name, _Pos, []) ->
+ exit(invalid_parents);
+locate_element(Name, Pos, [_|T]) ->
+ locate_element(Name, Pos, T).
+
+
+match(Str, S = #state{}) ->
+ Tokens = xmerl_xpath_scan:tokens(Str),
+ case xmerl_xpath_parse:parse(Tokens) of
+ {ok, Expr} ->
+ match_expr(Expr, S);
+ Error ->
+ Error
+ end.
+
+
+match_expr({path, Type, Arg}, S) ->
+ eval_path(Type, Arg, S#state.context);
+%% PrimaryExpr
+match_expr(PrimExpr,S) ->
+ eval_primary_expr(PrimExpr,S).
+
+
+
+
+
+path_expr({refine, StepExpr1, StepExpr2}, S) ->
+ ?dbg("StepExpr1=~p StepExpr2=~p~n", [StepExpr1,StepExpr2]),
+ ?dbg("length(nodeset) = ~p~n",
+ [length((S#state.context)#xmlContext.nodeset)]),
+ S1 = path_expr(StepExpr1, S),
+ ?dbg("length(nodeset1) = ~p~n",
+ [length((S1#state.context)#xmlContext.nodeset)]),
+ path_expr(StepExpr2, S1);
+path_expr({step, {Axis, NodeTest, PredExpr}}, S = #state{context = C,
+ acc = Acc}) ->
+ ?dbg("PredExpr = ~p~n", [PredExpr]),
+ NewContext = axis(Axis, NodeTest, C, Acc),
+ pred_expr(PredExpr, S#state{context = NewContext});
+path_expr('/', S) ->
+ S.
+
+
+pred_expr([], S) ->
+ S;
+pred_expr([{pred, Pred}|Preds], S = #state{}) ->
+ ?dbg("Pred = ~p~n", [Pred]),
+ NewS = eval_pred(Pred, S),
+ pred_expr(Preds, NewS).
+
+%% simple case: the predicate is a number, e.g. para[5].
+%% No need to iterate over all nodes in the nodeset; we know what to do.
+%%
+eval_pred({number, N0},
+ S = #state{context = C = #xmlContext{nodeset = NS,
+ axis_type = AxisType}}) ->
+ Len = length(NS),
+ case Len>=N0 of
+ true ->
+ N = case AxisType of
+ forward ->
+ N0;
+ reverse ->
+ Len + 1 - N0
+ end,
+ NewNodeSet = [lists:nth(N, NS)],
+ NewContext = C#xmlContext{nodeset = NewNodeSet},
+ S#state{context = NewContext};
+ false -> S#state{context = C#xmlContext{nodeset = []}}
+ end;
+eval_pred(Predicate, S = #state{context = C =
+ #xmlContext{nodeset = NodeSet}}) ->
+ NewNodeSet =
+ lists:filter(
+ fun(Node) ->
+ %io:format("current node: ~p~n", [write_node(Node)]),
+ ThisContext = C#xmlContext{context_node = Node},
+ xmerl_xpath_pred:eval(Predicate, ThisContext)
+ end, NodeSet),
+ NewContext = C#xmlContext{nodeset = NewNodeSet},
+ S#state{context = NewContext}.
+
+
+
+%% write_node(Node::xmlNode()) -> {Type,Pos,Name,Parents}
+%% Helper function to access essential information from the xmlNode record.
+%% @hidden
+write_node(#xmlNode{pos = Pos,
+ node = #xmlAttribute{name = Name,
+ parents = Ps}}) ->
+ {attribute, Pos, Name, Ps};
+write_node(#xmlNode{pos = Pos,
+ node = #xmlElement{name = Name,
+ parents = Ps}}) ->
+ {element, Pos, Name, Ps};
+write_node(#xmlNode{pos = Pos,
+ node = #xmlText{value = Txt,
+ parents = Ps}}) ->
+ {text, Pos, Txt, Ps};
+write_node(_) ->
+ other.
+
+
+%% eval_path(Type,Arg,S::state()) -> state()
+%% Eval path
+%% @hidden
+eval_path(union, {PathExpr1, PathExpr2}, C = #xmlContext{}) ->
+ S = #state{context = C},
+ S1 = match_expr(PathExpr1, S),
+%% NewNodeSet = (S1#state.context)#xmlContext.nodeset,
+ S2 = match_expr(PathExpr2, S1#state{context=C}),
+ NodeSet1 = (S1#state.context)#xmlContext.nodeset,
+ NodeSet2 = (S2#state.context)#xmlContext.nodeset,
+ NewNodeSet = ordsets:to_list(ordsets:union(ordsets:from_list(NodeSet1),
+ ordsets:from_list(NodeSet2))),
+ S2#state{context=(S2#state.context)#xmlContext{nodeset=NewNodeSet}};
+eval_path(abs, PathExpr, C = #xmlContext{}) ->
+ NodeSet = [C#xmlContext.whole_document],
+ Context = C#xmlContext{nodeset = NodeSet},
+ S = #state{context = Context},
+ path_expr(PathExpr, S);
+eval_path(rel, PathExpr, C = #xmlContext{}) ->
+ NodeSet = [C#xmlContext.context_node],
+ Context = C#xmlContext{nodeset = NodeSet},
+ S = #state{context = Context},
+ path_expr(PathExpr, S);
+eval_path(filter, {PathExpr, PredExpr}, C = #xmlContext{}) ->
+ S = #state{context = C},
+ S1 = path_expr(PathExpr, S),
+ pred_expr(PredExpr, S1).
+
+eval_primary_expr(FC = {function_call,_,_},S = #state{context = Context}) ->
+%% NewNodeSet = xmerl_xpath_pred:eval(FC, Context),
+ NewNodeSet = xmerl_xpath_lib:eval(primary_expr, FC, Context),
+ NewContext = Context#xmlContext{nodeset = NewNodeSet},
+ S#state{context = NewContext};
+eval_primary_expr(PrimExpr,_S) ->
+ exit({primary_expression,{not_implemented, PrimExpr}}).
+
+
+%% axis(Axis,NodeTest,Context::xmlContext()) -> xmlContext()
+%% axis(Axis,NodeTest,Context,[])
+%% @hidden
+axis(Axis, NodeTest, Context) ->
+ axis(Axis, NodeTest, Context, []).
+
+
+%% axis(Axis,NodeTest,Context::xmlContext(),Acc) -> xmlContext()
+%%
+%% An axis specifies the tree relationship between the nodes selected by
+%% the location step and the context node.
+%% @hidden
+axis(Axis, NodeTest, Context = #xmlContext{nodeset = NS0}, Acc) ->
+ NewNodeSet=lists:foldr(
+ fun(N, AccX) ->
+ axis1(Axis, NodeTest, N, AccX, Context)
+ end, Acc, NS0),
+ update_nodeset(fwd_or_reverse(Axis, Context), NewNodeSet).
+
+
+axis1(self, Tok, N, Acc, Context) ->
+ match_self(Tok, N, Acc, Context);
+axis1(descendant, Tok, N, Acc, Context) ->
+ match_descendant(Tok, N, Acc, Context);
+axis1(child, Tok, N, Acc, Context) ->
+ match_child(Tok, N, Acc, Context);
+axis1(parent, Tok, N, Acc, Context) ->
+ match_parent(Tok, N, Acc, Context);
+axis1(ancestor, Tok, N, Acc, Context) ->
+ match_ancestor(Tok, N, Acc, Context);
+axis1(following_sibling, Tok, N, Acc, Context) ->
+ match_following_sibling(Tok, N, Acc, Context);
+axis1(preceding_sibling, Tok, N, Acc, Context) ->
+ match_preceding_sibling(Tok, N, Acc, Context);
+axis1(following, Tok, N, Acc, Context) ->
+ match_following(Tok, N, Acc, Context);
+axis1(preceding, Tok, N, Acc, Context) ->
+ match_preceding(Tok, N, Acc, Context);
+axis1(attribute, Tok, N, Acc, Context) ->
+ match_attribute(Tok, N, Acc, Context);
+%axis1(namespace, Tok, N, Acc, Context) ->
+% match_namespace(Tok, N, Acc, Context);
+axis1(ancestor_or_self, Tok, N, Acc, Context) ->
+ match_ancestor_or_self(Tok, N, Acc, Context);
+axis1(descendant_or_self, Tok, N, Acc, Context) ->
+ match_descendant_or_self(Tok, N, Acc, Context).
+
+
+fwd_or_reverse(ancestor, Context) ->
+ reverse_axis(Context);
+fwd_or_reverse(ancestor_or_self, Context) ->
+ reverse_axis(Context);
+fwd_or_reverse(preceding_sibling, Context) ->
+ reverse_axis(Context);
+fwd_or_reverse(preceding, Context) ->
+ reverse_axis(Context);
+fwd_or_reverse(_, Context) ->
+ forward_axis(Context).
+
+reverse_axis(Context) ->
+ Context#xmlContext{axis_type = reverse}.
+forward_axis(Context) ->
+ Context#xmlContext{axis_type = forward}.
+
+
+
+match_self(Tok, N, Acc, Context) ->
+ case node_test(Tok, N, Context) of
+ true ->
+ [N|Acc];
+ false ->
+ Acc
+ end.
+
+
+match_descendant(Tok, N, Acc, Context) ->
+ #xmlNode{parents = Ps, node = Node, type = Type} = N,
+ case Type of
+ El when El == element; El == root_node ->
+ NewPs = [N|Ps],
+ match_desc(get_content(Node), NewPs, Tok, Acc, Context);
+ _Other ->
+ Acc
+ end.
+
+
+match_desc([E = #xmlElement{}|T], Parents, Tok, Acc, Context) ->
+ Acc1 = match_desc(T, Parents, Tok, Acc, Context),
+ N = #xmlNode{type = node_type(E),
+ node = E,
+ parents = Parents},
+ NewParents = [N|Parents],
+ Acc2 = match_desc(get_content(E), NewParents, Tok, Acc1, Context),
+ match_self(Tok, N, Acc2, Context);
+match_desc([E|T], Parents, Tok, Acc, Context) ->
+ Acc1 = match_desc(T, Parents, Tok, Acc, Context),
+ N = #xmlNode{node = E,
+ type = node_type(E),
+ parents = Parents},
+ match_self(Tok, N, Acc1, Context);
+match_desc([], _Parents, _Tok, Acc, _Context) ->
+ Acc.
+
+
+
+%% "The 'descendant-or-self' axis contains the context node and the
+%% descendants of the context node."
+match_descendant_or_self(Tok, N, Acc, Context) ->
+ Acc1 = match_descendant(Tok, N, Acc, Context),
+ match_self(Tok, N, Acc1, Context).
+
+
+match_child(Tok, N, Acc, Context) ->
+ %io:format("match_child(~p)~n", [write_node(N)]),
+ #xmlNode{parents = Ps, node = Node, type = Type} = N,
+ case Type of
+ El when El == element; El == root_node ->
+ NewPs = [N|Ps],
+ lists:foldr(
+ fun(E, AccX) ->
+ ThisN = #xmlNode{type = node_type(E),
+ node = E,
+ parents = NewPs},
+ match_self(Tok, ThisN, AccX, Context)
+ end, Acc, get_content(Node));
+ _Other ->
+ Acc
+ end.
+
+
+%% "The 'parent' axis contains the parent of the context node,
+%% if there is one."
+match_parent(Tok, N, Acc, Context) ->
+ case N#xmlNode.parents of
+ [] ->
+ Acc;
+ [PN|_] ->
+ match_self(Tok, PN, Acc, Context)
+ end.
+
+
+%% "The 'ancestor' axis contains the ancestors of the context node;
+%% the ancestors of the context node consists of the parent of the context
+%% node and the parent's parent and so on; thus, the ancestor axis will
+%% always include the root node, unless the context node is the root node."
+match_ancestor(Tok, N, Acc, Context) ->
+ Parents = N#xmlNode.parents,
+ lists:foldl(
+ fun(PN, AccX) ->
+ match_self(Tok, PN, AccX, Context)
+ end, Acc, Parents).
+
+
+
+
+%% "The 'ancestor-or-self' axis contains the context node and the ancestors
+%% of the context node; thus, the acestor axis will always include the
+%% root node."
+match_ancestor_or_self(Tok, N, Acc, Context) ->
+ Acc1 = match_self(Tok, N, Acc, Context),
+ match_ancestor(Tok, N, Acc1, Context).
+
+
+match_following_sibling(_Tok, #xmlAttribute{}, Acc, _Context) ->
+ Acc;
+match_following_sibling(_Tok, #xmlNamespace{}, Acc, _Context) ->
+ Acc;
+
+match_following_sibling(Tok, N, Acc, Context) ->
+ #xmlNode{parents = Ps, node = Node} = N,
+ case Ps of
+ [#xmlNode{type = element,
+ node = #xmlElement{} = PNode}|_] ->
+ FollowingSiblings = lists:nthtail(get_position(Node),
+ get_content(PNode)),
+ lists:foldr(
+ fun(E, AccX) ->
+ ThisN = #xmlNode{type = node_type(E),
+ node = E,
+ parents = Ps},
+ match_self(Tok, ThisN, AccX, Context)
+ end, Acc, FollowingSiblings);
+ _Other ->
+ Acc
+ end.
+
+
+%% "The 'following' axis contains all nodes in the same document as the
+%% context node that are after the context node in document order, excluding
+%% any descendants and excluding attribute nodes and namespace nodes."
+match_following(Tok, N, Acc, Context) ->
+ #xmlNode{parents = Ps, node = Node} = N,
+ case Ps of
+ [#xmlNode{type = element,
+ node = #xmlElement{} = PNode} = P|_] ->
+ FollowingSiblings = lists:nthtail(get_position(Node),
+ get_content(PNode)),
+ Acc0 = match_following(Tok, P, Acc, Context),
+ lists:foldr(
+ fun(E, AccX) ->
+ ThisN = #xmlNode{type = node_type(E),
+ node = E,
+ parents = Ps},
+ match_descendant_or_self(Tok, ThisN, AccX, Context)
+ end, Acc0, FollowingSiblings);
+ _Other ->
+ Acc
+ end.
+
+
+%% "The preceding-sibling axis contains all the preceding siblings of the
+%% context node; if the context node is an attribute node or namespace node,
+%% the preceding-sibling axis is empty."
+match_preceding_sibling(_Tok, #xmlAttribute{}, Acc, _Context) ->
+ Acc;
+match_preceding_sibling(_Tok, #xmlNamespace{}, Acc, _Context) ->
+ Acc;
+
+match_preceding_sibling(Tok, N, Acc, Context) ->
+ #xmlNode{parents = Ps, node = Node} = N,
+ case Ps of
+ [#xmlNode{type = element,
+ node = #xmlElement{} = PNode}|_] ->
+ PrecedingSiblings = lists:sublist(get_content(PNode), 1,
+ get_position(Node) - 1),
+ lists:foldr(
+ fun(E, AccX) ->
+ ThisN = #xmlNode{type = node_type(E),
+ node = E,
+ parents = Ps},
+ match_self(Tok, ThisN, AccX, Context)
+ end, Acc, PrecedingSiblings);
+ _Other ->
+ Acc
+ end.
+
+
+%% "The 'preceding' axis contains all nodes in the same document as the context
+%% node that are before the context node in document order, exluding any
+%% ancestors and excluding attribute nodes and namespace nodes."
+match_preceding(Tok, N, Acc, Context) ->
+ #xmlNode{parents = Ps, node = Node} = N,
+ case Ps of
+ [#xmlNode{type = element,
+ node = #xmlElement{} = PNode} = P|_] ->
+ PrecedingSiblings = lists:sublist(get_content(PNode), 1,
+ get_position(Node) - 1),
+ Acc0 = lists:foldr(
+ fun(E, AccX) ->
+ ThisN = #xmlNode{type = node_type(E),
+ node = E,
+ parents = Ps},
+ match_descendant_or_self(Tok, ThisN,
+ AccX, Context)
+ end, Acc, PrecedingSiblings),
+ match_preceding(Tok, P, Acc0, Context);
+ _Other ->
+ Acc
+ end.
+
+
+%% "The 'attribute' axis contains the attributes of the context node; the
+%% axis will be empty unless the context node is an element."
+match_attribute(Tok, N, Acc, Context) ->
+ case N#xmlNode.type of
+ element ->
+ #xmlNode{parents = Ps, node = E} = N,
+ lists:foldr(
+ fun(A, AccX) ->
+ ThisN = #xmlNode{type = attribute,
+ node = A,
+ parents = [N|Ps]},
+ match_self(Tok, ThisN, AccX, Context)
+ end, Acc, E#xmlElement.attributes);
+ _Other ->
+ %%[]
+ Acc
+ end.
+
+node_type(#xmlAttribute{}) -> attribute;
+node_type(#xmlElement{}) -> element;
+node_type(#xmlText{}) -> text;
+node_type(#xmlPI{}) -> processing_instruction;
+node_type(#xmlNamespace{}) -> namespace;
+node_type(#xmlDocument{}) -> root_node.
+
+%% "The namespace axis contains the namespace nodes of the context node;
+%% the axis will be empty unless the context node is an element."
+%match_namespace(_Tok, _N, _Acc, _Context) ->
+ %% TODO: IMPLEMENT NAMESPACE AXIS
+% erlang:fault(not_yet_implemented).
+
+
+update_nodeset(Context = #xmlContext{axis_type = AxisType}, NodeSet) ->
+ MapFold =
+ case AxisType of
+ forward ->
+ mapfoldl;
+ reverse ->
+ mapfoldr
+ end,
+ {Result, _N} =
+ lists:MapFold(fun(Node, N) ->
+ {Node#xmlNode{pos = N}, N + 1}
+ end, 1, NodeSet),
+ Context#xmlContext{nodeset = Result}.
+
+
+
+node_test(F, N, Context) when is_function(F) ->
+ F(N, Context);
+node_test({wildcard, _}, #xmlNode{type=ElAt}, _Context)
+ when ElAt==element; ElAt==attribute ->
+ true;
+node_test({prefix_test, Prefix}, #xmlNode{node = N}, _Context) ->
+ case N of
+ #xmlElement{nsinfo = {Prefix, _}} -> true;
+ #xmlAttribute{nsinfo = {Prefix, _}} -> true;
+ _ ->
+ false
+ end;
+node_test({name, {Tag, _Prefix, _Local}},
+ #xmlNode{node = #xmlElement{name = Tag}}=_N, _Context) ->
+ %io:format("node_test({tag, ~p}, ~p) -> true.~n", [Tag, write_node(_N)]),
+ true;
+node_test({name, {Tag, Prefix, Local}},
+ #xmlNode{node = #xmlElement{name = Name,
+ expanded_name = EExpName,
+ nsinfo = {_Prefix1, _}
+ }}, Context) ->
+ case expanded_name(Prefix, Local, Context) of
+ [] ->
+ Res = (Tag == Name),
+ ?dbg("node_test(~p, ~p) -> ~p.~n",
+ [{Tag, Prefix, Local}, write_node(Name), Res]),
+ Res;
+ ExpName ->
+ Res = (ExpName == EExpName),
+ ?dbg("node_test(~p, ~p) -> ~p.~n",
+ [{Tag, Prefix, Local}, write_node(Name), Res]),
+ Res
+ end;
+node_test({name, {_Tag, Prefix, Local}},
+ #xmlNode{node = #xmlElement{name = Name,
+ expanded_name = _EExpName,
+ namespace = NS
+ }}, Context) ->
+ case expanded_name(Prefix, Local, Context) of
+ [] ->
+ ?dbg("node_test(~p, ~p) -> ~p.~n",
+ [{_Tag, Prefix, Local}, write_node(Name), false]),
+ false;
+ ExpName ->
+ Res = (ExpName == {NS#xmlNamespace.default,Name}),
+ ?dbg("node_test(~p, ~p) -> ~p.~n",
+ [{_Tag, Prefix, Local}, write_node(Name), Res]),
+ Res
+ end;
+node_test({name, {Tag,_Prefix,_Local}},
+ #xmlNode{node = #xmlAttribute{name = Tag}}, _Context) ->
+ true;
+node_test({name, {_Tag, Prefix, Local}},
+ #xmlNode{node = #xmlAttribute{expanded_name = {URI, Local},
+ nsinfo = {_Prefix1, _},
+ namespace = NS}}, _Context) ->
+ NSNodes = NS#xmlNamespace.nodes,
+ case lists:keysearch(Prefix, 1, NSNodes) of
+ {value, {_, URI}} ->
+ ?dbg("node_test(~, ~p) -> true.~n",
+ [{_Tag, Prefix, Local}, write_node(NSNodes)]),
+ true;
+ false ->
+ ?dbg("node_test(~, ~p) -> false.~n",
+ [{_Tag, Prefix, Local}, write_node(NSNodes)]),
+ false
+ end;
+node_test({node_type, NT}, #xmlNode{node = N}, _Context) ->
+ case {NT, N} of
+ {text, #xmlText{}} ->
+ true;
+ {node, _} ->
+ true;
+ {attribute, #xmlAttribute{}} ->
+ true;
+ {namespace, #xmlNamespace{}} ->
+ true;
+ _ ->
+ false
+ end;
+node_test({processing_instruction, {literal, _, Name}},
+ #xmlNode{node = {processing_instruction, Name, _Data}}, _Context) ->
+ true;
+node_test(_Other, _N, _Context) ->
+ %io:format("node_test(~p, ~p) -> false.~n", [_Other, write_node(_N)]),
+ false.
+
+
+expanded_name(Prefix, Local, #xmlContext{namespace = NS}) ->
+ case lists:keysearch(Prefix, 1, NS) of
+ {value, {_, URI}} ->
+ {URI, list_to_atom(Local)};
+ false ->
+ []
+ end.
+
+
+to_atom(A) when is_atom(A) -> A;
+to_atom(S) when is_list(S) -> list_to_atom(S).
+
+to_string(A) when is_atom(A) -> atom_to_list(A);
+to_string(S) when is_list(S) -> S.
+
+
+get_content(#xmlElement{content = C}) when is_list(C) ->
+ C;
+get_content(#xmlElement{content = F} = E) when is_function(F) ->
+ case F() of
+ C when is_list(C) ->
+ C;
+ _Other ->
+ exit({bad_content, E})
+ end;
+get_content(#xmlDocument{content = C}) when is_list(C) ->
+ C;
+get_content(#xmlDocument{content = C}) ->
+ [C].
+
+
+get_position(#xmlElement{pos = N}) ->
+ N;
+get_position(#xmlText{pos = N}) ->
+ N.
diff --git a/lib/xmerl/src/xmerl_xpath_lib.erl b/lib/xmerl/src/xmerl_xpath_lib.erl
new file mode 100644
index 0000000000..cfd0e36667
--- /dev/null
+++ b/lib/xmerl/src/xmerl_xpath_lib.erl
@@ -0,0 +1,53 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2008-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+-module(xmerl_xpath_lib).
+
+-include("xmerl.hrl").
+
+-export([eval/3]).
+
+-define(string(X), #xmlObj{type = string,
+ value = X}).
+-define(number(X), #xmlObj{type = number,
+ value = X}).
+
+
+eval(primary_expr,PrimExpr,C) ->
+ primary_expr(PrimExpr, C);
+eval(predicate,Pred,C) ->
+ xmerl_xpath_pred:eval(Pred,C).
+
+primary_expr({number, N}, _C) ->
+ ?number(N);
+primary_expr({literal, S}, _C) ->
+ ?string(S);
+primary_expr({function_call, F, Args}, C) ->
+ case xmerl_xpath_pred:core_function(F) of
+ {true, F1} ->
+ xmerl_xpath_pred:F1(C, Args);
+ true ->
+ xmerl_xpath_pred:F(C, Args);
+ false ->
+ %% here, we should look up the function in the context provided
+ %% by the caller, but we haven't figured this out yet.
+ exit({not_a_core_function, F})
+ end.
+
diff --git a/lib/xmerl/src/xmerl_xpath_parse.yrl b/lib/xmerl/src/xmerl_xpath_parse.yrl
new file mode 100644
index 0000000000..37576b9e61
--- /dev/null
+++ b/lib/xmerl/src/xmerl_xpath_parse.yrl
@@ -0,0 +1,311 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2003-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%% Description : Yecc spec for XPATH grammar
+%% This version of the parser is based on the XPATH spec:
+%% http://www.w3.org/TR/1999/REC-xpath-19991116 (XPATH version 1.0)
+
+
+Nonterminals
+ 'LocationPath'
+ 'AbsoluteLocationPath'
+ 'RelativeLocationPath'
+ 'Step'
+%% 'AxisSpecifier'
+ 'NodeTest'
+ 'Predicate'
+ 'PredicateExpr'
+ 'AbbreviatedAbsoluteLocationPath'
+ 'AbbreviatedRelativeLocationPath'
+ 'AbbreviatedStep'
+%% 'AbbreviatedAxisSpecifier'
+ 'Expr'
+ 'PrimaryExpr'
+ 'FunctionCall'
+ 'Argument'
+ 'UnionExpr'
+ 'PathExpr'
+ 'FilterExpr'
+ 'OrExpr'
+ 'AndExpr'
+ 'EqualityExpr'
+ 'RelationalExpr'
+ 'AdditiveExpr'
+ 'MultiplicativeExpr'
+ 'UnaryExpr'
+%% 'Operator'
+%% 'OperatorName'
+ 'MultiplyOperator'
+ 'NameTest'
+ '<PredicateList>'
+ '<PredicateMember>'
+ '<ArgumentList>'
+ '<ArgumentMember>'
+ .
+
+Terminals
+ 'number'
+ 'axis'
+ 'node_type'
+ 'literal'
+ 'prefix_test'
+ 'var_reference'
+ 'function_name'
+ 'name'
+ 'processing-instruction'
+ 'wildcard'
+ '(' ')' '[' ']' '.' '..' '@' ',' '::'
+ 'and' 'or' 'mod' 'div'
+ '/' '//' '|' '+' '-' '=' '!=' '<' '<=' '>' '>='
+ '*'
+ .
+
+Rootsymbol 'Expr'.
+
+Endsymbol '$end' .
+
+Left 100 'or' .
+Left 200 'and' .
+Left 300 '=' .
+Left 300 '!=' .
+Left 400 '<' .
+Left 400 '>=' .
+Left 400 '>' .
+Left 400 '<=' .
+Unary 500 '-' .
+
+Expect 2.
+
+%%------------------------------------------------------------
+%% Clauses
+%%
+
+%% [1]
+'LocationPath' -> 'RelativeLocationPath' : {path, rel, '$1'} .
+'LocationPath' -> 'AbsoluteLocationPath' : {path, abs, '$1'}.
+
+%% [2]
+'AbsoluteLocationPath' -> '/' 'RelativeLocationPath' : '$2' .
+'AbsoluteLocationPath' -> '/' : '/' .
+
+%% [3]
+'RelativeLocationPath' -> 'AbbreviatedAbsoluteLocationPath' : '$1' .
+'RelativeLocationPath' -> 'Step' : '$1' .
+'RelativeLocationPath' -> 'RelativeLocationPath' '/' 'Step' :
+ {refine, '$1', '$3'} .
+'RelativeLocationPath' -> 'AbbreviatedRelativeLocationPath' : '$1' .
+
+%% [4]
+'Step' -> 'axis' '::' 'NodeTest' '<PredicateList>'
+ : {step, {value('$1'), '$3', '$4'}} .
+'Step' -> 'axis' '::' 'NodeTest'
+ : {step, {value('$1'), '$3', []}} .
+'Step' -> '@' 'name' '<PredicateList>'
+ : {step, {value('$1'), '$2', '$3'}} .
+'Step' -> '@' 'name'
+ : {step, {'attribute', '$2', []}} .
+'Step' -> 'NodeTest' '<PredicateList>'
+ : {step, {'child', '$1', '$2'}} .
+'Step' -> 'NodeTest'
+ : {step, {'child', '$1', []}} .
+'Step' -> 'AbbreviatedStep'
+ : {abbrev_step, '$1'} .
+
+
+'<PredicateList>' -> '<PredicateMember>' : lists:reverse('$1') .
+
+
+'<PredicateMember>' -> '<PredicateMember>' 'Predicate'
+ : ['$2'|'$1'] .
+'<PredicateMember>' -> 'Predicate' : ['$1'] .
+
+
+%% [5]
+%% 'AxisSpecifier' -> 'axis' '::' : '$1' .
+%% 'AxisSpecifier' -> 'AbbreviatedAxisSpecifier' : '$1' .
+
+
+%% [7]
+'NodeTest' -> 'NameTest' : '$1' .
+'NodeTest' -> 'node_type' '(' ')' : {node_type, value('$1')} .
+'NodeTest' -> 'processing-instruction' '(' 'literal' ')'
+ : {processing_instruction, value('$3')} .
+
+
+%% [8]
+'Predicate' -> '[' 'PredicateExpr' ']' : {pred, '$2'} .
+
+%% [9]
+'PredicateExpr' -> 'Expr' : '$1' .
+
+%% [10]
+'AbbreviatedAbsoluteLocationPath' -> '//' 'RelativeLocationPath'
+ : {'//', '$2'} .
+
+%% [11]
+'AbbreviatedRelativeLocationPath' -> 'RelativeLocationPath' '//' 'Step'
+ : {'$1', '//', '$3'} .
+
+%% [12]
+'AbbreviatedStep' -> '.' : '$1' .
+'AbbreviatedStep' -> '..' : '$1' .
+
+%% [13]
+%% 'AbbreviatedAxisSpecifier' -> '$empty' : 'child' .
+%% 'AbbreviatedAxisSpecifier' -> '@' : '$1' .
+
+%% [14]
+'Expr' -> 'OrExpr' : '$1' .
+
+%% [15]
+'PrimaryExpr' -> 'var_reference' : {variable_reference, value('$1')} .
+'PrimaryExpr' -> '(' Expr ')' : '$2' .
+'PrimaryExpr' -> 'literal' : {literal, value('$1')} .
+'PrimaryExpr' -> 'number' : {number, value('$1')} .
+'PrimaryExpr' -> 'FunctionCall' : '$1' .
+
+
+%% [16]
+'FunctionCall' -> 'function_name' '(' ')' : {function_call, value('$1'), []} .
+'FunctionCall' -> 'function_name' '(' '<ArgumentList>' ')'
+ : {function_call, value('$1'), '$3'} .
+
+'<ArgumentList>' -> '<ArgumentMember>' : lists:reverse('$1') .
+
+'<ArgumentMember>' -> '<ArgumentMember>' ',' 'Argument'
+ : ['$3'|'$1'] .
+'<ArgumentMember>' -> 'Argument' : ['$1'] .
+
+
+%% [17]
+'Argument' -> 'Expr' : '$1' .
+
+
+%% [18]
+'UnionExpr' -> 'PathExpr' : '$1' .
+'UnionExpr' -> 'UnionExpr' '|' 'PathExpr' : {path, union, {'$1', '$3'}} .
+
+
+%% [19]
+'PathExpr' -> 'LocationPath' : '$1' .
+'PathExpr' -> 'FilterExpr' : '$1' .
+'PathExpr' -> 'FilterExpr' '/' 'RelativeLocationPath' : {refine, '$1', '$3'} .
+'PathExpr' -> 'FilterExpr' '//' 'RelativeLocationPath' : {'$1', '//', '$3'} .
+
+%% [20]
+'FilterExpr' -> 'PrimaryExpr' : '$1' .
+'FilterExpr' -> 'FilterExpr' 'Predicate' : {path, filter, {'$1', '$2'}} .
+
+
+%% [21]
+'OrExpr' -> 'AndExpr' : '$1' .
+'OrExpr' -> 'OrExpr' 'or' 'AndExpr'
+ : {bool, 'or', '$1', '$3'} .
+
+
+%% [22]
+'AndExpr' -> 'EqualityExpr' : '$1' .
+'AndExpr' -> 'AndExpr' 'and' 'EqualityExpr'
+ : {bool, 'and', '$1', '$3'} .
+
+%% [23]
+'EqualityExpr' -> 'RelationalExpr' : '$1' .
+'EqualityExpr' -> 'EqualityExpr' '=' 'RelationalExpr'
+ : {comp, '=', '$1', '$3'} .
+'EqualityExpr' -> 'EqualityExpr' '!=' 'RelationalExpr'
+ : {comp, '!=', '$1', '$3'} .
+
+%%[24]
+'RelationalExpr' -> 'AdditiveExpr' : '$1' .
+'RelationalExpr' -> 'RelationalExpr' '<' 'AdditiveExpr'
+ : {comp, '<', '$1', '$3'} .
+'RelationalExpr' -> 'RelationalExpr' '>' 'AdditiveExpr'
+ : {comp, '>', '$1', '$3'} .
+'RelationalExpr' -> 'RelationalExpr' '<=' 'AdditiveExpr'
+ : {comp, '<=', '$1', '$3'} .
+'RelationalExpr' -> 'RelationalExpr' '>=' 'AdditiveExpr'
+ : {comp, '>=', '$1', '$3'} .
+
+
+%% [25]
+'AdditiveExpr' -> 'MultiplicativeExpr' : '$1' .
+'AdditiveExpr' -> 'AdditiveExpr' '+' 'MultiplicativeExpr'
+ : {arith, '+', '$1', '$3'} .
+'AdditiveExpr' -> 'AdditiveExpr' '-' 'MultiplicativeExpr'
+ : {arith, '-', '$1', '$3'} .
+
+
+%% [26]
+'MultiplicativeExpr' -> 'UnaryExpr' : '$1' .
+'MultiplicativeExpr' -> 'MultiplicativeExpr' 'MultiplyOperator' 'UnaryExpr'
+ : {arith, '$2', '$1', '$3'} .
+'MultiplicativeExpr' -> 'MultiplicativeExpr' 'div' 'UnaryExpr'
+ : {arith, 'div', '$1', '$3'} .
+'MultiplicativeExpr' -> 'MultiplicativeExpr' 'mod' 'UnaryExpr'
+ : {arith, 'mod', '$1', '$3'} .
+
+
+%% [27]
+'UnaryExpr' -> 'UnionExpr' : '$1' .
+'UnaryExpr' -> '-' UnaryExpr : {'negative', '$2'} .
+
+
+
+%% [32]
+%% 'Operator' -> 'OperatorName' : '$1' .
+%% 'Operator' -> 'MultiplyOperator' : '$1' .
+%% 'Operator' -> '/' : '$1' .
+%% 'Operator' -> '//' : '$1' .
+%% 'Operator' -> '|' : '$1' .
+%% 'Operator' -> '+' : '$1' .
+%% 'Operator' -> '-' : '$1' .
+%% 'Operator' -> '=' : '$1' .
+%% 'Operator' -> '!=' : '$1' .
+%% 'Operator' -> '<' : '$1' .
+%% 'Operator' -> '<=' : '$1' .
+%% 'Operator' -> '>' : '$1' .
+%% 'Operator' -> '>=' : '$1' .
+
+%% [33]
+%% 'OperatorName' -> 'and' : '$1' .
+%% 'OperatorName' -> 'mod' : '$1' .
+%% 'OperatorName' -> 'div' : '$1' .
+
+%% [34]
+'MultiplyOperator' -> '*' : '*' .
+
+
+%% [37]
+'NameTest' -> 'wildcard' : {wildcard, value('$1')} .
+'NameTest' -> 'prefix_test' : {prefix_test, value('$1')} .
+'NameTest' -> 'name' : {name, value('$1')} .
+
+
+
+Erlang code.
+
+% token({Token, _Line}) ->
+% Token;
+% token({Token, _Line, _Value}) ->
+% Token.
+
+value({Token, _Line}) ->
+ Token;
+value({_Token, _Line, Value}) ->
+ Value.
diff --git a/lib/xmerl/src/xmerl_xpath_pred.erl b/lib/xmerl/src/xmerl_xpath_pred.erl
new file mode 100644
index 0000000000..451a09bee3
--- /dev/null
+++ b/lib/xmerl/src/xmerl_xpath_pred.erl
@@ -0,0 +1,808 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2003-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%% Description : Helper module to xmerl_xpath: XPATH predicates.
+
+-module(xmerl_xpath_pred).
+
+%% API
+-export([eval/2]).
+
+
+%% internal functions (called via apply/3)
+-export([boolean/1, boolean/2,
+ ceiling/2,
+ concat/2,
+ contains/2,
+ count/2,
+ floor/2,
+ fn_false/2,
+ fn_not/2,
+ fn_true/2,
+ id/2,
+ lang/2,
+ last/2,
+ 'local-name'/2,
+ 'namespace-uri'/2,
+ name/2,
+ string/2,
+ nodeset/1,
+ 'normalize-space'/2,
+ number/1, number/2,
+ position/2,
+ round/2,
+ 'starts-with'/2,
+ string/1,
+ 'string-length'/2,
+ substring/2,
+ 'substring-after'/2,
+ 'substring-before'/2,
+ sum/2,
+ translate/2]).
+-export([core_function/1]).
+
+-include("xmerl.hrl").
+-include("xmerl_xpath.hrl").
+
+%% -record(obj, {type,
+%% value}).
+
+
+-define(string(X), #xmlObj{type = string,
+ value = X}).
+-define(nodeset(X), #xmlObj{type = nodeset,
+ value = X}).
+-define(number(X), #xmlObj{type = number,
+ value = X}).
+-define(boolean(X), #xmlObj{type = boolean,
+ value = X}).
+
+
+
+
+eval(Expr, C = #xmlContext{context_node = #xmlNode{pos = Pos}}) ->
+ Obj = expr(Expr, C),
+ Res = case Obj#xmlObj.type of
+ number when Obj#xmlObj.value == Pos ->
+ true;
+ number ->
+ false;
+ boolean ->
+ Obj#xmlObj.value;
+ _ ->
+ mk_boolean(C, Obj)
+ end,
+% io:format("eval(~p, ~p) -> ~p~n", [Expr, Pos, Res]),
+ Res.
+
+
+string(X) ->
+ ?string(X).
+
+nodeset(X) ->
+ ?nodeset(X).
+
+number(X) ->
+ ?number(X).
+
+boolean(X) ->
+ ?boolean(X).
+
+
+expr({arith, Op, E1, E2}, C) ->
+ arith_expr(Op, E1, E2, C);
+expr({comp, Op, E1, E2}, C) ->
+ comp_expr(Op, E1, E2, C);
+expr({bool, Op, E1, E2}, C) ->
+ bool_expr(Op, E1, E2, C);
+expr({'negative', E}, C) ->
+ N = mk_number(C, E),
+ - N;
+expr({number, N}, _C) ->
+ ?number(N);
+expr({literal, S}, _C) ->
+ ?string(S);
+expr({function_call, F, Args}, C) ->
+ case core_function(F) of
+ {true, F1} ->
+ ?MODULE:F1(C, Args);
+ true ->
+ ?MODULE:F(C, Args);
+ false ->
+ %% here, we should look up the function in the context provided
+ %% by the caller, but we haven't figured this out yet.
+ exit({not_a_core_function, F})
+ end;
+expr({path, Type, PathExpr}, C) ->
+ #state{context=#xmlContext{nodeset = NS}} =
+ xmerl_xpath:eval_path(Type, PathExpr, C),
+ ?nodeset(NS);
+expr(Expr, _C) ->
+ exit({unknown_expr, Expr}).
+
+
+arith_expr('+', E1, E2, C) ->
+ ?number(mk_number(C, E1) + mk_number(C, E2));
+arith_expr('-', E1, E2, C) ->
+ ?number(mk_number(C, E1) - mk_number(C, E2));
+arith_expr('*', E1, E2, C) ->
+ ?number(mk_number(C, E1) * mk_number(C, E2));
+arith_expr('div', E1, E2, C) ->
+ ?number(mk_number(C, E1) / mk_number(C, E2));
+arith_expr('mod', E1, E2, C) ->
+ ?number(mk_number(C, E1) rem mk_number(C, E2)).
+
+comp_expr('>', E1, E2, C) ->
+ N1 = expr(E1,C),
+ N2 = expr(E2,C),
+ ?boolean(compare_ineq_format(N1,N2,C) > compare_ineq_format(N2,N1,C));
+comp_expr('<', E1, E2, C) ->
+ N1 = expr(E1,C),
+ N2 = expr(E2,C),
+ ?boolean(compare_ineq_format(N1,N2,C) > compare_ineq_format(N2,N1,C));
+comp_expr('>=', E1, E2, C) ->
+ N1 = expr(E1,C),
+ N2 = expr(E2,C),
+ ?boolean(compare_ineq_format(N1,N2,C) > compare_ineq_format(N2,N1,C));
+comp_expr('<=', E1, E2, C) ->
+ N1 = expr(E1,C),
+ N2 = expr(E2,C),
+ ?boolean(compare_ineq_format(N1,N2,C) > compare_ineq_format(N2,N1,C));
+comp_expr('=', E1, E2, C) ->
+ N1 = expr(E1,C),
+ N2 = expr(E2,C),
+ ?boolean(compare_eq_format(N1,N2,C) == compare_eq_format(N2,N1,C));
+comp_expr('!=', E1, E2, C) ->
+ N1 = expr(E1,C),
+ N2 = expr(E2,C),
+ ?boolean(compare_eq_format(N1,N2,C) /= compare_eq_format(N2,N1,C)).
+
+bool_expr('or', E1, E2, C) ->
+ ?boolean(mk_boolean(C, E1) or mk_boolean(C, E2));
+bool_expr('and', E1, E2, C) ->
+ ?boolean(mk_boolean(C, E1) and mk_boolean(C, E2)).
+
+%% According to chapter 3.4 in XML Path Language ver 1.0 the format of
+%% the compared objects are depending on the type of the other
+%% object.
+%% 1. Comparisons involving node-sets is treated equally despite
+%% of which comparancy operand is used. In this case:
+%% - node-set comp node-set: string values are used
+%% - node-set comp number : ((node-set string value) -> number)
+%% - node-set comp boolean : (node-set string value) -> boolean
+%% 2. Comparisons when neither object is a node-set and the operand
+%% is = or != the following transformation is done before comparison:
+%% - if one object is a boolean the other is converted to a boolean.
+%% - if one object is a number the other is converted to a number.
+%% - otherwise convert both to the string value.
+%% 3. Comparisons when neither object is a node-set and the operand is
+%% <=, <, >= or > both objects are converted to a number.
+compare_eq_format(N1=#xmlObj{type=T1},N2=#xmlObj{type=T2},C) when T1==nodeset;
+ T2==nodeset ->
+ compare_nseq_format(N1,N2,C);
+compare_eq_format(N1=#xmlObj{type=T1},#xmlObj{type=T2},C) when T1==boolean;
+ T2==boolean ->
+ mk_boolean(C,N1);
+compare_eq_format(N1=#xmlObj{type=T1},#xmlObj{type=T2},C) when T1==number;
+ T2==number ->
+ mk_number(C,N1);
+compare_eq_format(N1,_,C) ->
+ mk_string(C,string_value(N1)).
+
+compare_ineq_format(N1=#xmlObj{type=T1},
+ N2=#xmlObj{type=T2},C) when T1==nodeset;
+ T2==nodeset ->
+ compare_nseq_format(N1,N2,C);
+compare_ineq_format(N1,_N2,C) ->
+ mk_number(C,N1).
+
+compare_nseq_format(N1=#xmlObj{type = number},_N2,C) ->
+ mk_number(C,N1);
+compare_nseq_format(N1=#xmlObj{type = boolean},_N2,C) ->
+ mk_boolean(C,N1);
+compare_nseq_format(N1=#xmlObj{type = string},_N2,C) ->
+ mk_string(C,N1);
+compare_nseq_format(N1=#xmlObj{type = nodeset},_N2=#xmlObj{type=number},C) ->
+ %% transform nodeset value to its string-value
+ mk_number(C,string_value(N1));
+compare_nseq_format(N1=#xmlObj{type = nodeset},_N2=#xmlObj{type=boolean},C) ->
+ mk_boolean(C,N1);
+compare_nseq_format(N1=#xmlObj{type = nodeset},_N2,C) ->
+ mk_string(C,string_value(N1)).
+
+
+core_function('last') -> true;
+core_function('position') -> true;
+core_function('count') -> true;
+core_function('id') -> true;
+core_function('local-name') -> true;
+core_function('namespace-uri') -> true;
+core_function('name') -> true;
+core_function('string') -> true;
+core_function('concat') -> true;
+core_function('starts-with') -> true;
+core_function('contains') -> true;
+core_function('substring-before') -> true;
+core_function('substring-after') -> true;
+core_function('string-length') -> true;
+core_function('normalize-space') -> true;
+core_function('translate') -> true;
+core_function('boolean') -> true;
+core_function('not') -> {true, fn_not};
+core_function('true') -> {true, fn_true};
+core_function('false') -> {true, fn_false};
+core_function('lang') -> true;
+core_function('number') -> true;
+core_function('sum') -> true;
+core_function('floor') -> true;
+core_function('ceiling') -> true;
+core_function('round') -> true;
+core_function(_) ->
+ false.
+
+
+%%% node set functions
+
+%% number: last()
+last(#xmlContext{nodeset = Set}, []) ->
+ ?number(length(Set)).
+
+%% number: position()
+position(#xmlContext{context_node = #xmlNode{pos = Pos}}, []) ->
+ ?number(Pos).
+
+%% number: count(node-set)
+count(C, [Arg]) ->
+ ?number(length(mk_nodeset(C, Arg))).
+
+%% node-set: id(object)
+id(C, [Arg]) ->
+ WD = C#xmlContext.whole_document,
+ NS0 = [WD],
+ Obj = mk_object(C,Arg),
+ case Obj#xmlObj.type of
+ nodeset ->
+ NodeSet = Obj#xmlObj.value,
+ IdTokens =
+ lists:foldl(
+ fun(N, AccX) ->
+ StrVal = string_value(N),
+ TokensX = id_tokens(StrVal),
+ TokensX ++ AccX
+ end, [], NodeSet),
+ NewNodeSet =
+ xmerl_xpath:axis(descendant_or_self,
+ fun(Node) ->
+ attribute_test(Node, id, IdTokens)
+ end, C#xmlContext{nodeset = NS0}),
+ ?nodeset(NewNodeSet);
+ _ ->
+ StrVal = string_value(Obj#xmlObj.value),
+ IdTokens = id_tokens(StrVal),
+ NodeSet = [(WD#xmlNode.node)#xmlDocument.content],
+ NewNodeSet = lists:foldl(
+ fun(Tok, AccX) ->
+ select_on_attribute(NodeSet, id, Tok, AccX)
+ end, [], IdTokens),
+ ?nodeset(NewNodeSet)
+
+ end.
+
+id_tokens(Str=#xmlObj{type=string}) ->
+ string:tokens(Str#xmlObj.value, " \t\n\r").
+%%id_tokens(Str) when list(Str) ->
+%% string:tokens(Str, " \t\n\r").
+
+attribute_test(#xmlNode{node = #xmlElement{attributes = Attrs}},
+ Key, Vals) ->
+ case lists:keysearch(Key, #xmlAttribute.name, Attrs) of
+ {value, #xmlAttribute{value = V}} ->
+ lists:member(V, Vals);
+ _ ->
+ false
+ end;
+attribute_test(_Node, _Key, _Vals) ->
+ false.
+
+%%% CONTINUE HERE!!!!
+
+%% string: local-name(node-set?)
+'local-name'(C, []) ->
+ local_name1(default_nodeset(C));
+
+'local-name'(C, [Arg]) ->
+ local_name1(mk_nodeset(C, Arg)).
+
+local_name1([]) ->
+ ?string([]);
+local_name1([#xmlNode{type=element,node=El}|_]) ->
+ #xmlElement{name=Name,nsinfo=NSI} = El,
+ local_name2(Name,NSI);
+local_name1([#xmlNode{type=attribute,node=Att}|_]) ->
+ #xmlAttribute{name=Name,nsinfo=NSI} = Att,
+ local_name2(Name,NSI);
+local_name1([#xmlElement{name = Name, nsinfo = NSI}|_]) ->
+ local_name2(Name,NSI).
+local_name2(Name, NSI) ->
+ case NSI of
+ {_Prefix, Local} ->
+ ?string(Local);
+ [] ->
+ ?string(atom_to_list(Name))
+ end.
+
+%% string: namespace-uri(node-set?)
+'namespace-uri'(C, []) ->
+ ns_uri(default_nodeset(C));
+
+'namespace-uri'(C, [Arg]) ->
+ ns_uri(mk_nodeset(C, Arg)).
+
+
+ns_uri([]) ->
+ ?string([]);
+ns_uri([#xmlElement{nsinfo = NSI, namespace = NS}|_]) ->
+ ns_uri2(NSI,NS);
+ns_uri([#xmlNode{type=element,node=El}|_]) ->
+ #xmlElement{nsinfo=NSI, namespace = NS} = El,
+ ns_uri2(NSI,NS);
+ns_uri([#xmlNode{type=attribute,node=Att}|_]) ->
+ #xmlAttribute{nsinfo=NSI, namespace = NS} = Att,
+ ns_uri2(NSI,NS);
+ns_uri(_) ->
+ ?string([]).
+
+ns_uri2(NSI,NS) ->
+ case NSI of
+ {Prefix, _} ->
+ case lists:keysearch(Prefix, 1, NS#xmlNamespace.nodes) of
+ false ->
+ ?string([]);
+ {value, {_K, V}} ->
+ string_value(V)
+ end;
+ [] ->
+ ?string([])
+ end.
+
+%% name(node-set) -> xmlObj{type=string}
+%% The name function returns a string containing the QName of the node
+%% first in document order. The representation of the QName is not
+%% standardized and applications have their own format. At
+%% http://xml.coverpages.org/clarkNS-980804.html (the author of XPath)
+%% adopts the format "namespace URI"+"local-name" but according to
+%% other sources it is more common to use the format:
+%% '{'"namespace URI"'}'"local-name". This function also uses this
+%% latter form.
+name(C,[]) ->
+ name1(default_nodeset(C));
+name(C, [Arg]) ->
+ name1(mk_nodeset(C, Arg)).
+name1([]) ->
+ ?string([]);
+name1(NodeSet) ->
+ NSVal =
+ case ns_uri(NodeSet) of
+ #xmlObj{value=NSStr} when NSStr =/= [] ->
+ "{"++NSStr++"}";
+ _ ->
+ ""
+ end,
+ #xmlObj{value=LocalName} = local_name1(NodeSet),
+ ?string(NSVal++LocalName).
+
+
+
+%%% String functions
+
+%% string: string(object?)
+string(C, []) ->
+ ns_string(default_nodeset(C));
+string(C, [Arg]) ->
+ string_value(mk_object(C, Arg)).
+
+ns_string([Obj|_]) ->
+ string_value(Obj).
+
+string_value(#xmlObj{type=nodeset,value=[]}) ->
+ ?string("");
+string_value(N=#xmlObj{type=nodeset}) ->
+ string_value(hd(N#xmlObj.value));
+string_value(N=#xmlObj{}) ->
+ string_value(N#xmlObj.value);
+%% Needed also string_value for root_nodes, elements (concatenation of
+%% al decsendant text nodes) and attribute nodes (normalized value).
+string_value(A=#xmlNode{type=attribute}) ->
+ #xmlAttribute{value=AttVal}=A#xmlNode.node,
+ ?string(AttVal);
+string_value(El=#xmlNode{type=element}) ->
+ #xmlElement{content=C} = El#xmlNode.node,
+ TextValue = fun(#xmlText{value=T},_Fun) -> T;
+ (#xmlElement{content=Cont},Fun) -> Fun(Cont,Fun);
+ (_,_) -> []
+ end,
+ TextDecendants=fun(X) -> TextValue(X,TextValue) end,
+ ?string(lists:flatten(lists:map(TextDecendants,C)));
+string_value(T=#xmlNode{type=text}) ->
+ #xmlText{value=Txt} = T#xmlNode.node,
+ ?string(Txt);
+string_value(infinity) -> ?string("Infinity");
+string_value(neg_infinity) -> ?string("-Infinity");
+string_value(A) when is_atom(A) ->
+ ?string(atom_to_list(A));
+string_value(N) when is_integer(N) ->
+ ?string(integer_to_list(N));
+string_value(N) when is_float(N) ->
+ N1 = round(N * 10000000000000000),
+ ?string(strip_zeroes(integer_to_list(N1)));
+string_value(Str) when is_list(Str) ->
+ ?string(Str).
+
+strip_zeroes(Str) ->
+ strip_zs(lists:reverse(Str), 15).
+
+strip_zs([H|T], 0) ->
+ lists:reverse(T) ++ [$., H];
+strip_zs("0" ++ T, N) ->
+ strip_zs(T, N-1);
+strip_zs([H|T], N) ->
+ strip_zs(T, N-1, [H]).
+
+strip_zs([H|T], 0, Acc) ->
+ lists:reverse(T) ++ [$.,H|Acc];
+strip_zs([H|T], N, Acc) ->
+ strip_zs(T, N-1, [H|Acc]).
+
+
+%% string: concat(string, string, string*)
+concat(C, Args = [_, _|_]) ->
+ Strings = [mk_string(C, A) || A <- Args],
+ ?string(lists:concat(Strings)).
+
+%% boolean: starts-with(string, string)
+'starts-with'(C, [A1, A2]) ->
+ ?boolean(lists:prefix(mk_string(C, A2), mk_string(C, A1))).
+
+%% boolean: contains(string, string)
+contains(C, [A1, A2]) ->
+ Pos = string:str(mk_string(C, A1), mk_string(C, A2)),
+ ?boolean(Pos > 0).
+
+%% string: substring-before(string, string)
+'substring-before'(C, [A1, A2]) ->
+ S1 = mk_string(C, A1),
+ S2 = mk_string(C, A2),
+ Pos = string:str(S1, S2),
+ ?string(string:substr(S1, 1, Pos)).
+
+%% string: substring-after(string, string)
+'substring-after'(C, [A1, A2]) ->
+ S1 = mk_string(C, A1),
+ S2 = mk_string(C, A2),
+ case string:str(S1, S2) of
+ 0 ->
+ ?string([]);
+ Pos ->
+ ?string(string:substr(S1, Pos))
+ end.
+
+%% string: substring(string, number, number?)
+substring(C, [A1, A2]) ->
+ S = mk_string(C, A1),
+ Pos = mk_integer(C, A2),
+ ?string(string:substr(S, Pos));
+substring(C, [A1, A2, A3]) ->
+ S = mk_string(C, A1),
+ Pos = mk_integer(C, A2),
+ Length = mk_integer(C, A3),
+ ?string(string:substr(S, Pos, Length)).
+
+
+%% number: string-length(string?)
+'string-length'(C = #xmlContext{context_node = N}, []) ->
+ length(mk_string(C, string_value(N)));
+
+'string-length'(C, [A]) ->
+ length(mk_string(C, A)).
+
+
+%% string: normalize-space(string?)
+'normalize-space'(C = #xmlContext{context_node = N}, []) ->
+ normalize(mk_string(C, string_value(N)));
+
+'normalize-space'(C, [A]) ->
+ normalize(mk_string(C, A)).
+
+
+%% string: translate(string, string, string)
+translate(C, [A1, A2, A3]) ->
+ S1 = mk_string(C, A1),
+ S2 = mk_string(C, A2),
+ S3 = mk_string(C, A3),
+ ?string(translate1(S1, translations(S2, S3))).
+
+translate1([H|T], Xls) ->
+ case lists:keysearch(H, 1, Xls) of
+ {value, {_, remove}} ->
+ translate1(T, Xls);
+ {value, {_, replace, H1}} ->
+ [H1|translate1(T, Xls)];
+ false ->
+ [H|translate1(T, Xls)]
+ end;
+translate1([], _) ->
+ [].
+
+translations([H|T], [H1|T1]) ->
+ [{H, replace, H1}|translations(T, T1)];
+translations(Rest, []) ->
+ [{X, remove} || X <- Rest];
+translations([], _Rest) ->
+ [].
+
+
+
+%% boolean: boolean(object)
+boolean(C, [Arg]) ->
+ ?boolean(mk_boolean(C, Arg)).
+
+%% boolean: not(boolean) ->
+fn_not(C, [Arg]) ->
+ ?boolean(not(mk_boolean(C, Arg))).
+
+%% boolean: true() ->
+fn_true(_C, []) ->
+ ?boolean(true).
+
+%% boolean: false() ->
+fn_false(_C, []) ->
+ ?boolean(false).
+
+%% boolean: lang(string) ->
+lang(C = #xmlContext{context_node = N}, [Arg]) ->
+ S = mk_string(C, Arg),
+ Lang =
+ case N of
+ #xmlElement{language = L} -> L;
+ #xmlAttribute{language = L} -> L;
+ #xmlText{language = L} -> L;
+ #xmlComment{language = L} -> L;
+ _ -> []
+ end,
+ case Lang of
+ [] ->
+ ?boolean(false);
+ _ ->
+ ?boolean(match_lang(upcase(S), upcase(Lang)))
+ end.
+
+
+upcase([H|T]) when H >= $a, H =< $z ->
+ [H+($A-$a)|upcase(T)];
+upcase([H|T]) ->
+ [H|upcase(T)];
+upcase([]) ->
+ [].
+
+match_lang([H|T], [H|T1]) ->
+ match_lang(T, T1);
+match_lang([], "-" ++ _) ->
+ true;
+match_lang([], []) ->
+ true;
+match_lang(_, _) ->
+ false.
+
+
+
+%% number: number(object)
+number(C = #xmlContext{context_node = N}, []) ->
+ ?number(mk_number(C, string(C, N)));
+number(C, [Arg]) ->
+ ?number(mk_number(C, Arg)).
+
+
+sum(C, [Arg]) ->
+ NS = mk_nodeset(C, Arg),
+ lists:foldl(
+ fun(N, Sum) ->
+ Sum + mk_number(C, string(C, N))
+ end, 0, NS).
+
+floor(C, [Arg]) ->
+ Num = mk_number(C, Arg),
+ case trunc(Num) of
+ Num1 when Num1 > Num ->
+ ?number(Num1-1);
+ Num1 ->
+ ?number(Num1)
+ end.
+
+ceiling(C, [Arg]) ->
+ Num = mk_number(C, Arg),
+ case trunc(Num) of
+ Num1 when Num1 < Num ->
+ ?number(Num1+1);
+ Num1 ->
+ ?number(Num1)
+ end.
+
+
+round(C, [Arg]) ->
+ case mk_number(C, Arg) of
+ A when is_atom(A) ->
+ A;
+ N when is_integer(N) ->
+ N;
+ F when is_float(F) ->
+ round(F)
+ end.
+
+
+select_on_attribute([E = #xmlElement{attributes = Attrs}|T], K, V, Acc) ->
+ case lists:keysearch(K, #xmlAttribute.name, Attrs) of
+ {value, #xmlAttribute{value = V}} ->
+ Acc2 = select_on_attribute(E#xmlElement.content,K,V,[E|Acc]),
+ select_on_attribute(T, K, V, Acc2);
+ _ ->
+ Acc2 = select_on_attribute(E#xmlElement.content,K,V,Acc),
+ select_on_attribute(T, K, V, Acc2)
+ end;
+select_on_attribute([H|T], K, V, Acc) when is_record(H,xmlText) ->
+ select_on_attribute(T, K, V, Acc);
+select_on_attribute([], _K, _V, Acc) ->
+ Acc.
+
+
+%%%%
+
+mk_nodeset(_C0, #xmlContext{nodeset = NS}) ->
+ NS;
+mk_nodeset(_C0, #xmlObj{type = nodeset, value = NS}) ->
+ NS;
+mk_nodeset(C0, Expr) ->
+ case expr(Expr, C0) of
+ #xmlObj{type = nodeset, value = NS} ->
+ NS;
+ Other ->
+ exit({expected_nodeset, Other})
+ end.
+
+
+default_nodeset(#xmlContext{context_node = N}) ->
+ [N].
+
+
+mk_object(_C0, Obj = #xmlObj{}) ->
+ Obj;
+mk_object(C0, Expr) ->
+ expr(Expr, C0).
+
+
+mk_string(_C0, #xmlObj{type = string, value = V}) ->
+ V;
+mk_string(C0, Obj = #xmlObj{}) ->
+ mk_string(C0,string_value(Obj));
+mk_string(C0, Expr) ->
+ mk_string(C0, expr(Expr, C0)).
+
+
+
+mk_integer(_C0, #xmlObj{type = number, value = V}) when is_float(V) ->
+ round(V);
+mk_integer(_C0, #xmlObj{type = number, value = V}) when is_integer(V) ->
+ V;
+mk_integer(C, Expr) ->
+ mk_integer(C, expr(Expr, C)).
+
+
+mk_number(_C, #xmlObj{type = string, value = V}) ->
+ scan_number(V);
+mk_number(_C, #xmlObj{type = number, value = V}) ->
+ V;
+mk_number(C, N=#xmlObj{type = nodeset}) ->
+ mk_number(C,string_value(N));
+mk_number(_C, #xmlObj{type = boolean, value = false}) ->
+ 0;
+mk_number(_C, #xmlObj{type = boolean, value = true}) ->
+ 1;
+mk_number(C, Expr) ->
+ mk_number(C, expr(Expr, C)).
+
+
+mk_boolean(_C, #xmlObj{type = boolean, value = V}) ->
+ V;
+mk_boolean(_C, #xmlObj{type = number, value = 0}) ->
+ false;
+mk_boolean(_C, #xmlObj{type = number, value = V}) when is_float(V) ; is_integer(V) ->
+ true;
+mk_boolean(_C, #xmlObj{type = nodeset, value = []}) ->
+ false;
+mk_boolean(_C, #xmlObj{type = nodeset, value = _V}) ->
+ true;
+mk_boolean(_C, #xmlObj{type = string, value = []}) ->
+ false;
+mk_boolean(_C, #xmlObj{type = string, value = _V}) ->
+ true;
+mk_boolean(C, Expr) ->
+ mk_boolean(C, expr(Expr, C)).
+
+
+normalize([H|T]) when ?whitespace(H) ->
+ normalize(T);
+normalize(Str) ->
+ ContF = fun(_ContF, RetF, _S) ->
+ RetF()
+ end,
+ normalize(Str,
+ #xmerl_scanner{acc_fun = fun() -> exit(acc_fun) end,
+ event_fun = fun() -> exit(event_fun) end,
+ hook_fun = fun() -> exit(hook_fun) end,
+ continuation_fun = ContF},
+ []).
+
+
+normalize(Str = [H|_], S, Acc) when ?whitespace(H) ->
+ case xmerl_scan:accumulate_whitespace(Str, S, preserve, Acc) of
+ {" " ++ Acc1, [], _S1} ->
+ lists:reverse(Acc1);
+ {Acc1, [], _S1} ->
+ lists:reverse(Acc1);
+ {Acc1, T1, S1} ->
+ normalize(T1, S1, Acc1)
+ end;
+normalize([H|T], S, Acc) ->
+ normalize(T, S, [H|Acc]);
+normalize([], _S, Acc) ->
+ lists:reverse(Acc).
+
+
+scan_number([H|T]) when ?whitespace(H) ->
+ scan_number(T);
+scan_number("-" ++ T) ->
+ case catch xmerl_xpath_scan:scan_number(T) of
+ {{number, N}, Tail} ->
+ case is_all_white(Tail) of
+ true ->
+ N;
+ false ->
+ 'NaN'
+ end;
+ _Other ->
+ 'NaN'
+ end;
+scan_number(T) ->
+ case catch xmerl_xpath_scan:scan_number(T) of
+ {{number, N}, Tail} ->
+ case is_all_white(Tail) of
+ true ->
+ N;
+ false ->
+ 'NaN'
+ end;
+ _Other ->
+ 'NaN'
+ end.
+
+is_all_white([H|T]) when ?whitespace(H) ->
+ is_all_white(T);
+is_all_white([_H|_T]) ->
+ false;
+is_all_white([]) ->
+ true.
diff --git a/lib/xmerl/src/xmerl_xpath_scan.erl b/lib/xmerl/src/xmerl_xpath_scan.erl
new file mode 100644
index 0000000000..10e2756e74
--- /dev/null
+++ b/lib/xmerl/src/xmerl_xpath_scan.erl
@@ -0,0 +1,308 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2003-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%% Description : Token scanner for XPATH grammar
+
+%%%----------------------------------------------------------------------
+%%%
+%%% The XPATH grammar is a bit tricky, due to operator overloading.
+%%% This version of the scanner is based on the XPATH spec:
+%%% http://www.w3.org/TR/1999/REC-xpath-19991116 (XPATH version 1.0)
+%%%
+%%% Quote from the spec:
+%%%
+%%% "The following special tokenization rules must be applied in the order
+%%% specified to disambiguate the ExprToken grammar:
+%%%
+%%% o If there is a preceding token and the preceding token is not one of
+%%% @, ::. (, [, or an Operator, then a * must be recognized as a
+%%% MultiplyOperator and an NCName must be recognized as an OperatorName
+%%% o If the character following an NCName (possible after intervening
+%%% ExprWhiteSpace) is (, then the token must be recognized as a NodeType
+%%% or a FunctionName.
+%%% o If the two characters following an NCName (possible after intervening
+%%% ExprWhiteSpace) are ::, then the token must be recognized as an
+%%% AxisName.
+%%% o Otherwise, the token must not be recognized as a MultiplyOperator, an
+%%% OperatorName, a NodeType, a FunctionName, or an AxisName."
+%%%----------------------------------------------------------------------
+
+-module(xmerl_xpath_scan).
+
+
+%% main API
+-export([tokens/1]).
+
+%% exported helper functions
+-export([scan_number/1]).
+
+-include("xmerl.hrl").
+
+-define(L, 1).
+
+
+tokens(Str) ->
+ tokens(strip_ws(Str), []).
+
+tokens([], Acc) ->
+ lists:reverse([{'$end', ?L, '$end'}|Acc]);
+tokens(Str, Acc) ->
+ case scan_token(Str, Acc) of
+ {rescan, NewStr} ->
+ tokens(NewStr, Acc);
+ {Token, T} ->
+ tokens(strip_ws(T), [Token|Acc])
+ end.
+
+%% Expr Tokens
+scan_token("(" ++ T, _A) -> {{'(', ?L, '('}, T};
+scan_token(")" ++ T, _A) -> {{')', ?L, ')'}, T};
+scan_token("[" ++ T, _A) -> {{'[', ?L, '['}, T};
+scan_token("]" ++ T, _A) -> {{']', ?L, ']'}, T};
+scan_token(".." ++ T, _A) -> {rescan,"parent::node()" ++ T} ;
+ % {{'..',?L,'..'}, T};
+scan_token("@" ++ T, _A) -> {rescan,"attribute::" ++ T};
+ % {{'@',?L,'@'},T};
+scan_token("," ++ T, _A) -> {{',', ?L, ','}, T};
+scan_token("::" ++ T, _A) -> {{'::', ?L, '::'}, T};
+
+%% operators
+scan_token("//" ++ T, _A) -> {rescan,"/descendant-or-self::node()/" ++ T};
+ % {{'//',?L,'//'},T};
+scan_token("/" ++ T, _A) -> {{'/', ?L, '/'}, T};
+scan_token("|" ++ T, _A) -> {{'|', ?L, '|'}, T};
+scan_token("+" ++ T, _A) -> {{'+', ?L, '+'}, T};
+scan_token("-" ++ T, _A) -> {{'-', ?L, '-'}, T};
+scan_token("=" ++ T, _A) -> {{'=', ?L, '='}, T};
+scan_token("!=" ++ T, _A) -> {{'!=', ?L, '!='}, T};
+scan_token("<=" ++ T, _A) -> {{'<=', ?L, '<='}, T};
+scan_token("<" ++ T, _A) -> {{'<', ?L, '<'}, T};
+scan_token(">=" ++ T, _A) -> {{'>=', ?L, '>='}, T};
+scan_token(">" ++ T, _A) -> {{'>', ?L, '>'}, T};
+
+scan_token("*" ++ T, A) ->
+ Tok =
+ case A of
+ [{X,_,_}|_] ->
+ case special_token(X) of
+ false ->
+ {'*', ?L, '*'};
+ true ->
+ {'wildcard', ?L, 'wildcard'}
+ end;
+ _ ->
+ {'wildcard', ?L, 'wildcard'}
+ end,
+ {Tok, T};
+
+%% numbers
+scan_token(Str = [H|_], _A) when H >= $0, H =< $9 ->
+ scan_number(Str);
+scan_token(Str = [$., H|_], A) when H >= $0, H =< $9 ->
+ scan_number(Str, A);
+scan_token("." ++ T, _A) ->
+% {{'.', ?L, '.'}, T};
+ {rescan, "self::node()" ++ T};
+
+%% Variable Reference
+scan_token([$$|T], _A) ->
+ {{Prefix, Local}, T1} = scan_name(T),
+ case Prefix of
+ [] ->
+ {{var_reference, ?L, list_to_atom(Local)}, T1};
+ _ ->
+ {{var_reference, ?L, list_to_atom(Prefix++":"++Local)}, T1}
+ end;
+
+scan_token([H|T], _A) when H == $" ; H == $' ->
+ {Literal, T1} = scan_literal(T, H, []),
+ {{literal, ?L, Literal}, T1};
+
+scan_token(T, A) ->
+ {{Prefix, Local}, T1} = scan_name(T),
+ case A of
+ [{X,_,_}|_] ->
+ case special_token(X) of
+ false ->
+ operator_name(Prefix, Local, T1);
+ true ->
+ other_name(Prefix, Local, strip_ws(T1))
+ end;
+ _ ->
+ other_name(Prefix, Local, T1)
+ end.
+
+operator_name([], "and", T) -> {{'and', ?L, 'and'}, T};
+operator_name([], "or", T) -> {{'or', ?L, 'or'}, T};
+operator_name([], "mod", T) -> {{'mod', ?L, 'mod'}, T};
+operator_name([], "div", T) -> {{'div', ?L, 'div'}, T}.
+
+
+other_name(Prefix, [], "*" ++ T) ->
+ %% [37] NameTest ::= '*' | NCName ':' '*' | QName
+ {{prefix_test, ?L, Prefix}, T};
+other_name(Prefix, Local, T = "(" ++ _) ->
+ node_type_or_function_name(Prefix, Local, T);
+other_name(Prefix, Local, T = "::" ++ _) ->
+ axis(Prefix, Local, T);
+other_name([], Local, T) ->
+ {{name, ?L, {list_to_atom(Local), [], Local}}, T};
+other_name(Prefix, Local, T) ->
+ {{name, ?L, {list_to_atom(Prefix++":"++Local), Prefix, Local}}, T}.
+
+
+
+%% node types
+node_type_or_function_name([], "comment", T) ->
+ {{node_type, ?L, comment}, T};
+node_type_or_function_name([], "text", T) ->
+ {{node_type, ?L, text}, T};
+node_type_or_function_name([], "processing-instruction", T) ->
+ {{'processing-instruction', ?L, 'processing-instruction'}, T};
+node_type_or_function_name([], "node", T) ->
+ {{node_type, ?L, node}, T};
+node_type_or_function_name(Prefix, Local, T) ->
+ {{function_name, ?L, list_to_atom(Prefix ++ Local)}, T}.
+
+
+%% axis names
+axis([], "ancestor-or-self", T) -> {{axis, ?L, ancestor_or_self}, T};
+axis([], "ancestor", T) -> {{axis, ?L, ancestor}, T};
+axis([], "attribute", T) -> {{axis, ?L, attribute}, T};
+axis([], "child", T) -> {{axis, ?L, child}, T};
+axis([], "descendant-or-self", T) -> {{axis, ?L, descendant_or_self}, T};
+axis([], "descendant", T) -> {{axis, ?L, descendant}, T};
+axis([], "following-sibling", T) -> {{axis, ?L, following_sibling}, T};
+axis([], "following", T) -> {{axis, ?L, following}, T};
+axis([], "namespace", T) -> {{axis, ?L, namespace}, T};
+axis([], "parent", T) -> {{axis, ?L, parent}, T};
+axis([], "preceding-sibling", T) -> {{axis, ?L, preceding_sibling}, T};
+axis([], "preceding", T) -> {{axis, ?L, preceding}, T};
+axis([], "self", T) -> {{axis, ?L, self}, T}.
+
+
+
+
+scan_literal([H|T], H, Acc) ->
+ {lists:reverse(Acc), T};
+scan_literal([H|T], Delim, Acc) ->
+ scan_literal(T, Delim, [H|Acc]).
+
+
+scan_name([H1, H2 | T]) when H1 == $: ; H1 == $_ ->
+ if ?whitespace(H2) ->
+ exit({invalid_name, [H1, H2, '...']});
+ true ->
+ scan_prefix(T, [H2, H1])
+ end;
+scan_name([H|T]) ->
+ case xmerl_lib:is_letter(H) of
+ true ->
+ scan_prefix(T, [H]);
+ false ->
+ exit({invalid_name, lists:sublist([H|T], 1, 6)})
+ end;
+scan_name(Str) ->
+ exit({invalid_name, lists:sublist(Str, 1, 6)}).
+
+scan_prefix([], Acc) ->
+ {{[], lists:reverse(Acc)}, []};
+scan_prefix(Str = [H|_], Acc) when ?whitespace(H) ->
+ {{[], lists:reverse(Acc)}, Str};
+scan_prefix(T = "::" ++ _, Acc) ->
+ %% This is the next token
+ {{[], lists:reverse(Acc)}, T};
+scan_prefix(":" ++ T, Acc) ->
+ {LocalPart, T1} = scan_local_part(T, []),
+ Prefix = lists:reverse(Acc),
+ {{Prefix, LocalPart}, T1};
+scan_prefix(Str = [H|T], Acc) ->
+ case xmerl_lib:is_namechar(H) of
+ true ->
+ scan_prefix(T, [H|Acc]);
+ false ->
+ {{[], lists:reverse(Acc)}, Str}
+ end.
+
+scan_local_part([], Acc) ->
+ {lists:reverse(Acc), []};
+scan_local_part(Str = [H|_], Acc) when ?whitespace(H) ->
+ {lists:reverse(Acc), Str};
+scan_local_part(Str = [H|T], Acc) ->
+ case xmerl_lib:is_namechar(H) of
+ true ->
+ scan_local_part(T, [H|Acc]);
+ false ->
+ {lists:reverse(Acc), Str}
+ end.
+
+
+scan_number(T) ->
+ scan_number(T, []).
+
+scan_number([], Acc) ->
+ {{number, ?L, list_to_integer(lists:reverse(Acc))}, []};
+scan_number("." ++ T, []) ->
+ {Digits, T1} = scan_digits(T, ".0"),
+ Number = list_to_float(Digits),
+ {{number, ?L, Number}, T1};
+scan_number("." ++ T, Acc) ->
+ {Digits, T1} = scan_digits(T, "." ++ Acc),
+ Number = list_to_float(Digits),
+ {{number, ?L, Number}, T1};
+scan_number([H|T], Acc) when H >= $0, H =< $9 ->
+ scan_number(T, [H|Acc]);
+scan_number(T, Acc) ->
+ {{number, ?L, list_to_integer(lists:reverse(Acc))}, T}.
+
+scan_digits([], Acc) ->
+ {lists:reverse(Acc), []};
+scan_digits([H|T], Acc) when H >= $0, H =< $9 ->
+ scan_digits(T, [H|Acc]);
+scan_digits(T, Acc) ->
+ {lists:reverse(Acc), T}.
+
+
+strip_ws([H|T]) when ?whitespace(H) ->
+ strip_ws(T);
+strip_ws(T) ->
+ T.
+
+
+special_token('@') -> true;
+special_token('::') -> true;
+special_token('(') -> true;
+special_token('[') -> true;
+special_token('/') -> true;
+special_token('//') -> true;
+special_token('|') -> true;
+special_token('+') -> true;
+special_token('-') -> true;
+special_token('=') -> true;
+special_token('!=') -> true;
+special_token('<') -> true;
+special_token('<=') -> true;
+special_token('>') -> true;
+special_token('>=') -> true;
+special_token('and') -> true;
+special_token('or') -> true;
+special_token('mod') -> true;
+special_token('div') -> true;
+special_token(_) ->
+ false.
diff --git a/lib/xmerl/src/xmerl_xs.erl b/lib/xmerl/src/xmerl_xs.erl
new file mode 100644
index 0000000000..f42a470a43
--- /dev/null
+++ b/lib/xmerl/src/xmerl_xs.erl
@@ -0,0 +1,123 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2003-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%% Description : Implements XSLT like transformations in Erlang
+
+%% @doc
+% Erlang has similarities to XSLT since both languages
+% have a functional programming approach. Using <code>xmerl_xpath</code>
+% it is possible to write XSLT like transforms in Erlang.
+%
+% <p>XSLT stylesheets are often used when transforming XML
+% documents, to other XML documents or (X)HTML for presentation.
+% XSLT contains quite many
+% functions and learning them all may take some effort.
+% This document assumes a basic level of
+% understanding of XSLT.
+% </p>
+% <p>Since XSLT is based on a functional programming approach
+% with pattern matching and recursion it is possible to write
+% similar style sheets in Erlang. At least for basic
+% transforms. This
+% document describes how to use the XPath implementation together
+% with Erlangs pattern matching and a couple of functions to write
+% XSLT like transforms.</p>
+% <p>This approach is probably easier for an Erlanger but
+% if you need to use real XSLT stylesheets in order to "comply to
+% the standard" there is an adapter available to the Sablotron
+% XSLT package which is written i C++.
+% See also the <a href="xmerl_xs_examples.html">Tutorial</a>.
+% </p>
+
+-module(xmerl_xs).
+
+-export([xslapply/2, value_of/1, select/2, built_in_rules/2 ]).
+-include("xmerl.hrl").
+
+
+%% @spec xslapply(Function, EList::list()) -> List
+%% Function = () -> list()
+%% @doc xslapply is a wrapper to make things look similar to
+%% xsl:apply-templates.
+%%
+%% <p>Example, original XSLT:</p><br/><pre>
+%% &lt;xsl:template match="doc/title">
+%% &lt;h1>
+%% &lt;xsl:apply-templates/>
+%% &lt;/h1>
+%% &lt;/xsl:template>
+%% </pre>
+%%
+%% <p>becomes in Erlang:</p><br/><pre>
+%% template(E = #xmlElement{ parents=[{'doc',_}|_], name='title'}) ->
+%% ["&lt;h1>",
+%% xslapply(fun template/1, E),
+%% "&lt;/h1>"];
+%% </pre>
+
+xslapply(Fun, EList) when is_list(EList) ->
+ lists:map( Fun, EList);
+xslapply(Fun, E = #xmlElement{})->
+ lists:map( Fun, E#xmlElement.content).
+
+
+%% @spec value_of(E) -> List
+%% E = unknown()
+%%
+%% @doc Concatenates all text nodes within the tree.
+%%
+%% <p>Example:</p><br/><pre>
+%% &lt;xsl:template match="title">
+%% &lt;div align="center">
+%% &lt;h1>&lt;xsl:value-of select="." />&lt;/h1>
+%% &lt;/div>
+%% &lt;/xsl:template>
+%% </pre>
+%%
+%% <p>becomes:</p><br/> <pre>
+%% template(E = #xmlElement{name='title'}) ->
+%% ["&lt;div align="center">&lt;h1>",
+%% value_of(select(".", E)), "&lt;/h1>&lt;/div>"]
+%% </pre>
+value_of(E)->
+ lists:reverse(xmerl_lib:foldxml(fun value_of1/2, [], E)).
+
+value_of1(#xmlText{}=T1, Accu)->
+ [xmerl_lib:export_text(T1#xmlText.value)|Accu];
+value_of1(_, Accu) ->
+ Accu.
+
+%% @spec select(String::string(),E)-> E
+%%
+%% @doc Extracts the nodes from the xml tree according to XPath.
+%% @see value_of/1
+select(Str,E)->
+ xmerl_xpath:string(Str,E).
+
+%% @spec built_in_rules(Fun, E) -> List
+%%
+%% @doc The default fallback behaviour. Template funs should end with:
+%% <br/><code>template(E) -> built_in_rules(fun template/1, E)</code>.
+built_in_rules(Fun, E = #xmlElement{})->
+ lists:map(Fun, E#xmlElement.content);
+built_in_rules(_Fun, E = #xmlText{}) ->
+ xmerl_lib:export_text(E#xmlText.value);
+built_in_rules(_Fun, E = #xmlAttribute{}) ->
+ E#xmlAttribute.value;
+built_in_rules(_Fun, _E) ->[].
diff --git a/lib/xmerl/src/xmerl_xsd.erl b/lib/xmerl/src/xmerl_xsd.erl
new file mode 100644
index 0000000000..c7bca86205
--- /dev/null
+++ b/lib/xmerl/src/xmerl_xsd.erl
@@ -0,0 +1,5710 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2006-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%% @doc Interface module for XML Schema vlidation.
+%% It handles the W3.org
+%% <a href="http://www.w3.org/XML/Schema#dev">specifications</a>
+%% of XML Schema second edition 28 october 2004. For an introduction to
+%% XML Schema study <a href="http://www.w3.org/TR/xmlschema-0/">part 0.</a>
+%% An XML structure is validated by xmerl_xsd:validate/[2,3].
+%% @type global_state(). <p>The global state of the validator. It is
+%% representated by the <code>#xsd_state{}</code> record.
+%% </p>
+%% @type option_list(). <p>Options allow to customize the behaviour of the
+%% validation.
+%% </p>
+%% Possible options are :
+%% <dl>
+%% <dt><code>{tab2file,boolean()}</code></dt>
+%% <dd>Enables saving of abstract structure on file for debugging
+%% purpose.</dd>
+%% <dt><code>{xsdbase,filename()}</code></dt>
+%% <dd>XSD Base directory.</dd>
+%% <dt><code>{fetch_fun,FetchFun}</code></dt>
+%% <dd>Call back function to fetch an external resource.</dd>
+%% <dt><code>{fetch_path,PathList}</code></dt>
+%% <dd>PathList is a list of directories to search when fetching files.
+%% If the file in question is not in the fetch_path, the URI will
+%% be used as a file name.</dd>
+%% <dt><code>{state,State}</code></dt>
+%% <dd>It is possible by this option to provide a state with process
+%% information from an earlier validation.</dd>
+%% </dl>
+%%%-------------------------------------------------------------------
+-module(xmerl_xsd).
+
+%%----------------------------------------------------------------------
+%% Include files
+%%----------------------------------------------------------------------
+-include("xmerl.hrl").
+-include("xmerl_internal.hrl").
+-include("xmerl_xsd.hrl").
+-include_lib("kernel/include/file.hrl").
+
+%%----------------------------------------------------------------------
+%% External exports
+%%----------------------------------------------------------------------
+-export([
+ validate/2,validate/3,process_validate/2,process_validate/3,
+ process_schema/1,process_schema/2,
+ process_schemas/1,process_schemas/2,
+ state2file/1,state2file/2,file2state/1,format_error/1
+ ]).
+
+%%----------------------------------------------------------------------
+%% Internal exports
+%%----------------------------------------------------------------------
+-export([print_table/1]).
+%%-export([whitespace/1]).
+
+%%----------------------------------------------------------------------
+%% Imports
+%%----------------------------------------------------------------------
+-import(xmerl_lib,[is_facet/1, is_builtin_simple_type/1, is_xsd_string/1]).
+-import(xmerl_xsd_type,[facet_fun/2]).
+-import(lists,[reverse/1,reverse/2,foldl/3,member/2,filter/2,flatten/1,map/2,
+ splitwith/2,mapfoldl/3,keysearch/3,keymember/3,
+ keyreplace/4,keydelete/3]).
+
+
+
+%%======================================================================
+%% Functions
+%%======================================================================
+
+%% @spec validate(Element,State) -> Result
+%% @equiv validate(Element,State,[])
+validate(Xml,State) ->
+ validate(Xml,State,[]).
+
+%% @spec validate(Element,State,Options) -> Result
+%% Element = XmlElement
+%% Options = option_list()
+%% Result = {ValidElement,global_state()} | {error,Reasons}
+%% ValidElement = XmlElement
+%% State = global_state()
+%% Reasons = [ErrorReason] | ErrorReason
+%% @doc Validates a parsed well-formed XML element (Element).
+%% <p>A call to validate/2 or validate/3 must provide a well formed
+%% parsed XML element <code>#xmlElement{}</code> and a State,
+%% <code>global_state()</code>, which holds necessary information from
+%% an already processed schema.
+%% Thus validate enables reuse of the schema information and
+%% therefore if one shall validate several times towards the same
+%% schema it reduces time consumption.</p>
+%% <p>The result, ValidElement, is the valid element that conforms to the
+%% post-schema-validation infoset. When the validator finds an error it
+%% tries to continue and reports a list of all errors found. In those cases
+%% an unexpected error is found it may cause a single error reason.
+%% </p>
+%% <p> Usage example:</p>
+%% <p>
+%% <code>1>{E,_} = xmerl_scan:file("my_XML_document.xml").</code><br/>
+%% <code>2>{ok,S} = xmerl_xsd:process_schema("my_XML_Schema.xsd").</code><br/>
+%% <code>3>{E2,_} = xmerl_xsd:validate(E,S).</code>
+%% </p>
+%% <p> Observe that E2 may differ from E if for instance there are default
+%% values defined in <code>my_XML_Schema.xsd</code>.</p>
+validate(Xml,State,Opts) when is_record(State,xsd_state) ->
+ S2 = initiate_state2(State,Opts),
+ S3 = validation_options(S2,Opts),
+ validate3(S3#xsd_state.schema_name,Xml,S3).
+
+%% @spec state2file(State) -> ok | {error,Reason}
+%% @doc Same as state2file(State,SchemaName)
+%%
+%% The name of the saved file is the same as the name of the
+%% schema, but with <code>.xss</code> extension.
+state2file(S=#xsd_state{schema_name=SN}) ->
+ state2file(S,filename:rootname(SN)).
+
+%% @spec state2file(State,FileName) -> ok | {error,Reason}
+%% State = global_state()
+%% FileName = filename()
+%% @doc Saves the schema state with all information of the processed
+%% schema in a file. You can provide the file name for the saved
+%% state. FileName is saved with the <code>.xss</code> extension
+%% added.
+state2file(S,FileName) when is_record(S,xsd_state) ->
+ save_xsd_state(S),
+ case catch ets:tab2file(S#xsd_state.table,lists:append(FileName,".xss")) of
+ {'EXIT',Reason} ->
+ {error,{[],?MODULE,Reason}};
+ Ret -> Ret
+ end.
+
+%% @spec file2state(FileName) -> {ok,State} | {error,Reason}
+%% State = global_state()
+%% FileName = filename()
+%% @doc Reads the schema state with all information of the processed
+%% schema from a file created with <code>state2file/[1,2]</code>. The
+%% format of this file is internal. The state can then be used
+%% validating an XML document.
+file2state(FileName) ->
+ case catch ets:file2tab(FileName) of
+ {ok,Tab} ->
+ case load_xsd_state(Tab) of
+ [{state,S}] when is_record(S,xsd_state) ->
+ xmerl_xsd_vsn_check(S);
+%% {ok,S};
+ Other ->
+ {error,{[],?MODULE,{incomplete_file,FileName,Other}}}
+ end;
+ {error,Reason} ->
+ {error,{[],?MODULE,Reason}};
+ Other ->
+ {error,{[],?MODULE,Other}}
+ end.
+
+save_xsd_state(S) ->
+ catch ets:insert(S#xsd_state.table,{state,S}).
+load_xsd_state(Table) ->
+ catch ets:lookup(Table,state).
+
+xmerl_xsd_vsn() ->
+ case lists:keysearch(vsn,1,xmerl_xsd:module_info(attributes)) of
+ {value,{_,MD5_VSN}} ->
+ MD5_VSN;
+ _ ->
+ undefined
+ end.
+xmerl_xsd_vsn_check(S=#xsd_state{vsn=MD5_VSN}) ->
+ case [V||{vsn,V}<-xmerl_xsd:module_info(attributes)] of
+ [MD5_VSN] ->
+ {ok,S};
+ _ ->
+ {error,{[],?MODULE,{different_version_of_xmerl_xsd_module_used,
+ state_not_reliable}}}
+ end.
+
+
+
+%% @spec process_validate(Schema,Element) -> Result
+%% @equiv process_validate(Schema,Xml,[])
+process_validate(Schema,Xml) ->
+ process_validate(Schema,Xml,[]).
+%% @spec process_validate(Schema,Element,Options) -> Result
+%% Schema = filename()
+%% Element = XmlElement
+%% Options = option_list()
+%% Result = {ValidXmlElement,State} | {error,Reason}
+%% Reason = [ErrorReason] | ErrorReason
+%% @doc Validates a parsed well-formed XML element towards an XML
+%% schema. <p> Validates in two steps. First it processes the schema,
+%% saves the type and structure info in an ets table and then
+%% validates the element towards the schema.</p>
+%% <p> Usage example:</p>
+%% <p>
+%% <code>1>{E,_} = xmerl_scan:file("my_XML_document.xml").</code><br/>
+%% <code>2>{E2,_} = xmerl_xsd:validate("my_XML_Schema.xsd",E).</code>
+%% </p>
+%% <p> Observe that E2 may differ from E if for instance there are default
+%% values defined in <code>my_XML_Schema.xsd</code>.</p>
+process_validate(Schema,Xml,Opts) ->
+ TargetNamespace = target_namespace(Xml),
+ case Schema of
+ [H|_] when is_list(H); is_tuple(H) ->
+ case process_schemas(Schema,
+ [{target_namespace,TargetNamespace}|Opts]) of
+ {ok,S} ->
+ S2 = validation_options(S,Opts),
+ validate3(S2#xsd_state.schema_name,Xml,S2);
+ Err ->
+ Err
+ end;
+ _ ->
+ process_validate2(xmerl_scan:file(Schema),Schema,Xml,Opts)
+ end.
+
+process_validate2(Err={error,_},_,_,_) ->
+ Err;
+process_validate2({SE,_},Schema,Xml,Opts) ->
+ S = initiate_state(Opts,Schema),
+ S1 = validate_schema(SE,S),
+ S2 = validate_schema_ph2(S1),
+ S3 = schema_concistence_checks(S2),
+ S4 = validation_options(S3,Opts),
+ validate3(Schema,Xml,S4).
+
+validate3(Schema,Xml,S=#xsd_state{errors=[]}) ->
+ Ret = {_,S2} =
+ case catch validate_xml(Xml,S) of
+ {[XML2],[],Sx} ->
+ {XML2,Sx};
+ {XML2,[],Sx} ->
+ {XML2,Sx};
+ {_,UnValidated,Sx} ->
+ {Xml,acc_errs(Sx,{error_path(UnValidated,Xml#xmlElement.name),?MODULE,
+ {unvalidated_rest,UnValidated}})};
+ _Err = {error,Reason} ->
+ {Xml,acc_errs(S,Reason)};
+ {'EXIT',Reason} ->
+ {Xml,acc_errs(S,{error_path(Xml,Xml#xmlElement.name),?MODULE,
+ {undefined,{internal_error,Reason}}})}
+ end,
+ save_to_file(S2,filename:rootname(Schema)++".tab2"),
+ case S2#xsd_state.errors of
+ [] ->
+ Ret;
+ L ->
+ %%delete_table(S2),
+ return_error(L)
+ end;
+validate3(_,_,S) ->
+ return_schema_error(S#xsd_state.errors).
+
+%% @spec process_schema(Schema) -> Result
+%% @equiv process_schema(Schema,[])
+process_schema(Schema) ->
+ process_schema(Schema,[]).
+%% @spec process_schema(Schema,Options) -> Result
+%% Schema = filename()
+%% Result = {ok,State} | {error,Reason}
+%% State = global_state()
+%% Reason = [ErrorReason] | ErrorReason
+%% Options = option_list()
+%% @doc Reads the referenced XML schema and checks that it is valid.
+%% Returns the <code>global_state()</code> with schema info or an
+%% error reason. The error reason may be a list of several errors
+%% or a single error encountered during the processing.
+process_schema(Schema,Options) when is_list(Options) ->
+ S = initiate_state(Options,Schema),
+ process_schema2(xmerl_scan:file(filename:join(S#xsd_state.xsd_base, Schema)),S,Schema);
+process_schema(Schema,State) when is_record(State,xsd_state) ->
+ process_schema2(xmerl_scan:file(filename:join(State#xsd_state.xsd_base, Schema)),State,Schema).
+
+process_schema2(Err={error,_},_,_) ->
+ Err;
+process_schema2({SE,_},State,_Schema) ->
+ S1 = validate_schema(SE,State),
+ S2 = validate_schema_ph2(S1),
+ case schema_concistence_checks(S2) of
+ S3 = #xsd_state{errors=[]} ->
+ {ok,S3};
+ S3 ->
+ delete_table(S3),
+ return_error(S3#xsd_state.errors)
+ end.
+
+%% @spec process_schemas(Schemas) -> Result
+%% @equiv process_schema(Schemas,[])
+process_schemas(Schemas) ->
+ process_schemas(Schemas,[]).
+%% @spec process_schemas(Schemas,Options) -> Result
+%% Schemas = [{NameSpace,filename()}|Schemas] | []
+%% Result = {ok,State} | {error,Reason}
+%% Reason = [ErrorReason] | ErrorReason
+%% Options = option_list()
+%% @doc Reads the referenced XML schemas and controls they are valid.
+%% Returns the <code>global_state()</code> with schema info or an
+%% error reason. The error reason may be a list of several errors
+%% or a single error encountered during the processing.
+process_schemas(Schemas=[{_,Schema}|_],Options) when is_list(Options) ->
+ process_schemas(Schemas,initiate_state(Options,Schema));
+process_schemas([{_NS,Schema}|Rest],State=#xsd_state{fetch_fun=Fetch}) ->
+%% case process_external_schema_once(Schema,if_list_to_atom(NS),State) of
+%% S when is_record(S,xsd_state) ->
+%% case process_schema(filename:join([State#xsd_state.xsd_base,Schema]),State) of
+%% {ok,S} ->
+ Res=
+ case Fetch(Schema,State) of
+ {ok,{file,File},_} ->
+ process_schema2(xmerl_scan:file(File),State,Schema);
+ {ok,{string,Str},_} ->
+ process_schema2(xmerl_scan:string(Str),State,Schema);
+ {ok,[],_} ->
+ {ok,State};
+ Err ->
+ Err
+ end,
+ case Res of
+ {ok,S2} ->
+ process_schemas(Rest,S2);
+ _ ->
+ Res
+ end;
+process_schemas([],S) when is_record(S,xsd_state) ->
+ {ok,S}.
+
+
+initiate_state(Opts,Schema) ->
+ XSDBase = filename:dirname(Schema),
+ {{state,S},RestOpts}=new_state(Opts),
+ S2 = create_tables(S),
+ initiate_state2(S2#xsd_state{schema_name = Schema,
+ xsd_base = XSDBase,
+ fetch_fun = fun fetch/2},RestOpts).
+initiate_state2(S,[]) ->
+ S;
+initiate_state2(S,[{tab2file,Bool}|T]) ->
+ initiate_state2(S#xsd_state{tab2file=Bool},T);
+initiate_state2(S,[{xsdbase,XSDBase}|T]) ->
+ initiate_state2(S#xsd_state{xsd_base=XSDBase},T);
+initiate_state2(S,[{fetch_fun,FetchFun}|T]) ->
+ initiate_state2(S#xsd_state{fetch_fun=FetchFun},T);
+initiate_state2(S,[{fetch_path,FetchPath}|T]) ->
+ initiate_state2(S#xsd_state{fetch_path=FetchPath},T);
+initiate_state2(S,[{schema_preprocessed,Bool}|T]) ->
+ initiate_state2(S#xsd_state{schema_preprocessed=Bool},T);
+initiate_state2(S,[{target_namespace,_NS}|T]) ->
+%% initiate_state2(S#xsd_state{targetNamespace=if_list_to_atom(NS)},T);
+ initiate_state2(S,T); %% used in validation phase
+initiate_state2(S,[H|T]) ->
+ error_msg("Invalid option: ~p~n",[H]),
+ initiate_state2(S,T).
+
+validation_options(S,[{target_namespace,NS}|T]) ->
+ validation_options(S#xsd_state{targetNamespace=if_list_to_atom(NS)},T);
+validation_options(S,[_H|T]) ->
+ validation_options(S,T);
+validation_options(S,[]) ->
+ S.
+
+new_state(Opts) ->
+ XSD_VSN = xmerl_xsd_vsn(),
+ keysearch_delete(state,1,Opts,{state,#xsd_state{vsn=XSD_VSN}}).
+
+
+%% validate_schema/2 traverses the shema element to save necessary
+%% information as defined elements and types.
+validate_schema(E=#xmlElement{},
+ S) ->
+ %% namespace is always a xmlNamespace record, attributs a list of
+ %% #xmlAttributes and content a list of #xmlElements|#xmlText|...
+
+ %% Have to save namespace nodes. Use of namespace in paths for
+ %% unique,key and keyref are used after the schema is processed.
+
+ S1 = S#xsd_state{targetNamespace=target_namespace(E)},
+ case is_already_processed(S1#xsd_state.targetNamespace,S1) of
+ true ->
+ save_namespace_definition(S1#xsd_state.targetNamespace,S1);
+ _ ->
+ S2 = S1,%save_namespace_definition(S1#xsd_state.targetNamespace,S1),
+ {CM,S3} = traverse_content(E,S2),
+ save_schema_element(CM,S3),
+ S3
+ end.
+
+validate_schema_ph2(S=#xsd_state{derived_types=[]}) ->
+ S;
+validate_schema_ph2(S=#xsd_state{derived_types=DT}) ->
+ deduce_derived_types(DT,S).
+
+%% traverse_content/2 creates the content model of the schema.
+%% content model depends on (1) the type:
+%% complex type:
+%% sequence, choice, all
+%% simple type: no content other than characters
+%% (2) minOccurs/maxOccurs attributes.
+%% The schema for schemas content model is:
+%% schema: ((include | import | redefine | annotation)*,
+%% (((simpleType | complexType | group | attributeGroup)
+%% | element | attribute | notation), annotation*)*)
+%% attribute: (annotation?, simpleType?)
+%% element: (annotation?, ((simpleType | complexType)?, (unique |
+%% key | keyref)*))
+%% complexType: (annotation?, (simpleContent | complexContent |
+%% ((group | all | choice | sequence)?,
+%% ((attribute | attributeGroup)*,anyAttribute?))))
+%% attributeGroup:(annotation?,
+%% ((attribute | attributeGroup)*, anyAttribute?))
+%% group: (annotation?, (all | choice | sequence)?)
+%% all: (annotation?, element*)
+%% sequence: (annotation?,
+%% (element | group | choice | sequence | any)*)
+%% choice: (annotation?, (element | group | choice | sequence |
+%% any)*)
+%% any: (annotation?) any wellformed xml inside "any"
+%% unique: (annotation?, (selector, field+))
+%% key: (annotation?, (selector, field+))
+%% keyref: (annotation?, (selector, field+))
+%% selector: (annotation?)
+%% field: (annotation?)
+%% notation: (annotation?)
+%% annotation: (appinfo | documentation)*
+%% appinfo: ({any})*
+%% documentation: ({any})*
+%% simpleType: (annotation?, (restriction | list | union))
+%% restriction: (annotation?, (simpleType?, (minExclusive |
+%% minInclusive | maxExclusive | maxInclusive |
+%% totalDigits | fractionDigits | length | minLength |
+%% maxLength | enumeration | whiteSpace | pattern)*))
+%% list: (annotation?, simpleType?)
+%% union: (annotation?, simpleType*)
+%% include: (annotation?)
+%% import: (annotation?)
+%% redefine: (annotation | (simpleType | complexType | group |
+%% attributeGroup))*
+traverse_content(E=#xmlElement{name=Name},S) ->
+ case local_name(Name) of
+ schema ->
+ Content = E#xmlElement.content,
+%% S1 = S#xsd_state{targetNamespace=target_namespace(E)},
+ ThisNS = {"#this#",S#xsd_state.schema_name,
+ S#xsd_state.targetNamespace},
+ S2 = S#xsd_state{checked_namespace_nodes=
+ add_once(ThisNS,S#xsd_state.checked_namespace_nodes)},
+ S3 = namespace_nodes(E,S2),
+ S4 = element_form_default(E,S3),
+ S5 = attribute_form_default(E,S4),
+ S6 = substitution_default(finalDefault,E,S5),
+ S7 = substitution_default(blockDefault,E,S6),
+ traverse_content2(Content,S7,[]);
+ Err ->
+ exit({error,{[],?MODULE,{schema_error,Err}}})
+ end.
+
+
+traverse_content2([],S,Acc) ->
+ {reverse(remove_annotation(Acc)),reset_scope(S)};
+traverse_content2([El|Els],S,Acc) when is_record(El,xmlElement) ->
+ %% element declaration: save name, type, scope.
+ {Object,S2} = element_content(kind(El,S),El,S#xsd_state.scope),%% Object={Kind,Obj}
+ traverse_content2(Els,S2,[Object|Acc]);
+traverse_content2([_T|Els],S,Acc) -> %% xmlText,xmlPI ...
+ traverse_content2(Els,S,Acc).
+
+target_namespace(E) ->
+ case get_attribute_value(targetNamespace,E,undefined) of
+ URI when is_list(URI) ->
+ list_to_atom(URI);
+ URI ->
+ URI
+ end.
+
+%% namespace_nodes/2 ->
+%% NS.
+namespace_nodes(#xmlElement{namespace=#xmlNamespace{nodes=NS}},
+ S=#xsd_state{namespace_nodes=NSN,
+ global_namespace_nodes=GNSN}) ->
+ S2 =S#xsd_state{namespace_nodes=foldl(fun add_once/2,NSN,NS)},
+ S2#xsd_state{global_namespace_nodes=
+ add_key_once(S#xsd_state.targetNamespace,1,
+ {S#xsd_state.targetNamespace,NS},
+ GNSN)}.
+
+attribute_form_default(#xmlElement{attributes=Atts},S)->
+ Def=form_default(attributeFormDefault,Atts,S),
+ S#xsd_state{attributeFormDefault=Def}.
+element_form_default(#xmlElement{attributes=Atts},S) ->
+ Def=form_default(elementFormDefault,Atts,S),
+ S#xsd_state{elementFormDefault=Def}.
+form_default(Key,Atts,_S) ->
+ case keyNsearch(Key,#xmlAttribute.name,Atts,unqualified) of
+ #xmlAttribute{value=V} when is_list(V) -> list_to_atom(V);
+ #xmlAttribute{value=V} ->V;
+ _-> unqualified
+ end.
+
+substitution_default(Subst = finalDefault,El,S) ->
+ S#xsd_state{finalDefault = substitution(Subst,El,S)};
+substitution_default(Subst = blockDefault,El,S) ->
+ S#xsd_state{blockDefault = substitution(Subst,El,S)}.
+substitution(Subst,El,_S) ->
+ split_by_whitespace(get_attribute_value(Subst,El,[]),[]).
+
+
+%% element_content may be one of: annotation, type def(simple or
+%% complex), import, unique, key, keyref, attribute def, attribute
+%% group, all, group, complexContent, simpleContent, choice, sequence
+element_content({attribute,S=#xsd_state{scope=Scope}},Att,Env) ->
+ case qualify_NCName(Att,S) of
+ no_name ->
+ Ref = attribute_ref(Att),
+ AttRef =
+ {attribute,get_QName(Ref,Att#xmlElement.namespace, %%QQQ
+ reset_scope(S))},
+ {AttRef,add_ref(S,AttRef)};
+ Name ->
+ {AttrType,S2} = attribute_type(Att,[Name|Env],S),
+ S3 = check_cm(attribute,allowed_content(attribute,Env),AttrType,S2),
+ {Attr,S4} = attribute_properties(Att#xmlElement.attributes,
+ #schema_attribute{type=AttrType},S3),
+ Object = {attribute,
+ Attr#schema_attribute{name=Name,scope=Scope}},
+ S5 = save_object(Object,S4),
+ {{attribute,Name},S5}
+ end;
+element_content({element,S},El,Env) ->
+ %% The type of an element may be a simple or complex type (named
+ %% or anonymous), a referenced name or member of a substitution group.
+ case qualify_NCName(El,S) of
+ no_name ->
+ Ref = particle_ref(El),
+ {Occ,S2} = occurance(El,{1,1},S),
+ %% 3.3.3 bullet 2.2
+ S3 = element_forbidden_properties(El,S2),
+ S4 = element_forbidden_content(El#xmlElement.content,S3),
+ ElRef =
+ {element,
+ {get_QName(Ref,El#xmlElement.namespace,reset_scope(S)),
+ Occ}},
+ {ElRef,add_ref(S4,ElRef)};
+ Name ->
+ {Type,S2} = element_type(El,[Name|Env],S),
+ S3 = check_cm(element,allowed_content(element,Env),Type,S2),
+ Type2 = remove_annotation(Type),
+ Unique = [X||X={unique,_} <- Type2],
+ Key = [X||X={K,_} <- Type2,K == key orelse K==keyref],
+ {Occur,S4} = occurance(El,{1,1},S3),
+ {SE,S5} = element_properties(El#xmlElement.attributes,
+ #schema_element{},El,S4),
+ CM = remove_attributes([X||X={Y,_}<-Type2,
+ unique=/=Y,key=/=Y,
+ keyref=/=Y,annotation=/=Y]),
+ %% take care of key/keyref later
+ SE2 = SE#schema_element{name=Name,type=CM,uniqueness=Unique,
+ key=Key, occurance=Occur,
+ scope=S5#xsd_state.scope},
+ S6 = insert_substitutionGroup(SE2,S5),
+ S7 = save_object({element,SE2},S6),
+ {{element,{Name,Occur}},S7}
+ end;
+element_content({complexType,S},CT,Env) ->
+ %% complex type definition without a name is returnd and added to
+ %% the content model at this level. A complex type may also contain
+ %% attributes or attribute group references in the end of its content.
+ %%?debug("complexType content: ~p~nenv: ~p~n",[CT,Env]),
+ {SCT,S1} = c_t_properties(CT,#schema_complex_type{},S),
+ {Mixed,S2} = mixed(CT,S1),
+ Complexity = complexity(CT#xmlElement.content),
+ {Object,Name,S7} =
+ case qualify_NCName(CT,S2) of
+ no_name ->
+ {CM,S3} = type(CT#xmlElement.content,
+ in_scope(anonymous,S2),[complexType|Env]),
+ S4 = check_cm(complexType,allowed_content(complexType,Env),CM,S3),
+ Name1 = get_QName('_xmerl_no_name_',CT#xmlElement.namespace,S4),
+ S5 = set_scope(S#xsd_state.scope,S4),
+ {Content,Attributes}=split_content(remove_annotation(CM)),
+ SCT2 = base_type(Content,SCT),
+ CTObj =
+ {complexType,
+ SCT2#schema_complex_type{name=Name1,
+ scope=S5#xsd_state.scope,
+ attributes=Attributes,
+ complexity=Complexity,
+ content=mixify(Mixed,Content)}},
+ {CTObj,Name1,S5};
+ Name2 ->
+ S3 = in_scope(Name2,S2),
+ S3a = push_circularity_mark({typeDef,Name2},S3),
+ {CM,S4} = type(CT#xmlElement.content,S3a,
+ [complexType|Env]),
+ S4a = pop_circularity_mark({typeDef,Name2},S4),
+ S5 = check_cm(complexType,allowed_content(complexType,Env),
+ CM,S4a),
+ S6 = set_scope(S#xsd_state.scope,S5),
+ {Content,Attributes}=split_content(remove_annotation(CM)),
+ SCT2 = base_type(Content,SCT),
+ {{complexType,
+ SCT2#schema_complex_type{name=Name2,
+ scope=S6#xsd_state.scope,
+ attributes=Attributes,
+ complexity=Complexity,
+ content=mixify(Mixed,Content)}},
+ Name2,S6}
+ end,
+ S8 = save_object(Object,S7),
+ S9 = derived_type(Object,S8),
+ {{complexType,Name},S9};
+element_content({attributeGroup,S},AG,Env) ->
+ %% an attribute group always have a name or a ref, the content is
+ %% (annotation?,(attribute | attributGroup)*, anyAttribute?).
+ case qualify_NCName(AG,S) of
+ no_name ->
+ %% an attribute group ref inside complex type def or attr
+ %% group def ( XSD1:3.6.2).
+ Ref = attributeGroup_ref(AG),
+ AGRef =
+ {attributeGroup,get_QName(Ref,AG#xmlElement.namespace,%%QQQ
+ reset_scope(S))},
+ {AGRef,add_ref(S,AGRef)};
+ Name ->
+ %% must occur on top level of schema( XSD1:3.6.2). The
+ %% only thing needed in content are the names of all
+ %% attributes or referenced attribute groups.
+ {CM,S2} = type(AG#xmlElement.content,in_scope(Name,S),
+ [attributeGroup|Env]),
+ S2_1 = out_scope(Name,S2),
+ S3 = check_cm(attributeGroup,allowed_content(attributeGroup,Env),CM,S2_1),
+ S4 = save_object({attributeGroup,
+ #schema_attribute_group{name=Name,
+ content=keep_attributes(CM)}},S3),
+ {{attributeGroup,Name},S4}
+ end;
+element_content({group,S},G,Env) ->
+ %% a model group associates a name with a content model. It can be
+ %% a reference or a definition.
+ %% content is one of all, choice or sequence.
+ case qualify_NCName(G,S) of
+ no_name -> % reference.
+ %% If reference is a recursive ref to a group with the
+ %% same name as this group points at the redefined valid
+ %% schema group. See XMLSchema part 1, section 4.2.2
+ %% "Schema Representation Constraint: Individual Component
+ %% Redefinition"
+ Ref = particle_ref(G),
+ {Occur,S2} = occurance(G,{1,1},S),
+ GRef =
+ {group,
+ {get_QName(Ref,G#xmlElement.namespace,reset_scope(S2)),%%QQQ
+ Occur}},
+ {GRef,add_ref(S2,GRef)};
+ Name -> % definition, always schema or redefine as parent
+ {CM,S2} = type(G#xmlElement.content,in_scope(Name,S),[group|Env]),
+ CM2 = recursive_redefine(Name,CM,S2),
+ S2_1 = out_scope(Name,S2),
+ S3 = check_cm(group,allowed_content(group,Env),CM2,S2_1),
+ S4 = save_object({group,#schema_group{name=Name,
+ content=remove_annotation(CM2)}},S3),
+ {{group,Name},S4}
+ end;
+element_content({all,S},All,Env) ->
+ %% each element occurs 0 or 1 times in any order
+ %% {all,[{element_name,occurance}]}
+%% CM = content_model(Seq#xmlElement.content,S,[all|Env]),
+ {Occur,S1} = occurance(All,{1,1},S),
+ {CM,S2} = type(All#xmlElement.content,S1,[all|Env]),
+ S3 = check_cm(all,allowed_content(all,Env),CM,S2),
+ {{all,{[X||X = {element,_} <- CM],Occur}},S3};
+element_content({sequence,S},Seq,Env) ->
+ %% {sequence,[{element_name,occurance}]}
+%% CM = content_model(Seq#xmlElement.content,S,[sequence|Env]),
+ {Occur,S1} = occurance(Seq,{1,1},S),
+ {CM,S2} = type(Seq#xmlElement.content,S1,[sequence|Env]),
+ S3 = check_cm(sequence,allowed_content(sequence,Env),CM,S2),
+ {{sequence,{remove_annotation(CM),Occur}},S3};
+element_content({choice,S},Choice,Env) ->
+ %% allowed content: (annotation?,
+ %% (element | group | choice | sequence | any)*)
+ %% returns: {choice,[element_name]}
+%% CM = content_model(Choice#xmlElement.content,S,[choice|Env]),
+ {Occur,S1} = occurance(Choice,{1,1},S),
+ {CM,S2} = type(Choice#xmlElement.content,S1,[choice|Env]),
+ S3 = check_cm(choice,allowed_content(choice,Env),CM,S2),
+ {{choice,{remove_annotation(CM),Occur}},S3};
+element_content({any,S},Any,_Env) ->
+ {Occur,S1} = occurance(Any,{1,1},S),
+ NameSpace = wildcard_namespace(Any,S1),
+ PC = processor_contents(Any),
+ ?debug("element_content, any: Any content:~p~n",[Any#xmlElement.content]),
+ Pred = fun(E=#xmlElement{}) -> case kind(E) of
+ annotation -> false;
+ _ -> true
+ end;
+ (_) ->
+ false
+ end,
+ S2 = case filter(Pred,Any#xmlElement.content) of
+ [] -> S1;
+ Err -> %% report error
+ acc_errs(S1,{error_path(Any,Any#xmlElement.name),?MODULE,
+ {unexpected_content_in_any,Err}})
+ end,
+ {{any,{NameSpace,Occur,PC}},S2};
+element_content({IDC,S},El,Env)
+ when IDC==unique;IDC==key;IDC==keyref->
+ QName = qualify_NCName(El,reset_scope(S)),
+ Ref = keyrefer(IDC,El,S),
+ {SelField,S2} = type(El#xmlElement.content,S,[IDC|Env]),
+ case {[X||X={selector,_} <- SelField],[X||X={field,_} <- SelField]} of
+ {[Sel],Fields=[_H|_T]} ->
+ IDConstr = #id_constraint{category=IDC,name=QName,refer=Ref,
+ selector=Sel,fields=Fields},
+ S3=save_idc(IDC,IDConstr,S2),
+ {{IDC,IDConstr},S3};
+ Err ->
+ S3 = acc_errs(S2,{error_path(El,El#xmlElement.name),?MODULE,
+ {erronous_content_in_identity_constraint,IDC,Err}}),
+ {{IDC,[]},S3}
+ end;
+element_content({selector,S},Sel,_Env) ->
+ case get_attribute_value(xpath,Sel,error) of
+ error ->
+ S2 = acc_errs(S,{error_path(Sel,Sel#xmlElement.name),?MODULE,
+ {missing_xpath_attribute,selector}}),
+ {{selector,[]},S2};
+ XPath ->
+ {{selector,XPath},S}
+ end;
+element_content({field,S},F,_Env) ->
+ case get_attribute_value(xpath,F,error) of
+ error ->
+ S2 = acc_errs(S,{error_path(F,F#xmlElement.name),?MODULE,
+ {missing_xpath_attribute,field}}),
+ {{field,[]},S2};
+ XPath ->
+ {{field,XPath},S}
+ end;
+element_content({notation,S},_N,_Env) ->
+ {{notation,[]},S};
+element_content({annotation,S},_Ann,_Env) ->
+ {{annotation,[]},S};
+element_content({appinfo,S},_AI,_Env) ->
+ {{appinfo,[]},S};
+element_content({documentation,S},_D,_Env) ->
+ {{documentation,[]},S};
+element_content({simpleType,S},ST,Env) ->
+ Name = case qualify_NCName(ST,S) of
+ no_name ->
+ get_QName('_xmerl_no_name_',ST#xmlElement.namespace,
+ in_scope('_xmerl_no_name_',S));%%---
+ QName ->
+ QName
+ end,
+ {Type,S2} = type(ST#xmlElement.content,
+ push_circularity_mark({typeDef,Name},in_scope(Name,S)),
+ [simpleType|Env]),
+ S2_1 = pop_circularity_mark({typeDef,Name},S2),
+ S3 = set_scope(S#xsd_state.scope,S2_1),
+ S4 = check_cm(simpleType,allowed_content(simpleType,Env),Type,S3),
+ {BaseType,Facets} = facets(Type,S4),
+ Variety = variety(Type),
+ Final = simpleType_final(ST,S4),
+ Object = {simpleType,#schema_simple_type{name=Name,
+ base_type=BaseType,
+ final=Final,
+ facets=Facets,
+ variety=Variety,
+ content=remove_annotation(Type),
+ scope=S4#xsd_state.scope}},
+ S5 = save_object(Object,S4),
+ S6 = derived_type(Object,S5),
+ {{simpleType,Name},S6};
+element_content({restriction,S},R,Env) ->
+ %% If complexContent, all element definitions of base type must be
+ %% repeated. However, attributes are inherited.
+ %% possible parents are simpleType or complexType (grand parent)
+ %% If parent is simpleType the base type is either the attribute
+ %% base (resolved by base_type/1) or the type defined in content.
+ {CM,S2} = type(R#xmlElement.content,S,[restriction|Env]),
+ S3 = check_cm(restriction,allowed_content(restriction,Env),CM,S2),
+
+ {BaseTypeName,CM2,S4} = restriction_base_type(R,CM,S3), %% a QName
+%% S5 = add_circularity_mark(BaseTypeName,S4),
+ BaseTypeType = base_type_type(Env),
+ {{restriction,{BaseTypeName,remove_annotation(CM2)}},
+ add_ref(S4,{BaseTypeType,BaseTypeName})}; %% Does not return name but content model
+element_content({list,S=#xsd_state{scope=Scope}},L,Env) ->
+ {Type,S2} = list_type(L,S,[list|Env]),
+ S3 = check_cm(list,allowed_content(list,Scope),Type,S2),
+ {{list,remove_annotation(Type)},S3};
+element_content({union,S=#xsd_state{scope=Scope}},U,Env) ->
+ {Types,S2} = union_types(U,S,[union|Env]),
+ S3 = check_cm(union,allowed_content(union,Scope),Types,S2),
+ {{union,Types},S3};
+element_content({include,S=#xsd_state{schema_name=ThisSchema,
+ targetNamespace=TNS}},I,_Env) ->
+ S2 = process_external_schema_once(I,S#xsd_state.targetNamespace,S),
+ {{include,[]},S2#xsd_state{schema_name=ThisSchema,targetNamespace=TNS}};
+element_content({import,S=#xsd_state{schema_name=ThisSchema,
+ targetNamespace=ThisNameS}},I,_Env) ->
+ %% import unlike include and redefine may include definitions from
+ %% other namespaces than the target namespace of the including
+ %% schema.
+
+ %% namespace and schemaLocation
+ Namespace =
+ case get_attribute_value(namespace,I,undefined) of
+ L when is_list(L) ->
+ list_to_atom(L);
+ A -> A
+ end,
+ %% If Namespace is absent, then the import allows unqualified
+ %% reference to components with no target namespace.
+
+ SchemaLocation = get_attribute_value(schemaLocation,I,absent),
+ %% If SchemaLocation is absent, the identification of that schema
+ %% is leaved to the instance, application or user, via the
+ %% mechanisms described ��4.3 in XML Schema Part 1.
+
+ S2 = process_external_schema_once(SchemaLocation,Namespace,S),
+ {{import,[]},S2#xsd_state{schema_name=ThisSchema,
+ targetNamespace=ThisNameS}};
+element_content({redefine,S=#xsd_state{schema_name=ThisSchema}},RD,Env) ->
+ %% Must be a child of "schema" element
+ %% redefine of simple and complex types, groups and attribute
+ %% groups obtained from external files.
+ %% Brings in all definitions of external schema and redefines one.
+ %% External schema must be in same namespace as current schema or
+ %% no namespace.
+ S2 = process_external_schema_once(RD,S#xsd_state.targetNamespace,
+ S#xsd_state{errors=[]}),
+ case S2#xsd_state.errors of
+ [] ->
+%% RedefSource = S2#xsd_state.schema_name,
+ S3 = S2#xsd_state{schema_name=ThisSchema,
+%% global_element_source=add_once({ThisSchema,RedefSource},GES),
+ errors=S#xsd_state.errors},
+ {CM,S4} = type(RD#xmlElement.content,
+ S3#xsd_state{redefine=true},[redefine|Env]),
+ S5 = S4#xsd_state{redefine=false},
+ S6 = check_cm(redefine,allowed_content(redefine,Env),CM,S5),
+ S7 = redefine(CM,S6),
+ {{redefine,[]},S7};
+ Errs ->
+ S3 = S2#xsd_state{schema_name=ThisSchema,
+ errors=Errs++S#xsd_state.errors},
+ {{redefine,[]},S3}
+ end;
+element_content({anyAttribute,S},AA,_Env) ->
+ %% has attributes processContents = (lax | skip | strict) : strict
+ %% namespace = ((##any | ##other) |
+ %% List of (anyURI | (##targetNamespace | ##local)) ) : ##any
+
+ NameSpace = wildcard_namespace(AA,S),
+ PC = processor_contents(AA),
+ Pred = fun(E=#xmlElement{}) -> case kind(E) of
+ annotation -> false;
+ _ -> true
+ end;
+ (_) -> false
+ end,
+ S2 =
+ case filter(Pred,AA#xmlElement.content) of
+ [] -> S;
+ Err -> %% report error
+ acc_errs(S,{error_path(AA,AA#xmlElement.name),?MODULE,
+ {content_in_anyAttribute,Err}})
+ end,
+ {{anyAttribute,{NameSpace,PC}},S2};
+element_content({simpleContent,S},SC,Env) ->
+ %% only as child of complexType.
+ %% allowed content: (annotation?, (restriction | extension))
+ S2 = pre_check_cm(simpleContent,SC#xmlElement.content,mk_name(S#xsd_state.scope),S),
+ case filter(fun(X=#xmlElement{}) ->
+ case kind(X) of
+ restriction -> true;
+ extension -> true;
+ _ -> false
+ end;
+ (_) -> false
+ end,
+ SC#xmlElement.content) of
+ [E] ->
+ element_content(kind(E,S2),E,[simpleContent|Env]);
+ Err ->
+ {[],acc_errs(S2,{error_path(SC,SC#xmlElement.name),?MODULE,
+ {content_in_simpleContent,Err}})}
+ end;
+element_content({complexContent,S},CC,Env) ->
+ S2 = pre_check_cm(complexContent,CC#xmlElement.content,
+ mk_name(S#xsd_state.scope),S),
+ %% the mixed attribute was fetched in the complexType element that
+ %% held this complexContent
+ case filter(fun(X=#xmlElement{}) -> case kind(X) of
+ restriction -> true;
+ extension -> true;
+ _ -> false
+ end;
+ (_) -> false
+ end,CC#xmlElement.content) of
+ [E] ->
+ element_content(kind(E,S2),E,[complexContent|Env]);
+ Err ->
+ {[],acc_errs(S2,{error_path(CC,CC#xmlElement.name),?MODULE,
+ {complexContent_content_failure,Err}})}
+ end;
+element_content({extension,S},Ext,Env) ->
+ %% may be used in both simple and complex content with different
+ %% content allowed.
+ %% this should be returned and checked for allowed content in
+ %% parent, but we don't know if base type is a forward reference.
+ BaseType = base_type(Ext),
+ {CM,S2} = type(Ext#xmlElement.content,S,[extension|Env]),
+ S3 = check_cm(extension,allowed_content(extension,S#xsd_state.scope),CM,S2),
+ BaseTypeName = get_QName(BaseType,Ext#xmlElement.namespace,reset_scope(S)),%%QQQ
+ BaseTypeType = base_type_type(Env),
+ {{extension,{BaseTypeName,CM}},add_ref(S3,{BaseTypeType,BaseTypeName})};
+%% The following are facets
+element_content({minExclusive,S},CF,_Env) ->
+ Value = get_value(CF),
+ {{minExclusive,Value},S};
+element_content({minInclusive,S},CF,_Env) ->
+ Value = get_value(CF),
+ {{minInclusive,Value},S};
+element_content({maxExclusive,S},CF,_Env) ->
+ Value = get_value(CF),
+ {{maxExclusive,Value},S};
+element_content({maxInclusive,S},CF,_Env) ->
+ Value = get_value(CF),
+ {{maxInclusive,Value},S};
+element_content({totalDigits,S},CF,_Env) ->
+ Value = get_value(CF),
+ {{totalDigits,Value},S};
+element_content({fractionDigits,S},CF,_Env) ->
+ Value = get_value(CF),
+ {{fractionDigits,Value},S};
+element_content({length,S},CF,_Env) ->
+ Value = get_value(CF),
+ {{length,Value},S};
+element_content({minLength,S},CF,_Env) ->
+ Value = get_value(CF),
+ {{minLength,Value},S};
+element_content({maxLength,S},CF,_Env) ->
+ Value = get_value(CF),
+ {{maxLength,Value},S};
+element_content({enumeration,S},CF,_Env) ->
+ Value = get_value(CF),
+ {{enumeration,Value},S};
+element_content({whiteSpace,S},CF,_Env) ->
+ Value = get_value(CF),
+ {{whiteSpace,Value},S};
+element_content({pattern,S},CF,_Env) ->
+ Value = get_value(CF),
+ {{pattern,Value},S};
+element_content({Other,S=#xsd_state{errors=Errs}},C,_Env) ->
+ case Errs of
+ [] ->
+ {[],acc_errs(S,{error_path(C,C#xmlElement.name),?MODULE,
+ {unknown_content,Other}})};
+ _ ->
+ {[],S}
+ end.
+
+
+type(C,S,Env) ->
+ type(C,S,Env,[]).
+type([E=#xmlElement{}|Els],S,Env,Acc) ->
+ {CM,S2} = element_content(kind(E,S),E,Env),
+ type(Els,set_scope(S#xsd_state.scope,S2),
+ Env,[CM|Acc]);
+type([_H|Els],S,Env,Acc) ->
+ type(Els,S,Env,Acc);
+type([],S,_Env,Acc) ->
+ {flatten(reverse(Acc)),S}.
+
+simpleUrType() ->
+ {anySimpleType,[]}.
+%% simpleUrTypeRef() ->
+%% {anySimpleType,[],'http://www.w3.org/2001/XMLSchema'}.
+urType() ->
+ {anyType,[]}.
+
+
+attribute_type(Att,Env=[Name|_],S) ->
+ %% The attribute type may be referenced by the type attribute or
+ %% explicitly defined as a simpleType inside the attribute
+ %% element. In the latter case the type must be saved with the
+ %% unique name of the scope and name attribute combined.
+ {CM,S2} = type(Att#xmlElement.content,in_scope(Name,S),Env),
+ case remove_annotation(CM) of
+ [] ->
+ case keyNsearch(type,#xmlAttribute.name,
+ Att#xmlElement.attributes,[]) of
+ #xmlAttribute{value=SimpleTypeName} -> %% a QName as string
+ %% This name may be a forward reference to a simple type.
+ TypeRef = {simpleType,get_QName(SimpleTypeName, %%QQQ
+ Att#xmlElement.namespace,
+ reset_scope(S))},
+ {[TypeRef],
+ set_scope(S#xsd_state.scope,add_ref(S2,TypeRef))};
+ _ -> {[{simpleType,simpleUrType()}],
+ set_scope(S#xsd_state.scope,S2)}
+ end;
+ Type ->
+ {Type,set_scope(S#xsd_state.scope,S2)}
+ end.
+
+element_type(El,Env=[Name|_],S) ->
+ %% In the top environment of the schema there may exist: global
+ %% element declarations, substitution group members.
+ %% Other element declarations are local
+ {CM,S2} = type(El#xmlElement.content,in_scope(Name,S),Env),
+ case remove_annotation(CM) of
+ [] -> %% no simple or complex type definition
+ case {get_attribute_value(type,El,no_name),
+ get_attribute_value(substitutionGroup,El,undefined)} of
+ {no_name,SGName} when is_list(SGName) ->
+ QN = get_QName(SGName,El#xmlElement.namespace,reset_scope(S)),%%QQQ
+ case is_simple_type(QN,S2) of
+ true ->
+ exit(this_can_never_happen),
+ %% A substitutionGroup is an element, and
+ %% the type of this element is the
+ %% resolved type of the referenced
+ %% element.
+ TRef = {simpleType,QN},
+ {[TRef],
+ add_ref(set_scope(S#xsd_state.scope,S2),TRef)};
+ _ ->
+ {[{substitutionGroup,QN}],
+ set_scope(S#xsd_state.scope,S2)}
+ end;
+ {TName,_} when is_list(TName) ->
+ QN = get_QName(TName,El#xmlElement.namespace,reset_scope(S2)),%%QQQ
+ case is_simple_type(QN,S2) of
+ true ->
+ TRef={simpleType,QN},
+ {[TRef],
+ add_ref(set_scope(S#xsd_state.scope,S2),TRef)};
+ _ ->
+ TRef = {simple_or_complex_Type,QN},
+ {[TRef],
+ add_ref(set_scope(S#xsd_state.scope,S2),TRef)}
+ end;
+ _ ->
+ case {get_attribute_value(ref,El,no_name),
+ is_global_env(Env)} of
+ {Ref,false} when is_list(Ref) ->
+ %% a ref attribute references an element
+ {[{element,
+ get_QName(Ref,El#xmlElement.namespace,%%QQQ
+ reset_scope(S))}],
+ set_scope(S#xsd_state.scope,S2)};
+ _ ->
+ {[urType()],
+ set_scope(S#xsd_state.scope,S2)}
+ end
+ end;
+%% Type ->
+%% {Type,set_scope(S#xsd_state.scope,S2)}
+ _Type ->
+ {CM,set_scope(S#xsd_state.scope,S2)}
+ end.
+
+%% list_type/3 -> list() | name()
+list_type(L,S,Env) ->
+ case keyNsearch(itemType,#xmlAttribute.name,L#xmlElement.attributes,[]) of
+ [] ->
+%% {element(1,type(L#xmlElement.content,S,Env)),S};
+ type(L#xmlElement.content,S,Env);
+ #xmlAttribute{value=V} ->
+ %% this type should be preliminary saved and checked after
+ %% the parsing of the schema.
+ TypeRef ={simpleType,
+ get_QName(V,L#xmlElement.namespace,reset_scope(S))},
+ {[TypeRef],add_ref(S,TypeRef)}
+ end.
+union_types(U,S,Env) ->
+ {MemberTypes,S2} =
+ case keyNsearch(memberTypes,#xmlAttribute.name,U#xmlElement.attributes,[]) of
+ [] ->
+ {[],S};
+ #xmlAttribute{value = NameString} ->
+ Names = namestring2namelist(NameString),
+ UTypeRefs =
+ [{simpleType,get_QName(X,U#xmlElement.namespace,
+ reset_scope(S))}||X<-Names],
+ {UTypeRefs,foldl(fun(X,S_in) -> add_ref(S_in,X) end,S,UTypeRefs)}
+ end,
+ {DefinedTypes,S3} = union_types1(U#xmlElement.content,S2,Env),
+ {MemberTypes++DefinedTypes,S3}.
+
+union_types1(C,S,Env) ->
+ union_types1(C,S,Env,[]).
+union_types1([],S,_Env,Acc) ->
+ {Acc,S};
+union_types1([C=#xmlElement{}|Cs],S,Env,Acc) ->
+ case element_content(kind(C,S),C,Env) of
+ {ST={simpleType,_},S2} ->
+ union_types1(Cs,S2,Env,[ST|Acc]);
+ {{annotation,_},S2} ->
+ union_types1(Cs,S2,Env,Acc);
+ {IllegalType,S2} ->
+ Err = {error_path(C,C#xmlElement.name),?MODULE,
+ {union_member_type_not_simpleType,IllegalType}},
+ union_types1(Cs,acc_errs(S2,Err),Env,Acc)
+ end;
+union_types1([_H|T],S,Env,Acc) ->
+ union_types1(T,S,Env,Acc).
+
+%% If a group in a redefine refer to itself the reference is to the
+%% "old" definition of the group. See XMLSchema part 1, section 4.2.2
+%% "Schema Representation Constraint: Individual Component
+%% Redefinition"
+recursive_redefine(Name,CM,S=#xsd_state{redefine=true}) ->
+ case remove_annotation(CM) of
+ [{MG,{C,Occ}}] ->
+ [{MG,{recursive_redefine2(Name,C,S),Occ}}];
+ _ ->
+ CM
+ end;
+recursive_redefine(_,CM,_) ->
+ CM.
+recursive_redefine2(Name,[{group,{Name,Occ}}|T],S) ->
+ %% Rename old group definition
+ case rename_redef_group(Name,S) of
+ failed ->
+ [{group,{Name,Occ}}|T];
+ NewName ->
+ [{group,{NewName,Occ}}|T]
+ end;
+recursive_redefine2(Name,[{MG,{C,Occ}}|T],S)
+ when MG =:= sequence; MG =:= choice; MG=:= all; MG=:= group ->
+ C2 = recursive_redefine2(Name,C,S),
+ [{MG,{C2,Occ}}|recursive_redefine2(Name,T,S)];
+recursive_redefine2(Name,[H|T],S) ->
+ [H|recursive_redefine2(Name,T,S)];
+recursive_redefine2(_,[],_) ->
+ [].
+
+rename_redef_group(Name={LN,Scope,NS},S) ->
+ %% Scope must be []
+ NewName = {LN,['#redefine'|Scope],NS},
+ case resolve({group,NewName},S) of
+ {SG=#schema_group{name=Name},_} ->
+ save_object({group,SG#schema_group{name=NewName}},S),
+ NewName;
+ _ ->
+ failed
+ end.
+
+
+add_ref(S=#xsd_state{unchecked_references=UR},STRef={simpleType,Ref}) ->
+ case {is_builtin_simple_type(Ref),Ref} of
+ {true,_} ->
+ S;
+ {_,{'',_,_}} ->
+ S;
+ _ ->
+ S2 = S#xsd_state{unchecked_references=add_once(STRef,UR)},
+ add_circularity_ref(STRef,S2)
+ end;
+add_ref(S=#xsd_state{unchecked_references=UR},STRef={simple_or_complex_Type,Ref}) ->
+ case {is_builtin_simple_type(Ref),Ref} of
+ {true,_} ->
+ S;
+ {_,{'',_,_}} ->
+ S;
+ {_,{anyType,_,?XSD_NAMESPACE}} ->
+ S;
+ {_,{anySimpleType,_,?XSD_NAMESPACE}} ->
+ S;
+ _ ->
+ S2 = S#xsd_state{unchecked_references=add_once(STRef,UR)},
+ add_circularity_ref(STRef,S2)
+ end;
+add_ref(S,{complexType,{anyType,_,?XSD_NAMESPACE}}) ->
+ S;
+add_ref(S=#xsd_state{unchecked_references=UR},Ref) ->
+ S2 = S#xsd_state{unchecked_references=add_once(Ref,UR)},
+ add_circularity_ref(Ref,S2).
+%% add_ref(S=#xsd_state{unchecked_references=UR},Ref) ->
+%% S#xsd_state{unchecked_references=add_once(Ref,UR)}.
+
+%% Name of simpleType/complexType is unique within the whole
+%% environment, which is checked elsewhere, so ignore the kind of type
+%% for simplicity.
+add_circularity_ref(Ref={Kind,To},S=#xsd_state{circularity_disallowed=CD,
+ redefine=false})
+ when Kind==simpleType;Kind==simple_or_complex_Type;Kind==complexType ->
+ case get_circularity_mark(Ref,S) of
+ [] ->
+ S;
+ From -> %% This is the node from which the graph reaches Ref
+ S#xsd_state{circularity_disallowed=add_once({From,{typeDef,To}},CD)}
+ end;
+add_circularity_ref(_,S) ->
+ S.
+get_circularity_mark({TD,_},S)
+ when TD==simpleType;TD==complexType;TD==simple_or_complex_Type ->
+ case S#xsd_state.circularity_stack of
+ [From={typeDef,_}|_] ->
+ From;
+ _ -> []
+ end;
+get_circularity_mark(_,_S) ->
+ [].
+
+push_circularity_mark(Mark,S=#xsd_state{circularity_stack=CS,
+ redefine=false}) ->
+ S#xsd_state{circularity_stack=[Mark|CS]};
+push_circularity_mark(_,S) ->
+ S.
+pop_circularity_mark(Mark,S=#xsd_state{redefine=false}) ->
+ case S#xsd_state.circularity_stack of
+ [Mark|Rest] ->
+ S#xsd_state{circularity_stack=Rest};
+ _ ->
+ S
+ end;
+pop_circularity_mark(_,S) ->
+ S.
+
+derived_type({complexType,#schema_complex_type{name=Name,content=C}},
+ S=#xsd_state{derived_types=DT}) ->
+ case {keymember(restriction,1,C),keymember(extension,1,C)} of
+ {false,false} ->
+ S;
+ _ ->
+ S#xsd_state{derived_types=[{complexType,Name}|DT]}
+ end;
+derived_type({simpleType,#schema_simple_type{name=Name,content=C}},
+ S=#xsd_state{derived_types=DT}) ->
+ case keymember(restriction,1,C) of
+ true ->
+ S#xsd_state{derived_types=[{simpleType,Name}|DT]};
+ _ ->
+ S
+ end.
+
+facets([{annotation,_}|Rest],S) ->
+ facets(Rest,S);
+facets([{restriction,{BaseType,CM}}],_S) ->
+ Facets = [X||X={F,_} <- CM,is_facet(F)],
+ GroupFacets = group_facets(Facets),
+ {BaseType,GroupFacets};
+facets(_,_S) ->
+ {undefined,[]}.
+
+group_facets(Facets) ->
+ group_facets(Facets,[]).
+group_facets(L=[{enumeration,_}|_Rest],Acc) ->
+ {Enums,Rest} = splitwith(fun({enumeration,_}) -> true;
+ (_) -> false
+ end,
+ L),
+ group_facets(Rest,[{enumeration,[X||{enumeration,X}<-Enums]}|Acc]);
+group_facets([H|T],Acc) ->
+ group_facets(T,[H|Acc]);
+group_facets([],Acc) ->
+ reverse(Acc).
+
+simpleType_final(ST,_S) ->
+ Final = get_attribute_value(final,ST,[]),
+ split_by_whitespace(Final,[]).
+
+%% A redefine may contain (simpleType | complexType | group |
+%% attributeGroup)*
+%%{simpleType,Name},{complexType,Name},{group,Name},{attributeGroup,Name}
+redefine([CM|Rest],S) ->
+ S2=redefine(CM,S),
+ redefine(Rest,S2);
+redefine(ST={Type,_Name},S)
+ when Type==simpleType ; Type==complexType ->
+ %% Get the original definition
+ {OriginalType,S2} = resolve(ST,S),
+ %% unnecessary to delete saved object, it will be overwritten.
+ {RedefinedType,S3} = load_redefine_object(ST,S2),
+ {_MergedType,S4} = merge_derived_types(OriginalType,RedefinedType,redefine,S3),
+ S4;
+redefine(_,S) ->
+ %% attributeGroup and group redefines are already redefined
+ S.
+
+keyrefer(keyref,El,S) ->
+ Ref=get_attribute_value(refer,El,undefined),
+ get_QName(Ref,El#xmlElement.namespace,reset_scope(S));
+keyrefer(_,_,_) ->
+ undefined.
+
+remove_annotation(CM) when is_list(CM) ->
+ [X||X = {K,_} <- CM, K=/=annotation].
+remove_attributes(CM) when is_list(CM) ->
+ [X||X = {K,_} <- CM, K=/=attribute,K=/=anyAttribute,K=/=attributeGroup].
+keep_attributes(CM) when is_list(CM) ->
+ [X||X = {K,_} <- CM, K==attribute orelse K==anyAttribute orelse K==attributeGroup].
+split_content([{restriction,{BaseT,CM}}]) ->
+ {[{restriction,{BaseT,remove_attributes(CM)}}],keep_attributes(CM)};
+split_content([{extension,{BaseT,CM}}]) ->
+ {[{extension,{BaseT,remove_attributes(remove_annotation(CM))}}],
+ keep_attributes(CM)};
+split_content(CM) ->
+ {remove_attributes(CM),keep_attributes(CM)}.
+
+restriction_base_type(R,CM,S) ->
+ case base_type(R) of
+ [] ->
+ case [X||X={simpleType,_}<-CM] of
+ [{simpleType,TypeName}] ->
+ {TypeName,keydelete(simpleType,1,CM),S};
+ Other ->
+ Err = {error_path(R,R#xmlElement.name),?MODULE,
+ {missing_base_type,restriction,Other}},
+ {{[],[],[]},CM,acc_errs(S,Err)}
+ end;
+ BT ->
+ {get_QName(BT,R#xmlElement.namespace,reset_scope(S)),CM,S}
+ end.
+
+base_type([{restriction,{BaseT,_}}],SCT) ->
+ SCT#schema_complex_type{base_type=BaseT};
+base_type([{extension,{BaseT,_}}],SCT) ->
+ SCT#schema_complex_type{base_type=BaseT};
+base_type(_,SCT) ->
+ SCT.
+
+variety([{list,_ItemType}]) ->
+ list;
+variety([{union,_ItemType}]) ->
+ union;
+variety(_) ->
+ atomic.
+
+%% pre_check_cm/2 is for now only for simpleContent | complexContent
+%% which allow content: (annotation?, (restriction | extension))
+pre_check_cm(Kind,Cs=[C=#xmlElement{}|RestC],Name,S) ->
+ case kind(C,S) of
+ {annotation,_} ->
+ pre_check_cm2(Kind,RestC,Name,C,S,0);
+ {_,S2} ->
+ pre_check_cm2(Kind,Cs,Name,C,S2,0)
+ end;
+pre_check_cm(Kind,[_C|Cs],Name,S) ->
+ pre_check_cm(Kind,Cs,Name,S);
+pre_check_cm(Kind,[],Name,S) ->
+ Err = {[],?MODULE,{content_failure,Kind,[],Name}},
+ acc_errs(S,Err).
+
+pre_check_cm2(Kind,[C=#xmlElement{}|Cs],Name,_El,S,N) ->
+ S2 =
+ case kind(C,S) of
+ {restriction,_} ->
+ S;
+ {extension,_} ->
+ S;
+ {Other,S1} ->
+ Err = {error_path(C,C#xmlElement.name),?MODULE,
+ {illegal_element,Kind,Other,Name}},
+ acc_errs(S1,Err)
+ end,
+ pre_check_cm2(Kind,Cs,Name,C,S2,N+1);
+pre_check_cm2(Kind,[_H|T],Name,El,S,N) ->
+ pre_check_cm2(Kind,T,Name,El,S,N);
+pre_check_cm2(_,[],_,_,S,N) when N==1 ->
+ S;
+pre_check_cm2(Kind,[],Name,El,S,N) ->
+ Err =
+ case N of
+ 0 ->
+ {error_path(El,El#xmlElement.name),?MODULE,
+ {content_failure_expected_restriction_or_extension,
+ Kind,Name}};
+ _ ->
+ {error_path(El,El#xmlElement.name),?MODULE,
+ {content_failure_only_one_restriction_or_extension_allowed,
+ Kind,Name}}
+ end,
+ acc_errs(S,Err).
+
+
+%% check_cm(Arg1,Arg2,Arg3)
+%% Arg1 - The allowed content for this type according to schema for schemas
+%% Arg2 - The content model of this particular schema
+check_cm(Kind,S4SCM,ContentModel,S) ->
+ case check_cm2(Kind,S4SCM,ContentModel,S) of
+ {[],_S} ->
+ S;
+ {[_,[]|_],_S} ->
+ S;
+ {_CM,S2} ->
+ S2;
+ Err ->
+ exit({error,{[],?MODULE,{internal_error,Err}}})
+ end.
+
+check_cm2(Kind,#chain{content=S4SCM,occurance=Occ},
+ ContentModel,S) ->
+ case occurance_loop(Occ,fun check_chain/1,
+ [S4SCM,ContentModel,Kind,S],0) of
+ {ok,[]} ->
+ {[],S};
+ {ok,[S4SCMRest,CMRest|_]} ->
+ case all_optional(S4SCMRest) of
+ true ->
+ {CMRest,S};
+ _ ->
+ Err = {[],?MODULE,
+ {mandatory_component_missing,S4SCMRest,Kind}},
+ acc_errs(S,Err)
+ end;
+ {error,{_,_,Reason}} ->
+ Err = {[],?MODULE,{illegal_content,Reason,Kind}},
+ {ContentModel,acc_errs(S,Err)}
+ end;
+check_cm2(Kind,#alternative{content=S4SCM,occurance=Occ},
+ ContentModel,S) ->
+ case occurance_loop(Occ,fun check_alternative/1,
+ [S4SCM,ContentModel,Kind,S],0) of
+ {ok,[]} ->
+ {[],S};
+ {ok,[_,CMRest|_]} ->
+ {CMRest,S};
+ {error,Reason} ->
+ {ContentModel,acc_errs(S,Reason)}
+ end;
+check_cm2(_,{Kind,Occ},CM,S) ->
+ case occurance_loop(Occ,fun check_simple_cm/1,[Kind,CM],0) of
+ {ok,[]} ->
+ {[],S};
+ {ok,[_,CMRest|_]} ->
+ {CMRest,S};
+ {error,Reason} ->
+ {CM,acc_errs(S,Reason)};
+ Err ->
+ {CM,acc_errs(S,Err)}
+ end.
+
+%% check_simple_cm
+check_simple_cm([Kind,CM]) ->
+ check_simple_cm(Kind,CM).
+
+
+check_simple_cm(Kind,[]) ->
+ {error,{[],?MODULE,{no_match,{Kind,[]}}}};
+check_simple_cm(Kind,[{Kind,_}|Rest]) ->
+ {ok,[Kind,Rest]};
+check_simple_cm(Kind,[{Other,_}|Rest])
+ when Kind==simpleType;Kind==complexType ->
+ case Other of
+ simple_or_complex_Type -> {ok,[Kind,Rest]};
+ _ -> {error,{[],?MODULE,{no_match,Other}}}
+ end;
+check_simple_cm(_Kind,[{Other,_}|_]) ->
+ {error,{[],?MODULE,{no_match,Other}}}.
+
+
+check_chain([S4SCM,ContentModel,Kind,S]) ->
+ check_chain(Kind,S4SCM,ContentModel,S).
+check_chain(Kind,[S4SC|S4SCs],ChainCM=[_H|_T],
+ S=#xsd_state{errors=Errs}) ->
+ NewKind =
+ case S4SC of
+ {NK,_} -> NK;
+ _ -> Kind
+ end,
+ case check_cm2(NewKind,S4SC,ChainCM,S) of
+ {ChainCMRest,#xsd_state{errors=Errs}} ->
+ check_chain(Kind,S4SCs,ChainCMRest,S);
+ {_ChainCMRest,_S2} ->
+ case optional(S4SC) of
+ true ->
+ check_chain(Kind,S4SCs,ChainCM,S);
+ _ ->
+ {error,{[],?MODULE,{unmatched_mandatory_object,Kind,S4SC}}}
+ end
+ end;
+check_chain(Kind,[],CM,S) ->
+ {ok,[[],CM,Kind,S]};
+check_chain(Kind,Rest,CM,S) ->
+ case all_optional(Rest) of
+ true ->
+ {ok,[Rest,CM,Kind,S]}; %% or {ok,[[],CM,Kind,S]}
+ _ ->
+ {error,{[],?MODULE,{bad_match,Rest,CM}}}
+
+ end.
+
+
+check_alternative([S4SC,CM,Kind,S]) ->
+ check_alternative(Kind,S4SC,CM,S).
+check_alternative(Kind,[S4SC|S4SCs],AltCM = [_H|_T],
+ S=#xsd_state{errors=Err}) ->
+ NewKind =
+ case S4SC of
+ {NK,_} -> NK;
+ _ -> Kind
+ end,
+ case check_cm2(NewKind,S4SC,AltCM,S) of
+ {AltCMRest,#xsd_state{errors=Err}} ->
+ {ok,[[S4SC],AltCMRest,Kind,S]};
+ {AltCMRest,_S2} ->
+ check_alternative(Kind,S4SCs,AltCMRest,S)
+ end;
+check_alternative(Kind,[],_AltCM,_S) ->
+ {error,{[],?MODULE,{no_match,Kind}}}.
+
+
+%% occurance_loop keeps track of the right number of elements
+%% Third argument is a list: [S4SContent,ContentModel]
+%% returns {ok,Rest} where Rest is the next unmatched abstract
+%% structure.
+occurance_loop({Min,Max},_CheckFun,[_,[]|_Rest],N)
+ when Min =< N, Max >= N ->
+ {ok,[]};
+occurance_loop(Occ={Min,Max},CheckFun,Args,N) ->
+ Nplus1 = N+1,
+ case CheckFun(Args) of
+ {error,{_,_,{no_match,_}}} when Min =< N, Max >= N ->
+ {ok,Args};
+ Err = {error,_} ->
+ Err;
+ {ok,Args} ->
+ {error,{[],?MODULE,{no_match,occurance_kind(Args)}}};
+ {ok,NewArgs} when Nplus1 < Max ->
+ occurance_loop(Occ,CheckFun,NewArgs,Nplus1);
+ Ret = {ok,_NewArgs} ->
+ Ret
+ end.
+
+occurance_kind([Kind,_]) ->
+ Kind;
+occurance_kind([_,_,Kind,_]) ->
+ Kind;
+occurance_kind(_) ->
+ [].
+%% if_simple_hd(S4SCM,ConstrCM)
+%% when is_record(S4SCM,chain);is_record(S4SCM,alternative);is_list(S4SCM) ->
+%% ConstrCM;
+%% if_simple_hd(_,[H|_Tl]) ->
+%% H.
+
+%% if_simple_tl(S4SCM,_ConstrCM)
+%% when is_record(S4SCM,chain);is_record(S4SCM,alternative);is_list(S4SCM) ->
+%% [];
+%% if_simple_tl(_,[_|Tl]) ->
+%% Tl.
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+count_occur({Min,Max}) ->
+% {decrease(Min),decrease(Max)};
+ {decrease(Min),Max};
+count_occur(Other) ->
+ Other.
+
+decrease(I) when is_integer(I), I > 0 ->
+ I-1;
+decrease(I) ->
+ I.
+
+decrease_occurance({K,{ID,Occ}}) ->
+ {K,{ID,count_occur(Occ)}};
+decrease_occurance(Other) ->
+ Other.
+
+get_occur({_,{_,Occ={Min,_}}}) when is_integer(Min) ->
+ Occ;
+get_occur({_,{_,Occ={Min,_},_}}) when is_integer(Min) ->
+ Occ;
+get_occur(Other) ->
+ Other.
+
+%% remove_whitespace(L=[T=#xmlText{}|Rest]) ->
+%% case is_whitespace(T) of
+%% true ->
+%% remove_whitespace(Rest);
+%% _ -> L
+%% end;
+%% remove_whitespace(L) ->
+%% L.
+
+optional(optional_text) ->
+ true;
+optional({_,{0,_}}) ->
+ true;
+optional({_,{_,{0,_}}}) ->
+ true; %% sequence, all or choice
+optional({any,{_,{0,_},_}}) ->
+ true;
+optional(#chain{occurance={0,_}}) ->
+ true;
+optional(#alternative{occurance={0,_}}) ->
+ true;
+optional(#chain{content=Content}) ->
+ catch is_optional_content(Content);
+optional(#alternative{content=Content}) ->
+ catch is_optional_content(Content);
+optional({all,{Content,_}}) ->
+ catch is_optional_content(Content);
+optional(_) ->
+ false.
+
+is_optional_content([H|T]) ->
+ case optional(H) of
+ true ->
+ is_optional_content(T);
+ false ->
+ throw(false)
+ end;
+is_optional_content([]) ->
+ true.
+
+not_optional(X) ->
+ case optional(X) of
+ true ->
+ false;
+ _ ->
+ true
+ end.
+
+all_optional([]) ->
+ true;
+all_optional(L) ->
+ case filter(fun not_optional/1,L) of
+ [] ->
+ true;
+ _ ->
+ false
+ end.
+
+
+%% allowed_content/2 returns a representation of the allowed content
+%% model for that object
+allowed_content(element,_Parents) ->
+ #chain{content=
+ [{annotation,{0,1}},
+ #chain{content=
+ [#alternative{content=
+ [{simpleType,{1,1}},{complexType,{1,1}}],
+ occurance={0,1}},
+ #alternative{content=
+ [{unique,{1,1}},{key,{1,1}},{keyref,{1,1}}],
+ occurance={0,unbounded}}]
+ }]
+ };
+allowed_content(attribute,_Parents) ->
+ #chain{content=[{annotation,{0,1}},{simpleType,{0,1}}]};
+allowed_content(complexType,Parents) ->
+ #chain{content=
+ [{annotation,{0,1}},
+ #alternative{content=
+ [set_occurance(allowed_content(simpleContent,Parents),{1,1}),
+ set_occurance(allowed_content(complexContent,Parents),{1,1}),
+ #chain{content=
+ [#alternative{content=
+ [{group,{1,1}},{all,{1,1}},
+ {choice,{1,1}},{sequence,{1,1}}],
+ occurance={0,1}},
+ #chain{content=
+ [#alternative{content=
+ [{attribute,{1,1}},
+ {attributeGroup,{1,1}}],
+ occurance={0,unbounded}},
+ {anyAttribute,{0,1}}]
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ };
+allowed_content(attributeGroup,Parents) ->
+ case member(simpleContent,Parents) of
+ true ->
+ {annotation,{0,1}};
+ _ ->
+ #chain{content=
+ [{annotation,{0,1}},
+ #chain{content=
+ [#alternative{content=
+ [{attribute,{1,1}},
+ {attributeGroup,{1,1}}],
+ occurance={0,unbounded}},
+ {anyAttribute,{0,1}}]}]}
+ end;
+allowed_content(group,_Parents) ->
+ #chain{content=
+ [{annotation,{0,1}},
+ #alternative{content=
+ [{all,{1,1}},{choice,{1,1}},{sequence,{1,1}}],
+ occurance={0,1}}]};
+allowed_content(all,_Parents) ->
+ #chain{content=[{annotation,{0,1}},{element,{0,unbounded}}]};
+allowed_content(SorC,_Parents) when SorC==sequence;SorC==choice ->
+ #chain{content=
+ [{annotation,{0,1}},
+ #alternative{content=
+ [{element,{1,1}},{group,{1,1}},
+ {choice,{1,1}},{sequence,{1,1}},
+ {any,{1,1}}],
+ occurance={0,unbounded}}]};
+allowed_content(E,_Parents)
+ when E==any;E==selector;E==field;E==notation;E==include;E==import;
+ E==anyAttribute ->
+ {annotation,{0,1}};
+allowed_content(UKK,_Parents) when UKK==unique;UKK==key;UKK==keyref->
+ #chain{content=
+ [{annotation,{0,1}},
+ #chain{content=
+ [{selector,{1,1}},{selector,{1,unbounded}}]}]};
+allowed_content(annotation,_Parents) ->
+ #alternative{content=[{appinfo,{1,1}},{documentation,{1,1}}],
+ occurance={0,unbounded}};
+allowed_content(E,_Parents) when E==appinfo;E==documentation ->
+ {any,{0,unbounded}};
+allowed_content(simpleType,_Parents) ->
+ #chain{content=
+ [{annotation,{0,1}},
+ #alternative{content=[{restriction,{1,1}},{list,{1,1}},
+ {union,{1,1}}]}]};
+allowed_content(restriction,Parents) ->
+ case member(simpleType,Parents) of
+ true ->
+ allowed_content2(restriction,simpleType);
+ _ ->
+ case member(simpleContent,Parents) of
+ true ->
+ allowed_content2(restriction,simpleContent);
+ _ ->
+ allowed_content2(restriction,complexContent)
+ end
+ end;
+allowed_content(LU,_Parent) when LU==list;LU==union ->
+ #chain{content=[{annotation,{0,1}},{simpleType,{0,1}}]};
+allowed_content(schema,_) ->
+ #chain{content=
+ [#alternative{content=
+ [{include,{1,1}},{import,{1,1}},
+ {redefine,{1,1}},{annotation,{1,1}}],
+ occurance={0,1}},
+ #chain{content=
+ [#alternative{content=
+ [#alternative{content=
+ [{simpleType,{1,1}},{complexType,{1,1}},
+ {group,{1,1}},{attributeGroup,{1,1}}]},
+ {element,{1,1}},
+ {attribute,{1,1}},
+ {notation,{1,1}}]},
+ {annotation,{0,unbounded}}],
+ occurance={0,unbounded}}]};
+allowed_content(redefine,_Parents) ->
+ #alternative{content=
+ [{annotation,{1,1}},
+ #alternative{content=
+ [{simpleType,{1,1}},{complexType,{1,1}},
+ {group,{1,1}},{attributeGroup,{1,1}}]}],
+ occurance={0,unbounded}};
+allowed_content(E,_Parents) when E==simpleContent;
+ E==complexContent ->
+ #chain{content=
+ [{annotation,{0,1}},
+ #alternative{content=
+ [{restriction,{1,1}},{extension,{1,1}}]}]};
+allowed_content(extension,Parents) ->
+ case member(simpleContent,Parents) of
+ true ->
+ allowed_content2(extension,simpleContent);
+ _ ->
+ allowed_content2(extension,complexContent)
+ end;
+allowed_content(minExclusive,_Parents) ->
+ [];
+allowed_content(minInclusive,_Parents) ->
+ [];
+allowed_content(maxExclusive,_Parents) ->
+ [];
+allowed_content(maxInclusive,_Parents) ->
+ [];
+allowed_content(totalDigits,_Parents) ->
+ [];
+allowed_content(fractionDigits,_Parents) ->
+ [];
+allowed_content(length,_Parents) ->
+ [];
+allowed_content(minLength,_Parents) ->
+ [];
+allowed_content(maxLength,_Parents) ->
+ [];
+allowed_content(enumeration,_Parents) ->
+ [];
+allowed_content(whiteSpace,_Parents) ->
+ [];
+allowed_content(pattern,_Parents) ->
+ [].
+
+
+
+
+allowed_content2(restriction,simpleType) ->
+ #chain{content=
+ [{annotation,{0,1}},
+ #chain{content=
+ [{simpleType,{0,1}},
+ #alternative{content=
+ [{minExclusive,{1,1}},{minInclusive,{1,1}},
+ {maxExclusive,{1,1}},{maxInclusive,{1,1}},
+ {totalDigits,{1,1}},{fractionDigits,{1,1}},
+ {length,{1,1}},{minLength,{1,1}},
+ {maxLength,{1,1}},{enumeration,{1,1}},
+ {whiteSpace,{1,1}},{pattern,{1,1}}],
+ occurance={0,unbounded}}]}]};
+allowed_content2(restriction,simpleContent) ->
+ #chain{content=
+ [{annotation,{0,1}},
+ #chain{content=
+ [{simpleType,{0,1}},
+ #alternative{content=
+ [{minExclusive,{1,1}},{minInclusive,{1,1}},
+ {maxExclusive,{1,1}},{maxInclusive,{1,1}},
+ {totalDigits,{1,1}},{fractionDigits,{1,1}},
+ {length,{1,1}},{minLength,{1,1}},
+ {maxLength,{1,1}},{enumeration,{1,1}},
+ {whiteSpace,{1,1}},{pattern,{1,1}}],
+ occurance={0,unbounded}}],
+ occurance={0,1}},
+ #chain{content=
+ [#alternative{content=
+ [{attribute,{1,1}},{attributeGroup,{1,1}}],
+ occurance={0,unbounded}},
+ {anyAttribute,{0,1}}]}]};
+allowed_content2(restriction,complexContent) ->
+ #chain{content=
+ [{annotation,{0,1}},
+ #alternative{content=
+ [{group,{1,1}},{all,{1,1}},{choice,{1,1}},
+ {sequence,{1,1}}],
+ occurance={0,1}},
+ #chain{content=
+ [#alternative{content=
+ [{attribute,{1,1}},{attributeGroup,{1,1}}],
+ occurance={0,unbounded}},
+ {anyAttribute,{0,1}}]}]};
+allowed_content2(extension,simpleContent) ->
+ #chain{content=
+ [{annotation,{0,1}},
+ #chain{content=
+ [#alternative{content=
+ [{attribute,{1,1}},{attributeGroup,{1,1}}],
+ occurance={0,unbounded}},
+ {anyAttribute,{0,1}}]}]};
+allowed_content2(extension,complexContent) ->
+ #chain{content=
+ [{annotation,{0,1}},
+ #chain{content=
+ [#alternative{content=
+ [{group,{1,1}},{all,{1,1}},{choice,{1,1}},
+ {sequence,{1,1}}],
+ occurance={0,1}},
+ #chain{content=
+ [#alternative{content=
+ [{attribute,{1,1}},
+ {attributeGroup,{1,1}}],
+ occurance={0,1}},
+ {anyAttribute,{0,1}}]}]}]}.
+
+
+set_occurance(Ch = #chain{},Occ) ->
+ Ch#chain{occurance=Occ};
+set_occurance(Alt = #alternative{},Occ) ->
+ Alt#alternative{occurance=Occ};
+set_occurance({Name,_},Occ) when is_atom(Name) ->
+ {Name,Occ};
+set_occurance(CM,_) ->
+ CM.
+
+
+process_external_schema_once(E,Namespace,S) when is_record(E,xmlElement) ->
+ case get_attribute_value(schemaLocation,E,[]) of
+ [] ->
+ Err = {missing_schemalocation_attribute,E#xmlElement.name},
+ acc_errs(S,Err);
+ Path ->
+ process_external_schema_once(Path,Namespace,S)
+ end;
+process_external_schema_once(SchemaLocation,Namespace,S) ->
+ case fetch_external_schema(SchemaLocation,S) of
+ {E=#xmlElement{},S2} ->
+ case is_already_processed(Namespace,S2) of
+ true ->
+ save_namespace_definition(Namespace,S2);
+ _ ->
+ S3 = save_namespace_definition(Namespace,S2),
+ traverse_ext_schema(E,S3#xsd_state{targetNamespace=Namespace})
+ end;
+ {_,S2} ->
+ S2
+ end.
+
+%% process_external_schema/2 returns:
+%% {ok,some_result()} | {error,reason()}
+process_external_schema(Path,S) when is_list(Path) ->
+ case fetch_external_schema(Path,S) of
+ {E=#xmlElement{},S2} ->
+ traverse_ext_schema(E,S2);
+ {_,S2} ->
+ S2
+ end;
+process_external_schema(absent,S) ->
+ S.
+
+fetch_external_schema(Path,S) when is_list(Path) ->
+ FetchFun = S#xsd_state.fetch_fun,
+ %% {ExtXSD,S2} =
+ case FetchFun(Path,S) of
+ {ok,{file,File},_} ->
+ ?debug("scanning file: ~p~n",[File]),
+ case xmerl_scan:file(File,S#xsd_state.xml_options) of
+ {error,Reason} ->
+ {error,acc_errs(S,{[],?MODULE,{parsing_external_schema_failed,File,Reason}})};
+ {EXSD,_} ->
+ {EXSD,S#xsd_state{schema_name=File}}
+ end;
+ {_,{string,String},_} -> %% this is for a user defined fetch fun that returns an xml document on string format.
+ ?debug("scanning string: ~p~n",[File]),
+ case xmerl_scan:string(String,S#xsd_state.xml_options) of
+ {error,Reason} ->
+ {error,acc_errs(S,{[],?MODULE,{parsing_external_schema_failed,Path,Reason}})};
+ {EXSD,_} ->
+ {EXSD,S#xsd_state{schema_name=Path}}
+ end;
+ {ok,[],_} ->
+ {ok,S};
+ {_,Other,_} ->
+ {error,acc_errs(S,{[],?MODULE,{fetch_fun_failed,Other}})}
+ end;
+fetch_external_schema(absent,S) ->
+ {ok,S}.
+
+
+%% The schema name is also important here because a schema may import
+%% and must include from the same namespace as the target namespace of
+%% the including schema.
+is_already_processed(NameSpace,#xsd_state{schema_name=SchemaName,
+ checked_namespace_nodes=CNS}) ->
+%% case {keymember(SchemaName,2,CNS),keymember(NameSpace,3,CNS)} of
+%% {true,true} ->
+ case keysearch(SchemaName,2,CNS) of
+ {_,{_,_,NameSpace}} ->
+ true;
+ _ ->
+ false
+ end.
+
+%%
+save_namespace_definition(NameSpace,
+ S=#xsd_state{targetNamespace=TNS,
+ global_namespace_nodes=GNS,
+ checked_namespace_nodes=CNS}) ->
+ %% 1) Have to find a matching namespace in the global list for
+ %% this schema, and get the associated prefix. 2) Then check
+ %% whether a schema with this prefix - namespace combinaton
+ %% already is checked, if so do nothing. 3a) If this namespace is
+ %% checked but with another prefix only add the prefix - namespace
+ %% pair to the checked namespace list. 3b) Otherwise add the
+ %% prefix - namespace pair.
+ {Prefix,S2} =
+ case keysearch(TNS,1,GNS) of
+ {value,{_,ImportedNodes}} ->
+ case keysearch(NameSpace,2,ImportedNodes) of
+ {value,{_P,_}} -> {_P,S};
+ _ -> {none,S}
+ end;
+ _ ->
+ Err = {[],?MODULE,{imported_namespace_wo_namespace_definition,NameSpace}},
+ {none,acc_errs(S,Err)}
+ end,
+ %% Instead of 2, 3a and 3b just add_once
+ case Prefix of
+ none ->
+ S2;
+ _ ->
+ S#xsd_state{checked_namespace_nodes=
+ add_once({Prefix,S#xsd_state.schema_name,NameSpace},CNS)}
+ end.
+
+%% prefix_namespace_2global
+
+%% adds {Prefix,Namespace} to the global namespace nodes list for the
+%% targetnamespace. Prefix is the right one found in Nodes.
+prefix_namespace_2global(Namespace,
+ #xmlNamespace{nodes=Nodes},
+ S=#xsd_state{targetNamespace=TNS,
+ global_namespace_nodes=GNS}) ->
+ case keysearch(Namespace,2,Nodes) of
+ {value,{Prefix,_}} ->
+ case keysearch(TNS,1,GNS) of
+ {value,{_,DefinedNamespaces}} ->
+ S#xsd_state{global_namespace_nodes=
+ keyreplace(TNS,1,GNS,
+ {TNS,add_once({Prefix,Namespace},
+ DefinedNamespaces)})};
+ _ ->
+ S#xsd_state{global_namespace_nodes=
+ [{TNS,[{Prefix,Namespace}|default_namespace_by_convention()]}]}
+ end;
+ _ ->
+ S
+ end;
+prefix_namespace_2global(_,_,S) ->
+ S.
+
+
+traverse_ext_schema(E,S) ->
+ TargetNS = target_namespace(E),
+ case {TargetNS,S#xsd_state.targetNamespace} of
+ {undefined,_} ->
+ traverse_ext_schema2(E,S);
+ {TNS,TNS} ->
+ traverse_ext_schema2(E,S);
+ _ ->
+ Err = {error_path(E,schema),?MODULE,{illegal_target_namespace_external_schema,E#xmlElement.name}},
+ acc_errs(S,Err)
+ end.
+traverse_ext_schema2(E,S) ->
+
+ S1 = namespace_nodes(E,S),
+ S2 = element_form_default(E,S1),
+ S3 = attribute_form_default(E,S2),
+ S4 = substitution_default(finalDefault,E,S3),
+ S5 = substitution_default(blockDefault,E,S4),
+ {CM,S6} = traverse_content2(E#xmlElement.content,S5,[]),
+%% ?debug("External schema S6:~n~p~n",[S6]),
+ save_schema_element(CM,S6),
+ S6.
+
+
+attribute_properties([#xmlAttribute{name=default,value=Default}|Rest],
+ Attr,S) ->
+ attribute_properties(Rest,Attr#schema_attribute{default=Default},S);
+attribute_properties([#xmlAttribute{name=fixed,value=Fixed}|Rest],Attr,S) ->
+ attribute_properties(Rest,Attr#schema_attribute{fixed=Fixed},S);
+attribute_properties([#xmlAttribute{name=use,value=Use}|Rest],Attr,S) ->
+ {Use2,S2} = attribute_use(Use,S),
+ attribute_properties(Rest,Attr#schema_attribute{use=Use2},S2);
+attribute_properties([#xmlAttribute{name=form,value=Form}|Rest],Attr,S) ->
+ {Form2,S2} = attribute_form(Form,S),
+ attribute_properties(Rest,Attr#schema_attribute{form=Form2},S2);
+attribute_properties([#xmlAttribute{name=id,value=ID}|Rest],Attr,S) ->
+ S2 = check_and_save_ID(ID,S),
+ attribute_properties(Rest,Attr#schema_attribute{id=ID},S2);
+attribute_properties([_H|Rest],Attr,S) ->
+ attribute_properties(Rest,Attr,S);
+attribute_properties([],Attr,S) ->
+ {Attr,S}.
+attribute_use(Use,S) when Use=="optional";Use=="prohibited";Use=="required" ->
+ {list_to_atom(Use),S};
+attribute_use(Use,S) ->
+ {Use,acc_errs(S,{[],?MODULE,{illegal_use_value,Use}})}.
+attribute_form(Form,S) when Form=="qualified";Form=="unqualified" ->
+ {list_to_atom(Form),S};
+attribute_form(Form,S) ->
+ {Form,acc_errs(S,{[],?MODULE,{illegal_form_value,Form}})}.
+
+element_properties([#xmlAttribute{name=default,value=Default}|Rest],SE,El,S) ->
+ case SE#schema_element.value_constraint of
+ {fixed,_} ->
+ Err = {error_path(El,schema),?MODULE,{"only one of final/default attributes allowed",
+ El#xmlElement.name}},
+ element_properties(Rest,SE,El,acc_errs(S,Err));
+ _ ->
+ element_properties(Rest,SE#schema_element{value_constraint=
+ {default,Default}},El,S)
+ end;
+element_properties([#xmlAttribute{name=fixed,value=Fixed}|Rest],SE,El,S) ->
+ case SE#schema_element.value_constraint of
+ {default,_} ->
+ Err = {error_path(El,schema),?MODULE,
+ {"only one of final/default attributes allowed",
+ El#xmlElement.name}},
+ element_properties(Rest,SE,El,acc_errs(S,Err));
+ _ ->
+ element_properties(Rest,SE#schema_element{value_constraint=
+ {fixed,Fixed}},El,S)
+ end;
+element_properties([#xmlAttribute{name=substitutionGroup,value=SG}|Rest],
+ SE,El,S) ->
+ SGName = get_QName(SG,El#xmlElement.namespace,reset_scope(S)),
+ element_properties(Rest,SE#schema_element{substitutionGroup=SGName},El,
+ add_ref(S,{element,SGName}));
+element_properties([#xmlAttribute{name=form,value=F}|Rest],SE,El,S) ->
+ {Form,S2} = attribute_form(F,S),
+ element_properties(Rest,SE#schema_element{form=Form},El,S2);
+element_properties([#xmlAttribute{name=id,value=ID}|Rest],SE,El,S) ->
+ S2 = check_and_save_ID(ID,S),
+ element_properties(Rest,SE#schema_element{id=ID},El,S2);
+element_properties([#xmlAttribute{name=nillable,value=N}|Rest],SE,El,S) ->
+ case boolean_to_atom(N) of
+ error ->
+ element_properties(Rest,SE,El,
+ acc_errs(S,{error_path(El,schema),?MODULE,{illegal_nillable_value,N}}));
+ N_atom ->
+ element_properties(Rest,SE#schema_element{nillable=N_atom},El,S)
+ end;
+element_properties([#xmlAttribute{name=abstract,value=A}|Rest],SE,El,S) ->
+ case boolean_to_atom(A) of
+ error ->
+ element_properties(Rest,SE,El,
+ acc_errs(S,{error_path(El,schema),?MODULE,{illegal_abstract_value,A}}));
+ A_atom ->
+ element_properties(Rest,SE#schema_element{abstract=A_atom},El,S)
+ end;
+element_properties([#xmlAttribute{name=block,value=B}|Rest],SE,El,S) ->
+ BlockValues = split_by_whitespace(B,[]),
+ case legal_block_values(element,BlockValues) of
+ {error,Reason} ->
+ element_properties(Rest,SE,El,
+ acc_errs(S,{error_path(El,schema),?MODULE,{illegal_block_values,Reason}}));
+ _ ->
+ element_properties(Rest,SE#schema_element{block=BlockValues},El,S)
+ end;
+element_properties([#xmlAttribute{name=final,value=F}|Rest],SE,El,S) ->
+ FinalValues = split_by_whitespace(F,[]),
+ case legal_final_values(element,FinalValues) of
+ {error,Reason} ->
+ element_properties(Rest,SE,El,
+ acc_errs(S,{error_path(El,schema),?MODULE,{illegal_final_values,Reason}}));
+ _ ->
+ element_properties(Rest,SE#schema_element{final=FinalValues},El,S)
+ end;
+element_properties([_H|T],SE,El,S) ->
+ element_properties(T,SE,El,S);
+element_properties([],SE,_El,S) ->
+ {SE,S}.
+
+%% 3.3.3 bullet 2.2
+%% nillable, default, fixed, form, block and type properties must be
+%% absent in element with ref.
+element_forbidden_properties(El,S) ->
+ element_forbidden_properties(El#xmlElement.attributes,El,S).
+element_forbidden_properties([#xmlAttribute{name=nillable,value=V}|Atts],El,S) ->
+ element_forbidden_properties(Atts,El,acc_errs(S,{error_path(El,schema),?MODULE,{forbidden_property,nillable,V}}));
+element_forbidden_properties([#xmlAttribute{name=default,value=V}|Atts],El,S) ->
+ element_forbidden_properties(Atts,El,acc_errs(S,{error_path(El,schema),?MODULE,{forbidden_property,default,V}}));
+element_forbidden_properties([#xmlAttribute{name=fixed,value=V}|Atts],El,S) ->
+ element_forbidden_properties(Atts,El,acc_errs(S,{error_path(El,schema),?MODULE,{forbidden_property,fixed,V}}));
+element_forbidden_properties([#xmlAttribute{name=form,value=V}|Atts],El,S) ->
+ element_forbidden_properties(Atts,El,acc_errs(S,{error_path(El,schema),?MODULE,{forbidden_property,form,V}}));
+element_forbidden_properties([#xmlAttribute{name=block,value=V}|Atts],El,S) ->
+ element_forbidden_properties(Atts,El,acc_errs(S,{error_path(El,schema),?MODULE,{forbidden_property,block,V}}));
+element_forbidden_properties([#xmlAttribute{name=type,value=V}|Atts],El,S) ->
+ element_forbidden_properties(Atts,El,acc_errs(S,{error_path(El,schema),?MODULE,{forbidden_property,type,V}}));
+element_forbidden_properties([#xmlAttribute{}|Atts],El,S) ->
+ element_forbidden_properties(Atts,El,S);
+element_forbidden_properties([],_,S) ->
+ S.
+
+%% 3.3.3 bullet 2.2
+%% complexType, simpleType, key, keyref and unique must be absent in
+%% element with ref.
+element_forbidden_content([],S) ->
+ S;
+element_forbidden_content([El=#xmlElement{}|Els],S) ->
+ case kind(El) of
+ K when K==complexType;K==simpleType;K==key;K==keyref;K==unique ->
+ acc_errs(S,{error_path(El,schema),?MODULE,{element_content_must_not_contain,K,El}});
+ annotation ->
+ element_forbidden_content(Els,S);
+ Other ->
+ acc_errs(S,{error_path(El,schema),?MODULE,{illegal_element_content,Other}})
+ end;
+element_forbidden_content([T=#xmlText{}|Rest],S) ->
+ case is_whitespace(T) of
+ true ->
+ element_forbidden_content(Rest,S);
+ _ ->
+ acc_errs(S,{error_path(T,schema),?MODULE,{illegal_element_content,T}})
+ end.
+
+c_t_properties(El,CT,S) ->
+ c_t_properties(El#xmlElement.attributes,El,CT,S).
+c_t_properties([#xmlAttribute{name=final,value=V}|Rest],El,CT,S) ->
+ FinalValues = split_by_whitespace(V,[]),
+ case legal_final_values(complexType,FinalValues) of
+ {error,Reason} ->
+ Err = {error_path(El,schema),?MODULE,{illegal_final_values,Reason}},
+ c_t_properties(Rest,El,CT,acc_errs(S,Err));
+ _ ->
+ c_t_properties(Rest,El,
+ CT#schema_complex_type{final=FinalValues},S)
+ end;
+c_t_properties([#xmlAttribute{name=block,value=V}|Rest],El,CT,S) ->
+ BlockValues = split_by_whitespace(V,[]),
+ case legal_block_values(complexType,BlockValues) of
+ {error,Reason} ->
+ Err = {error_path(El,schema),?MODULE,
+ {illegal_block_values,Reason}},
+ c_t_properties(Rest,El,CT,acc_errs(S,Err));
+ _ ->
+ c_t_properties(Rest,El,CT#schema_complex_type{block=BlockValues},S)
+ end;
+c_t_properties([#xmlAttribute{name=abstract,value=V}|Rest],El,CT,S) ->
+ case boolean_to_atom(V) of
+ error ->
+ Err = {error_path(El,schema),?MODULE,
+ {illegal_abstract_value,V}},
+ c_t_properties(Rest,El,CT,acc_errs(S,Err));
+ V_atom ->
+ c_t_properties(Rest,El,CT#schema_complex_type{abstract=V_atom},S)
+ end;
+c_t_properties([_H|T],El,CT,S) ->
+ c_t_properties(T,El,CT,S);
+c_t_properties([],_,CT,S) ->
+ {CT,S}.
+
+
+legal_block_values(_,['#all']) ->
+ true;
+legal_block_values(element,BlockValues) ->
+ list_members(BlockValues,[extension,restriction,substitution]);
+legal_block_values(complexType,BlockValues) ->
+ list_members(BlockValues,[extension,restriction]).
+
+legal_final_values(_,['#all']) ->
+ true;
+legal_final_values(_,FinalValues) ->
+ list_members(FinalValues,[extension,restriction]).
+
+boolean_to_atom(B) when B=="1";B=="true" ->
+ true;
+boolean_to_atom(B) when B=="0";B=="false" ->
+ false;
+boolean_to_atom(_) ->
+ error.
+
+
+count_num_el(S=#xsd_state{num_el=N}) ->
+ S#xsd_state{num_el=N+1}.
+set_num_el(S=#xsd_state{},I) when is_integer(I) ->
+ S#xsd_state{num_el=I};
+set_num_el(S=#xsd_state{},#xsd_state{num_el=I}) ->
+ S#xsd_state{num_el=I}.
+
+
+occurance(El=#xmlElement{attributes=Atts},{Min,Max},S) ->
+ AttVal=fun(#xmlAttribute{value=V},Sin) ->
+ case catch mk_int_or_atom(V) of
+ {'EXIT',_} ->
+ Err = {error_path(El,schema),?MODULE,
+ {illegal_occurance_value,V}},
+ {V,acc_errs(Sin,Err)};
+ IAV -> {IAV,Sin}
+ end;
+ (V1,Sin) -> {V1,Sin}
+ end,
+ {MinVal,S2} = AttVal(keyNsearch(minOccurs,#xmlAttribute.name,
+ Atts,Min),S),
+ {MaxVal,S3} = AttVal(keyNsearch(maxOccurs,#xmlAttribute.name,
+ Atts,Max),S2),
+ {{MinVal,MaxVal},S3}.
+
+mk_int_or_atom(V="unbounded") ->
+ list_to_atom(V);
+mk_int_or_atom(V) when is_list(V) ->
+ list_to_integer(V);
+mk_int_or_atom(V) ->
+ V.
+
+%% E is a complexType, possible kind of content is A)simpleContent, B)
+%% complexContent or C) one or zero of 1)group,2)all,3)choice or
+%% 4)sequence, followed by any number of attribute or attributeGroup
+%% and finally one optional anyAttribute
+mixed(E=#xmlElement{content=C},S) ->
+ case {get_attribute_value(mixed,E,undefined),
+ [Y||Y=#xmlElement{}<-C,kind(Y)==simpleContent]} of
+ {_,[_SCE]} ->
+ {false,S}; %% mixed is false in simpleContent
+ {undefined,_} ->
+ case [X||X=#xmlElement{}<-C,
+ kind(X)==complexContent] of
+ [E2] ->
+%% {get_attribute_value(mixed,E2,false),S};
+ mixed(E2,S);
+ _ ->
+ {false,S}
+ end;
+ {M,_} when M=="1";M=="true" -> {true,S};
+ {M,_} when M=="0";M=="false" -> {false,S};
+ {M,_} ->
+ Err = {error_path(E,schema),?MODULE,{invalid_mixed_value,M}},
+ {false,acc_errs(S,Err)}
+ end.
+
+mixify(false,CM) ->
+ CM;
+mixify(true,CM) ->
+ mixify2(CM,[optional_text]).
+mixify2([],Acc) ->
+ reverse(Acc);
+mixify2([H|T],Acc) ->
+ mixify2(T,[optional_text,H|Acc]).
+
+complexity([]) ->
+ undefined;
+complexity([#xmlText{}|T]) ->
+ complexity(T);
+complexity([H|T]) ->
+ case kind(H) of
+ simpleContent ->
+ simple;
+ complexContent ->
+ complex;
+ _ ->
+ complexity(T)
+ end.
+
+%% Validation takes care of the following:
+%% 1) a) Check that targetNamespace attribute in schema matches
+%% namespace URI if the element.
+%% b) If schema don't have a targetNamespace the instance element
+%% must not be namespace-qualified
+
+%% 2) a) Examine type of the element according to the schema and block
+%% attributes in the element decl.
+%% b) Default values and other infoset contributions are applied.
+
+%% 3) Check the immediate attributes and contents of the element
+%% comparing these against the attributes and contents
+%% permitted.
+%% a) simple type:
+%% -verify there are no attributes or elements.
+%% -verify character content matches rules for type.
+%% b) complex type:
+%% -verify attributes present and values ok.
+%% -check subelements according to content model.
+%% validate_xml/2
+validate_xml(El = #xmlElement{name=Name},
+ S=#xsd_state{table=Tab,schemaLocations=SchemaLocations}) ->
+ ElQName = {_,_,Namespace} = mk_EII_QName(Name,El,S),
+ SchemaCM = get_schema_cm(Tab,Namespace),
+ case [X||X={element,{QName,Occ}} <- SchemaCM#schema.content,
+ cmp_name(ElQName,QName,S),
+ at_least_one(Occ)] of
+ [Obj] ->
+ {Object,S2} = load_object(Obj,S),
+ validate_xml(El,Object,S2);
+ _ ->
+ %% In case the namespace in El is not processed even
+ %% though it is present.
+ case is_already_processed(Namespace,S) of
+ true -> %% nothing more to do
+ {error,{error_path(El,Name),?MODULE,
+ {element_not_in_schema,[Name,ElQName,SchemaCM]}}};
+ _ ->
+ case keysearch(if_atom_to_list(Namespace),1,SchemaLocations) of
+ {value,{_,Location}} ->
+ %% namespace present by schemaLocation
+ %% attribute in instance.
+ S1 = prefix_namespace_2global(Namespace,El#xmlElement.namespace,S),
+ S2 = save_namespace_definition(Namespace,S1),
+ S3 = process_external_schema(Location,S2#xsd_state{targetNamespace=Namespace}),
+ validate_xml(El,S3);
+ _ -> %% namespace not imported in schema or instance.
+ {error,{error_path(El,Name),?MODULE,
+ {element_not_in_schema,[Name,ElQName,SchemaCM]}}}
+ end
+ end
+ end.
+%% validate_xml/3
+validate_xml(XMLEl=#xmlElement{},SEl=#schema_element{},S) ->
+ %% check that targetNamespace of schema matches URI of the element.
+ case check_target_namespace(XMLEl,S) of
+ ok ->
+ %% Extract the schemaLocation links in the instance,
+ %% examine type of the element according to the schema and
+ %% the block attributes in the element declaration
+ S2 = schemaLocations(XMLEl,S),
+ ?debug("schemaLocations: ~p~n",[S2#xsd_state.schemaLocations]),
+ #schema_element{name=_Name,type=_Type,block=Bl} = SEl,
+ Block = blocking(Bl,S2#xsd_state.blockDefault), %% complex types, elements
+ Ret = check_element_type([XMLEl],SEl,[],Block,S2,[]),
+ case Ret of
+ {ValXML,UnvalRest,S3} ->
+%% S4 = schema_concistence_checks(S3),
+ {ValXML,UnvalRest,S3};
+ _ ->
+ Ret
+ end;
+ _ ->
+ Err = {error_path(XMLEl,XMLEl#xmlElement.name),?MODULE,
+ {target_namespace_missmatch}},
+ {XMLEl,[],acc_errs(S,Err)}
+ end.
+
+%% check_element_type/3
+%% examine type according to schema including info of block
+%% attributes. If complex type do test recursively
+%% 2 often
+check_element_type(XML=[XMLTxt=#xmlText{}|Rest],CM=[CMEl|CMRest],Env,
+ Block,S,Checked) ->
+ %% XMLTxt ��r det f��rsta av content i element,
+ %% CMEl ��r den till��tna typen enligt schemat
+ case is_whitespace(XMLTxt) of
+ true -> %% Ignore XMLEl
+ check_element_type(Rest,CM,Env,Block,S,[XMLTxt|Checked]);
+ _ -> %% CMEl allows optional_text or is an absent optional element
+ {ResolvedT,S2} = resolve(CMEl,S),
+ case check_text_type(XML,ResolvedT,S2) of
+ {error,Reason} ->
+ case is_optional(CMEl,S) of
+ true ->
+ check_element_type(XML,CMRest,Env,Block,S,Checked);
+ _ ->
+ check_element_type(Rest,CM,Env,Block,
+ acc_errs(S,Reason),Checked)
+ end;
+ {Ret,Rest2,S3} ->
+ check_element_type(Rest2,CMRest,Env,Block,S3,reverse(Ret,Checked))
+ end
+ end;
+%% If CMEl is a sequence more than the first element of the XML list
+%% may match.
+check_element_type(XML=[#xmlElement{}|_],[{sequence,{CM,Occ}}|_CMRest],
+ Env,_Block,S,Checked) ->
+ ?debug("calling sequence/6~n",[]),
+ check_sequence(XML,CM,Occ,Env,set_num_el(S,0),Checked);
+check_element_type(XML=[#xmlElement{}|_],[{choice,{CM,Occ}}|_CMRest],
+ Env,_Block,S,Checked) ->
+ ?debug("calling choice/6~n",[]),
+ check_choice(XML,CM,Occ,Env,set_num_el(S,0),Checked);
+check_element_type(XML=[#xmlElement{}|_],[{all,{CM,Occ}}|_CMRest],
+ Env,_Block,S,Checked) ->
+ ?debug("calling choice/6~n",[]),
+ check_all(XML,CM,Occ,Env,S,Checked,XML);
+%% 3 often. CMEL may be ((simpleType | complexType)?, (unique | key | keyref)*))
+check_element_type(XML=[XMLEl=#xmlElement{}|_],[CMEl|CMRest],Env,
+ Block,S,Checked) ->
+ %% Three possible releations between XMLEl - CMEl:
+ %% (1) XMLEl matches CMEl.
+ %% (2) XMLEl don't matches CMEl and CMEl is optional.
+ %% (3) XMLEl don't matches CMEl, CMEl mandatory, - error.
+ %% On the other side may CMEl also match more elements in
+ %% Rest. This should come down to 2) next function call.
+
+ {ResolvedT,S2} = resolve(CMEl,S),
+ case check_element_type(XML,ResolvedT,Env,Block,S2,[]) of
+ {error,Reason} -> % 3
+ check_element_type(tl(XML),CMRest,Env,Block,
+ acc_errs(S,Reason),[XMLEl|Checked]);
+ {[],_,_} -> % 2
+ check_element_type(XML,CMRest,Env,Block,S,Checked);
+ {XMLEl2,RestXML,S3} -> % 1 This return value does not conform to the others
+ check_element_type(RestXML,[decrease_occurance(CMEl)|CMRest],Env,
+ Block,S3,XMLEl2++Checked)
+ end;
+check_element_type([],[],_Env,_Block,S,Checked) ->
+ {Checked,[],S};
+check_element_type([],[CMEl|CMRest],Env,Block,S,Checked) ->
+ case is_optional(CMEl,S) of
+ true ->
+ check_element_type([],CMRest,Env,Block,S,Checked);
+ _ ->
+ Err = {error_path(Checked,undefined),?MODULE,
+ {missing_mandatory_element,CMEl}},
+ {Checked,[],acc_errs(S,Err)}
+ end;
+check_element_type(_XML=[],
+ #schema_complex_type{name=_Name,base_type=BT,
+ complexity=simple,
+ content=_C} = CT,
+ _Env,_Block,S,Checked) ->
+
+ %% maybe check attributes here as well.
+ {ResolvedType,_} = resolve({simple_or_complex_Type,BT},S),
+ case ResolvedType of
+ #schema_simple_type{} ->
+ {NewVal,S2} = check_type(ResolvedType,[],unapplied,S),
+ {NewVal,[],S2};
+ {simpleType,_} ->
+ {NewVal,S2} = check_type(ResolvedType,[],unapplied,S),
+ {NewVal,[],S2};
+ _ ->
+ {error,{error_path(Checked,undefined),?MODULE,
+ {empty_content_not_allowed,CT}}}
+ end;
+check_element_type([],#schema_complex_type{name=_Name,block=_Bl,content=C},
+ _Env,_Block,S,Checked) ->
+ %% This type must have an empty content to be valid
+ case allow_empty_content(C) of
+ true -> {[],[],S};
+ false ->
+ {error,{error_path(Checked,undefined),?MODULE,
+ {empty_content_not_allowed,C}}}
+ end;
+check_element_type(C,{anyType,_},_Env,_Block,S,_Checked) ->
+ %% permitt anything
+ {C,[],S};
+
+check_element_type(XML=[#xmlText{}|_],Type=#schema_simple_type{},
+ _Env,_Block,S,_Checked) ->
+ check_text_type(XML,Type,S);
+check_element_type(XML=[#xmlText{}|_],Type={simpleType,_NameNS},
+ _Env,_Block,S,_Checked) ->
+ check_text_type(XML,Type,S);
+
+check_element_type(XML=[#xmlText{}|_],
+ #schema_complex_type{name=_Name,base_type=BT,
+ complexity=simple,
+ content=_C},Env,Block,S,Checked) ->
+
+ %% maybe check attributes here as well.
+ {ResolvedType,_} = resolve({simple_or_complex_Type,BT},S),
+ check_element_type(XML,ResolvedType,Env,Block,S,Checked);
+
+%% single schema object
+check_element_type(XML=[_H|_],
+ #schema_complex_type{name=Name,block=Bl,content=C},
+ Env,_Block,S,Checked) ->
+ EnvName = case Name of
+ {LN,_Scope,_NS} -> LN;
+ _ -> anonymous
+ end,
+ Block = blocking(Bl,S#xsd_state.blockDefault),
+ check_element_type(XML,C,[EnvName|Env],Block,name_scope(Name,S),Checked);
+
+%% 1
+check_element_type(XML=[XMLEl=#xmlElement{name=Name}|RestXML],
+ CMEl=#schema_element{name=CMName,type=Type},
+ Env,Block,S,Checked) ->
+ ElName = mk_EII_QName(Name,XMLEl,S#xsd_state{scope=element(2,CMName)}),
+ {Min,Max} = CMEl#schema_element.occurance,
+ case cmp_name(ElName,CMName,S) of %% substitutionGroup
+ true when S#xsd_state.num_el =< Max ->
+ S1 = id_constraints(CMEl,XMLEl,S),
+ %% If CMEl element has a substitutionGroup we have to
+ %% switch to the rigth element and type here.
+ {CMEl2,Type2,S2} =
+ if
+ ElName =:= CMName ->
+ {CMEl,Type,S1};
+ true ->
+ case resolve({element,ElName},S1) of
+ {SESub=#schema_element{type=SubType},Ssub} ->
+ {SESub,SubType,Ssub};
+ {_,Ssub} ->
+ {CMEl,Type,Ssub}
+ end
+ end,
+
+ {ResolvedType,S3} = resolve(Type2,XMLEl,S2),
+ %% What's the value of Resolve?: It must be a simpleType,
+ %% complexType or an identity-constraint object
+ XsiFactors = xsi_factors(CMEl2),
+ {XMLEl2,S4} = check_attributes(XMLEl,ResolvedType,
+ XsiFactors,S3),
+ S5 = check_abstract(ElName,XMLEl,CMEl,S4),
+ S6 = check_form(ElName,Name,XMLEl,
+ actual_form_value(CMEl#schema_element.form,
+ S5#xsd_state.elementFormDefault),
+ S5),
+ %Step into content of XML element.
+ {Content,_,S7} =
+ case
+ check_element_type(XMLEl2#xmlElement.content,
+ ResolvedType,Env,
+ Block,S6,Checked) of
+ {error,Reason} ->
+ {XMLEl2#xmlElement.content,[],acc_errs(S6,Reason)};
+ Result ={_,[],_} -> Result;
+ {_,UnexpectedRest,_} ->
+ Err = {error_path(XMLEl,Name),?MODULE,
+ {unexpected_rest,UnexpectedRest}},
+ {XMLEl2#xmlElement.content,[],
+ acc_errs(S6,Err)}
+ end,
+ {[XMLEl2#xmlElement{content=reverse(Content)}],
+ RestXML,
+ set_scope(S5#xsd_state.scope,set_num_el(S7,S6))};
+ true ->
+ {error,{error_path(XMLEl,Name),?MODULE,
+ {element_not_suitable_with_schema,ElName,S}}};
+ _ when S#xsd_state.num_el >= Min ->
+ %% it may be a match error or an optional element not
+ %% present
+ {[],XML,S#xsd_state{num_el=0}};
+ _ ->
+ {error,{error_path(XMLEl,Name),?MODULE,
+ {element_not_suitable_with_schema,ElName,CMName,CMEl,S}}}
+ end;
+check_element_type(XML,#schema_group{content=[CM]},Env,Block,S,Checked) ->
+ %% content may contain one of all | choice | sequence or empty
+ check_element_type(XML,CM,Env,Block,S,Checked);
+check_element_type(XML,#schema_group{content=[]},_Env,_Block,_S,_Checked) ->
+ {error,{error_path(XML,undefined),?MODULE,{no_element_expected_in_group,XML}}};
+check_element_type(XML=[#xmlElement{content=_Content}|_Rest],
+ {sequence,{Els,Occ}},Env,_Block,S,Checked) ->
+ ?debug("calling sequence/6~n",[]),
+ case check_sequence(XML,Els,Occ,Env,S#xsd_state{num_el=0},Checked) of
+ Err = {error,_} ->
+ Err;
+ {ValidContent,Rest2,S2} ->
+ %% The sequence may consume more than one element
+ %%{ValidContent,Rest,acc_errs(S2,{sequence_unexpected_rest_objects,UnexpRest})}
+ {ValidContent,Rest2,S2}
+ end;
+check_element_type(XML=[#xmlElement{}|_Rest],
+ {choice,{Els,Occ}},Env,_Block,S,Checked) ->
+ ?debug("calling choice/6~n",[]),
+
+ case check_choice(XML,Els,Occ,Env,S#xsd_state{num_el=0},Checked) of
+ Err = {error,_} ->
+ Err;
+ {ValidContent,Rest2,S2} ->
+ %% The choice may consume more than one element
+ {ValidContent,Rest2,S2}
+ end;
+check_element_type(XML=[E=#xmlElement{name=Name}|Rest],
+ Any={any,{Namespace,_Occ={Min,_},ProcessorContents}},Env,
+ _Block,S,_Checked) ->
+ ?debug("check any: {any,{~p,~p,~p}}~n",[Namespace,Occ,ProcessorContents]),
+ %% ProcessorContents any of lax | strict | skip
+ %% lax: may validate if schema is found
+ %% strict: must validate
+ ElName = mk_EII_QName(Name,E,S),
+ case cmp_any_namespace(ElName,Namespace,S) of
+ true ->
+ case ProcessorContents of
+ skip ->
+ {[E],Rest,S};
+ lax ->
+ {[E],Rest,S};
+%% strict when Namespace==['##local'] ->
+ strict ->
+ case member(absent,Namespace) of
+ true ->
+ %% unqualified well-formed xml is required. The
+ %% xml is well-formed, check that it is
+ %% unqualified.
+ Traverse =
+ fun(#xmlElement{nsinfo=[],
+ attributes=Atts,
+ content=C},
+ Sin,Fun) ->
+ Sin2 = Fun(Atts,Sin,Fun),
+ Fun(C,Sin2,Fun);
+ (#xmlAttribute{namespace=[]},Sin,_Fun) ->
+ Sin;
+ (#xmlText{},Sin,_Fun) -> Sin;
+ ([H|T],Sin,Fun) ->
+ Sin2 = Fun(H,Sin,Fun),
+ Fun(T,Sin2,Fun);
+ ([],Sin,_Fun) ->
+ Sin;
+ (El,Sin,_Fun) ->
+ Err = {error_path(E,Name),?MODULE,
+ {illegal_component_in_any,El}},
+ acc_errs(Sin,Err)
+ end,
+ S2 = Traverse(E,S,Traverse),
+ {[E],Rest,S2};
+ _ ->
+ {Result,S2}=check_any(E,Any,Env,S),
+ {[Result],Rest,S2}
+ end
+ end;
+ false when S#xsd_state.num_el >= Min ->
+ {[],XML,S};
+ _ ->
+ {error,{error_path(E,Name),?MODULE,{element_bad_match,E,Any,Env}}}
+ end;
+check_element_type([],CM,_Env,_Block,_S,Checked) ->
+ %% #schema_complex_type, any, #schema_group, anyType and lists are
+ %% catched above.
+ case CM of
+ {simpleType,_} ->
+ {error,{error_path(Checked,undefined),?MODULE,
+ {empty_content_not_allowed,CM}}};
+ _ ->
+ {error,{error_path(Checked,undefined),?MODULE,
+ {empty_content_not_allowed,CM}}}
+ end;
+check_element_type(XML,CM,_Env,_Block,S,_Checked) ->
+ {error,{error_path(XML,undefined),?MODULE,{match_failure,XML,CM,S}}}.
+%% single xml content object and single schema object
+check_text_type(XML=[#xmlText{}|_],optional_text,S) ->
+% {XMLTxt,optional_text};
+ {XMLText,Rest} = split_xmlText(XML),
+ {XMLText,Rest,S};
+check_text_type(XML=[Txt=#xmlText{}|_],Type={simpleType,_},S) ->
+ {XMLText,Rest} = split_xmlText(XML),
+ {NewVal,S2}=check_type(Type,flatten([X||#xmlText{value=X}<-XMLText]),unapplied,S),
+ {[Txt#xmlText{value=NewVal}],Rest,S2};
+check_text_type(XML=[Txt=#xmlText{}|_],Type=#schema_simple_type{},S) ->
+ {XMLText,Rest} = split_xmlText(XML),
+ {NewVal,S2}=check_type(Type,flatten([X||#xmlText{value=X}<-XMLText]),unapplied,S),
+ {[Txt#xmlText{value=NewVal}],Rest,S2};
+check_text_type([XMLTxt=#xmlText{}|_],CMEl,_S) ->
+ {error,{error_path(XMLTxt,undefined),?MODULE,
+ {cannot_contain_text,XMLTxt,CMEl}}}.
+
+split_xmlText(XML) ->
+ splitwith(fun(#xmlText{}) -> true;(_) -> false end,XML).
+
+%% Sequence
+check_sequence([T=#xmlText{}|Rest],Els,Occ,Env,S,Checked) ->
+ check_sequence(Rest,Els,Occ,Env,S,[T|Checked]);
+check_sequence(Seq=[_InstEl=#xmlElement{}|_],[El|Els],Occ={_Min,_Max},Env,S,Checked) ->
+ %% El any of (element | group | choice | sequence | any)*
+
+ {ResolvedT,S2} = resolve(El,S),
+ case check_element_type(Seq,ResolvedT,Env,[],count_num_el(S2),[]) of
+ {[],_,S3} -> %% An optional element not present or maybe content == [].
+ case is_optional(El,S3) of
+ true ->
+ check_sequence(Seq,Els,Occ,Env,set_num_el(S3,0),Checked);
+ _ ->
+ {error,{error_path(Checked,undefined),?MODULE,
+ {missing_mandatory_elements,El}}}
+ end;
+ Err={error,_Reason} ->
+ case {is_optional(El,S),S#xsd_state.num_el,get_occur(El)} of
+ {true,_,_} ->
+ check_sequence(Seq,Els,Occ,Env,set_num_el(S,0),Checked);
+ {_,N,{_Min2,Max}} when N>=Max ->
+ check_sequence(Seq,Els,Occ,Env,set_num_el(S,0),Checked);
+ _ ->
+ Err
+ end;
+%% {error,_Reason} when Min==0 -> %% optional element
+%% {[],Seq,S}; %% {Checked,Seq,S}
+%% {error,_Reason} when S#xsd_state.num_el >= Max ->
+%% %% This failure because of number limit
+%% {Checked,Seq,S};
+%% Err = {error,_Reason} ->
+%% %% Even though this match failed
+%% Err;
+ {Ret,UnValRest,S3} ->
+ %% must also take care of more elements of same name
+ %% decrease occurance in El for the optional measurements
+ %% when Seq is empty.
+ check_sequence(UnValRest,[decrease_occurance(El)|Els],Occ,Env,
+ count_num_el(set_num_el(S3,S2)),
+ Ret++Checked)
+ end;
+check_sequence(Rest,[],_Occ,_Env,S,Checked) ->
+ {Checked,Rest,set_num_el(S,0)};
+check_sequence([],Els,_Occ,_Env,S,Checked) ->
+ case [X||X={_,Y={_,_}} <- Els,optional(Y)==false] of
+ [] ->
+ {Checked,[],set_num_el(S,0)};
+ MandatoryEls ->
+ {error,{error_path(Checked,undefined),?MODULE,
+ {missing_mandatory_elements,MandatoryEls}}}
+ end.
+%%check_sequence(Seq,[],_Occ,_Env,_S,_Checked) ->
+ %%{error,{unmatched_elements,Seq}}.
+
+
+%% Choice one alternative must occur unless all alternatives are
+%% optional or the entire choice is optional.
+check_choice([T=#xmlText{}|Rest],Els,Occ,Env,S,Checked) ->
+ case is_whitespace(T) of
+ true ->
+ check_choice(Rest,Els,Occ,Env,S,[T|Checked]);
+ _ ->
+ {error,{error_path(T,undefined),?MODULE,
+ {choice_missmatch,T,Els}}}
+ end;
+check_choice(Ch=[#xmlElement{}|_],[El|Els],Occ,Env,S,Checked) ->
+ {ResolvedT,S2} = resolve(El,S),
+ case check_element_type(Ch,ResolvedT,Env,[],count_num_el(S2),[]) of
+ {[],_,_S3} -> %% not matched optional element
+ check_choice(Ch,Els,Occ,Env,S2,Checked);
+ {error,_Reason} -> %% This may happen but not for the
+ %% last alternative element unless the
+ %% entire choice is optional. So, just
+ %% continue.
+ case [X||X=#xmlElement{}<-Checked] of
+ [] ->
+ check_choice(Ch,Els,Occ,Env,S2,Checked);
+ _ ->
+ {Checked,Ch,set_num_el(S,0)}
+ end;
+ {Result,UnValRest,S3} -> %% in this case only more elements of
+ %% El may be allowed
+ check_choice(UnValRest,[El],Occ,Env,
+ count_num_el(set_num_el(S3,S)),Result++Checked)
+ end;
+check_choice([],_,_,_,S,Checked) ->
+ {Checked,[],set_num_el(S,0)};
+check_choice(XML,[],{0,_},_,S,Checked) ->
+ %% Choice is optional
+ {Checked,XML,set_num_el(S,0)};
+check_choice(XML,[],_,_,S,Checked) ->
+ %% Choice has already matched something, the rest is for somthing
+ %% else to match.
+ case S#xsd_state.num_el > 0 of
+ true ->
+ {Checked,XML,set_num_el(S,0)};
+ _ ->
+ {error,{error_path(XML,undefined),?MODULE,
+ {no_element_matching_choice,XML}}}
+ end.
+
+check_all([T=#xmlText{}|RestXML],CM,Occ,Env,S,Checked,XML) ->
+ case is_whitespace(T) of
+ true ->
+ check_all(RestXML,CM,Occ,Env,S,[T|Checked],XML);
+ _ ->
+ {error,{error_path(T,undefined),?MODULE,{all_missmatch,T,CM}}}
+ end;
+check_all(XML=[E=#xmlElement{name=Name}|RestXML],CM,Occ,Env,S,
+ Checked,PrevXML) ->
+ ElName = mk_EII_QName(Name,E,S),
+ case search_delete_all_el(ElName,CM,S) of
+ {CMEl={element,_},RestCM} ->
+ {ResolvedT,S2} = resolve(CMEl,S),
+ case check_element_type(XML,ResolvedT,Env,[],S2,[]) of
+ {[],_,_S3} ->
+ Err = {error_path(E,Name),?MODULE,
+ {validation_error_all,ElName,CM}},
+ check_all(RestXML,CM,Occ,Env,acc_errs(S,Err),
+ Checked,PrevXML);
+ {error,_} when element(1,Occ)==0 ->
+ {[],PrevXML,S};
+ {error,Reason} ->
+ check_all(RestXML,RestCM,Occ,Env,
+ acc_errs(S,Reason),[E|Checked],PrevXML);
+ {Result,UnValRest,S3} ->
+ check_all(UnValRest,RestCM,Occ,Env,
+ S3#xsd_state{scope=S#xsd_state.scope},
+ Result++Checked,PrevXML)
+ end;
+ _ when element(1,Occ) == 0 ->
+ {[],PrevXML,S};
+ _ ->
+ Err = {error_path(E,Name),?MODULE,
+ {element_not_in_all,ElName,E,CM}},
+ check_all(RestXML,CM,Occ,Env,acc_errs(S,Err),[E|Checked],PrevXML)
+ end;
+check_all(XML,[],_,_,S,Checked,_) ->
+ {Checked,XML,S};
+check_all([],CM,_Occ,_,S,Checked,_PrevXML) ->
+ case [X||X={_,Y={_,_}} <- CM,optional(Y)==false] of
+ [] ->
+ {Checked,[],set_num_el(S,0)};
+ MandatoryEls ->
+ {error,{error_path(Checked,undefined),?MODULE,
+ {missing_mandatory_elements_in_all,MandatoryEls}}}
+ end.
+
+check_any(E,Any,_Env,S) ->
+ case catch validate_xml(E,S#xsd_state{scope=[]}) of
+ {[Result],[],S2} ->
+ {Result,S2#xsd_state{scope=S#xsd_state.scope}};
+ {Result,[],S2} ->
+ {Result,S2#xsd_state{scope=S#xsd_state.scope}};
+ {_,_Unvalidated,S2} ->
+ Err = {error_path(E,undefined),?MODULE,{failed_validating,E,Any}},
+ {E,acc_errs(S2#xsd_state{scope=S#xsd_state.scope},Err)};
+ {error,Reason} ->
+ {E,acc_errs(S,Reason)};
+ {'EXIT',Reason} ->
+%% {E,acc_errs(S,format_error({internal_error,Reason},E,Any,Env))}
+ Err = {error_path(E,undefined),?MODULE,{internal_error,Reason}},
+ {E,acc_errs(S,Err)}
+ end.
+
+check_target_namespace(XMLEl,S) ->
+ case {S#xsd_state.targetNamespace,XMLEl#xmlElement.nsinfo} of
+ {undefined,[]} ->
+ ok;
+ {URI,{Prefix,_}} ->
+ NS = XMLEl#xmlElement.namespace,
+ case namespace(Prefix,NS,NS#xmlNamespace.default) of
+ URI ->
+ ok;
+ _ ->
+ failed
+ end;
+ {URI,_} ->
+ case (XMLEl#xmlElement.namespace)#xmlNamespace.default of
+ URI ->
+ ok;
+ _ ->
+ failed
+ end
+ end.
+
+schemaLocations(El=#xmlElement{attributes=Atts},S) ->
+ Pred = fun(#xmlAttribute{name=schemaLocation}) -> false;
+ (#xmlAttribute{namespace={_,"schemaLocation"}}) -> false;
+ (_) -> true
+ end,
+ case lists:dropwhile(Pred,Atts) of
+ [] ->
+ S;
+ [#xmlAttribute{value=Paths}|_] ->
+ case string:tokens(Paths," ") of
+ L when length(L) > 0 ->
+ case length(L) rem 2 of
+ 0 ->
+ PairList =
+ fun([],_Fun) ->
+ [];
+ ([SLNS,SLLoc|Rest],Fun) ->
+ [{SLNS,SLLoc}|Fun(Rest,Fun)]
+ end,
+ S#xsd_state{schemaLocations=PairList(L,PairList)};
+ _ ->
+ Err = {error_path(El,El#xmlElement.name),?MODULE,
+ {schemaLocation_list_failure,Paths}},
+ acc_errs(S,Err)
+ end;
+ _ ->
+ S
+ end;
+ _ ->
+ S
+ end.
+
+blocking([],BlockDefault) ->
+ BlockDefault;
+blocking(Block,_) ->
+ Block.
+
+allow_empty_content([]) ->
+ true;
+allow_empty_content([{restriction,{_BT,_CM=[]}}]) ->
+ true;
+allow_empty_content([{extension,{_BT,_CM=[]}}]) ->
+ true;
+allow_empty_content([{_,{_,{0,_}}}|Rest]) ->
+ allow_empty_content(Rest);
+allow_empty_content([{_,{Content,_}}|Rest]) ->
+ case allow_empty_content(Content) of
+ true ->
+ allow_empty_content(Rest);
+ _ -> false
+ end;
+allow_empty_content(_) ->
+ false.
+
+empty_xml_content([]) ->
+ true;
+empty_xml_content([H|T]) ->
+ case is_whitespace(H) of
+ true ->
+ empty_xml_content(T);
+ _ ->
+ false
+ end;
+empty_xml_content(_) ->
+ false.
+
+xsi_factors(#schema_element{nillable=N}) ->
+ [{nillable,N}].
+check_xsi_factors({nil,_,?XSD_INSTANCE_NAMESPACE},
+ #xmlAttribute{value="true"},XsiFactors,XMLEl,S) ->
+ case key1search(nillable,XsiFactors,false) of
+ {_,true} ->
+ case empty_xml_content(XMLEl#xmlElement.content) of
+ true ->
+ S;
+ _ ->
+ Err = {error_path(XMLEl,XMLEl#xmlElement.name),?MODULE,
+ {element_content_not_nil,XMLEl}},
+ acc_errs(S,Err)
+ end;
+ _ ->
+ S
+ end;
+check_xsi_factors(_,_,_,_,S) ->
+ S.
+
+check_attributes(XMLEl=#xmlElement{attributes=Atts},
+ #schema_complex_type{name=Name,attributes=SchemaAtts},
+ XsiFactors,S) ->
+ %% For each att in Atts check that it is allowed, and has right type.
+ %% For each att in CT that is required check that it exists. Apply
+ %% none present atts that have default values.
+ OldScope = S#xsd_state.scope,
+ SchemaAtts2 = resolve_attributeGroups(SchemaAtts,XMLEl,S),
+ {XMLEl2,S2}=check_attributes(Atts,SchemaAtts2,XMLEl,XsiFactors,
+ name_scope(Name,S),[]),
+ {XMLEl2,S2#xsd_state{scope=OldScope}};
+check_attributes(XMLEl=#xmlElement{attributes=[]},_,_,S) ->
+ {XMLEl,S};
+check_attributes(XMLEl=#xmlElement{name=N,attributes=Atts},_,XsiFactors,S) ->
+ Fun =
+ fun(AttX,S_in) ->
+ case reserved_attribute(AttX,XMLEl#xmlElement.namespace) of
+ true ->
+ AttQName =
+ mk_EII_QName(AttX#xmlAttribute.name,XMLEl,S_in),
+ check_xsi_factors(AttQName,AttX,XsiFactors,XMLEl,S_in);
+ _ ->
+ Err = {error_path(XMLEl,N),?MODULE,
+ {attribute_in_simpleType,XMLEl,AttX}},
+ acc_errs(S_in,Err)
+ end
+ end,
+ {XMLEl,foldl(Fun,S,Atts)}.
+
+check_attributes([],[SA|SchemaAtts],XMLEl,XsiFactors,S,CheckedAtts) ->
+ case resolve(SA,S) of
+ {#schema_attribute{name=Name,use=Use,default=Def,fixed=Fix},S2} ->
+ case {Use,Def,Fix} of
+ {required,_,_} ->
+ Err = {error_path(XMLEl,XMLEl#xmlElement.name),?MODULE,
+ {required_attribute_missed,XMLEl,Name}},
+ check_attributes([],SchemaAtts,XMLEl,XsiFactors,
+ acc_errs(S2,Err),CheckedAtts);
+ {optional,undefined,undefined} ->
+ check_attributes([],SchemaAtts,XMLEl,XsiFactors,
+ S2,CheckedAtts);
+ {optional,Default,undefined} ->
+ NewAtt = create_attribute(Name,Default),
+ check_attributes([],SchemaAtts,XMLEl,XsiFactors,S2,
+ [NewAtt|CheckedAtts]);
+ {optional,undefined,Fix} ->
+ NewAtt = create_attribute(Name,Def),
+ check_attributes([],SchemaAtts,XMLEl,XsiFactors,S2,
+ [NewAtt|CheckedAtts]);
+ {optional,Default,Fix} ->
+ Err = {error_path(XMLEl,XMLEl#xmlElement.name),?MODULE,
+ {default_and_fixed_attributes_mutual_exclusive,
+ Name,Default,Fix}},
+ check_attributes([],SchemaAtts,XMLEl,XsiFactors,
+ acc_errs(S2,Err),CheckedAtts);
+ _ ->
+ check_attributes([],SchemaAtts,XMLEl,XsiFactors,
+ S2,CheckedAtts)
+ end;
+ {{anyAttribute,{_Namespaces,_PC}},S2} ->
+ check_attributes([],SchemaAtts,XMLEl,XsiFactors,
+ S2,CheckedAtts);
+ Err ->
+ ErrMsg={error_path(XMLEl,XMLEl#xmlElement.name),?MODULE,
+ {schema_error,unexpected_object,SA,Err}},
+ check_attributes([],SchemaAtts,XMLEl,XsiFactors,
+ acc_errs(S,ErrMsg),CheckedAtts)
+ end;
+check_attributes([],[],XMLEl,_XsiFactors,S,CheckedAtts) ->
+ {XMLEl#xmlElement{attributes=reverse(CheckedAtts)},S};
+check_attributes([Att|Atts],SchemaAtts,XMLEl,XsiFactors,
+ S,CheckedAtts) ->
+%% AttQName = mk_EII_QName(Att#xmlAttribute.name,XMLEl,S),
+ {IsQ,AttQName} = mk_EII_Att_QName(Att#xmlAttribute.name,XMLEl,S),
+ case search_attribute(IsQ,AttQName,SchemaAtts) of
+ {AttObj={attribute,_},SchemaAtts2} ->
+ {SA,S2} = load_object(AttObj,S),
+ #schema_attribute{type=[AttType]} = SA,
+ {Val,S4} = check_type(AttType,
+ Att#xmlAttribute.value, unapplied,S2),
+ check_attributes(Atts,SchemaAtts2,XMLEl,XsiFactors,S4,
+ [Att#xmlAttribute{value=Val}|CheckedAtts]);
+ {undefined,SchemaAtts2} ->
+ %% check for reserved attributes or anyAttribute
+ case reserved_attribute(Att,XMLEl#xmlElement.namespace) of
+ true ->
+ S2 = check_xsi_factors(AttQName,Att,XsiFactors,XMLEl,S),
+ check_attributes(Atts,SchemaAtts2,XMLEl,XsiFactors,
+ S2,[Att|CheckedAtts]);
+ _ ->
+ case check_anyAttribute(Att,SchemaAtts2,XMLEl,S) of
+ {error,Reason} ->
+ check_attributes(Atts,SchemaAtts2,XMLEl,XsiFactors,
+ acc_errs(S,Reason),CheckedAtts);
+ {Att2,S2} ->
+ check_attributes(Atts,SchemaAtts2,XMLEl,XsiFactors,
+ S2,[Att2|CheckedAtts])
+ end
+ end;
+ Other ->
+ Err = {[],?MODULE,{internal_error,Other}},
+ check_attributes(Atts,SchemaAtts,XMLEl,XsiFactors,
+ acc_errs(S,Err),CheckedAtts)
+ end.
+
+check_anyAttribute(Att,SchemaAtts,El=#xmlElement{name=Name,namespace=NS},S) ->
+ case [Any||Any={anyAttribute,_}<-SchemaAtts] of
+ [] ->
+ {error,{error_path(El,Name),?MODULE,
+ {attribute_not_defined_in_schema,
+ Att#xmlAttribute.name}}};
+ [{_,{Namespace,PC}}|_] ->
+ case check_anyAttribute_namespace(Namespace,NS) of
+ ok ->
+ check_anyAttribute2(Namespace,PC,Att,NS,S);
+ _ ->
+ {error,{error_path(El,Name),?MODULE,
+ {disallowed_namespace,Namespace,
+ NS,Att#xmlAttribute.name}}}
+ end
+ end.
+check_anyAttribute2(_,PC,Att,_,S) when PC==skip;PC==lax ->
+ {Att,S};
+check_anyAttribute2(_Namespace,_,Att,_NS,S) ->
+ %% PC == strict
+ {Att,S}.
+
+check_anyAttribute_namespace(['##any'|_],_NS) ->
+ ok;
+check_anyAttribute_namespace([absent],_NS) ->
+ ok;
+check_anyAttribute_namespace([NS|_],NS) ->
+ ok;
+check_anyAttribute_namespace([{'not',NS}|_],NS) ->
+ false;
+check_anyAttribute_namespace([_H|T],NS) ->
+ check_anyAttribute_namespace2(T,NS).
+check_anyAttribute_namespace2([NS|_],NS) ->
+ ok;
+check_anyAttribute_namespace2([_H|T],NS) ->
+ check_anyAttribute_namespace2(T,NS);
+check_anyAttribute_namespace2([],_NS) ->
+ false.
+
+resolve_attributeGroups(SchemaAtts,El,S) ->
+ resolve_attributeGroups(SchemaAtts,El,S,[],[]).
+resolve_attributeGroups([AG={attributeGroup,_}|SchemaAtts],El,S,Parents,Acc) ->
+ case resolve(AG,S) of
+ {#schema_attribute_group{name=Name,content=AGC},_S2} ->
+ case {member(Name,Parents),S#xsd_state.redefine} of
+ {true,false} ->
+ Err = {error_path(El,El#xmlElement.name),?MODULE,
+ {cirkular_attributeGroup_reference,Name}},
+ resolve_attributeGroups(SchemaAtts,El,acc_errs(S,Err),
+ Parents,Acc);
+ {true,_} ->
+ resolve_attributeGroups(SchemaAtts,El,S,Parents,Acc);
+ _ ->
+ resolve_attributeGroups(AGC++[marker|SchemaAtts],
+ El,S,[Name|Parents],Acc)
+ end;
+ Err ->
+ ErrMsg={error_path(El,El#xmlElement.name),?MODULE,
+ {schema_error,unexpected_object,AG,Err}},
+ resolve_attributeGroups(SchemaAtts,El,acc_errs(S,ErrMsg),
+ Parents,Acc)
+ end;
+resolve_attributeGroups([marker|T],El,S,[_P|Ps],Acc) ->
+ resolve_attributeGroups(T,El,S,Ps,Acc);
+resolve_attributeGroups([H|T],El,S,Parents,Acc) ->
+ resolve_attributeGroups(T,El,S,Parents,[H|Acc]);
+resolve_attributeGroups([],_,_,_,Acc) ->
+ Acc.
+
+check_type(Type=#schema_simple_type{},Value,FacetS,S) ->
+ check_simpleType(Type,Value,FacetS,S);
+check_type({simpleType,{anySimpleType,_}},Value, _FacetS,S) ->
+ {Value,S};
+check_type({union,Types},Value,_FacetS,S) ->
+ check_union_types(Types,Value,S);
+check_type(ST={simpleType,QName={Name,_Scope,_NS}},Value, FacetS,S) ->
+ case is_builtin_simple_type(QName) of
+ true ->
+ {ConstrainedValue,S2} =
+ constrained(QName,default_facets(FacetS,Name),Value,S),
+ case xmerl_xsd_type:check_simpleType(Name,ConstrainedValue,S2) of
+ {ok,_} when Name=='IDREF';Name=='IDREFS' ->
+ %% do something more
+ {ConstrainedValue,S2};
+ {ok,_} ->
+ {ConstrainedValue,S2};
+ {error,Reason} ->
+ ?debug("Error validating type: ~p~nwith value: ~p~n",[Name,Value]),
+ {Value,acc_errs(S2,Reason)}
+ end;
+ _ ->
+ case resolve(ST,S) of
+ {[],S2} ->
+ Err = {[],?MODULE,{could_not_resolve_type,ST}},
+ {Value,acc_errs(S2,Err)};
+ {RefedST,S2} ->
+ check_type(RefedST,Value, unapplied,S2)
+ end
+ end;
+check_type(Type,Value, _FacetS,S) ->
+ Err = {[],?MODULE,{could_not_check_value_for_type,Type}},
+ ?debug("ERROR: not implemented: ~p~nfor value: ~p~n",[Type,Value]),
+ {Value,acc_errs(S,Err)}.
+
+check_simpleType(#schema_simple_type{base_type=BT,final=_Final,
+ facets=Facets,content=Type},
+ Value,FacetS,S) ->
+ case {BT,Type} of
+ {{_ST,_,_},_} ->
+ case is_builtin_simple_type(BT) of
+ true ->
+ {ConstrainedValue,S2} =
+ constrained(BT,merge_facets(default_facets(FacetS,BT),Facets),Value,S),
+ {_,_S3} = check_type({simpleType,BT},ConstrainedValue,applied,S2);
+ _ ->
+ case resolve({simpleType,BT},S) of
+ {BaseST=#schema_simple_type{facets=Facets2},_} ->
+ check_simpleType(BaseST#schema_simple_type{facets=Facets++Facets2},Value,unapplied,S);
+ _ ->
+ Err = {[],?MODULE,{unknown_simpleType,BT}},
+ {Value,acc_errs(S,Err)}
+ end
+ end;
+ {_,[CT]} ->
+ {_,_S2} = check_type(CT,Value,unapplied,S)
+ end.
+
+check_union_types(Types,Value,S) ->
+ check_union_types(Types,Types,Value,S).
+check_union_types([],UT,Value,S) ->
+ acc_errs(S,{[],?MODULE,{value_not_valid,Value,UT}});
+check_union_types([T|Ts],UT,Value,S = #xsd_state{errors=Errs}) ->
+ case check_type(T,Value,unapplied,S) of
+ {Val,S2=#xsd_state{errors=Errs}} ->
+ {Val,S2};
+ {_,_} ->
+ check_union_types(Ts,UT,Value,S)
+ end.
+
+reserved_attribute({RA,_,?XSD_INSTANCE_NAMESPACE},_)
+ when RA==type;RA==nil;RA==schemaLocation;RA==noNamespaceSchemaLocation ->
+ true;
+reserved_attribute(#xmlAttribute{name=Name},#xmlNamespace{nodes=NSNodes}) ->
+ NameStr = if
+ is_atom(Name) -> atom_to_list(Name);
+ true -> Name
+ end,
+ case string:tokens(NameStr,":") of
+ ["xmlns"|_] ->
+ true;
+ [Prefix,InstAtt] when InstAtt=="type";
+ InstAtt=="nil";
+ InstAtt=="schemaLocation";
+ InstAtt=="noNamespaceSchemaLocation" ->
+ case keyNsearch(?XSD_INSTANCE_NAMESPACE,2,NSNodes,[]) of
+ {Prefix,_} ->
+ true;
+ _ ->
+ false
+ end;
+ _ ->
+ false
+ end;
+reserved_attribute(_,_) ->
+ false.
+
+
+default_facets(applied,_) ->
+ [];
+default_facets(_,Type) ->
+ default_facets(Type).
+default_facets({Name,_,_}) when is_list(Name) ->
+ %% Type already proven to be a built in simple type
+ default_facets(list_to_atom(Name));
+default_facets({Name,_,_}) ->
+ default_facets(Name);
+default_facets(TypeName) ->
+ case is_xsd_string(TypeName) of
+ false ->
+ [{whiteSpace,"collapse"}];
+ _ ->
+ []
+ end.
+
+merge_facets([],DefinedF) ->
+ DefinedF;
+merge_facets([F={Name,_}|Rest],DefinedF) ->
+ %% At this moment only F has the allowed value
+ merge_facets(Rest,keyreplace(Name,1,DefinedF,F)).
+
+constrained({T,_,_},Facets,Value,S) ->
+ FacetFuns = [facet_fun(T,F)||F<-Facets],
+ constrained2(FacetFuns,Value,S).
+constrained2([],Value,S) ->
+ {Value,S};
+constrained2([Facet|RestFacets],Value,S) ->
+ case Facet(Value) of
+ {error,Reason} ->
+ constrained2(RestFacets,Value,acc_errs(S,Reason));
+ {ok,NewValue} ->
+ constrained2(RestFacets,NewValue,S)
+ end.
+
+id_constraints(CMEl,XMLEl,S) ->
+ S1 = check_uniqueness(CMEl#schema_element.uniqueness,
+ XMLEl,S),
+ S2 = check_keys([X||{key,X}<-CMEl#schema_element.key],XMLEl,S1),
+ prepare_keyrefs([X||{keyref,X}<-CMEl#schema_element.key],XMLEl,S2).
+
+check_abstract(ElName,El,#schema_element{name=ElName,abstract=true},S) ->
+ acc_errs(S,{error_path(El,El#xmlElement.name),?MODULE,
+ {abstract_element_instance,ElName}});
+check_abstract(ElName,_El,#schema_element{name=ElName},S) ->
+ S;
+check_abstract(ElName,El,#schema_element{},S) ->
+ {XMLEl,_S2} = load_object({element,ElName},S),
+ check_abstract(ElName,El,XMLEl,S).
+
+%% Check of form compliance.
+%% Globally declared elements may be qualified even though
+%% elementformdefault = "unqualified".
+%% If ActualFormValue = "qualified" locally defined names must be
+%% explicitly or implicitly qualified.
+%% check_form({LocalName,Scope,Namespace},LocalName,
+%% InstanceNamespace,ActualFormDefault,S) -> NewS
+check_form({LocalName,_,Namespace},LocalName,
+ El=#xmlElement{name=Name,namespace=NS},qualified,S) ->
+ case NS#xmlNamespace.default of
+ Namespace ->
+ S;
+ _ ->
+ acc_errs(S,{error_path(El,Name),?MODULE,
+ {qualified_name_required,LocalName}})
+ end;
+check_form({LocalName,_,_},LocalName,_El,_ActualFormDefault,S) ->
+ S;
+check_form({_LocalName,[],_},_QualifiedName,_El,_ActualFormDefault,S) ->
+ S;
+check_form({_LocalName,_,_},QualifiedName,El,unqualified,S) ->
+ acc_errs(S,{error_path(El,El#xmlElement.name),?MODULE,
+ {unqualified_name_required,QualifiedName}});
+check_form({_LocalName,_,_},_QualifiedName,_El,_ActualFormDefault,S) ->
+ S.
+
+actual_form_value(undefined,GlobalForm) ->
+ GlobalForm;
+actual_form_value(LocalForm,_) ->
+ LocalForm.
+
+
+check_uniqueness(undefined,_,S) ->
+ S;
+check_uniqueness(Unique,XMLEl,S) ->
+ case Unique of
+ [{unique,#id_constraint{selector={selector,SelectorPath},
+ fields=Fields}}] ->
+ TargetNodeSet = target_node_set(SelectorPath,XMLEl,S),
+ case qualified_node_set(Fields,TargetNodeSet,XMLEl,S) of
+ {[],S1} -> S1;
+ {[_E],S1} -> S1;
+ {L,S1} when is_list(L) ->
+ key_sequence_uniqueness(L,XMLEl,S1)
+ end;
+ _ -> S
+ end.
+
+target_node_set(SelectorPath,XMLEl,S) ->
+ xmerl_xpath:string(SelectorPath,XMLEl,
+ [{namespace,S#xsd_state.namespace_nodes}]).
+
+qualified_node_set(Fields,Set,El,S) ->
+ qualified_node_set([X||{field,X} <- Fields],Set,El,S,[]).
+
+qualified_node_set([],_Set,_El,S,Acc) ->
+ {Acc,S};
+qualified_node_set(_,[],_El,S,Acc) ->
+ {Acc,S};
+qualified_node_set(Paths,[QN|QNs],El,S,Acc) ->
+ Fun = fun(P,Sx) ->
+ case apply_field(P,QN,Sx) of
+ L when length(L) =< 1 -> % Part1:3.11.4.3
+ {L,Sx};
+ Err ->
+ RetErr =
+ {error_path(El,El#xmlElement.name),?MODULE,
+ {illegal_key_sequence_value,Err}},
+ {[],acc_errs(Sx,RetErr)}
+ end
+ end,
+ {KeySequence,S2} = mapfoldl(Fun,S,Paths),
+ qualified_node_set(Paths,QNs,El,S2,[flatten(KeySequence)|Acc]).
+
+
+apply_field(F,El,S) ->
+ %% xmerl_xpath:string returns a list
+ xmerl_xpath:string(F,El,[{namespace,S#xsd_state.namespace_nodes}]).
+
+check_keys([],_XMLEl,S) ->
+ S;
+check_keys([Key=#id_constraint{selector={selector,SelectorPath},
+ fields=Fields}|Keys],XMLEl,S) ->
+ TargetNodeSet = target_node_set(SelectorPath,XMLEl,S),
+ S3=
+ case qualified_node_set(Fields,TargetNodeSet,XMLEl,S) of
+ {L,S1} when length(L)==length(TargetNodeSet) ->
+ %% Part1: 3.11.4.4.2.1
+ S2 = key_sequence_uniqueness(L,XMLEl,S1),
+ save_key(Key#id_constraint{key_sequence=L},S2),
+ S2;
+ {Err,S1} ->
+ acc_errs(S1,{error_path(XMLEl,XMLEl#xmlElement.name),?MODULE,
+ {qualified_node_set_not_correct_for_key,Err}})
+ end,
+ check_keys(Keys,XMLEl,S3).
+
+%% A reference to a key may occur in another environment than the key
+%% was defined. Thus the key must be referenced after the whole
+%% document has been processed. At this moment save the info about the
+%% keyref and compare it with the key later.
+prepare_keyrefs([],_XMLEl,S) ->
+ S;
+prepare_keyrefs([KeyRef=#id_constraint{selector={selector,SelectorPath},
+ fields=Fields}|Rest],XMLEl,S) ->
+ TargetNodeSet = target_node_set(SelectorPath,XMLEl,S),
+ {L,S1} = qualified_node_set(Fields,TargetNodeSet,XMLEl,S),
+ save_keyref(KeyRef#id_constraint{key_sequence=L},S1),
+ prepare_keyrefs(Rest,XMLEl,S1).
+
+
+
+%% key_sequence_uniqueness(KeySequence,XMLElement,State)
+%% Each element in KeySequence has same length and is a list of one or
+%% more elements. key_sequence_uniqueness/2 checks that no two
+%% elements has equal values. If it detects two (or more) elements
+%% that have equal first subelements it must continue comparing the
+%% other subelements of those elements. It returns the state with all
+%% detected errors saved.
+key_sequence_uniqueness([],_,S) ->
+ S;
+key_sequence_uniqueness([_H],_,S) ->
+ S;
+key_sequence_uniqueness([KS=[F1|FRest]|KSs],El,S) ->
+ case is_key_sequence_equal(F1,KSs) of
+ {true,TailOfEquals} ->
+ S1 =
+ case k_s_u(FRest,TailOfEquals,S) of
+ true ->
+ acc_errs(S,{error_path(El,El#xmlElement.name),?MODULE,
+ {key_value_not_unique,KS}});
+ _ ->
+ S
+ end,
+ key_sequence_uniqueness(KSs,El,S1);
+ false ->
+ key_sequence_uniqueness(KSs,El,S)
+ end.
+
+k_s_u([],_,_) ->
+ true;
+k_s_u([F|Fs],KSs,S) ->
+ case is_key_sequence_equal(F,KSs) of
+ {true,TailOfEquals} ->
+ k_s_u(Fs,TailOfEquals,S);
+ _ ->
+ false
+ end.
+
+is_key_sequence_equal(F,KSs) ->
+ is_key_sequence_equal(F,KSs,[]).
+is_key_sequence_equal(_F,[],[]) ->
+ false;
+is_key_sequence_equal(_F,[],Acc) ->
+ {true,reverse(Acc)};
+is_key_sequence_equal(F,[[F1|TlF1]|Rest],Acc) ->
+ case is_key_el_equal(F,F1) of
+ true ->
+ is_key_sequence_equal(F,Rest,[TlF1|Acc]);
+ false ->
+ is_key_sequence_equal(F,Rest,Acc)
+ end.
+
+%% This test must be more elaborated considering the equal facet
+is_key_el_equal(#xmlElement{content=C1},#xmlElement{content=C2}) ->
+ %% content must be empty or text since elements must be of
+ %% simpleType
+ is_equal_content(C1,C2);
+is_key_el_equal(#xmlAttribute{value=V1},#xmlAttribute{value=V1}) ->
+ true;
+is_key_el_equal(_,_) ->
+ false.
+
+is_equal_content([T1|Rest1],[T2|Rest2])
+ when is_record(T1,xmlText),is_record(T2,xmlText) ->
+ case is_whitespace(T1) of
+ true ->
+ case is_whitespace(T2) of
+ true ->
+ is_equal_content(Rest1,Rest2);
+ _ ->
+ is_equal_content(Rest1,[T2|Rest2])
+ end;
+ _ ->
+ case T1#xmlText.value==T2#xmlText.value of
+ true ->
+ is_equal_content(Rest1,Rest2);
+ _ ->
+ false
+ end
+ end;
+is_equal_content([],[]) ->
+ true;
+is_equal_content(_,_) ->
+ false.
+
+schema_concistence_checks(S) ->
+ S2 = check_keyrefs(S),
+ S3 = check_references(S2),
+ S4 = check_substitutionGroups(S3#xsd_state.substitutionGroups,S3),
+ S5 = check_cyclic_defs(S4),
+ reset_state(S5).
+
+reset_state(S) ->
+ S#xsd_state{keyrefs=[],
+ 'IDs'=[],
+ unchecked_references=[],
+ substitutionGroups=[],
+ derived_types=[],
+ circularity_stack=[],
+ circularity_disallowed=[]}.
+
+check_keyrefs(S) ->
+ KeyRefs = S#xsd_state.keyrefs,
+ %% check that a key exists with same name as each keyref
+ KeyExist =
+ fun({keyref,Name,Refer},S_in) ->
+ case load_key(Refer,S_in) of
+ Key=#id_constraint{} ->
+ check_keyref_cardinality(Name,
+ load_keyref(Name,S_in),
+ Key,S_in);
+% S_in;
+ _ ->
+ acc_errs(S_in,{[],?MODULE,
+ {keyref_missed_matching_key,Refer}})
+ end;
+ (Other,S_in) ->
+ acc_errs(S_in,{[],?MODULE,
+ {keyref_unexpected_object,Other}})
+ end,
+ foldl(KeyExist, S, KeyRefs).
+check_keyref_cardinality(_,KR=#id_constraint{category=keyref,fields=KeyRefFs},
+ K=#id_constraint{fields=KeyFs},S) ->
+ case length(KeyRefFs) == length(KeyFs) of
+ true ->
+ S;
+ _ ->
+ acc_errs(S,{[],?MODULE,
+ {cardinality_of_fields_not_equal,KR,K}})
+ end;
+check_keyref_cardinality(Name,_,_,S) ->
+ acc_errs(S,{[],?MODULE,{could_not_load_keyref,Name}}).
+
+check_references(S) when is_record(S,xsd_state) ->
+ check_references(S#xsd_state.unchecked_references,S).
+check_references([],S) ->
+ S;
+check_references([H|T],S) ->
+ check_references(T,check_reference(H,S)).
+check_reference(Ref={attribute,_},S) ->
+ case load_object(Ref,S) of
+ {#schema_attribute{},S2} ->
+ S2;
+ _ ->
+ acc_errs(S,{[],?MODULE,{reference_undeclared,attribute,Ref}})
+ end;
+check_reference(Ref={element,_},S) ->
+ case load_object(Ref,S) of
+ {#schema_element{},S2} ->
+ S2;
+ _ ->
+ acc_errs(S,{[],?MODULE,{reference_undeclared,element,Ref}})
+ end;
+check_reference(Ref={attributeGroup,_},S) ->
+ case load_object(Ref,S) of
+ {#schema_attribute_group{},S2} ->
+ S2;
+ _ ->
+ acc_errs(S,{[],?MODULE,{reference_undeclared,attributeGroup,Ref}})
+ end;
+check_reference(Ref={group,_},S) ->
+ case load_object(Ref,S) of
+ {#schema_group{},S2} -> S2;
+ _ -> acc_errs(S,{[],?MODULE,{reference_undeclared,group,Ref}})
+ end;
+check_reference(Ref={simpleType,_},S) ->
+ case load_object(Ref,S) of
+ {#schema_simple_type{},S2} -> S2;
+ _ -> acc_errs(S,{[],?MODULE,{reference_undeclared,simpleType,Ref}})
+ end;
+check_reference(Ref={complexType,_},S) ->
+ case load_object(Ref,S) of
+ {#schema_complex_type{},S2} -> S2;
+ _ -> acc_errs(S,{[],?MODULE,{reference_undeclared,complexType,Ref}})
+ end;
+check_reference({simple_or_complex_Type,Ref},S=#xsd_state{errors=Errs}) ->
+ %% complex or simple type
+ case check_reference({complexType,Ref},S) of
+ S2=#xsd_state{errors=Errs} -> S2;
+ _ -> check_reference({simpleType,Ref},S)
+ end;
+check_reference(Ref,S) ->
+ acc_errs(S,{[],?MODULE,{internal_error,unknown_reference,Ref}}).
+
+%% Substitution groups should be checked for cirkular references
+%% (invalid), that reference structure and type structure are
+%% concistent.
+check_substitutionGroups([],S) ->
+ S;
+check_substitutionGroups(SGs,S) ->
+ S2 = check_substGr_acyclic(SGs,S),
+ S3 = check_substGr_type_structure(SGs,S2),
+ save_substitutionGroup(SGs,S3).
+check_substGr_acyclic(SGs,S) ->
+ Set = sofs:family(SGs),
+ case catch sofs:family_to_digraph(Set, [acyclic]) of
+ {'EXIT',{cyclic,_}} ->
+ acc_errs(S,{[],?MODULE,{cyclic_substitutionGroup,SGs}});
+ DG ->
+ digraph:delete(DG),
+ S
+ end.
+check_substGr_type_structure([SG|SGs],S) ->
+ check_substGr_type_structure(SGs,check_substGr_type_structure2(SG,S));
+check_substGr_type_structure([],S) ->
+ S.
+check_substGr_type_structure2({Head,SGMembers},S) ->
+ TypeCheck =
+ fun(SG,S_in) ->
+ case catch cmp_substGr_types(Head,SG,S_in) of
+ {'EXIT',_} ->
+ acc_errs(S_in,{[],?MODULE,
+ {substitutionGroup_error,Head,SG}});
+ S_out -> S_out
+ end
+ end,
+ foldl(TypeCheck,S,SGMembers).
+cmp_substGr_types(Head,SG,S) ->
+ {HeadElement,S2} = load_object({element,Head},S),
+ {MemberElement,S3} = load_object({element,SG},S2),
+ case catch derived_or_equal(MemberElement#schema_element.type,
+ HeadElement#schema_element.type,
+ [],S3) of
+ S4=#xsd_state{} ->
+ S4;
+ _ ->
+ acc_errs(S3,{[],?MODULE,{internal_error,derived_or_equal,
+ MemberElement#schema_element.type,
+ HeadElement#schema_element.type}})
+ end.
+check_cyclic_defs(S=#xsd_state{circularity_disallowed=CA}) ->
+ Set = sofs:relation_to_family(sofs:relation(CA)),
+ case catch sofs:family_to_digraph(Set, [acyclic]) of
+ {'EXIT',{cyclic,_}} ->
+ acc_errs(S,{[],?MODULE,{cyclic_definition,CA}});
+ DG ->
+ digraph:delete(DG),
+ S
+ end.
+
+
+
+derived_or_equal(Type,Type,_Block,S) ->
+ S;
+derived_or_equal([MemberTypeRef],[HeadTypeRef],Block,S) ->
+ %% HeadType has to be a
+ {HeadType,_} = resolve(HeadTypeRef,S),
+ {MemberType,_} = resolve(MemberTypeRef,S),
+ derived_or_equal_types(MemberType,HeadType,schema,Block,S).
+derived_or_equal_types(MemT,{anyType,_},Env,Block,S) ->
+ case MemT of
+ #schema_simple_type{content=Cntnt} ->
+ is_derivation_blocked(Env,Block,Cntnt,S);
+ #schema_complex_type{content=Cntnt} ->
+ is_derivation_blocked(Env,Block,Cntnt,S);
+ _ -> S
+ end;
+derived_or_equal_types(MemT=#schema_simple_type{name=Mem,base_type=MemBase},
+ #schema_simple_type{name=Head},Env,Block,S)
+ when Mem==Head;MemBase==Head ->
+ is_derivation_blocked(Env,Block,MemT#schema_simple_type.content,S);
+derived_or_equal_types({simpleType,Name},
+ {simpleType,Name},_Env,_Block,S) ->
+ S;
+derived_or_equal_types(#schema_simple_type{base_type=Name,content=Content},
+ {simpleType,Name},Env,Block,S) ->
+ is_derivation_blocked(Env,Block,Content,S);
+derived_or_equal_types(#schema_simple_type{content=[{LoU,[Content]}]},
+ SimpleType,Env,Block,S) when LoU==list;LoU==union ->
+ {NewMemType,S2}=resolve(Content,S),
+ derived_or_equal_types(NewMemType,SimpleType,Env,Block,S2);
+derived_or_equal_types(MemT=#schema_complex_type{name=Mem,base_type=MemBase},
+ #schema_complex_type{name=Head},Env,Block,S)
+ when Mem==Head;MemBase==Head ->
+ is_derivation_blocked(Env,Block,MemT#schema_complex_type.content,S);
+derived_or_equal_types(MemT,HeadT,_Env,_Block,S) ->
+ acc_errs(S,{[],?MODULE,{type_of_element_not_derived,MemT,HeadT}}).
+
+is_derivation_blocked(schema,_,_,S) ->
+ S;
+is_derivation_blocked(instance,['#all'],Derivation,S) ->
+ acc_errs(S,{derivation_blocked,'#all',Derivation});
+is_derivation_blocked(instance,[],_,S) ->
+ S;
+is_derivation_blocked(instance,Block,C=[{Derivation,_}],S) ->
+ case member(Derivation,Block) of
+ true ->
+ acc_errs(S,{[],?MODULE,{derivation_blocked,Derivation,C}});
+ _ ->
+ S
+ end;
+is_derivation_blocked(instance,_Block,_,S) ->
+ S.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+create_attribute(QName,Value) ->
+ {Name,_Scope,NSName} = QName,
+ #xmlAttribute{name=Name,namespace={Name,NSName},value=Value}.
+
+%% mk_name(L), L must be a list in reversed order
+mk_name(L) ->
+ mk_name(L,[]).
+mk_name([],_Acc) ->
+ [];
+mk_name([H],[]) ->
+ H;
+mk_name([H],Acc) ->
+ list_to_atom(lists:concat([H,'_'|Acc]));
+mk_name([H|T],[]) ->
+ mk_name(T,[H]);
+mk_name([H1|T],Acc) ->
+ mk_name(T,[H1,'_'|Acc]).
+
+cmp_name({LName,Scope,NS},{LName,Scope,NS},_S) ->
+ true;
+%% substitutionGroup allows different names
+cmp_name(XMLName={_,Scope,NS},CMName={_,Scope,NS},S) ->
+ {El,_S2} = load_object({element,XMLName},S),
+ cmp_SG_name(El,CMName,S);
+cmp_name(_,_,_) ->
+ false.
+
+cmp_SG_name(#schema_element{substitutionGroup=Name},Name,_S) ->
+ true;
+cmp_SG_name(#schema_element{substitutionGroup=SGName},CMName,S) ->
+ cmp_name(SGName,CMName,S);
+cmp_SG_name(_,_,_) ->
+ false.
+
+%% Namespace: [{not,NS} | NS]
+%%
+cmp_any_namespace({_,_,EIINS},Namespace,_S) ->
+ case member(EIINS,Namespace) of
+ true ->
+ true;
+ _ ->
+ case keysearch(EIINS,2,Namespace) of
+ {value,{'not',EIINS}} ->
+ false;
+ _ ->
+ true
+ end
+ end.
+
+at_least_one({_Min,Max}) when Max > 0 ->
+ true;
+at_least_one(_) ->
+ false.
+
+is_optional({element,{_,{0,_}}},_S) ->
+ true;
+is_optional({any,{_,{0,_},_}},_S) ->
+ true;
+is_optional({MG,{_CM,{0,_}}},_S)
+ when MG =:= all; MG =:= sequence; MG =:= choice ->
+ true;
+is_optional({MG,{CM,_Occ}},S)
+ when MG =:= all; MG =:= sequence; MG =:= choice ->
+ case member(false,[is_optional(Y,S)||Y<-CM]) of
+ true ->
+ false;
+ _ -> true
+ end;
+is_optional({group,{_,{0,_}}},_S) ->
+ true;
+is_optional(G={group,_},S) ->
+ case resolve(G,S) of
+ {#schema_group{content=[]},_} ->
+ true;
+ {#schema_group{content=[CM]},_} ->
+ is_optional(CM,S)
+ end;
+is_optional(_,_) ->
+ false.
+
+
+
+acc_errs(S=#xsd_state{errors=Errs},ErrMsg) ->
+ S#xsd_state{errors=[ErrMsg|Errs]}.
+
+%% invoked with an element/XML-node and a name of the
+error_path([H|_T],Top) when H==#xmlElement{};H==#xmlText{} ->
+ error_path(H,Top);
+error_path([_H|T],Top) ->
+ error_path(T,Top);
+error_path(#xmlElement{parents=Ps,pos=Pos},Top) ->
+ error_path(Ps,Pos,Top);
+error_path(#xmlAttribute{parents=Ps,pos=Pos},Top) ->
+ error_path(Ps,Pos,Top);
+error_path(#xmlText{parents=Ps,pos=Pos},Top) ->
+ error_path(Ps,Pos,Top);
+error_path(_,_) ->
+ [].
+error_path([],Pos,Top) when is_integer(Pos) ->
+ mk_xpath_path([{Top,Pos}]);
+error_path([],_,Top) ->
+ Top;
+error_path(Nodes,_,_) ->
+ mk_xpath_path(Nodes).
+
+mk_xpath_path(Nodes) ->
+ Slash =
+ fun([H1,H2|T],Fun,Acc) -> Fun([H2|T],Fun,["/",H1|Acc]);
+ ([H1],_,Acc) -> [H1|Acc];
+ ([],_,Acc) -> Acc
+ end,
+ flatten(Slash([lists:concat([A,"[",B,"]"])||{A,B}<-Nodes],Slash,[])).
+
+resolve(XSDType,InstanceEl,S) ->
+ explicit_type(XSDType,InstanceEl,S).
+
+resolve([H],S) ->
+ resolve(H,S);
+resolve(Any={any,_},S) ->
+ {Any,S};
+resolve(Any={anyAttribute,_},S) ->
+ {Any,S};
+resolve(Any={anyType,_},S) ->
+ {Any,S};
+resolve(Seq={sequence,_},S) ->
+ {Seq,S};
+resolve(Choice={choice,_},S) ->
+ {Choice,S};
+resolve({simple_or_complex_Type,QN},S) ->
+%% case load_object({simpleType,QN},S) of
+ case resolve({simpleType,QN},S) of
+ Res={#schema_simple_type{},_S1} ->
+ Res;
+ {[],_S} ->
+ case load_object({complexType,QN},S) of
+ {[],_} ->
+ ?debug("could not load object ~p~n",
+ [{simple_or_complex_Type,QN}]),
+ {[],S};
+ T ->
+ T
+ end;
+ T ->
+ T
+ end;
+resolve({complexType,{anyType,_,_}},S) ->
+ {{anyType,[]},S};
+resolve({simpleType,{anyType,_,_}},S) ->
+ {{anyType,[]},S};
+resolve(ST={simpleType,NameNS={_,_,_}},S) ->
+ case load_object(ST,S) of
+ {[],_S} -> case is_builtin_simple_type(NameNS) of
+ true ->
+ {ST,S};
+ _ ->
+ {[],S}
+ end;
+ Obj ->
+ %resolve(Obj,S)
+ Obj
+ end;
+resolve({substitutionGroup,QName},S) ->
+ %% This shall resolve to the type of the element QName
+ case load_object({element,QName},S) of
+ Ret = {[],_S} -> Ret;
+ {#schema_element{type=[Type]},S2} ->
+ case Type of
+ {simple_or_complex_Type,_} ->
+ resolve(Type,S2);
+ _ ->
+ {Type,S2}
+ end;
+ {#schema_element{type=Type},S2} ->
+ {Type,S2}
+ end;
+resolve({extension,{BaseType,CM}},S) ->
+ case is_builtin_simple_type(BaseType) of
+ true ->
+ {{simpleType,BaseType},S};
+ _ ->
+ case resolve({simple_or_complex_Type,BaseType},S) of
+ {ST = #schema_simple_type{},_} ->
+ {ST,S}; %% any attributes in CM are already
+ %% propagated to the outer complex type.
+ {CT = #schema_complex_type{content=C},_} ->
+ {NewC,S2} = extend_type(C,CM,S),
+ {CT#schema_complex_type{content=NewC},S2};
+ T -> T
+ end
+ end;
+resolve({restriction,{BaseType,CM}},S) ->
+ case is_builtin_simple_type(BaseType) of
+ true ->
+ {{simpleType,BaseType},S};
+ _ ->
+ case resolve({simple_or_complex_Type,BaseType},S) of
+ {ST = #schema_simple_type{content=C},_} ->
+ {NewContent,S2} = restrict_simple_type(C,CM,BaseType,S),
+ {ST#schema_simple_type{content=NewContent},S2};
+ %% the outer complex type.
+ {CT = #schema_complex_type{content=C},_} ->
+ {NewContent,S2} = restrict_type(C,CM,BaseType,S),
+ {CT#schema_complex_type{content=NewContent},S2};
+ T -> T
+ end
+ end;
+resolve(optional_text,S) ->
+ {optional_text,S};
+resolve(E,S) ->
+ ?debug("resolve(~p, S)~n",[E]),
+ load_object(E,S).
+
+%% explicit_type checks whether the instance element is of an explicit
+%% type pointed out by xsi:type. A type refernced by xsi:type must be
+%% the same as, or derived from the instance element's type. Concluded
+%% from 3.4.6 section "Schema Component Constraint: Type Derivation OK
+%% (Complex)".
+explicit_type(XSDType,InstanceEl=#xmlElement{namespace=NS,attributes=Atts},S) ->
+ case get_instance_type(NS,Atts) of
+ false ->
+ resolve(XSDType,S);
+ {ok,Name} ->
+ %% Create a {name,scope,namespace}, what is scope?
+ %% assume scope always is at top for the referenced type.
+ QName = mk_EII_QName(Name,InstanceEl,S#xsd_state{scope=[]}),
+ %% The type referenced by "xsi:type" attribute must be a
+ %% legal substitution for InstanceEl: "xsi:type" is the
+ %% same as or a derivation from InstanceEl's type.
+
+ {XsiType,S2} = resolve({simple_or_complex_Type,QName},S),
+ {_Blocks,S3} = legal_substitution(InstanceEl,XsiType,S2),
+%% {ResXSDType,S4} = resolve(XSDType,S3),
+ {XsiType,S3}
+%% merge_derived_types(ResXSDType,XsiType,Blocks,xsitype,S4)
+ end.
+
+get_instance_type(#xmlNamespace{nodes=Nodes},Atts) ->
+ case keyNsearch(?XSD_INSTANCE_NAMESPACE,2,Nodes,[]) of
+ {Prefix,_} ->
+ TypeAtt = list_to_atom(Prefix++":type"),
+ case keyNsearch(TypeAtt,#xmlAttribute.name,Atts,[]) of
+ #xmlAttribute{value=Value} ->
+ {ok,Value};
+ _ -> false
+ end;
+ _ ->
+ false
+ end.
+
+merge_derived_types(Type1,Type2,Mode,S) ->
+ merge_derived_types(Type1,Type2,[],Mode,S).
+merge_derived_types(Type,Type,_Blocks,_Mode,S) ->
+ {Type,S};
+merge_derived_types(XSDType,InstType,Blocks,Mode,S) ->
+ case catch merge_derived_types2(XSDType,InstType,Blocks,Mode,S) of
+ {'EXIT',Reason} ->
+ {InstType,acc_errs(S,{[],?MODULE,{internal_error,merge_derived_types,Reason}})};
+ {error,S2} ->
+ {InstType,S2};
+ {MergedType,S2} ->
+ save_merged_type(MergedType,S2),
+ {MergedType,S2}
+ end.
+
+merge_derived_types2(XSDType=#schema_complex_type{},
+ InstType=#schema_complex_type{},Blocks,Mode,S) ->
+ %% InstType is the type of the instance element that may reference
+ %% a type that is an extension/restriction of the XSDType.
+ %% Alternatively XSDType is the base type and InstType the derived
+ %% type or XSDType is the original type that is redefined into
+ %% InstType.
+ %%
+ %% complexType can turn into:
+ %% simpleContent | complexContent
+ %% simpleContent -> restriction
+ %% complexContent -> restriction | extension
+ %% of course also one of:
+ %% ((group | all | choice | sequence)?,
+ %% ((attribute | attributeGroup)*,anyAttribute?))))
+ %% but then it shouldn't be any difference between XSDType
+ %% and InstType
+ case InstType#schema_complex_type.content of
+ [{extension,{BaseTypeName,CM}}] ->
+ {ExtendedAtts,S2} =
+ extend_attributes(XSDType#schema_complex_type.attributes,
+ InstType#schema_complex_type.attributes,
+ BaseTypeName,CM,Mode,
+ allowed_derivation(extension,Blocks,S)),
+ case compare_base_types(BaseTypeName,XSDType,S2) of
+ ok ->
+ {NewContent,S3} =
+ extend_type(XSDType#schema_complex_type.content,CM,S2),
+ {InstType#schema_complex_type{attributes=ExtendedAtts,
+ content=NewContent},S3};
+ Err ->
+ {error,acc_errs(S2,Err)}
+ end;
+ [{restriction,{BaseTypeName,CM}}] ->
+ {RestrictedAtts,S2} =
+ restrict_attributes(XSDType#schema_complex_type.attributes,
+ InstType#schema_complex_type.attributes,
+ allowed_derivation(restriction,Blocks,S)),
+ case compare_base_types(BaseTypeName,XSDType,S2) of
+ ok ->
+ {NewContent,S3}=
+ case InstType#schema_complex_type.complexity of
+ simple ->
+ restrict_simple_type(XSDType#schema_complex_type.content,CM,BaseTypeName,S2);
+ _ ->
+ restrict_type(XSDType#schema_complex_type.content,CM,BaseTypeName,S2)
+ end,
+ {InstType#schema_complex_type{attributes=RestrictedAtts,
+ content=NewContent},S3};
+ Err ->
+ {error,acc_errs(S,Err)}
+ end;
+ Other ->
+ {error,acc_errs(S,{[],?MODULE,{unexpected_type,Other}})}
+ end;
+merge_derived_types2(XSDType=#schema_simple_type{},
+ InstType=#schema_simple_type{},Blocks,_Mode,S) ->
+ case InstType#schema_simple_type.content of
+ [{restriction,{BaseTypeName,CM}}] ->
+ case compare_base_types(BaseTypeName,XSDType,S) of
+ ok ->
+
+ {NewContent,S2}=
+ restrict_simple_type(XSDType#schema_simple_type.content,CM,
+ BaseTypeName,S),
+ {InstType#schema_simple_type{content=NewContent},
+ allowed_derivation(restriction,Blocks,S2)};
+ Err ->
+ {error,allowed_derivation(restriction,Blocks,
+ acc_errs(S,Err))}
+ end;
+ Other ->
+ {error,acc_errs(S,{unexpected_type,Other})}
+ end;
+merge_derived_types2(XSDType=#schema_simple_type{content=XSDContent},
+ InstType=#schema_complex_type{},Blocks,_Mode,S) ->
+ %% This is the way to add attributes to a simpleType
+ case InstType#schema_complex_type.content of
+ [{extension,{BaseTypeName,CM}}] ->
+ case compare_base_types(BaseTypeName,XSDType,S) of
+ ok ->
+ {NewContent,S2} =
+ if CM==[] -> {XSDContent,S};
+ true -> extend_type(XSDContent,CM,S)
+ end,
+ {InstType#schema_complex_type{content=NewContent},
+ allowed_derivation(extension,Blocks,S2)};
+ Err ->
+ {error,allowed_derivation(extension,Blocks,
+ acc_errs(S,Err))}
+ end;
+ [{restriction,{BaseTypeName,_CM}}]
+ when InstType#schema_complex_type.complexity == simple ->
+ case compare_base_types(BaseTypeName,XSDType,S) of
+ ok ->
+ {InstType,
+ allowed_derivation(restriction,Blocks,S)};
+ Err ->
+ {error,allowed_derivation(extension,Blocks,
+ acc_errs(S,Err))}
+ end;
+ Other ->
+ {error,acc_errs(S,{[],?MODULE,{unexpected_type,Other}})}
+ end;
+merge_derived_types2(_XSDType={simpleType,BuiltInType},
+ InstType=#schema_complex_type{content=Content},
+ Blocks,_Mode,S) ->
+ case Content of
+ [{extension,{BuiltInType,CM}}] ->
+ {NewContent,S2} = extend_type([],CM,S),
+ {InstType#schema_complex_type{base_type=BuiltInType,
+ content=NewContent},
+ allowed_derivation(extension,Blocks,S2)};
+ [{restriction,{BuiltInType,CM}}] ->
+ {NewContent,S2} = restrict_simple_type([],CM,BuiltInType,S),
+ {InstType#schema_complex_type{base_type=BuiltInType,
+ content=NewContent},
+ allowed_derivation(restriction,Blocks,S2)};
+ Other ->
+ {error,acc_errs(S,{[],?MODULE,{unexpected_content,Other,InstType}})}
+ end;
+merge_derived_types2(_XSDType={anyType,_},InstType,Blocks,_Mode,S) ->
+ case type_content(InstType) of
+ [{restriction,{_BaseTypeName,CM}}] ->
+ {set_type_content(InstType,CM),
+ allowed_derivation(restriction,Blocks,S)};
+ Other ->
+ {error,acc_errs(S,{[],?MODULE,{unexpected_content,Other,InstType}})}
+ end;
+merge_derived_types2({simpleType,BuiltInType},
+ InstType=#schema_simple_type{content=Content},
+ Blocks,_Mode,S) ->
+ case Content of
+ [{restriction,{BuiltInType,CM}}] ->
+ {InstType#schema_simple_type{base_type=BuiltInType,
+ content=CM},
+ allowed_derivation(restriction,Blocks,S)};
+ Other ->
+ {error,acc_errs(S,{[],?MODULE,{unexpected_content,Other,InstType}})}
+ end;
+merge_derived_types2(XSDType,InstType,Blocks,Mode,S) ->
+ case {variety_type(XSDType,S),variety_type(InstType,S)} of
+ {XSDType,InstType} ->
+ {error,acc_errs(S,{[],?MODULE,{unexpected_type,XSDType,InstType}})};
+ {_XSDType2,InstType2} ->
+ case allowed_derivation(substitution,Blocks,S) of
+ S ->
+ merge_derived_types2(XSDType,InstType2,Blocks,Mode,S);
+ S2 ->
+ {error,S2}
+ end
+ end.
+
+variety_type(#schema_simple_type{variety=list,content=[{list,[Type]}]},S) ->
+ {VarietyType,_}=resolve(Type,S),
+ VarietyType;
+variety_type(#schema_simple_type{variety=union,content=[{union,Types}]},S) ->
+ [T||{T,_}<-[resolve(VarietyType,S)||VarietyType<-Types]];
+variety_type(Type,_S) ->
+ Type.
+
+allowed_derivation(_Derivation,_Blocks,S) ->
+%% case {member(Derivation,Blocks),member('#all',Blocks)} of
+%% {true,_} ->
+%% acc_errs(S,{[],?MODULE,{derivation_blocked,Blocks,Derivation}});
+%% {_,true} ->
+%% acc_errs(S,{[],?MODULE,{derivation_blocked,'#all',Derivation}});
+%% _ ->
+%% S
+%% end.
+ S.
+
+%% El is the instance element that has the xsi:type attribute with
+%% XsiType.
+legal_substitution(El=#xmlElement{name=ElName},XsiType,S) ->
+ %% See 3.3.6, Substitution Group OK (Transitive)
+ %% For ok one of following: 1) same type in El as XsiType, 2)
+ %% XsiType is a restriction/extension where El's type is the
+ %% base, 3) XsiType is a member in the substitutionGroup of
+ %% ElName.
+ QName = mk_EII_QName(ElName,El,S),
+ {HeadElement,_} = load_object({element,QName},S),
+
+ legal_substitution2(HeadElement,XsiType,S).
+legal_substitution2(#schema_element{type=Type,block=Bl},XsiType,S) ->
+ {HeadType,_}=resolve(Type,S),
+ Block = blocking(Bl,S#xsd_state.blockDefault),
+ S2 = derived_or_equal_types(XsiType,HeadType,instance,Block,S),
+ {Block,S2}.
+
+compare_base_types(QName,#schema_complex_type{name=QName},_S) ->
+ ok;
+compare_base_types(QName1,#schema_complex_type{name=QName2},_S) ->
+ {[],?MODULE,{names_not_equal,QName1,QName2}};
+compare_base_types(QName,#schema_simple_type{name=QName},_S) ->
+ ok;
+compare_base_types(QName1,#schema_simple_type{name=QName2},_S) ->
+ {[],?MODULE,{names_not_equal,QName1,QName2}}.
+%%compare_base_types(QName1,Other,_S) ->
+%% {[],?MODULE,{miss_match_base_types,QName1,Other}}.
+
+extend_type(Base,Extension,S) ->
+ extend_type(Base,Extension,[],S).
+%% Content may be (attribute | attributeGroup)*, anyAttribute? if
+%% it is of simpleContent or:
+%% (group | all | choice | sequence)?,((attribute | attributeGroup)*,
+%% anyAttribute?) if it is of complexContent
+extend_type([],[],Acc,S) ->
+ {reverse(Acc),S};
+extend_type([BaseCM|BaseRest],Ext=[{SeqCho,{Extension,Occ}}|ExtRest],Acc,S)
+ when SeqCho == sequence; SeqCho == choice ->
+ case BaseCM of
+ {SeqCho,{BC,_Occ}} ->
+ extend_type(BaseRest,ExtRest,[{SeqCho,{BC++Extension,Occ}}|Acc],S);
+ G = {group,{_Ref,_Occ}} ->
+ {ResG,S2} = resolve(G,S),
+ case ResG of
+ #schema_group{content=GC} ->
+ case keysearch(SeqCho,1,GC) of
+ {value,SCC} ->
+ extend_type([SCC|BaseRest],Ext,Acc,S);
+ _ ->
+ S3 = acc_errs(S2,{[],?MODULE,{illegal_content_in_extension,Ext}}),
+ {reverse(Acc),S3}
+ end;
+ _ ->
+ S3 = acc_errs(S2,{[],?MODULE,{illegal_content_in_extension,ResG}}),
+ {reverse(Acc),S3}
+ end;
+ _ ->
+ %% BaseCM may be a group that has a sequence
+ extend_type([BaseCM|BaseRest],ExtRest,[{SeqCho,{Extension,Occ}}|Acc],S)
+ end;
+extend_type(BaseCM,ExtCM,Acc,S) when is_list(BaseCM),is_list(ExtCM) ->
+ extend_type([],[],reverse(ExtCM)++reverse(BaseCM)++Acc,S).
+
+restrict_type(Content,CM,BaseTypeName,S) ->
+ restrict_type(Content,CM,BaseTypeName,[],S).
+%% Restriction may appear within a 1) simpleType, 2) simpleContent or
+%% 3) complexContent construct.
+%% The possible content of restriction in different contexts are:
+%% 1) (simpleType?, (Any facet)*)
+%% 2) (simpleType?, (Any facet)*),((attribute | attributeGroup)*, anyAttribute?)
+%% 3) (group | all | choice | sequence)?,
+%% ((attribute | attributeGroup)*, anyAttribute?)
+%% A restriction of a simpleType narrows the possible values of the
+%% base type by applying facets.
+%% A restriction of a complexType (simpleContent / complexContent) must
+%% enumerate all elements, including the preserved ones of the base type.
+%% Attributes don't have to be enumerated.
+restrict_type([],[],_TypeName,Acc,S) ->
+ {reverse(Acc),S};
+restrict_type([{restriction,{_Type,CM1}}],[],_TypeName,Acc,S) ->
+ {CM1++reverse(Acc),S};
+restrict_type([{extension,{_Type,CM1}}],[],_TypeName,Acc,S) ->
+ {CM1++reverse(Acc),S};
+restrict_type(BaseRest,[ST={simpleType,_Name}|RestrRest],TypeName,Acc,S) ->
+ %% context 1 or 2
+ restrict_type(BaseRest,RestrRest,TypeName,[ST|Acc],S);
+restrict_type([BaseCM|BaseRest],[{SeqCho,{CM,Occ}}|RestrRest],TypeName,Acc,S)
+ when SeqCho == sequence; SeqCho == choice ->
+ %% context 3
+ case BaseCM of
+ {SeqCho,{BCM,_}} ->
+ case check_element_presence(CM,BCM) of
+ {error,Reason} ->
+ {reverse(Acc),acc_errs(S,Reason)};
+ ok ->
+ restrict_type(BaseRest,RestrRest,TypeName,
+ [{SeqCho,{CM,Occ}}|Acc],S)
+ end;
+ Other ->
+ {reverse(Acc),acc_errs(S,{[],?MODULE,{SeqCho,expected,Other,found}})}
+ end;
+restrict_type(BaseRest,[Facet={F,_Val}|RestrRest],TypeName,Acc,S) ->
+ case is_facet(F) of
+ true ->
+ restrict_type(BaseRest,RestrRest,TypeName,[Facet|Acc],S);
+ _ ->
+ {reverse(Acc),acc_errs(S,{[],?MODULE,{does_not_support,Facet,in_restriction}})}
+ end.
+
+restrict_simple_type([{restriction,{_Type,BaseCM}}],RestrCM,_TypeName,S) ->
+ restrict_simple_type(BaseCM,RestrCM,_TypeName,S);
+restrict_simple_type(CM=[{extension,{_Type,_BaseCM}}],_RestrCM,TypeName,S) ->
+ {[],acc_errs(S,{[],?MODULE,{illegal_content_simple_type,CM,TypeName}})};
+restrict_simple_type(BaseCM,RestrCM,TypeName,S) ->
+ %% all restrictions in base comes first, then check that no one of
+ %% the facets in the restriction attempts to redefine a fixed
+ %% facet in the base. Add the facets of the restriction.
+ {Acc,S2} =
+ case BaseCM of
+ [] -> {[],S};
+ _ ->
+ restrict_simple_type([],BaseCM,TypeName,S)
+ end,
+ %% Acc = reverse(BaseCM),
+ Fun = fun(X={simpleType,_},{Acc_in,S_in})->
+ {[X|Acc_in],S_in};
+ (X={LU,_},{Acc_in,S_in}) when LU==list;LU==union ->
+ {[X|Acc_in],S_in};
+ (X={F,_},{Acc_in,S_in})->
+%% Fun = fun(X={F,_},{Acc_in,S_in})->
+ case is_facet(F) of
+ true ->
+ {[X|Acc_in],S_in};
+ _ ->
+ {Acc_in,acc_errs(S_in,{[],?MODULE,{illegal_in_restriction_of_simpleType,X}})}
+ end;
+ (X,{Acc_in,S_in}) ->
+ {Acc_in,acc_errs(S_in,{[],?MODULE,{illegal_in_restriction_of_simpleType,X}})}
+ end,
+ foldl(Fun,{Acc,S2},RestrCM).
+
+check_element_presence([],_BCM) ->
+ ok;
+check_element_presence([{element,{Name,_}}|CM],BCM) ->
+ case check_element_presence2(Name,BCM) of
+ {ok,BCM2} ->
+ check_element_presence(CM,BCM2);
+ _ ->
+ {error,{[],?MODULE,{element,Name,not_present_in_restriction}}}
+ end;
+check_element_presence([_C|CM],BCM) ->
+ check_element_presence(CM,BCM).
+
+check_element_presence2(Name,BCM) ->
+ check_element_presence2(Name,BCM,[]).
+check_element_presence2({LocalName,_,NS},[{element,{{LocalName,_,NS},_}}|BCM],Acc) ->
+ {ok,reverse(Acc)++BCM};
+check_element_presence2(Name,[E|BCM],Acc) ->
+ check_element_presence2(Name,BCM,[E|Acc]);
+check_element_presence2(_Name,[],_Acc) ->
+ error.
+
+%% A check of the extended attribute should take place here.
+%%
+extend_attributes(BaseAtts,[EA={attribute,Name}|ExtAtts],
+ BaseTypeName,CM,Mode,S) ->
+ NewAtts=key_replace_or_insert(Name,2,BaseAtts,EA),
+ extend_attributes(NewAtts,ExtAtts,BaseTypeName,CM,Mode,S);
+%% Extension of wildcards should be handled as described in chapter
+%% 3.4.2 and subsection "Complex Type Definition with simple content
+%% Schema Component".
+extend_attributes(BaseAtts,[LocalWC={anyAttribute,_NS_PC}|ExtAtts],
+ BaseTypeName,CM,deduce,S) ->
+ {CompleteWC,S2} = complete_wildcard(LocalWC,CM,S),
+ BaseWC = base_wildcard(BaseAtts),
+ {NewWC,S4} =
+ case BaseWC of
+ [] -> {CompleteWC,S2};
+ _ ->
+ if CompleteWC==LocalWC -> {BaseWC,S2};
+ true ->
+ {NS,S3} = attribute_wildcard_union(wc_ns(CompleteWC),
+ wc_ns(BaseWC),S2),
+ PC = wc_pc(CompleteWC),
+ {[{anyAttribute,{NS,PC}}],S3}
+ end
+ end,
+ NewBaseAtts = keyreplace(anyAttribute,1,BaseAtts,NewWC),
+ extend_attributes(NewBaseAtts,ExtAtts,BaseTypeName,CM,deduce,S4);
+extend_attributes(Atts,[],_,_,_Mode,S) ->
+ {reverse(Atts),S}.
+%% A check of the restricted attribute should take place here.
+restrict_attributes(BaseAtts,[RA|RAtts],S) ->
+%% NewAtts = keyreplace(Name,2,BaseAtts,EA),
+ {NewAtts,S2} = restrict_attribute_replace(BaseAtts,RA,S),
+ restrict_attributes(NewAtts,RAtts,S2);
+restrict_attributes(Atts,[],S) ->
+ {reverse(Atts),S}.
+restrict_attribute_replace(BaseAtts,EA={attribute,Name},S) ->
+ {keyreplace(Name,2,BaseAtts,EA),S};
+restrict_attribute_replace(BaseAtts,EA={anyAttribute,{NS,_}},S) ->
+ case key1search(anyAttribute,BaseAtts,false) of
+ false ->
+ {BaseAtts,acc_errs(S,{invalid_derivation,EA,BaseAtts})};
+ {_,{BaseNS,_}} ->
+ S2 = wildcard_subset(BaseNS,NS,S),
+ {keyreplace(anyAttribute,1,BaseAtts,EA),S2}
+ end.
+%% 3.10.6 Constraints on Wildcard Schema Components
+%% Schema Component Constraint: Wildcard Subset
+%% bullet 1:
+wildcard_subset(['##any'],_NS,S) ->
+ S;
+%% bullet 2:
+wildcard_subset([{'not',NS}],[{'not',NS}],S) ->
+ S;
+%% bullet 3:
+%% if NS has a number of namespaces all of them must be in BaseNS,
+%% if BaseNS has {not,Namespaces} neither of Namespaces must be in NS
+wildcard_subset(_,[],S) ->
+ S;
+wildcard_subset(BaseNS,NS,S) when is_list(BaseNS),is_list(NS) ->
+ case [X||X<-NS,member(X,BaseNS)] of
+ NS ->
+ S;
+ _ ->
+ acc_errs(S,{[],?MODULE,{wildcard_namespace,NS,
+ not_subset_of_base_namespace,BaseNS}})
+ end;
+wildcard_subset(BaseNS=[{'not',BNS}],NS,S) when is_list(NS) ->
+ case [X||X<-BNS,member(X,NS)] of
+ [] ->
+ S;
+ _ ->
+ acc_errs(S,{[],?MODULE,{wildcard_namespace,NS,
+ not_subset_of_base_namespace,BaseNS}})
+ end;
+wildcard_subset(BaseNS,NS,S) ->
+ acc_errs(S,{[],?MODULE,{wildcard_namespace,NS,
+ not_subset_of_base_namespace,BaseNS}}).
+
+base_wildcard(BaseAtts) ->
+ key1search(anyAttribute,BaseAtts,[]).
+
+complete_wildcard(LocalWC,CM,S) ->
+ case keysearch(attributeGroup,1,CM) of
+ {value,AttG={_,_Name}} ->
+ case resolve(AttG,S) of
+ {#schema_attribute_group{content=Atts},_S} ->
+ case keysearch(anyAttribute,1,Atts) of
+ {value,AA} ->
+ {PC,S2} =
+ attribute_wildcard_intersection(wc_ns(LocalWC),
+ wc_ns(AA),S),
+ {{anyAttribute,{wc_pc(LocalWC),PC}},S2};
+ _ -> {LocalWC,S}
+ end;
+ _ -> {LocalWC,S}
+ end;
+ _ -> {LocalWC,S}
+ end.
+
+wc_ns({anyAttribute,{NS,_}})->
+ NS;
+wc_ns(_) ->
+ [].
+wc_pc({anyAttribute,{_,PC}})->
+ PC;
+wc_pc(_) ->
+ strict.
+
+%% Union of wildcard namespace:
+%% 3.10.6 Constraints on Wildcard Schema Components
+%% Schema Component Constraint: Attribute Wildcard Union
+%% bullet 1
+attribute_wildcard_union(NS,NS,S) ->
+ {NS,S};
+%% bullet 2
+attribute_wildcard_union(NS1,NS2,S) when NS1==['##any'];NS2==['##any'] ->
+ {['##any'],S};
+attribute_wildcard_union(NS1,NS2,S) ->
+ case {keysearch('not',1,NS1),keysearch('not',1,NS2)} of
+ {false,false} -> %% bullet 3
+ {NS1 ++ [X||X<-NS2,member(X,NS1)==false],S};
+ {{value,{_,Set1}},{value,{_,Set2}}} -> %% bullet 4 or 1
+ case {lists:sort(Set1),lists:sort(Set2)} of
+ {L,L} -> {[{'not',L}],S};
+ _ -> {[{'not',[absent]}],S}
+ end;
+ _ -> %% either is a {not,NS}
+ case toggle_ns(NS1,NS2) of
+ {_O1=[absent],NS3} -> %% bullet 6
+ case member(absent,NS3)of
+ true -> {['##any'],S};
+ _ -> {[{'not',[absent]}],S}
+ end;
+ {O1=[O1Name],NS4} -> %% bullet 5
+ case member(O1Name,NS4) of
+ true ->
+ case member(absent,NS4) of
+ true -> {['##any'],S}; %% 5.1
+ _ -> {[{'not',[absent]}],S} %% 5.2
+ end;
+ _ ->
+ case member(absent,NS4) of
+ true ->
+ %% not expressible 5.3
+ Err = {[],?MODULE,{wildcard_namespace_union_not_expressible,NS1,NS2}},
+ {[],acc_errs(S,Err)};
+ _ -> {[{'not',O1}],S} %% 5.4
+ end
+ end
+ end
+ end.
+
+%% Schema Component Constraint: Attribute Wildcard Intersection
+%% bullet 1
+attribute_wildcard_intersection(O1,O1,S) -> {O1,S};
+%% bullet 2
+attribute_wildcard_intersection(['##any'],O2,S) -> {O2,S};
+attribute_wildcard_intersection(O1,['##any'],S) -> {O1,S};
+%% bullet 6
+attribute_wildcard_intersection([{'not',[absent]}],O2=[{'not',_}],S) -> {O2,S};
+attribute_wildcard_intersection(O1=[{'not',_}],[{'not',[absent]}],S) -> {O1,S};
+%% bullet 5
+attribute_wildcard_intersection([{'not',NS1}],[{'not',NS2}],S) ->
+ case [X||X<-NS1,member(X,NS2)] of
+ [] -> {[],acc_errs(S,{[],?MODULE,{wildcard_namespace_intersection_not_expressible,NS1,NS2}})};
+ NS3 -> {[{'not',NS3}],S}
+ end;
+%% bullet 3
+attribute_wildcard_intersection([{'not',NS}],O2,S) ->
+ {lists:delete(absent,[X||X<-O2,member(X,NS)==false]),S};
+attribute_wildcard_intersection(O1,[{'not',NS}],S) ->
+ {lists:delete(absent,[X||X<-O1,member(X,NS)==false]),S};
+%% bullet 4
+attribute_wildcard_intersection(O1,O2,S) ->
+ case [X||X<-O1,member(X,O2)] of
+ [] ->
+ {[absent],S};
+ L ->{L,S}
+ end.
+
+toggle_ns(NS1,NS2=[{'not',_}]) ->
+ {NS2,NS1};
+toggle_ns(NS1,NS2) ->
+ {NS1,NS2}.
+
+
+deduce_derived_types([DT|DTs],S) ->
+ deduce_derived_types(DTs,deduce_derived_type(DT,S,[]));
+deduce_derived_types([],S) ->
+ S.
+
+%% deduce_derived_type
+deduce_derived_type(DT={_Kind,TName},S,RefChain) ->
+ %% check circular references
+ case keymember(TName,2,RefChain) of
+ true ->
+ acc_errs(S,{[],?MODULE,{circular_reference_of_type,TName}});
+ _ ->
+ deduce_derived_type2(DT,S,[DT|RefChain])
+ end.
+deduce_derived_type2(DT,S,RefChain) ->
+ {DerivedType,_} = resolve(DT,S),
+ case is_unmerged_type(DerivedType) of
+ true ->
+ BaseTypeRef = get_base_type(DerivedType),
+ {BaseType,_} = resolve({simple_or_complex_Type,BaseTypeRef},S),
+ BaseTypeKind =
+ fun(#schema_complex_type{}) -> complexType;
+ (_) -> simpleType
+ end (BaseType),
+ case is_unmerged_type(BaseType) of
+ true ->
+ %% both derived and not deduced
+ S2 = deduce_derived_type({BaseTypeKind,BaseTypeRef},S,RefChain),
+ case S2#xsd_state.errors==S#xsd_state.errors of
+ true -> deduce_derived_type2(DT,S2,RefChain);
+ _ -> S2
+ end;
+ _ ->
+ {_,S2} = merge_derived_types(BaseType,DerivedType,deduce,S),
+ S2
+ end;
+ _ ->
+ S
+ end.
+is_unmerged_type(Type) ->
+ case type_content(Type) of
+ [{restriction,_}] -> true;
+ [{extension,_}] -> true;
+ _ -> false
+ end.
+type_content(#schema_simple_type{content=C}) ->
+ C;
+type_content(#schema_complex_type{content=C}) ->
+ C;
+type_content(_) ->
+ [].
+
+set_type_content(Type=#schema_simple_type{},CM) ->
+ Type#schema_simple_type{content=CM};
+set_type_content(Type=#schema_complex_type{},CM) ->
+ Type#schema_complex_type{content=CM}.
+
+get_base_type(#schema_simple_type{base_type=BT}) ->
+ BT;
+get_base_type(#schema_complex_type{base_type=BT}) ->
+ BT.
+
+in_scope({Local,_Scope,_NS},S) ->
+ in_scope(Local,S);
+in_scope(Name,S=#xsd_state{scope=Scope}) when is_atom(Name) ->
+ S#xsd_state{scope=[Name|Scope]};
+in_scope(Name,S=#xsd_state{scope=Scope}) when is_list(Name) ->
+ S#xsd_state{scope=[atom_if_shortasciilist(Name)|Scope]}.
+
+out_scope({Local,_,_},S) ->
+ out_scope(atom_if_shortasciilist(Local),S);
+out_scope(Name,S=#xsd_state{scope=[Name|Rest]}) ->
+ S#xsd_state{scope=Rest};
+out_scope(_Name,S) ->
+ S.
+
+name_scope({'_xmerl_no_name_',Scope,_NS},S) ->
+ S#xsd_state{scope=Scope};
+name_scope({Name,Scope,_NS},S) ->
+ S#xsd_state{scope=[Name|Scope]}.
+
+reset_scope(S) ->
+ S#xsd_state{scope=[]}.
+
+set_scope(Scope,S) when is_list(Scope) ->
+ S#xsd_state{scope=Scope};
+set_scope(_,S) ->
+ S.
+
+is_global_env([_Env]) ->
+ true;
+is_global_env(_) ->
+ false.
+
+kind(#xmlElement{name=Name},S) ->
+ LocalName=local_name(Name),
+ is_a(LocalName,S).
+
+kind(#xmlElement{name=Name}) ->
+ LocalName=local_name(Name),
+ element(1,is_a(LocalName,dummy)).
+
+is_a(element,S) -> {element,S};
+is_a(annotation,S) -> {annotation,S};
+is_a(simpleType,S) -> {simpleType,S};
+is_a(complexType,S) -> {complexType,S};
+is_a(simpleContent,S) -> {simpleContent,S};
+is_a(complexContent,S) -> {complexContent,S};
+is_a(include,S) -> {include,S};
+is_a(import,S) -> {import,S};
+is_a(redefine,S) -> {redefine,S};
+is_a(unique,S) -> {unique,S};
+is_a(key,S) -> {key,S};
+is_a(keyref,S) -> {keyref,S};
+is_a(attribute,S) -> {attribute,S};
+is_a(attributeGroup,S) -> {attributeGroup,S};
+is_a(group,S) -> {group,S};
+is_a(all,S) -> {all,S};
+is_a(sequence,S) -> {sequence,S};
+is_a(choice,S) -> {choice,S};
+is_a(any,S) -> {any,S};
+is_a(anyAttribute,S) -> {anyAttribute,S};
+is_a(selector,S) -> {selector,S};
+is_a(field,S) -> {field,S};
+is_a(notation,S) -> {notation,S};
+is_a(appinfo,S) -> {appinfo,S};
+is_a(documentation,S) -> {documentation,S};
+is_a(restriction,S) -> {restriction,S};
+is_a(extension,S) -> {extension,S};
+is_a(list,S) -> {list,S};
+is_a(union,S) -> {union,S};
+is_a(schema,S) -> {schema,S};
+is_a(minExclusive,S) -> {minExclusive,S};
+is_a(minInclusive,S) -> {minInclusive,S};
+is_a(maxExclusive,S) -> {maxExclusive,S};
+is_a(maxInclusive,S) -> {maxInclusive,S};
+is_a(totalDigits,S) -> {totalDigits,S};
+is_a(fractionDigits,S) -> {fractionDigits,S};
+is_a(length,S) -> {length,S};
+is_a(minLength,S) -> {minLength,S};
+is_a(maxLength,S) -> {maxLength,S};
+is_a(enumeration,S) -> {enumeration,S};
+is_a(whiteSpace,S) -> {whiteSpace,S};
+is_a(pattern,S) -> {pattern,S};
+is_a(Name,S) when is_record(S,xsd_state) ->
+ {Name,acc_errs(S,{[],?MODULE,{unknown_content,Name}})};
+is_a(Name,_) ->
+ exit({error,{internal_error,not_implemented,Name}}).
+
+
+
+
+%% namespace/2 -> [token()]
+%% token() -> {not,namespace_name()} | namespace_name()
+%% ((##any | ##other) | List of (anyURI | (##targetNamespace | ##local)) ) : ##any
+%% The result will be:
+%% NSList ::= ['##any'] | [{'not',[TNS]}] | NSURIs
+%% TNS ::= URI | absent
+%% NSURIs ::= (URI | absent) +
+%% URI ::= atomified URI-string
+wildcard_namespace(E,S) ->
+ AttVal = get_attribute_value(namespace,E,"##any"),
+ ListOfVals = namestring2namelist(AttVal),
+ Pred = fun('##other') ->
+ case S#xsd_state.targetNamespace of
+ undefined -> {'not',[absent]};
+ TN -> {'not',TN}
+ end;
+ ('##targetNamespace') ->
+ case S#xsd_state.targetNamespace of
+ undefined -> absent;
+ TN -> TN
+ end;
+ ('##local') -> absent;%%'##local'; %% any well-formed xml that
+ %% is not qualified.
+ (X) -> X
+ end,
+ [X||X <- map(Pred,ListOfVals),X=/=[]].
+
+processor_contents(Any) ->
+ case get_attribute_value(processContents,Any,strict) of
+ V when is_list(V) -> list_to_atom(V);
+ A -> A
+ end.
+
+base_type(E) ->
+ get_attribute_value(base,E,[]).
+base_type_type(Env) ->
+ case member(simpleType,Env) of
+ true -> simpleType;
+ _ -> simple_or_complex_Type
+ end.
+
+attribute_ref(A) ->
+ get_attribute_value(ref,A,[]).
+
+particle_ref(El) ->
+ get_attribute_value(ref,El,[]).
+
+attributeGroup_ref(El) ->
+ get_attribute_value(ref,El,[]).
+
+get_value(El) ->
+ get_attribute_value(value,El,undefined).
+
+get_attribute_value(Key,#xmlElement{attributes=Atts},Default) ->
+ case keyNsearch(Key,#xmlAttribute.name,Atts,Default) of
+ #xmlAttribute{value=V} ->
+ V;
+ _ -> Default
+ end.
+
+%% qualify_NCName/2 returns a qualified name, QName, that has
+%% information of the name attribute and namespace of the XSD object.
+%% The object E has a name attribute with a NCName. The Namespace
+%% part of the QName is from the targetNamespace attribute of the
+%% schema or the empty list.
+qualify_NCName(E=#xmlElement{},S) ->
+ case get_local_name(E) of
+ [] -> no_name;
+ LocalName ->
+ Namespace =
+ case S#xsd_state.targetNamespace of
+ undefined ->
+ []; %%?XSD_NAMESPACE;
+ TNS ->
+ TNS
+ end,
+ {atom_if_shortasciilist(LocalName),S#xsd_state.scope,Namespace}
+ end.
+
+
+get_local_name(#xmlElement{attributes=Atts}) ->
+ case keyNsearch(name,#xmlAttribute.name,Atts,[]) of
+ #xmlAttribute{value=V} ->
+ V;
+ Default -> Default
+ end.
+
+local_name(Name) when is_atom(Name) ->
+ local_name(atom_to_list(Name));
+local_name(Name) when is_list(Name) ->
+ case splitwith(fun($:) -> false;(_)->true end,Name) of
+ {_,":"++LocalName} -> list_to_atom(LocalName);
+ _ ->
+ list_to_atom(Name)
+ end.
+
+%% transforms "a B c" to [a,'B',c]
+namestring2namelist(Str) ->
+ split_by_whitespace(Str,[]).
+split_by_whitespace(Str,Acc) when is_list(Str),length(Str) > 0 ->
+ F = fun($ ) ->
+ false;
+ (_) ->
+ true
+ end,
+ {Str1,Rest} = splitwith(F,Str),
+ split_by_whitespace(string:strip(Rest),[list_to_atom(Str1)|Acc]);
+split_by_whitespace(_,Acc) ->
+ reverse(Acc).
+
+%% get_QName(Name,S) where Name is a QName in string format, or where
+%% a QName is expected according to schema specification. If the name
+%% is unqualified it is qualified with the targetNamespace of the schema
+%% or with the empty list.
+get_QName(Name,NS,S) when is_atom(Name) ->
+ get_QName(atom_to_list(Name),NS,S);
+get_QName(Name,NS,#xsd_state{scope=Scope}) ->
+ qualified_name(Name,NS,NS#xmlNamespace.default,Scope).
+
+qualified_name(Name,NS,Default,Scope) ->
+ case splitwith(fun($:) -> false;(_)->true end,Name) of
+ {GlobalName,":"++LocalName} -> {atom_if_shortasciilist(LocalName),Scope,
+ namespace(GlobalName,NS,Default)};
+ _ ->
+ {atom_if_shortasciilist(Name),Scope,Default}
+ end.
+
+atom_if_shortasciilist(N) when is_list(N) ->
+ case catch list_to_atom(N) of
+ {'EXIT',_Reason} ->
+ %% Reason may be system_limit if N is very long, it may be
+ %% badarg ifN is a list of UTF characters.
+ N;
+ AN -> AN
+ end;
+atom_if_shortasciilist(N) ->
+ N.
+
+namespace("xml",_,_) -> 'http://www.w3.org/XML/1998/namespace';
+namespace(Prefix,NS,Default) ->
+ case key1search(Prefix,NS#xmlNamespace.nodes,Default) of
+ {Prefix,Namespace} ->
+ Namespace;
+ Namespace -> Namespace
+ end.
+
+
+%% mk_EII_QName/2
+%% makes a name with qualified info out of an Element Information Item
+%% A) If name is qualified get namespace matching prefix.
+%% B) If not qualified search parents for a namespace:
+%% 1) use default namespace if defined, else.
+%% 2) if a parent is qualified use that namespace or
+%% 3) no namespace is applied
+mk_EII_QName(Name,#xmlElement{name=Me,namespace=NS,parents=P},S)
+ when is_list(Name) ->
+ mk_EII_QName(list_to_atom(Name),
+ #xmlElement{name=Me,namespace=NS,parents=P},S);
+mk_EII_QName(Name,#xmlElement{name=Me,namespace=NS,parents=P},S) ->
+ Scope = S#xsd_state.scope,
+ NameStr = atom_to_list(Name),
+ case string:tokens(NameStr,":") of
+ ["xmlns",PrefixDef] -> %% special case
+ {'xmlns',Scope,namespace(PrefixDef,NS,[])};
+ [Prefix,LocalName] -> %% A
+ {list_to_atom(LocalName),Scope,namespace(Prefix,NS,[])};
+ [_LocalName] -> %% B
+ {Name,Scope,mk_EII_namespace([{Me,0}|P],NS,S)}
+ end.
+mk_EII_namespace([],#xmlNamespace{default=DefaultNS},_S) ->
+ DefaultNS;
+%%mk_EII_namespace([{PName,_}|GrandPs],NS=#xmlNamespace{default=[]},S) ->
+mk_EII_namespace([{PName,_}|GrandPs],NS,S) ->
+ NameStr = atom_to_list(PName),
+ case string:tokens(NameStr,":") of
+ [Prefix,_LocalName] ->
+ namespace(Prefix,NS,[]);
+ [_LocalName] ->
+ mk_EII_namespace(GrandPs,NS,S)
+ end;
+mk_EII_namespace(_,NS,_S) ->
+ NS#xmlNamespace.default.
+
+mk_EII_Att_QName(AttName,XMLEl,S) when is_list(AttName) ->
+ mk_EII_Att_QName(list_to_atom(AttName),XMLEl,S);
+mk_EII_Att_QName(AttName,XMLEl,S) ->
+ NameStr = atom_to_list(AttName),
+ {member($:,NameStr),mk_EII_QName(AttName,XMLEl,S)}.
+
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% table access functions
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+create_tables(S=#xsd_state{table=undefined}) ->
+ Tid=ets:new(xmerl_schema_tab,[]),
+ initial_tab_data(Tid),
+ S#xsd_state{table=Tid};
+create_tables(S) ->
+ S.
+
+delete_table(#xsd_state{table=Tab}) ->
+ catch ets:delete(Tab).
+
+%% @hidden
+print_table(#xsd_state{table=Tab}) ->
+ case catch ets:tab2list(Tab) of
+ Res when is_list(Res) ->
+ Res;
+ {'EXIT',Reason} ->
+ {error,{?MODULE,[],Reason}}
+ end;
+print_table(_) ->
+ ok.
+
+%save_object({name,_},S) ->
+% %% already saved.
+% S;
+%% only simpleType asn complexType are temporary saved with
+%% three-tuple key. They are loaded and merged in redefine/2.
+save_object({Kind,Obj},S=#xsd_state{redefine=true})
+ when Kind == simpleType; Kind == complexType ->
+ save_in_table({Kind,redefine,object_name(Obj)},Obj,S);
+save_object({Kind,Obj},S=#xsd_state{redefine=true})
+ when Kind == group; Kind == attributeGroup ->
+ save_in_table({Kind,object_name(Obj)},Obj,S);
+save_object({Kind,Obj},S) when Kind == simpleType; Kind == complexType ->
+ save_unique_type({Kind,object_name(Obj)},Obj,S);
+save_object({Kind,Obj},S)
+ when Kind == attributeGroup; Kind == group ->
+ save_uniquely({Kind,object_name(Obj)},Obj,S);
+save_object({Kind,Obj},S) ->
+ save_in_table({Kind,object_name(Obj)},Obj,S).
+
+save_unique_type(Key={_,Name},Obj,S) ->
+ case resolve({simple_or_complex_Type,Name},S) of
+ {#schema_simple_type{},_} ->
+ acc_errs(S,{[],?MODULE,{type_not_uniquely_defined_in_schema,Name}});
+ {#schema_complex_type{},_} ->
+ acc_errs(S,{[],?MODULE,{type_not_uniquely_defined_in_schema,Name}});
+ _ ->
+ save_in_table(Key,Obj,S)
+ end.
+
+save_uniquely(Key,Obj,S) ->
+ case load_object(Key,S) of
+ {[],_} ->
+ save_in_table(Key,Obj,S);
+ _ ->
+ acc_errs(S,{[],?MODULE,{not_uniquely_defined_in_schema,Key}})
+ end.
+
+
+
+save_schema_element(CM,S=#xsd_state{elementFormDefault = EFD,
+ attributeFormDefault = AFD,
+ targetNamespace = TN,
+ finalDefault = FD,
+ blockDefault = BD}) ->
+ ElementList = [X||X = {element,_} <- CM],
+%% OtherGlobalEls = other_global_elements(S,ElementList),
+ Schema = get_schema_cm(S#xsd_state.table,TN),
+ Schema2 =
+ case Schema == #schema{} of
+ true ->
+ Schema#schema{elementFormDefault = EFD,
+ attributeFormDefault = AFD,
+ targetNamespace = TN,
+ blockDefault = BD,
+ finalDefault = FD,
+ content = ElementList};
+ _ ->
+ Content = Schema#schema.content,
+ Schema#schema{content=[X||X<-Content,member(X,ElementList)==false]++ElementList}
+ end,
+ TN2 = case TN of
+ undefined -> [];
+ _ -> TN
+ end,
+ save_in_table({schema,TN2},Schema2,S),
+ save_to_file(S).
+
+%% other_global_elements(S,ElementList) ->
+%% Schema = get_schema_cm(S#xsd_state.table,S#xsd_state.targetNamespace),
+%% [X||X<-Schema#schema.content,
+%% member(X,ElementList) == false].
+
+%% other_global_elements(#xsd_state{schema_name=SchemaName,
+%% table = Tab,
+%% global_element_source=GES},ElementList) ->
+%% case [X||{Y,X}<-GES,Y==SchemaName] of
+%% [] ->
+%% [];
+%% L -> %% All other schemas included in redefine
+%% NameList = [X||{element,{X,_}}<-ElementList],
+%% Contents =
+%% flatten([X||#schema{content=X}<-[get_schema_cm(Tab,Y)||Y<-L]]),
+%% SortFun =
+%% fun({_,{A,_}},{_,{B,_}}) when A =< B ->
+%% true;
+%% (_,_) -> false end,
+%% [X||X={element,{Y,_}}<-lists:sort(SortFun,Contents),member(Y,NameList)==false]
+%% end.
+
+save_to_file(S=#xsd_state{tab2file=true},FileName) ->
+ save_to_file(S#xsd_state{tab2file=FileName});
+save_to_file(_,_) ->
+ ok.
+
+save_to_file(S=#xsd_state{tab2file=TF}) ->
+ case TF of
+ true ->
+ {ok,IO}=file:open(filename:rootname(S#xsd_state.schema_name)++".tab",
+ [write]),
+ io:format(IO,"~p~n",[catch ets:tab2list(S#xsd_state.table)]),
+ file:close(IO);
+ false ->
+ ok;
+ IOFile ->
+ {ok,IO}=file:open(IOFile,[write]),
+ io:format(IO,"~p~n",[catch ets:tab2list(S#xsd_state.table)]),
+ file:close(IO)
+ end.
+
+save_merged_type(Type=#schema_simple_type{},S) ->
+ resave_object({simpleType,Type},S);
+save_merged_type(Type=#schema_complex_type{},S) ->
+ resave_object({complexType,Type},S).
+resave_object({Kind,Obj},S) ->
+ save_in_table({Kind,object_name(Obj)},Obj,S).
+
+save_in_table(Name,ElDef,S=#xsd_state{table=Tab}) ->
+ catch ets:insert(Tab,{Name,ElDef}),
+ S.
+
+save_idc(key,IDConstr,S) ->
+ save_key(IDConstr,S);
+save_idc(keyref,IDConstr,S) ->
+ save_keyref(IDConstr,S);
+save_idc(unique,IDConstr,S) ->
+ save_unique(IDConstr,S).
+
+save_key(Key,S) ->
+ save_object({key,Key},S),
+ S.
+
+save_keyref(KeyRef=#id_constraint{category=keyref},S) ->
+ S1 = add_keyref(KeyRef,S),
+ save_object({keyref,KeyRef},S1),
+ S1;
+save_keyref(_,S) ->
+ S.
+
+save_unique(Unique,S) ->
+ save_object({unique,Unique},S),
+ S.
+
+save_substitutionGroup([],S) ->
+ S;
+save_substitutionGroup([{Head,Members}|SGs],S) ->
+ %% save {head,[members]}
+ save_in_table({substitutionGroup,Head},Members,S),
+ %% save {member,head}, an element can only be a member in one
+ %% substitutionGroup
+ lists:foreach(fun(X)->save_in_table({substitutionGroup_member,X},Head,S) end,Members),
+ save_substitutionGroup(SGs,S).
+substitutionGroup_member(ElName,S) ->
+ case load_object({substitutionGroup_member,ElName},S) of
+ {[],_} ->
+ false;
+ {Res,_} ->
+ Res
+ end.
+%% substitutionGroup_head(Head,S) ->
+%% case load_object({substitutionGroup,Head},S) of
+%% {[],_} ->
+%% false;
+%% {Res,_} ->
+%% Res
+%% end.
+
+add_keyref(#id_constraint{name=Name,refer=Refer},
+ S=#xsd_state{keyrefs=KeyRefs}) ->
+ S#xsd_state{keyrefs=add_once({keyref,Name,Refer},KeyRefs)}.
+
+
+load_redefine_object({Kind,Name},S) ->
+ load_object({Kind,redefine,Name},S).
+
+load_object({element,{QN,Occ={Min,_}}},S) when is_integer(Min) ->
+ case load_object({element,QN},S) of
+ {SE=#schema_element{},S1} -> {SE#schema_element{occurance=Occ},S1};
+ Other -> Other
+ end;
+load_object({group,{QN,_Occ={Min,_}}},S) when is_integer(Min) ->
+ load_object({group,QN},S);
+load_object(Key,S=#xsd_state{table=Tab}) ->
+ case ets:lookup(Tab,Key) of
+ [{Key,Value}] ->
+ {Value,S};
+ [] ->
+ case ets:lookup(Tab,global_def(Key)) of
+ [{_,Value}] -> {Value,global_scope(S)};
+ Other -> {Other,S}
+ end;
+ Other ->
+ {Other,S}
+ end.
+
+
+load_keyref(Name,S) ->
+ case load_object({keyref,Name},S) of
+ {KeyRef=#id_constraint{},_} -> KeyRef;
+ _ ->
+ []
+ end.
+load_key(Name,S) ->
+ case load_object({key,Name},S) of
+ {Key=#id_constraint{},_} -> Key;
+ _ ->
+ case load_object({unique,Name},S) of
+ {Key=#id_constraint{},_} -> Key;
+ _ ->
+ []
+ end
+ end.
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% END table access functions
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+save_ID(ID,S) ->
+ case member(ID,S#xsd_state.'IDs') of
+ true ->
+ acc_errs(S,{'ID_name_not_unique',ID});
+ _ ->
+ S#xsd_state{'IDs'=[ID|S#xsd_state.'IDs']}
+ end.
+check_and_save_ID(ID,S) ->
+ case xmerl_xsd_type:check_simpleType('ID',ID,S) of
+ {ok,ID} ->
+ save_ID(ID,S);
+ _ ->
+ acc_errs(S,{illegal_ID_value,ID})
+ end.
+
+insert_substitutionGroup(#schema_element{substitutionGroup=undefined},S) ->
+ S;
+insert_substitutionGroup(#schema_element{name=Name,
+ substitutionGroup=SG},
+ S=#xsd_state{substitutionGroups=SGregister}) ->
+ case key1search(SG,SGregister,[]) of
+ {_,SGList} ->
+ S#xsd_state{substitutionGroups=
+ keyreplace(SG,1,SGregister,{SG,[Name|SGList]})};
+ _ ->
+ S#xsd_state{substitutionGroups=[{SG,[Name]}|SGregister]}
+ end.
+
+
+global_scope(S=#xsd_state{}) ->
+ S#xsd_state{scope=[]}.
+
+global_def({Kind,{Local,_,NS}})
+ when Kind==simpleType; Kind==complexType; Kind==group;
+ Kind==attributeGroup; Kind==element; Kind==attribute;
+ Kind==substitutionGroup;Kind==substitutionGroup_member->
+ {Kind,{Local,[],NS}};
+global_def(D) -> D.
+
+
+get_schema_cm(Tab,undefined) ->
+ get_schema_cm(Tab,[]);
+get_schema_cm(Tab,[]) ->
+ get_schema_cm1(Tab,[]);
+get_schema_cm(Tab,Namespace) ->
+ NoNamespaceC=get_no_namespace_content(Tab),
+ Schema = get_schema_cm1(Tab,Namespace),
+ NSC = Schema#schema.content,
+ Schema#schema{content=NSC++[X||X<-NoNamespaceC,member(X,NSC)==false]}.
+get_schema_cm1(Tab,Namespace) ->
+ case catch ets:lookup(Tab,{schema,Namespace}) of
+ [{_,H}] ->
+ H;
+ _ ->
+ #schema{}
+ end.
+get_no_namespace_content(Tab) ->
+ case get_schema_cm1(Tab,[]) of
+ #schema{content=C} ->
+ C;
+ _ -> []
+ end.
+
+
+%% is_simple_type(Type,S) when is_atom(Type) ->
+%% is_simple_type(atom_to_list(Type),S);
+is_simple_type({LName,Scope,NS},S) when is_atom(LName) ->
+ is_simple_type({atom_to_list(LName),Scope,NS},S);
+is_simple_type(QName={_,_,_},S) ->
+ case is_builtin_simple_type(QName) of
+ true ->
+ true;
+ _ ->
+ is_derived_simple_type(QName,S)
+ end.
+
+
+is_derived_simple_type(QName,S) ->
+%% case resolve({simple_or_complex_Type,QName},S) of
+ case resolve({simpleType,QName},S) of
+ {#schema_simple_type{},_} -> true;
+ _ -> false
+ end.
+
+
+
+object_name(#schema_element{name=N}) ->
+ N;
+object_name(#schema_simple_type{name=N}) ->
+ N;
+object_name(#schema_complex_type{name=N}) ->
+ N;
+object_name(#schema_attribute{name=N}) ->
+ N;
+object_name(#schema_attribute_group{name=N}) ->
+ N;
+object_name(#schema_group{name=N}) ->
+ N;
+object_name(#id_constraint{name=N}) ->
+ N.
+
+
+is_whitespace(#xmlText{value=V}) ->
+ case [X|| X <- V, whitespace(X) == false] of
+ [] ->
+ true;
+ _ -> false
+ end;
+is_whitespace(_) ->
+ false.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% fetch
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+fetch(URI,S) ->
+ Split = filename:split(URI),
+ Filename = fun([])->[];(X)->lists:last(X) end (Split),
+ Fullname =
+ case Split of %% how about Windows systems?
+ ["file:"|Name]-> %% absolute path, see RFC2396 sect 3
+ %% file:/dtd_name
+ filename:join(["/"|Name]);
+ ["/"|Rest] when Rest /= [] ->
+ %% absolute path name
+ URI;
+ ["http:"|_Rest] ->
+ {http,URI};
+ [] -> %% empty systemliteral
+ [];
+ _ ->
+ filename:join(S#xsd_state.xsd_base, URI)
+ end,
+ Path = path_locate(S#xsd_state.fetch_path, Filename, Fullname),
+ ?dbg("fetch(~p) -> {file, ~p}.~n", [URI, Path]),
+ {ok, Path, S}.
+
+path_locate(_, _, {http,_}=URI) ->
+ URI;
+path_locate(_, _, []) ->
+ [];
+path_locate([Dir|Dirs], FN, FullName) ->
+ F = filename:join(Dir, FN),
+ case file:read_file_info(F) of
+ {ok, #file_info{type = regular}} ->
+ {file,F};
+ _ ->
+ path_locate(Dirs, FN, FullName)
+ end;
+path_locate([], _FN, FullName) ->
+ {file,FullName}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% return
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+return_error(Errs) ->
+ {error,reverse(Errs)}.
+
+return_schema_error(Errs) ->
+ {error,{schema_failure,reverse(Errs)}}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% general helper functions
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+if_atom_to_list(A) when is_atom(A) ->
+ atom_to_list(A);
+if_atom_to_list(L) ->
+ L.
+if_list_to_atom(L) when is_list(L) ->
+ list_to_atom(L);
+if_list_to_atom(A) ->
+ A.
+
+list_members(Members,CompleteList) ->
+ case [X||X<-Members,member(X,CompleteList)==false] of
+ [] ->
+ true;
+ L ->
+ {error,L}
+ end.
+
+whitespace(X) when ?whitespace(X) ->
+ true;
+whitespace(_) ->
+ false.
+
+key1search(Key,List,Default) ->
+ case keysearch(Key,1,List) of
+ {value,V} -> V;
+ _ -> Default
+ end.
+
+keyNsearch(Key,N,L,Default) ->
+ case keysearch(Key,N,L) of
+ {value,V} -> V;
+ _ -> Default
+ end.
+
+key_replace_or_insert(Key,N,List,Tuple) ->
+ case keyreplace(Key,N,List,Tuple) of
+ List ->
+ [Tuple|List];
+ NewList ->
+ NewList
+ end.
+
+keysearch_delete(Key,N,List,Default) ->
+ case keysearch(Key,N,List) of
+ {value,Res} ->
+ {Res,keydelete(Key,N,List)};
+ _ ->
+ {Default,List}
+ end.
+
+search_delete_all_el(ElName,ElList,S) ->
+ case search_delete_all_el2(ElName,ElList,[]) of
+ false ->
+ case substitutionGroup_member(ElName,S) of
+ false ->
+ false;
+ Head ->
+ case search_delete_all_el(Head,ElList,S) of
+ {_,Rest} ->
+ {Name,_,NS} = ElName,
+ {{element,{Name,[],NS}},Rest};
+ _ ->
+ false
+ end
+ end;
+ Res ->
+ Res
+ end.
+search_delete_all_el2(_ElName,[],_NoMatch) ->
+ false;
+%% name must match defined (local scope) and referenced (global scope)
+%% elements.
+search_delete_all_el2({Name,Scope,NS},
+ [El={element,{{Name,ScopeCM,NS},_}}|Rest],
+ NoMatch)
+ when Scope == ScopeCM; ScopeCM == [] ->
+ {El,reverse(NoMatch)++Rest};
+search_delete_all_el2(ElName,[H|T],NoMatch) ->
+ search_delete_all_el2(ElName,T,[H|NoMatch]).
+
+%% Search attribute should not consider the scope. All attributes
+%% allowed in this scope are in SchemaAttList.
+search_attribute(true,{Name,_,Namespace},SchemaAtts) ->
+ case [A||A={_,{N,_,NS}}<-SchemaAtts,N==Name,NS==Namespace] of
+ [] ->
+ {undefined,SchemaAtts};
+ [Attr] ->
+ {Attr,lists:delete(Attr,SchemaAtts)}
+ end;
+search_attribute(_,{Name,_,_},SchemaAtts) ->
+ case [A||A={_,{N,_,_}}<-SchemaAtts,N==Name] of
+ [] ->
+ {undefined,SchemaAtts};
+ [Attr] ->
+ {Attr,lists:delete(Attr,SchemaAtts)}
+ end.
+
+error_msg(Format,Args) ->
+ io:format(Format,Args).
+
+
+add_once(El,L) ->
+ case member(El,L) of
+ true ->
+ L;
+ _ ->
+ [El|L]
+ end.
+
+add_key_once(Key,N,El,L) ->
+ case keymember(Key,N,L) of
+ true ->
+ L;
+ _ ->
+ [El|L]
+ end.
+
+%% shema_el_pathname({Type,_},Env) ->
+%% mk_path(reverse([Type|Env])).
+%% xml_el_pathname(#xmlElement{name=Name,parents=Parents,pos=Pos}) ->
+%% {element,mk_xml_path(Parents,Name,Pos)};
+%% xml_el_pathname(#xmlAttribute{name=Name,parents=Parents,pos=Pos}) ->
+%% {attribute,mk_xml_path(Parents,Name,Pos)};
+%% xml_el_pathname(#xmlText{parents=Parents,pos=Pos}) ->
+%% {text,mk_xml_path(Parents,text,Pos)}.
+
+%% mk_path([]) ->
+%% [];
+%% mk_path(L) when is_list(L) ->
+%% "/"++filename:join(L).
+
+%% mk_xml_path(Parents,Type,Pos) ->
+%% %% io:format("mk_xml_path: Parents = ~p~n",[Parents]),
+%% {filename:join([[io_lib:format("/~w(~w)",[X,Y])||{X,Y}<-Parents],Type]),Pos}.
+
+%% @spec format_error(Errors) -> Result
+%% Errors = error_tuple() | [error_tuple()]
+%% Result = string() | [string()]
+%% @doc Formats error descriptions to human readable strings.
+format_error(L) when is_list(L) ->
+ [format_error(X)||X<-L];
+format_error({unexpected_rest,UR}) ->
+ io_lib:format("XML: The following content of an element didn't validate by the provided schema, ~n~p.",[UR]);
+format_error({unvalidated_rest,UR}) ->
+ io_lib:format("XML: The following content of an element didn't validate by the provided schema, ~n~p.",[UR]);
+format_error({no_schemas_provided}) ->
+ "Schema: Validator found no schema. A schema must be provided for validation.";
+format_error({internal_error,Reason}) ->
+ io_lib:format("An error occured that was unforeseen, due to ~p.",[Reason]);
+format_error({internal_error,Reason,Info}) ->
+ io_lib:format("An error occured that was unforeseen, due to ~p: ~p.",[Reason,Info]);
+format_error({internal_error,Function,Info1,Info2}) ->
+ io_lib:format("An internal error occured in function ~p with args: ~p,~p.",[Function,Info1,Info2]);
+format_error({illegal_content,Reason,Kind}) ->
+ io_lib:format("Schema: The schema violates the content model allowed for schemas.~nReason: ~p,~nkind of schema element: ~p.",[Reason,Kind]);
+format_error({no_match,Kind}) ->
+ io_lib:format("Schema: The schema violates the content model allowed for schemas.~nKind of schema element: ~p.",[Kind]);
+format_error({bad_match,S4SC,CM}) ->
+ io_lib:format("Schema: The schema missed mandatory elements ~p in ~p.",[S4SC,CM]);
+format_error({unmatched_mandatory_object,SequenceEl1,SequenceEl2}) ->
+ io_lib:format("Schema: The schema should have had an ~p object after the ~p, but it was missing.",[SequenceEl2,SequenceEl1]);
+format_error({parsing_external_schema_failed,File,Reason}) ->
+ io_lib:format("Schema: Parsing the referenced external schema ~p, failed due to ~p.",[File,Reason]);
+format_error({fetch_fun_failed,Other}) ->
+ io_lib:format("Schema: Fetching this kind of external schema is not supported ~p.",
+ [Other]);
+format_error({element_not_in_schema,[EIIName,_ElQName,_CM]}) ->
+ io_lib:format("XML: The XML element ~p are not present in the schema.",
+ [EIIName]);
+format_error({missing_mandatory_element,CMEl}) ->
+ io_lib:format("XML: The XML file missed mandatory element(s) ~p defined in schema.",[CMEl]);
+format_error({empty_content_not_allowed,C}) ->
+ io_lib:format("XML: The XML file missed mandatory element(s): ~p defined in schema.",[C]);
+format_error({element_not_suitable_with_schema,ElName,_S}) ->
+ io_lib:format("XML: The XML element: ~p violates the schema, probably to many of same element.",[ElName]);
+format_error({element_not_suitable_with_schema,ElName,CMName,_CMEl,_S}) ->
+ io_lib:format("XML: The XML element: ~p violates the schema. Schema expected element ~p.",[ElName,CMName]);
+format_error({no_element_expected_in_group,XML}) ->
+ io_lib:format("XML: The XML element(s) ~p violates the schema. No element was expected.",[XML]);
+format_error({element_bad_match,E,Any,_Env}) ->
+ io_lib:format("XML: XML element ~p didn't match into the namespace of schema type any ~p.",[E,Any]);
+format_error({match_failure,_XML,_CM,_S}) ->
+ "XML: A combination of XML element(s) and schema definitions that is not known has occured. The implementation doesn't support this structure.";
+format_error({cannot_contain_text,_XMLTxt,CMEl}) ->
+ io_lib:format("XML: The schema structure: ~p doesn't allow text",[CMEl]);
+format_error({missing_mandatory_elements,MandatoryEls}) ->
+ io_lib:format("XML: A schema sequence has mandatory elements ~p, that were unmatched.",[MandatoryEls]);
+format_error({choice_missmatch,T,Els}) ->
+ io_lib:format("XML: A schema choice structure with the alternatives: ~p doesn't allow the text: ~p.",[Els,T]);
+format_error({no_element_matching_choice,XML}) ->
+ io_lib:format("XML: The choice at location: ~p had no alternative that matched the XML structure(s): ~p.",[error_path(XML,undefined),XML]);
+format_error({all_missmatch,T,CM}) ->
+ io_lib:format("XML: The schema expected one of: ~p, but the XML content was text: ~p at the location: ~p.",[CM,T,error_path(T,undefined)]);
+format_error({element_not_in_all,ElName,E,_CM}) ->
+ io_lib:format("XML: The element ~p at location ~p in the XML file was not allowed according to the schema.",[ElName,error_path(E,undefined)]);
+format_error({missing_mandatory_elements_in_all,MandatoryEls}) ->
+ io_lib:format("XML: The schema elements ~p were missed in the XML file.",[MandatoryEls]);
+format_error({failed_validating,E,Any}) ->
+ io_lib:format("XML: The element ~p at location ~p failed validation. It should hav been matched by an any schema element ~p",[E#xmlElement.name,error_path(E,undefined),Any]);
+format_error({schemaLocation_list_failure,Paths}) ->
+ io_lib:format("XML: schemaLocation values consists of one or more pairs of URI references, separated by white space. The first is a namespace name the second a reference to a schema: ~p.",[Paths]);
+format_error({element_content_not_nil,XMLEl}) ->
+ io_lib:format("XML: The element ~p at position ~p has content of text/elements despite the nillable attribute was true.",[XMLEl#xmlElement.name,error_path(XMLEl,undefined)]);
+format_error({attribute_in_simpleType,El,Att}) ->
+ io_lib:format("XML: The element ~p at location ~p must not have attributes like: ~p since it according to the schema has simpleType type.",[El#xmlElement.name,error_path(El,undefined),Att]);
+format_error({required_attribute_missed,El,Name}) ->
+ io_lib:format("XML: The schema required an attribute ~p in element at location ~p that was missing.",[Name,error_path(El,undefined)]);
+format_error({default_and_fixed_attributes_mutual_exclusive,
+ Name,Default,Fix}) ->
+ io_lib:format("Schema: It is an error in the schema to assign values for both default and fix for an attribute. Attribute: ~p, default: ~p, fix: ~p.",[Name,Default,Fix]);
+format_error({schema_error,unexpected_object,_SA,_Err}) ->
+ "Schema: An unforeseen error case occured, maybee due to an unimplemented feature.";
+format_error({attribute_not_defined_in_schema,Name}) ->
+ io_lib:format("XML: The attribute ~p is not defined in the provided schema.",[Name]);
+format_error({disallowed_namespace,Namespace,NS,Name}) ->
+ io_lib:format("XML: The attribute ~p is not valid because the namespace ~p is forbidden by ~p",[Name,NS,Namespace]);
+format_error({cirkular_attributeGroup_reference,Name}) ->
+ io_lib:format("Schema: Cirkular references to attribute groups are forbidden. One was detected including ~p.",[Name]);
+format_error({could_not_resolve_type,ST}) ->
+ io_lib:format("Schema: The simpleType ~p could not be found among the types defined by the provided schema.",[ST]);
+format_error({could_not_check_value_for_type,Type}) ->
+ io_lib:format("XML: Checking value for type ~p is not implemented.",[Type]);
+format_error({unknown_simpleType,BT}) ->
+ io_lib:format("Schema: The simpleType ~p could not be found among the types defined by the provided schema",[BT]);
+format_error({abstract_element_instance,ElName}) ->
+ io_lib:format("XML: Elements defined as abstract in the schema must not be instantiated in XML: ~p.",[ElName]);
+format_error({qualified_name_required,LocalName}) ->
+ io_lib:format("XML: Element name ~p in XML instance is not qualified, though the schema requires that.",[LocalName]);
+format_error({unqualified_name_required,QualifiedName}) ->
+ io_lib:format("XML: Element name ~p in XML instance must be unqualified, according to schema.",[QualifiedName]);
+format_error({illegal_key_sequence_value,Err}) ->
+ io_lib:format("XML: The 'key-sequence', (se XML-spec 3.11.4), must be a node with at most one member: ~p",[Err]);
+format_error({qualified_node_set_not_correct_for_key,_Err}) ->
+ "Schema: The 'target node set' and 'qualified node set' (se XML-spec 3.11.4.2.1) must be equal.";
+format_error({key_value_not_unique,KS}) ->
+ io_lib:format("Schema: Key values must be unique within the schema. This is not ~p,",[KS]);
+format_error({keyref_missed_matching_key,Refer}) ->
+ io_lib:format("Schema: This keyref had no matching key ~p.",[Refer]);
+format_error({keyref_unexpected_object,_Other}) ->
+ "Schema: An unforeseen error case occured, unknown failure cause.";
+format_error({cardinality_of_fields_not_equal,KR,K}) ->
+ io_lib:format("Schema: keyref and the corresponding key must have same cardinality of their fields. Missmatch in this case keyref: ~p, key: ~p.",[KR,K]);
+format_error({could_not_load_keyref,Name}) ->
+ io_lib:format("Schema: The schema didn't define a keyref with the name ~p.",[Name]);
+format_error({reference_undeclared,Kind,Ref}) ->
+ io_lib:format("Schema: The schema didn't define an ~p with the name ~p.",[Kind,Ref]);
+format_error({cyclic_substitutionGroup,SGs}) ->
+ io_lib:format("Schema: cyclic substitutionGroup was detected, substitutionGroup structure is ~p.",[SGs]);
+format_error({substitutionGroup_error,Head,SG}) ->
+ io_lib:format("Schema: Either of substitutionGroup members ~p or ~p is not defined in the provided schema.",[Head,SG]);
+format_error({cyclic_definition,CA}) ->
+ io_lib:format("Schema: A forbidden cicular definition was detected ~p.",[CA]);
+format_error({type_of_element_not_derived,MemT,HeadT}) ->
+ io_lib:format("Schema: Type in substitutionGroup members should be simpleType or complexType. In this case ~p and ~p were found.",[MemT, HeadT]);
+format_error({derivation_blocked,BlockTag,Derivation}) ->
+ io_lib:format("Derivation by ~p is blocked by the blocking tag ~p.",[Derivation,BlockTag]);
+format_error({names_not_equal,QName1,QName2}) ->
+ io_lib:format("The type ~p seems to be derived from another type than the base type ~p",[QName2,QName1]);
+%% format_error({miss_match_base_types,QName1,QName2}) ->
+%% io_lib:format("Types and/or names of base type ~p and derived type ~p doesn't fit.",[QName1,QName2]);
+format_error({illegal_content_in_extension,Ext}) ->
+ io_lib:format("The extension content ~p didn't match the content model of the provided schema.",[Ext]);
+format_error({SeqCho,expected,Other,found})
+ when SeqCho == sequence;SeqCho == choice ->
+ io_lib:format("Schema: The restriction content ~p didn't match the content model of the provided schema, ~p was expected.",[SeqCho,Other]);
+format_error({does_not_support,F,in_restriction}) ->
+ io_lib:format("Schema: The structure ~p is not supported in the implementation.",[F]);
+format_error({illegal_content_simple_type,CM,TypeName}) ->
+ io_lib:format("Schema: ~p content is not allowed in a simpleType, as in ~p.",[CM,TypeName]);
+format_error({illegal_in_restriction_of_simpleType,X}) ->
+ io_lib:format("Schema: The ~p content is illegal in a simpleType.",[X]);
+format_error({element,Name,not_present_in_restriction}) ->
+ io_lib:format("Schema: In a restriction all element names of the restriction must be one of the elements of the base type. ~p is not.",[Name]);
+format_error({invalid_derivation,EA,BaseAtts}) ->
+ io_lib:format("Schema: An anyAttribute ~p in a restricted derived type must be present among the base type attributes ~p.",[EA,BaseAtts]);
+format_error({wildcard_namespace,NS,not_subset_of_base_namespace,BaseNS}) ->
+ io_lib:format("Schema: See XML spec. section 3.10.6. This wildcard namespace ~p is not allowed by the base namespace restrictions ~p.",[NS,BaseNS]);
+format_error({wildcard_namespace_union_not_expressible,NS1,NS2}) ->
+ io_lib:format("Schema: See XML spec. section 3.10.6. The union of namespaces ~p and ~p is not expressible.",[NS1,NS2]);
+format_error({wildcard_namespace_intersection_not_expressible,NS1,NS2}) ->
+ io_lib:format("Schema: See XML spec. section 3.10.6. The intersection of namespaces ~p and ~p is not expressible.",[NS1,NS2]);
+format_error({circular_reference_of_type,TName}) ->
+ io_lib:format("Schema: An illicit circular reference involving simple/complex type ~p has been detected.",[TName]);
+format_error({type_not_uniquely_defined_in_schema,Name}) ->
+ io_lib:format("Schema: See XML spec. section 3.4.1. Type names whether simple or complex must be unique within the schema. ~p is not.",[Name]);
+format_error({not_uniquely_defined_in_schema,Key}) ->
+ io_lib:format("Schema: All schema objects of the same kind identified by name must be unique within the schema. ~p is not.",[Key]);
+format_error({illegal_ID_value,ID}) ->
+ io_lib:format("The ID value ~p is not allowed as an ID value.",[ID]);
+format_error({incomplete_file,_FileName,_Other}) ->
+ "Schema: The file containing a schema state must be produced by xmerl_xsd:state2file/[1,2].";
+format_error({unexpected_content_in_any,A}) ->
+ io_lib:format("Schema: The any type is considered to have no content besides annotation. ~p was found.",[A]);
+format_error({erronous_content_in_identity_constraint,IDC,Err}) ->
+ io_lib:format("Schema: An ~p identity constraint must have one selector and one or more field in content. This case ~p",[IDC,Err]);
+format_error({missing_xpath_attribute,IDCContent}) ->
+ io_lib:format("Schema: A ~p in a identity constraint must have a xpath attribute.",[IDCContent]);
+format_error({content_in_anyAttribute,Err}) ->
+ io_lib:format("Schema: ~p is not allowed in anyAttribute. Content cannot be anything else than annotation.",[Err]);
+format_error({content_in_simpleContent,Err}) ->
+ io_lib:format("Schema: Content of simpleContent can only be an optional annotation and one of restriction or extension. In this case ~p.",[Err]);
+format_error({complexContent_content_failure,Err}) ->
+ io_lib:format("Schema: Besides an optional annotation complexContent should have one of restriction or extension. In this case ~p.",[Err]);
+format_error({union_member_type_not_simpleType,IllegalType}) ->
+ io_lib:format("Schema: ~p is not allowed in a union. Content must be any nymber of simpleType.",[IllegalType]);
+format_error({missing_base_type,restriction,_Other}) ->
+ "Schema: A restriction must have a base type, either assigned by the 'base' attribute or as a simpleType defined in content.";
+format_error({content_failure_expected_restriction_or_extension,Kind,_}) ->
+ io_lib:format("Schema: A ~p had no restriction or extension in content.",[Kind]);
+format_error({content_failure_only_one_restriction_or_extension_allowed,Kind,_}) ->
+ io_lib:format("Schema: A ~p can only have one of restriction or extension in content.",[Kind]);
+format_error({mandatory_component_missing,S4SCMRest,Kind}) ->
+ io_lib:format("Schema: After matching a ~p the schema should have had content ~p.",[Kind,S4SCMRest]);
+format_error(Err) ->
+ io_lib:format("~p~n",[Err]).
+
+%% format_error(ErrMsg,E,SchemaE,Env) ->
+%% ?debug("format_error: ~p~n",[ErrMsg]),
+%% {ErrMsg,format_error2(E,SchemaE,Env)}.
+%% format_error2(E,SchemaE,Env) ->
+%% {shema_el_pathname(SchemaE,Env),
+%% xml_el_pathname(E)}.
+
+initial_tab_data(Tab) ->
+ ets:insert(Tab,
+ binary_to_term(
+ <<131,108,0,0,0,9,104,2,104,2,100,0,9,97,116,116,114,105,98,117,116,
+ 101,104,3,100,0,5,115,112,97,99,101,106,100,0,36,104,116,116,112,58,
+ 47,47,119,119,119,46,119,51,46,111,114,103,47,88,77,76,47,49,57,57,
+ 56,47,110,97,109,101,115,112,97,99,101,104,9,100,0,16,115,99,104,101,
+ 109,97,95,97,116,116,114,105,98,117,116,101,104,3,100,0,5,115,112,97,
+ 99,101,106,100,0,36,104,116,116,112,58,47,47,119,119,119,46,119,51,
+ 46,111,114,103,47,88,77,76,47,49,57,57,56,47,110,97,109,101,115,112,
+ 97,99,101,108,0,0,0,1,104,2,100,0,10,115,105,109,112,108,101,84,121,
+ 112,101,104,3,100,0,15,95,120,109,101,114,108,95,110,111,95,110,97,
+ 109,101,95,108,0,0,0,1,100,0,5,115,112,97,99,101,106,106,106,100,0,5,
+ 102,97,108,115,101,106,100,0,8,111,112,116,105,111,110,97,108,100,0,9,
+ 117,110,100,101,102,105,110,101,100,100,0,9,117,110,100,101,102,105,
+ 110,101,100,100,0,9,117,110,100,101,102,105,110,101,100,104,2,104,2,
+ 100,0,6,115,99,104,101,109,97,107,0,7,120,109,108,46,120,115,100,104,
+ 7,100,0,6,115,99,104,101,109,97,100,0,11,117,110,113,117,97,108,105,
+ 102,105,101,100,100,0,11,117,110,113,117,97,108,105,102,105,101,100,
+ 100,0,36,104,116,116,112,58,47,47,119,119,119,46,119,51,46,111,114,
+ 103,47,88,77,76,47,49,57,57,56,47,110,97,109,101,115,112,97,99,101,
+ 106,106,106,104,2,104,2,100,0,9,97,116,116,114,105,98,117,116,101,
+ 104,3,100,0,4,98,97,115,101,106,100,0,36,104,116,116,112,58,47,47,
+ 119,119,119,46,119,51,46,111,114,103,47,88,77,76,47,49,57,57,56,47,
+ 110,97,109,101,115,112,97,99,101,104,9,100,0,16,115,99,104,101,109,
+ 97,95,97,116,116,114,105,98,117,116,101,104,3,100,0,4,98,97,115,101,
+ 106,100,0,36,104,116,116,112,58,47,47,119,119,119,46,119,51,46,111,
+ 114,103,47,88,77,76,47,49,57,57,56,47,110,97,109,101,115,112,97,99,
+ 101,108,0,0,0,1,104,2,100,0,10,115,105,109,112,108,101,84,121,112,101,
+ 104,3,100,0,6,97,110,121,85,82,73,106,100,0,32,104,116,116,112,58,47,
+ 47,119,119,119,46,119,51,46,111,114,103,47,50,48,48,49,47,88,77,76,83,
+ 99,104,101,109,97,106,100,0,5,102,97,108,115,101,106,100,0,8,111,112,
+ 116,105,111,110,97,108,100,0,9,117,110,100,101,102,105,110,101,100,
+ 100,0,9,117,110,100,101,102,105,110,101,100,100,0,9,117,110,100,101,
+ 102,105,110,101,100,104,2,104,2,100,0,14,97,116,116,114,105,98,117,
+ 116,101,71,114,111,117,112,104,3,100,0,12,115,112,101,99,105,97,108,
+ 65,116,116,114,115,106,100,0,36,104,116,116,112,58,47,47,119,119,119,
+ 46,119,51,46,111,114,103,47,88,77,76,47,49,57,57,56,47,110,97,109,101,
+ 115,112,97,99,101,104,5,100,0,22,115,99,104,101,109,97,95,97,116,116,
+ 114,105,98,117,116,101,95,103,114,111,117,112,104,3,100,0,12,115,112,
+ 101,99,105,97,108,65,116,116,114,115,106,100,0,36,104,116,116,112,58,
+ 47,47,119,119,119,46,119,51,46,111,114,103,47,88,77,76,47,49,57,57,
+ 56,47,110,97,109,101,115,112,97,99,101,100,0,9,117,110,100,101,102,
+ 105,110,101,100,100,0,9,117,110,100,101,102,105,110,101,100,108,0,0,
+ 0,3,104,2,100,0,9,97,116,116,114,105,98,117,116,101,104,3,100,0,4,98,
+ 97,115,101,106,106,104,2,100,0,9,97,116,116,114,105,98,117,116,101,
+ 104,3,100,0,4,108,97,110,103,106,106,104,2,100,0,9,97,116,116,114,
+ 105,98,117,116,101,104,3,100,0,5,115,112,97,99,101,106,106,106,104,
+ 2,104,2,100,0,10,115,105,109,112,108,101,84,121,112,101,104,3,100,0,
+ 15,95,120,109,101,114,108,95,110,111,95,110,97,109,101,95,108,0,0,0,
+ 1,100,0,5,115,112,97,99,101,106,106,104,9,100,0,18,115,99,104,101,
+ 109,97,95,115,105,109,112,108,101,95,116,121,112,101,104,3,100,0,15,
+ 95,120,109,101,114,108,95,110,111,95,110,97,109,101,95,108,0,0,0,1,
+ 100,0,5,115,112,97,99,101,106,106,108,0,0,0,1,100,0,5,115,112,97,99,
+ 101,106,104,3,100,0,6,78,67,78,97,109,101,106,100,0,32,104,116,116,
+ 112,58,47,47,119,119,119,46,119,51,46,111,114,103,47,50,48,48,49,47,
+ 88,77,76,83,99,104,101,109,97,100,0,5,102,97,108,115,101,106,108,0,0,
+ 0,1,104,2,100,0,11,101,110,117,109,101,114,97,116,105,111,110,108,0,0,
+ 0,2,107,0,7,100,101,102,97,117,108,116,107,0,8,112,114,101,115,101,
+ 114,118,101,106,106,100,0,6,97,116,111,109,105,99,108,0,0,0,1,104,2,
+ 100,0,11,114,101,115,116,114,105,99,116,105,111,110,104,2,104,3,100,
+ 0,6,78,67,78,97,109,101,106,100,0,32,104,116,116,112,58,47,47,119,
+ 119,119,46,119,51,46,111,114,103,47,50,48,48,49,47,88,77,76,83,99,
+ 104,101,109,97,108,0,0,0,2,104,2,100,0,11,101,110,117,109,101,114,
+ 97,116,105,111,110,107,0,7,100,101,102,97,117,108,116,104,2,100,0,
+ 11,101,110,117,109,101,114,97,116,105,111,110,107,0,8,112,114,101,
+ 115,101,114,118,101,106,106,104,2,104,2,100,0,10,115,105,109,112,
+ 108,101,84,121,112,101,104,3,100,0,15,95,120,109,101,114,108,95,110,
+ 111,95,110,97,109,101,95,108,0,0,0,1,100,0,4,108,97,110,103,106,106,
+ 104,9,100,0,18,115,99,104,101,109,97,95,115,105,109,112,108,101,95,
+ 116,121,112,101,104,3,100,0,15,95,120,109,101,114,108,95,110,111,95,
+ 110,97,109,101,95,108,0,0,0,1,100,0,4,108,97,110,103,106,106,108,0,0,
+ 0,1,100,0,4,108,97,110,103,106,100,0,9,117,110,100,101,102,105,110,
+ 101,100,100,0,5,102,97,108,115,101,106,106,100,0,6,97,116,111,109,
+ 105,99,108,0,0,0,1,104,2,100,0,5,117,110,105,111,110,108,0,0,0,2,104,
+ 2,100,0,10,115,105,109,112,108,101,84,121,112,101,104,3,100,0,8,108,
+ 97,110,103,117,97,103,101,106,100,0,32,104,116,116,112,58,47,47,119,
+ 119,119,46,119,51,46,111,114,103,47,50,48,48,49,47,88,77,76,83,99,104,
+ 101,109,97,104,2,100,0,10,115,105,109,112,108,101,84,121,112,101,104,
+ 3,100,0,15,95,120,109,101,114,108,95,110,111,95,110,97,109,101,95,108,
+ 0,0,0,2,100,0,15,95,120,109,101,114,108,95,110,111,95,110,97,109,101,
+ 95,100,0,4,108,97,110,103,106,106,106,106,104,2,104,2,100,0,9,97,116,
+ 116,114,105,98,117,116,101,104,3,100,0,2,105,100,106,100,0,36,104,116,
+ 116,112,58,47,47,119,119,119,46,119,51,46,111,114,103,47,88,77,76,47,
+ 49,57,57,56,47,110,97,109,101,115,112,97,99,101,104,9,100,0,16,115,99,
+ 104,101,109,97,95,97,116,116,114,105,98,117,116,101,104,3,100,0,2,105,
+ 100,106,100,0,36,104,116,116,112,58,47,47,119,119,119,46,119,51,46,
+ 111,114,103,47,88,77,76,47,49,57,57,56,47,110,97,109,101,115,112,97,
+ 99,101,108,0,0,0,1,104,2,100,0,10,115,105,109,112,108,101,84,121,112,
+ 101,104,3,100,0,2,73,68,106,100,0,32,104,116,116,112,58,47,47,119,119,
+ 119,46,119,51,46,111,114,103,47,50,48,48,49,47,88,77,76,83,99,104,101,
+ 109,97,106,100,0,5,102,97,108,115,101,106,100,0,8,111,112,116,105,111,
+ 110,97,108,100,0,9,117,110,100,101,102,105,110,101,100,100,0,9,117,
+ 110,100,101,102,105,110,101,100,100,0,9,117,110,100,101,102,105,110,
+ 101,100,104,2,104,2,100,0,9,97,116,116,114,105,98,117,116,101,104,3,
+ 100,0,4,108,97,110,103,106,100,0,36,104,116,116,112,58,47,47,119,119,
+ 119,46,119,51,46,111,114,103,47,88,77,76,47,49,57,57,56,47,110,97,109,
+ 101,115,112,97,99,101,104,9,100,0,16,115,99,104,101,109,97,95,97,116,
+ 116,114,105,98,117,116,101,104,3,100,0,4,108,97,110,103,106,100,0,36,
+ 104,116,116,112,58,47,47,119,119,119,46,119,51,46,111,114,103,47,88,
+ 77,76,47,49,57,57,56,47,110,97,109,101,115,112,97,99,101,108,0,0,0,1,
+ 104,2,100,0,10,115,105,109,112,108,101,84,121,112,101,104,3,100,0,15,
+ 95,120,109,101,114,108,95,110,111,95,110,97,109,101,95,108,0,0,0,1,
+ 100,0,4,108,97,110,103,106,106,106,100,0,5,102,97,108,115,101,106,
+ 100,0,8,111,112,116,105,111,110,97,108,100,0,9,117,110,100,101,102,
+ 105,110,101,100,100,0,9,117,110,100,101,102,105,110,101,100,100,0,9,
+ 117,110,100,101,102,105,110,101,100,104,2,104,2,100,0,10,115,105,109,
+ 112,108,101,84,121,112,101,104,3,100,0,15,95,120,109,101,114,108,95,
+ 110,111,95,110,97,109,101,95,108,0,0,0,2,100,0,15,95,120,109,101,114,
+ 108,95,110,111,95,110,97,109,101,95,100,0,4,108,97,110,103,106,106,
+ 104,9,100,0,18,115,99,104,101,109,97,95,115,105,109,112,108,101,95,
+ 116,121,112,101,104,3,100,0,15,95,120,109,101,114,108,95,110,111,95,
+ 110,97,109,101,95,108,0,0,0,2,100,0,15,95,120,109,101,114,108,95,110,
+ 111,95,110,97,109,101,95,100,0,4,108,97,110,103,106,106,108,0,0,0,2,
+ 100,0,15,95,120,109,101,114,108,95,110,111,95,110,97,109,101,95,100,
+ 0,4,108,97,110,103,106,104,3,100,0,6,115,116,114,105,110,103,106,100,
+ 0,32,104,116,116,112,58,47,47,119,119,119,46,119,51,46,111,114,103,47,
+ 50,48,48,49,47,88,77,76,83,99,104,101,109,97,100,0,5,102,97,108,115,
+ 101,106,108,0,0,0,1,104,2,100,0,11,101,110,117,109,101,114,97,116,105,
+ 111,110,108,0,0,0,1,106,106,106,100,0,6,97,116,111,109,105,99,108,0,0,
+ 0,1,104,2,100,0,11,114,101,115,116,114,105,99,116,105,111,110,104,2,
+ 104,3,100,0,6,115,116,114,105,110,103,106,100,0,32,104,116,116,112,58,
+ 47,47,119,119,119,46,119,51,46,111,114,103,47,50,48,48,49,47,88,77,76,
+ 83,99,104,101,109,97,108,0,0,0,1,104,2,100,0,11,101,110,117,109,101,
+ 114,97,116,105,111,110,106,106,106,106>>)).
+
+default_namespace_by_convention() ->
+ [{xml,'http://www.w3.org/XML/1998/namespace'}].
diff --git a/lib/xmerl/src/xmerl_xsd_type.erl b/lib/xmerl/src/xmerl_xsd_type.erl
new file mode 100644
index 0000000000..19951f030f
--- /dev/null
+++ b/lib/xmerl/src/xmerl_xsd_type.erl
@@ -0,0 +1,1558 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2006-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+-module(xmerl_xsd_type).
+
+-export([check_simpleType/3,facet_fun/2,compare_floats/2,
+ replace_ws/2,collapse_ws/1]).
+
+-export([fQuotient/2,fQuotient/3,modulo/2,modulo/3,maximumDayInMonthFor/2,
+ add_duration2dateTime/2,duration_atoms/1,dateTime_atoms/1,
+ normalize_dateTime/1]).
+
+-export([compare_durations/2,compare_dateTime/2]).
+
+-include("xmerl.hrl").
+-include("xmerl_xsd.hrl").
+
+
+-define(catch_exit(_Call_,_Value_,_ErrorCause_),
+ case catch (_Call_) of
+ {'EXIT',_} ->
+ {error,{type,_ErrorCause_,_Value_}};
+ {error,_} ->
+ {error,{_ErrorCause_,_Value_}};
+ _ ->
+ {ok,_Value_}
+ end).
+
+-define(is_whitespace(__WS__),
+ __WS__==16#20; __WS__==16#9;__WS__==16#a; __WS__==16#d).
+
+check_simpleType(Name,Value,S) when is_list(Name) ->
+ ?debug("simpleType name a list: "++Name++"~n",[]),
+ check_simpleType(list_to_atom(Name),Value,S);
+check_simpleType(string,Value,_S) ->
+ case [X||X <- Value,
+ xmerl_lib:is_char(X)] of
+ Value ->
+ {ok,Value};
+ _ ->
+ {error,{value_not_string,Value}}
+ end;
+check_simpleType(normalizedString,Value,_S) ->
+ case [X||X <- Value,xmerl_lib:is_char(X),
+ ns_whitespace(X)==false] of
+ Value ->
+ {ok,Value};
+ _ ->
+ {error,{value_not_normalizedString,Value}}
+ end;
+check_simpleType(boolean,"true",_S) -> {ok,"true"};
+check_simpleType(boolean,"false",_S) -> {ok,"false"};
+check_simpleType(boolean,"1",_S) -> {ok,"1"};
+check_simpleType(boolean,"0",_S) -> {ok,"0"};
+check_simpleType(boolean,Other,_S) -> {error,{value_not_boolean,Other}};
+check_simpleType(decimal,Value,_S) ->
+ ?catch_exit(check_decimal(Value),Value,invalid_decimal);
+check_simpleType(integer,Value,_S) ->
+ ?catch_exit(check_integer(Value),Value,invalid_integer);
+
+% float values: m * 2^e, where m is an integer whose absolute value is
+% less than 2^24, and e is an integer between -149 and 104, inclusive.
+check_simpleType(float,Value,_S) ->
+ ?catch_exit(check_float(Value),Value,invalid_float);
+% double values: m * 2^e, where m is an integer whose absolute value
+% is less than 2^53, and e is an integer between -1075 and 970,
+% inclusive.
+check_simpleType(double,Value,_S) ->
+ ?catch_exit(check_double(Value),Value,invalid_double);
+% extended format PnYnMnDTnHnMnS where n is an integer. The n value
+% before S may include decimal fraction.
+check_simpleType(duration,Value,_S) ->
+ ?catch_exit(check_duration(Value),Value,invalid_duration);
+check_simpleType(dateTime,Value,_S) ->
+ ?catch_exit(check_dateTime(Value),Value,invalid_dateTime);
+check_simpleType(time,Value,_S) ->
+ ?catch_exit(check_time(Value),Value,invalid_time);
+check_simpleType(date,Value,_S) ->
+ ?catch_exit(check_date(Value),Value,invalid_date);
+check_simpleType(gYearMonth,Value,_S) ->
+ ?catch_exit(check_gYearMonth(Value),Value,invalid_gYearMonth);
+check_simpleType(gYear,Value,_S) ->
+ ?catch_exit(check_gYear(Value),Value,invalid_gYear);
+check_simpleType(gMonthDay,Value,_S) ->
+ ?catch_exit(check_gMonthDay(Value),Value,invalid_gMonthDay);
+check_simpleType(gDay,Value,_S) ->
+ ?catch_exit(check_gDay(Value),Value,invalid_gDay);
+check_simpleType(gMonth,Value,_S) ->
+ ?catch_exit(check_gMonth(Value),Value,invalid_gMonth);
+check_simpleType(hexBinary,Value,_S) ->
+ IsEven = fun(X) ->
+ case X rem 2 of
+ 0 -> true;
+ _ -> false
+ end
+ end,
+ IsHex = fun(X) when X >= $A, X =< $F -> true;
+ (X) when X >= $a, X =< $f -> true;
+ (X) when X >= $0, X =< $9 -> true;
+ (_) -> false
+ end,
+ case [X|| X<-Value,
+ IsEven(length(Value)),
+ IsHex(X)] of
+ Value ->
+ {ok,Value};
+ _ -> {error,{value_not_hexBinary,Value}}
+ end;
+check_simpleType(base64Binary,Value,_S) ->
+ check_base64Binary(Value);
+check_simpleType(anyURI,Value,S) ->
+ case xmerl_uri:parse(Value) of
+ {error,_} ->
+ %% might be a relative uri, then it has to be a path in the context
+ case catch file:read_file_info(filename:join(S#xsd_state.xsd_base,Value)) of
+ {ok,_} ->
+ {ok,Value};
+ _ ->
+ {error,{value_not_anyURI,Value}}
+ end;
+ _ ->
+ {ok,Value}
+ end;
+check_simpleType('QName',Value,_S) ->
+ case xmerl_lib:is_name(Value) of
+ true ->
+ {ok,Value};
+ _ ->
+ {error,{value_not_QName,Value}}
+ end;
+check_simpleType('NOTATION',Value,_S) ->
+ {ok,Value}; %% Must provide for check of all QNames in schema.
+check_simpleType(token,Value,_S) ->
+ ?catch_exit(check_token(Value),Value,invalid_token);
+%% conform to the pattern [a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})*
+check_simpleType(language,Value,_S) ->
+ ?catch_exit(check_language(Value),Value,illegal_language);
+check_simpleType('NMTOKEN',Value,_S) ->
+ ?catch_exit(check_NMTOKEN(Value),Value,illegal_NMTOKEN);
+check_simpleType('NMTOKENS',Value,_S) ->
+ ?catch_exit(check_NMTOKENS(Value),Value,illegal_NMTOKENS);
+check_simpleType('Name',Value,_S) ->
+ ?catch_exit(check_Name(Value),Value,illegal_name);
+check_simpleType('NCName',Value,_S) ->
+ ?catch_exit(check_NCName(Value),Value,illegal_name);
+check_simpleType('ID',Value,_S) ->
+ ?catch_exit(check_ID(Value),Value,illegal_ID);
+check_simpleType('IDREF',Value,_S) ->
+ ?catch_exit(check_IDREF(Value),Value,illegal_IDREF);
+check_simpleType('IDREFS',Value,_S) ->
+ ?catch_exit(check_IDREFS(Value),Value,illegal_IDREFS);
+check_simpleType('ENTITY',Value,_S) ->
+ ?catch_exit(check_ENTITY(Value),Value,illegal_ENTITY);
+check_simpleType('ENTITIES',Value,_S) ->
+ ?catch_exit(check_ENTITIES(Value),Value,illegal_ENTITIES);
+check_simpleType(nonPositiveInteger,Value,_S) ->
+ ?catch_exit(check_nonPositiveInteger(Value),Value,
+ illegal_nonPositiveInteger);
+check_simpleType(negativeInteger,Value,_S) ->
+ ?catch_exit(check_negativeInteger(Value),Value,
+ illegal_negativeInteger);
+check_simpleType(long,Value,_S) ->
+ ?catch_exit(check_long(Value),Value,illegal_long);
+check_simpleType(int,Value,_S) ->
+ ?catch_exit(check_int(Value),Value,illegal_int);
+check_simpleType(short,Value,_S) ->
+ ?catch_exit(check_short(Value),Value,illegal_short);
+check_simpleType(byte,Value,_S) ->
+ ?catch_exit(check_byte(Value),Value,illegal_byte);
+check_simpleType(nonNegativeInteger,Value,_S) ->
+ ?catch_exit(check_nonNegativeInteger(Value),Value,
+ illegal_nonNegativeInteger);
+check_simpleType(unsignedLong,Value,_S) ->
+ ?catch_exit(check_unsignedLong(Value),Value,illegal_unsignedLong);
+check_simpleType(unsignedInt,Value,_S) ->
+ ?catch_exit(check_unsignedInt(Value),Value,illegal_unsignedInt);
+check_simpleType(unsignedShort,Value,_S) ->
+ ?catch_exit(check_unsignedShort(Value),Value,illegal_unsignedShort);
+check_simpleType(unsignedByte,Value,_S) ->
+ ?catch_exit(check_unsignedByte(Value),Value,illegal_unsignedByte);
+check_simpleType(positiveInteger,Value,_S) ->
+ ?catch_exit(check_positiveInteger(Value),Value,illegal_positiveInteger);
+check_simpleType(Unknown,Value,_S) ->
+ {error,{unknown_type,Unknown,Value}}.
+
+check_decimal(Value) ->
+ case string:tokens(Value,".") of
+ L when length(L) == 1; length(L) == 2 ->
+ _ = [list_to_integer(X)||X <- L],
+ {ok,Value};
+ _ ->
+ {error,{value_not_decimal,Value}}
+ end.
+%% I=string:chr(Value,$.),
+%% {NumberDot,Decimal}=lists:split(I,Value),
+%% Number=string:strip(NumberDot,right,$.),
+%% case catch {list_to_integer(Number),list_to_integer(Decimal)} of
+%% {'EXIT',_} ->
+%% {error,{value_not_decimal,Value}};
+%% _ -> {ok,Value}
+%% end.
+
+check_float(V="-INF") ->
+ {ok,V};
+check_float(V="INF") ->
+ {ok,V};
+check_float(V="NaN") ->
+ {ok,V};
+check_float(Value) ->
+%% Pred = fun(X) when X==$e;X==$E -> false;(_) -> true end,
+%% {Mantissa,Exponent}=lists:splitwith(Pred,Value),
+%% SkipEe = fun([]) -> [];(L) -> tl(L) end,
+ case string:tokens(Value,"eE") of
+ [Mantissa,Exponent] ->
+ {ok,_} = check_decimal(Mantissa),
+ {ok,_} = check_integer(Exponent);
+ [Mantissa] ->
+ check_decimal(Mantissa)
+ end,
+ {ok,Value}.
+%% case {check_decimal(Mantissa),
+%% check_simpleType(integer,SkipEe(Exponent))} of
+%% {{ok,_},{ok,_}} ->
+%% {ok,Value};
+%% _ ->
+%% {error,{value_not_float,Value}}
+%% end.
+
+check_double(Value) ->
+ check_float(Value).
+
+
+%% format PnYnMnDTnHnMnS
+%% P is always present
+%% T is absent iff all time items are absent
+%% At least one duration item must be present
+check_duration("-"++Value) ->
+ check_duration(Value);
+check_duration("P"++Value) ->
+ {Date,Time}=lists:splitwith(fun($T) -> false;(_) -> true end,Value),
+ {ok,_} = check_duration_date(Date,["Y","M","D"]),
+ {ok,_} = check_duration_time(Time,["T","H","M","S"]).
+
+check_duration_date("",_) ->
+ {ok,""};
+check_duration_date(Date,[H|T]) ->
+ case string:tokens(Date,H) of
+ [Date] ->
+ check_duration_date(Date,T);
+ [DateItem] ->
+ {ok,_} = check_positive_integer(DateItem);
+ [DateItem,Rest] ->
+ {ok,_} = check_positive_integer(DateItem),
+ check_duration_date(Rest,T)
+ end.
+%% Time any combination of TnHnMfS
+%% n unsigned integers and f unsigned decimal
+%%check_duration_time(Time,["T","H","M","S"])
+check_duration_time("",[_H|_T]) ->
+ {ok,""};
+check_duration_time(Time,[S]) ->
+ [Sec] = string:tokens(Time,S),
+ {ok,_} = check_decimal(Sec);
+check_duration_time("T"++Time,TTokens) ->
+ [_H|_] = Time,
+ check_duration_time(Time,tl(TTokens));
+check_duration_time(Time,[H|T]) ->
+ case string:tokens(Time,H) of
+ [Time] ->
+ check_duration_time(Time,T);
+ [TimeItem] ->
+ {ok,_} = check_positive_integer(TimeItem);
+ [TimeItem,Rest] ->
+ {ok,_} = check_positive_integer(TimeItem),
+ check_duration_time(Rest,T)
+ end.
+
+check_positive_integer(Value) ->
+ case catch list_to_integer(Value) of
+ Int when is_integer(Int),Int>=0 ->
+ {ok,Int};
+ _ ->
+ {error,{value_not_integer,Value}}
+ end.
+
+
+%% check_integer and thereof derived types
+check_integer(Value) ->
+ {ok,list_to_integer(Value)}.
+
+check_nonPositiveInteger(Value) ->
+ check_constr_int(Value,undefined,0,illegal_nonPositiveInteger).
+
+check_negativeInteger(Value) ->
+ check_constr_int(Value,undefined,-1,illegal_negativeInteger).
+
+check_long(Value) ->
+ check_constr_int(Value,-9223372036854775808,
+ 9223372036854775807,illegal_long).
+
+check_int(Value) ->
+ check_constr_int(Value,-2147483648,2147483647,illegal_int).
+
+check_short(Value) ->
+ check_constr_int(Value,-32768,32767,illegal_short).
+
+check_byte(Value) ->
+ check_constr_int(Value,-128,127,illegal_byte).
+
+check_nonNegativeInteger(Value) ->
+ check_constr_int(Value,0,undefined,illegal_nonNegativeInteger).
+
+check_unsignedLong(Value) ->
+ check_constr_int(Value,0,18446744073709551615,illegal_unsignedLong).
+
+check_unsignedInt(Value) ->
+ check_constr_int(Value,0,4294967295,illegal_unsignedInt).
+
+check_unsignedShort(Value) ->
+ check_constr_int(Value,0,65535,illegal_unsignedShort).
+
+check_unsignedByte(Value) ->
+ check_constr_int(Value,0,255,illegal_unsignedByte).
+
+check_positiveInteger(Value) ->
+ check_constr_int(Value,1,undefined,illegal_positiveInteger).
+
+check_constr_int(Value,undefined,Max,ErrMsg) ->
+ case check_integer(Value) of
+ {ok,Int} when Int =< Max ->
+ {ok,Int};
+ _ ->
+ {error,{ErrMsg}}
+ end;
+check_constr_int(Value,Min,Max,ErrMsg) ->
+ case check_integer(Value) of
+ {ok,Int} when Int >= Min, Int =< Max ->
+ {ok,Int};
+ _ ->
+ {error,{ErrMsg}}
+ end.
+
+%% DateTime on form: '-'? yyyy '-' mm '-' dd 'T' hh ':' mm ':' ss
+%% ('.' s+)? (zzzzzz)?
+check_dateTime("-"++DateTime) ->
+ check_dateTime(DateTime);
+check_dateTime("+"++_DateTime) ->
+ {error,{invalid_dateTime,plus_sign}};
+check_dateTime(DateTime) ->
+ [Date,Time] = string:tokens(DateTime,"T"),
+ [Y,M,D] = string:tokens(Date,"-"),
+ check_year(Y),
+ {ok,_} = check_positive_integer(M),
+ {ok,_} = check_positive_integer(D),
+ check_time(Time).
+
+check_year(Y) when length(Y)>4 ->
+ Y = string:strip(Y,left,$0),
+ {ok,list_to_integer(Y)};
+check_year(Y) ->
+ case list_to_integer(Y) of
+ Year when Year =/= 0 ->
+ {ok,Year};
+ _ ->
+ {error,{invalid_year,Y}}
+ end.
+
+check_month(Str) ->
+ case check_positive_integer(Str) of
+ {ok,Int} when Int >= 1,Int =< 12 ->
+ {ok,Int};
+ _ ->
+ {error,{invalid_month,Str}}
+ end.
+check_day(Str) ->
+ case check_positive_integer(Str) of
+ {ok,Int} when Int >= 1,Int =< 31 ->
+ {ok,Int};
+ _ ->
+ {error,{invalid_day,Str}}
+ end.
+
+
+check_time(Time) ->
+ %% hh:mm:ss (.s+)? TZ
+ {HMS,TZ} =
+ case lists:split(8,Time) of
+ {T,"."++SecFractionsTZ} ->
+ OnlyDigits = fun(X) when X>=$0,X=<$9 ->true;(_)->false end,
+ {SecFrac,TZone} = lists:splitwith(OnlyDigits,SecFractionsTZ),
+ {ok,_} = check_positive_integer(SecFrac),
+ {T,TZone};
+ {T,TZone} ->
+ {T,TZone}
+ end,
+ [H,M,S] = string:tokens(HMS,":"),
+ {ok,_} = check_hour(H),
+ {ok,_} = check_minute(M),
+ {ok,_} = check_second(S),
+ case TZ of
+ [] ->
+ {ok,Time}; %% timezone optional
+ _ ->
+ check_timezone(TZ)
+ end.
+
+check_hour(Str) ->
+ case check_positive_integer(Str) of
+ {ok,H} when H >= 0,H =< 24 ->
+ {ok,H};
+ _ ->
+ {error,{invalid_hour,Str}}
+ end.
+check_minute(Str) ->
+ case check_positive_integer(Str) of
+ {ok,H} when H >= 0,H =< 60 ->
+ {ok,H};
+ _ ->
+ {error,{invalid_minute,Str}}
+ end.
+check_second(Str) ->
+ case check_positive_integer(Str) of
+ {ok,H} when H >= 0,H =< 60 ->
+ {ok,H};
+ _ ->
+ {error,{invalid_second,Str}}
+ end.
+
+check_timezone("Z") ->
+ {ok,"Z"};
+check_timezone(TZ) ->
+ [H,M] = string:tokens(TZ,":"),
+ case check_integer(H) of
+ {ok,H2} when H2 >= -13, H2 =< 13 ->
+ case check_positive_integer(M) of
+ {ok,M2} when M2 >= 0, M2 =< 59 ->
+ {ok,{H2,M2}};
+ _ ->
+ {error,{invalid_timezone,TZ,M}}
+ end;
+ {ok,H2} when H2==14;H2==-14 ->
+ case check_positive_integer(M) of
+ {ok,0} ->
+ {ok,{H2,0}};
+ _ ->
+ {error,{invalid_timezone,TZ}}
+ end;
+ _ ->
+ {error,{invalid_timezone,TZ}}
+ end.
+
+
+%% the form: '-'? yyyy '-' mm '-' dd zzzzzz?
+check_date("-"++Date) ->
+ check_date(Date);
+check_date("+"++_Date) ->
+ {error,{invalid_date,plus_sign}};
+check_date(Date) ->
+ {Year,Month,Day} =
+ case string:tokens(Date,"-+Z") of
+ [Y,M,D,TZ] ->
+ {ok,_}=check_timezone(TZ),
+ {Y,M,D};
+ [Y,M,D] ->
+ {Y,M,D}
+ end,
+ {ok,_}=check_year(Year),
+ {ok,_}=check_month(Month),
+ {ok,_}=check_day(Day).
+
+%% gYearMonth on the form: '-'? ccyy '-' mm zzzzzz?
+check_gYearMonth("-"++Value) ->
+ check_gYearMonth(Value);
+check_gYearMonth("+"++_Value) ->
+ {error,{invalid_gYearMonth,plus_sign}};
+check_gYearMonth(Value) ->
+ {Year,Month} =
+ case string:tokens(Value,"-+Z") of
+ [Y,M,TZ] ->
+ {ok,_} = check_timezone(TZ),
+ {Y,M};
+ [Y,M] ->
+ {Y,M}
+ end,
+ {ok,_} = check_year(Year),
+ {ok,_} = check_month(Month).
+
+%% gYear on the form: '-'? ccyy zzzzzz?
+check_gYear("-"++Value) ->
+ check_gYear(Value);
+check_gYear("+"++_Value) ->
+ {error,{invalid_gYear,plus_sign}};
+check_gYear(Value) ->
+ Year =
+ case string:tokens(Value,"-+Z") of
+ [Y,TZ] ->
+ {ok,_} = check_timezone(TZ),
+ Y;
+ [Y] ->
+ Y
+ end,
+ {ok,_} = check_year(Year).
+
+%% gMonthDay on the form: mm dd zzzzzz?
+check_gMonthDay("--"++Value) ->
+ {M,"-"++DTZ} = lists:split(2,Value),
+ {ok,_} = check_month(M),
+ {ok,_} = check_gDay2(DTZ).
+
+%% dDay on the form dd zzzzzz?
+check_gDay("---"++Value) ->
+ check_gDay2(Value).
+check_gDay2(Value) ->
+ {D,TZ} = lists:split(2,Value),
+ {ok,_} = check_day(D),
+ case TZ of
+ [] ->
+ {ok,Value};
+ _ ->
+ {ok,_} = check_timezone(TZ)
+ end.
+%% dMonth on the form mm zzzzzz?
+check_gMonth("--"++Value) ->
+ {M,TZ} = lists:split(2,Value),
+ {ok,_} = check_month(M),
+ case TZ of
+ [] ->
+ {ok,Value};
+ _ ->
+ {ok,_} = check_timezone(TZ)
+ end.
+
+check_base64Binary(Value) ->
+ case catch xmerl_b64Bin:parse(xmerl_b64Bin_scan:scan(Value)) of
+ {ok,_} ->
+ {ok,Value};
+ Err = {error,_} ->
+ Err;
+ {'EXIT',{error,Reason}} -> %% scanner failed on character
+ {error,Reason};
+ {'EXIT',Reason} ->
+ {error,{internal_error,Reason}}
+ end.
+
+%% tokens may not contain the carriage return (#xD), line feed (#xA)
+%% nor tab (#x9) characters, that have no leading or trailing spaces
+%% (#x20) and that have no internal sequences of two or more spaces.
+check_token(V=[32|_]) ->
+ {error,{invalid_token,leading_space,V}};
+check_token(Value) ->
+ check_token(Value,Value).
+check_token([],Value) ->
+ {ok,Value};
+check_token([32],V) ->
+ {error,{invalid_token,trailing_space,V}};
+check_token([9|_T],V) ->
+ {error,{invalid_token,tab,V}};
+check_token([10|_T],V) ->
+ {error,{invalid_token,line_feed,V}};
+check_token([13|_T],V) ->
+ {error,{invalid_token,carriage_return,V}};
+check_token([32,32|_T],V) ->
+ {error,{invalid_token,double_space,V}};
+check_token([_H|T],V) ->
+ check_token(T,V).
+
+%% conform to the pattern [a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})*
+check_language(Value) ->
+ check_language(Value,0).
+check_language([H|T],N) when H>=$A,H=<$Z ->
+ check_language(T,N+1);
+check_language([H|T],N) when H>=$a,H=<$z ->
+ check_language(T,N+1);
+check_language([$-|T],N) when N>=1,N=<8 ->
+ check_language2(T,0);
+check_language([],N) when N>=1,N=<8 ->
+ {ok,[]}.
+check_language2([H|T],N) when H>=$A,H=<$Z ->
+ check_language2(T,N+1);
+check_language2([H|T],N) when H>=$a,H=<$z ->
+ check_language2(T,N+1);
+check_language2([H|T],N) when H>=$0,H=<$9 ->
+ check_language2(T,N+1);
+check_language2([$-|T],N) when N>=1,N=<8 ->
+ check_language2(T,0);
+check_language2([],N) when N>=1,N=<8 ->
+ {ok,[]}.
+
+check_NMTOKEN([H|T]) ->
+ true = xmerl_lib:is_namechar(H),
+ check_NMTOKEN2(T).
+check_NMTOKEN2([]) ->
+ {ok,[]};
+check_NMTOKEN2([H|T]) ->
+ true = xmerl_lib:is_namechar(H),
+ check_NMTOKEN2(T).
+
+check_NMTOKENS(Value) ->
+ TokList = string:tokens(Value," "),
+ lists:foreach(fun check_NMTOKEN/1,TokList),
+ {ok,Value}.
+
+check_Name(Value) ->
+ true = xmerl_lib:is_name(Value),
+ {ok,Value}.
+
+check_NCName(Value) ->
+ true = xmerl_lib:is_ncname(Value),
+ {ok,Value}.
+
+check_ID(Value) ->
+ %% ID must be a NCName and uniquely identify the element which
+ %% bear it. Only one ID per element.
+ true = xmerl_lib:is_ncname(Value),
+ {ok,Value}.
+
+check_IDREF(Value) ->
+ true = xmerl_lib:is_name(Value),
+ {ok,Value}.
+
+check_IDREFS(Value) ->
+ check_list_type(Value,fun check_IDREF/1).
+
+check_ENTITY(Value) ->
+ true = xmerl_lib:is_ncname(Value),
+ {ok,Value}.
+
+check_ENTITIES(Value) ->
+ check_list_type(Value,fun check_ENTITY/1).
+
+check_list_type(Value,BaseTypeFun) ->
+ Tokens = string:tokens(Value," "),
+ lists:foreach(BaseTypeFun,Tokens),
+ {ok,Value}.
+
+ns_whitespace(WS) when WS==16#9;WS==16#A;WS==16#D ->
+ true;
+ns_whitespace(_) ->
+ false.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% facet functions
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+facet_fun(Type,{length,V}) ->
+ length_fun(Type,list_to_integer(V));
+facet_fun(Type,{minLength,V}) ->
+ minLength_fun(Type,list_to_integer(V));
+facet_fun(Type,{maxLength,V}) ->
+ maxLength_fun(Type,list_to_integer(V));
+facet_fun(Type,{pattern,V}) ->
+%% fun(Val) ->
+%% {ok,Val}
+%% end;
+ pattern_fun(Type,V);
+facet_fun(Type,{enumeration,V}) ->
+ enumeration_fun(Type,V);
+facet_fun(Type,{whiteSpace,V}) ->
+ whiteSpace_fun(Type,V);
+facet_fun(Type,{maxInclusive,V}) ->
+ maxInclusive_fun(Type,V);
+facet_fun(Type,{maxExclusive,V}) ->
+ maxExclusive_fun(Type,V);
+facet_fun(Type,{minExclusive,V}) ->
+ minExclusive_fun(Type,V);
+facet_fun(Type,{minInclusive,V}) ->
+ minInclusive_fun(Type,V);
+facet_fun(Type,{totalDigits,V}) ->
+ totalDigits_fun(Type,list_to_integer(V));
+facet_fun(Type,{fractionDigits,V}) ->
+ fractionDigits_fun(Type,list_to_integer(V));
+facet_fun(Type,F) ->
+ fun(_X_) ->
+ io:format("Warning: not valid facet on ~p ~p~n",[Type,F])
+ end.
+
+
+length_fun(T,V)
+ when T==string;T==normalizedString;T==token;
+ T=='Name';T=='NCName';T==language;T=='ID';
+ T=='IDREF';T=='IDREFS';T=='ENTITY';T=='ENTITIES';
+ T=='NMTOKEN';T=='NMTOKENS';T==anyURI ->
+ fun(Val) ->
+ case string:len(Val) == V of
+ true -> {ok,Val};
+ false -> {error,{length,string:len(Val),should_be,V}}
+ end
+ end;
+length_fun(T,_V) when T=='NOTATION';T=='QName' ->
+ fun(Val) ->
+ {ok,Val}
+ end;
+length_fun(T,V) when T==base64Binary;T==hexBinary ->
+ fun(Val) ->
+ case length(Val)==V of
+ true -> {ok,Val};
+ false -> {error,{length,length(Val),xhould_be,V}}
+ end
+ end;
+length_fun(T,_V) ->
+ fun(_Val) ->
+ {error,{length_not_applicable_on,T}}
+ end.
+
+minLength_fun(T,V)
+ when T==string;T==normalizedString;T==token;
+ T=='Name';T=='NCName';T==language;T=='ID';
+ T=='IDREF';T=='IDREFS';T=='ENTITY';T=='ENTITIES';
+ T=='NMTOKEN';T=='NMTOKENS';T==anyURI ->
+ fun(Val) ->
+ case string:len(Val) >= V of
+ true -> {ok,Val};
+ false -> {error,{minLength,string:len(Val),should_at_least_be,V}}
+ end
+ end;
+minLength_fun(T,_V) when T=='NOTATION';T=='QName' ->
+ fun(Val) ->
+ {ok,Val}
+ end;
+minLength_fun(T,V) when T==base64Binary;T==hexBinary ->
+ fun(Val) ->
+ case length(Val) >= V of
+ true -> {ok,Val};
+ false -> {error,{minLength,length(Val),should_at_least_be,V}}
+ end
+ end;
+minLength_fun(T,_V) ->
+ fun(_Val) ->
+ {error,{minLength_not_applicable_on,T}}
+ end.
+
+maxLength_fun(T,V)
+ when T==string;T==normalizedString;T==token;
+ T=='Name';T=='NCName';T==language;T=='ID';
+ T=='IDREF';T=='IDREFS';T=='ENTITY';T=='ENTITIES';
+ T=='NMTOKEN';T=='NMTOKENS';T==anyURI ->
+ fun(Val) ->
+ case length(Val) of
+ Len when Len =< V -> {ok,Val};
+ _ -> {error,{maxLength,string:len(Val),should_not_be_more_than,V}}
+ end
+ end;
+maxLength_fun(T,_V) when T=='NOTATION';T=='QName' ->
+ fun(Val) ->
+ {ok,Val}
+ end;
+maxLength_fun(T,V) when T==base64Binary;T==hexBinary ->
+ fun(Val) ->
+ case length(Val) =< V of
+ true -> {ok,Val};
+ false -> {error,{maxLength,length(Val),should_not_be_more_than,V}}
+ end
+ end;
+maxLength_fun(T,_V) ->
+ fun(_Val) ->
+ {error,{maxLength_not_applicable_on,T}}
+ end.
+
+pattern_fun(_Type,RegExp) ->
+ case xmerl_regexp:setup(RegExp) of
+ {ok,RE} ->
+ fun(Val) ->
+ case xmerl_regexp:first_match(Val,RE) of
+ {match,_,_} -> {ok,Val};
+ _ -> {error,{pattern_mismatch,Val,RegExp}}
+ end
+ end;
+ _ ->
+ fun(Val) ->
+ {error,{unsupported_pattern,Val,RegExp}}
+ end
+ end.
+
+enumeration_fun(_Type,V) ->
+ fun(Val) ->
+ case lists:member(Val,V) of
+ true -> {ok,Val};
+ false -> {error,{enumeration,Val,should_be_one_of,V}}
+ end
+ end.
+
+whiteSpace_fun(_Type,"preserve") ->
+ fun(Val) ->
+ {ok,Val}
+ end;
+whiteSpace_fun(_Type,"replace") ->
+ fun(Val) ->
+ {ok,?MODULE:replace_ws(Val,[])}
+ end;
+whiteSpace_fun(_Type,"collapse") ->
+ fun(Val) ->
+ {ok,?MODULE:collapse_ws(Val)}
+ end.
+
+replace_ws([16#9|T],Acc) ->
+ replace_ws(T,[16#20|Acc]);
+replace_ws([16#a|T],Acc) ->
+ replace_ws(T,[16#20|Acc]);
+replace_ws([16#d|T],Acc) ->
+ replace_ws(T,[16#20|Acc]);
+replace_ws([H|T],Acc) ->
+ replace_ws(T,[H|Acc]);
+replace_ws([],Acc) ->
+ lists:reverse(Acc).
+
+collapse_ws(Val) ->
+ collapse_ws(lists:dropwhile(fun(WS) when ?is_whitespace(WS) ->true;(_) -> false end,
+ replace_ws(Val,[])),[]).
+collapse_ws([16#20,16#20|T],Acc) ->
+ collapse_ws([16#20|T],Acc);
+collapse_ws([H|T],Acc) ->
+ collapse_ws(T,[H|Acc]);
+collapse_ws([],Acc) ->
+ lists:reverse(lists:dropwhile(fun($ ) ->true;(_) -> false end,Acc)).
+
+maxInclusive_fun(T,V)
+ when T==integer;T==positiveInteger;T==negativeInteger;
+ T==nonNegativeInteger;T==nonPositiveInteger;T==long;
+ T==unsignedLong;T==int;T==unsignedInt;T==short;
+ T==unsignedShort;T==byte;T==unsignedByte ->
+ fun(Val) ->
+ case (catch list_to_integer(Val) =< list_to_integer(V)) of
+ true ->
+ {ok,Val};
+ _ ->
+ {error,{maxInclusive,Val,should_be_less_than_or_equal_with,V}}
+ end
+ end;
+maxInclusive_fun(T,V) when T==decimal;T==float;T==double ->
+ fun(Val) ->
+ case ?MODULE:compare_floats(Val,V) of
+ gt ->
+ {error,{maxInclusive,Val,should_be_less_than_or_equal_with,V}};
+ Err={error,_} -> Err;
+ _ ->
+ {ok,Val}
+ end
+ end;
+maxInclusive_fun(T,V) when T==duration ->
+ fun(Val) ->
+ case ?MODULE:compare_durations(Val,V) of
+ GT when GT==gt;GT==indefinite ->
+ {error,{maxInclusive,Val,should_be_less_than_or_equal_with,V}};
+ _ ->
+ {ok,Val}
+ end
+ end;
+maxInclusive_fun(T,V) when T==dateTime ->
+ fun(Val) ->
+ case ?MODULE:compare_dateTime(Val,V) of
+ GT when GT==gt;GT==indefinite ->
+ {error,{maxInclusive,Val,should_be_less_than_or_equal_with,V}};
+ _ ->
+ {ok,Val}
+ end
+ end;
+maxInclusive_fun(T,_V) ->
+%% when T==duration;T==dateTime;T==date;T==time;T==gYear;T==gYearMonth;
+%% T==gMonth;T==gMonthDay;T==gDay ->
+ fun(_) -> {error,{maxInclusive,not_implemented_for,T}} end.
+
+maxExclusive_fun(T,V)
+ when T==integer;T==positiveInteger;T==negativeInteger;
+ T==nonNegativeInteger;T==nonPositiveInteger;T==long;
+ T==unsignedLong;T==int;T==unsignedInt;T==short;
+ T==unsignedShort;T==byte;T==unsignedByte ->
+ fun(Val) ->
+ case (catch list_to_integer(Val) < list_to_integer(V)) of
+ true ->
+ {ok,Val};
+ _ ->
+ {error,{maxExclusive,Val,not_less_than,V}}
+ end
+ end;
+maxExclusive_fun(T,V) when T==decimal;T==float;T==double ->
+ fun(Val) ->
+ case ?MODULE:compare_floats(Val,V) of
+ lt ->
+ {ok,Val};
+ Err={error,_} -> Err;
+ _ ->
+ {error,{maxExclusive,Val,not_less_than,V}}
+ end
+ end;
+maxExclusive_fun(T,V) when T==duration ->
+ fun(Val) ->
+ case ?MODULE:compare_durations(Val,V) of
+ lt ->
+ {ok,Val};
+ _ ->
+ {error,{maxExclusive,Val,not_less_than,V}}
+ end
+ end;
+maxExclusive_fun(T,V) when T==dateTime ->
+ fun(Val) ->
+ case ?MODULE:compare_dateTime(Val,V) of
+ lt ->
+ {ok,Val};
+ _ ->
+ {error,{maxExclusive,Val,not_less_than,V}}
+ end
+ end;
+maxExclusive_fun(T,_V) ->
+ fun(_) -> {error,{maxExclusive,not_implemented_for,T}} end.
+
+minExclusive_fun(T,V)
+ when T==integer;T==positiveInteger;T==negativeInteger;
+ T==nonNegativeInteger;T==nonPositiveInteger;T==long;
+ T==unsignedLong;T==int;T==unsignedInt;T==short;
+ T==unsignedShort;T==byte;T==unsignedByte ->
+ fun(Val) ->
+ case (catch list_to_integer(Val) > list_to_integer(V)) of
+ true ->
+ {ok,Val};
+ _ ->
+ {error,{minExclusive,Val,not_greater_than,V}}
+ end
+ end;
+minExclusive_fun(T,V) when T==decimal;T==float;T==double ->
+ fun(Val) ->
+ case ?MODULE:compare_floats(Val,V) of
+ gt ->
+ {ok,Val};
+ Err={error,_} -> Err;
+ _ ->
+ {error,{minExclusive,Val,not_greater_than,V}}
+ end
+ end;
+minExclusive_fun(T,V) when T==duration ->
+ fun(Val) ->
+ case ?MODULE:compare_durations(Val,V) of
+ gt ->
+ {ok,Val};
+ _ ->
+ {error,{minExclusive,Val,not_greater_than,V}}
+ end
+ end;
+minExclusive_fun(T,V) when T==dateTime ->
+ fun(Val) ->
+ case ?MODULE:compare_dateTime(Val,V) of
+ gt ->
+ {ok,Val};
+ _ ->
+ {error,{minExclusive,Val,not_greater_than,V}}
+ end
+ end;
+minExclusive_fun(T,_V) ->
+ fun(_) -> {error,{minExclusive,not_implemented_for,T}} end.
+
+minInclusive_fun(T,V)
+ when T==integer;T==positiveInteger;T==negativeInteger;
+ T==nonNegativeInteger;T==nonPositiveInteger;T==long;
+ T==unsignedLong;T==int;T==unsignedInt;T==short;
+ T==unsignedShort;T==byte;T==unsignedByte ->
+ fun(Val) ->
+ case (catch list_to_integer(Val) >= list_to_integer(V)) of
+ true ->
+ {ok,Val};
+ _ ->
+ {error,{minInclusive,Val,not_greater_than_or_equal_with,V}}
+ end
+ end;
+minInclusive_fun(T,V) when T==decimal;T==float;T==double ->
+ fun(Val) ->
+ case ?MODULE:compare_floats(Val,V) of
+ lt ->
+ {error,{minInclusive,Val,not_greater_than_or_equal_with,V}};
+ Err={error,_} -> Err;
+ _ ->
+ {ok,Val}
+ end
+ end;
+minInclusive_fun(T,V) when T==duration ->
+ fun(Val) ->
+ case ?MODULE:compare_durations(Val,V) of
+ lt ->
+ {error,{minInclusive,Val,not_greater_than_or_equal_with,V}};
+ _ ->
+ {ok,Val}
+ end
+ end;
+minInclusive_fun(T,V) when T==dateTime ->
+ fun(Val) ->
+ case ?MODULE:compare_dateTime(Val,V) of
+ lt ->
+ {error,{minInclusive,Val,not_greater_than_or_equal_with,V}};
+ _ ->
+ {ok,Val}
+ end
+ end;
+minInclusive_fun(T,_V) ->
+ fun(_) -> {error,{minInclusive,not_implemented_for,T}} end.
+
+totalDigits_fun(T,V)
+ when T==integer;T==positiveInteger;T==negativeInteger;T==nonNegativeInteger;
+ T==nonPositiveInteger;T==long;T==unsignedLong;T==int;T==unsignedInt;
+ T==short;T==unsignedShort;T==byte;T==unsignedByte;T==decimal ->
+ %% Val is expressible as i * 10^-n where i and n are integers
+ %% such that |i| < 10^Val and 0 =< n =< Val.
+ fun(Val) ->
+ Pred = fun($0)-> true;
+ (_) -> false
+ end,
+ Val2 = lists:dropwhile(Pred,Val),
+ Length =
+ case lists:member($.,Val2) of
+ true ->
+ length(lists:dropwhile(Pred,lists:reverse(Val2))) -1;
+ _ ->
+ length(Val2)
+ end,
+ if
+ Length =< V ->
+ {ok,Val};
+ true ->
+ {error,{totalDigits,Length,to_many_digits}}
+ end
+ end;
+totalDigits_fun(T,_V) ->
+ fun(_) -> {error,{totalDigits,not_applicable,T}} end.
+
+fractionDigits_fun(T,V)
+ when T==integer;T==positiveInteger;T==negativeInteger;T==nonNegativeInteger;
+ T==nonPositiveInteger;T==long;T==unsignedLong;T==int;T==unsignedInt;
+ T==short;T==unsignedShort;T==byte;T==unsignedByte;T==decimal ->
+ fun(Val) ->
+ Len =
+ case string:tokens(Val,".") of
+ [_I,Frc] when T==decimal ->
+ Pred = fun($0)-> true;
+ (_) -> false
+ end,
+ length(lists:dropwhile(Pred,lists:reverse(Frc)));
+ _ ->
+ 0
+ end,
+ if
+ Len =< V ->
+ {ok,Val};
+ true ->
+ {error,{fractionDigits,Len,to_many_digits_in,Val}}
+ end
+ end;
+fractionDigits_fun(T,_V) ->
+ fun(_) -> {error,{fractionDigits,not_applicable,T}} end.
+
+
+%% The relation between F1 and F2 may be eq,lt or gt.
+%% lt: F1 < F2
+%% gt: F1 > F2
+compare_floats(F1,F2) when F1=="NaN";F2=="NaN" ->
+ {error,{not_comparable}};
+compare_floats(F1,F1) ->
+ eq;
+compare_floats(F1,F2) when F1=="INF";F2=="-INF" ->
+ gt;
+compare_floats(F1,F2) when F1=="-INF";F2=="INF" ->
+ lt;
+compare_floats(Str1,Str2) ->
+ F1={S1,_B1,_D1,_E1} = str_to_float(Str1),
+ F2={S2,_B2,_D2,_E2} = str_to_float(Str2),
+% io:format("F1: ~p~nF2: ~p~n",[F1,F2]),
+ if
+ S1=='-',S2=='+' -> lt;
+ S1=='+',S2=='-' -> gt;
+% B1<0 -> compare_floats2(F2,F1);
+ true -> compare_floats2(F1,F2)
+ end.
+compare_floats2({S1,B1,D1,E1},{_S2,B2,D2,E2}) when B1==0;B2==0 ->
+ I1 = pow(B1,D1,E1),
+ I2 = pow(B2,D2,E2),
+ if I1 > I2 ->
+ sign(S1,gt);
+ I1 < I2 ->
+ sign(S1,lt);
+ true ->
+ eq
+ end;
+compare_floats2({S1,B1,D1,E1},{_S2,B2,D2,E2}) ->
+ %% S1 and S2 have same sign.
+ I1 = pow(B1,E1),% B1 * round(math:pow(10,E1)),
+ I2 = pow(B2,E2),%B2 * round(math:pow(10,E2)),
+ if
+ I1 > I2 -> sign(S1,gt);
+ I1 < I2 -> sign(S1,lt);
+ true ->
+ %% fractions are compared in lexicographic order
+ if
+ D1 == D2 -> eq;
+ D1 < D2 -> sign(S1,lt);
+ D1 > D2 -> sign(S1,gt)
+ end
+ end.
+
+str_to_float(String) ->
+ {Sign,Str} =
+ case String of
+ "-"++Str1 -> {'-',Str1};
+ _ -> {'+',String}
+ end,
+ case string:tokens(Str,".") of
+ [B,DE] ->
+ case string:tokens(DE,"Ee") of
+ [D,E] ->
+ %% round(math:pow(10,list_to_integer(E)))
+ {Sign,list_to_integer(B),
+ remove_trailing_zeros(D),
+ list_to_integer(E)};
+ [D] ->
+ {Sign,list_to_integer(B),
+ remove_trailing_zeros(D),0}
+ end;
+ [B] -> %% could also be 1E4, but no fraction
+ case string:tokens(Str,"Ee") of
+ [I,E] ->
+ {Sign,list_to_integer(I),"0",list_to_integer(E)};
+ _ ->
+ {Sign,list_to_integer(B),"0",0}
+ end
+ end.
+
+pow(Mantissa,Exponent) ->
+ case (Mantissa * math:pow(10,Exponent)) of
+ I when I<1 ->
+ I;
+ I -> round(I)
+ end.
+
+pow(Mantissa,Fraction,Exponent) ->
+ (Mantissa * math:pow(10,Exponent)) +
+ (list_to_integer(Fraction) * math:pow(10,Exponent-length(Fraction))).
+
+sign('-',gt) ->
+ lt;
+sign('-',lt) ->
+ gt;
+sign(_,Rel) ->
+ Rel.
+
+remove_trailing_zeros(Str) ->
+ Pred = fun($0) ->true;(_) ->false end,
+ case lists:reverse(lists:dropwhile(Pred,lists:reverse(Str))) of
+ [] ->
+ "0";
+ Fr -> Fr
+ end.
+
+
+%% when T==duration;T==dateTime;T==date;T==time;T==gYear;T==gYearMonth;
+%% T==gMonth;T==gMonthDay;T==gDay ->
+
+%% compare_duration(V1,V2) compares V1 to V2
+%% returns gt | lt | eq | indefinite
+%% ex: V1 > V2 -> gt
+%%
+%% V1, V2 on format PnYnMnDTnHnMnS
+%% P is always present
+%% T is absent iff all time items are absent
+%% compare_duration(V1,V2) ->
+%% {Y1,M1,D1,H1,M1,S1} = duration_atoms(V1),
+%% {Y2,M2,D2,H2,M2,S2} = duration_atoms(V2),
+%% YearDiff = Y1 - Y2,
+%% MonthsDiff = M1 - M2,
+%% DaysDiff = D1 - D2,
+compare_durations(V1,V2) ->
+ %% Four reference dateTimes are used, see XMLSchema part 2,
+ %% 3.2.6.2.
+ %% "The order-relation of two duration values x and y is x < y iff
+ %% s+x < s+y for each qualified dateTime s in the list below."
+ Ref1_dateTime = {1696,9,1,0,0,0,{pos,0,0}},%1696-09-01T00:00:00Z
+ Ref2_dateTime = {1697,2,1,0,0,0,{pos,0,0}},%1697-02-01T00:00:00Z
+ Ref3_dateTime = {1903,3,1,0,0,0,{pos,0,0}},%1903-03-01T00:00:00Z
+ Ref4_dateTime = {1903,7,1,0,0,0,{pos,0,0}},%1903-07-01T00:00:00Z
+ CmpRes1=compare_dateTime(normalize_dateTime(add_duration2dateTime(Ref1_dateTime,V1)),
+ normalize_dateTime(add_duration2dateTime(Ref1_dateTime,V2))),
+ CmpRes2=compare_dateTime(normalize_dateTime(add_duration2dateTime(Ref2_dateTime,V1)),
+ normalize_dateTime(add_duration2dateTime(Ref2_dateTime,V2))),
+ CmpRes3=compare_dateTime(normalize_dateTime(add_duration2dateTime(Ref3_dateTime,V1)),
+ normalize_dateTime(add_duration2dateTime(Ref3_dateTime,V2))),
+ CmpRes4=compare_dateTime(normalize_dateTime(add_duration2dateTime(Ref4_dateTime,V1)),
+ normalize_dateTime(add_duration2dateTime(Ref4_dateTime,V2))),
+ if
+ CmpRes1==CmpRes2,
+ CmpRes1==CmpRes3,
+ CmpRes1==CmpRes4 ->
+ CmpRes1;
+ true -> indefinite
+ end.
+
+
+compare_dateTime(DT1={_,_,_,_,_,_,Z},DT2={_,_,_,_,_,_,Z}) ->
+ case DT1<DT2 of
+ true -> lt;
+ _ ->
+ case DT1>DT2 of
+ true ->
+ gt;
+ _ -> eq
+ end
+ end;
+%% If P contains a time zone and Q does not, compare as follows:
+%% 1. P < Q if P < (Q with time zone +14:00)
+%% 2. P > Q if P > (Q with time zone -14:00)
+%% 3. P <> Q otherwise, that is, if (Q with time zone +14:00) < P <
+%% (Q with time zone -14:00)
+compare_dateTime(P={_,_,_,_,_,_,{_,_,_}},_Q={Y,M,D,H,Min,S,none}) ->
+ case compare_dateTime(P,normalize_dateTime({Y,M,D,H,Min,S,{pos,14,0}})) of
+ lt ->
+ lt;
+ _ ->
+ case compare_dateTime(P,normalize_dateTime({Y,M,D,H,Min,S,{neg,14,0}})) of
+ gt ->
+ gt;
+ _ ->
+ indefinite
+ end
+ end;
+%% If P does not contain a time zone and Q does, compare as follows:
+%% 1. P < Q if (P with time zone -14:00) < Q.
+%% 2. P > Q if (P with time zone +14:00) > Q.
+%% 3. P <> Q otherwise, that is, if (P with time zone +14:00) < Q <
+%% (P with time zone -14:00)
+compare_dateTime(_P={Y,M,D,H,Min,S,none},Q={_,_,_,_,_,_,{_,_,_}}) ->
+ case compare_dateTime(normalize_dateTime({Y,M,D,H,Min,S,{neg,14,0}}),Q) of
+ lt ->
+ lt;
+ _ ->
+ case compare_dateTime(normalize_dateTime({Y,M,D,H,Min,S,{pos,14,0}}),Q) of
+ gt ->
+ gt;
+ _ ->
+ indefinite
+ end
+ end;
+compare_dateTime(P,Q) when is_list(P) ->
+ compare_dateTime(normalize_dateTime(dateTime_atoms(P)),Q);
+compare_dateTime(P,Q) when is_list(Q) ->
+ compare_dateTime(P,normalize_dateTime(dateTime_atoms(Q)));
+compare_dateTime(_P,_Q) ->
+ indefinite.
+
+fQuotient(A,B) when is_float(A) ->
+ fQuotient(floor(A),B);
+fQuotient(A,B) when is_float(B) ->
+ fQuotient(A,floor(B));
+fQuotient(A,B) when A >= 0, B >= 0 ->
+ A div B;
+fQuotient(A,B) when A < 0, B < 0 ->
+ A div B;
+fQuotient(A,B) ->
+ case A rem B of
+ 0 ->
+ A div B;
+ _ ->
+ (A div B) -1
+ end.
+
+fQuotient(A, Low, High) ->
+ fQuotient(A - Low, High - Low).
+
+floor(A) ->
+ case round(A) of
+ I when I > A ->
+ I - 1;
+ I -> I
+ end.
+
+modulo(A,B) ->
+ A - (fQuotient(A,B) * B).
+
+modulo(A, Low, High) ->
+ modulo(A - Low, High - Low) + Low.
+
+maximumDayInMonthFor(YearValue, MonthValue) ->
+ M = modulo(MonthValue, 1, 13),
+ Y = YearValue + fQuotient(MonthValue, 1, 13),
+ monthValue(M,Y).
+
+monthValue(M,_Y) when M==1;M==3;M==5;M==7;M==8;M==10;M==12 ->
+ 31;
+monthValue(M,_Y) when M==4;M==6;M==9;M==11 ->
+ 30;
+monthValue(_M,Y) ->
+ case modulo(Y,400) of
+ 0 ->
+ 29;
+ _ ->
+ case {modulo(Y,100) /= 0,modulo(Y,4)} of
+ {true,0} ->
+ 29;
+ _ ->
+ 28
+ end
+ end.
+
+%% S dateTime, D duration
+%% result is E dateTime, end of time period with start S and duration
+%% D. E = S + D.
+add_duration2dateTime(S,D) when is_list(S),is_list(D) ->
+ Satoms = dateTime_atoms(S),
+ case duration_atoms(D) of
+ Datoms = {_,_,_,_,_,_} ->
+ add_duration2dateTime2(Satoms,Datoms);
+ Err ->
+ {error,Err}
+ end;
+add_duration2dateTime(S={_,_,_,_,_,_,_},D) ->
+ case duration_atoms(D) of
+ Datoms = {_,_,_,_,_,_} ->
+ add_duration2dateTime2(S,Datoms);
+ Err ->
+ {error,Err}
+ end.
+
+add_duration2dateTime2({Syear,Smonth,Sday,Shour,Sminute,Ssec,Szone},
+ {Dyears,Dmonths,Ddays,Dhours,Dminutes,Dsecs}) ->
+
+ %% months
+ Temp1 = Smonth + Dmonths,
+ Emonth = modulo(Temp1,1,13),
+ Carry1 = fQuotient(Temp1,1,13),
+
+ %% years
+ Eyear = Syear + Dyears + Carry1,
+
+ %% seconds
+ Temp2 = Ssec + Dsecs,
+ Esecs = modulo(Temp2,60),
+ Carry2 = fQuotient(Temp2,60),
+
+ %% minutes
+ Temp3 = Sminute + Dminutes + Carry2,
+ Eminute = modulo(Temp3,60),
+ Carry3 = fQuotient(Temp3,60),
+
+ %% hours
+ Temp4 = Shour + Dhours + Carry3,
+ Ehour = modulo(Temp4,24),
+ Carry4 = fQuotient(Temp4,24),
+
+ %% days
+ TempDays =
+ case maximumDayInMonthFor(Eyear,Emonth) of
+ MaxDay when Sday > MaxDay ->
+ MaxDay;
+ _ ->
+ case Sday < 1 of
+ true ->
+ 1;
+ _ ->
+ Sday
+ end
+ end,
+ {Eyear2,Emonth2,Eday} =
+ carry_loop(TempDays+Ddays+Carry4,Emonth,Eyear),
+ {Eyear2,Emonth2,Eday,Ehour,Eminute,Esecs,Szone}.
+
+carry_loop(Eday,Emonth,Eyear) when Eday < 1 ->
+ carry_loop(Eday + maximumDayInMonthFor(Eyear,Emonth - 1),
+ modulo(Emonth - 1,1,13),
+ Eyear + fQuotient(Emonth - 1,1,13));
+carry_loop(Eday,Emonth,Eyear) ->
+ case maximumDayInMonthFor(Eyear,Emonth) of
+ MaxD when Eday > MaxD ->
+ carry_loop(Eday - maximumDayInMonthFor(Eyear,Emonth),
+ modulo(Emonth + 1,1,13),
+ Eyear + fQuotient(Emonth+1,1,13));
+ _ ->
+ {Eyear,Emonth,Eday}
+ end.
+
+%% Format: '-'? yyyy '-' mm '-' dd 'T' hh ':' mm ':' ss ('.' s+)? (zzzzzz)?
+dateTime_atoms("-" ++ DT) ->
+ dateTime_atoms(DT,neg);
+dateTime_atoms(DT) ->
+ dateTime_atoms(DT,pos).
+dateTime_atoms(S,Sign) ->
+ [Date,TimeZone] = string:tokens(S,"T"),
+ [YY,MM,DD] = string:tokens(Date,"-"),
+ {Zone,ZoneSign,[Hour,Min,Sec]} =
+ case lists:reverse(TimeZone) of
+ "Z"++_ ->
+ {"Z",pos,string:tokens(TimeZone,"Z:")};
+ _ ->
+ ZS = zone_sign(TimeZone),
+ case string:tokens(TimeZone,"-+") of
+ [Time,Z] ->
+ {Z,ZS,string:tokens(Time,":")};
+ [Time] ->
+ {none,ZS,string:tokens(Time,":")}
+ end
+ end,
+ {set_sign(Sign,YY),list_to_integer(MM),list_to_integer(DD),
+ list_to_integer(Hour),list_to_integer(Min),sign_sec(pos,Sec),
+ zone_atoms(ZoneSign,Zone)}.
+
+zone_sign(TimeZone) ->
+ case lists:member($-,TimeZone) of
+ true ->
+ neg;
+ _ ->
+ pos
+ end.
+
+zone_atoms(_Sign,"Z") ->
+ {pos,0,0};
+zone_atoms(Sign,Zone) when is_list(Zone) ->
+ case string:tokens(Zone,":") of
+ [H,M] ->
+ {Sign,list_to_integer(H),list_to_integer(M)};
+ _ -> none
+ end;
+zone_atoms(_Sign,Zone) ->
+ Zone.
+
+
+%% Format: '-'? PnYnMnDTnHnMnS
+duration_atoms("-P"++Dur) ->
+ duration_atoms2(Dur,neg);
+duration_atoms("P"++Dur) ->
+ duration_atoms2(Dur,pos);
+duration_atoms(Dur) ->
+ {illegal_duration,Dur}.
+duration_atoms2(Dur,Sign) ->
+ case lists:member($T,Dur) of
+ true -> %% time atoms exists
+ case string:tokens(Dur,"T") of
+ [Date,Time] ->
+ case duration_atoms_date(Date) of
+ {Y,M,D} ->
+ case duration_atoms_time(Time) of
+ {Hour,Min,Sec} ->
+ {set_sign(Sign,Y),set_sign(Sign,M),
+ set_sign(Sign,D),set_sign(Sign,Hour),
+ set_sign(Sign,Min),sign_sec(Sign,Sec)};
+ Err ->
+ Err
+ end;
+ Err ->
+ Err
+ end;
+ [Time] ->
+ case duration_atoms_time(Time) of
+ {Hour,Min,Sec} ->
+ {0,0,0,set_sign(Sign,Hour),set_sign(Sign,Min),
+ sign_sec(Sign,Sec)};
+ Err ->
+ Err
+ end;
+ Err ->
+ {illegal_duration,Err}
+ end;
+ _ -> %% only date coomponents
+ {Y,M,D} = duration_atoms_date(Dur),
+ {set_sign(Sign,Y),set_sign(Sign,M),set_sign(Sign,D),0,0,0}
+ end.
+
+duration_atoms_date(Date) ->
+ {Y,Date2} = get_digit(Date,$Y),
+ {M,Date3} = get_digit(Date2,$M),
+ {D,Rest} = get_digit(Date3,$D),
+ case Rest of
+ "" -> {Y,M,D};
+ Err -> {illegal_duration,Err}
+ end.
+duration_atoms_time(Time) ->
+ {H,Time2} = get_digit(Time,$H),
+ {M,Time3} = get_digit(Time2,$M),
+ {S,Rest} = get_sec(Time3),
+ case Rest of
+ "" ->
+ {H,M,S};
+ Err ->
+ {illegal_duration,Err}
+ end.
+
+get_digit(Str,Delim) ->
+ get_digit(Str,Delim,[],Str).
+get_digit([Delim|T],Delim,Acc,_Str) ->
+ {lists:reverse(Acc),T};
+get_digit([H|T],Delim,Acc,Str) when H>=$0,H=<$9 ->
+ get_digit(T,Delim,[H|Acc],Str);
+get_digit([],_,[],_Str) ->
+ {"0",[]};
+get_digit([],_,_,Str) ->
+ {"0",Str};
+get_digit(_,_,_,Str) ->
+ %% this matches both the case when reaching another delimeter and
+ %% when the string already are emptied.
+ {"0",Str}.
+
+get_sec([]) ->
+ {"0",[]};
+get_sec(Str) ->
+ get_sec(Str,[],Str).
+get_sec([H|T],Acc,Str) when H>=$0,H=<$9 ->
+ get_sec(T,[H|Acc],Str);
+get_sec([$.|T],Acc,Str) ->
+ get_sec(T,[$.|Acc],Str);
+get_sec([$S|T],Acc,_) ->
+ {lists:reverse(Acc),T};
+get_sec(_,_,Str) ->
+ {"0",Str}.
+
+
+set_sign(pos,Istr) ->
+ list_to_integer(Istr);
+set_sign(_,Istr) ->
+ list_to_integer("-"++Istr).
+sign_sec(pos,Sec) ->
+ case lists:member($.,Sec) of
+ true ->
+ list_to_float(Sec);
+ _ ->
+ list_to_integer(Sec)
+ end;
+sign_sec(_,Sec) ->
+ sign_sec(pos,"-"++Sec).
+
+invert_sign(pos) ->
+ neg;
+invert_sign(neg) ->
+ pos;
+invert_sign(S) ->
+ S.
+
+normalize_dateTime({Y,M,D,Hour,Min,Sec,{Sign,ZH,ZM}}) ->
+ %% minutes
+ TmpMin = Min + set_sign(invert_sign(Sign),integer_to_list(ZM)),
+ NMin = modulo(TmpMin,60),
+ Carry1 = fQuotient(TmpMin,60),
+
+ %% hours
+ TmpHour = Hour + set_sign(invert_sign(Sign),integer_to_list(ZH)) + Carry1,
+ NHour = modulo(TmpHour,24),
+ Carry2 = fQuotient(TmpHour,24),
+
+ {NY,NM,ND} =
+ carry_loop(D+Carry2,M,Y),
+ {NY,NM,ND,NHour,NMin,Sec,{pos,0,0}};
+normalize_dateTime(DT) ->
+ DT.