aboutsummaryrefslogtreecommitdiffstats
path: root/lib/xmerl/src
diff options
context:
space:
mode:
Diffstat (limited to 'lib/xmerl/src')
-rw-r--r--lib/xmerl/src/xmerl_eventp.erl84
-rw-r--r--lib/xmerl/src/xmerl_regexp.erl4
-rw-r--r--lib/xmerl/src/xmerl_sax_old_dom.erl9
-rw-r--r--lib/xmerl/src/xmerl_sax_parser.erl198
-rw-r--r--lib/xmerl/src/xmerl_sax_parser.hrl11
-rw-r--r--lib/xmerl/src/xmerl_sax_parser_base.erlsrc411
-rw-r--r--lib/xmerl/src/xmerl_sax_parser_latin1.erlsrc38
-rw-r--r--lib/xmerl/src/xmerl_sax_parser_list.erlsrc21
-rw-r--r--lib/xmerl/src/xmerl_sax_parser_utf16be.erlsrc50
-rw-r--r--lib/xmerl/src/xmerl_sax_parser_utf16le.erlsrc50
-rw-r--r--lib/xmerl/src/xmerl_sax_parser_utf8.erlsrc50
-rw-r--r--lib/xmerl/src/xmerl_sax_simple_dom.erl7
-rw-r--r--lib/xmerl/src/xmerl_scan.erl42
-rw-r--r--lib/xmerl/src/xmerl_xpath.erl30
-rw-r--r--lib/xmerl/src/xmerl_xs.erl7
-rw-r--r--lib/xmerl/src/xmerl_xsd.erl13
16 files changed, 738 insertions, 287 deletions
diff --git a/lib/xmerl/src/xmerl_eventp.erl b/lib/xmerl/src/xmerl_eventp.erl
index 2cb76abc6e..8d7ea25e24 100644
--- a/lib/xmerl/src/xmerl_eventp.erl
+++ b/lib/xmerl/src/xmerl_eventp.erl
@@ -25,6 +25,90 @@
%% Each contain more elaborate settings of xmerl_scan that makes usage of
%% the customization functions.
%%
+%% @type xmlElement() = #xmlElement{}.
+%%
+%% @type option_list(). <p>Options allow customization of the behaviour of
+%% the scanner.
+%% See also <a href="xmerl_examples.html">tutorial</a> on customization
+%% functions.
+%% </p>
+%% <p>
+%% Possible options are:
+%% </p>
+%% <dl>
+%% <dt><code>{acc_fun, Fun}</code></dt>
+%% <dd>Call back function to accumulate contents of entity.</dd>
+%% <dt><code>{continuation_fun, Fun} |
+%% {continuation_fun, Fun, ContinuationState}</code></dt>
+%% <dd>Call back function to decide what to do if the scanner runs into EOF
+%% before the document is complete.</dd>
+%% <dt><code>{event_fun, Fun} |
+%% {event_fun, Fun, EventState}</code></dt>
+%% <dd>Call back function to handle scanner events.</dd>
+%% <dt><code>{fetch_fun, Fun} |
+%% {fetch_fun, Fun, FetchState}</code></dt>
+%% <dd>Call back function to fetch an external resource.</dd>
+%% <dt><code>{hook_fun, Fun} |
+%% {hook_fun, Fun, HookState}</code></dt>
+%% <dd>Call back function to process the document entities once
+%% identified.</dd>
+%% <dt><code>{close_fun, Fun}</code></dt>
+%% <dd>Called when document has been completely parsed.</dd>
+%% <dt><code>{rules, ReadFun, WriteFun, RulesState} |
+%% {rules, Rules}</code></dt>
+%% <dd>Handles storing of scanner information when parsing.</dd>
+%% <dt><code>{user_state, UserState}</code></dt>
+%% <dd>Global state variable accessible from all customization functions</dd>
+%%
+%% <dt><code>{fetch_path, PathList}</code></dt>
+%% <dd>PathList is a list of
+%% directories to search when fetching files. If the file in question
+%% is not in the fetch_path, the URI will be used as a file
+%% name.</dd>
+%% <dt><code>{space, Flag}</code></dt>
+%% <dd>'preserve' (default) to preserve spaces, 'normalize' to
+%% accumulate consecutive whitespace and replace it with one space.</dd>
+%% <dt><code>{line, Line}</code></dt>
+%% <dd>To specify starting line for scanning in document which contains
+%% fragments of XML.</dd>
+%% <dt><code>{namespace_conformant, Flag}</code></dt>
+%% <dd>Controls whether to behave as a namespace conformant XML parser,
+%% 'false' (default) not to, otherwise 'true'.</dd>
+%% <dt><code>{validation, Flag}</code></dt>
+%% <dd>Controls whether to process as a validating XML parser:
+%% 'off' (default) no validation, or validation 'dtd' by DTD or 'schema'
+%% by XML Schema. 'false' and 'true' options are obsolete
+%% (i.e. they may be removed in a future release), if used 'false'
+%% equals 'off' and 'true' equals 'dtd'.</dd>
+%% <dt><code>{schemaLocation, [{Namespace,Link}|...]}</code></dt>
+%% <dd>Tells explicitly which XML Schema documents to use to validate
+%% the XML document. Used together with the
+%% <code>{validation,schema}</code> option.</dd>
+%% <dt><code>{quiet, Flag}</code></dt>
+%% <dd>Set to 'true' if xmerl should behave quietly and not output any
+%% information to standard output (default 'false').</dd>
+%% <dt><code>{doctype_DTD, DTD}</code></dt>
+%% <dd>Allows specifying the DTD name when it isn't available in the XML
+%% document. This option has effect only together with the
+%% <code>{validation,'dtd'}</code> option.</dd>
+%% <dt><code>{xmlbase, Dir}</code></dt>
+%% <dd>XML Base directory. If using string/1 default is current directory.
+%% If using file/1 default is directory of given file.</dd>
+%% <dt><code>{encoding, Enc}</code></dt>
+%% <dd>Set default character set used (default UTF-8).
+%% This character set is used only if not explicitly given by the XML
+%% declaration. </dd>
+%% <dt><code>{document, Flag}</code></dt>
+%% <dd>Set to 'true' if xmerl should return a complete XML document
+%% as an xmlDocument record (default 'false').</dd>
+%% <dt><code>{comments, Flag}</code></dt>
+%% <dd>Set to 'false' if xmerl should skip comments otherwise they will
+%% be returned as xmlComment records (default 'true').</dd>
+%% <dt><code>{default_attrs, Flag}</code></dt>
+%% <dd>Set to 'true' if xmerl should add to elements missing attributes
+%% with a defined default value (default 'false').</dd>
+%% </dl>
+%%
-module(xmerl_eventp).
-vsn('0.19').
-date('03-09-17').
diff --git a/lib/xmerl/src/xmerl_regexp.erl b/lib/xmerl/src/xmerl_regexp.erl
index fc89b80ff1..1bf8496673 100644
--- a/lib/xmerl/src/xmerl_regexp.erl
+++ b/lib/xmerl/src/xmerl_regexp.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2006-2016. All Rights Reserved.
+%% Copyright Ericsson AB 2006-2017. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -1154,7 +1154,7 @@ comp_crs([], Last) -> [{Last,maxchar}].
%% build_dfa(NFA, NfaStartState) -> {DFA,DfaStartState}.
%% Build a DFA from an NFA using "subset construction". The major
%% difference from the book is that we keep the marked and unmarked
-%% DFA states in seperate lists. New DFA states are added to the
+%% DFA states in separate lists. New DFA states are added to the
%% unmarked list and states are marked by moving them to the marked
%% list. We assume that the NFA accepting state numbers are in
%% ascending order for the rules and use ordsets to keep this order.
diff --git a/lib/xmerl/src/xmerl_sax_old_dom.erl b/lib/xmerl/src/xmerl_sax_old_dom.erl
index fefcf03fce..6d0d836487 100644
--- a/lib/xmerl/src/xmerl_sax_old_dom.erl
+++ b/lib/xmerl/src/xmerl_sax_old_dom.erl
@@ -2,7 +2,7 @@
%%--------------------------------------------------------------------
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2009-2016. All Rights Reserved.
+%% Copyright Ericsson AB 2009-2017. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -127,9 +127,10 @@ build_dom(endDocument,
State#xmerl_sax_old_dom_state{dom=[Decl, Current#xmlElement{
content=lists:reverse(C)
}]};
- _ ->
- %%?dbg("~p\n", [D]),
- ?error("we're not at end the document when endDocument event is encountered.")
+ _ ->
+ %% endDocument is also sent by the parser when a fault occurs, to tell
+ %% the event receiver that no more input will be sent
+ State
end;
%% Element
diff --git a/lib/xmerl/src/xmerl_sax_parser.erl b/lib/xmerl/src/xmerl_sax_parser.erl
index 318a0cf7f4..e383c4c349 100644
--- a/lib/xmerl/src/xmerl_sax_parser.erl
+++ b/lib/xmerl/src/xmerl_sax_parser.erl
@@ -1,7 +1,7 @@
%%--------------------------------------------------------------------
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2008-2016. All Rights Reserved.
+%% Copyright Ericsson AB 2008-2017. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -33,6 +33,7 @@
%% External exports
%%----------------------------------------------------------------------
-export([file/2,
+ stream/3,
stream/2]).
%%----------------------------------------------------------------------
@@ -63,7 +64,7 @@
%% Description: Parse file containing an XML document.
%%----------------------------------------------------------------------
file(Name,Options) ->
- case file:open(Name, [raw, read,binary]) of
+ case file:open(Name, [raw, read_ahead, read,binary]) of
{error, Reason} ->
{error,{Name, file:format_error(Reason)}};
{ok, FD} ->
@@ -72,11 +73,12 @@ file(Name,Options) ->
File = filename:basename(Name),
ContinuationFun = fun default_continuation_cb/1,
Res = stream(<<>>,
- [{continuation_fun, ContinuationFun},
- {continuation_state, FD},
- {current_location, CL},
- {entity, File}
- |Options]),
+ [{continuation_fun, ContinuationFun},
+ {continuation_state, FD},
+ {current_location, CL},
+ {entity, File}
+ |Options],
+ file),
ok = file:close(FD),
Res
end.
@@ -92,19 +94,22 @@ file(Name,Options) ->
%% EventState = term()
%% Description: Parse a stream containing an XML document.
%%----------------------------------------------------------------------
-stream(Xml, Options) when is_list(Xml), is_list(Options) ->
+stream(Xml, Options) ->
+ stream(Xml, Options, stream).
+
+stream(Xml, Options, InputType) when is_list(Xml), is_list(Options) ->
State = parse_options(Options, initial_state()),
- case State#xmerl_sax_parser_state.file_type of
+ case State#xmerl_sax_parser_state.file_type of
dtd ->
xmerl_sax_parser_list:parse_dtd(Xml,
State#xmerl_sax_parser_state{encoding = list,
- input_type = stream});
+ input_type = InputType});
normal ->
xmerl_sax_parser_list:parse(Xml,
State#xmerl_sax_parser_state{encoding = list,
- input_type = stream})
+ input_type = InputType})
end;
-stream(Xml, Options) when is_binary(Xml), is_list(Options) ->
+stream(Xml, Options, InputType) when is_binary(Xml), is_list(Options) ->
case parse_options(Options, initial_state()) of
{error, Reason} -> {error, Reason};
State ->
@@ -115,21 +120,22 @@ stream(Xml, Options) when is_binary(Xml), is_list(Options) ->
normal ->
parse
end,
- case detect_charset(Xml, State) of
- {error, Reason} -> {fatal_error,
- {
- State#xmerl_sax_parser_state.current_location,
- State#xmerl_sax_parser_state.entity,
- 1
- },
- Reason,
- [],
- State#xmerl_sax_parser_state.event_state};
- {Xml1, State1} ->
- parse_binary(Xml1,
- State1#xmerl_sax_parser_state{input_type = stream},
- ParseFunction)
- end
+ try
+ {Xml1, State1} = detect_charset(Xml, State),
+ parse_binary(Xml1,
+ State1#xmerl_sax_parser_state{input_type = InputType},
+ ParseFunction)
+ catch
+ throw:{fatal_error, {State2, Reason}} ->
+ {fatal_error,
+ {
+ State2#xmerl_sax_parser_state.current_location,
+ State2#xmerl_sax_parser_state.entity,
+ 1
+ },
+ Reason, [],
+ State2#xmerl_sax_parser_state.event_state}
+ end
end.
%%----------------------------------------------------------------------
@@ -151,8 +157,8 @@ parse_binary(Xml, #xmerl_sax_parser_state{encoding={utf16,big}}=State, F) ->
xmerl_sax_parser_utf16be:F(Xml, State);
parse_binary(Xml, #xmerl_sax_parser_state{encoding=latin1}=State, F) ->
xmerl_sax_parser_latin1:F(Xml, State);
-parse_binary(_, #xmerl_sax_parser_state{encoding=Enc}, _) ->
- {error, lists:flatten(io_lib:format("Charcter set ~p not supported", [Enc]))}.
+parse_binary(_, #xmerl_sax_parser_state{encoding=Enc}, State) ->
+ ?fatal_error(State, lists:flatten(io_lib:format("Charcter set ~p not supported", [Enc]))).
%%----------------------------------------------------------------------
%% Function: initial_state/0
@@ -206,8 +212,7 @@ parse_options([{entity, Entity} |Options], State) ->
parse_options([skip_external_dtd |Options], State) ->
parse_options(Options, State#xmerl_sax_parser_state{skip_external_dtd = true});
parse_options([O |_], _State) ->
- {error,
- lists:flatten(io_lib:format("Option: ~p not supported", [O]))}.
+ {error, lists:flatten(io_lib:format("Option: ~p not supported", [O]))}.
check_encoding_option(E) when E==utf8; E=={utf16,little}; E=={utf16,big};
@@ -225,16 +230,10 @@ check_encoding_option(E) ->
%% Output: {utf8|utf16le|utf16be|iso8859, Xml, State}
%% Description: Detects which character set is used in a binary stream.
%%----------------------------------------------------------------------
-detect_charset(<<>>, #xmerl_sax_parser_state{continuation_fun = undefined} = _) ->
- throw({error, "Can't detect character encoding due to no indata"});
-detect_charset(<<>>, #xmerl_sax_parser_state{continuation_fun = CFun,
- continuation_state = CState} = State) ->
- case CFun(CState) of
- {<<>>, _} ->
- throw({error, "Can't detect character encoding due to lack of indata"});
- {NewBytes, NewContState} ->
- detect_charset(NewBytes, State#xmerl_sax_parser_state{continuation_state = NewContState})
- end;
+detect_charset(<<>>, #xmerl_sax_parser_state{continuation_fun = undefined} = State) ->
+ ?fatal_error(State, "Can't detect character encoding due to lack of indata");
+detect_charset(<<>>, State) ->
+ cf(<<>>, State, fun detect_charset/2);
detect_charset(Bytes, State) ->
case unicode:bom_to_encoding(Bytes) of
{latin1, 0} ->
@@ -244,25 +243,47 @@ detect_charset(Bytes, State) ->
{RealBytes, State#xmerl_sax_parser_state{encoding=Enc}}
end.
+detect_charset_1(<<16#00>> = Xml, State) ->
+ cf(Xml, State, fun detect_charset_1/2);
+detect_charset_1(<<16#00, 16#3C>> = Xml, State) ->
+ cf(Xml, State, fun detect_charset_1/2);
+detect_charset_1(<<16#00, 16#3C, 16#00>> = Xml, State) ->
+ cf(Xml, State, fun detect_charset_1/2);
detect_charset_1(<<16#00, 16#3C, 16#00, 16#3F, _/binary>> = Xml, State) ->
{Xml, State#xmerl_sax_parser_state{encoding={utf16, big}}};
+detect_charset_1(<<16#3C>> = Xml, State) ->
+ cf(Xml, State, fun detect_charset_1/2);
+detect_charset_1(<<16#3C, 16#00>> = Xml, State) ->
+ cf(Xml, State, fun detect_charset_1/2);
+detect_charset_1(<<16#3C, 16#00, 16#3F>> = Xml, State) ->
+ cf(Xml, State, fun detect_charset_1/2);
detect_charset_1(<<16#3C, 16#00, 16#3F, 16#00, _/binary>> = Xml, State) ->
{Xml, State#xmerl_sax_parser_state{encoding={utf16, little}}};
-detect_charset_1(<<16#3C, 16#3F, 16#78, 16#6D, 16#6C, Xml2/binary>> = Xml, State) ->
- case parse_xml_directive(Xml2) of
+detect_charset_1(<<16#3C>> = Xml, State) ->
+ cf(Xml, State, fun detect_charset_1/2);
+detect_charset_1(<<16#3C, 16#3F>> = Xml, State) ->
+ cf(Xml, State, fun detect_charset_1/2);
+detect_charset_1(<<16#3C, 16#3F, 16#78>> = Xml, State) ->
+ cf(Xml, State, fun detect_charset_1/2);
+detect_charset_1(<<16#3C, 16#3F, 16#78, 16#6D>> = Xml, State) ->
+ cf(Xml, State, fun detect_charset_1/2);
+detect_charset_1(<<16#3C, 16#3F, 16#78, 16#6D, 16#6C, Xml2/binary>>, State) ->
+ {Xml3, State1} = read_until_end_of_xml_directive(Xml2, State),
+ case parse_xml_directive(Xml3) of
{error, Reason} ->
- {error, Reason};
+ ?fatal_error(State, Reason);
AttrList ->
case lists:keysearch("encoding", 1, AttrList) of
{value, {_, E}} ->
case convert_encoding(E) of
{error, Reason} ->
- {error, Reason};
+ ?fatal_error(State, Reason);
Enc ->
- {Xml, State#xmerl_sax_parser_state{encoding=Enc}}
+ {<<16#3C, 16#3F, 16#78, 16#6D, 16#6C, Xml3/binary>>,
+ State1#xmerl_sax_parser_state{encoding=Enc}}
end;
_ ->
- {Xml, State}
+ {<<16#3C, 16#3F, 16#78, 16#6D, 16#6C, Xml3/binary>>, State1}
end
end;
detect_charset_1(Xml, State) ->
@@ -372,7 +393,7 @@ parse_value_1(<<C, Rest/binary>>, Stop, Acc) ->
parse_value_1(Rest, Stop, [C |Acc]).
%%======================================================================
-%%Default functions
+%% Default functions
%%======================================================================
%%----------------------------------------------------------------------
%% Function: default_event_cb(Event, LineNo, State) -> Result
@@ -388,7 +409,7 @@ default_event_cb(_Event, _LineNo, State) ->
%%----------------------------------------------------------------------
%% Function: default_continuation_cb(IoDevice) -> Result
%% IoDevice = iodevice()
-%% Output: Result = {[char()], State}
+%% Output: Result = {binary(), IoDevice}
%% Description: Default continuation callback reading blocks.
%%----------------------------------------------------------------------
default_continuation_cb(IoDevice) ->
@@ -398,3 +419,82 @@ default_continuation_cb(IoDevice) ->
{ok, FileBin} ->
{FileBin, IoDevice}
end.
+
+%%----------------------------------------------------------------------
+%% Function: read_until_end_of_xml_directive(Rest, State) -> Result
+%% Rest = binary()
+%% Output: Result = {binary(), State}
+%% Description: Reads utf8 or latin1 encoded input until it finds '?>'
+%%----------------------------------------------------------------------
+read_until_end_of_xml_directive(Rest, State) ->
+ case binary:match(Rest, <<"?>">>) of
+ nomatch ->
+ case cf(Rest, State) of
+ {<<>>, _} ->
+ ?fatal_error(State, "Can't detect character encoding due to lack of indata");
+ {NewBytes, NewState} ->
+ read_until_end_of_xml_directive(NewBytes, NewState)
+ end;
+ _ ->
+ {Rest, State}
+ end.
+
+
+%%----------------------------------------------------------------------
+%% Function : cf(Rest, State) -> Result
+%% Parameters: Rest = binary()
+%% State = #xmerl_sax_parser_state{}
+%% Result : {NewRest, NewState}
+%% Description: Function that uses the provided continuation fun to read
+%% another chunk from the input stream and returns it appended to Rest,
+%% together with the state holding the new continuation state.
+%%----------------------------------------------------------------------
+cf(_Rest, #xmerl_sax_parser_state{continuation_fun = undefined} = State) ->
+ ?fatal_error(State, "Continuation function undefined");
+cf(Rest, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = CState} = State) ->
+ Result =
+ try
+ CFun(CState)
+ catch
+ throw:ErrorTerm ->
+ ?fatal_error(State, ErrorTerm);
+ exit:Reason ->
+ ?fatal_error(State, {'EXIT', Reason})
+ end,
+ case Result of
+ {<<>>, _} ->
+ ?fatal_error(State, "Can't detect character encoding due to lack of indata");
+ {NewBytes, NewContState} ->
+ {<<Rest/binary, NewBytes/binary>>,
+ State#xmerl_sax_parser_state{continuation_state = NewContState}}
+ end.
+
+%%----------------------------------------------------------------------
+%% Function : cf(Rest, State, NextCall) -> Result
+%% Parameters: Rest = binary()
+%% State = #xmerl_sax_parser_state{}
+%% NextCall = fun()
+%% Result : {Rest, State}
+%% Description: Function that uses provided fun to read another chunk from
+%% input stream and calls the fun in NextCall.
+%%----------------------------------------------------------------------
+cf(_Rest, #xmerl_sax_parser_state{continuation_fun = undefined} = State, _) ->
+ ?fatal_error(State, "Continuation function undefined");
+cf(Rest, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = CState} = State,
+ NextCall) ->
+ Result =
+ try
+ CFun(CState)
+ catch
+ throw:ErrorTerm ->
+ ?fatal_error(State, ErrorTerm);
+ exit:Reason ->
+ ?fatal_error(State, {'EXIT', Reason})
+ end,
+ case Result of
+ {<<>>, _} ->
+ ?fatal_error(State, "Can't detect character encoding due to lack of indata");
+ {NewBytes, NewContState} ->
+ NextCall(<<Rest/binary, NewBytes/binary>>,
+ State#xmerl_sax_parser_state{continuation_state = NewContState})
+ end.
diff --git a/lib/xmerl/src/xmerl_sax_parser.hrl b/lib/xmerl/src/xmerl_sax_parser.hrl
index 932ab0cec5..56a3a42e5f 100644
--- a/lib/xmerl/src/xmerl_sax_parser.hrl
+++ b/lib/xmerl/src/xmerl_sax_parser.hrl
@@ -1,7 +1,7 @@
%%--------------------------------------------------------------------
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2008-2016. All Rights Reserved.
+%% Copyright Ericsson AB 2008-2017. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -88,14 +88,7 @@
current_location, % Location of the currently parsed XML entity
entity, % Parsed XML entity
skip_external_dtd = false,% If true the external DTD is skipped during parsing
- input_type % Source type: file | stream.
- % This field is a preparation for an fix in R17 of a bug in
- % the conformance against the standard.
- % Today a file which contains two XML documents will be considered
- % well-formed and the second is placed in the rest part of the
- % return tuple, according to the conformance tests this should fail.
- % In the future this will fail if xmerl_sax_aprser:file/2 is used but
- % left to the user in the xmerl_sax_aprser:stream/2 case.
+ input_type % Source type: file | stream
}).
diff --git a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc
index 4d75805b9b..1dca9608cb 100644
--- a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc
+++ b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc
@@ -1,7 +1,7 @@
%%-*-erlang-*-
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2008-2016. All Rights Reserved.
+%% Copyright Ericsson AB 2008-2017. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -64,29 +64,42 @@
%% Description: Parsing XML from input stream.
%%----------------------------------------------------------------------
parse(Xml, State) ->
- RefTable = ets:new(xmerl_sax_entity_refs, [private]),
-
- State1 = event_callback(startDocument, State),
-
- case catch parse_document(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}) of
- {ok, Rest, State2} ->
- State3 = event_callback(endDocument, State2),
- ets:delete(RefTable),
- {ok, State3#xmerl_sax_parser_state.event_state, Rest};
- {fatal_error, {State2, Reason}} ->
- State3 = event_callback(endDocument, State2),
- ets:delete(RefTable),
- format_error(fatal_error, State3, Reason);
- {event_receiver_error, State2, {Tag, Reason}} ->
- State3 = event_callback(endDocument, State2),
- ets:delete(RefTable),
- format_error(Tag, State3, Reason);
- Other ->
- _State2 = event_callback(endDocument, State1),
- ets:delete(RefTable),
- throw(Other)
+ RefTable = maps:new(),
+
+ try
+ State1 = event_callback(startDocument, State),
+ Result = parse_document(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}),
+ handle_end_document(Result)
+ catch
+ throw:Exception ->
+ handle_end_document(Exception);
+ _:OtherError ->
+ handle_end_document({other, OtherError, State})
end.
+ % case catch parse_document(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}) of
+ % {ok, Rest, State2} ->
+ % State3 = event_callback(endDocument, State2),
+ % case check_if_rest_ok(State3#xmerl_sax_parser_state.input_type, Rest) of
+ % true ->
+ % {ok, State3#xmerl_sax_parser_state.event_state, Rest};
+ % false ->
+ % format_error(fatal_error, State3, "Input found after legal document")
+ % end;
+ % {fatal_error, {State2, Reason}} ->
+ % State3 = event_callback(endDocument, State2),
+ % format_error(fatal_error, State3, Reason);
+ % {event_receiver_error, State2, {Tag, Reason}} ->
+ % State3 = event_callback(endDocument, State2),
+ % format_error(Tag, State3, Reason);
+ % {endDocument, Rest, State2} ->
+ % State3 = event_callback(endDocument, State2),
+ % {ok, State3#xmerl_sax_parser_state.event_state, Rest};
+ % Other ->
+ % _State2 = event_callback(endDocument, State1),
+ % {fatal_error, Other}
+ % end.
+
%%----------------------------------------------------------------------
%% Function: parse_dtd(Xml, State) -> Result
%% Input: Xml = string() | binary()
@@ -96,38 +109,120 @@ parse(Xml, State) ->
%% Description: Parsing XML DTD from input stream.
%%----------------------------------------------------------------------
parse_dtd(Xml, State) ->
- RefTable = ets:new(xmerl_sax_entity_refs, [private]),
-
- State1 = event_callback(startDocument, State),
-
- case catch parse_external_entity_1(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}) of
- {fatal_error, {State2, Reason}} ->
- State3 = event_callback(endDocument, State2),
- ets:delete(RefTable),
- format_error(fatal_error, State3, Reason);
- {event_receiver_error, State2, {Tag, Reason}} ->
- State3 = event_callback(endDocument, State2),
- format_error(Tag, State3, Reason);
- {Rest, State2} when is_record(State2, xmerl_sax_parser_state) ->
- State3 = event_callback(endDocument, State2),
- ets:delete(RefTable),
- {ok, State3#xmerl_sax_parser_state.event_state, Rest};
- {endDocument, Rest, State2} when is_record(State2, xmerl_sax_parser_state) ->
- State3 = event_callback(endDocument, State2),
- ets:delete(RefTable),
- {ok, State3#xmerl_sax_parser_state.event_state, Rest};
- Other ->
- _State2 = event_callback(endDocument, State1),
- ets:delete(RefTable),
- throw(Other)
+ RefTable = maps:new(),
+
+ try
+ State1 = event_callback(startDocument, State),
+ Result = parse_external_entity_1(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}),
+ handle_end_document(Result)
+ catch
+ throw:Exception ->
+ handle_end_document(Exception);
+ _:OtherError ->
+ handle_end_document({other, OtherError, State})
end.
+ % case catch parse_external_entity_1(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}) of
+ % {fatal_error, {State2, Reason}} ->
+ % State3 = event_callback(endDocument, State2),
+ % format_error(fatal_error, State3, Reason);
+ % {event_receiver_error, State2, {Tag, Reason}} ->
+ % State3 = event_callback(endDocument, State2),
+ % format_error(Tag, State3, Reason);
+ % {Rest, State2} when is_record(State2, xmerl_sax_parser_state) ->
+ % State3 = event_callback(endDocument, State2),
+ % {ok, State3#xmerl_sax_parser_state.event_state, Rest};
+ % {endDocument, Rest, State2} when is_record(State2, xmerl_sax_parser_state) ->
+ % State3 = event_callback(endDocument, State2),
+ % {ok, State3#xmerl_sax_parser_state.event_state, Rest};
+ % Other ->
+ % _State2 = event_callback(endDocument, State1),
+ % {fatal_error, Other}
+ % end.
+
+
%%======================================================================
%% Internal functions
%%======================================================================
%%----------------------------------------------------------------------
+%% Function: handle_end_document(ParseResult) -> Result
+%% Input: ParseResult = term()
+%% Output: Result = {ok, EventState, Rest} | FormattedError | {fatal_error, term()}
+%% EventState = term(), FormattedError = result of format_error/3
+%% Description: Ends the parsing and formats output
+%%----------------------------------------------------------------------
+handle_end_document({ok, Rest, State}) ->
+ %%ok case from parse
+ try
+ State1 = event_callback(endDocument, State),
+ case check_if_rest_ok(State1#xmerl_sax_parser_state.input_type, Rest) of
+ true ->
+ {ok, State1#xmerl_sax_parser_state.event_state, Rest};
+ false ->
+ format_error(fatal_error, State1, "Input found after legal document")
+ end
+ catch
+ throw:{event_receiver_error, State2, {Tag, Reason}} ->
+ format_error(Tag, State2, Reason);
+ _:Other ->
+ {fatal_error, Other}
+ end;
+handle_end_document({endDocument, Rest, State}) ->
+ %% ok case from parse and parse_dtd
+ try
+ State1 = event_callback(endDocument, State),
+ {ok, State1#xmerl_sax_parser_state.event_state, Rest}
+ catch
+ throw:{event_receiver_error, State2, {Tag, Reason}} ->
+ format_error(Tag, State2, Reason);
+ _:Other ->
+ {fatal_error, Other}
+ end;
+handle_end_document({fatal_error, {State, Reason}}) ->
+ try
+ State1 = event_callback(endDocument, State),
+ format_error(fatal_error, State1, Reason)
+ catch
+ throw:{event_receiver_error, State2, {Tag, Reason}} ->
+ format_error(Tag, State2, Reason);
+ _:Other ->
+ {fatal_error, Other}
+ end;
+handle_end_document({event_receiver_error, State, {Tag, Reason}}) ->
+ try
+ State1 = event_callback(endDocument, State),
+ format_error(Tag, State1, Reason)
+ catch
+ throw:{event_receiver_error, State2, {Tag, Reason}} ->
+ format_error(Tag, State2, Reason);
+ _:Other ->
+ {fatal_error, Other}
+ end;
+handle_end_document({Rest, State}) when is_record(State, xmerl_sax_parser_state) ->
+ %%ok case from parse_dtd
+ try
+ State1 = event_callback(endDocument, State),
+ {ok, State1#xmerl_sax_parser_state.event_state, Rest}
+ catch
+ throw:{event_receiver_error, State2, {Tag, Reason}} ->
+ format_error(Tag, State2, Reason);
+ _:Other ->
+ {fatal_error, Other}
+ end;
+handle_end_document({other, Error, State}) ->
+ try
+ _State1 = event_callback(endDocument, State),
+ {fatal_error, Error}
+ catch
+ throw:{event_receiver_error, State2, {Tag, Reason}} ->
+ format_error(Tag, State2, Reason);
+ _:Other ->
+ {fatal_error, Other}
+ end.
+
+%%----------------------------------------------------------------------
%% Function: parse_document(Rest, State) -> Result
%% Input: Rest = string() | binary()
%% State = #xmerl_sax_parser_state{}
@@ -136,10 +231,11 @@ parse_dtd(Xml, State) ->
%% [1] document ::= prolog element Misc*
%%----------------------------------------------------------------------
parse_document(Rest, State) when is_record(State, xmerl_sax_parser_state) ->
- {Rest1, State1} = parse_xml_decl(Rest, State),
+ {Rest1, State1} = parse_byte_order_mark(Rest, State),
{Rest2, State2} = parse_misc(Rest1, State1, true),
{ok, Rest2, State2}.
+?PARSE_BYTE_ORDER_MARK(Bytes, State).
%%----------------------------------------------------------------------
%% Function: parse_xml_decl(Rest, State) -> Result
@@ -150,15 +246,8 @@ parse_document(Rest, State) when is_record(State, xmerl_sax_parser_state) ->
%% [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
%% [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
%%----------------------------------------------------------------------
--dialyzer({[no_fail_call, no_match], parse_xml_decl/2}).
parse_xml_decl(?STRING_EMPTY, State) ->
cf(?STRING_EMPTY, State, fun parse_xml_decl/2);
-parse_xml_decl(?BYTE_ORDER_MARK_1, State) ->
- cf(?BYTE_ORDER_MARK_1, State, fun parse_xml_decl/2);
-parse_xml_decl(?BYTE_ORDER_MARK_2, State) ->
- cf(?BYTE_ORDER_MARK_2, State, fun parse_xml_decl/2);
-parse_xml_decl(?BYTE_ORDER_MARK_REST(Rest), State) ->
- cf(Rest, State, fun parse_xml_decl/2);
parse_xml_decl(?STRING("<") = Bytes, State) ->
cf(Bytes, State, fun parse_xml_decl/2);
parse_xml_decl(?STRING("<?") = Bytes, State) ->
@@ -170,31 +259,19 @@ parse_xml_decl(?STRING("<?xm") = Bytes, State) ->
parse_xml_decl(?STRING("<?xml") = Bytes, State) ->
cf(Bytes, State, fun parse_xml_decl/2);
parse_xml_decl(?STRING_REST("<?xml", Rest1), State) ->
- parse_xml_decl_1(Rest1, State);
-parse_xml_decl(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State) when is_binary(Bytes) ->
- case unicode:characters_to_list(Bytes, Enc) of
- {incomplete, _, _} ->
- cf(Bytes, State, fun parse_xml_decl/2);
- {error, _Encoded, _Rest} ->
- ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc])));
- _ ->
- parse_prolog(Bytes, State)
- end;
-parse_xml_decl(Bytes, State) ->
- parse_prolog(Bytes, State).
-
+ parse_xml_decl_rest(Rest1, State);
+?PARSE_XML_DECL(Bytes, State).
-parse_xml_decl_1(?STRING_UNBOUND_REST(C, Rest) = Bytes, State) ->
+parse_xml_decl_rest(?STRING_UNBOUND_REST(C, Rest) = Bytes, State) ->
if
?is_whitespace(C) ->
{_XmlAttributes, Rest1, State1} = parse_version_info(Rest, State, []),
- %State2 = event_callback({processingInstruction, "xml", XmlAttributes}, State1),% The XML decl. should not be reported as a PI
parse_prolog(Rest1, State1);
true ->
parse_prolog(?STRING_REST("<?xml", Bytes), State)
end;
-parse_xml_decl_1(Bytes, State) ->
- unicode_incomplete_check([Bytes, State, fun parse_xml_decl_1/2], undefined).
+parse_xml_decl_rest(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_xml_decl_rest/2], undefined).
@@ -216,8 +293,6 @@ parse_prolog(?STRING_REST("<?", Rest), State) ->
parse_prolog(Rest1, State1);
{endDocument, Rest1, State1} ->
parse_prolog(Rest1, State1)
- % IValue = ?TO_INPUT_FORMAT("<?"),
- % {?APPEND_STRING(IValue, Rest1), State1}
end;
parse_prolog(?STRING_REST("<!", Rest), State) ->
parse_prolog_1(Rest, State);
@@ -230,7 +305,6 @@ parse_prolog(Bytes, State) ->
unicode_incomplete_check([Bytes, State, fun parse_prolog/2],
"expecting < or whitespace").
-
parse_prolog_1(?STRING_EMPTY, State) ->
cf(?STRING_EMPTY, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING("D") = Bytes, State) ->
@@ -442,6 +516,16 @@ check_if_new_doc_allowed(stream, []) ->
check_if_new_doc_allowed(_, _) ->
false.
+check_if_rest_ok(file, []) ->
+ true;
+check_if_rest_ok(file, <<>>) ->
+ true;
+check_if_rest_ok(stream, _) ->
+ true;
+check_if_rest_ok(_, _) ->
+ false.
+
+
%%----------------------------------------------------------------------
%% Function: parse_pi_1(Rest, State) -> Result
%% Input: Rest = string() | binary()
@@ -886,11 +970,11 @@ send_end_prefix_mapping_event([{Prefix, _Uri} |Ns], State) ->
parse_eq(?STRING_EMPTY, State) ->
cf(?STRING_EMPTY, State, fun parse_eq/2);
parse_eq(?STRING_REST("=", Rest), State) ->
- {Rest, State};
+ {Rest, State};
parse_eq(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
- {_WS, Rest, State1} =
- whitespace(Bytes, State, []),
- parse_eq(Rest, State1);
+ {_WS, Rest, State1} =
+ whitespace(Bytes, State, []),
+ parse_eq(Rest, State1);
parse_eq(Bytes, State) ->
unicode_incomplete_check([Bytes, State, fun parse_eq/2],
"expecting = or whitespace").
@@ -908,11 +992,11 @@ parse_eq(Bytes, State) ->
parse_att_value(?STRING_EMPTY, State) ->
cf(?STRING_EMPTY, State, fun parse_att_value/2);
parse_att_value(?STRING_UNBOUND_REST(C, Rest), State) when C == $'; C == $" ->
- parse_att_value(Rest, State, C, []);
+ parse_att_value(Rest, State, C, []);
parse_att_value(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
- {_WS, Rest, State1} =
- whitespace(Bytes, State, []),
- parse_att_value(Rest, State1);
+ {_WS, Rest, State1} =
+ whitespace(Bytes, State, []),
+ parse_att_value(Rest, State1);
parse_att_value(Bytes, State) ->
unicode_incomplete_check([Bytes, State, fun parse_att_value/2],
"\', \" or whitespace expected").
@@ -1024,16 +1108,21 @@ parse_etag(Bytes, State) ->
unicode_incomplete_check([Bytes, State, fun parse_etag/2],
undefined).
-
parse_etag_1(?STRING_REST(">", Rest),
#xmerl_sax_parser_state{end_tags=[{_ETag, Uri, LocalName, QName, OldNsList, NewNsList}
- |RestOfETags]} = State, _Tag) ->
+ |RestOfETags],
+ input_type=InputType} = State, _Tag) ->
State1 = event_callback({endElement, Uri, LocalName, QName}, State),
State2 = send_end_prefix_mapping_event(NewNsList, State1),
- parse_content(Rest,
- State2#xmerl_sax_parser_state{end_tags=RestOfETags,
- ns = OldNsList},
- [], true);
+ case check_if_new_doc_allowed(InputType, RestOfETags) of
+ true ->
+ throw({endDocument, Rest, State2#xmerl_sax_parser_state{ns = OldNsList}});
+ false ->
+ parse_content(Rest,
+ State2#xmerl_sax_parser_state{end_tags=RestOfETags,
+ ns = OldNsList},
+ [], true)
+ end;
parse_etag_1(?STRING_UNBOUND_REST(_C, _), State, Tag) ->
{P,TN} = Tag,
?fatal_error(State, "Bad EndTag: " ++ P ++ ":" ++ TN);
@@ -1051,21 +1140,26 @@ parse_etag_1(Bytes, State, Tag) ->
%% Description: Parsing the content part of tags
%% [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
%%----------------------------------------------------------------------
-
parse_content(?STRING_EMPTY, State, Acc, IgnorableWS) ->
- case catch cf(?STRING_EMPTY, State, Acc, IgnorableWS, fun parse_content/4) of
- {Rest, State1} when is_record(State1, xmerl_sax_parser_state) ->
- {Rest, State1};
- {fatal_error, {State1, Msg}} ->
- case check_if_document_complete(State1, Msg) of
- true ->
- State2 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State1),
- {?STRING_EMPTY, State2};
- false ->
- ?fatal_error(State1, Msg)
- end;
- Other ->
- throw(Other)
+ case check_if_document_complete(State, "No more bytes") of
+ true ->
+ State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
+ {?STRING_EMPTY, State1};
+ false ->
+ case catch cf(?STRING_EMPTY, State, Acc, IgnorableWS, fun parse_content/4) of
+ {Rest, State1} when is_record(State1, xmerl_sax_parser_state) ->
+ {Rest, State1};
+ {fatal_error, {State1, Msg}} ->
+ case check_if_document_complete(State1, Msg) of
+ true ->
+ State2 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State1),
+ {?STRING_EMPTY, State2};
+ false ->
+ ?fatal_error(State1, Msg)
+ end;
+ Other ->
+ throw(Other)
+ end
end;
parse_content(?STRING("\r") = Bytes, State, Acc, IgnorableWS) ->
cf(Bytes, State, Acc, IgnorableWS, fun parse_content/4);
@@ -1094,7 +1188,7 @@ parse_content(?STRING_REST("<?", Rest), State, Acc, IgnorableWS) ->
parse_content(?STRING_REST("<!", Rest1) = Rest, #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) ->
case ET of
[] ->
+ {Rest, State}; %% Send ignorable WS ???
+ {Rest, State}; %% Skicka ignorable WS ???
_ ->
State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
parse_cdata(Rest1, State1)
@@ -1102,7 +1196,7 @@ parse_content(?STRING_REST("<!", Rest1) = Rest, #xmerl_sax_parser_state{end_tags
parse_content(?STRING_REST("<", Rest1) = Rest, #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) ->
case ET of
[] ->
+ {Rest, State}; %% Send ignorable WS ???
+ {Rest, State}; %% Skicka ignorable WS ???
_ ->
State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
parse_stag(Rest1, State1)
@@ -1204,7 +1298,6 @@ send_character_event(_, true, String, State) ->
%% Description: Parse whitespaces.
%% [3] S ::= (#x20 | #x9 | #xD | #xA)+
%%----------------------------------------------------------------------
--dialyzer({no_fail_call, whitespace/3}).
whitespace(?STRING_EMPTY, State, Acc) ->
case cf(?STRING_EMPTY, State, Acc, fun whitespace/3) of
{?STRING_EMPTY, State} ->
@@ -1230,16 +1323,7 @@ whitespace(?STRING_REST("\r", Rest), State, Acc) ->
whitespace(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf |Acc]);
whitespace(?STRING_UNBOUND_REST(C, Rest), State, Acc) when ?is_whitespace(C) ->
whitespace(Rest, State, [C|Acc]);
-whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) ->
- {lists:reverse(Acc), Bytes, State};
-whitespace(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State, Acc) when is_binary(Bytes) ->
- case unicode:characters_to_list(Bytes, Enc) of
- {incomplete, _, _} ->
- cf(Bytes, State, Acc, fun whitespace/3);
- {error, _Encoded, _Rest} ->
- ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc])))
- end.
-
+?WHITESPACE(Bytes, State, Acc).
%%----------------------------------------------------------------------
%% Function: parse_reference(Rest, State, HaveToExist) -> Result
@@ -1362,23 +1446,24 @@ parse_pe_reference_1(Bytes, State, Name) ->
"missing ; after reference " ++ Name).
-
%%----------------------------------------------------------------------
-%% Function: insert_reference(Reference, State) -> Result
-%% Parameters: Reference = string()
+%% Function: insert_reference(Name, Ref, State) -> Result
+%% Parameters: Name = string()
+%% Ref = {Type, Value}
+%% Type = atom()
+%% Value = term()
%% State = #xmerl_sax_parser_state{}
%% Result :
%%----------------------------------------------------------------------
-insert_reference({Name, Type, Value}, Table) ->
- case ets:lookup(Table, Name) of
- [{Name, _, _}] ->
- ok;
+insert_reference(Name, Value, #xmerl_sax_parser_state{ref_table = Map} = State) ->
+ case maps:find(Name, Map) of
+ error ->
+ State#xmerl_sax_parser_state{ref_table = maps:put(Name, Value, Map)};
_ ->
- ets:insert(Table, {Name, Type, Value})
+ State
end.
-
%%----------------------------------------------------------------------
%% Function: look_up_reference(Reference, State) -> Result
%% Parameters: Reference = string()
@@ -1396,8 +1481,8 @@ look_up_reference("apos", _, _) ->
look_up_reference("quot", _, _) ->
{internal_general, "quot", "\""};
look_up_reference(Name, HaveToExist, State) ->
- case ets:lookup(State#xmerl_sax_parser_state.ref_table, Name) of
- [{Name, Type, Value}] ->
+ case maps:find(Name, State#xmerl_sax_parser_state.ref_table) of
+ {ok, {Type, Value}} ->
{Type, Name, Value};
_ ->
case HaveToExist of
@@ -1479,7 +1564,7 @@ parse_system_litteral(?STRING_EMPTY, State, Stop, Acc) ->
parse_system_litteral(?STRING_UNBOUND_REST(Stop, Rest), State, Stop, Acc) ->
{lists:reverse(Acc), Rest, State};
parse_system_litteral(?STRING_UNBOUND_REST(C, Rest), State, Stop, Acc) ->
- parse_system_litteral(Rest, State, Stop, [C |Acc]);
+ parse_system_litteral(Rest, State, Stop, [C |Acc]);
parse_system_litteral(Bytes, State, Stop, Acc) ->
unicode_incomplete_check([Bytes, State, Stop, Acc, fun parse_system_litteral/4],
undefined).
@@ -1651,9 +1736,11 @@ parse_external_entity(State, _PubId, SysId) ->
end_tags = []},
- EventState = handle_external_entity(ExtRef, State1),
+ {EventState, RefTable} = handle_external_entity(ExtRef, State1),
- NewState = event_callback({endEntity, SysId}, SaveState#xmerl_sax_parser_state{event_state=EventState}),
+ NewState = event_callback({endEntity, SysId},
+ SaveState#xmerl_sax_parser_state{event_state=EventState,
+ ref_table=RefTable}),
NewState#xmerl_sax_parser_state{file_type=normal}.
@@ -1680,7 +1767,8 @@ handle_external_entity({file, FileToOpen}, State) ->
entity=filename:basename(FileToOpen),
input_type=file}),
ok = file:close(FD),
- EntityState#xmerl_sax_parser_state.event_state
+ {EntityState#xmerl_sax_parser_state.event_state,
+ EntityState#xmerl_sax_parser_state.ref_table}
end;
handle_external_entity({http, Url}, State) ->
@@ -1693,14 +1781,16 @@ handle_external_entity({http, Url}, State) ->
++ file:format_error(Reason));
{ok, FD} ->
{?STRING_EMPTY, EntityState} =
- parse_external_entity_1(<<>>,
+ parse_external_entity_byte_order_mark(<<>>,
State#xmerl_sax_parser_state{continuation_state=FD,
current_location=filename:dirname(Url),
entity=filename:basename(Url),
input_type=file}),
ok = file:close(FD),
ok = file:delete(TmpFile),
- EntityState#xmerl_sax_parser_state.event_state
+ {EntityState#xmerl_sax_parser_state.event_state,
+ EntityState#xmerl_sax_parser_state.ref_table}
+
end
catch
throw:{error, Error} ->
@@ -1709,6 +1799,8 @@ handle_external_entity({http, Url}, State) ->
handle_external_entity({Tag, _Url}, State) ->
?fatal_error(State, "Unsupported URI type: " ++ atom_to_list(Tag)).
+?PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State).
+
%%----------------------------------------------------------------------
%% Function : parse_external_entity_1(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
@@ -1716,22 +1808,15 @@ handle_external_entity({Tag, _Url}, State) ->
%% Result : {Rest, State}
%% Description: Parse the external entity.
%%----------------------------------------------------------------------
--dialyzer({[no_fail_call, no_match], parse_external_entity_1/2}).
parse_external_entity_1(?STRING_EMPTY, #xmerl_sax_parser_state{file_type=Type} = State) ->
case catch cf(?STRING_EMPTY, State, fun parse_external_entity_1/2) of
{Rest, State1} when is_record(State1, xmerl_sax_parser_state) ->
- {Rest, State};
+ {Rest, State1};
{fatal_error, {State1, "No more bytes"}} when Type == dtd; Type == entity ->
{?STRING_EMPTY, State1};
Other ->
throw(Other)
end;
-parse_external_entity_1(?BYTE_ORDER_MARK_1, State) ->
- cf(?BYTE_ORDER_MARK_1, State, fun parse_external_entity_1/2);
-parse_external_entity_1(?BYTE_ORDER_MARK_2, State) ->
- cf(?BYTE_ORDER_MARK_2, State, fun parse_external_entity_1/2);
-parse_external_entity_1(?BYTE_ORDER_MARK_REST(Rest), State) ->
- parse_external_entity_1(Rest, State);
parse_external_entity_1(?STRING("<") = Bytes, State) ->
cf(Bytes, State, fun parse_external_entity_1/2);
parse_external_entity_1(?STRING("<?") = Bytes, State) ->
@@ -2452,24 +2537,24 @@ parse_entity_def(?STRING_EMPTY, State, Name) ->
cf(?STRING_EMPTY, State, Name, fun parse_entity_def/3);
parse_entity_def(?STRING_UNBOUND_REST(C, Rest), State, Name) when C == $'; C == $" ->
{Value, Rest1, State1} = parse_entity_value(Rest, State, C, []),
- insert_reference({Name, internal_general, Value}, State1#xmerl_sax_parser_state.ref_table),
- State2 = event_callback({internalEntityDecl, Name, Value}, State1),
- {_WS, Rest2, State3} = whitespace(Rest1, State2, []),
- parse_def_end(Rest2, State3);
+ State2 = insert_reference(Name, {internal_general, Value}, State1),
+ State3 = event_callback({internalEntityDecl, Name, Value}, State2),
+ {_WS, Rest2, State4} = whitespace(Rest1, State3, []),
+ parse_def_end(Rest2, State4);
parse_entity_def(?STRING_UNBOUND_REST(C, _) = Rest, State, Name) when C == $S; C == $P ->
{PubId, SysId, Rest1, State1} = parse_external_id(Rest, State, false),
{Ndata, Rest2, State2} = parse_ndata(Rest1, State1),
case Ndata of
undefined ->
- insert_reference({Name, external_general, {PubId, SysId}},
- State2#xmerl_sax_parser_state.ref_table),
- State3 = event_callback({externalEntityDecl, Name, PubId, SysId}, State2),
- {Rest2, State3};
+ State3 = insert_reference(Name, {external_general, {PubId, SysId}},
+ State2),
+ State4 = event_callback({externalEntityDecl, Name, PubId, SysId}, State3),
+ {Rest2, State4};
_ ->
- insert_reference({Name, unparsed, {PubId, SysId, Ndata}},
- State2#xmerl_sax_parser_state.ref_table),
- State3 = event_callback({unparsedEntityDecl, Name, PubId, SysId, Ndata}, State2),
- {Rest2, State3}
+ State3 = insert_reference(Name, {unparsed, {PubId, SysId, Ndata}},
+ State2),
+ State4 = event_callback({unparsedEntityDecl, Name, PubId, SysId, Ndata}, State3),
+ {Rest2, State4}
end;
parse_entity_def(Bytes, State, Name) ->
unicode_incomplete_check([Bytes, State, Name, fun parse_entity_def/3],
@@ -2646,19 +2731,19 @@ parse_pe_def(?STRING_EMPTY, State, Name) ->
parse_pe_def(?STRING_UNBOUND_REST(C, Rest), State, Name) when C == $'; C == $" ->
{Value, Rest1, State1} = parse_entity_value(Rest, State, C, []),
Name1 = "%" ++ Name,
- insert_reference({Name1, internal_parameter, Value},
- State1#xmerl_sax_parser_state.ref_table),
- State2 = event_callback({internalEntityDecl, Name1, Value}, State1),
- {_WS, Rest2, State3} = whitespace(Rest1, State2, []),
- parse_def_end(Rest2, State3);
+ State2 = insert_reference(Name1, {internal_parameter, Value},
+ State1),
+ State3 = event_callback({internalEntityDecl, Name1, Value}, State2),
+ {_WS, Rest2, State4} = whitespace(Rest1, State3, []),
+ parse_def_end(Rest2, State4);
parse_pe_def(?STRING_UNBOUND_REST(C, _) = Bytes, State, Name) when C == $S; C == $P ->
{PubId, SysId, Rest1, State1} = parse_external_id(Bytes, State, false),
Name1 = "%" ++ Name,
- insert_reference({Name1, external_parameter, {PubId, SysId}},
- State1#xmerl_sax_parser_state.ref_table),
- State2 = event_callback({externalEntityDecl, Name1, PubId, SysId}, State1),
- {_WS, Rest2, State3} = whitespace(Rest1, State2, []),
- parse_def_end(Rest2, State3);
+ State2 = insert_reference(Name1, {external_parameter, {PubId, SysId}},
+ State1),
+ State3 = event_callback({externalEntityDecl, Name1, PubId, SysId}, State2),
+ {_WS, Rest2, State4} = whitespace(Rest1, State3, []),
+ parse_def_end(Rest2, State4);
parse_pe_def(Bytes, State, Name) ->
unicode_incomplete_check([Bytes, State, Name, fun parse_pe_def/3],
"\", \', SYSTEM or PUBLIC expected").
@@ -3290,7 +3375,7 @@ cf(Rest, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = C
catch
throw:ErrorTerm ->
?fatal_error(State, ErrorTerm);
- exit:Reason ->
+ exit:Reason ->
?fatal_error(State, {'EXIT', Reason})
end,
case Result of
diff --git a/lib/xmerl/src/xmerl_sax_parser_latin1.erlsrc b/lib/xmerl/src/xmerl_sax_parser_latin1.erlsrc
index 961806bf4c..6e59347fb8 100644
--- a/lib/xmerl/src/xmerl_sax_parser_latin1.erlsrc
+++ b/lib/xmerl/src/xmerl_sax_parser_latin1.erlsrc
@@ -2,7 +2,7 @@
%%--------------------------------------------------------------------
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2008-2016. All Rights Reserved.
+%% Copyright Ericsson AB 2008-2017. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -34,8 +34,36 @@
-define(APPEND_STRING(Rest, New), <<Rest/binary, New/binary>>).
-define(TO_INPUT_FORMAT(Val), unicode:characters_to_binary(Val, unicode, latin1)).
-%% STRING_REST and STRING_UNBOUND_REST is only different in the list case
-define(STRING_UNBOUND_REST(MatchChar, Rest), <<MatchChar, Rest/binary>>).
--define(BYTE_ORDER_MARK_1, undefined_bom1).
--define(BYTE_ORDER_MARK_2, undefined_bom2).
--define(BYTE_ORDER_MARK_REST(Rest), <<undefined, Rest/binary>>).
+
+-define(PARSE_BYTE_ORDER_MARK(Bytes, State),
+ parse_byte_order_mark(Bytes, State) ->
+ parse_xml_decl(Bytes, State)).
+
+-define(PARSE_XML_DECL(Bytes, State),
+ parse_xml_decl(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State) when is_binary(Bytes) ->
+ case unicode:characters_to_list(Bytes, Enc) of
+ {incomplete, _, _} ->
+ cf(Bytes, State, fun parse_xml_decl/2);
+ {error, _Encoded, _Rest} ->
+ ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc])));
+ _ ->
+ parse_prolog(Bytes, State)
+ end;
+ parse_xml_decl(Bytes, State) ->
+ parse_prolog(Bytes, State)).
+
+-define(WHITESPACE(Bytes, State, Acc),
+ whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) ->
+ {lists:reverse(Acc), Bytes, State};
+ whitespace(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State, Acc) when is_binary(Bytes) ->
+ case unicode:characters_to_list(Bytes, Enc) of
+ {incomplete, _, _} ->
+ cf(Bytes, State, Acc, fun whitespace/3);
+ {error, _Encoded, _Rest} ->
+ ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc])))
+ end).
+
+-define(PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State),
+ parse_external_entity_byte_order_mark(Bytes, State) ->
+ parse_external_entity_1(Bytes, State)).
diff --git a/lib/xmerl/src/xmerl_sax_parser_list.erlsrc b/lib/xmerl/src/xmerl_sax_parser_list.erlsrc
index 624a621d92..ac89896215 100644
--- a/lib/xmerl/src/xmerl_sax_parser_list.erlsrc
+++ b/lib/xmerl/src/xmerl_sax_parser_list.erlsrc
@@ -2,7 +2,7 @@
%%--------------------------------------------------------------------
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2008-2016. All Rights Reserved.
+%% Copyright Ericsson AB 2008-2017. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -36,6 +36,19 @@
%% In the list case we can't use a '++' when matchin against an unbound variable
-define(STRING_UNBOUND_REST(MatchChar, Rest), [MatchChar | Rest]).
--define(BYTE_ORDER_MARK_1, undefined_bom1).
--define(BYTE_ORDER_MARK_2, undefined_bom2).
--define(BYTE_ORDER_MARK_REST(Rest), [undefined|Rest]).
+
+-define(PARSE_BYTE_ORDER_MARK(Bytes, State),
+ parse_byte_order_mark(Bytes, State) ->
+ parse_xml_decl(Bytes, State)).
+
+-define(PARSE_XML_DECL(Bytes, State),
+ parse_xml_decl(Bytes, State) ->
+ parse_prolog(Bytes, State)).
+
+-define(WHITESPACE(Bytes, State, Acc),
+ whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) ->
+ {lists:reverse(Acc), Bytes, State}).
+
+-define(PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State),
+ parse_external_entity_byte_order_mark(Bytes, State) ->
+ parse_external_entity_1(Bytes, State)).
diff --git a/lib/xmerl/src/xmerl_sax_parser_utf16be.erlsrc b/lib/xmerl/src/xmerl_sax_parser_utf16be.erlsrc
index ff84ece97a..ec89024729 100644
--- a/lib/xmerl/src/xmerl_sax_parser_utf16be.erlsrc
+++ b/lib/xmerl/src/xmerl_sax_parser_utf16be.erlsrc
@@ -2,7 +2,7 @@
%%--------------------------------------------------------------------
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2008-2016. All Rights Reserved.
+%% Copyright Ericsson AB 2008-2017. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -34,8 +34,50 @@
-define(APPEND_STRING(Rest, New), <<Rest/binary, New/binary>>).
-define(TO_INPUT_FORMAT(Val), unicode:characters_to_binary(Val, unicode, {utf16, big})).
-%% STRING_REST and STRING_UNBOUND_REST is only different in the list case
-define(STRING_UNBOUND_REST(MatchChar, Rest), <<MatchChar/big-utf16, Rest/binary>>).
--define(BYTE_ORDER_MARK_1, undefined_bom1).
--define(BYTE_ORDER_MARK_2, <<16#FE>>).
+-define(BYTE_ORDER_MARK_1, <<16#FE>>).
-define(BYTE_ORDER_MARK_REST(Rest), <<16#FE, 16#FF, Rest/binary>>).
+
+-define(PARSE_BYTE_ORDER_MARK(Bytes, State),
+ parse_byte_order_mark(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_byte_order_mark/2);
+ parse_byte_order_mark(?BYTE_ORDER_MARK_1, State) ->
+ cf(?BYTE_ORDER_MARK_1, State, fun parse_byte_order_mark/2);
+ parse_byte_order_mark(?BYTE_ORDER_MARK_REST(Rest), State) ->
+ parse_xml_decl(Rest, State);
+ parse_byte_order_mark(Bytes, State) ->
+ parse_xml_decl(Bytes, State)).
+
+-define(PARSE_XML_DECL(Bytes, State),
+ parse_xml_decl(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State) when is_binary(Bytes) ->
+ case unicode:characters_to_list(Bytes, Enc) of
+ {incomplete, _, _} ->
+ cf(Bytes, State, fun parse_xml_decl/2);
+ {error, _Encoded, _Rest} ->
+ ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc])));
+ _ ->
+ parse_prolog(Bytes, State)
+ end;
+ parse_xml_decl(Bytes, State) ->
+ parse_prolog(Bytes, State)).
+
+-define(WHITESPACE(Bytes, State, Acc),
+ whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) ->
+ {lists:reverse(Acc), Bytes, State};
+ whitespace(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State, Acc) when is_binary(Bytes) ->
+ case unicode:characters_to_list(Bytes, Enc) of
+ {incomplete, _, _} ->
+ cf(Bytes, State, Acc, fun whitespace/3);
+ {error, _Encoded, _Rest} ->
+ ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc])))
+ end).
+
+-define(PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State),
+ parse_external_entity_byte_order_mark(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_external_entity_byte_order_mark/2);
+ parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_1, State) ->
+ cf(?BYTE_ORDER_MARK_1, State, fun parse_external_entity_byte_order_mark/2);
+ parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_REST(Rest), State) ->
+ parse_external_entity_1(Rest, State);
+ parse_external_entity_byte_order_mark(Bytes, State) ->
+ parse_external_entity_1(Bytes, State)).
diff --git a/lib/xmerl/src/xmerl_sax_parser_utf16le.erlsrc b/lib/xmerl/src/xmerl_sax_parser_utf16le.erlsrc
index a330fce8d0..566333a045 100644
--- a/lib/xmerl/src/xmerl_sax_parser_utf16le.erlsrc
+++ b/lib/xmerl/src/xmerl_sax_parser_utf16le.erlsrc
@@ -2,7 +2,7 @@
%%--------------------------------------------------------------------
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2008-2016. All Rights Reserved.
+%% Copyright Ericsson AB 2008-2017. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -34,8 +34,50 @@
-define(APPEND_STRING(Rest, New), <<Rest/binary, New/binary>>).
-define(TO_INPUT_FORMAT(Val), unicode:characters_to_binary(Val, unicode, {utf16, little})).
-%% STRING_REST and STRING_UNBOUND_REST is only different in the list case
-define(STRING_UNBOUND_REST(MatchChar, Rest), <<MatchChar/little-utf16, Rest/binary>>).
--define(BYTE_ORDER_MARK_1, undefined_bom1).
--define(BYTE_ORDER_MARK_2, <<16#FF>>).
+-define(BYTE_ORDER_MARK_1, <<16#FF>>).
-define(BYTE_ORDER_MARK_REST(Rest), <<16#FF, 16#FE, Rest/binary>>).
+
+-define(PARSE_BYTE_ORDER_MARK(Bytes, State),
+ parse_byte_order_mark(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_byte_order_mark/2);
+ parse_byte_order_mark(?BYTE_ORDER_MARK_1, State) ->
+ cf(?BYTE_ORDER_MARK_1, State, fun parse_byte_order_mark/2);
+ parse_byte_order_mark(?BYTE_ORDER_MARK_REST(Rest), State) ->
+ parse_xml_decl(Rest, State);
+ parse_byte_order_mark(Bytes, State) ->
+ parse_xml_decl(Bytes, State)).
+
+-define(PARSE_XML_DECL(Bytes, State),
+ parse_xml_decl(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State) when is_binary(Bytes) ->
+ case unicode:characters_to_list(Bytes, Enc) of
+ {incomplete, _, _} ->
+ cf(Bytes, State, fun parse_xml_decl/2);
+ {error, _Encoded, _Rest} ->
+ ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc])));
+ _ ->
+ parse_prolog(Bytes, State)
+ end;
+ parse_xml_decl(Bytes, State) ->
+ parse_prolog(Bytes, State)).
+
+-define(WHITESPACE(Bytes, State, Acc),
+ whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) ->
+ {lists:reverse(Acc), Bytes, State};
+ whitespace(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State, Acc) when is_binary(Bytes) ->
+ case unicode:characters_to_list(Bytes, Enc) of
+ {incomplete, _, _} ->
+ cf(Bytes, State, Acc, fun whitespace/3);
+ {error, _Encoded, _Rest} ->
+ ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc])))
+ end).
+
+-define(PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State),
+ parse_external_entity_byte_order_mark(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_external_entity_byte_order_mark/2);
+ parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_1, State) ->
+ cf(?BYTE_ORDER_MARK_1, State, fun parse_external_entity_byte_order_mark/2);
+ parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_REST(Rest), State) ->
+ parse_external_entity_1(Rest, State);
+ parse_external_entity_byte_order_mark(Bytes, State) ->
+ parse_external_entity_1(Bytes, State)).
diff --git a/lib/xmerl/src/xmerl_sax_parser_utf8.erlsrc b/lib/xmerl/src/xmerl_sax_parser_utf8.erlsrc
index d46d60d237..f41d06d013 100644
--- a/lib/xmerl/src/xmerl_sax_parser_utf8.erlsrc
+++ b/lib/xmerl/src/xmerl_sax_parser_utf8.erlsrc
@@ -2,7 +2,7 @@
%%--------------------------------------------------------------------
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2008-2016. All Rights Reserved.
+%% Copyright Ericsson AB 2008-2017. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -34,11 +34,55 @@
-define(APPEND_STRING(Rest, New), <<Rest/binary, New/binary>>).
-define(TO_INPUT_FORMAT(Val), unicode:characters_to_binary(Val, unicode, utf8)).
-
-%% STRING_REST and STRING_UNBOUND_REST is only different in the list case
-define(STRING_UNBOUND_REST(MatchChar, Rest), <<MatchChar/utf8, Rest/binary>>).
-define(BYTE_ORDER_MARK_1, <<16#EF>>).
-define(BYTE_ORDER_MARK_2, <<16#EF, 16#BB>>).
-define(BYTE_ORDER_MARK_REST(Rest), <<16#EF, 16#BB, 16#BF, Rest/binary>>).
+-define(PARSE_BYTE_ORDER_MARK(Bytes, State),
+ parse_byte_order_mark(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_byte_order_mark/2);
+ parse_byte_order_mark(?BYTE_ORDER_MARK_1, State) ->
+ cf(?BYTE_ORDER_MARK_1, State, fun parse_byte_order_mark/2);
+ parse_byte_order_mark(?BYTE_ORDER_MARK_2, State) ->
+ cf(?BYTE_ORDER_MARK_2, State, fun parse_byte_order_mark/2);
+ parse_byte_order_mark(?BYTE_ORDER_MARK_REST(Rest), State) ->
+ parse_xml_decl(Rest, State);
+ parse_byte_order_mark(Bytes, State) ->
+ parse_xml_decl(Bytes, State)).
+
+-define(PARSE_XML_DECL(Bytes, State),
+ parse_xml_decl(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State) when is_binary(Bytes) ->
+ case unicode:characters_to_list(Bytes, Enc) of
+ {incomplete, _, _} ->
+ cf(Bytes, State, fun parse_xml_decl/2);
+ {error, _Encoded, _Rest} ->
+ ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc])));
+ _ ->
+ parse_prolog(Bytes, State)
+ end;
+ parse_xml_decl(Bytes, State) ->
+ parse_prolog(Bytes, State)).
+
+-define(WHITESPACE(Bytes, State, Acc),
+ whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) ->
+ {lists:reverse(Acc), Bytes, State};
+ whitespace(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State, Acc) when is_binary(Bytes) ->
+ case unicode:characters_to_list(Bytes, Enc) of
+ {incomplete, _, _} ->
+ cf(Bytes, State, Acc, fun whitespace/3);
+ {error, _Encoded, _Rest} ->
+ ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc])))
+ end).
+-define(PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State),
+ parse_external_entity_byte_order_mark(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_external_entity_byte_order_mark/2);
+ parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_1, State) ->
+ cf(?BYTE_ORDER_MARK_1, State, fun parse_external_entity_byte_order_mark/2);
+ parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_2, State) ->
+ cf(?BYTE_ORDER_MARK_2, State, fun parse_external_entity_byte_order_mark/2);
+ parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_REST(Rest), State) ->
+ parse_external_entity_1(Rest, State);
+ parse_external_entity_byte_order_mark(Bytes, State) ->
+ parse_external_entity_1(Bytes, State)).
diff --git a/lib/xmerl/src/xmerl_sax_simple_dom.erl b/lib/xmerl/src/xmerl_sax_simple_dom.erl
index 7eb3afd499..7b15cd92dc 100644
--- a/lib/xmerl/src/xmerl_sax_simple_dom.erl
+++ b/lib/xmerl/src/xmerl_sax_simple_dom.erl
@@ -2,7 +2,7 @@
%%--------------------------------------------------------------------
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2009-2016. All Rights Reserved.
+%% Copyright Ericsson AB 2009-2017. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -129,8 +129,9 @@ build_dom(endDocument,
State#xmerl_sax_simple_dom_state{dom=[Decl, {Tag, Attributes,
lists:reverse(Content)}]};
_ ->
- ?dbg("~p\n", [D]),
- ?error("we're not at end the document when endDocument event is encountered.")
+ %% endDocument is also sent by the parser when a fault occurs, to tell
+ %% the event receiver that no more input will be sent
+ State
end;
%% Element
diff --git a/lib/xmerl/src/xmerl_scan.erl b/lib/xmerl/src/xmerl_scan.erl
index 2147a46a13..a1f6ad4e2c 100644
--- a/lib/xmerl/src/xmerl_scan.erl
+++ b/lib/xmerl/src/xmerl_scan.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2003-2016. All Rights Reserved.
+%% Copyright Ericsson AB 2003-2017. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -111,13 +111,16 @@
%% <dd>Set to 'true' if xmerl should add to elements missing attributes
%% with a defined default value (default 'false').</dd>
%% </dl>
+%% @type xmlElement() = #xmlElement{}.
+%% The record definition is found in xmerl.hrl.
+%% @type xmlDocument() = #xmlDocument{}.
+%% The record definition is found in xmerl.hrl.
%% @type document() = xmlElement() | xmlDocument(). <p>
%% The document returned by <tt>xmerl_scan:string/[1,2]</tt> and
%% <tt>xmerl_scan:file/[1,2]</tt>. The type of the returned record depends on
%% the value of the document option passed to the function.
%% </p>
-
-module(xmerl_scan).
-vsn('0.20').
-date('03-09-16').
@@ -471,8 +474,8 @@ event(_X, S) ->
%% into multiple objects (in which case {Acc',Pos',S'} should be returned.)
%% If {Acc',S'} is returned, Pos will be incremented by 1 by default.
%% Below is an example of an acceptable operation
-acc(X = #xmlText{value = Text}, Acc, S) ->
- {[X#xmlText{value = Text}|Acc], S};
+acc(#xmlText{value = Text}, [X = #xmlText{value = AccText}], S) ->
+ {[X#xmlText{value = AccText ++ Text}], S};
acc(X, Acc, S) ->
{[X|Acc], S}.
@@ -2222,16 +2225,18 @@ processed_whole_element(S=#xmerl_scanner{hook_fun = _Hook,
AllAttrs =
case S#xmerl_scanner.default_attrs of
true ->
- [ #xmlAttribute{name = AttName,
- parents = [{Name, Pos} | Parents],
- language = Lang,
- nsinfo = NSI,
- namespace = Namespace,
- value = AttValue,
- normalized = true} ||
- {AttName, AttValue} <- get_default_attrs(S, Name),
- AttValue =/= no_value,
- not lists:keymember(AttName, #xmlAttribute.name, Attrs) ];
+ DefaultAttrs =
+ [ #xmlAttribute{name = AttName,
+ parents = [{Name, Pos} | Parents],
+ language = Lang,
+ nsinfo = NSI,
+ namespace = Namespace,
+ value = AttValue,
+ normalized = true} ||
+ {AttName, AttValue} <- get_default_attrs(S, Name),
+ AttValue =/= no_value,
+ not lists:keymember(AttName, #xmlAttribute.name, Attrs) ],
+ lists:append(Attrs, DefaultAttrs);
false ->
Attrs
end,
@@ -2304,7 +2309,9 @@ expanded_name(Name, [], #xmlNamespace{default = URI}, S) ->
expanded_name(Name, N = {"xmlns", Local}, #xmlNamespace{nodes = Ns}, S) ->
{_, Value} = lists:keyfind(Local, 1, Ns),
case Name of
- 'xmlns:xml' when Value =/= 'http://www.w3.org/XML/1998/namespace' ->
+ 'xmlns:xml' when Value =:= 'http://www.w3.org/XML/1998/namespace' ->
+ N;
+ 'xmlns:xml' when Value =/= 'http://www.w3.org/XML/1998/namespace' ->
?fatal({xml_prefix_cannot_be_redeclared, Value}, S);
'xmlns:xmlns' ->
?fatal({xmlns_prefix_cannot_be_declared, Value}, S);
@@ -2318,6 +2325,8 @@ expanded_name(Name, N = {"xmlns", Local}, #xmlNamespace{nodes = Ns}, S) ->
N
end
end;
+expanded_name(_Name, {"xml", Local}, _NS, _S) ->
+ {'http://www.w3.org/XML/1998/namespace', list_to_atom(Local)};
expanded_name(_Name, {Prefix, Local}, #xmlNamespace{nodes = Ns}, S) ->
case lists:keysearch(Prefix, 1, Ns) of
{value, {_, URI}} ->
@@ -2328,9 +2337,6 @@ expanded_name(_Name, {Prefix, Local}, #xmlNamespace{nodes = Ns}, S) ->
?fatal({namespace_prefix_not_declared, Prefix}, S)
end.
-
-
-
keyreplaceadd(K, Pos, [H|T], Obj) when K == element(Pos, H) ->
[Obj|T];
keyreplaceadd(K, Pos, [H|T], Obj) ->
diff --git a/lib/xmerl/src/xmerl_xpath.erl b/lib/xmerl/src/xmerl_xpath.erl
index bbebda1030..6146feba49 100644
--- a/lib/xmerl/src/xmerl_xpath.erl
+++ b/lib/xmerl/src/xmerl_xpath.erl
@@ -43,13 +43,27 @@
%% </pre>
%%
%% @type nodeEntity() =
-%% xmlElement()
-%% | xmlAttribute()
-%% | xmlText()
-%% | xmlPI()
-%% | xmlComment()
-%% | xmlNsNode()
-%% | xmlDocument()
+%% #xmlElement{}
+%% | #xmlAttribute{}
+%% | #xmlText{}
+%% | #xmlPI{}
+%% | #xmlComment{}
+%% | #xmlNsNode{}
+%% | #xmlDocument{}
+%%
+%% @type docNodes() = #xmlElement{}
+%% | #xmlAttribute{}
+%% | #xmlText{}
+%% | #xmlPI{}
+%% | #xmlComment{}
+%% | #xmlNsNode{}
+%%
+%% @type docEntity() = #xmlDocument{} | [docNodes()]
+%%
+%% @type xPathString() = string()
+%%
+%% @type parentList() = [{atom(), integer()}]
+%%
%% @type option_list(). <p>Options allow customizing the behaviour of the
%% XPath scanner.
%% </p>
@@ -115,7 +129,7 @@ string(Str, Doc, Options) ->
%% Parents = parentList()
%% Doc = nodeEntity()
%% Options = option_list()
-%% Scalar = xmlObj
+%% Scalar = #xmlObj{}
%% @doc Extracts the nodes from the parsed XML tree according to XPath.
%% xmlObj is a record with fields type and value,
%% where type is boolean | number | string
diff --git a/lib/xmerl/src/xmerl_xs.erl b/lib/xmerl/src/xmerl_xs.erl
index 3e9f6622b8..1ce76cfa41 100644
--- a/lib/xmerl/src/xmerl_xs.erl
+++ b/lib/xmerl/src/xmerl_xs.erl
@@ -45,7 +45,6 @@
% XSLT package which is written in C++.
% See also the <a href="xmerl_xs_examples.html">Tutorial</a>.
% </p>
-
-module(xmerl_xs).
-export([xslapply/2, value_of/1, select/2, built_in_rules/2 ]).
@@ -71,15 +70,13 @@
%% xslapply(fun template/1, E),
%% "&lt;/h1>"];
%% </pre>
-
xslapply(Fun, EList) when is_list(EList) ->
- lists:map( Fun, EList);
+ lists:map(Fun, EList);
xslapply(Fun, E = #xmlElement{})->
lists:map( Fun, E#xmlElement.content).
-
%% @spec value_of(E) -> List
-%% E = unknown()
+%% E = term()
%%
%% @doc Concatenates all text nodes within the tree.
%%
diff --git a/lib/xmerl/src/xmerl_xsd.erl b/lib/xmerl/src/xmerl_xsd.erl
index 4b5efae8dd..a89b3159ec 100644
--- a/lib/xmerl/src/xmerl_xsd.erl
+++ b/lib/xmerl/src/xmerl_xsd.erl
@@ -49,6 +49,7 @@
%% <dd>It is possible by this option to provide a state with process
%% information from an earlier validation.</dd>
%% </dl>
+%% @type filename() = string()
%% @end
%%%-------------------------------------------------------------------
-module(xmerl_xsd).
@@ -138,7 +139,7 @@ state2file(S=#xsd_state{schema_name=SN}) ->
%% @spec state2file(State,FileName) -> ok | {error,Reason}
%% State = global_state()
-%% FileName = filename()
+%% FileName = string()
%% @doc Saves the schema state with all information of the processed
%% schema in a file. You can provide the file name for the saved
%% state. FileName is saved with the <code>.xss</code> extension
@@ -153,7 +154,7 @@ state2file(S,FileName) when is_record(S,xsd_state) ->
%% @spec file2state(FileName) -> {ok,State} | {error,Reason}
%% State = global_state()
-%% FileName = filename()
+%% FileName = string()
%% @doc Reads the schema state with all information of the processed
%% schema from a file created with <code>state2file/[1,2]</code>. The
%% format of this file is internal. The state can then be used
@@ -202,7 +203,7 @@ xmerl_xsd_vsn_check(S=#xsd_state{vsn=MD5_VSN}) ->
process_validate(Schema,Xml) ->
process_validate(Schema,Xml,[]).
%% @spec process_validate(Schema,Element,Options) -> Result
-%% Schema = filename()
+%% Schema = string()
%% Element = XmlElement
%% Options = option_list()
%% Result = {ValidXmlElement,State} | {error,Reason}
@@ -282,7 +283,7 @@ validate3(_,_,S) ->
process_schema(Schema) ->
process_schema(Schema,[]).
%% @spec process_schema(Schema,Options) -> Result
-%% Schema = filename()
+%% Schema = string()
%% Result = {ok,State} | {error,Reason}
%% State = global_state()
%% Reason = [ErrorReason] | ErrorReason
@@ -324,7 +325,7 @@ process_schema2({SE,_},State,_Schema) ->
process_schemas(Schemas) ->
process_schemas(Schemas,[]).
%% @spec process_schemas(Schemas,Options) -> Result
-%% Schemas = [{NameSpace,filename()}|Schemas] | []
+%% Schemas = [{NameSpace,string()}|Schemas] | []
%% Result = {ok,State} | {error,Reason}
%% Reason = [ErrorReason] | ErrorReason
%% Options = option_list()
@@ -5426,7 +5427,7 @@ add_key_once(Key,N,El,L) ->
%% {filename:join([[io_lib:format("/~w(~w)",[X,Y])||{X,Y}<-Parents],Type]),Pos}.
%% @spec format_error(Errors) -> Result
-%% Errors = error_tuple() | [error_tuple()]
+%% Errors = tuple() | [tuple()]
%% Result = string() | [string()]
%% @doc Formats error descriptions to human readable strings.
format_error(L) when is_list(L) ->