aboutsummaryrefslogtreecommitdiffstats
path: root/lib/xmerl/src
diff options
context:
space:
mode:
author: Lars Thorsen <[email protected]> 2017-01-12 16:49:40 +0100
committer: Lars Thorsen <[email protected]> 2017-02-22 10:00:18 +0100
commit: 00294041cd3c6f66598a50b57abf27e6a35e277f (patch)
tree: a479e0f3668179943324df40a1f012faec8fd07e /lib/xmerl/src
parent: 4f56fb3e9120e92ff7b0700402707ad032114311 (diff)
download: otp-00294041cd3c6f66598a50b57abf27e6a35e277f.tar.gz
otp-00294041cd3c6f66598a50b57abf27e6a35e277f.tar.bz2
otp-00294041cd3c6f66598a50b57abf27e6a35e277f.zip
[xmerl] Correct bug handling multiple documents on a stream
Change how the end of a document is interpreted, to comply with Tim Bray's comment on the standard. This makes it possible to handle more than one document on a stream; the standard itself makes it impossible to know when a document has ended without waiting for the next document (and not always even then). Tim Bray's comment about the trailing "Misc" rule: The fact that you're allowed some trailing junk after the root element, I decided (but unfortunately too late) is a real design error in XML. If I'm writing a network client, I'm probably going to close the link as soon as I see the root element end-tag, and not depend on the other end closing it down properly. Furthermore, if I want to send a succession of XML documents over a network link, if I find a processing instruction after a root element, is it a trailer on the previous document, or part of the prolog of the next?
Diffstat (limited to 'lib/xmerl/src')
-rw-r--r-- lib/xmerl/src/xmerl_sax_parser.erl 27
-rw-r--r-- lib/xmerl/src/xmerl_sax_parser_base.erlsrc 66
2 files changed, 56 insertions, 37 deletions
diff --git a/lib/xmerl/src/xmerl_sax_parser.erl b/lib/xmerl/src/xmerl_sax_parser.erl
index 318a0cf7f4..9b5429cffe 100644
--- a/lib/xmerl/src/xmerl_sax_parser.erl
+++ b/lib/xmerl/src/xmerl_sax_parser.erl
@@ -1,7 +1,7 @@
%%--------------------------------------------------------------------
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2008-2016. All Rights Reserved.
+%% Copyright Ericsson AB 2008-2017. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -33,6 +33,7 @@
%% External exports
%%----------------------------------------------------------------------
-export([file/2,
+ stream/3,
stream/2]).
%%----------------------------------------------------------------------
@@ -72,11 +73,12 @@ file(Name,Options) ->
File = filename:basename(Name),
ContinuationFun = fun default_continuation_cb/1,
Res = stream(<<>>,
- [{continuation_fun, ContinuationFun},
- {continuation_state, FD},
- {current_location, CL},
- {entity, File}
- |Options]),
+ [{continuation_fun, ContinuationFun},
+ {continuation_state, FD},
+ {current_location, CL},
+ {entity, File}
+ |Options],
+ file),
ok = file:close(FD),
Res
end.
@@ -92,19 +94,22 @@ file(Name,Options) ->
%% EventState = term()
%% Description: Parse a stream containing an XML document.
%%----------------------------------------------------------------------
-stream(Xml, Options) when is_list(Xml), is_list(Options) ->
+stream(Xml, Options) ->
+ stream(Xml, Options, stream).
+
+stream(Xml, Options, InputType) when is_list(Xml), is_list(Options) ->
State = parse_options(Options, initial_state()),
case State#xmerl_sax_parser_state.file_type of
dtd ->
xmerl_sax_parser_list:parse_dtd(Xml,
State#xmerl_sax_parser_state{encoding = list,
- input_type = stream});
+ input_type = InputType});
normal ->
xmerl_sax_parser_list:parse(Xml,
State#xmerl_sax_parser_state{encoding = list,
- input_type = stream})
+ input_type = InputType})
end;
-stream(Xml, Options) when is_binary(Xml), is_list(Options) ->
+stream(Xml, Options, InputType) when is_binary(Xml), is_list(Options) ->
case parse_options(Options, initial_state()) of
{error, Reason} -> {error, Reason};
State ->
@@ -127,7 +132,7 @@ stream(Xml, Options) when is_binary(Xml), is_list(Options) ->
State#xmerl_sax_parser_state.event_state};
{Xml1, State1} ->
parse_binary(Xml1,
- State1#xmerl_sax_parser_state{input_type = stream},
+ State1#xmerl_sax_parser_state{input_type = InputType},
ParseFunction)
end
end.
diff --git a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc
index 4d75805b9b..3ef2fce4d3 100644
--- a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc
+++ b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc
@@ -1,7 +1,7 @@
%%-*-erlang-*-
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2008-2016. All Rights Reserved.
+%% Copyright Ericsson AB 2008-2017. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -72,7 +72,7 @@ parse(Xml, State) ->
{ok, Rest, State2} ->
State3 = event_callback(endDocument, State2),
ets:delete(RefTable),
- {ok, State3#xmerl_sax_parser_state.event_state, Rest};
+ {ok, State3#xmerl_sax_parser_state.event_state, Rest};
{fatal_error, {State2, Reason}} ->
State3 = event_callback(endDocument, State2),
ets:delete(RefTable),
@@ -81,6 +81,10 @@ parse(Xml, State) ->
State3 = event_callback(endDocument, State2),
ets:delete(RefTable),
format_error(Tag, State3, Reason);
+ {endDocument, Rest, State2} ->
+ State3 = event_callback(endDocument, State2),
+ ets:delete(RefTable),
+ {ok, State3#xmerl_sax_parser_state.event_state, Rest};
Other ->
_State2 = event_callback(endDocument, State1),
ets:delete(RefTable),
@@ -111,7 +115,7 @@ parse_dtd(Xml, State) ->
{Rest, State2} when is_record(State2, xmerl_sax_parser_state) ->
State3 = event_callback(endDocument, State2),
ets:delete(RefTable),
- {ok, State3#xmerl_sax_parser_state.event_state, Rest};
+ {ok, State3#xmerl_sax_parser_state.event_state, Rest};
{endDocument, Rest, State2} when is_record(State2, xmerl_sax_parser_state) ->
State3 = event_callback(endDocument, State2),
ets:delete(RefTable),
@@ -1024,16 +1028,21 @@ parse_etag(Bytes, State) ->
unicode_incomplete_check([Bytes, State, fun parse_etag/2],
undefined).
-
parse_etag_1(?STRING_REST(">", Rest),
#xmerl_sax_parser_state{end_tags=[{_ETag, Uri, LocalName, QName, OldNsList, NewNsList}
- |RestOfETags]} = State, _Tag) ->
+ |RestOfETags],
+ input_type=InputType} = State, _Tag) ->
State1 = event_callback({endElement, Uri, LocalName, QName}, State),
State2 = send_end_prefix_mapping_event(NewNsList, State1),
- parse_content(Rest,
- State2#xmerl_sax_parser_state{end_tags=RestOfETags,
- ns = OldNsList},
- [], true);
+ case check_if_new_doc_allowed(InputType, RestOfETags) of
+ true ->
+ throw({endDocument, Rest, State2#xmerl_sax_parser_state{ns = OldNsList}});
+ false ->
+ parse_content(Rest,
+ State2#xmerl_sax_parser_state{end_tags=RestOfETags,
+ ns = OldNsList},
+ [], true)
+ end;
parse_etag_1(?STRING_UNBOUND_REST(_C, _), State, Tag) ->
{P,TN} = Tag,
?fatal_error(State, "Bad EndTag: " ++ P ++ ":" ++ TN);
@@ -1051,21 +1060,26 @@ parse_etag_1(Bytes, State, Tag) ->
%% Description: Parsing the content part of tags
%% [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
%%----------------------------------------------------------------------
-
parse_content(?STRING_EMPTY, State, Acc, IgnorableWS) ->
- case catch cf(?STRING_EMPTY, State, Acc, IgnorableWS, fun parse_content/4) of
- {Rest, State1} when is_record(State1, xmerl_sax_parser_state) ->
- {Rest, State1};
- {fatal_error, {State1, Msg}} ->
- case check_if_document_complete(State1, Msg) of
- true ->
- State2 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State1),
- {?STRING_EMPTY, State2};
- false ->
- ?fatal_error(State1, Msg)
- end;
- Other ->
- throw(Other)
+ case check_if_document_complete(State, "No more bytes") of
+ true ->
+ State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
+ {?STRING_EMPTY, State1};
+ false ->
+ case catch cf(?STRING_EMPTY, State, Acc, IgnorableWS, fun parse_content/4) of
+ {Rest, State1} when is_record(State1, xmerl_sax_parser_state) ->
+ {Rest, State1};
+ {fatal_error, {State1, Msg}} ->
+ case check_if_document_complete(State1, Msg) of
+ true ->
+ State2 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State1),
+ {?STRING_EMPTY, State2};
+ false ->
+ ?fatal_error(State1, Msg)
+ end;
+ Other ->
+ throw(Other)
+ end
end;
parse_content(?STRING("\r") = Bytes, State, Acc, IgnorableWS) ->
cf(Bytes, State, Acc, IgnorableWS, fun parse_content/4);
@@ -1094,7 +1108,7 @@ parse_content(?STRING_REST("<?", Rest), State, Acc, IgnorableWS) ->
parse_content(?STRING_REST("<!", Rest1) = Rest, #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) ->
case ET of
[] ->
- {Rest, State}; %%LATH : Skicka ignorable WS ???
+ {Rest, State}; %% Skicka ignorable WS ???
_ ->
State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
parse_cdata(Rest1, State1)
@@ -1102,7 +1116,7 @@ parse_content(?STRING_REST("<!", Rest1) = Rest, #xmerl_sax_parser_state{end_tags
parse_content(?STRING_REST("<", Rest1) = Rest, #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) ->
case ET of
[] ->
- {Rest, State}; %%LATH : Skicka ignorable WS ???
+ {Rest, State}; %% Skicka ignorable WS ???
_ ->
State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
parse_stag(Rest1, State1)
@@ -3290,7 +3304,7 @@ cf(Rest, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = C
catch
throw:ErrorTerm ->
?fatal_error(State, ErrorTerm);
- exit:Reason ->
+ exit:Reason ->
?fatal_error(State, {'EXIT', Reason})
end,
case Result of