diff options
author | Lars Thorsen <[email protected]> | 2017-01-12 16:49:40 +0100 |
---|---|---|
committer | Lars Thorsen <[email protected]> | 2017-02-22 10:00:18 +0100 |
commit | 00294041cd3c6f66598a50b57abf27e6a35e277f (patch) | |
tree | a479e0f3668179943324df40a1f012faec8fd07e /lib/xmerl/src | |
parent | 4f56fb3e9120e92ff7b0700402707ad032114311 (diff) | |
download | otp-00294041cd3c6f66598a50b57abf27e6a35e277f.tar.gz otp-00294041cd3c6f66598a50b57abf27e6a35e277f.tar.bz2 otp-00294041cd3c6f66598a50b57abf27e6a35e277f.zip |
[xmerl] Correct bug handling multiple documents on a stream
Change how to interpret end of document to comply with Tim Brays
comment on the standard. This makes it possible to handle more
than one doc on a stream, the standard makes it impossible to
know when the document is ended without waiting for the next
document (and not always even that).
Tim Brays comment about the trailing "Misc" rule:
The fact that you're allowed some trailing junk after the root
element, I decided (but unfortunately too late) is a real design
error in XML. If I'm writing a network client, I'm probably going
to close the link as soon as a I see the root element end-tag, and
not depend on the other end closing it down properly.
Furthermore, if I want to send a succession of XML documents over
a network link, if I find a processing instruction after a root
element, is it a trailer on the previous document, or part of the
prolog of the next?
Diffstat (limited to 'lib/xmerl/src')
-rw-r--r-- | lib/xmerl/src/xmerl_sax_parser.erl | 27 | ||||
-rw-r--r-- | lib/xmerl/src/xmerl_sax_parser_base.erlsrc | 66 |
2 files changed, 56 insertions, 37 deletions
diff --git a/lib/xmerl/src/xmerl_sax_parser.erl b/lib/xmerl/src/xmerl_sax_parser.erl index 318a0cf7f4..9b5429cffe 100644 --- a/lib/xmerl/src/xmerl_sax_parser.erl +++ b/lib/xmerl/src/xmerl_sax_parser.erl @@ -1,7 +1,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -33,6 +33,7 @@ %% External exports %%---------------------------------------------------------------------- -export([file/2, + stream/3, stream/2]). %%---------------------------------------------------------------------- @@ -72,11 +73,12 @@ file(Name,Options) -> File = filename:basename(Name), ContinuationFun = fun default_continuation_cb/1, Res = stream(<<>>, - [{continuation_fun, ContinuationFun}, - {continuation_state, FD}, - {current_location, CL}, - {entity, File} - |Options]), + [{continuation_fun, ContinuationFun}, + {continuation_state, FD}, + {current_location, CL}, + {entity, File} + |Options], + file), ok = file:close(FD), Res end. @@ -92,19 +94,22 @@ file(Name,Options) -> %% EventState = term() %% Description: Parse a stream containing an XML document. %%---------------------------------------------------------------------- -stream(Xml, Options) when is_list(Xml), is_list(Options) -> +stream(Xml, Options) -> + stream(Xml, Options, stream). + +stream(Xml, Options, InputType) when is_list(Xml), is_list(Options) -> State = parse_options(Options, initial_state()), case State#xmerl_sax_parser_state.file_type of dtd -> xmerl_sax_parser_list:parse_dtd(Xml, State#xmerl_sax_parser_state{encoding = list, - input_type = stream}); + input_type = InputType}); normal -> xmerl_sax_parser_list:parse(Xml, State#xmerl_sax_parser_state{encoding = list, - input_type = stream}) + input_type = InputType}) end; -stream(Xml, Options) when is_binary(Xml), is_list(Options) -> +stream(Xml, Options, InputType) when is_binary(Xml), is_list(Options) -> case parse_options(Options, initial_state()) of {error, Reason} -> {error, Reason}; State -> @@ -127,7 +132,7 @@ stream(Xml, Options) when is_binary(Xml), is_list(Options) -> State#xmerl_sax_parser_state.event_state}; {Xml1, State1} -> parse_binary(Xml1, - State1#xmerl_sax_parser_state{input_type = stream}, + State1#xmerl_sax_parser_state{input_type = InputType}, ParseFunction) end end. diff --git a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc index 4d75805b9b..3ef2fce4d3 100644 --- a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc @@ -1,7 +1,7 @@ %%-*-erlang-*- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -72,7 +72,7 @@ parse(Xml, State) -> {ok, Rest, State2} -> State3 = event_callback(endDocument, State2), ets:delete(RefTable), - {ok, State3#xmerl_sax_parser_state.event_state, Rest}; + {ok, State3#xmerl_sax_parser_state.event_state, Rest}; {fatal_error, {State2, Reason}} -> State3 = event_callback(endDocument, State2), ets:delete(RefTable), @@ -81,6 +81,10 @@ parse(Xml, State) -> State3 = event_callback(endDocument, State2), ets:delete(RefTable), format_error(Tag, State3, Reason); + {endDocument, Rest, State2} -> + State3 = event_callback(endDocument, State2), + ets:delete(RefTable), + {ok, State3#xmerl_sax_parser_state.event_state, Rest}; Other -> _State2 = event_callback(endDocument, State1), ets:delete(RefTable), @@ -111,7 +115,7 @@ parse_dtd(Xml, State) -> {Rest, State2} when is_record(State2, xmerl_sax_parser_state) -> State3 = event_callback(endDocument, State2), ets:delete(RefTable), - {ok, State3#xmerl_sax_parser_state.event_state, Rest}; + {ok, State3#xmerl_sax_parser_state.event_state, Rest}; {endDocument, Rest, State2} when is_record(State2, xmerl_sax_parser_state) -> State3 = event_callback(endDocument, State2), ets:delete(RefTable), @@ -1024,16 +1028,21 @@ parse_etag(Bytes, State) -> unicode_incomplete_check([Bytes, State, fun parse_etag/2], undefined). - parse_etag_1(?STRING_REST(">", Rest), #xmerl_sax_parser_state{end_tags=[{_ETag, Uri, LocalName, QName, OldNsList, NewNsList} - |RestOfETags]} = State, _Tag) -> + |RestOfETags], + input_type=InputType} = State, _Tag) -> State1 = event_callback({endElement, Uri, LocalName, QName}, State), State2 = send_end_prefix_mapping_event(NewNsList, State1), - parse_content(Rest, - State2#xmerl_sax_parser_state{end_tags=RestOfETags, - ns = OldNsList}, - [], true); + case check_if_new_doc_allowed(InputType, RestOfETags) of + true -> + throw({endDocument, Rest, State2#xmerl_sax_parser_state{ns = OldNsList}}); + false -> + parse_content(Rest, + State2#xmerl_sax_parser_state{end_tags=RestOfETags, + ns = OldNsList}, + [], true) + end; parse_etag_1(?STRING_UNBOUND_REST(_C, _), State, Tag) -> {P,TN} = Tag, ?fatal_error(State, "Bad EndTag: " ++ P ++ ":" ++ TN); @@ -1051,21 +1060,26 @@ parse_etag_1(Bytes, State, Tag) -> %% Description: Parsing the content part of tags %% [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* %%---------------------------------------------------------------------- - parse_content(?STRING_EMPTY, State, Acc, IgnorableWS) -> - case catch cf(?STRING_EMPTY, State, Acc, IgnorableWS, fun parse_content/4) of - {Rest, State1} when is_record(State1, xmerl_sax_parser_state) -> - {Rest, State1}; - {fatal_error, {State1, Msg}} -> - case check_if_document_complete(State1, Msg) of - true -> - State2 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State1), - {?STRING_EMPTY, State2}; - false -> - ?fatal_error(State1, Msg) - end; - Other -> - throw(Other) + case check_if_document_complete(State, "No more bytes") of + true -> + State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State), + {?STRING_EMPTY, State1}; + false -> + case catch cf(?STRING_EMPTY, State, Acc, IgnorableWS, fun parse_content/4) of + {Rest, State1} when is_record(State1, xmerl_sax_parser_state) -> + {Rest, State1}; + {fatal_error, {State1, Msg}} -> + case check_if_document_complete(State1, Msg) of + true -> + State2 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State1), + {?STRING_EMPTY, State2}; + false -> + ?fatal_error(State1, Msg) + end; + Other -> + throw(Other) + end end; parse_content(?STRING("\r") = Bytes, State, Acc, IgnorableWS) -> cf(Bytes, State, Acc, IgnorableWS, fun parse_content/4); @@ -1094,7 +1108,7 @@ parse_content(?STRING_REST("<?", Rest), State, Acc, IgnorableWS) -> parse_content(?STRING_REST("<!", Rest1) = Rest, #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) -> case ET of [] -> - {Rest, State}; %%LATH : Skicka ignorable WS ??? + {Rest, State}; %% Skicka ignorable WS ??? _ -> State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State), parse_cdata(Rest1, State1) @@ -1102,7 +1116,7 @@ parse_content(?STRING_REST("<!", Rest1) = Rest, #xmerl_sax_parser_state{end_tags parse_content(?STRING_REST("<", Rest1) = Rest, #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) -> case ET of [] -> - {Rest, State}; %%LATH : Skicka ignorable WS ??? + {Rest, State}; %% Skicka ignorable WS ??? _ -> State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State), parse_stag(Rest1, State1) @@ -3290,7 +3304,7 @@ cf(Rest, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = C catch throw:ErrorTerm -> ?fatal_error(State, ErrorTerm); - exit:Reason -> + exit:Reason -> ?fatal_error(State, {'EXIT', Reason}) end, case Result of |