diff options
Diffstat (limited to 'lib/xmerl/src')
-rw-r--r-- | lib/xmerl/src/xmerl_sax_parser.erl | 33 | ||||
-rw-r--r-- | lib/xmerl/src/xmerl_sax_parser.hrl | 9 | ||||
-rw-r--r-- | lib/xmerl/src/xmerl_sax_parser_base.erlsrc | 84 |
3 files changed, 76 insertions, 50 deletions
diff --git a/lib/xmerl/src/xmerl_sax_parser.erl b/lib/xmerl/src/xmerl_sax_parser.erl index 318a0cf7f4..1aef6c58c4 100644 --- a/lib/xmerl/src/xmerl_sax_parser.erl +++ b/lib/xmerl/src/xmerl_sax_parser.erl @@ -1,7 +1,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -33,6 +33,7 @@ %% External exports %%---------------------------------------------------------------------- -export([file/2, + stream/3, stream/2]). %%---------------------------------------------------------------------- @@ -72,11 +73,12 @@ file(Name,Options) -> File = filename:basename(Name), ContinuationFun = fun default_continuation_cb/1, Res = stream(<<>>, - [{continuation_fun, ContinuationFun}, - {continuation_state, FD}, - {current_location, CL}, - {entity, File} - |Options]), + [{continuation_fun, ContinuationFun}, + {continuation_state, FD}, + {current_location, CL}, + {entity, File} + |Options], + file), ok = file:close(FD), Res end. @@ -92,19 +94,22 @@ file(Name,Options) -> %% EventState = term() %% Description: Parse a stream containing an XML document. %%---------------------------------------------------------------------- -stream(Xml, Options) when is_list(Xml), is_list(Options) -> +stream(Xml, Options) -> + stream(Xml, Options, stream). + +stream(Xml, Options, InputType) when is_list(Xml), is_list(Options) -> State = parse_options(Options, initial_state()), - case State#xmerl_sax_parser_state.file_type of + case State#xmerl_sax_parser_state.file_type of dtd -> xmerl_sax_parser_list:parse_dtd(Xml, State#xmerl_sax_parser_state{encoding = list, - input_type = stream}); + input_type = InputType}); normal -> xmerl_sax_parser_list:parse(Xml, State#xmerl_sax_parser_state{encoding = list, - input_type = stream}) + input_type = InputType}) end; -stream(Xml, Options) when is_binary(Xml), is_list(Options) -> +stream(Xml, Options, InputType) when is_binary(Xml), is_list(Options) -> case parse_options(Options, initial_state()) of {error, Reason} -> {error, Reason}; State -> @@ -127,7 +132,7 @@ stream(Xml, Options) when is_binary(Xml), is_list(Options) -> State#xmerl_sax_parser_state.event_state}; {Xml1, State1} -> parse_binary(Xml1, - State1#xmerl_sax_parser_state{input_type = stream}, + State1#xmerl_sax_parser_state{input_type = InputType}, ParseFunction) end end. @@ -226,12 +231,12 @@ check_encoding_option(E) -> %% Description: Detects which character set is used in a binary stream. %%---------------------------------------------------------------------- detect_charset(<<>>, #xmerl_sax_parser_state{continuation_fun = undefined} = _) -> - throw({error, "Can't detect character encoding due to no indata"}); + {error, "Can't detect character encoding due to no indata"}; detect_charset(<<>>, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = CState} = State) -> case CFun(CState) of {<<>>, _} -> - throw({error, "Can't detect character encoding due to lack of indata"}); + {error, "Can't detect character encoding due to lack of indata"}; {NewBytes, NewContState} -> detect_charset(NewBytes, State#xmerl_sax_parser_state{continuation_state = NewContState}) end; diff --git a/lib/xmerl/src/xmerl_sax_parser.hrl b/lib/xmerl/src/xmerl_sax_parser.hrl index 932ab0cec5..7f9bf6c4d3 100644 --- a/lib/xmerl/src/xmerl_sax_parser.hrl +++ b/lib/xmerl/src/xmerl_sax_parser.hrl @@ -88,14 +88,7 @@ current_location, % Location of the currently parsed XML entity entity, % Parsed XML entity skip_external_dtd = false,% If true the external DTD is skipped during parsing - input_type % Source type: file | stream. - % This field is a preparation for an fix in R17 of a bug in - % the conformance against the standard. - % Today a file which contains two XML documents will be considered - % well-formed and the second is placed in the rest part of the - % return tuple, according to the conformance tests this should fail. - % In the future this will fail if xmerl_sax_aprser:file/2 is used but - % left to the user in the xmerl_sax_aprser:stream/2 case. + input_type % Source type: file | stream }). diff --git a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc index 4d75805b9b..2b9b37b5f3 100644 --- a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc @@ -1,7 +1,7 @@ %%-*-erlang-*- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -72,7 +72,12 @@ parse(Xml, State) -> {ok, Rest, State2} -> State3 = event_callback(endDocument, State2), ets:delete(RefTable), - {ok, State3#xmerl_sax_parser_state.event_state, Rest}; + case check_if_rest_ok(State3#xmerl_sax_parser_state.input_type, Rest) of + true -> + {ok, State3#xmerl_sax_parser_state.event_state, Rest}; + false -> + format_error(fatal_error, State3, "Input found after legal document") + end; {fatal_error, {State2, Reason}} -> State3 = event_callback(endDocument, State2), ets:delete(RefTable), @@ -81,10 +86,14 @@ parse(Xml, State) -> State3 = event_callback(endDocument, State2), ets:delete(RefTable), format_error(Tag, State3, Reason); + {endDocument, Rest, State2} -> + State3 = event_callback(endDocument, State2), + ets:delete(RefTable), + {ok, State3#xmerl_sax_parser_state.event_state, Rest}; Other -> _State2 = event_callback(endDocument, State1), ets:delete(RefTable), - throw(Other) + {fatal_error, Other} end. %%---------------------------------------------------------------------- @@ -111,7 +120,7 @@ parse_dtd(Xml, State) -> {Rest, State2} when is_record(State2, xmerl_sax_parser_state) -> State3 = event_callback(endDocument, State2), ets:delete(RefTable), - {ok, State3#xmerl_sax_parser_state.event_state, Rest}; + {ok, State3#xmerl_sax_parser_state.event_state, Rest}; {endDocument, Rest, State2} when is_record(State2, xmerl_sax_parser_state) -> State3 = event_callback(endDocument, State2), ets:delete(RefTable), @@ -119,7 +128,7 @@ parse_dtd(Xml, State) -> Other -> _State2 = event_callback(endDocument, State1), ets:delete(RefTable), - throw(Other) + {fatal_error, Other} end. @@ -442,6 +451,15 @@ check_if_new_doc_allowed(stream, []) -> check_if_new_doc_allowed(_, _) -> false. +check_if_rest_ok(file, []) -> + true; +check_if_rest_ok(file, <<>>) -> + true; +check_if_rest_ok(stream, _) -> + true; +check_if_rest_ok(_, _) -> + false. + %%---------------------------------------------------------------------- %% Function: parse_pi_1(Rest, State) -> Result %% Input: Rest = string() | binary() @@ -1024,16 +1042,21 @@ parse_etag(Bytes, State) -> unicode_incomplete_check([Bytes, State, fun parse_etag/2], undefined). - parse_etag_1(?STRING_REST(">", Rest), #xmerl_sax_parser_state{end_tags=[{_ETag, Uri, LocalName, QName, OldNsList, NewNsList} - |RestOfETags]} = State, _Tag) -> + |RestOfETags], + input_type=InputType} = State, _Tag) -> State1 = event_callback({endElement, Uri, LocalName, QName}, State), State2 = send_end_prefix_mapping_event(NewNsList, State1), - parse_content(Rest, - State2#xmerl_sax_parser_state{end_tags=RestOfETags, - ns = OldNsList}, - [], true); + case check_if_new_doc_allowed(InputType, RestOfETags) of + true -> + throw({endDocument, Rest, State2#xmerl_sax_parser_state{ns = OldNsList}}); + false -> + parse_content(Rest, + State2#xmerl_sax_parser_state{end_tags=RestOfETags, + ns = OldNsList}, + [], true) + end; parse_etag_1(?STRING_UNBOUND_REST(_C, _), State, Tag) -> {P,TN} = Tag, ?fatal_error(State, "Bad EndTag: " ++ P ++ ":" ++ TN); @@ -1051,21 +1074,26 @@ parse_etag_1(Bytes, State, Tag) -> %% Description: Parsing the content part of tags %% [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* %%---------------------------------------------------------------------- - parse_content(?STRING_EMPTY, State, Acc, IgnorableWS) -> - case catch cf(?STRING_EMPTY, State, Acc, IgnorableWS, fun parse_content/4) of - {Rest, State1} when is_record(State1, xmerl_sax_parser_state) -> - {Rest, State1}; - {fatal_error, {State1, Msg}} -> - case check_if_document_complete(State1, Msg) of - true -> - State2 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State1), - {?STRING_EMPTY, State2}; - false -> - ?fatal_error(State1, Msg) - end; - Other -> - throw(Other) + case check_if_document_complete(State, "No more bytes") of + true -> + State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State), + {?STRING_EMPTY, State1}; + false -> + case catch cf(?STRING_EMPTY, State, Acc, IgnorableWS, fun parse_content/4) of + {Rest, State1} when is_record(State1, xmerl_sax_parser_state) -> + {Rest, State1}; + {fatal_error, {State1, Msg}} -> + case check_if_document_complete(State1, Msg) of + true -> + State2 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State1), + {?STRING_EMPTY, State2}; + false -> + ?fatal_error(State1, Msg) + end; + Other -> + throw(Other) + end end; parse_content(?STRING("\r") = Bytes, State, Acc, IgnorableWS) -> cf(Bytes, State, Acc, IgnorableWS, fun parse_content/4); @@ -1094,7 +1122,7 @@ parse_content(?STRING_REST("<?", Rest), State, Acc, IgnorableWS) -> parse_content(?STRING_REST("<!", Rest1) = Rest, #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) -> case ET of [] -> - {Rest, State}; %%LATH : Skicka ignorable WS ??? + {Rest, State}; %% Skicka ignorable WS ??? _ -> State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State), parse_cdata(Rest1, State1) @@ -1102,7 +1130,7 @@ parse_content(?STRING_REST("<!", Rest1) = Rest, #xmerl_sax_parser_state{end_tags parse_content(?STRING_REST("<", Rest1) = Rest, #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) -> case ET of [] -> - {Rest, State}; %%LATH : Skicka ignorable WS ??? + {Rest, State}; %% Skicka ignorable WS ??? _ -> State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State), parse_stag(Rest1, State1) @@ -3290,7 +3318,7 @@ cf(Rest, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = C catch throw:ErrorTerm -> ?fatal_error(State, ErrorTerm); - exit:Reason -> + exit:Reason -> ?fatal_error(State, {'EXIT', Reason}) end, case Result of |