diff options
Diffstat (limited to 'lib/xmerl')
-rw-r--r-- | lib/xmerl/src/xmerl_sax_parser.erl | 33 | ||||
-rw-r--r-- | lib/xmerl/src/xmerl_sax_parser.hrl | 9 | ||||
-rw-r--r-- | lib/xmerl/src/xmerl_sax_parser_base.erlsrc | 144 | ||||
-rw-r--r-- | lib/xmerl/src/xmerl_sax_parser_latin1.erlsrc | 38 | ||||
-rw-r--r-- | lib/xmerl/src/xmerl_sax_parser_list.erlsrc | 19 | ||||
-rw-r--r-- | lib/xmerl/src/xmerl_sax_parser_utf16be.erlsrc | 50 | ||||
-rw-r--r-- | lib/xmerl/src/xmerl_sax_parser_utf16le.erlsrc | 50 | ||||
-rw-r--r-- | lib/xmerl/src/xmerl_sax_parser_utf8.erlsrc | 50 | ||||
-rw-r--r-- | lib/xmerl/src/xmerl_scan.erl | 31 | ||||
-rw-r--r-- | lib/xmerl/test/Makefile | 4 | ||||
-rw-r--r-- | lib/xmerl/test/xmerl_SUITE.erl | 47 | ||||
-rw-r--r-- | lib/xmerl/test/xmerl_sax_SUITE.erl | 6 | ||||
-rw-r--r-- | lib/xmerl/test/xmerl_sax_std_SUITE.erl | 100 | ||||
-rw-r--r-- | lib/xmerl/test/xmerl_sax_stream_SUITE.erl | 245 | ||||
-rw-r--r-- | lib/xmerl/test/xmerl_sax_stream_SUITE_data/xmerl_sax_stream_one.xml | 17 | ||||
-rw-r--r-- | lib/xmerl/test/xmerl_sax_stream_SUITE_data/xmerl_sax_stream_one_junk.xml | 18 | ||||
-rw-r--r-- | lib/xmerl/test/xmerl_sax_stream_SUITE_data/xmerl_sax_stream_two.xml | 34 |
17 files changed, 688 insertions, 207 deletions
diff --git a/lib/xmerl/src/xmerl_sax_parser.erl b/lib/xmerl/src/xmerl_sax_parser.erl index 318a0cf7f4..1aef6c58c4 100644 --- a/lib/xmerl/src/xmerl_sax_parser.erl +++ b/lib/xmerl/src/xmerl_sax_parser.erl @@ -1,7 +1,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -33,6 +33,7 @@ %% External exports %%---------------------------------------------------------------------- -export([file/2, + stream/3, stream/2]). %%---------------------------------------------------------------------- @@ -72,11 +73,12 @@ file(Name,Options) -> File = filename:basename(Name), ContinuationFun = fun default_continuation_cb/1, Res = stream(<<>>, - [{continuation_fun, ContinuationFun}, - {continuation_state, FD}, - {current_location, CL}, - {entity, File} - |Options]), + [{continuation_fun, ContinuationFun}, + {continuation_state, FD}, + {current_location, CL}, + {entity, File} + |Options], + file), ok = file:close(FD), Res end. @@ -92,19 +94,22 @@ file(Name,Options) -> %% EventState = term() %% Description: Parse a stream containing an XML document. %%---------------------------------------------------------------------- -stream(Xml, Options) when is_list(Xml), is_list(Options) -> +stream(Xml, Options) -> + stream(Xml, Options, stream). + +stream(Xml, Options, InputType) when is_list(Xml), is_list(Options) -> State = parse_options(Options, initial_state()), - case State#xmerl_sax_parser_state.file_type of + case State#xmerl_sax_parser_state.file_type of dtd -> xmerl_sax_parser_list:parse_dtd(Xml, State#xmerl_sax_parser_state{encoding = list, - input_type = stream}); + input_type = InputType}); normal -> xmerl_sax_parser_list:parse(Xml, State#xmerl_sax_parser_state{encoding = list, - input_type = stream}) + input_type = InputType}) end; -stream(Xml, Options) when is_binary(Xml), is_list(Options) -> +stream(Xml, Options, InputType) when is_binary(Xml), is_list(Options) -> case parse_options(Options, initial_state()) of {error, Reason} -> {error, Reason}; State -> @@ -127,7 +132,7 @@ stream(Xml, Options) when is_binary(Xml), is_list(Options) -> State#xmerl_sax_parser_state.event_state}; {Xml1, State1} -> parse_binary(Xml1, - State1#xmerl_sax_parser_state{input_type = stream}, + State1#xmerl_sax_parser_state{input_type = InputType}, ParseFunction) end end. @@ -226,12 +231,12 @@ check_encoding_option(E) -> %% Description: Detects which character set is used in a binary stream. %%---------------------------------------------------------------------- detect_charset(<<>>, #xmerl_sax_parser_state{continuation_fun = undefined} = _) -> - throw({error, "Can't detect character encoding due to no indata"}); + {error, "Can't detect character encoding due to no indata"}; detect_charset(<<>>, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = CState} = State) -> case CFun(CState) of {<<>>, _} -> - throw({error, "Can't detect character encoding due to lack of indata"}); + {error, "Can't detect character encoding due to lack of indata"}; {NewBytes, NewContState} -> detect_charset(NewBytes, State#xmerl_sax_parser_state{continuation_state = NewContState}) end; diff --git a/lib/xmerl/src/xmerl_sax_parser.hrl b/lib/xmerl/src/xmerl_sax_parser.hrl index 932ab0cec5..7f9bf6c4d3 100644 --- a/lib/xmerl/src/xmerl_sax_parser.hrl +++ b/lib/xmerl/src/xmerl_sax_parser.hrl @@ -88,14 +88,7 @@ current_location, % Location of the currently parsed XML entity entity, % Parsed XML entity skip_external_dtd = false,% If true the external DTD is skipped during parsing - input_type % Source type: file | stream. - % This field is a preparation for an fix in R17 of a bug in - % the conformance against the standard. - % Today a file which contains two XML documents will be considered - % well-formed and the second is placed in the rest part of the - % return tuple, according to the conformance tests this should fail. - % In the future this will fail if xmerl_sax_aprser:file/2 is used but - % left to the user in the xmerl_sax_aprser:stream/2 case. + input_type % Source type: file | stream }). diff --git a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc index 4d75805b9b..f3470b2809 100644 --- a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc @@ -1,7 +1,7 @@ %%-*-erlang-*- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -72,7 +72,12 @@ parse(Xml, State) -> {ok, Rest, State2} -> State3 = event_callback(endDocument, State2), ets:delete(RefTable), - {ok, State3#xmerl_sax_parser_state.event_state, Rest}; + case check_if_rest_ok(State3#xmerl_sax_parser_state.input_type, Rest) of + true -> + {ok, State3#xmerl_sax_parser_state.event_state, Rest}; + false -> + format_error(fatal_error, State3, "Input found after legal document") + end; {fatal_error, {State2, Reason}} -> State3 = event_callback(endDocument, State2), ets:delete(RefTable), @@ -81,10 +86,14 @@ parse(Xml, State) -> State3 = event_callback(endDocument, State2), ets:delete(RefTable), format_error(Tag, State3, Reason); + {endDocument, Rest, State2} -> + State3 = event_callback(endDocument, State2), + ets:delete(RefTable), + {ok, State3#xmerl_sax_parser_state.event_state, Rest}; Other -> _State2 = event_callback(endDocument, State1), ets:delete(RefTable), - throw(Other) + {fatal_error, Other} end. %%---------------------------------------------------------------------- @@ -111,7 +120,7 @@ parse_dtd(Xml, State) -> {Rest, State2} when is_record(State2, xmerl_sax_parser_state) -> State3 = event_callback(endDocument, State2), ets:delete(RefTable), - {ok, State3#xmerl_sax_parser_state.event_state, Rest}; + {ok, State3#xmerl_sax_parser_state.event_state, Rest}; {endDocument, Rest, State2} when is_record(State2, xmerl_sax_parser_state) -> State3 = event_callback(endDocument, State2), ets:delete(RefTable), @@ -119,7 +128,7 @@ parse_dtd(Xml, State) -> Other -> _State2 = event_callback(endDocument, State1), ets:delete(RefTable), - throw(Other) + {fatal_error, Other} end. @@ -136,10 +145,11 @@ parse_dtd(Xml, State) -> %% [1] document ::= prolog element Misc* %%---------------------------------------------------------------------- parse_document(Rest, State) when is_record(State, xmerl_sax_parser_state) -> - {Rest1, State1} = parse_xml_decl(Rest, State), + {Rest1, State1} = parse_byte_order_mark(Rest, State), {Rest2, State2} = parse_misc(Rest1, State1, true), {ok, Rest2, State2}. +?PARSE_BYTE_ORDER_MARK(Bytes, State). %%---------------------------------------------------------------------- %% Function: parse_xml_decl(Rest, State) -> Result @@ -150,15 +160,8 @@ parse_document(Rest, State) when is_record(State, xmerl_sax_parser_state) -> %% [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? %% [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' %%---------------------------------------------------------------------- --dialyzer({[no_fail_call, no_match], parse_xml_decl/2}). parse_xml_decl(?STRING_EMPTY, State) -> cf(?STRING_EMPTY, State, fun parse_xml_decl/2); -parse_xml_decl(?BYTE_ORDER_MARK_1, State) -> - cf(?BYTE_ORDER_MARK_1, State, fun parse_xml_decl/2); -parse_xml_decl(?BYTE_ORDER_MARK_2, State) -> - cf(?BYTE_ORDER_MARK_2, State, fun parse_xml_decl/2); -parse_xml_decl(?BYTE_ORDER_MARK_REST(Rest), State) -> - cf(Rest, State, fun parse_xml_decl/2); parse_xml_decl(?STRING("<") = Bytes, State) -> cf(Bytes, State, fun parse_xml_decl/2); parse_xml_decl(?STRING("<?") = Bytes, State) -> @@ -170,31 +173,19 @@ parse_xml_decl(?STRING("<?xm") = Bytes, State) -> parse_xml_decl(?STRING("<?xml") = Bytes, State) -> cf(Bytes, State, fun parse_xml_decl/2); parse_xml_decl(?STRING_REST("<?xml", Rest1), State) -> - parse_xml_decl_1(Rest1, State); -parse_xml_decl(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State) when is_binary(Bytes) -> - case unicode:characters_to_list(Bytes, Enc) of - {incomplete, _, _} -> - cf(Bytes, State, fun parse_xml_decl/2); - {error, _Encoded, _Rest} -> - ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))); - _ -> - parse_prolog(Bytes, State) - end; -parse_xml_decl(Bytes, State) -> - parse_prolog(Bytes, State). - + parse_xml_decl_rest(Rest1, State); +?PARSE_XML_DECL(Bytes, State). -parse_xml_decl_1(?STRING_UNBOUND_REST(C, Rest) = Bytes, State) -> +parse_xml_decl_rest(?STRING_UNBOUND_REST(C, Rest) = Bytes, State) -> if ?is_whitespace(C) -> {_XmlAttributes, Rest1, State1} = parse_version_info(Rest, State, []), - %State2 = event_callback({processingInstruction, "xml", XmlAttributes}, State1),% The XML decl. should not be reported as a PI parse_prolog(Rest1, State1); true -> parse_prolog(?STRING_REST("<?xml", Bytes), State) end; -parse_xml_decl_1(Bytes, State) -> - unicode_incomplete_check([Bytes, State, fun parse_xml_decl_1/2], undefined). +parse_xml_decl_rest(Bytes, State) -> + unicode_incomplete_check([Bytes, State, fun parse_xml_decl_rest/2], undefined). @@ -216,8 +207,6 @@ parse_prolog(?STRING_REST("<?", Rest), State) -> parse_prolog(Rest1, State1); {endDocument, Rest1, State1} -> parse_prolog(Rest1, State1) - % IValue = ?TO_INPUT_FORMAT("<?"), - % {?APPEND_STRING(IValue, Rest1), State1} end; parse_prolog(?STRING_REST("<!", Rest), State) -> parse_prolog_1(Rest, State); @@ -230,7 +219,6 @@ parse_prolog(Bytes, State) -> unicode_incomplete_check([Bytes, State, fun parse_prolog/2], "expecting < or whitespace"). - parse_prolog_1(?STRING_EMPTY, State) -> cf(?STRING_EMPTY, State, fun parse_prolog_1/2); parse_prolog_1(?STRING("D") = Bytes, State) -> @@ -442,6 +430,15 @@ check_if_new_doc_allowed(stream, []) -> check_if_new_doc_allowed(_, _) -> false. +check_if_rest_ok(file, []) -> + true; +check_if_rest_ok(file, <<>>) -> + true; +check_if_rest_ok(stream, _) -> + true; +check_if_rest_ok(_, _) -> + false. + %%---------------------------------------------------------------------- %% Function: parse_pi_1(Rest, State) -> Result %% Input: Rest = string() | binary() @@ -1024,16 +1021,21 @@ parse_etag(Bytes, State) -> unicode_incomplete_check([Bytes, State, fun parse_etag/2], undefined). - parse_etag_1(?STRING_REST(">", Rest), #xmerl_sax_parser_state{end_tags=[{_ETag, Uri, LocalName, QName, OldNsList, NewNsList} - |RestOfETags]} = State, _Tag) -> + |RestOfETags], + input_type=InputType} = State, _Tag) -> State1 = event_callback({endElement, Uri, LocalName, QName}, State), State2 = send_end_prefix_mapping_event(NewNsList, State1), - parse_content(Rest, - State2#xmerl_sax_parser_state{end_tags=RestOfETags, - ns = OldNsList}, - [], true); + case check_if_new_doc_allowed(InputType, RestOfETags) of + true -> + throw({endDocument, Rest, State2#xmerl_sax_parser_state{ns = OldNsList}}); + false -> + parse_content(Rest, + State2#xmerl_sax_parser_state{end_tags=RestOfETags, + ns = OldNsList}, + [], true) + end; parse_etag_1(?STRING_UNBOUND_REST(_C, _), State, Tag) -> {P,TN} = Tag, ?fatal_error(State, "Bad EndTag: " ++ P ++ ":" ++ TN); @@ -1051,21 +1053,26 @@ parse_etag_1(Bytes, State, Tag) -> %% Description: Parsing the content part of tags %% [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* %%---------------------------------------------------------------------- - parse_content(?STRING_EMPTY, State, Acc, IgnorableWS) -> - case catch cf(?STRING_EMPTY, State, Acc, IgnorableWS, fun parse_content/4) of - {Rest, State1} when is_record(State1, xmerl_sax_parser_state) -> - {Rest, State1}; - {fatal_error, {State1, Msg}} -> - case check_if_document_complete(State1, Msg) of - true -> - State2 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State1), - {?STRING_EMPTY, State2}; - false -> - ?fatal_error(State1, Msg) - end; - Other -> - throw(Other) + case check_if_document_complete(State, "No more bytes") of + true -> + State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State), + {?STRING_EMPTY, State1}; + false -> + case catch cf(?STRING_EMPTY, State, Acc, IgnorableWS, fun parse_content/4) of + {Rest, State1} when is_record(State1, xmerl_sax_parser_state) -> + {Rest, State1}; + {fatal_error, {State1, Msg}} -> + case check_if_document_complete(State1, Msg) of + true -> + State2 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State1), + {?STRING_EMPTY, State2}; + false -> + ?fatal_error(State1, Msg) + end; + Other -> + throw(Other) + end end; parse_content(?STRING("\r") = Bytes, State, Acc, IgnorableWS) -> cf(Bytes, State, Acc, IgnorableWS, fun parse_content/4); @@ -1094,7 +1101,7 @@ parse_content(?STRING_REST("<?", Rest), State, Acc, IgnorableWS) -> parse_content(?STRING_REST("<!", Rest1) = Rest, #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) -> case ET of [] -> - {Rest, State}; %%LATH : Skicka ignorable WS ??? + {Rest, State}; %% Skicka ignorable WS ??? _ -> State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State), parse_cdata(Rest1, State1) @@ -1102,7 +1109,7 @@ parse_content(?STRING_REST("<!", Rest1) = Rest, #xmerl_sax_parser_state{end_tags parse_content(?STRING_REST("<", Rest1) = Rest, #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) -> case ET of [] -> - {Rest, State}; %%LATH : Skicka ignorable WS ??? + {Rest, State}; %% Skicka ignorable WS ??? _ -> State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State), parse_stag(Rest1, State1) @@ -1204,7 +1211,6 @@ send_character_event(_, true, String, State) -> %% Description: Parse whitespaces. %% [3] S ::= (#x20 | #x9 | #xD | #xA)+ %%---------------------------------------------------------------------- --dialyzer({no_fail_call, whitespace/3}). whitespace(?STRING_EMPTY, State, Acc) -> case cf(?STRING_EMPTY, State, Acc, fun whitespace/3) of {?STRING_EMPTY, State} -> @@ -1230,16 +1236,7 @@ whitespace(?STRING_REST("\r", Rest), State, Acc) -> whitespace(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf |Acc]); whitespace(?STRING_UNBOUND_REST(C, Rest), State, Acc) when ?is_whitespace(C) -> whitespace(Rest, State, [C|Acc]); -whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) -> - {lists:reverse(Acc), Bytes, State}; -whitespace(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State, Acc) when is_binary(Bytes) -> - case unicode:characters_to_list(Bytes, Enc) of - {incomplete, _, _} -> - cf(Bytes, State, Acc, fun whitespace/3); - {error, _Encoded, _Rest} -> - ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))) - end. - +?WHITESPACE(Bytes, State, Acc). %%---------------------------------------------------------------------- %% Function: parse_reference(Rest, State, HaveToExist) -> Result @@ -1362,7 +1359,6 @@ parse_pe_reference_1(Bytes, State, Name) -> "missing ; after reference " ++ Name). - %%---------------------------------------------------------------------- %% Function: insert_reference(Reference, State) -> Result %% Parameters: Reference = string() @@ -1378,7 +1374,6 @@ insert_reference({Name, Type, Value}, Table) -> end. - %%---------------------------------------------------------------------- %% Function: look_up_reference(Reference, State) -> Result %% Parameters: Reference = string() @@ -1693,7 +1688,7 @@ handle_external_entity({http, Url}, State) -> ++ file:format_error(Reason)); {ok, FD} -> {?STRING_EMPTY, EntityState} = - parse_external_entity_1(<<>>, + parse_external_entity_byte_order_mark(<<>>, State#xmerl_sax_parser_state{continuation_state=FD, current_location=filename:dirname(Url), entity=filename:basename(Url), @@ -1709,6 +1704,8 @@ handle_external_entity({http, Url}, State) -> handle_external_entity({Tag, _Url}, State) -> ?fatal_error(State, "Unsupported URI type: " ++ atom_to_list(Tag)). +?PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State). + %%---------------------------------------------------------------------- %% Function : parse_external_entity_1(Rest, State) -> Result %% Parameters: Rest = string() | binary() @@ -1716,7 +1713,6 @@ handle_external_entity({Tag, _Url}, State) -> %% Result : {Rest, State} %% Description: Parse the external entity. %%---------------------------------------------------------------------- --dialyzer({[no_fail_call, no_match], parse_external_entity_1/2}). parse_external_entity_1(?STRING_EMPTY, #xmerl_sax_parser_state{file_type=Type} = State) -> case catch cf(?STRING_EMPTY, State, fun parse_external_entity_1/2) of {Rest, State1} when is_record(State1, xmerl_sax_parser_state) -> @@ -1726,12 +1722,6 @@ parse_external_entity_1(?STRING_EMPTY, #xmerl_sax_parser_state{file_type=Type} = Other -> throw(Other) end; -parse_external_entity_1(?BYTE_ORDER_MARK_1, State) -> - cf(?BYTE_ORDER_MARK_1, State, fun parse_external_entity_1/2); -parse_external_entity_1(?BYTE_ORDER_MARK_2, State) -> - cf(?BYTE_ORDER_MARK_2, State, fun parse_external_entity_1/2); -parse_external_entity_1(?BYTE_ORDER_MARK_REST(Rest), State) -> - parse_external_entity_1(Rest, State); parse_external_entity_1(?STRING("<") = Bytes, State) -> cf(Bytes, State, fun parse_external_entity_1/2); parse_external_entity_1(?STRING("<?") = Bytes, State) -> @@ -3290,7 +3280,7 @@ cf(Rest, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = C catch throw:ErrorTerm -> ?fatal_error(State, ErrorTerm); - exit:Reason -> + exit:Reason -> ?fatal_error(State, {'EXIT', Reason}) end, case Result of diff --git a/lib/xmerl/src/xmerl_sax_parser_latin1.erlsrc b/lib/xmerl/src/xmerl_sax_parser_latin1.erlsrc index 961806bf4c..6e59347fb8 100644 --- a/lib/xmerl/src/xmerl_sax_parser_latin1.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_latin1.erlsrc @@ -2,7 +2,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -34,8 +34,36 @@ -define(APPEND_STRING(Rest, New), <<Rest/binary, New/binary>>). -define(TO_INPUT_FORMAT(Val), unicode:characters_to_binary(Val, unicode, latin1)). -%% STRING_REST and STRING_UNBOUND_REST is only different in the list case -define(STRING_UNBOUND_REST(MatchChar, Rest), <<MatchChar, Rest/binary>>). --define(BYTE_ORDER_MARK_1, undefined_bom1). --define(BYTE_ORDER_MARK_2, undefined_bom2). --define(BYTE_ORDER_MARK_REST(Rest), <<undefined, Rest/binary>>). + +-define(PARSE_BYTE_ORDER_MARK(Bytes, State), + parse_byte_order_mark(Bytes, State) -> + parse_xml_decl(Bytes, State)). + +-define(PARSE_XML_DECL(Bytes, State), + parse_xml_decl(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, fun parse_xml_decl/2); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))); + _ -> + parse_prolog(Bytes, State) + end; + parse_xml_decl(Bytes, State) -> + parse_prolog(Bytes, State)). + +-define(WHITESPACE(Bytes, State, Acc), + whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) -> + {lists:reverse(Acc), Bytes, State}; + whitespace(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State, Acc) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, Acc, fun whitespace/3); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))) + end). + +-define(PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State), + parse_external_entity_byte_order_mark(Bytes, State) -> + parse_external_entity_1(Bytes, State)). diff --git a/lib/xmerl/src/xmerl_sax_parser_list.erlsrc b/lib/xmerl/src/xmerl_sax_parser_list.erlsrc index 624a621d92..6a4435b1d9 100644 --- a/lib/xmerl/src/xmerl_sax_parser_list.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_list.erlsrc @@ -36,6 +36,19 @@ %% In the list case we can't use a '++' when matchin against an unbound variable -define(STRING_UNBOUND_REST(MatchChar, Rest), [MatchChar | Rest]). --define(BYTE_ORDER_MARK_1, undefined_bom1). --define(BYTE_ORDER_MARK_2, undefined_bom2). --define(BYTE_ORDER_MARK_REST(Rest), [undefined|Rest]). + +-define(PARSE_BYTE_ORDER_MARK(Bytes, State), + parse_byte_order_mark(Bytes, State) -> + parse_xml_decl(Bytes, State)). + +-define(PARSE_XML_DECL(Bytes, State), + parse_xml_decl(Bytes, State) -> + parse_prolog(Bytes, State)). + +-define(WHITESPACE(Bytes, State, Acc), + whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) -> + {lists:reverse(Acc), Bytes, State}). + +-define(PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State), + parse_external_entity_byte_order_mark(Bytes, State) -> + parse_external_entity_1(Bytes, State)). diff --git a/lib/xmerl/src/xmerl_sax_parser_utf16be.erlsrc b/lib/xmerl/src/xmerl_sax_parser_utf16be.erlsrc index ff84ece97a..ec89024729 100644 --- a/lib/xmerl/src/xmerl_sax_parser_utf16be.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_utf16be.erlsrc @@ -2,7 +2,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -34,8 +34,50 @@ -define(APPEND_STRING(Rest, New), <<Rest/binary, New/binary>>). -define(TO_INPUT_FORMAT(Val), unicode:characters_to_binary(Val, unicode, {utf16, big})). -%% STRING_REST and STRING_UNBOUND_REST is only different in the list case -define(STRING_UNBOUND_REST(MatchChar, Rest), <<MatchChar/big-utf16, Rest/binary>>). --define(BYTE_ORDER_MARK_1, undefined_bom1). --define(BYTE_ORDER_MARK_2, <<16#FE>>). +-define(BYTE_ORDER_MARK_1, <<16#FE>>). -define(BYTE_ORDER_MARK_REST(Rest), <<16#FE, 16#FF, Rest/binary>>). + +-define(PARSE_BYTE_ORDER_MARK(Bytes, State), + parse_byte_order_mark(?STRING_EMPTY, State) -> + cf(?STRING_EMPTY, State, fun parse_byte_order_mark/2); + parse_byte_order_mark(?BYTE_ORDER_MARK_1, State) -> + cf(?BYTE_ORDER_MARK_1, State, fun parse_byte_order_mark/2); + parse_byte_order_mark(?BYTE_ORDER_MARK_REST(Rest), State) -> + parse_xml_decl(Rest, State); + parse_byte_order_mark(Bytes, State) -> + parse_xml_decl(Bytes, State)). + +-define(PARSE_XML_DECL(Bytes, State), + parse_xml_decl(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, fun parse_xml_decl/2); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))); + _ -> + parse_prolog(Bytes, State) + end; + parse_xml_decl(Bytes, State) -> + parse_prolog(Bytes, State)). + +-define(WHITESPACE(Bytes, State, Acc), + whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) -> + {lists:reverse(Acc), Bytes, State}; + whitespace(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State, Acc) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, Acc, fun whitespace/3); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))) + end). + +-define(PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State), + parse_external_entity_byte_order_mark(?STRING_EMPTY, State) -> + cf(?STRING_EMPTY, State, fun parse_external_entity_byte_order_mark/2); + parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_1, State) -> + cf(?BYTE_ORDER_MARK_1, State, fun parse_external_entity_byte_order_mark/2); + parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_REST(Rest), State) -> + parse_external_entity_1(Rest, State); + parse_external_entity_byte_order_mark(Bytes, State) -> + parse_external_entity_1(Bytes, State)). diff --git a/lib/xmerl/src/xmerl_sax_parser_utf16le.erlsrc b/lib/xmerl/src/xmerl_sax_parser_utf16le.erlsrc index a330fce8d0..566333a045 100644 --- a/lib/xmerl/src/xmerl_sax_parser_utf16le.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_utf16le.erlsrc @@ -2,7 +2,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -34,8 +34,50 @@ -define(APPEND_STRING(Rest, New), <<Rest/binary, New/binary>>). -define(TO_INPUT_FORMAT(Val), unicode:characters_to_binary(Val, unicode, {utf16, little})). -%% STRING_REST and STRING_UNBOUND_REST is only different in the list case -define(STRING_UNBOUND_REST(MatchChar, Rest), <<MatchChar/little-utf16, Rest/binary>>). --define(BYTE_ORDER_MARK_1, undefined_bom1). --define(BYTE_ORDER_MARK_2, <<16#FF>>). +-define(BYTE_ORDER_MARK_1, <<16#FF>>). -define(BYTE_ORDER_MARK_REST(Rest), <<16#FF, 16#FE, Rest/binary>>). + +-define(PARSE_BYTE_ORDER_MARK(Bytes, State), + parse_byte_order_mark(?STRING_EMPTY, State) -> + cf(?STRING_EMPTY, State, fun parse_byte_order_mark/2); + parse_byte_order_mark(?BYTE_ORDER_MARK_1, State) -> + cf(?BYTE_ORDER_MARK_1, State, fun parse_byte_order_mark/2); + parse_byte_order_mark(?BYTE_ORDER_MARK_REST(Rest), State) -> + parse_xml_decl(Rest, State); + parse_byte_order_mark(Bytes, State) -> + parse_xml_decl(Bytes, State)). + +-define(PARSE_XML_DECL(Bytes, State), + parse_xml_decl(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, fun parse_xml_decl/2); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))); + _ -> + parse_prolog(Bytes, State) + end; + parse_xml_decl(Bytes, State) -> + parse_prolog(Bytes, State)). + +-define(WHITESPACE(Bytes, State, Acc), + whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) -> + {lists:reverse(Acc), Bytes, State}; + whitespace(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State, Acc) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, Acc, fun whitespace/3); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))) + end). + +-define(PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State), + parse_external_entity_byte_order_mark(?STRING_EMPTY, State) -> + cf(?STRING_EMPTY, State, fun parse_external_entity_byte_order_mark/2); + parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_1, State) -> + cf(?BYTE_ORDER_MARK_1, State, fun parse_external_entity_byte_order_mark/2); + parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_REST(Rest), State) -> + parse_external_entity_1(Rest, State); + parse_external_entity_byte_order_mark(Bytes, State) -> + parse_external_entity_1(Bytes, State)). diff --git a/lib/xmerl/src/xmerl_sax_parser_utf8.erlsrc b/lib/xmerl/src/xmerl_sax_parser_utf8.erlsrc index d46d60d237..f41d06d013 100644 --- a/lib/xmerl/src/xmerl_sax_parser_utf8.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_utf8.erlsrc @@ -2,7 +2,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -34,11 +34,55 @@ -define(APPEND_STRING(Rest, New), <<Rest/binary, New/binary>>). -define(TO_INPUT_FORMAT(Val), unicode:characters_to_binary(Val, unicode, utf8)). - -%% STRING_REST and STRING_UNBOUND_REST is only different in the list case -define(STRING_UNBOUND_REST(MatchChar, Rest), <<MatchChar/utf8, Rest/binary>>). -define(BYTE_ORDER_MARK_1, <<16#EF>>). -define(BYTE_ORDER_MARK_2, <<16#EF, 16#BB>>). -define(BYTE_ORDER_MARK_REST(Rest), <<16#EF, 16#BB, 16#BF, Rest/binary>>). +-define(PARSE_BYTE_ORDER_MARK(Bytes, State), + parse_byte_order_mark(?STRING_EMPTY, State) -> + cf(?STRING_EMPTY, State, fun parse_byte_order_mark/2); + parse_byte_order_mark(?BYTE_ORDER_MARK_1, State) -> + cf(?BYTE_ORDER_MARK_1, State, fun parse_byte_order_mark/2); + parse_byte_order_mark(?BYTE_ORDER_MARK_2, State) -> + cf(?BYTE_ORDER_MARK_2, State, fun parse_byte_order_mark/2); + parse_byte_order_mark(?BYTE_ORDER_MARK_REST(Rest), State) -> + parse_xml_decl(Rest, State); + parse_byte_order_mark(Bytes, State) -> + parse_xml_decl(Bytes, State)). + +-define(PARSE_XML_DECL(Bytes, State), + parse_xml_decl(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, fun parse_xml_decl/2); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))); + _ -> + parse_prolog(Bytes, State) + end; + parse_xml_decl(Bytes, State) -> + parse_prolog(Bytes, State)). + +-define(WHITESPACE(Bytes, State, Acc), + whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) -> + {lists:reverse(Acc), Bytes, State}; + whitespace(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State, Acc) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, Acc, fun whitespace/3); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))) + end). +-define(PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State), + parse_external_entity_byte_order_mark(?STRING_EMPTY, State) -> + cf(?STRING_EMPTY, State, fun parse_external_entity_byte_order_mark/2); + parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_1, State) -> + cf(?BYTE_ORDER_MARK_1, State, fun parse_external_entity_byte_order_mark/2); + parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_2, State) -> + cf(?BYTE_ORDER_MARK_2, State, fun parse_external_entity_byte_order_mark/2); + parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_REST(Rest), State) -> + parse_external_entity_1(Rest, State); + parse_external_entity_byte_order_mark(Bytes, State) -> + parse_external_entity_1(Bytes, State)). diff --git a/lib/xmerl/src/xmerl_scan.erl b/lib/xmerl/src/xmerl_scan.erl index 5e0459ec21..95dc82e5c9 100644 --- a/lib/xmerl/src/xmerl_scan.erl +++ b/lib/xmerl/src/xmerl_scan.erl @@ -2225,16 +2225,18 @@ processed_whole_element(S=#xmerl_scanner{hook_fun = _Hook, AllAttrs = case S#xmerl_scanner.default_attrs of true -> - [ #xmlAttribute{name = AttName, - parents = [{Name, Pos} | Parents], - language = Lang, - nsinfo = NSI, - namespace = Namespace, - value = AttValue, - normalized = true} || - {AttName, AttValue} <- get_default_attrs(S, Name), - AttValue =/= no_value, - not lists:keymember(AttName, #xmlAttribute.name, Attrs) ]; + DefaultAttrs = + [ #xmlAttribute{name = AttName, + parents = [{Name, Pos} | Parents], + language = Lang, + nsinfo = NSI, + namespace = Namespace, + value = AttValue, + normalized = true} || + {AttName, AttValue} <- get_default_attrs(S, Name), + AttValue =/= no_value, + not lists:keymember(AttName, #xmlAttribute.name, Attrs) ], + lists:append(Attrs, DefaultAttrs); false -> Attrs end, @@ -2307,7 +2309,9 @@ expanded_name(Name, [], #xmlNamespace{default = URI}, S) -> expanded_name(Name, N = {"xmlns", Local}, #xmlNamespace{nodes = Ns}, S) -> {_, Value} = lists:keyfind(Local, 1, Ns), case Name of - 'xmlns:xml' when Value =/= 'http://www.w3.org/XML/1998/namespace' -> + 'xmlns:xml' when Value =:= 'http://www.w3.org/XML/1998/namespace' -> + N; + 'xmlns:xml' when Value =/= 'http://www.w3.org/XML/1998/namespace' -> ?fatal({xml_prefix_cannot_be_redeclared, Value}, S); 'xmlns:xmlns' -> ?fatal({xmlns_prefix_cannot_be_declared, Value}, S); @@ -2321,6 +2325,8 @@ expanded_name(Name, N = {"xmlns", Local}, #xmlNamespace{nodes = Ns}, S) -> N end end; +expanded_name(_Name, {"xml", Local}, _NS, _S) -> + {'http://www.w3.org/XML/1998/namespace', list_to_atom(Local)}; expanded_name(_Name, {Prefix, Local}, #xmlNamespace{nodes = Ns}, S) -> case lists:keysearch(Prefix, 1, Ns) of {value, {_, URI}} -> @@ -2331,9 +2337,6 @@ expanded_name(_Name, {Prefix, Local}, #xmlNamespace{nodes = Ns}, S) -> ?fatal({namespace_prefix_not_declared, Prefix}, S) end. - - - keyreplaceadd(K, Pos, [H|T], Obj) when K == element(Pos, H) -> [Obj|T]; keyreplaceadd(K, Pos, [H|T], Obj) -> diff --git a/lib/xmerl/test/Makefile b/lib/xmerl/test/Makefile index 7a326e334f..b13fee05b3 100644 --- a/lib/xmerl/test/Makefile +++ b/lib/xmerl/test/Makefile @@ -55,7 +55,8 @@ SUITE_FILES= \ xmerl_xsd_SUITE.erl \ xmerl_xsd_MS2002-01-16_SUITE.erl \ xmerl_xsd_NIST2002-01-16_SUITE.erl \ - xmerl_xsd_Sun2002-01-16_SUITE.erl + xmerl_xsd_Sun2002-01-16_SUITE.erl \ + xmerl_sax_stream_SUITE.erl XML_FILES= \ testcases.dtd \ @@ -125,4 +126,5 @@ release_tests_spec: opt @tar cfh - xmerl_xsd_MS2002-01-16_SUITE_data | (cd "$(RELSYSDIR)"; tar xf -) @tar cfh - xmerl_xsd_NIST2002-01-16_SUITE_data | (cd "$(RELSYSDIR)"; tar xf -) @tar cfh - xmerl_xsd_Sun2002-01-16_SUITE_data | (cd "$(RELSYSDIR)"; tar xf -) + @tar cfh - xmerl_sax_stream_SUITE_data | (cd "$(RELSYSDIR)"; tar xf -) chmod -R u+w "$(RELSYSDIR)" diff --git a/lib/xmerl/test/xmerl_SUITE.erl b/lib/xmerl/test/xmerl_SUITE.erl index e97b8c6a4b..58c462483c 100644 --- a/lib/xmerl/test/xmerl_SUITE.erl +++ b/lib/xmerl/test/xmerl_SUITE.erl @@ -54,7 +54,8 @@ groups() -> cpd_expl_provided_DTD]}, {misc, [], [latin1_alias, syntax_bug1, syntax_bug2, syntax_bug3, - pe_ref1, copyright, testXSEIF, export_simple1, export]}, + pe_ref1, copyright, testXSEIF, export_simple1, export, + default_attrs_bug, xml_ns]}, {eventp_tests, [], [sax_parse_and_export]}, {ticket_tests, [], [ticket_5998, ticket_7211, ticket_7214, ticket_7430, @@ -223,6 +224,50 @@ syntax_bug3(Config) -> Err -> Err end. +default_attrs_bug(Config) -> + file:set_cwd(datadir(Config)), + Doc = "<!DOCTYPE doc [<!ATTLIST doc b CDATA \"default\">]>\n" + "<doc a=\"explicit\"/>", + {#xmlElement{attributes = [#xmlAttribute{name = a, value = "explicit"}, + #xmlAttribute{name = b, value = "default"}]}, + [] + } = xmerl_scan:string(Doc, [{default_attrs, true}]), + Doc2 = "<!DOCTYPE doc [<!ATTLIST doc b CDATA \"default\">]>\n" + "<doc b=\"also explicit\" a=\"explicit\"/>", + {#xmlElement{attributes = [#xmlAttribute{name = b, value = "also explicit"}, + #xmlAttribute{name = a, value = "explicit"}]}, + [] + } = xmerl_scan:string(Doc2, [{default_attrs, true}]), + ok. + + +xml_ns(Config) -> + Doc = "<?xml version='1.0'?>\n" + "<doc xml:attr1=\"implicit xml ns\"/>", + {#xmlElement{namespace=#xmlNamespace{default = [], nodes = []}, + attributes = [#xmlAttribute{name = 'xml:attr1', + expanded_name = {'http://www.w3.org/XML/1998/namespace',attr1}, + nsinfo = {"xml","attr1"}, + namespace = #xmlNamespace{default = [], nodes = []}}]}, + [] + } = xmerl_scan:string(Doc, [{namespace_conformant, true}]), + Doc2 = "<?xml version='1.0'?>\n" + "<doc xmlns:xml=\"http://www.w3.org/XML/1998/namespace\" xml:attr1=\"explicit xml ns\"/>", + {#xmlElement{namespace=#xmlNamespace{default = [], nodes = [{"xml",'http://www.w3.org/XML/1998/namespace'}]}, + attributes = [#xmlAttribute{name = 'xmlns:xml', + expanded_name = {"xmlns","xml"}, + nsinfo = {"xmlns","xml"}, + namespace = #xmlNamespace{default = [], + nodes = [{"xml",'http://www.w3.org/XML/1998/namespace'}]}}, + #xmlAttribute{name = 'xml:attr1', + expanded_name = {'http://www.w3.org/XML/1998/namespace',attr1}, + nsinfo = {"xml","attr1"}, + namespace = #xmlNamespace{default = [], + nodes = [{"xml",'http://www.w3.org/XML/1998/namespace'}]}}]}, + [] + } = xmerl_scan:string(Doc2, [{namespace_conformant, true}]), + ok. + pe_ref1(Config) -> file:set_cwd(datadir(Config)), {#xmlElement{},[]} = xmerl_scan:file(datadir_join(Config,[misc,"PE_ref1.xml"]),[{validation,true}]). diff --git a/lib/xmerl/test/xmerl_sax_SUITE.erl b/lib/xmerl/test/xmerl_sax_SUITE.erl index f5c0a783c4..7d1a70905c 100644 --- a/lib/xmerl/test/xmerl_sax_SUITE.erl +++ b/lib/xmerl/test/xmerl_sax_SUITE.erl @@ -85,17 +85,17 @@ ticket_11551(_Config) -> <a>hej</a> <?xml version=\"1.0\" encoding=\"utf-8\" ?> <a>hej</a>">>, - {ok, undefined, <<"<?xml", _/binary>>} = xmerl_sax_parser:stream(Stream1, []), + {ok, undefined, <<"\n<?xml", _/binary>>} = xmerl_sax_parser:stream(Stream1, []), Stream2= <<"<?xml version=\"1.0\" encoding=\"utf-8\" ?> <a>hej</a> <?xml version=\"1.0\" encoding=\"utf-8\" ?> <a>hej</a>">>, - {ok, undefined, <<"<?xml", _/binary>>} = xmerl_sax_parser:stream(Stream2, []), + {ok, undefined, <<"\n\n\n<?xml", _/binary>>} = xmerl_sax_parser:stream(Stream2, []), Stream3= <<"<a>hej</a> <?xml version=\"1.0\" encoding=\"utf-8\" ?> <a>hej</a>">>, - {ok, undefined, <<"<?xml", _/binary>>} = xmerl_sax_parser:stream(Stream3, []), + {ok, undefined, <<"\n\n<?xml", _/binary>>} = xmerl_sax_parser:stream(Stream3, []), ok. diff --git a/lib/xmerl/test/xmerl_sax_std_SUITE.erl b/lib/xmerl/test/xmerl_sax_std_SUITE.erl index 525a3b175a..b8412206cc 100644 --- a/lib/xmerl/test/xmerl_sax_std_SUITE.erl +++ b/lib/xmerl/test/xmerl_sax_std_SUITE.erl @@ -2,7 +2,7 @@ %%---------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2010-2016. All Rights Reserved. +%% Copyright Ericsson AB 2010-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -507,11 +507,8 @@ end_per_testcase(_Func,_Config) -> 'not-wf-sa-036'(Config) -> file:set_cwd(xmerl_test_lib:get_data_dir(Config)), Path = filename:join([xmerl_test_lib:get_data_dir(Config),"xmltest","not-wf/sa/036.xml"]), - %% Special case becase we returns everything after a legal document - %% as an rest instead of giving and error to let the user handle - %% multipple docs on a stream. - {ok,_,<<"Illegal data\r\n">>} = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]). - %%check_result(R, "not-wf"). + R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]), + check_result(R, "not-wf"). %%---------------------------------------------------------------------- %% Test Case @@ -522,11 +519,8 @@ end_per_testcase(_Func,_Config) -> 'not-wf-sa-037'(Config) -> file:set_cwd(xmerl_test_lib:get_data_dir(Config)), Path = filename:join([xmerl_test_lib:get_data_dir(Config),"xmltest","not-wf/sa/037.xml"]), - %% Special case becase we returns everything after a legal document - %% as an rest instead of giving and error to let the user handle - %% multipple docs on a stream. - {ok,_,<<" \r\n">>} = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]). - %%check_result(R, "not-wf"). + R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]), + check_result(R, "not-wf"). %%---------------------------------------------------------------------- %% Test Case @@ -561,11 +555,8 @@ end_per_testcase(_Func,_Config) -> 'not-wf-sa-040'(Config) -> file:set_cwd(xmerl_test_lib:get_data_dir(Config)), Path = filename:join([xmerl_test_lib:get_data_dir(Config),"xmltest","not-wf/sa/040.xml"]), - %% Special case becase we returns everything after a legal document - %% as an rest instead of giving and error to let the user handle - %% multipple docs on a stream. - {ok,_,<<"<doc></doc>\r\n">>} = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]). - %%check_result(R, "not-wf"). + R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]), + check_result(R, "not-wf"). %%---------------------------------------------------------------------- %% Test Case @@ -576,11 +567,8 @@ end_per_testcase(_Func,_Config) -> 'not-wf-sa-041'(Config) -> file:set_cwd(xmerl_test_lib:get_data_dir(Config)), Path = filename:join([xmerl_test_lib:get_data_dir(Config),"xmltest","not-wf/sa/041.xml"]), - %% Special case becase we returns everything after a legal document - %% as an rest instead of giving and error to let the user handle - %% multipple docs on a stream. - {ok,_,<<"<doc></doc>\r\n">>} = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]). - %%check_result(R, "not-wf"). + R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]), + check_result(R, "not-wf"). %%---------------------------------------------------------------------- %% Test Case @@ -603,11 +591,8 @@ end_per_testcase(_Func,_Config) -> 'not-wf-sa-043'(Config) -> file:set_cwd(xmerl_test_lib:get_data_dir(Config)), Path = filename:join([xmerl_test_lib:get_data_dir(Config),"xmltest","not-wf/sa/043.xml"]), - %% Special case becase we returns everything after a legal document - %% as an rest instead of giving and error to let the user handle - %% multipple docs on a stream. - {ok,_,<<"Illegal data\r\n">>} = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]). - %%check_result(R, "not-wf"). + R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]), + check_result(R, "not-wf"). %%---------------------------------------------------------------------- %% Test Case @@ -618,11 +603,8 @@ end_per_testcase(_Func,_Config) -> 'not-wf-sa-044'(Config) -> file:set_cwd(xmerl_test_lib:get_data_dir(Config)), Path = filename:join([xmerl_test_lib:get_data_dir(Config),"xmltest","not-wf/sa/044.xml"]), - %% Special case becase we returns everything after a legal document - %% as an rest instead of giving and error to let the user handle - %% multipple docs on a stream. - {ok,_,<<"<doc/>\r\n">>} = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]). - %%check_result(R, "not-wf"). + R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]), + check_result(R, "not-wf"). %%---------------------------------------------------------------------- %% Test Case @@ -669,11 +651,8 @@ end_per_testcase(_Func,_Config) -> 'not-wf-sa-048'(Config) -> file:set_cwd(xmerl_test_lib:get_data_dir(Config)), Path = filename:join([xmerl_test_lib:get_data_dir(Config),"xmltest","not-wf/sa/048.xml"]), - %% Special case becase we returns everything after a legal document - %% as an rest instead of giving and error to let the user handle - %% multipple docs on a stream. - {ok,_,<<"<![CDATA[]]>\r\n">>} = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]). - %%check_result(R, "not-wf"). + R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]), + check_result(R, "not-wf"). %%---------------------------------------------------------------------- %% Test Case @@ -1416,11 +1395,8 @@ end_per_testcase(_Func,_Config) -> 'not-wf-sa-110'(Config) -> file:set_cwd(xmerl_test_lib:get_data_dir(Config)), Path = filename:join([xmerl_test_lib:get_data_dir(Config),"xmltest","not-wf/sa/110.xml"]), - %% Special case becase we returns everything after a legal document - %% as an rest instead of giving and error to let the user handle - %% multipple docs on a stream. - {ok,_,<<"&e;\r\n">>} = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]). - %%check_result(R, "not-wf"). + R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]), + check_result(R, "not-wf"). %%---------------------------------------------------------------------- %% Test Case @@ -1914,9 +1890,9 @@ end_per_testcase(_Func,_Config) -> %% Special case becase we returns everything after a legal document %% as an rest instead of giving and error to let the user handle %% multipple docs on a stream. - {ok,_,<<"<?xml version=\"1.0\"?>\r\n">>} = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]). - % R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]), - % check_result(R, "not-wf"). + %{ok,_,<<"<?xml version=\"1.0\"?>\r\n">>} = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]). + R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]), + check_result(R, "not-wf"). %%---------------------------------------------------------------------- %% Test Case @@ -7784,11 +7760,8 @@ end_per_testcase(_Func,_Config) -> 'o-p01fail3'(Config) -> file:set_cwd(xmerl_test_lib:get_data_dir(Config)), Path = filename:join([xmerl_test_lib:get_data_dir(Config),"oasis","p01fail3.xml"]), - %% Special case becase we returns everything after a legal document - %% as an rest instead of giving and error to let the user handle - %% multipple docs on a stream. - {ok,_, <<"<bad/>", _/binary>>} = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]). - %%check_result(R, "not-wf"). + R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]), + check_result(R, "not-wf"). %%---------------------------------------------------------------------- %% Test Case @@ -11417,12 +11390,8 @@ end_per_testcase(_Func,_Config) -> 'ibm-not-wf-P01-ibm01n02'(Config) -> file:set_cwd(xmerl_test_lib:get_data_dir(Config)), Path = filename:join([xmerl_test_lib:get_data_dir(Config),"ibm","not-wf/P01/ibm01n02.xml"]), - %% Special case becase we returns everything after a legal document - %% as an rest instead of giving and error to let the user handle - %% multipple docs on a stream. - {ok,_, <<"<?xml version=\"1.0\"?>", _/binary>>} = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]). - % R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]), - % check_result(R, "not-wf"). + R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]), + check_result(R, "not-wf"). %%---------------------------------------------------------------------- %% Test Case @@ -11433,11 +11402,8 @@ end_per_testcase(_Func,_Config) -> 'ibm-not-wf-P01-ibm01n03'(Config) -> file:set_cwd(xmerl_test_lib:get_data_dir(Config)), Path = filename:join([xmerl_test_lib:get_data_dir(Config),"ibm","not-wf/P01/ibm01n03.xml"]), - %% Special case becase we returns everything after a legal document - %% as an rest instead of giving and error to let the user handle - %% multipple docs on a stream. - {ok,_, <<"<title>Wrong combination!</title>", _/binary>>} = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]). - %%check_result(R, "not-wf"). + R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]), + check_result(R, "not-wf"). %%---------------------------------------------------------------------- %% Test Cases @@ -13027,11 +12993,8 @@ end_per_testcase(_Func,_Config) -> 'ibm-not-wf-P27-ibm27n01'(Config) -> file:set_cwd(xmerl_test_lib:get_data_dir(Config)), Path = filename:join([xmerl_test_lib:get_data_dir(Config),"ibm","not-wf/P27/ibm27n01.xml"]), - %% Special case becase we returns everything after a legal document - %% as an rest instead of giving and error to let the user handle - %% multipple docs on a stream. - {ok,_, <<"<!ELEMENT cat EMPTY>">>} = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]). - %%check_result(R, "not-wf"). + R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]), + check_result(R, "not-wf"). %%---------------------------------------------------------------------- %% Test Cases @@ -13461,11 +13424,8 @@ end_per_testcase(_Func,_Config) -> 'ibm-not-wf-P39-ibm39n06'(Config) -> file:set_cwd(xmerl_test_lib:get_data_dir(Config)), Path = filename:join([xmerl_test_lib:get_data_dir(Config),"ibm","not-wf/P39/ibm39n06.xml"]), - %% Special case becase we returns everything after a legal document - %% as an rest instead of giving and error to let the user handle - %% multipple docs on a stream. - {ok,_,<<"content after end tag\r\n">>} = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]). - %%check_result(R, "not-wf"). + R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]), + check_result(R, "not-wf"). %%---------------------------------------------------------------------- %% Test Cases diff --git a/lib/xmerl/test/xmerl_sax_stream_SUITE.erl b/lib/xmerl/test/xmerl_sax_stream_SUITE.erl new file mode 100644 index 0000000000..a306eb66a2 --- /dev/null +++ b/lib/xmerl/test/xmerl_sax_stream_SUITE.erl @@ -0,0 +1,245 @@ +%%-*-erlang-*- +%%---------------------------------------------------------------------- +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2017. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%%---------------------------------------------------------------------- +%% File : xmerl_sax_stream_SUITE.erl +%%---------------------------------------------------------------------- +-module(xmerl_sax_stream_SUITE). +-compile(export_all). + +%%---------------------------------------------------------------------- +%% Include files +%%---------------------------------------------------------------------- +-include_lib("common_test/include/ct.hrl"). +-include_lib("kernel/include/file.hrl"). + +%%====================================================================== +%% External functions +%%====================================================================== + +%%---------------------------------------------------------------------- +%% Initializations +%%---------------------------------------------------------------------- +all() -> + [ + one_document, + two_documents, + one_document_and_junk + ]. + +%%---------------------------------------------------------------------- +%% Initializations +%%---------------------------------------------------------------------- + +init_per_suite(Config) -> + Config. + +end_per_suite(_Config) -> + ok. + +init_per_testcase(_TestCase, Config) -> + Config. + +end_per_testcase(_Func, _Config) -> + ok. + +%%---------------------------------------------------------------------- +%% Tests +%%---------------------------------------------------------------------- +one_document(Config) -> + Port = 11111, + + {ok, ListenSocket} = listen(Port), + Self = self(), + + spawn( + fun() -> + case catch gen_tcp:accept(ListenSocket) of + {ok, S} -> + Result = xmerl_sax_parser:stream(<<>>, + [{continuation_state, S}, + {continuation_fun, + fun(Sd) -> + io:format("Continuation called!!", []), + case gen_tcp:recv(Sd, 0) of + {ok, Packet} -> + io:format("Packet: ~p\n", [Packet]), + {Packet, Sd}; + {error, Reason} -> + throw({error, Reason}) + end + end}]), + Self ! {xmerl_sax, Result}, + close(S); + Error -> + Self ! {xmerl_sax, {error, {accept, Error}}} + end + end), + + {ok, SendSocket} = connect(localhost, Port), + + {ok, Binary} = file:read_file(filename:join([datadir(Config), "xmerl_sax_stream_one.xml"])), + + send_chunks(SendSocket, Binary), + + receive + {xmerl_sax, {ok, undefined, Rest}} -> + <<"\n">> = Rest, + io:format("Ok Rest: ~p\n", [Rest]) + after 5000 -> + ct:fail("Timeout") + end, + ok. + +two_documents(Config) -> + Port = 11111, + + {ok, ListenSocket} = listen(Port), + Self = self(), + + spawn( + fun() -> + case catch gen_tcp:accept(ListenSocket) of + {ok, S} -> + Result = xmerl_sax_parser:stream(<<>>, + [{continuation_state, S}, + {continuation_fun, + fun(Sd) -> + io:format("Continuation called!!", []), + case gen_tcp:recv(Sd, 0) of + {ok, Packet} -> + io:format("Packet: ~p\n", [Packet]), + {Packet, Sd}; + {error, Reason} -> + throw({error, Reason}) + end + end}]), + Self ! {xmerl_sax, Result}, + close(S); + Error -> + Self ! {xmerl_sax, {error, {accept, Error}}} + end + end), + + {ok, SendSocket} = connect(localhost, Port), + + {ok, Binary} = file:read_file(filename:join([datadir(Config), "xmerl_sax_stream_two.xml"])), + + send_chunks(SendSocket, Binary), + + receive + {xmerl_sax, {ok, undefined, Rest}} -> + <<"\n<?x", _R/binary>> = Rest, + io:format("Ok Rest: ~p\n", [Rest]) + after 5000 -> + ct:fail("Timeout") + end, + ok. + +one_document_and_junk(Config) -> + Port = 11111, + + {ok, ListenSocket} = listen(Port), + Self = self(), + + spawn( + fun() -> + case catch gen_tcp:accept(ListenSocket) of + {ok, S} -> + Result = xmerl_sax_parser:stream(<<>>, + [{continuation_state, S}, + {continuation_fun, + fun(Sd) -> + io:format("Continuation called!!", []), + case gen_tcp:recv(Sd, 0) of + {ok, Packet} -> + io:format("Packet: ~p\n", [Packet]), + {Packet, Sd}; + {error, Reason} -> + throw({error, Reason}) + end + end}]), + Self ! {xmerl_sax, Result}, + close(S); + Error -> + Self ! {xmerl_sax, {error, {accept, Error}}} + end + end), + + {ok, SendSocket} = connect(localhost, Port), + + {ok, Binary} = file:read_file(filename:join([datadir(Config), "xmerl_sax_stream_one_junk.xml"])), + + send_chunks(SendSocket, Binary), + + receive + {xmerl_sax, {ok, undefined, Rest}} -> + <<"\nth", _R/binary>> = Rest, + io:format("Ok Rest: ~p\n", [Rest]) + after 10000 -> + ct:fail("Timeout") + end, + ok. + +%%---------------------------------------------------------------------- +%% Utility functions +%%---------------------------------------------------------------------- +listen(Port) -> + case catch gen_tcp:listen(Port, [{active, false}, + binary, + {keepalive, true}, + {reuseaddr,true}]) of + {ok, ListenSocket} -> + {ok, ListenSocket}; + {error, Reason} -> + {error, {listen, Reason}} + end. + +close(Socket) -> + (catch gen_tcp:close(Socket)). + +connect(Host, Port) -> + Timeout = 5000, + % Options1 = check_options(Options), + Options = [binary], + case catch gen_tcp:connect(Host, Port, Options, Timeout) of + {ok, Socket} -> + {ok, Socket}; + {error, Reason} -> + {error, Reason} + end. + +send_chunks(Socket, Binary) -> + BSize = erlang:size(Binary), + if + BSize > 25 -> + <<Head:25/binary, Tail/binary>> = Binary, + case gen_tcp:send(Socket, Head) of + ok -> + timer:sleep(1000), + send_chunks(Socket, Tail); + {error,closed} -> + ok + end; + true -> + gen_tcp:send(Socket, Binary) + end. + +datadir(Config) -> + proplists:get_value(data_dir, Config). diff --git a/lib/xmerl/test/xmerl_sax_stream_SUITE_data/xmerl_sax_stream_one.xml b/lib/xmerl/test/xmerl_sax_stream_SUITE_data/xmerl_sax_stream_one.xml new file mode 100644 index 0000000000..30328bb188 --- /dev/null +++ b/lib/xmerl/test/xmerl_sax_stream_SUITE_data/xmerl_sax_stream_one.xml @@ -0,0 +1,17 @@ +<?xml version="1.0"?> +<person> +<name> +Arne Andersson +</name> +<address> +<street> + Old Road 456 +</street> +<zip> +12323 +</zip> +<city> +Small City +</city> +</address> +</person> diff --git a/lib/xmerl/test/xmerl_sax_stream_SUITE_data/xmerl_sax_stream_one_junk.xml b/lib/xmerl/test/xmerl_sax_stream_SUITE_data/xmerl_sax_stream_one_junk.xml new file mode 100644 index 0000000000..f730a95865 --- /dev/null +++ b/lib/xmerl/test/xmerl_sax_stream_SUITE_data/xmerl_sax_stream_one_junk.xml @@ -0,0 +1,18 @@ +<?xml version="1.0"?> +<person> +<name> +Arne Andersson +</name> +<address> +<street> + Old Road 456 +</street> +<zip> +12323 +</zip> +<city> +Small City +</city> +</address> +</person> +this is junk ...... diff --git a/lib/xmerl/test/xmerl_sax_stream_SUITE_data/xmerl_sax_stream_two.xml b/lib/xmerl/test/xmerl_sax_stream_SUITE_data/xmerl_sax_stream_two.xml new file mode 100644 index 0000000000..e241a02190 --- /dev/null +++ b/lib/xmerl/test/xmerl_sax_stream_SUITE_data/xmerl_sax_stream_two.xml @@ -0,0 +1,34 @@ +<?xml version="1.0"?> +<person> +<name> +Arne Andersson +</name> +<address> +<street> + Old Road 456 +</street> +<zip> +12323 +</zip> +<city> +Small City +</city> +</address> +</person> +<?xml version="1.0"?> +<person> +<name> +Bertil Bengtson +</name> +<address> +<street> + New Road 4 +</street> +<zip> +12328 +</zip> +<city> +Small City +</city> +</address> +</person> |