aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLars Thorsen <[email protected]>2017-02-10 12:51:25 +0100
committerLars Thorsen <[email protected]>2017-02-22 10:00:19 +0100
commit760a3fa3c57fa2b56b44a9c25cbab4df0f71987e (patch)
tree0a19021823a0e01ea9af48c94fd44fb094d19ebb
parent00294041cd3c6f66598a50b57abf27e6a35e277f (diff)
downloadotp-760a3fa3c57fa2b56b44a9c25cbab4df0f71987e.tar.gz
otp-760a3fa3c57fa2b56b44a9c25cbab4df0f71987e.tar.bz2
otp-760a3fa3c57fa2b56b44a9c25cbab4df0f71987e.zip
[xmerl] Fix XML "well-formedness" bug i SAX parser
Changed the XML Sax parser behaviour so it returns an error if there are something more in the file after the the matching document. If one using the xmerl_sax_parser:stream() a rest is allowed which then can be sent to a new call of xmerl_sax_parser:stream() to parse next document. This is done to be compliant with XML conformance tests.
-rw-r--r--lib/xmerl/src/xmerl_sax_parser.hrl9
-rw-r--r--lib/xmerl/src/xmerl_sax_parser_base.erlsrc16
-rw-r--r--lib/xmerl/test/xmerl_sax_std_SUITE.erl100
3 files changed, 46 insertions, 79 deletions
diff --git a/lib/xmerl/src/xmerl_sax_parser.hrl b/lib/xmerl/src/xmerl_sax_parser.hrl
index 932ab0cec5..7f9bf6c4d3 100644
--- a/lib/xmerl/src/xmerl_sax_parser.hrl
+++ b/lib/xmerl/src/xmerl_sax_parser.hrl
@@ -88,14 +88,7 @@
current_location, % Location of the currently parsed XML entity
entity, % Parsed XML entity
skip_external_dtd = false,% If true the external DTD is skipped during parsing
- input_type % Source type: file | stream.
- % This field is a preparation for an fix in R17 of a bug in
- % the conformance against the standard.
- % Today a file which contains two XML documents will be considered
- % well-formed and the second is placed in the rest part of the
- % return tuple, according to the conformance tests this should fail.
- % In the future this will fail if xmerl_sax_aprser:file/2 is used but
- % left to the user in the xmerl_sax_aprser:stream/2 case.
+ input_type % Source type: file | stream
}).
diff --git a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc
index 3ef2fce4d3..39dd3bbff3 100644
--- a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc
+++ b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc
@@ -72,7 +72,12 @@ parse(Xml, State) ->
{ok, Rest, State2} ->
State3 = event_callback(endDocument, State2),
ets:delete(RefTable),
- {ok, State3#xmerl_sax_parser_state.event_state, Rest};
+ case check_if_rest_ok(State3#xmerl_sax_parser_state.input_type, Rest) of
+ true ->
+ {ok, State3#xmerl_sax_parser_state.event_state, Rest};
+ false ->
+ format_error(fatal_error, State3, "Input found after legal document")
+ end;
{fatal_error, {State2, Reason}} ->
State3 = event_callback(endDocument, State2),
ets:delete(RefTable),
@@ -446,6 +451,15 @@ check_if_new_doc_allowed(stream, []) ->
check_if_new_doc_allowed(_, _) ->
false.
+check_if_rest_ok(file, []) ->
+ true;
+check_if_rest_ok(file, <<>>) ->
+ true;
+check_if_rest_ok(stream, _) ->
+ true;
+check_if_rest_ok(_, _) ->
+ false.
+
%%----------------------------------------------------------------------
%% Function: parse_pi_1(Rest, State) -> Result
%% Input: Rest = string() | binary()
diff --git a/lib/xmerl/test/xmerl_sax_std_SUITE.erl b/lib/xmerl/test/xmerl_sax_std_SUITE.erl
index 525a3b175a..b8412206cc 100644
--- a/lib/xmerl/test/xmerl_sax_std_SUITE.erl
+++ b/lib/xmerl/test/xmerl_sax_std_SUITE.erl
@@ -2,7 +2,7 @@
%%----------------------------------------------------------------------
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2010-2016. All Rights Reserved.
+%% Copyright Ericsson AB 2010-2017. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -507,11 +507,8 @@ end_per_testcase(_Func,_Config) ->
'not-wf-sa-036'(Config) ->
file:set_cwd(xmerl_test_lib:get_data_dir(Config)),
Path = filename:join([xmerl_test_lib:get_data_dir(Config),"xmltest","not-wf/sa/036.xml"]),
- %% Special case becase we returns everything after a legal document
- %% as an rest instead of giving and error to let the user handle
- %% multipple docs on a stream.
- {ok,_,<<"Illegal data\r\n">>} = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]).
- %%check_result(R, "not-wf").
+ R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]),
+ check_result(R, "not-wf").
%%----------------------------------------------------------------------
%% Test Case
@@ -522,11 +519,8 @@ end_per_testcase(_Func,_Config) ->
'not-wf-sa-037'(Config) ->
file:set_cwd(xmerl_test_lib:get_data_dir(Config)),
Path = filename:join([xmerl_test_lib:get_data_dir(Config),"xmltest","not-wf/sa/037.xml"]),
- %% Special case becase we returns everything after a legal document
- %% as an rest instead of giving and error to let the user handle
- %% multipple docs on a stream.
- {ok,_,<<"&#32;\r\n">>} = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]).
- %%check_result(R, "not-wf").
+ R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]),
+ check_result(R, "not-wf").
%%----------------------------------------------------------------------
%% Test Case
@@ -561,11 +555,8 @@ end_per_testcase(_Func,_Config) ->
'not-wf-sa-040'(Config) ->
file:set_cwd(xmerl_test_lib:get_data_dir(Config)),
Path = filename:join([xmerl_test_lib:get_data_dir(Config),"xmltest","not-wf/sa/040.xml"]),
- %% Special case becase we returns everything after a legal document
- %% as an rest instead of giving and error to let the user handle
- %% multipple docs on a stream.
- {ok,_,<<"<doc></doc>\r\n">>} = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]).
- %%check_result(R, "not-wf").
+ R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]),
+ check_result(R, "not-wf").
%%----------------------------------------------------------------------
%% Test Case
@@ -576,11 +567,8 @@ end_per_testcase(_Func,_Config) ->
'not-wf-sa-041'(Config) ->
file:set_cwd(xmerl_test_lib:get_data_dir(Config)),
Path = filename:join([xmerl_test_lib:get_data_dir(Config),"xmltest","not-wf/sa/041.xml"]),
- %% Special case becase we returns everything after a legal document
- %% as an rest instead of giving and error to let the user handle
- %% multipple docs on a stream.
- {ok,_,<<"<doc></doc>\r\n">>} = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]).
- %%check_result(R, "not-wf").
+ R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]),
+ check_result(R, "not-wf").
%%----------------------------------------------------------------------
%% Test Case
@@ -603,11 +591,8 @@ end_per_testcase(_Func,_Config) ->
'not-wf-sa-043'(Config) ->
file:set_cwd(xmerl_test_lib:get_data_dir(Config)),
Path = filename:join([xmerl_test_lib:get_data_dir(Config),"xmltest","not-wf/sa/043.xml"]),
- %% Special case becase we returns everything after a legal document
- %% as an rest instead of giving and error to let the user handle
- %% multipple docs on a stream.
- {ok,_,<<"Illegal data\r\n">>} = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]).
- %%check_result(R, "not-wf").
+ R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]),
+ check_result(R, "not-wf").
%%----------------------------------------------------------------------
%% Test Case
@@ -618,11 +603,8 @@ end_per_testcase(_Func,_Config) ->
'not-wf-sa-044'(Config) ->
file:set_cwd(xmerl_test_lib:get_data_dir(Config)),
Path = filename:join([xmerl_test_lib:get_data_dir(Config),"xmltest","not-wf/sa/044.xml"]),
- %% Special case becase we returns everything after a legal document
- %% as an rest instead of giving and error to let the user handle
- %% multipple docs on a stream.
- {ok,_,<<"<doc/>\r\n">>} = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]).
- %%check_result(R, "not-wf").
+ R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]),
+ check_result(R, "not-wf").
%%----------------------------------------------------------------------
%% Test Case
@@ -669,11 +651,8 @@ end_per_testcase(_Func,_Config) ->
'not-wf-sa-048'(Config) ->
file:set_cwd(xmerl_test_lib:get_data_dir(Config)),
Path = filename:join([xmerl_test_lib:get_data_dir(Config),"xmltest","not-wf/sa/048.xml"]),
- %% Special case becase we returns everything after a legal document
- %% as an rest instead of giving and error to let the user handle
- %% multipple docs on a stream.
- {ok,_,<<"<![CDATA[]]>\r\n">>} = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]).
- %%check_result(R, "not-wf").
+ R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]),
+ check_result(R, "not-wf").
%%----------------------------------------------------------------------
%% Test Case
@@ -1416,11 +1395,8 @@ end_per_testcase(_Func,_Config) ->
'not-wf-sa-110'(Config) ->
file:set_cwd(xmerl_test_lib:get_data_dir(Config)),
Path = filename:join([xmerl_test_lib:get_data_dir(Config),"xmltest","not-wf/sa/110.xml"]),
- %% Special case becase we returns everything after a legal document
- %% as an rest instead of giving and error to let the user handle
- %% multipple docs on a stream.
- {ok,_,<<"&e;\r\n">>} = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]).
- %%check_result(R, "not-wf").
+ R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]),
+ check_result(R, "not-wf").
%%----------------------------------------------------------------------
%% Test Case
@@ -1914,9 +1890,9 @@ end_per_testcase(_Func,_Config) ->
%% Special case becase we returns everything after a legal document
%% as an rest instead of giving and error to let the user handle
%% multipple docs on a stream.
- {ok,_,<<"<?xml version=\"1.0\"?>\r\n">>} = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]).
- % R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]),
- % check_result(R, "not-wf").
+ %{ok,_,<<"<?xml version=\"1.0\"?>\r\n">>} = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]).
+ R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]),
+ check_result(R, "not-wf").
%%----------------------------------------------------------------------
%% Test Case
@@ -7784,11 +7760,8 @@ end_per_testcase(_Func,_Config) ->
'o-p01fail3'(Config) ->
file:set_cwd(xmerl_test_lib:get_data_dir(Config)),
Path = filename:join([xmerl_test_lib:get_data_dir(Config),"oasis","p01fail3.xml"]),
- %% Special case becase we returns everything after a legal document
- %% as an rest instead of giving and error to let the user handle
- %% multipple docs on a stream.
- {ok,_, <<"<bad/>", _/binary>>} = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]).
- %%check_result(R, "not-wf").
+ R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]),
+ check_result(R, "not-wf").
%%----------------------------------------------------------------------
%% Test Case
@@ -11417,12 +11390,8 @@ end_per_testcase(_Func,_Config) ->
'ibm-not-wf-P01-ibm01n02'(Config) ->
file:set_cwd(xmerl_test_lib:get_data_dir(Config)),
Path = filename:join([xmerl_test_lib:get_data_dir(Config),"ibm","not-wf/P01/ibm01n02.xml"]),
- %% Special case becase we returns everything after a legal document
- %% as an rest instead of giving and error to let the user handle
- %% multipple docs on a stream.
- {ok,_, <<"<?xml version=\"1.0\"?>", _/binary>>} = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]).
- % R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]),
- % check_result(R, "not-wf").
+ R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]),
+ check_result(R, "not-wf").
%%----------------------------------------------------------------------
%% Test Case
@@ -11433,11 +11402,8 @@ end_per_testcase(_Func,_Config) ->
'ibm-not-wf-P01-ibm01n03'(Config) ->
file:set_cwd(xmerl_test_lib:get_data_dir(Config)),
Path = filename:join([xmerl_test_lib:get_data_dir(Config),"ibm","not-wf/P01/ibm01n03.xml"]),
- %% Special case becase we returns everything after a legal document
- %% as an rest instead of giving and error to let the user handle
- %% multipple docs on a stream.
- {ok,_, <<"<title>Wrong combination!</title>", _/binary>>} = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]).
- %%check_result(R, "not-wf").
+ R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]),
+ check_result(R, "not-wf").
%%----------------------------------------------------------------------
%% Test Cases
@@ -13027,11 +12993,8 @@ end_per_testcase(_Func,_Config) ->
'ibm-not-wf-P27-ibm27n01'(Config) ->
file:set_cwd(xmerl_test_lib:get_data_dir(Config)),
Path = filename:join([xmerl_test_lib:get_data_dir(Config),"ibm","not-wf/P27/ibm27n01.xml"]),
- %% Special case becase we returns everything after a legal document
- %% as an rest instead of giving and error to let the user handle
- %% multipple docs on a stream.
- {ok,_, <<"<!ELEMENT cat EMPTY>">>} = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]).
- %%check_result(R, "not-wf").
+ R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]),
+ check_result(R, "not-wf").
%%----------------------------------------------------------------------
%% Test Cases
@@ -13461,11 +13424,8 @@ end_per_testcase(_Func,_Config) ->
'ibm-not-wf-P39-ibm39n06'(Config) ->
file:set_cwd(xmerl_test_lib:get_data_dir(Config)),
Path = filename:join([xmerl_test_lib:get_data_dir(Config),"ibm","not-wf/P39/ibm39n06.xml"]),
- %% Special case becase we returns everything after a legal document
- %% as an rest instead of giving and error to let the user handle
- %% multipple docs on a stream.
- {ok,_,<<"content after end tag\r\n">>} = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]).
- %%check_result(R, "not-wf").
+ R = xmerl_sax_parser:file(Path, [{event_fun, fun(_,_,S) -> S end}]),
+ check_result(R, "not-wf").
%%----------------------------------------------------------------------
%% Test Cases