aboutsummaryrefslogtreecommitdiffstats
path: root/lib/xmerl/src
diff options
context:
space:
mode:
authorHans Bolinder <[email protected]>2017-02-22 15:28:26 +0100
committerHans Bolinder <[email protected]>2017-02-22 15:28:26 +0100
commit38a10d18d104869ebba9af568b5566d0162c6faf (patch)
treeb2f5e37b1fdd573d4165e44002dae977fb3ba120 /lib/xmerl/src
parent056f1996759a29bfb97c4394bcb23c436905e4be (diff)
parent5073a7b88aa3a8e2c84dcf6bf48adafe4e874422 (diff)
downloadotp-38a10d18d104869ebba9af568b5566d0162c6faf.tar.gz
otp-38a10d18d104869ebba9af568b5566d0162c6faf.tar.bz2
otp-38a10d18d104869ebba9af568b5566d0162c6faf.zip
Merge branch 'maint'
* maint: [xmerl] Remove faulty throws [xmerl] Fix XML "well-formedness" bug i SAX parser [xmerl] Correct bug handling multiple documents on a stream stdlib: Improve pretty-printing of terms with maps Conflicts: lib/stdlib/test/io_SUITE.erl
Diffstat (limited to 'lib/xmerl/src')
-rw-r--r--lib/xmerl/src/xmerl_sax_parser.erl33
-rw-r--r--lib/xmerl/src/xmerl_sax_parser.hrl9
-rw-r--r--lib/xmerl/src/xmerl_sax_parser_base.erlsrc84
3 files changed, 76 insertions, 50 deletions
diff --git a/lib/xmerl/src/xmerl_sax_parser.erl b/lib/xmerl/src/xmerl_sax_parser.erl
index 318a0cf7f4..1aef6c58c4 100644
--- a/lib/xmerl/src/xmerl_sax_parser.erl
+++ b/lib/xmerl/src/xmerl_sax_parser.erl
@@ -1,7 +1,7 @@
%%--------------------------------------------------------------------
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2008-2016. All Rights Reserved.
+%% Copyright Ericsson AB 2008-2017. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -33,6 +33,7 @@
%% External exports
%%----------------------------------------------------------------------
-export([file/2,
+ stream/3,
stream/2]).
%%----------------------------------------------------------------------
@@ -72,11 +73,12 @@ file(Name,Options) ->
File = filename:basename(Name),
ContinuationFun = fun default_continuation_cb/1,
Res = stream(<<>>,
- [{continuation_fun, ContinuationFun},
- {continuation_state, FD},
- {current_location, CL},
- {entity, File}
- |Options]),
+ [{continuation_fun, ContinuationFun},
+ {continuation_state, FD},
+ {current_location, CL},
+ {entity, File}
+ |Options],
+ file),
ok = file:close(FD),
Res
end.
@@ -92,19 +94,22 @@ file(Name,Options) ->
%% EventState = term()
%% Description: Parse a stream containing an XML document.
%%----------------------------------------------------------------------
-stream(Xml, Options) when is_list(Xml), is_list(Options) ->
+stream(Xml, Options) ->
+ stream(Xml, Options, stream).
+
+stream(Xml, Options, InputType) when is_list(Xml), is_list(Options) ->
State = parse_options(Options, initial_state()),
- case State#xmerl_sax_parser_state.file_type of
+ case State#xmerl_sax_parser_state.file_type of
dtd ->
xmerl_sax_parser_list:parse_dtd(Xml,
State#xmerl_sax_parser_state{encoding = list,
- input_type = stream});
+ input_type = InputType});
normal ->
xmerl_sax_parser_list:parse(Xml,
State#xmerl_sax_parser_state{encoding = list,
- input_type = stream})
+ input_type = InputType})
end;
-stream(Xml, Options) when is_binary(Xml), is_list(Options) ->
+stream(Xml, Options, InputType) when is_binary(Xml), is_list(Options) ->
case parse_options(Options, initial_state()) of
{error, Reason} -> {error, Reason};
State ->
@@ -127,7 +132,7 @@ stream(Xml, Options) when is_binary(Xml), is_list(Options) ->
State#xmerl_sax_parser_state.event_state};
{Xml1, State1} ->
parse_binary(Xml1,
- State1#xmerl_sax_parser_state{input_type = stream},
+ State1#xmerl_sax_parser_state{input_type = InputType},
ParseFunction)
end
end.
@@ -226,12 +231,12 @@ check_encoding_option(E) ->
%% Description: Detects which character set is used in a binary stream.
%%----------------------------------------------------------------------
detect_charset(<<>>, #xmerl_sax_parser_state{continuation_fun = undefined} = _) ->
- throw({error, "Can't detect character encoding due to no indata"});
+ {error, "Can't detect character encoding due to no indata"};
detect_charset(<<>>, #xmerl_sax_parser_state{continuation_fun = CFun,
continuation_state = CState} = State) ->
case CFun(CState) of
{<<>>, _} ->
- throw({error, "Can't detect character encoding due to lack of indata"});
+ {error, "Can't detect character encoding due to lack of indata"};
{NewBytes, NewContState} ->
detect_charset(NewBytes, State#xmerl_sax_parser_state{continuation_state = NewContState})
end;
diff --git a/lib/xmerl/src/xmerl_sax_parser.hrl b/lib/xmerl/src/xmerl_sax_parser.hrl
index 932ab0cec5..7f9bf6c4d3 100644
--- a/lib/xmerl/src/xmerl_sax_parser.hrl
+++ b/lib/xmerl/src/xmerl_sax_parser.hrl
@@ -88,14 +88,7 @@
current_location, % Location of the currently parsed XML entity
entity, % Parsed XML entity
skip_external_dtd = false,% If true the external DTD is skipped during parsing
- input_type % Source type: file | stream.
- % This field is a preparation for an fix in R17 of a bug in
- % the conformance against the standard.
- % Today a file which contains two XML documents will be considered
- % well-formed and the second is placed in the rest part of the
- % return tuple, according to the conformance tests this should fail.
- % In the future this will fail if xmerl_sax_aprser:file/2 is used but
- % left to the user in the xmerl_sax_aprser:stream/2 case.
+ input_type % Source type: file | stream
}).
diff --git a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc
index 4d75805b9b..2b9b37b5f3 100644
--- a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc
+++ b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc
@@ -1,7 +1,7 @@
%%-*-erlang-*-
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2008-2016. All Rights Reserved.
+%% Copyright Ericsson AB 2008-2017. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -72,7 +72,12 @@ parse(Xml, State) ->
{ok, Rest, State2} ->
State3 = event_callback(endDocument, State2),
ets:delete(RefTable),
- {ok, State3#xmerl_sax_parser_state.event_state, Rest};
+ case check_if_rest_ok(State3#xmerl_sax_parser_state.input_type, Rest) of
+ true ->
+ {ok, State3#xmerl_sax_parser_state.event_state, Rest};
+ false ->
+ format_error(fatal_error, State3, "Input found after legal document")
+ end;
{fatal_error, {State2, Reason}} ->
State3 = event_callback(endDocument, State2),
ets:delete(RefTable),
@@ -81,10 +86,14 @@ parse(Xml, State) ->
State3 = event_callback(endDocument, State2),
ets:delete(RefTable),
format_error(Tag, State3, Reason);
+ {endDocument, Rest, State2} ->
+ State3 = event_callback(endDocument, State2),
+ ets:delete(RefTable),
+ {ok, State3#xmerl_sax_parser_state.event_state, Rest};
Other ->
_State2 = event_callback(endDocument, State1),
ets:delete(RefTable),
- throw(Other)
+ {fatal_error, Other}
end.
%%----------------------------------------------------------------------
@@ -111,7 +120,7 @@ parse_dtd(Xml, State) ->
{Rest, State2} when is_record(State2, xmerl_sax_parser_state) ->
State3 = event_callback(endDocument, State2),
ets:delete(RefTable),
- {ok, State3#xmerl_sax_parser_state.event_state, Rest};
+ {ok, State3#xmerl_sax_parser_state.event_state, Rest};
{endDocument, Rest, State2} when is_record(State2, xmerl_sax_parser_state) ->
State3 = event_callback(endDocument, State2),
ets:delete(RefTable),
@@ -119,7 +128,7 @@ parse_dtd(Xml, State) ->
Other ->
_State2 = event_callback(endDocument, State1),
ets:delete(RefTable),
- throw(Other)
+ {fatal_error, Other}
end.
@@ -442,6 +451,15 @@ check_if_new_doc_allowed(stream, []) ->
check_if_new_doc_allowed(_, _) ->
false.
+check_if_rest_ok(file, []) ->
+ true;
+check_if_rest_ok(file, <<>>) ->
+ true;
+check_if_rest_ok(stream, _) ->
+ true;
+check_if_rest_ok(_, _) ->
+ false.
+
%%----------------------------------------------------------------------
%% Function: parse_pi_1(Rest, State) -> Result
%% Input: Rest = string() | binary()
@@ -1024,16 +1042,21 @@ parse_etag(Bytes, State) ->
unicode_incomplete_check([Bytes, State, fun parse_etag/2],
undefined).
-
parse_etag_1(?STRING_REST(">", Rest),
#xmerl_sax_parser_state{end_tags=[{_ETag, Uri, LocalName, QName, OldNsList, NewNsList}
- |RestOfETags]} = State, _Tag) ->
+ |RestOfETags],
+ input_type=InputType} = State, _Tag) ->
State1 = event_callback({endElement, Uri, LocalName, QName}, State),
State2 = send_end_prefix_mapping_event(NewNsList, State1),
- parse_content(Rest,
- State2#xmerl_sax_parser_state{end_tags=RestOfETags,
- ns = OldNsList},
- [], true);
+ case check_if_new_doc_allowed(InputType, RestOfETags) of
+ true ->
+ throw({endDocument, Rest, State2#xmerl_sax_parser_state{ns = OldNsList}});
+ false ->
+ parse_content(Rest,
+ State2#xmerl_sax_parser_state{end_tags=RestOfETags,
+ ns = OldNsList},
+ [], true)
+ end;
parse_etag_1(?STRING_UNBOUND_REST(_C, _), State, Tag) ->
{P,TN} = Tag,
?fatal_error(State, "Bad EndTag: " ++ P ++ ":" ++ TN);
@@ -1051,21 +1074,26 @@ parse_etag_1(Bytes, State, Tag) ->
%% Description: Parsing the content part of tags
%% [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
%%----------------------------------------------------------------------
-
parse_content(?STRING_EMPTY, State, Acc, IgnorableWS) ->
- case catch cf(?STRING_EMPTY, State, Acc, IgnorableWS, fun parse_content/4) of
- {Rest, State1} when is_record(State1, xmerl_sax_parser_state) ->
- {Rest, State1};
- {fatal_error, {State1, Msg}} ->
- case check_if_document_complete(State1, Msg) of
- true ->
- State2 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State1),
- {?STRING_EMPTY, State2};
- false ->
- ?fatal_error(State1, Msg)
- end;
- Other ->
- throw(Other)
+ case check_if_document_complete(State, "No more bytes") of
+ true ->
+ State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
+ {?STRING_EMPTY, State1};
+ false ->
+ case catch cf(?STRING_EMPTY, State, Acc, IgnorableWS, fun parse_content/4) of
+ {Rest, State1} when is_record(State1, xmerl_sax_parser_state) ->
+ {Rest, State1};
+ {fatal_error, {State1, Msg}} ->
+ case check_if_document_complete(State1, Msg) of
+ true ->
+ State2 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State1),
+ {?STRING_EMPTY, State2};
+ false ->
+ ?fatal_error(State1, Msg)
+ end;
+ Other ->
+ throw(Other)
+ end
end;
parse_content(?STRING("\r") = Bytes, State, Acc, IgnorableWS) ->
cf(Bytes, State, Acc, IgnorableWS, fun parse_content/4);
@@ -1094,7 +1122,7 @@ parse_content(?STRING_REST("<?", Rest), State, Acc, IgnorableWS) ->
parse_content(?STRING_REST("<!", Rest1) = Rest, #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) ->
case ET of
[] ->
- {Rest, State}; %%LATH : Skicka ignorable WS ???
+ {Rest, State}; %% Skicka ignorable WS ???
_ ->
State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
parse_cdata(Rest1, State1)
@@ -1102,7 +1130,7 @@ parse_content(?STRING_REST("<!", Rest1) = Rest, #xmerl_sax_parser_state{end_tags
parse_content(?STRING_REST("<", Rest1) = Rest, #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) ->
case ET of
[] ->
- {Rest, State}; %%LATH : Skicka ignorable WS ???
+ {Rest, State}; %% Skicka ignorable WS ???
_ ->
State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
parse_stag(Rest1, State1)
@@ -3290,7 +3318,7 @@ cf(Rest, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = C
catch
throw:ErrorTerm ->
?fatal_error(State, ErrorTerm);
- exit:Reason ->
+ exit:Reason ->
?fatal_error(State, {'EXIT', Reason})
end,
case Result of