aboutsummaryrefslogtreecommitdiffstats
path: root/lib/xmerl/src
diff options
context:
space:
mode:
authorMagnus Lidén <[email protected]>2014-01-21 16:41:02 +0100
committerMagnus Lidén <[email protected]>2014-01-21 16:41:02 +0100
commit811c3040a2a6f7ee2b8f19e07ef0795f2bab754c (patch)
tree1504890a11d461f15f099f896d6b8090f8a702f5 /lib/xmerl/src
parentf836096cb383c6a3c5c91956ded5bd4e2772bcd2 (diff)
parent7042cc483b7e48eeacc99a47676e8593f288285b (diff)
downloadotp-811c3040a2a6f7ee2b8f19e07ef0795f2bab754c.tar.gz
otp-811c3040a2a6f7ee2b8f19e07ef0795f2bab754c.tar.bz2
otp-811c3040a2a6f7ee2b8f19e07ef0795f2bab754c.zip
Merge branch 'maint'
Diffstat (limited to 'lib/xmerl/src')
-rw-r--r--lib/xmerl/src/xmerl_sax_parser.erl32
-rw-r--r--lib/xmerl/src/xmerl_sax_parser.hrl10
-rw-r--r--lib/xmerl/src/xmerl_sax_parser_base.erlsrc65
3 files changed, 75 insertions, 32 deletions
diff --git a/lib/xmerl/src/xmerl_sax_parser.erl b/lib/xmerl/src/xmerl_sax_parser.erl
index 5c006aada2..ad71072d95 100644
--- a/lib/xmerl/src/xmerl_sax_parser.erl
+++ b/lib/xmerl/src/xmerl_sax_parser.erl
@@ -74,7 +74,8 @@ file(Name,Options) ->
CL = filename:absname(Dir),
File = filename:basename(Name),
ContinuationFun = fun default_continuation_cb/1,
- Res = stream(<<>>, [{continuation_fun, ContinuationFun},
+ Res = stream(<<>>,
+ [{continuation_fun, ContinuationFun},
{continuation_state, FD},
{current_location, CL},
{entity, File}
@@ -98,9 +99,13 @@ stream(Xml, Options) when is_list(Xml), is_list(Options) ->
State = parse_options(Options, initial_state()),
case State#xmerl_sax_parser_state.file_type of
dtd ->
- xmerl_sax_parser_list:parse_dtd(Xml, State#xmerl_sax_parser_state{encoding = list});
+ xmerl_sax_parser_list:parse_dtd(Xml,
+ State#xmerl_sax_parser_state{encoding = list,
+ input_type = stream});
normal ->
- xmerl_sax_parser_list:parse(Xml, State#xmerl_sax_parser_state{encoding = list})
+ xmerl_sax_parser_list:parse(Xml,
+ State#xmerl_sax_parser_state{encoding = list,
+ input_type = stream})
end;
stream(Xml, Options) when is_binary(Xml), is_list(Options) ->
case parse_options(Options, initial_state()) of
@@ -124,17 +129,14 @@ stream(Xml, Options) when is_binary(Xml), is_list(Options) ->
[],
State#xmerl_sax_parser_state.event_state};
{Xml1, State1} ->
- parse(Xml1, State1, ParseFunction)
+ parse_binary(Xml1,
+ State1#xmerl_sax_parser_state{input_type = stream},
+ ParseFunction)
end
end.
-
-%%======================================================================
-%% Internal functions
-%%======================================================================
-
%%----------------------------------------------------------------------
-%% Function: parse(Encoding, Xml, State, F) -> Result
+%% Function: parse_binary(Encoding, Xml, State, F) -> Result
%% Input: Encoding = atom()
%% Xml = [integer()] | binary()
%% State = #xmerl_sax_parser_state
@@ -144,15 +146,15 @@ stream(Xml, Options) when is_binary(Xml), is_list(Options) ->
%% EventState = term()
%% Description: Chooses the correct parser depending on the encoding.
%%----------------------------------------------------------------------
-parse(Xml, #xmerl_sax_parser_state{encoding=utf8}=State, F) ->
+parse_binary(Xml, #xmerl_sax_parser_state{encoding=utf8}=State, F) ->
xmerl_sax_parser_utf8:F(Xml, State);
-parse(Xml, #xmerl_sax_parser_state{encoding={utf16,little}}=State, F) ->
+parse_binary(Xml, #xmerl_sax_parser_state{encoding={utf16,little}}=State, F) ->
xmerl_sax_parser_utf16le:F(Xml, State);
-parse(Xml, #xmerl_sax_parser_state{encoding={utf16,big}}=State, F) ->
+parse_binary(Xml, #xmerl_sax_parser_state{encoding={utf16,big}}=State, F) ->
xmerl_sax_parser_utf16be:F(Xml, State);
-parse(Xml, #xmerl_sax_parser_state{encoding=latin1}=State, F) ->
+parse_binary(Xml, #xmerl_sax_parser_state{encoding=latin1}=State, F) ->
xmerl_sax_parser_latin1:F(Xml, State);
-parse(_, #xmerl_sax_parser_state{encoding=Enc}, _) ->
+parse_binary(_, #xmerl_sax_parser_state{encoding=Enc}, _) ->
{error, lists:flatten(io_lib:format("Charcter set ~p not supported", [Enc]))}.
%%----------------------------------------------------------------------
diff --git a/lib/xmerl/src/xmerl_sax_parser.hrl b/lib/xmerl/src/xmerl_sax_parser.hrl
index 736316e069..b433dd6cf9 100644
--- a/lib/xmerl/src/xmerl_sax_parser.hrl
+++ b/lib/xmerl/src/xmerl_sax_parser.hrl
@@ -86,7 +86,15 @@
file_type = normal, % Can be normal, dtd and entity
current_location, % Location of the currently parsed XML entity
entity, % Parsed XML entity
- skip_external_dtd = false % If true the external DTD is skipped during parsing
+ skip_external_dtd = false,% If true the external DTD is skipped during parsing
+ input_type % Source type: file | stream.
+ % This field is a preparation for an fix in R17 of a bug in
+ % the conformance against the standard.
+ % Today a file which contains two XML documents will be considered
+ % well-formed and the second is placed in the rest part of the
+ % return tuple, according to the conformance tests this should fail.
+ % In the future this will fail if xmerl_sax_aprser:file/2 is used but
+ % left to the user in the xmerl_sax_aprser:stream/2 case.
}).
diff --git a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc
index 7b64d7c302..e198f2fef5 100644
--- a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc
+++ b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc
@@ -113,6 +113,10 @@ parse_dtd(Xml, State) ->
State3 = event_callback(endDocument, State2),
ets:delete(RefTable),
{ok, State3#xmerl_sax_parser_state.event_state, Rest};
+ {endDocument, Rest, State2} when is_record(State2, xmerl_sax_parser_state) ->
+ State3 = event_callback(endDocument, State2),
+ ets:delete(RefTable),
+ {ok, State3#xmerl_sax_parser_state.event_state, Rest};
Other ->
_State2 = event_callback(endDocument, State1),
ets:delete(RefTable),
@@ -207,8 +211,14 @@ parse_prolog(?STRING_EMPTY, State) ->
parse_prolog(?STRING("<") = Bytes, State) ->
cf(Bytes, State, fun parse_prolog/2);
parse_prolog(?STRING_REST("<?", Rest), State) ->
- {Rest1, State1} = parse_pi(Rest, State),
- parse_prolog(Rest1, State1);
+ case parse_pi(Rest, State) of
+ {Rest1, State1} ->
+ parse_prolog(Rest1, State1);
+ {endDocument, Rest1, State1} ->
+ parse_prolog(Rest1, State1)
+ % IValue = ?TO_INPUT_FORMAT("<?"),
+ % {?APPEND_STRING(IValue, Rest1), State1}
+ end;
parse_prolog(?STRING_REST("<!", Rest), State) ->
parse_prolog_1(Rest, State);
parse_prolog(?STRING_REST("<", Rest), State) ->
@@ -409,10 +419,11 @@ parse_pi(?STRING_UNBOUND_REST(C, Rest) = Bytes, State) ->
parse_name(Rest, State, [C]),
case string:to_lower(PiTarget) of
"xml" ->
- case State#xmerl_sax_parser_state.end_tags of
- [] ->
- {Bytes, State};
- _ ->
+ case check_if_new_doc_allowed(State#xmerl_sax_parser_state.input_type,
+ State#xmerl_sax_parser_state.end_tags) of
+ true ->
+ {endDocument, Bytes, State};
+ false ->
?fatal_error(State1, "<?xml ...?> not first in document")
end;
_ ->
@@ -426,6 +437,11 @@ parse_pi(?STRING_UNBOUND_REST(C, Rest) = Bytes, State) ->
parse_pi(Bytes, State) ->
unicode_incomplete_check([Bytes, State, fun parse_pi/2], undefined).
+check_if_new_doc_allowed(stream, []) ->
+ true;
+check_if_new_doc_allowed(_, _) ->
+ false.
+
%%----------------------------------------------------------------------
%% Function: parse_pi_1(Rest, State) -> Result
%% Input: Rest = string() | binary()
@@ -657,8 +673,13 @@ parse_misc(?STRING_EMPTY, State, Eod) ->
parse_misc(?STRING("<") = Rest, State, Eod) ->
cf(Rest, State, Eod, fun parse_misc/3);
parse_misc(?STRING_REST("<?", Rest), State, Eod) ->
- {Rest1, State1} = parse_pi(Rest, State),
- parse_misc(Rest1, State1, Eod);
+ case parse_pi(Rest, State) of
+ {Rest1, State1} ->
+ parse_misc(Rest1, State1, Eod);
+ {endDocument, _Rest1, State1} ->
+ IValue = ?TO_INPUT_FORMAT("<?"),
+ {?APPEND_STRING(IValue, Rest), State1}
+ end;
parse_misc(?STRING("<!") = Rest, State, Eod) ->
cf(Rest, State, Eod, fun parse_misc/3);
parse_misc(?STRING("<!-") = Rest, State, Eod) ->
@@ -1063,8 +1084,13 @@ parse_content(?STRING_REST("<!--", Rest), State, Acc, IgnorableWS) ->
parse_content(Rest1, State2, [], true);
parse_content(?STRING_REST("<?", Rest), State, Acc, IgnorableWS) ->
State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
- {Rest1, State2} = parse_pi(Rest, State1),
- parse_content(Rest1, State2, [], true);
+ case parse_pi(Rest, State1) of
+ {Rest1, State2} ->
+ parse_content(Rest1, State2, [], true);
+ {endDocument, _Rest1, State2} ->
+ IValue = ?TO_INPUT_FORMAT("<?"),
+ {?APPEND_STRING(IValue, Rest), State2}
+ end;
parse_content(?STRING_REST("<!", Rest1) = Rest, #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) ->
case ET of
[] ->
@@ -1649,8 +1675,9 @@ handle_external_entity({file, FileToOpen}, State) ->
{?STRING_EMPTY, EntityState} =
parse_external_entity_1(<<>>,
State#xmerl_sax_parser_state{continuation_state=FD,
- current_location=filename:dirname(FileToOpen),
- entity=filename:basename(FileToOpen)}),
+ current_location=filename:dirname(FileToOpen),
+ entity=filename:basename(FileToOpen),
+ input_type=file}),
file:close(FD),
EntityState#xmerl_sax_parser_state.event_state
end;
@@ -1667,8 +1694,9 @@ handle_external_entity({http, Url}, State) ->
{?STRING_EMPTY, EntityState} =
parse_external_entity_1(<<>>,
State#xmerl_sax_parser_state{continuation_state=FD,
- current_location=filename:dirname(Url),
- entity=filename:basename(Url)}),
+ current_location=filename:dirname(Url),
+ entity=filename:basename(Url),
+ input_type=file}),
file:close(FD),
file:delete(TmpFile),
EntityState#xmerl_sax_parser_state.event_state
@@ -1881,8 +1909,13 @@ parse_doctype_decl(?STRING_EMPTY, State) ->
parse_doctype_decl(?STRING("<"), State) ->
cf(?STRING("<"), State, fun parse_doctype_decl/2);
parse_doctype_decl(?STRING_REST("<?", Rest), State) ->
- {Rest1, State1} = parse_pi(Rest, State),
- parse_doctype_decl(Rest1, State1);
+ case parse_pi(Rest, State) of
+ {Rest1, State1} ->
+ parse_doctype_decl(Rest1, State1);
+ {endDocument, _Rest1, State1} ->
+ IValue = ?TO_INPUT_FORMAT("<?"),
+ {?APPEND_STRING(IValue, Rest), State1}
+ end;
parse_doctype_decl(?STRING_REST("%", Rest), State) ->
{Ref, Rest1, State1} = parse_pe_reference(Rest, State),
case Ref of