aboutsummaryrefslogtreecommitdiffstats
path: root/lib/xmerl/src
diff options
context:
space:
mode:
Diffstat (limited to 'lib/xmerl/src')
-rw-r--r--lib/xmerl/src/xmerl_sax_parser.erl32
-rw-r--r--lib/xmerl/src/xmerl_sax_parser.hrl10
-rw-r--r--lib/xmerl/src/xmerl_sax_parser_base.erlsrc43
3 files changed, 54 insertions, 31 deletions
diff --git a/lib/xmerl/src/xmerl_sax_parser.erl b/lib/xmerl/src/xmerl_sax_parser.erl
index 5c006aada2..ad71072d95 100644
--- a/lib/xmerl/src/xmerl_sax_parser.erl
+++ b/lib/xmerl/src/xmerl_sax_parser.erl
@@ -74,7 +74,8 @@ file(Name,Options) ->
CL = filename:absname(Dir),
File = filename:basename(Name),
ContinuationFun = fun default_continuation_cb/1,
- Res = stream(<<>>, [{continuation_fun, ContinuationFun},
+ Res = stream(<<>>,
+ [{continuation_fun, ContinuationFun},
{continuation_state, FD},
{current_location, CL},
{entity, File}
@@ -98,9 +99,13 @@ stream(Xml, Options) when is_list(Xml), is_list(Options) ->
State = parse_options(Options, initial_state()),
case State#xmerl_sax_parser_state.file_type of
dtd ->
- xmerl_sax_parser_list:parse_dtd(Xml, State#xmerl_sax_parser_state{encoding = list});
+ xmerl_sax_parser_list:parse_dtd(Xml,
+ State#xmerl_sax_parser_state{encoding = list,
+ input_type = stream});
normal ->
- xmerl_sax_parser_list:parse(Xml, State#xmerl_sax_parser_state{encoding = list})
+ xmerl_sax_parser_list:parse(Xml,
+ State#xmerl_sax_parser_state{encoding = list,
+ input_type = stream})
end;
stream(Xml, Options) when is_binary(Xml), is_list(Options) ->
case parse_options(Options, initial_state()) of
@@ -124,17 +129,14 @@ stream(Xml, Options) when is_binary(Xml), is_list(Options) ->
[],
State#xmerl_sax_parser_state.event_state};
{Xml1, State1} ->
- parse(Xml1, State1, ParseFunction)
+ parse_binary(Xml1,
+ State1#xmerl_sax_parser_state{input_type = stream},
+ ParseFunction)
end
end.
-
-%%======================================================================
-%% Internal functions
-%%======================================================================
-
%%----------------------------------------------------------------------
-%% Function: parse(Encoding, Xml, State, F) -> Result
+%% Function: parse_binary(Encoding, Xml, State, F) -> Result
%% Input: Encoding = atom()
%% Xml = [integer()] | binary()
%% State = #xmerl_sax_parser_state
@@ -144,15 +146,15 @@ stream(Xml, Options) when is_binary(Xml), is_list(Options) ->
%% EventState = term()
%% Description: Chooses the correct parser depending on the encoding.
%%----------------------------------------------------------------------
-parse(Xml, #xmerl_sax_parser_state{encoding=utf8}=State, F) ->
+parse_binary(Xml, #xmerl_sax_parser_state{encoding=utf8}=State, F) ->
xmerl_sax_parser_utf8:F(Xml, State);
-parse(Xml, #xmerl_sax_parser_state{encoding={utf16,little}}=State, F) ->
+parse_binary(Xml, #xmerl_sax_parser_state{encoding={utf16,little}}=State, F) ->
xmerl_sax_parser_utf16le:F(Xml, State);
-parse(Xml, #xmerl_sax_parser_state{encoding={utf16,big}}=State, F) ->
+parse_binary(Xml, #xmerl_sax_parser_state{encoding={utf16,big}}=State, F) ->
xmerl_sax_parser_utf16be:F(Xml, State);
-parse(Xml, #xmerl_sax_parser_state{encoding=latin1}=State, F) ->
+parse_binary(Xml, #xmerl_sax_parser_state{encoding=latin1}=State, F) ->
xmerl_sax_parser_latin1:F(Xml, State);
-parse(_, #xmerl_sax_parser_state{encoding=Enc}, _) ->
+parse_binary(_, #xmerl_sax_parser_state{encoding=Enc}, _) ->
{error, lists:flatten(io_lib:format("Charcter set ~p not supported", [Enc]))}.
%%----------------------------------------------------------------------
diff --git a/lib/xmerl/src/xmerl_sax_parser.hrl b/lib/xmerl/src/xmerl_sax_parser.hrl
index 736316e069..b433dd6cf9 100644
--- a/lib/xmerl/src/xmerl_sax_parser.hrl
+++ b/lib/xmerl/src/xmerl_sax_parser.hrl
@@ -86,7 +86,15 @@
file_type = normal, % Can be normal, dtd and entity
current_location, % Location of the currently parsed XML entity
entity, % Parsed XML entity
- skip_external_dtd = false % If true the external DTD is skipped during parsing
+ skip_external_dtd = false,% If true the external DTD is skipped during parsing
+ input_type % Source type: file | stream.
+ % This field is a preparation for an fix in R17 of a bug in
+ % the conformance against the standard.
+ % Today a file which contains two XML documents will be considered
+ % well-formed and the second is placed in the rest part of the
+ % return tuple, according to the conformance tests this should fail.
+ % In the future this will fail if xmerl_sax_aprser:file/2 is used but
+ % left to the user in the xmerl_sax_aprser:stream/2 case.
}).
diff --git a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc
index 6f95b67b99..e198f2fef5 100644
--- a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc
+++ b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc
@@ -113,6 +113,10 @@ parse_dtd(Xml, State) ->
State3 = event_callback(endDocument, State2),
ets:delete(RefTable),
{ok, State3#xmerl_sax_parser_state.event_state, Rest};
+ {endDocument, Rest, State2} when is_record(State2, xmerl_sax_parser_state) ->
+ State3 = event_callback(endDocument, State2),
+ ets:delete(RefTable),
+ {ok, State3#xmerl_sax_parser_state.event_state, Rest};
Other ->
_State2 = event_callback(endDocument, State1),
ets:delete(RefTable),
@@ -211,8 +215,9 @@ parse_prolog(?STRING_REST("<?", Rest), State) ->
{Rest1, State1} ->
parse_prolog(Rest1, State1);
{endDocument, Rest1, State1} ->
- IValue = ?TO_INPUT_FORMAT("<?"),
- {?APPEND_STRING(IValue, Rest1), State1}
+ parse_prolog(Rest1, State1)
+ % IValue = ?TO_INPUT_FORMAT("<?"),
+ % {?APPEND_STRING(IValue, Rest1), State1}
end;
parse_prolog(?STRING_REST("<!", Rest), State) ->
parse_prolog_1(Rest, State);
@@ -414,10 +419,11 @@ parse_pi(?STRING_UNBOUND_REST(C, Rest) = Bytes, State) ->
parse_name(Rest, State, [C]),
case string:to_lower(PiTarget) of
"xml" ->
- case State#xmerl_sax_parser_state.end_tags of
- [] ->
+ case check_if_new_doc_allowed(State#xmerl_sax_parser_state.input_type,
+ State#xmerl_sax_parser_state.end_tags) of
+ true ->
{endDocument, Bytes, State};
- _ ->
+ false ->
?fatal_error(State1, "<?xml ...?> not first in document")
end;
_ ->
@@ -431,6 +437,11 @@ parse_pi(?STRING_UNBOUND_REST(C, Rest) = Bytes, State) ->
parse_pi(Bytes, State) ->
unicode_incomplete_check([Bytes, State, fun parse_pi/2], undefined).
+check_if_new_doc_allowed(stream, []) ->
+ true;
+check_if_new_doc_allowed(_, _) ->
+ false.
+
%%----------------------------------------------------------------------
%% Function: parse_pi_1(Rest, State) -> Result
%% Input: Rest = string() | binary()
@@ -665,9 +676,9 @@ parse_misc(?STRING_REST("<?", Rest), State, Eod) ->
case parse_pi(Rest, State) of
{Rest1, State1} ->
parse_misc(Rest1, State1, Eod);
- {endDocument, Rest1, State1} ->
+ {endDocument, _Rest1, State1} ->
IValue = ?TO_INPUT_FORMAT("<?"),
- {?APPEND_STRING(IValue, Rest1), State1}
+ {?APPEND_STRING(IValue, Rest), State1}
end;
parse_misc(?STRING("<!") = Rest, State, Eod) ->
cf(Rest, State, Eod, fun parse_misc/3);
@@ -1076,9 +1087,9 @@ parse_content(?STRING_REST("<?", Rest), State, Acc, IgnorableWS) ->
case parse_pi(Rest, State1) of
{Rest1, State2} ->
parse_content(Rest1, State2, [], true);
- {endDocument, Rest1, State2} ->
+ {endDocument, _Rest1, State2} ->
IValue = ?TO_INPUT_FORMAT("<?"),
- {?APPEND_STRING(IValue, Rest1), State2}
+ {?APPEND_STRING(IValue, Rest), State2}
end;
parse_content(?STRING_REST("<!", Rest1) = Rest, #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) ->
case ET of
@@ -1664,8 +1675,9 @@ handle_external_entity({file, FileToOpen}, State) ->
{?STRING_EMPTY, EntityState} =
parse_external_entity_1(<<>>,
State#xmerl_sax_parser_state{continuation_state=FD,
- current_location=filename:dirname(FileToOpen),
- entity=filename:basename(FileToOpen)}),
+ current_location=filename:dirname(FileToOpen),
+ entity=filename:basename(FileToOpen),
+ input_type=file}),
file:close(FD),
EntityState#xmerl_sax_parser_state.event_state
end;
@@ -1682,8 +1694,9 @@ handle_external_entity({http, Url}, State) ->
{?STRING_EMPTY, EntityState} =
parse_external_entity_1(<<>>,
State#xmerl_sax_parser_state{continuation_state=FD,
- current_location=filename:dirname(Url),
- entity=filename:basename(Url)}),
+ current_location=filename:dirname(Url),
+ entity=filename:basename(Url),
+ input_type=file}),
file:close(FD),
file:delete(TmpFile),
EntityState#xmerl_sax_parser_state.event_state
@@ -1899,9 +1912,9 @@ parse_doctype_decl(?STRING_REST("<?", Rest), State) ->
case parse_pi(Rest, State) of
{Rest1, State1} ->
parse_doctype_decl(Rest1, State1);
- {endDocument, Rest1, State1} ->
+ {endDocument, _Rest1, State1} ->
IValue = ?TO_INPUT_FORMAT("<?"),
- {?APPEND_STRING(IValue, Rest1), State1}
+ {?APPEND_STRING(IValue, Rest), State1}
end;
parse_doctype_decl(?STRING_REST("%", Rest), State) ->
{Ref, Rest1, State1} = parse_pe_reference(Rest, State),