aboutsummaryrefslogtreecommitdiffstats
path: root/lib/xmerl/src
diff options
context:
space:
mode:
Diffstat (limited to 'lib/xmerl/src')
-rw-r--r--lib/xmerl/src/xmerl_regexp.erl4
-rw-r--r--lib/xmerl/src/xmerl_sax_old_dom.erl7
-rw-r--r--lib/xmerl/src/xmerl_sax_parser.erl171
-rw-r--r--lib/xmerl/src/xmerl_sax_parser_base.erlsrc293
-rw-r--r--lib/xmerl/src/xmerl_sax_simple_dom.erl5
-rw-r--r--lib/xmerl/src/xmerl_scan.erl4
6 files changed, 338 insertions, 146 deletions
diff --git a/lib/xmerl/src/xmerl_regexp.erl b/lib/xmerl/src/xmerl_regexp.erl
index fc89b80ff1..1bf8496673 100644
--- a/lib/xmerl/src/xmerl_regexp.erl
+++ b/lib/xmerl/src/xmerl_regexp.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2006-2016. All Rights Reserved.
+%% Copyright Ericsson AB 2006-2017. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -1154,7 +1154,7 @@ comp_crs([], Last) -> [{Last,maxchar}].
%% build_dfa(NFA, NfaStartState) -> {DFA,DfaStartState}.
%% Build a DFA from an NFA using "subset construction". The major
%% difference from the book is that we keep the marked and unmarked
-%% DFA states in seperate lists. New DFA states are added to the
+%% DFA states in separate lists. New DFA states are added to the
%% unmarked list and states are marked by moving them to the marked
%% list. We assume that the NFA accepting state numbers are in
%% ascending order for the rules and use ordsets to keep this order.
diff --git a/lib/xmerl/src/xmerl_sax_old_dom.erl b/lib/xmerl/src/xmerl_sax_old_dom.erl
index fefcf03fce..411121370f 100644
--- a/lib/xmerl/src/xmerl_sax_old_dom.erl
+++ b/lib/xmerl/src/xmerl_sax_old_dom.erl
@@ -127,9 +127,10 @@ build_dom(endDocument,
State#xmerl_sax_old_dom_state{dom=[Decl, Current#xmlElement{
content=lists:reverse(C)
}]};
- _ ->
- %%?dbg("~p\n", [D]),
- ?error("we're not at end the document when endDocument event is encountered.")
+ _ ->
+ %% endDocument is also sent by the parser when a fault occur to tell
+ %% the event receiver that no more input will be sent
+ State
end;
%% Element
diff --git a/lib/xmerl/src/xmerl_sax_parser.erl b/lib/xmerl/src/xmerl_sax_parser.erl
index 1aef6c58c4..e383c4c349 100644
--- a/lib/xmerl/src/xmerl_sax_parser.erl
+++ b/lib/xmerl/src/xmerl_sax_parser.erl
@@ -64,7 +64,7 @@
%% Description: Parse file containing an XML document.
%%----------------------------------------------------------------------
file(Name,Options) ->
- case file:open(Name, [raw, read,binary]) of
+ case file:open(Name, [raw, read_ahead, read,binary]) of
{error, Reason} ->
{error,{Name, file:format_error(Reason)}};
{ok, FD} ->
@@ -120,21 +120,22 @@ stream(Xml, Options, InputType) when is_binary(Xml), is_list(Options) ->
normal ->
parse
end,
- case detect_charset(Xml, State) of
- {error, Reason} -> {fatal_error,
- {
- State#xmerl_sax_parser_state.current_location,
- State#xmerl_sax_parser_state.entity,
- 1
- },
- Reason,
- [],
- State#xmerl_sax_parser_state.event_state};
- {Xml1, State1} ->
- parse_binary(Xml1,
- State1#xmerl_sax_parser_state{input_type = InputType},
- ParseFunction)
- end
+ try
+ {Xml1, State1} = detect_charset(Xml, State),
+ parse_binary(Xml1,
+ State1#xmerl_sax_parser_state{input_type = InputType},
+ ParseFunction)
+ catch
+ throw:{fatal_error, {State2, Reason}} ->
+ {fatal_error,
+ {
+ State2#xmerl_sax_parser_state.current_location,
+ State2#xmerl_sax_parser_state.entity,
+ 1
+ },
+ Reason, [],
+ State2#xmerl_sax_parser_state.event_state}
+ end
end.
%%----------------------------------------------------------------------
@@ -156,8 +157,8 @@ parse_binary(Xml, #xmerl_sax_parser_state{encoding={utf16,big}}=State, F) ->
xmerl_sax_parser_utf16be:F(Xml, State);
parse_binary(Xml, #xmerl_sax_parser_state{encoding=latin1}=State, F) ->
xmerl_sax_parser_latin1:F(Xml, State);
-parse_binary(_, #xmerl_sax_parser_state{encoding=Enc}, _) ->
- {error, lists:flatten(io_lib:format("Charcter set ~p not supported", [Enc]))}.
+parse_binary(_, #xmerl_sax_parser_state{encoding=Enc}, State) ->
+ ?fatal_error(State, lists:flatten(io_lib:format("Charcter set ~p not supported", [Enc]))).
%%----------------------------------------------------------------------
%% Function: initial_state/0
@@ -211,8 +212,7 @@ parse_options([{entity, Entity} |Options], State) ->
parse_options([skip_external_dtd |Options], State) ->
parse_options(Options, State#xmerl_sax_parser_state{skip_external_dtd = true});
parse_options([O |_], _State) ->
- {error,
- lists:flatten(io_lib:format("Option: ~p not supported", [O]))}.
+ {error, lists:flatten(io_lib:format("Option: ~p not supported", [O]))}.
check_encoding_option(E) when E==utf8; E=={utf16,little}; E=={utf16,big};
@@ -230,16 +230,10 @@ check_encoding_option(E) ->
%% Output: {utf8|utf16le|utf16be|iso8859, Xml, State}
%% Description: Detects which character set is used in a binary stream.
%%----------------------------------------------------------------------
-detect_charset(<<>>, #xmerl_sax_parser_state{continuation_fun = undefined} = _) ->
- {error, "Can't detect character encoding due to no indata"};
-detect_charset(<<>>, #xmerl_sax_parser_state{continuation_fun = CFun,
- continuation_state = CState} = State) ->
- case CFun(CState) of
- {<<>>, _} ->
- {error, "Can't detect character encoding due to lack of indata"};
- {NewBytes, NewContState} ->
- detect_charset(NewBytes, State#xmerl_sax_parser_state{continuation_state = NewContState})
- end;
+detect_charset(<<>>, #xmerl_sax_parser_state{continuation_fun = undefined} = State) ->
+ ?fatal_error(State, "Can't detect character encoding due to lack of indata");
+detect_charset(<<>>, State) ->
+ cf(<<>>, State, fun detect_charset/2);
detect_charset(Bytes, State) ->
case unicode:bom_to_encoding(Bytes) of
{latin1, 0} ->
@@ -249,25 +243,47 @@ detect_charset(Bytes, State) ->
{RealBytes, State#xmerl_sax_parser_state{encoding=Enc}}
end.
+detect_charset_1(<<16#00>> = Xml, State) ->
+ cf(Xml, State, fun detect_charset_1/2);
+detect_charset_1(<<16#00, 16#3C>> = Xml, State) ->
+ cf(Xml, State, fun detect_charset_1/2);
+detect_charset_1(<<16#00, 16#3C, 16#00>> = Xml, State) ->
+ cf(Xml, State, fun detect_charset_1/2);
detect_charset_1(<<16#00, 16#3C, 16#00, 16#3F, _/binary>> = Xml, State) ->
{Xml, State#xmerl_sax_parser_state{encoding={utf16, big}}};
+detect_charset_1(<<16#3C>> = Xml, State) ->
+ cf(Xml, State, fun detect_charset_1/2);
+detect_charset_1(<<16#3C, 16#00>> = Xml, State) ->
+ cf(Xml, State, fun detect_charset_1/2);
+detect_charset_1(<<16#3C, 16#00, 16#3F>> = Xml, State) ->
+ cf(Xml, State, fun detect_charset_1/2);
detect_charset_1(<<16#3C, 16#00, 16#3F, 16#00, _/binary>> = Xml, State) ->
{Xml, State#xmerl_sax_parser_state{encoding={utf16, little}}};
-detect_charset_1(<<16#3C, 16#3F, 16#78, 16#6D, 16#6C, Xml2/binary>> = Xml, State) ->
- case parse_xml_directive(Xml2) of
+detect_charset_1(<<16#3C>> = Xml, State) ->
+ cf(Xml, State, fun detect_charset_1/2);
+detect_charset_1(<<16#3C, 16#3F>> = Xml, State) ->
+ cf(Xml, State, fun detect_charset_1/2);
+detect_charset_1(<<16#3C, 16#3F, 16#78>> = Xml, State) ->
+ cf(Xml, State, fun detect_charset_1/2);
+detect_charset_1(<<16#3C, 16#3F, 16#78, 16#6D>> = Xml, State) ->
+ cf(Xml, State, fun detect_charset_1/2);
+detect_charset_1(<<16#3C, 16#3F, 16#78, 16#6D, 16#6C, Xml2/binary>>, State) ->
+ {Xml3, State1} = read_until_end_of_xml_directive(Xml2, State),
+ case parse_xml_directive(Xml3) of
{error, Reason} ->
- {error, Reason};
+ ?fatal_error(State, Reason);
AttrList ->
case lists:keysearch("encoding", 1, AttrList) of
{value, {_, E}} ->
case convert_encoding(E) of
{error, Reason} ->
- {error, Reason};
+ ?fatal_error(State, Reason);
Enc ->
- {Xml, State#xmerl_sax_parser_state{encoding=Enc}}
+ {<<16#3C, 16#3F, 16#78, 16#6D, 16#6C, Xml3/binary>>,
+ State1#xmerl_sax_parser_state{encoding=Enc}}
end;
_ ->
- {Xml, State}
+ {<<16#3C, 16#3F, 16#78, 16#6D, 16#6C, Xml3/binary>>, State1}
end
end;
detect_charset_1(Xml, State) ->
@@ -377,7 +393,7 @@ parse_value_1(<<C, Rest/binary>>, Stop, Acc) ->
parse_value_1(Rest, Stop, [C |Acc]).
%%======================================================================
-%%Default functions
+%% Default functions
%%======================================================================
%%----------------------------------------------------------------------
%% Function: default_event_cb(Event, LineNo, State) -> Result
@@ -393,7 +409,7 @@ default_event_cb(_Event, _LineNo, State) ->
%%----------------------------------------------------------------------
%% Function: default_continuation_cb(IoDevice) -> Result
%% IoDevice = iodevice()
-%% Output: Result = {[char()], State}
+%% Output: Result = {binary(), IoDevice}
%% Description: Default continuation callback reading blocks.
%%----------------------------------------------------------------------
default_continuation_cb(IoDevice) ->
@@ -403,3 +419,82 @@ default_continuation_cb(IoDevice) ->
{ok, FileBin} ->
{FileBin, IoDevice}
end.
+
+%%----------------------------------------------------------------------
+%% Function: read_until_end_of_xml_directive(Rest, State) -> Result
+%% Rest = binary()
+%% Output: Result = {binary(), State}
+%% Description: Reads a utf8 or latin1 until it finds '?>'
+%%----------------------------------------------------------------------
+read_until_end_of_xml_directive(Rest, State) ->
+ case binary:match(Rest, <<"?>">>) of
+ nomatch ->
+ case cf(Rest, State) of
+ {<<>>, _} ->
+ ?fatal_error(State, "Can't detect character encoding due to lack of indata");
+ {NewBytes, NewState} ->
+ read_until_end_of_xml_directive(NewBytes, NewState)
+ end;
+ _ ->
+ {Rest, State}
+ end.
+
+
+%%----------------------------------------------------------------------
+%% Function : cf(Rest, State) -> Result
+%% Parameters: Rest = binary()
+%% State = #xmerl_sax_parser_state{}
+%% NextCall = fun()
+%% Result : {Rest, State}
+%% Description: Function that uses provided fun to read another chunk from
+%% input stream and calls the fun in NextCall.
+%%----------------------------------------------------------------------
+cf(_Rest, #xmerl_sax_parser_state{continuation_fun = undefined} = State) ->
+ ?fatal_error(State, "Continuation function undefined");
+cf(Rest, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = CState} = State) ->
+ Result =
+ try
+ CFun(CState)
+ catch
+ throw:ErrorTerm ->
+ ?fatal_error(State, ErrorTerm);
+ exit:Reason ->
+ ?fatal_error(State, {'EXIT', Reason})
+ end,
+ case Result of
+ {<<>>, _} ->
+ ?fatal_error(State, "Can't detect character encoding due to lack of indata");
+ {NewBytes, NewContState} ->
+ {<<Rest/binary, NewBytes/binary>>,
+ State#xmerl_sax_parser_state{continuation_state = NewContState}}
+ end.
+
+%%----------------------------------------------------------------------
+%% Function : cf(Rest, State, NextCall) -> Result
+%% Parameters: Rest = binary()
+%% State = #xmerl_sax_parser_state{}
+%% NextCall = fun()
+%% Result : {Rest, State}
+%% Description: Function that uses provided fun to read another chunk from
+%% input stream and calls the fun in NextCall.
+%%----------------------------------------------------------------------
+cf(_Rest, #xmerl_sax_parser_state{continuation_fun = undefined} = State, _) ->
+ ?fatal_error(State, "Continuation function undefined");
+cf(Rest, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = CState} = State,
+ NextCall) ->
+ Result =
+ try
+ CFun(CState)
+ catch
+ throw:ErrorTerm ->
+ ?fatal_error(State, ErrorTerm);
+ exit:Reason ->
+ ?fatal_error(State, {'EXIT', Reason})
+ end,
+ case Result of
+ {<<>>, _} ->
+ ?fatal_error(State, "Can't detect character encoding due to lack of indata");
+ {NewBytes, NewContState} ->
+ NextCall(<<Rest/binary, NewBytes/binary>>,
+ State#xmerl_sax_parser_state{continuation_state = NewContState})
+ end.
diff --git a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc
index f3470b2809..1dca9608cb 100644
--- a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc
+++ b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc
@@ -64,38 +64,42 @@
%% Description: Parsing XML from input stream.
%%----------------------------------------------------------------------
parse(Xml, State) ->
- RefTable = ets:new(xmerl_sax_entity_refs, [private]),
-
- State1 = event_callback(startDocument, State),
-
- case catch parse_document(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}) of
- {ok, Rest, State2} ->
- State3 = event_callback(endDocument, State2),
- ets:delete(RefTable),
- case check_if_rest_ok(State3#xmerl_sax_parser_state.input_type, Rest) of
- true ->
- {ok, State3#xmerl_sax_parser_state.event_state, Rest};
- false ->
- format_error(fatal_error, State3, "Input found after legal document")
- end;
- {fatal_error, {State2, Reason}} ->
- State3 = event_callback(endDocument, State2),
- ets:delete(RefTable),
- format_error(fatal_error, State3, Reason);
- {event_receiver_error, State2, {Tag, Reason}} ->
- State3 = event_callback(endDocument, State2),
- ets:delete(RefTable),
- format_error(Tag, State3, Reason);
- {endDocument, Rest, State2} ->
- State3 = event_callback(endDocument, State2),
- ets:delete(RefTable),
- {ok, State3#xmerl_sax_parser_state.event_state, Rest};
- Other ->
- _State2 = event_callback(endDocument, State1),
- ets:delete(RefTable),
- {fatal_error, Other}
+ RefTable = maps:new(),
+
+ try
+ State1 = event_callback(startDocument, State),
+ Result = parse_document(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}),
+ handle_end_document(Result)
+ catch
+ throw:Exception ->
+ handle_end_document(Exception);
+ _:OtherError ->
+ handle_end_document({other, OtherError, State})
end.
+ % case catch parse_document(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}) of
+ % {ok, Rest, State2} ->
+ % State3 = event_callback(endDocument, State2),
+ % case check_if_rest_ok(State3#xmerl_sax_parser_state.input_type, Rest) of
+ % true ->
+ % {ok, State3#xmerl_sax_parser_state.event_state, Rest};
+ % false ->
+ % format_error(fatal_error, State3, "Input found after legal document")
+ % end;
+ % {fatal_error, {State2, Reason}} ->
+ % State3 = event_callback(endDocument, State2),
+ % format_error(fatal_error, State3, Reason);
+ % {event_receiver_error, State2, {Tag, Reason}} ->
+ % State3 = event_callback(endDocument, State2),
+ % format_error(Tag, State3, Reason);
+ % {endDocument, Rest, State2} ->
+ % State3 = event_callback(endDocument, State2),
+ % {ok, State3#xmerl_sax_parser_state.event_state, Rest};
+ % Other ->
+ % _State2 = event_callback(endDocument, State1),
+ % {fatal_error, Other}
+ % end.
+
%%----------------------------------------------------------------------
%% Function: parse_dtd(Xml, State) -> Result
%% Input: Xml = string() | binary()
@@ -105,38 +109,120 @@ parse(Xml, State) ->
%% Description: Parsing XML DTD from input stream.
%%----------------------------------------------------------------------
parse_dtd(Xml, State) ->
- RefTable = ets:new(xmerl_sax_entity_refs, [private]),
-
- State1 = event_callback(startDocument, State),
-
- case catch parse_external_entity_1(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}) of
- {fatal_error, {State2, Reason}} ->
- State3 = event_callback(endDocument, State2),
- ets:delete(RefTable),
- format_error(fatal_error, State3, Reason);
- {event_receiver_error, State2, {Tag, Reason}} ->
- State3 = event_callback(endDocument, State2),
- format_error(Tag, State3, Reason);
- {Rest, State2} when is_record(State2, xmerl_sax_parser_state) ->
- State3 = event_callback(endDocument, State2),
- ets:delete(RefTable),
- {ok, State3#xmerl_sax_parser_state.event_state, Rest};
- {endDocument, Rest, State2} when is_record(State2, xmerl_sax_parser_state) ->
- State3 = event_callback(endDocument, State2),
- ets:delete(RefTable),
- {ok, State3#xmerl_sax_parser_state.event_state, Rest};
- Other ->
- _State2 = event_callback(endDocument, State1),
- ets:delete(RefTable),
- {fatal_error, Other}
+ RefTable = maps:new(),
+
+ try
+ State1 = event_callback(startDocument, State),
+ Result = parse_external_entity_1(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}),
+ handle_end_document(Result)
+ catch
+ throw:Exception ->
+ handle_end_document(Exception);
+ _:OtherError ->
+ handle_end_document({other, OtherError, State})
end.
+ % case catch parse_external_entity_1(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}) of
+ % {fatal_error, {State2, Reason}} ->
+ % State3 = event_callback(endDocument, State2),
+ % format_error(fatal_error, State3, Reason);
+ % {event_receiver_error, State2, {Tag, Reason}} ->
+ % State3 = event_callback(endDocument, State2),
+ % format_error(Tag, State3, Reason);
+ % {Rest, State2} when is_record(State2, xmerl_sax_parser_state) ->
+ % State3 = event_callback(endDocument, State2),
+ % {ok, State3#xmerl_sax_parser_state.event_state, Rest};
+ % {endDocument, Rest, State2} when is_record(State2, xmerl_sax_parser_state) ->
+ % State3 = event_callback(endDocument, State2),
+ % {ok, State3#xmerl_sax_parser_state.event_state, Rest};
+ % Other ->
+ % _State2 = event_callback(endDocument, State1),
+ % {fatal_error, Other}
+ % end.
+
+
%%======================================================================
%% Internal functions
%%======================================================================
%%----------------------------------------------------------------------
+%% Function: handle_end_document(ParserResult) -> Result
+%% Input: ParseResult = term()
+%% Output: Result = {ok, Rest, EventState} |
+%% EventState = term()
+%% Description: Ends the parsing and formats output
+%%----------------------------------------------------------------------
+handle_end_document({ok, Rest, State}) ->
+ %%ok case from parse
+ try
+ State1 = event_callback(endDocument, State),
+ case check_if_rest_ok(State1#xmerl_sax_parser_state.input_type, Rest) of
+ true ->
+ {ok, State1#xmerl_sax_parser_state.event_state, Rest};
+ false ->
+ format_error(fatal_error, State1, "Input found after legal document")
+ end
+ catch
+ throw:{event_receiver_error, State2, {Tag, Reason}} ->
+ format_error(Tag, State2, Reason);
+ _:Other ->
+ {fatal_error, Other}
+ end;
+handle_end_document({endDocument, Rest, State}) ->
+ %% ok case from parse and parse_dtd
+ try
+ State1 = event_callback(endDocument, State),
+ {ok, State1#xmerl_sax_parser_state.event_state, Rest}
+ catch
+ throw:{event_receiver_error, State2, {Tag, Reason}} ->
+ format_error(Tag, State2, Reason);
+ _:Other ->
+ {fatal_error, Other}
+ end;
+handle_end_document({fatal_error, {State, Reason}}) ->
+ try
+ State1 = event_callback(endDocument, State),
+ format_error(fatal_error, State1, Reason)
+ catch
+ throw:{event_receiver_error, State2, {Tag, Reason}} ->
+ format_error(Tag, State2, Reason);
+ _:Other ->
+ {fatal_error, Other}
+ end;
+handle_end_document({event_receiver_error, State, {Tag, Reason}}) ->
+ try
+ State1 = event_callback(endDocument, State),
+ format_error(Tag, State1, Reason)
+ catch
+ throw:{event_receiver_error, State2, {Tag, Reason}} ->
+ format_error(Tag, State2, Reason);
+ _:Other ->
+ {fatal_error, Other}
+ end;
+handle_end_document({Rest, State}) when is_record(State, xmerl_sax_parser_state) ->
+ %%ok case from parse_dtd
+ try
+ State1 = event_callback(endDocument, State),
+ {ok, State1#xmerl_sax_parser_state.event_state, Rest}
+ catch
+ throw:{event_receiver_error, State2, {Tag, Reason}} ->
+ format_error(Tag, State2, Reason);
+ _:Other ->
+ {fatal_error, Other}
+ end;
+handle_end_document({other, Error, State}) ->
+ try
+ _State1 = event_callback(endDocument, State),
+ {fatal_error, Error}
+ catch
+ throw:{event_receiver_error, State2, {Tag, Reason}} ->
+ format_error(Tag, State2, Reason);
+ _:Other ->
+ {fatal_error, Other}
+ end.
+
+%%----------------------------------------------------------------------
%% Function: parse_document(Rest, State) -> Result
%% Input: Rest = string() | binary()
%% State = #xmerl_sax_parser_state{}
@@ -439,6 +525,7 @@ check_if_rest_ok(stream, _) ->
check_if_rest_ok(_, _) ->
false.
+
%%----------------------------------------------------------------------
%% Function: parse_pi_1(Rest, State) -> Result
%% Input: Rest = string() | binary()
@@ -883,11 +970,11 @@ send_end_prefix_mapping_event([{Prefix, _Uri} |Ns], State) ->
parse_eq(?STRING_EMPTY, State) ->
cf(?STRING_EMPTY, State, fun parse_eq/2);
parse_eq(?STRING_REST("=", Rest), State) ->
- {Rest, State};
+ {Rest, State};
parse_eq(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
- {_WS, Rest, State1} =
- whitespace(Bytes, State, []),
- parse_eq(Rest, State1);
+ {_WS, Rest, State1} =
+ whitespace(Bytes, State, []),
+ parse_eq(Rest, State1);
parse_eq(Bytes, State) ->
unicode_incomplete_check([Bytes, State, fun parse_eq/2],
"expecting = or whitespace").
@@ -905,11 +992,11 @@ parse_eq(Bytes, State) ->
parse_att_value(?STRING_EMPTY, State) ->
cf(?STRING_EMPTY, State, fun parse_att_value/2);
parse_att_value(?STRING_UNBOUND_REST(C, Rest), State) when C == $'; C == $" ->
- parse_att_value(Rest, State, C, []);
+ parse_att_value(Rest, State, C, []);
parse_att_value(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
- {_WS, Rest, State1} =
- whitespace(Bytes, State, []),
- parse_att_value(Rest, State1);
+ {_WS, Rest, State1} =
+ whitespace(Bytes, State, []),
+ parse_att_value(Rest, State1);
parse_att_value(Bytes, State) ->
unicode_incomplete_check([Bytes, State, fun parse_att_value/2],
"\', \" or whitespace expected").
@@ -1360,17 +1447,20 @@ parse_pe_reference_1(Bytes, State, Name) ->
%%----------------------------------------------------------------------
-%% Function: insert_reference(Reference, State) -> Result
-%% Parameters: Reference = string()
+%% Function: insert_reference(Name, Ref, State) -> Result
+%% Parameters: Name = string()
+%% Ref = {Type, Value}
+%% Type = atom()
+%% Value = term()
%% State = #xmerl_sax_parser_state{}
%% Result :
%%----------------------------------------------------------------------
-insert_reference({Name, Type, Value}, Table) ->
- case ets:lookup(Table, Name) of
- [{Name, _, _}] ->
- ok;
+insert_reference(Name, Value, #xmerl_sax_parser_state{ref_table = Map} = State) ->
+ case maps:find(Name, Map) of
+ error ->
+ State#xmerl_sax_parser_state{ref_table = maps:put(Name, Value, Map)};
_ ->
- ets:insert(Table, {Name, Type, Value})
+ State
end.
@@ -1391,8 +1481,8 @@ look_up_reference("apos", _, _) ->
look_up_reference("quot", _, _) ->
{internal_general, "quot", "\""};
look_up_reference(Name, HaveToExist, State) ->
- case ets:lookup(State#xmerl_sax_parser_state.ref_table, Name) of
- [{Name, Type, Value}] ->
+ case maps:find(Name, State#xmerl_sax_parser_state.ref_table) of
+ {ok, {Type, Value}} ->
{Type, Name, Value};
_ ->
case HaveToExist of
@@ -1474,7 +1564,7 @@ parse_system_litteral(?STRING_EMPTY, State, Stop, Acc) ->
parse_system_litteral(?STRING_UNBOUND_REST(Stop, Rest), State, Stop, Acc) ->
{lists:reverse(Acc), Rest, State};
parse_system_litteral(?STRING_UNBOUND_REST(C, Rest), State, Stop, Acc) ->
- parse_system_litteral(Rest, State, Stop, [C |Acc]);
+ parse_system_litteral(Rest, State, Stop, [C |Acc]);
parse_system_litteral(Bytes, State, Stop, Acc) ->
unicode_incomplete_check([Bytes, State, Stop, Acc, fun parse_system_litteral/4],
undefined).
@@ -1646,9 +1736,11 @@ parse_external_entity(State, _PubId, SysId) ->
end_tags = []},
- EventState = handle_external_entity(ExtRef, State1),
+ {EventState, RefTable} = handle_external_entity(ExtRef, State1),
- NewState = event_callback({endEntity, SysId}, SaveState#xmerl_sax_parser_state{event_state=EventState}),
+ NewState = event_callback({endEntity, SysId},
+ SaveState#xmerl_sax_parser_state{event_state=EventState,
+ ref_table=RefTable}),
NewState#xmerl_sax_parser_state{file_type=normal}.
@@ -1675,7 +1767,8 @@ handle_external_entity({file, FileToOpen}, State) ->
entity=filename:basename(FileToOpen),
input_type=file}),
ok = file:close(FD),
- EntityState#xmerl_sax_parser_state.event_state
+ {EntityState#xmerl_sax_parser_state.event_state,
+ EntityState#xmerl_sax_parser_state.ref_table}
end;
handle_external_entity({http, Url}, State) ->
@@ -1695,7 +1788,9 @@ handle_external_entity({http, Url}, State) ->
input_type=file}),
ok = file:close(FD),
ok = file:delete(TmpFile),
- EntityState#xmerl_sax_parser_state.event_state
+ {EntityState#xmerl_sax_parser_state.event_state,
+ EntityState#xmerl_sax_parser_state.ref_table}
+
end
catch
throw:{error, Error} ->
@@ -1716,7 +1811,7 @@ handle_external_entity({Tag, _Url}, State) ->
parse_external_entity_1(?STRING_EMPTY, #xmerl_sax_parser_state{file_type=Type} = State) ->
case catch cf(?STRING_EMPTY, State, fun parse_external_entity_1/2) of
{Rest, State1} when is_record(State1, xmerl_sax_parser_state) ->
- {Rest, State};
+ {Rest, State1};
{fatal_error, {State1, "No more bytes"}} when Type == dtd; Type == entity ->
{?STRING_EMPTY, State1};
Other ->
@@ -2442,24 +2537,24 @@ parse_entity_def(?STRING_EMPTY, State, Name) ->
cf(?STRING_EMPTY, State, Name, fun parse_entity_def/3);
parse_entity_def(?STRING_UNBOUND_REST(C, Rest), State, Name) when C == $'; C == $" ->
{Value, Rest1, State1} = parse_entity_value(Rest, State, C, []),
- insert_reference({Name, internal_general, Value}, State1#xmerl_sax_parser_state.ref_table),
- State2 = event_callback({internalEntityDecl, Name, Value}, State1),
- {_WS, Rest2, State3} = whitespace(Rest1, State2, []),
- parse_def_end(Rest2, State3);
+ State2 = insert_reference(Name, {internal_general, Value}, State1),
+ State3 = event_callback({internalEntityDecl, Name, Value}, State2),
+ {_WS, Rest2, State4} = whitespace(Rest1, State3, []),
+ parse_def_end(Rest2, State4);
parse_entity_def(?STRING_UNBOUND_REST(C, _) = Rest, State, Name) when C == $S; C == $P ->
{PubId, SysId, Rest1, State1} = parse_external_id(Rest, State, false),
{Ndata, Rest2, State2} = parse_ndata(Rest1, State1),
case Ndata of
undefined ->
- insert_reference({Name, external_general, {PubId, SysId}},
- State2#xmerl_sax_parser_state.ref_table),
- State3 = event_callback({externalEntityDecl, Name, PubId, SysId}, State2),
- {Rest2, State3};
+ State3 = insert_reference(Name, {external_general, {PubId, SysId}},
+ State2),
+ State4 = event_callback({externalEntityDecl, Name, PubId, SysId}, State3),
+ {Rest2, State4};
_ ->
- insert_reference({Name, unparsed, {PubId, SysId, Ndata}},
- State2#xmerl_sax_parser_state.ref_table),
- State3 = event_callback({unparsedEntityDecl, Name, PubId, SysId, Ndata}, State2),
- {Rest2, State3}
+ State3 = insert_reference(Name, {unparsed, {PubId, SysId, Ndata}},
+ State2),
+ State4 = event_callback({unparsedEntityDecl, Name, PubId, SysId, Ndata}, State3),
+ {Rest2, State4}
end;
parse_entity_def(Bytes, State, Name) ->
unicode_incomplete_check([Bytes, State, Name, fun parse_entity_def/3],
@@ -2636,19 +2731,19 @@ parse_pe_def(?STRING_EMPTY, State, Name) ->
parse_pe_def(?STRING_UNBOUND_REST(C, Rest), State, Name) when C == $'; C == $" ->
{Value, Rest1, State1} = parse_entity_value(Rest, State, C, []),
Name1 = "%" ++ Name,
- insert_reference({Name1, internal_parameter, Value},
- State1#xmerl_sax_parser_state.ref_table),
- State2 = event_callback({internalEntityDecl, Name1, Value}, State1),
- {_WS, Rest2, State3} = whitespace(Rest1, State2, []),
- parse_def_end(Rest2, State3);
+ State2 = insert_reference(Name1, {internal_parameter, Value},
+ State1),
+ State3 = event_callback({internalEntityDecl, Name1, Value}, State2),
+ {_WS, Rest2, State4} = whitespace(Rest1, State3, []),
+ parse_def_end(Rest2, State4);
parse_pe_def(?STRING_UNBOUND_REST(C, _) = Bytes, State, Name) when C == $S; C == $P ->
{PubId, SysId, Rest1, State1} = parse_external_id(Bytes, State, false),
Name1 = "%" ++ Name,
- insert_reference({Name1, external_parameter, {PubId, SysId}},
- State1#xmerl_sax_parser_state.ref_table),
- State2 = event_callback({externalEntityDecl, Name1, PubId, SysId}, State1),
- {_WS, Rest2, State3} = whitespace(Rest1, State2, []),
- parse_def_end(Rest2, State3);
+ State2 = insert_reference(Name1, {external_parameter, {PubId, SysId}},
+ State1),
+ State3 = event_callback({externalEntityDecl, Name1, PubId, SysId}, State2),
+ {_WS, Rest2, State4} = whitespace(Rest1, State3, []),
+ parse_def_end(Rest2, State4);
parse_pe_def(Bytes, State, Name) ->
unicode_incomplete_check([Bytes, State, Name, fun parse_pe_def/3],
"\", \', SYSTEM or PUBLIC expected").
diff --git a/lib/xmerl/src/xmerl_sax_simple_dom.erl b/lib/xmerl/src/xmerl_sax_simple_dom.erl
index 7eb3afd499..d842bd982b 100644
--- a/lib/xmerl/src/xmerl_sax_simple_dom.erl
+++ b/lib/xmerl/src/xmerl_sax_simple_dom.erl
@@ -129,8 +129,9 @@ build_dom(endDocument,
State#xmerl_sax_simple_dom_state{dom=[Decl, {Tag, Attributes,
lists:reverse(Content)}]};
_ ->
- ?dbg("~p\n", [D]),
- ?error("we're not at end the document when endDocument event is encountered.")
+ %% endDocument is also sent by the parser when a fault occur to tell
+ %% the event receiver that no more input will be sent
+ State
end;
%% Element
diff --git a/lib/xmerl/src/xmerl_scan.erl b/lib/xmerl/src/xmerl_scan.erl
index 3dd9504f87..a1f6ad4e2c 100644
--- a/lib/xmerl/src/xmerl_scan.erl
+++ b/lib/xmerl/src/xmerl_scan.erl
@@ -474,8 +474,8 @@ event(_X, S) ->
%% into multiple objects (in which case {Acc',Pos',S'} should be returned.)
%% If {Acc',S'} is returned, Pos will be incremented by 1 by default.
%% Below is an example of an acceptable operation
-acc(X = #xmlText{value = Text}, Acc, S) ->
- {[X#xmlText{value = Text}|Acc], S};
+acc(#xmlText{value = Text}, [X = #xmlText{value = AccText}], S) ->
+ {[X#xmlText{value = AccText ++ Text}], S};
acc(X, Acc, S) ->
{[X|Acc], S}.