From 90f1bcd23ae888c85ebe9b2b83ed9662a924300f Mon Sep 17 00:00:00 2001 From: Shaun Mangelsdorf Date: Wed, 30 Nov 2016 06:10:28 +1000 Subject: Include explicit attrs when default_attrs=true With the `default_attrs` option set to `true`, xmerl was replacing the attributes for each element with the default attributes, discarding any attributes which were explicitly set. The new behaviour appends the default attributes, keeping the explicit attributes intact. --- lib/xmerl/src/xmerl_scan.erl | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'lib/xmerl/src') diff --git a/lib/xmerl/src/xmerl_scan.erl b/lib/xmerl/src/xmerl_scan.erl index 5e0459ec21..9f6b27113e 100644 --- a/lib/xmerl/src/xmerl_scan.erl +++ b/lib/xmerl/src/xmerl_scan.erl @@ -2225,16 +2225,18 @@ processed_whole_element(S=#xmerl_scanner{hook_fun = _Hook, AllAttrs = case S#xmerl_scanner.default_attrs of true -> - [ #xmlAttribute{name = AttName, - parents = [{Name, Pos} | Parents], - language = Lang, - nsinfo = NSI, - namespace = Namespace, - value = AttValue, - normalized = true} || - {AttName, AttValue} <- get_default_attrs(S, Name), - AttValue =/= no_value, - not lists:keymember(AttName, #xmlAttribute.name, Attrs) ]; + DefaultAttrs = + [ #xmlAttribute{name = AttName, + parents = [{Name, Pos} | Parents], + language = Lang, + nsinfo = NSI, + namespace = Namespace, + value = AttValue, + normalized = true} || + {AttName, AttValue} <- get_default_attrs(S, Name), + AttValue =/= no_value, + not lists:keymember(AttName, #xmlAttribute.name, Attrs) ], + lists:append(Attrs, DefaultAttrs); false -> Attrs end, -- cgit v1.2.3 From a24dc1a9e2f72ce883d47f550b0097f909956085 Mon Sep 17 00:00:00 2001 From: Andrew Dryga Date: Sun, 12 Feb 2017 19:55:27 +0200 Subject: Fixed typos in lib/xmerl --- lib/xmerl/src/xmerl_regexp.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/xmerl/src') diff --git a/lib/xmerl/src/xmerl_regexp.erl b/lib/xmerl/src/xmerl_regexp.erl index fc89b80ff1..566b77725f 100644 --- a/lib/xmerl/src/xmerl_regexp.erl +++ b/lib/xmerl/src/xmerl_regexp.erl @@ -1154,7 +1154,7 @@ comp_crs([], Last) -> [{Last,maxchar}]. %% build_dfa(NFA, NfaStartState) -> {DFA,DfaStartState}. %% Build a DFA from an NFA using "subset construction". The major %% difference from the book is that we keep the marked and unmarked -%% DFA states in seperate lists. New DFA states are added to the +%% DFA states in separate lists. New DFA states are added to the %% unmarked list and states are marked by moving them to the marked %% list. We assume that the NFA accepting state numbers are in %% ascending order for the rules and use ordsets to keep this order. -- cgit v1.2.3 From 5cb6de0c1db376236f7467af04324faa408af312 Mon Sep 17 00:00:00 2001 From: Lars Thorsen Date: Wed, 18 Jan 2017 14:47:15 +0100 Subject: [xmerl] Correct handling of implicit XML namespace The namespace_conformant option did not work when parsing documents without explicit XML namespace declaration. --- lib/xmerl/src/xmerl_scan.erl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'lib/xmerl/src') diff --git a/lib/xmerl/src/xmerl_scan.erl b/lib/xmerl/src/xmerl_scan.erl index 9f6b27113e..95dc82e5c9 100644 --- a/lib/xmerl/src/xmerl_scan.erl +++ b/lib/xmerl/src/xmerl_scan.erl @@ -2309,7 +2309,9 @@ expanded_name(Name, [], #xmlNamespace{default = URI}, S) -> expanded_name(Name, N = {"xmlns", Local}, #xmlNamespace{nodes = Ns}, S) -> {_, Value} = lists:keyfind(Local, 1, Ns), case Name of - 'xmlns:xml' when Value =/= 'http://www.w3.org/XML/1998/namespace' -> + 'xmlns:xml' when Value =:= 'http://www.w3.org/XML/1998/namespace' -> + N; + 'xmlns:xml' when Value =/= 'http://www.w3.org/XML/1998/namespace' -> ?fatal({xml_prefix_cannot_be_redeclared, Value}, S); 'xmlns:xmlns' -> ?fatal({xmlns_prefix_cannot_be_declared, Value}, S); @@ -2323,6 +2325,8 @@ expanded_name(Name, N = {"xmlns", Local}, #xmlNamespace{nodes = Ns}, S) -> N end end; +expanded_name(_Name, {"xml", Local}, _NS, _S) -> + {'http://www.w3.org/XML/1998/namespace', list_to_atom(Local)}; expanded_name(_Name, {Prefix, Local}, #xmlNamespace{nodes = Ns}, S) -> case lists:keysearch(Prefix, 1, Ns) of {value, {_, URI}} -> @@ -2333,9 +2337,6 @@ expanded_name(_Name, {Prefix, Local}, #xmlNamespace{nodes = Ns}, S) -> ?fatal({namespace_prefix_not_declared, Prefix}, S) end. - - - keyreplaceadd(K, Pos, [H|T], Obj) when K == element(Pos, H) -> [Obj|T]; keyreplaceadd(K, Pos, [H|T], Obj) -> -- cgit v1.2.3 From 00294041cd3c6f66598a50b57abf27e6a35e277f Mon Sep 17 00:00:00 2001 From: Lars Thorsen Date: Thu, 12 Jan 2017 16:49:40 +0100 Subject: [xmerl] Correct bug handling multiple documents on a stream Change how to interpret end of document to comply with Tim Brays comment on the standard. This makes it possible to handle more than one doc on a stream, the standard makes it impossible to know when the document is ended without waiting for the next document (and not always even that). Tim Brays comment about the trailing "Misc" rule: The fact that you're allowed some trailing junk after the root element, I decided (but unfortunately too late) is a real design error in XML. If I'm writing a network client, I'm probably going to close the link as soon as a I see the root element end-tag, and not depend on the other end closing it down properly. Furthermore, if I want to send a succession of XML documents over a network link, if I find a processing instruction after a root element, is it a trailer on the previous document, or part of the prolog of the next? --- lib/xmerl/src/xmerl_sax_parser.erl | 27 +++++++----- lib/xmerl/src/xmerl_sax_parser_base.erlsrc | 66 ++++++++++++++++++------------ 2 files changed, 56 insertions(+), 37 deletions(-) (limited to 'lib/xmerl/src') diff --git a/lib/xmerl/src/xmerl_sax_parser.erl b/lib/xmerl/src/xmerl_sax_parser.erl index 318a0cf7f4..9b5429cffe 100644 --- a/lib/xmerl/src/xmerl_sax_parser.erl +++ b/lib/xmerl/src/xmerl_sax_parser.erl @@ -1,7 +1,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -33,6 +33,7 @@ %% External exports %%---------------------------------------------------------------------- -export([file/2, + stream/3, stream/2]). %%---------------------------------------------------------------------- @@ -72,11 +73,12 @@ file(Name,Options) -> File = filename:basename(Name), ContinuationFun = fun default_continuation_cb/1, Res = stream(<<>>, - [{continuation_fun, ContinuationFun}, - {continuation_state, FD}, - {current_location, CL}, - {entity, File} - |Options]), + [{continuation_fun, ContinuationFun}, + {continuation_state, FD}, + {current_location, CL}, + {entity, File} + |Options], + file), ok = file:close(FD), Res end. @@ -92,19 +94,22 @@ file(Name,Options) -> %% EventState = term() %% Description: Parse a stream containing an XML document. %%---------------------------------------------------------------------- -stream(Xml, Options) when is_list(Xml), is_list(Options) -> +stream(Xml, Options) -> + stream(Xml, Options, stream). + +stream(Xml, Options, InputType) when is_list(Xml), is_list(Options) -> State = parse_options(Options, initial_state()), case State#xmerl_sax_parser_state.file_type of dtd -> xmerl_sax_parser_list:parse_dtd(Xml, State#xmerl_sax_parser_state{encoding = list, - input_type = stream}); + input_type = InputType}); normal -> xmerl_sax_parser_list:parse(Xml, State#xmerl_sax_parser_state{encoding = list, - input_type = stream}) + input_type = InputType}) end; -stream(Xml, Options) when is_binary(Xml), is_list(Options) -> +stream(Xml, Options, InputType) when is_binary(Xml), is_list(Options) -> case parse_options(Options, initial_state()) of {error, Reason} -> {error, Reason}; State -> @@ -127,7 +132,7 @@ stream(Xml, Options) when is_binary(Xml), is_list(Options) -> State#xmerl_sax_parser_state.event_state}; {Xml1, State1} -> parse_binary(Xml1, - State1#xmerl_sax_parser_state{input_type = stream}, + State1#xmerl_sax_parser_state{input_type = InputType}, ParseFunction) end end. diff --git a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc index 4d75805b9b..3ef2fce4d3 100644 --- a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc @@ -1,7 +1,7 @@ %%-*-erlang-*- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -72,7 +72,7 @@ parse(Xml, State) -> {ok, Rest, State2} -> State3 = event_callback(endDocument, State2), ets:delete(RefTable), - {ok, State3#xmerl_sax_parser_state.event_state, Rest}; + {ok, State3#xmerl_sax_parser_state.event_state, Rest}; {fatal_error, {State2, Reason}} -> State3 = event_callback(endDocument, State2), ets:delete(RefTable), @@ -81,6 +81,10 @@ parse(Xml, State) -> State3 = event_callback(endDocument, State2), ets:delete(RefTable), format_error(Tag, State3, Reason); + {endDocument, Rest, State2} -> + State3 = event_callback(endDocument, State2), + ets:delete(RefTable), + {ok, State3#xmerl_sax_parser_state.event_state, Rest}; Other -> _State2 = event_callback(endDocument, State1), ets:delete(RefTable), @@ -111,7 +115,7 @@ parse_dtd(Xml, State) -> {Rest, State2} when is_record(State2, xmerl_sax_parser_state) -> State3 = event_callback(endDocument, State2), ets:delete(RefTable), - {ok, State3#xmerl_sax_parser_state.event_state, Rest}; + {ok, State3#xmerl_sax_parser_state.event_state, Rest}; {endDocument, Rest, State2} when is_record(State2, xmerl_sax_parser_state) -> State3 = event_callback(endDocument, State2), ets:delete(RefTable), @@ -1024,16 +1028,21 @@ parse_etag(Bytes, State) -> unicode_incomplete_check([Bytes, State, fun parse_etag/2], undefined). - parse_etag_1(?STRING_REST(">", Rest), #xmerl_sax_parser_state{end_tags=[{_ETag, Uri, LocalName, QName, OldNsList, NewNsList} - |RestOfETags]} = State, _Tag) -> + |RestOfETags], + input_type=InputType} = State, _Tag) -> State1 = event_callback({endElement, Uri, LocalName, QName}, State), State2 = send_end_prefix_mapping_event(NewNsList, State1), - parse_content(Rest, - State2#xmerl_sax_parser_state{end_tags=RestOfETags, - ns = OldNsList}, - [], true); + case check_if_new_doc_allowed(InputType, RestOfETags) of + true -> + throw({endDocument, Rest, State2#xmerl_sax_parser_state{ns = OldNsList}}); + false -> + parse_content(Rest, + State2#xmerl_sax_parser_state{end_tags=RestOfETags, + ns = OldNsList}, + [], true) + end; parse_etag_1(?STRING_UNBOUND_REST(_C, _), State, Tag) -> {P,TN} = Tag, ?fatal_error(State, "Bad EndTag: " ++ P ++ ":" ++ TN); @@ -1051,21 +1060,26 @@ parse_etag_1(Bytes, State, Tag) -> %% Description: Parsing the content part of tags %% [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* %%---------------------------------------------------------------------- - parse_content(?STRING_EMPTY, State, Acc, IgnorableWS) -> - case catch cf(?STRING_EMPTY, State, Acc, IgnorableWS, fun parse_content/4) of - {Rest, State1} when is_record(State1, xmerl_sax_parser_state) -> - {Rest, State1}; - {fatal_error, {State1, Msg}} -> - case check_if_document_complete(State1, Msg) of - true -> - State2 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State1), - {?STRING_EMPTY, State2}; - false -> - ?fatal_error(State1, Msg) - end; - Other -> - throw(Other) + case check_if_document_complete(State, "No more bytes") of + true -> + State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State), + {?STRING_EMPTY, State1}; + false -> + case catch cf(?STRING_EMPTY, State, Acc, IgnorableWS, fun parse_content/4) of + {Rest, State1} when is_record(State1, xmerl_sax_parser_state) -> + {Rest, State1}; + {fatal_error, {State1, Msg}} -> + case check_if_document_complete(State1, Msg) of + true -> + State2 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State1), + {?STRING_EMPTY, State2}; + false -> + ?fatal_error(State1, Msg) + end; + Other -> + throw(Other) + end end; parse_content(?STRING("\r") = Bytes, State, Acc, IgnorableWS) -> cf(Bytes, State, Acc, IgnorableWS, fun parse_content/4); @@ -1094,7 +1108,7 @@ parse_content(?STRING_REST(" parse_content(?STRING_REST(" case ET of [] -> - {Rest, State}; %%LATH : Skicka ignorable WS ??? + {Rest, State}; %% Skicka ignorable WS ??? _ -> State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State), parse_cdata(Rest1, State1) @@ -1102,7 +1116,7 @@ parse_content(?STRING_REST(" case ET of [] -> - {Rest, State}; %%LATH : Skicka ignorable WS ??? + {Rest, State}; %% Skicka ignorable WS ??? _ -> State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State), parse_stag(Rest1, State1) @@ -3290,7 +3304,7 @@ cf(Rest, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = C catch throw:ErrorTerm -> ?fatal_error(State, ErrorTerm); - exit:Reason -> + exit:Reason -> ?fatal_error(State, {'EXIT', Reason}) end, case Result of -- cgit v1.2.3 From 760a3fa3c57fa2b56b44a9c25cbab4df0f71987e Mon Sep 17 00:00:00 2001 From: Lars Thorsen Date: Fri, 10 Feb 2017 12:51:25 +0100 Subject: [xmerl] Fix XML "well-formedness" bug i SAX parser Changed the XML Sax parser behaviour so it returns an error if there are something more in the file after the the matching document. If one using the xmerl_sax_parser:stream() a rest is allowed which then can be sent to a new call of xmerl_sax_parser:stream() to parse next document. This is done to be compliant with XML conformance tests. --- lib/xmerl/src/xmerl_sax_parser.hrl | 9 +-------- lib/xmerl/src/xmerl_sax_parser_base.erlsrc | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 9 deletions(-) (limited to 'lib/xmerl/src') diff --git a/lib/xmerl/src/xmerl_sax_parser.hrl b/lib/xmerl/src/xmerl_sax_parser.hrl index 932ab0cec5..7f9bf6c4d3 100644 --- a/lib/xmerl/src/xmerl_sax_parser.hrl +++ b/lib/xmerl/src/xmerl_sax_parser.hrl @@ -88,14 +88,7 @@ current_location, % Location of the currently parsed XML entity entity, % Parsed XML entity skip_external_dtd = false,% If true the external DTD is skipped during parsing - input_type % Source type: file | stream. - % This field is a preparation for an fix in R17 of a bug in - % the conformance against the standard. - % Today a file which contains two XML documents will be considered - % well-formed and the second is placed in the rest part of the - % return tuple, according to the conformance tests this should fail. - % In the future this will fail if xmerl_sax_aprser:file/2 is used but - % left to the user in the xmerl_sax_aprser:stream/2 case. + input_type % Source type: file | stream }). diff --git a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc index 3ef2fce4d3..39dd3bbff3 100644 --- a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc @@ -72,7 +72,12 @@ parse(Xml, State) -> {ok, Rest, State2} -> State3 = event_callback(endDocument, State2), ets:delete(RefTable), - {ok, State3#xmerl_sax_parser_state.event_state, Rest}; + case check_if_rest_ok(State3#xmerl_sax_parser_state.input_type, Rest) of + true -> + {ok, State3#xmerl_sax_parser_state.event_state, Rest}; + false -> + format_error(fatal_error, State3, "Input found after legal document") + end; {fatal_error, {State2, Reason}} -> State3 = event_callback(endDocument, State2), ets:delete(RefTable), @@ -446,6 +451,15 @@ check_if_new_doc_allowed(stream, []) -> check_if_new_doc_allowed(_, _) -> false. +check_if_rest_ok(file, []) -> + true; +check_if_rest_ok(file, <<>>) -> + true; +check_if_rest_ok(stream, _) -> + true; +check_if_rest_ok(_, _) -> + false. + %%---------------------------------------------------------------------- %% Function: parse_pi_1(Rest, State) -> Result %% Input: Rest = string() | binary() -- cgit v1.2.3 From 71ee7ed52d51d10a4c8c3b9e1de0a3a1df57a1b6 Mon Sep 17 00:00:00 2001 From: Lars Thorsen Date: Fri, 17 Feb 2017 08:01:18 +0100 Subject: [xmerl] Remove faulty throws --- lib/xmerl/src/xmerl_sax_parser.erl | 6 +++--- lib/xmerl/src/xmerl_sax_parser_base.erlsrc | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'lib/xmerl/src') diff --git a/lib/xmerl/src/xmerl_sax_parser.erl b/lib/xmerl/src/xmerl_sax_parser.erl index 9b5429cffe..1aef6c58c4 100644 --- a/lib/xmerl/src/xmerl_sax_parser.erl +++ b/lib/xmerl/src/xmerl_sax_parser.erl @@ -99,7 +99,7 @@ stream(Xml, Options) -> stream(Xml, Options, InputType) when is_list(Xml), is_list(Options) -> State = parse_options(Options, initial_state()), - case State#xmerl_sax_parser_state.file_type of + case State#xmerl_sax_parser_state.file_type of dtd -> xmerl_sax_parser_list:parse_dtd(Xml, State#xmerl_sax_parser_state{encoding = list, @@ -231,12 +231,12 @@ check_encoding_option(E) -> %% Description: Detects which character set is used in a binary stream. %%---------------------------------------------------------------------- detect_charset(<<>>, #xmerl_sax_parser_state{continuation_fun = undefined} = _) -> - throw({error, "Can't detect character encoding due to no indata"}); + {error, "Can't detect character encoding due to no indata"}; detect_charset(<<>>, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = CState} = State) -> case CFun(CState) of {<<>>, _} -> - throw({error, "Can't detect character encoding due to lack of indata"}); + {error, "Can't detect character encoding due to lack of indata"}; {NewBytes, NewContState} -> detect_charset(NewBytes, State#xmerl_sax_parser_state{continuation_state = NewContState}) end; diff --git a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc index 39dd3bbff3..2b9b37b5f3 100644 --- a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc @@ -93,7 +93,7 @@ parse(Xml, State) -> Other -> _State2 = event_callback(endDocument, State1), ets:delete(RefTable), - throw(Other) + {fatal_error, Other} end. %%---------------------------------------------------------------------- @@ -128,7 +128,7 @@ parse_dtd(Xml, State) -> Other -> _State2 = event_callback(endDocument, State1), ets:delete(RefTable), - throw(Other) + {fatal_error, Other} end. -- cgit v1.2.3 From 47c07a3ed61b9d51c043f073ca70fd66d91ef4b2 Mon Sep 17 00:00:00 2001 From: Lars Thorsen Date: Mon, 6 Mar 2017 13:17:38 +0100 Subject: [xmerl] Fix compiler and dialyzer warnings --- lib/xmerl/src/xmerl_sax_parser_base.erlsrc | 60 +++++---------------------- lib/xmerl/src/xmerl_sax_parser_latin1.erlsrc | 38 ++++++++++++++--- lib/xmerl/src/xmerl_sax_parser_list.erlsrc | 19 +++++++-- lib/xmerl/src/xmerl_sax_parser_utf16be.erlsrc | 50 ++++++++++++++++++++-- lib/xmerl/src/xmerl_sax_parser_utf16le.erlsrc | 50 ++++++++++++++++++++-- lib/xmerl/src/xmerl_sax_parser_utf8.erlsrc | 50 ++++++++++++++++++++-- 6 files changed, 199 insertions(+), 68 deletions(-) (limited to 'lib/xmerl/src') diff --git a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc index 2b9b37b5f3..f3470b2809 100644 --- a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc @@ -145,10 +145,11 @@ parse_dtd(Xml, State) -> %% [1] document ::= prolog element Misc* %%---------------------------------------------------------------------- parse_document(Rest, State) when is_record(State, xmerl_sax_parser_state) -> - {Rest1, State1} = parse_xml_decl(Rest, State), + {Rest1, State1} = parse_byte_order_mark(Rest, State), {Rest2, State2} = parse_misc(Rest1, State1, true), {ok, Rest2, State2}. +?PARSE_BYTE_ORDER_MARK(Bytes, State). %%---------------------------------------------------------------------- %% Function: parse_xml_decl(Rest, State) -> Result @@ -159,15 +160,8 @@ parse_document(Rest, State) when is_record(State, xmerl_sax_parser_state) -> %% [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? %% [23] XMLDecl ::= '' %%---------------------------------------------------------------------- --dialyzer({[no_fail_call, no_match], parse_xml_decl/2}). parse_xml_decl(?STRING_EMPTY, State) -> cf(?STRING_EMPTY, State, fun parse_xml_decl/2); -parse_xml_decl(?BYTE_ORDER_MARK_1, State) -> - cf(?BYTE_ORDER_MARK_1, State, fun parse_xml_decl/2); -parse_xml_decl(?BYTE_ORDER_MARK_2, State) -> - cf(?BYTE_ORDER_MARK_2, State, fun parse_xml_decl/2); -parse_xml_decl(?BYTE_ORDER_MARK_REST(Rest), State) -> - cf(Rest, State, fun parse_xml_decl/2); parse_xml_decl(?STRING("<") = Bytes, State) -> cf(Bytes, State, fun parse_xml_decl/2); parse_xml_decl(?STRING(" @@ -179,31 +173,19 @@ parse_xml_decl(?STRING(" parse_xml_decl(?STRING(" cf(Bytes, State, fun parse_xml_decl/2); parse_xml_decl(?STRING_REST(" - parse_xml_decl_1(Rest1, State); -parse_xml_decl(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State) when is_binary(Bytes) -> - case unicode:characters_to_list(Bytes, Enc) of - {incomplete, _, _} -> - cf(Bytes, State, fun parse_xml_decl/2); - {error, _Encoded, _Rest} -> - ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))); - _ -> - parse_prolog(Bytes, State) - end; -parse_xml_decl(Bytes, State) -> - parse_prolog(Bytes, State). - + parse_xml_decl_rest(Rest1, State); +?PARSE_XML_DECL(Bytes, State). -parse_xml_decl_1(?STRING_UNBOUND_REST(C, Rest) = Bytes, State) -> +parse_xml_decl_rest(?STRING_UNBOUND_REST(C, Rest) = Bytes, State) -> if ?is_whitespace(C) -> {_XmlAttributes, Rest1, State1} = parse_version_info(Rest, State, []), - %State2 = event_callback({processingInstruction, "xml", XmlAttributes}, State1),% The XML decl. should not be reported as a PI parse_prolog(Rest1, State1); true -> parse_prolog(?STRING_REST(" - unicode_incomplete_check([Bytes, State, fun parse_xml_decl_1/2], undefined). +parse_xml_decl_rest(Bytes, State) -> + unicode_incomplete_check([Bytes, State, fun parse_xml_decl_rest/2], undefined). @@ -225,8 +207,6 @@ parse_prolog(?STRING_REST(" parse_prolog(Rest1, State1); {endDocument, Rest1, State1} -> parse_prolog(Rest1, State1) - % IValue = ?TO_INPUT_FORMAT(" parse_prolog_1(Rest, State); @@ -239,7 +219,6 @@ parse_prolog(Bytes, State) -> unicode_incomplete_check([Bytes, State, fun parse_prolog/2], "expecting < or whitespace"). - parse_prolog_1(?STRING_EMPTY, State) -> cf(?STRING_EMPTY, State, fun parse_prolog_1/2); parse_prolog_1(?STRING("D") = Bytes, State) -> @@ -1232,7 +1211,6 @@ send_character_event(_, true, String, State) -> %% Description: Parse whitespaces. %% [3] S ::= (#x20 | #x9 | #xD | #xA)+ %%---------------------------------------------------------------------- --dialyzer({no_fail_call, whitespace/3}). whitespace(?STRING_EMPTY, State, Acc) -> case cf(?STRING_EMPTY, State, Acc, fun whitespace/3) of {?STRING_EMPTY, State} -> @@ -1258,16 +1236,7 @@ whitespace(?STRING_REST("\r", Rest), State, Acc) -> whitespace(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf |Acc]); whitespace(?STRING_UNBOUND_REST(C, Rest), State, Acc) when ?is_whitespace(C) -> whitespace(Rest, State, [C|Acc]); -whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) -> - {lists:reverse(Acc), Bytes, State}; -whitespace(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State, Acc) when is_binary(Bytes) -> - case unicode:characters_to_list(Bytes, Enc) of - {incomplete, _, _} -> - cf(Bytes, State, Acc, fun whitespace/3); - {error, _Encoded, _Rest} -> - ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))) - end. - +?WHITESPACE(Bytes, State, Acc). %%---------------------------------------------------------------------- %% Function: parse_reference(Rest, State, HaveToExist) -> Result @@ -1390,7 +1359,6 @@ parse_pe_reference_1(Bytes, State, Name) -> "missing ; after reference " ++ Name). - %%---------------------------------------------------------------------- %% Function: insert_reference(Reference, State) -> Result %% Parameters: Reference = string() @@ -1406,7 +1374,6 @@ insert_reference({Name, Type, Value}, Table) -> end. - %%---------------------------------------------------------------------- %% Function: look_up_reference(Reference, State) -> Result %% Parameters: Reference = string() @@ -1721,7 +1688,7 @@ handle_external_entity({http, Url}, State) -> ++ file:format_error(Reason)); {ok, FD} -> {?STRING_EMPTY, EntityState} = - parse_external_entity_1(<<>>, + parse_external_entity_byte_order_mark(<<>>, State#xmerl_sax_parser_state{continuation_state=FD, current_location=filename:dirname(Url), entity=filename:basename(Url), @@ -1737,6 +1704,8 @@ handle_external_entity({http, Url}, State) -> handle_external_entity({Tag, _Url}, State) -> ?fatal_error(State, "Unsupported URI type: " ++ atom_to_list(Tag)). +?PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State). + %%---------------------------------------------------------------------- %% Function : parse_external_entity_1(Rest, State) -> Result %% Parameters: Rest = string() | binary() @@ -1744,7 +1713,6 @@ handle_external_entity({Tag, _Url}, State) -> %% Result : {Rest, State} %% Description: Parse the external entity. %%---------------------------------------------------------------------- --dialyzer({[no_fail_call, no_match], parse_external_entity_1/2}). parse_external_entity_1(?STRING_EMPTY, #xmerl_sax_parser_state{file_type=Type} = State) -> case catch cf(?STRING_EMPTY, State, fun parse_external_entity_1/2) of {Rest, State1} when is_record(State1, xmerl_sax_parser_state) -> @@ -1754,12 +1722,6 @@ parse_external_entity_1(?STRING_EMPTY, #xmerl_sax_parser_state{file_type=Type} = Other -> throw(Other) end; -parse_external_entity_1(?BYTE_ORDER_MARK_1, State) -> - cf(?BYTE_ORDER_MARK_1, State, fun parse_external_entity_1/2); -parse_external_entity_1(?BYTE_ORDER_MARK_2, State) -> - cf(?BYTE_ORDER_MARK_2, State, fun parse_external_entity_1/2); -parse_external_entity_1(?BYTE_ORDER_MARK_REST(Rest), State) -> - parse_external_entity_1(Rest, State); parse_external_entity_1(?STRING("<") = Bytes, State) -> cf(Bytes, State, fun parse_external_entity_1/2); parse_external_entity_1(?STRING(" diff --git a/lib/xmerl/src/xmerl_sax_parser_latin1.erlsrc b/lib/xmerl/src/xmerl_sax_parser_latin1.erlsrc index 961806bf4c..6e59347fb8 100644 --- a/lib/xmerl/src/xmerl_sax_parser_latin1.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_latin1.erlsrc @@ -2,7 +2,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -34,8 +34,36 @@ -define(APPEND_STRING(Rest, New), <>). -define(TO_INPUT_FORMAT(Val), unicode:characters_to_binary(Val, unicode, latin1)). -%% STRING_REST and STRING_UNBOUND_REST is only different in the list case -define(STRING_UNBOUND_REST(MatchChar, Rest), <>). --define(BYTE_ORDER_MARK_1, undefined_bom1). --define(BYTE_ORDER_MARK_2, undefined_bom2). --define(BYTE_ORDER_MARK_REST(Rest), <>). + +-define(PARSE_BYTE_ORDER_MARK(Bytes, State), + parse_byte_order_mark(Bytes, State) -> + parse_xml_decl(Bytes, State)). + +-define(PARSE_XML_DECL(Bytes, State), + parse_xml_decl(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, fun parse_xml_decl/2); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))); + _ -> + parse_prolog(Bytes, State) + end; + parse_xml_decl(Bytes, State) -> + parse_prolog(Bytes, State)). + +-define(WHITESPACE(Bytes, State, Acc), + whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) -> + {lists:reverse(Acc), Bytes, State}; + whitespace(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State, Acc) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, Acc, fun whitespace/3); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))) + end). + +-define(PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State), + parse_external_entity_byte_order_mark(Bytes, State) -> + parse_external_entity_1(Bytes, State)). diff --git a/lib/xmerl/src/xmerl_sax_parser_list.erlsrc b/lib/xmerl/src/xmerl_sax_parser_list.erlsrc index 624a621d92..6a4435b1d9 100644 --- a/lib/xmerl/src/xmerl_sax_parser_list.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_list.erlsrc @@ -36,6 +36,19 @@ %% In the list case we can't use a '++' when matchin against an unbound variable -define(STRING_UNBOUND_REST(MatchChar, Rest), [MatchChar | Rest]). --define(BYTE_ORDER_MARK_1, undefined_bom1). --define(BYTE_ORDER_MARK_2, undefined_bom2). --define(BYTE_ORDER_MARK_REST(Rest), [undefined|Rest]). + +-define(PARSE_BYTE_ORDER_MARK(Bytes, State), + parse_byte_order_mark(Bytes, State) -> + parse_xml_decl(Bytes, State)). + +-define(PARSE_XML_DECL(Bytes, State), + parse_xml_decl(Bytes, State) -> + parse_prolog(Bytes, State)). + +-define(WHITESPACE(Bytes, State, Acc), + whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) -> + {lists:reverse(Acc), Bytes, State}). + +-define(PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State), + parse_external_entity_byte_order_mark(Bytes, State) -> + parse_external_entity_1(Bytes, State)). diff --git a/lib/xmerl/src/xmerl_sax_parser_utf16be.erlsrc b/lib/xmerl/src/xmerl_sax_parser_utf16be.erlsrc index ff84ece97a..ec89024729 100644 --- a/lib/xmerl/src/xmerl_sax_parser_utf16be.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_utf16be.erlsrc @@ -2,7 +2,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -34,8 +34,50 @@ -define(APPEND_STRING(Rest, New), <>). -define(TO_INPUT_FORMAT(Val), unicode:characters_to_binary(Val, unicode, {utf16, big})). -%% STRING_REST and STRING_UNBOUND_REST is only different in the list case -define(STRING_UNBOUND_REST(MatchChar, Rest), <>). --define(BYTE_ORDER_MARK_1, undefined_bom1). --define(BYTE_ORDER_MARK_2, <<16#FE>>). +-define(BYTE_ORDER_MARK_1, <<16#FE>>). -define(BYTE_ORDER_MARK_REST(Rest), <<16#FE, 16#FF, Rest/binary>>). + +-define(PARSE_BYTE_ORDER_MARK(Bytes, State), + parse_byte_order_mark(?STRING_EMPTY, State) -> + cf(?STRING_EMPTY, State, fun parse_byte_order_mark/2); + parse_byte_order_mark(?BYTE_ORDER_MARK_1, State) -> + cf(?BYTE_ORDER_MARK_1, State, fun parse_byte_order_mark/2); + parse_byte_order_mark(?BYTE_ORDER_MARK_REST(Rest), State) -> + parse_xml_decl(Rest, State); + parse_byte_order_mark(Bytes, State) -> + parse_xml_decl(Bytes, State)). + +-define(PARSE_XML_DECL(Bytes, State), + parse_xml_decl(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, fun parse_xml_decl/2); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))); + _ -> + parse_prolog(Bytes, State) + end; + parse_xml_decl(Bytes, State) -> + parse_prolog(Bytes, State)). + +-define(WHITESPACE(Bytes, State, Acc), + whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) -> + {lists:reverse(Acc), Bytes, State}; + whitespace(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State, Acc) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, Acc, fun whitespace/3); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))) + end). + +-define(PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State), + parse_external_entity_byte_order_mark(?STRING_EMPTY, State) -> + cf(?STRING_EMPTY, State, fun parse_external_entity_byte_order_mark/2); + parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_1, State) -> + cf(?BYTE_ORDER_MARK_1, State, fun parse_external_entity_byte_order_mark/2); + parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_REST(Rest), State) -> + parse_external_entity_1(Rest, State); + parse_external_entity_byte_order_mark(Bytes, State) -> + parse_external_entity_1(Bytes, State)). diff --git a/lib/xmerl/src/xmerl_sax_parser_utf16le.erlsrc b/lib/xmerl/src/xmerl_sax_parser_utf16le.erlsrc index a330fce8d0..566333a045 100644 --- a/lib/xmerl/src/xmerl_sax_parser_utf16le.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_utf16le.erlsrc @@ -2,7 +2,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -34,8 +34,50 @@ -define(APPEND_STRING(Rest, New), <>). -define(TO_INPUT_FORMAT(Val), unicode:characters_to_binary(Val, unicode, {utf16, little})). -%% STRING_REST and STRING_UNBOUND_REST is only different in the list case -define(STRING_UNBOUND_REST(MatchChar, Rest), <>). --define(BYTE_ORDER_MARK_1, undefined_bom1). --define(BYTE_ORDER_MARK_2, <<16#FF>>). +-define(BYTE_ORDER_MARK_1, <<16#FF>>). -define(BYTE_ORDER_MARK_REST(Rest), <<16#FF, 16#FE, Rest/binary>>). + +-define(PARSE_BYTE_ORDER_MARK(Bytes, State), + parse_byte_order_mark(?STRING_EMPTY, State) -> + cf(?STRING_EMPTY, State, fun parse_byte_order_mark/2); + parse_byte_order_mark(?BYTE_ORDER_MARK_1, State) -> + cf(?BYTE_ORDER_MARK_1, State, fun parse_byte_order_mark/2); + parse_byte_order_mark(?BYTE_ORDER_MARK_REST(Rest), State) -> + parse_xml_decl(Rest, State); + parse_byte_order_mark(Bytes, State) -> + parse_xml_decl(Bytes, State)). + +-define(PARSE_XML_DECL(Bytes, State), + parse_xml_decl(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, fun parse_xml_decl/2); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))); + _ -> + parse_prolog(Bytes, State) + end; + parse_xml_decl(Bytes, State) -> + parse_prolog(Bytes, State)). + +-define(WHITESPACE(Bytes, State, Acc), + whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) -> + {lists:reverse(Acc), Bytes, State}; + whitespace(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State, Acc) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, Acc, fun whitespace/3); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))) + end). + +-define(PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State), + parse_external_entity_byte_order_mark(?STRING_EMPTY, State) -> + cf(?STRING_EMPTY, State, fun parse_external_entity_byte_order_mark/2); + parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_1, State) -> + cf(?BYTE_ORDER_MARK_1, State, fun parse_external_entity_byte_order_mark/2); + parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_REST(Rest), State) -> + parse_external_entity_1(Rest, State); + parse_external_entity_byte_order_mark(Bytes, State) -> + parse_external_entity_1(Bytes, State)). diff --git a/lib/xmerl/src/xmerl_sax_parser_utf8.erlsrc b/lib/xmerl/src/xmerl_sax_parser_utf8.erlsrc index d46d60d237..f41d06d013 100644 --- a/lib/xmerl/src/xmerl_sax_parser_utf8.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_utf8.erlsrc @@ -2,7 +2,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -34,11 +34,55 @@ -define(APPEND_STRING(Rest, New), <>). -define(TO_INPUT_FORMAT(Val), unicode:characters_to_binary(Val, unicode, utf8)). - -%% STRING_REST and STRING_UNBOUND_REST is only different in the list case -define(STRING_UNBOUND_REST(MatchChar, Rest), <>). -define(BYTE_ORDER_MARK_1, <<16#EF>>). -define(BYTE_ORDER_MARK_2, <<16#EF, 16#BB>>). -define(BYTE_ORDER_MARK_REST(Rest), <<16#EF, 16#BB, 16#BF, Rest/binary>>). +-define(PARSE_BYTE_ORDER_MARK(Bytes, State), + parse_byte_order_mark(?STRING_EMPTY, State) -> + cf(?STRING_EMPTY, State, fun parse_byte_order_mark/2); + parse_byte_order_mark(?BYTE_ORDER_MARK_1, State) -> + cf(?BYTE_ORDER_MARK_1, State, fun parse_byte_order_mark/2); + parse_byte_order_mark(?BYTE_ORDER_MARK_2, State) -> + cf(?BYTE_ORDER_MARK_2, State, fun parse_byte_order_mark/2); + parse_byte_order_mark(?BYTE_ORDER_MARK_REST(Rest), State) -> + parse_xml_decl(Rest, State); + parse_byte_order_mark(Bytes, State) -> + parse_xml_decl(Bytes, State)). + +-define(PARSE_XML_DECL(Bytes, State), + parse_xml_decl(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, fun parse_xml_decl/2); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))); + _ -> + parse_prolog(Bytes, State) + end; + parse_xml_decl(Bytes, State) -> + parse_prolog(Bytes, State)). + +-define(WHITESPACE(Bytes, State, Acc), + whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) -> + {lists:reverse(Acc), Bytes, State}; + whitespace(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State, Acc) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, Acc, fun whitespace/3); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))) + end). +-define(PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State), + parse_external_entity_byte_order_mark(?STRING_EMPTY, State) -> + cf(?STRING_EMPTY, State, fun parse_external_entity_byte_order_mark/2); + parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_1, State) -> + cf(?BYTE_ORDER_MARK_1, State, fun parse_external_entity_byte_order_mark/2); + parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_2, State) -> + cf(?BYTE_ORDER_MARK_2, State, fun parse_external_entity_byte_order_mark/2); + parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_REST(Rest), State) -> + parse_external_entity_1(Rest, State); + parse_external_entity_byte_order_mark(Bytes, State) -> + parse_external_entity_1(Bytes, State)). -- cgit v1.2.3 From ca34a6c16a146cb5780a9c28cb5614e2e846d318 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jimmy=20Z=C3=B6ger?= Date: Wed, 8 Mar 2017 14:11:53 +0100 Subject: Removes redundant function clause --- lib/xmerl/src/xmerl_scan.erl | 2 -- 1 file changed, 2 deletions(-) (limited to 'lib/xmerl/src') diff --git a/lib/xmerl/src/xmerl_scan.erl b/lib/xmerl/src/xmerl_scan.erl index 95dc82e5c9..4eacb9fc4d 100644 --- a/lib/xmerl/src/xmerl_scan.erl +++ b/lib/xmerl/src/xmerl_scan.erl @@ -474,8 +474,6 @@ event(_X, S) -> %% into multiple objects (in which case {Acc',Pos',S'} should be returned.) %% If {Acc',S'} is returned, Pos will be incremented by 1 by default. %% Below is an example of an acceptable operation -acc(X = #xmlText{value = Text}, Acc, S) -> - {[X#xmlText{value = Text}|Acc], S}; acc(X, Acc, S) -> {[X|Acc], S}. -- cgit v1.2.3 From 027c94a04a3c78daa351b025c68694f01c566560 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jimmy=20Z=C3=B6ger?= Date: Wed, 8 Mar 2017 14:56:06 +0100 Subject: Improves accumulator fun in xmerl_scan so that only one #xmlText record is returned for strings which have character references * Adds test with character reference in numeric form * Updates test for ticket_7430 --- lib/xmerl/src/xmerl_scan.erl | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib/xmerl/src') diff --git a/lib/xmerl/src/xmerl_scan.erl b/lib/xmerl/src/xmerl_scan.erl index 4eacb9fc4d..18fe7031b9 100644 --- a/lib/xmerl/src/xmerl_scan.erl +++ b/lib/xmerl/src/xmerl_scan.erl @@ -474,6 +474,8 @@ event(_X, S) -> %% into multiple objects (in which case {Acc',Pos',S'} should be returned.) %% If {Acc',S'} is returned, Pos will be incremented by 1 by default. %% Below is an example of an acceptable operation +acc(#xmlText{value = Text}, [X = #xmlText{value = AccText}], S) -> + {[X#xmlText{value = AccText ++ Text}], S}; acc(X, Acc, S) -> {[X|Acc], S}. -- cgit v1.2.3 From 26c3cd82529836cb5b6eefbf7f92f318fd91f847 Mon Sep 17 00:00:00 2001 From: Rickard Green Date: Fri, 10 Mar 2017 15:00:46 +0100 Subject: Update copyright year --- lib/xmerl/src/xmerl_sax_parser.hrl | 2 +- lib/xmerl/src/xmerl_sax_parser_list.erlsrc | 2 +- lib/xmerl/src/xmerl_scan.erl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/xmerl/src') diff --git a/lib/xmerl/src/xmerl_sax_parser.hrl b/lib/xmerl/src/xmerl_sax_parser.hrl index 7f9bf6c4d3..56a3a42e5f 100644 --- a/lib/xmerl/src/xmerl_sax_parser.hrl +++ b/lib/xmerl/src/xmerl_sax_parser.hrl @@ -1,7 +1,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. diff --git a/lib/xmerl/src/xmerl_sax_parser_list.erlsrc b/lib/xmerl/src/xmerl_sax_parser_list.erlsrc index 6a4435b1d9..ac89896215 100644 --- a/lib/xmerl/src/xmerl_sax_parser_list.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_list.erlsrc @@ -2,7 +2,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. diff --git a/lib/xmerl/src/xmerl_scan.erl b/lib/xmerl/src/xmerl_scan.erl index 95dc82e5c9..3dd9504f87 100644 --- a/lib/xmerl/src/xmerl_scan.erl +++ b/lib/xmerl/src/xmerl_scan.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2003-2016. All Rights Reserved. +%% Copyright Ericsson AB 2003-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. -- cgit v1.2.3 From 83e20c62057ebc1d8064bf57b01be560cd244e1d Mon Sep 17 00:00:00 2001 From: Raimo Niskanen Date: Thu, 4 May 2017 15:42:21 +0200 Subject: Update copyright year --- lib/xmerl/src/xmerl_regexp.erl | 2 +- lib/xmerl/src/xmerl_sax_parser.hrl | 2 +- lib/xmerl/src/xmerl_sax_parser_list.erlsrc | 2 +- lib/xmerl/src/xmerl_scan.erl | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'lib/xmerl/src') diff --git a/lib/xmerl/src/xmerl_regexp.erl b/lib/xmerl/src/xmerl_regexp.erl index 566b77725f..1bf8496673 100644 --- a/lib/xmerl/src/xmerl_regexp.erl +++ b/lib/xmerl/src/xmerl_regexp.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2006-2016. All Rights Reserved. +%% Copyright Ericsson AB 2006-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. diff --git a/lib/xmerl/src/xmerl_sax_parser.hrl b/lib/xmerl/src/xmerl_sax_parser.hrl index 7f9bf6c4d3..56a3a42e5f 100644 --- a/lib/xmerl/src/xmerl_sax_parser.hrl +++ b/lib/xmerl/src/xmerl_sax_parser.hrl @@ -1,7 +1,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. diff --git a/lib/xmerl/src/xmerl_sax_parser_list.erlsrc b/lib/xmerl/src/xmerl_sax_parser_list.erlsrc index 6a4435b1d9..ac89896215 100644 --- a/lib/xmerl/src/xmerl_sax_parser_list.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_list.erlsrc @@ -2,7 +2,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. diff --git a/lib/xmerl/src/xmerl_scan.erl b/lib/xmerl/src/xmerl_scan.erl index 18fe7031b9..a1f6ad4e2c 100644 --- a/lib/xmerl/src/xmerl_scan.erl +++ b/lib/xmerl/src/xmerl_scan.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2003-2016. All Rights Reserved. +%% Copyright Ericsson AB 2003-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. -- cgit v1.2.3 From f3851c6348596b0a9dda2997dac92d9ca9d7c2c1 Mon Sep 17 00:00:00 2001 From: Lars Thorsen Date: Thu, 18 May 2017 12:03:48 +0200 Subject: [xmerl] Fix fragmented xml directive bug and replace ets table * Fix continuation bug if the xml directive is fragmented * Replace the ENTITY ets table with a map --- lib/xmerl/src/xmerl_sax_parser.erl | 171 +++++++++++++---- lib/xmerl/src/xmerl_sax_parser_base.erlsrc | 293 +++++++++++++++++++---------- 2 files changed, 327 insertions(+), 137 deletions(-) (limited to 'lib/xmerl/src') diff --git a/lib/xmerl/src/xmerl_sax_parser.erl b/lib/xmerl/src/xmerl_sax_parser.erl index 1aef6c58c4..e383c4c349 100644 --- a/lib/xmerl/src/xmerl_sax_parser.erl +++ b/lib/xmerl/src/xmerl_sax_parser.erl @@ -64,7 +64,7 @@ %% Description: Parse file containing an XML document. %%---------------------------------------------------------------------- file(Name,Options) -> - case file:open(Name, [raw, read,binary]) of + case file:open(Name, [raw, read_ahead, read,binary]) of {error, Reason} -> {error,{Name, file:format_error(Reason)}}; {ok, FD} -> @@ -120,21 +120,22 @@ stream(Xml, Options, InputType) when is_binary(Xml), is_list(Options) -> normal -> parse end, - case detect_charset(Xml, State) of - {error, Reason} -> {fatal_error, - { - State#xmerl_sax_parser_state.current_location, - State#xmerl_sax_parser_state.entity, - 1 - }, - Reason, - [], - State#xmerl_sax_parser_state.event_state}; - {Xml1, State1} -> - parse_binary(Xml1, - State1#xmerl_sax_parser_state{input_type = InputType}, - ParseFunction) - end + try + {Xml1, State1} = detect_charset(Xml, State), + parse_binary(Xml1, + State1#xmerl_sax_parser_state{input_type = InputType}, + ParseFunction) + catch + throw:{fatal_error, {State2, Reason}} -> + {fatal_error, + { + State2#xmerl_sax_parser_state.current_location, + State2#xmerl_sax_parser_state.entity, + 1 + }, + Reason, [], + State2#xmerl_sax_parser_state.event_state} + end end. %%---------------------------------------------------------------------- @@ -156,8 +157,8 @@ parse_binary(Xml, #xmerl_sax_parser_state{encoding={utf16,big}}=State, F) -> xmerl_sax_parser_utf16be:F(Xml, State); parse_binary(Xml, #xmerl_sax_parser_state{encoding=latin1}=State, F) -> xmerl_sax_parser_latin1:F(Xml, State); -parse_binary(_, #xmerl_sax_parser_state{encoding=Enc}, _) -> - {error, lists:flatten(io_lib:format("Charcter set ~p not supported", [Enc]))}. +parse_binary(_, #xmerl_sax_parser_state{encoding=Enc}, State) -> + ?fatal_error(State, lists:flatten(io_lib:format("Charcter set ~p not supported", [Enc]))). %%---------------------------------------------------------------------- %% Function: initial_state/0 @@ -211,8 +212,7 @@ parse_options([{entity, Entity} |Options], State) -> parse_options([skip_external_dtd |Options], State) -> parse_options(Options, State#xmerl_sax_parser_state{skip_external_dtd = true}); parse_options([O |_], _State) -> - {error, - lists:flatten(io_lib:format("Option: ~p not supported", [O]))}. + {error, lists:flatten(io_lib:format("Option: ~p not supported", [O]))}. check_encoding_option(E) when E==utf8; E=={utf16,little}; E=={utf16,big}; @@ -230,16 +230,10 @@ check_encoding_option(E) -> %% Output: {utf8|utf16le|utf16be|iso8859, Xml, State} %% Description: Detects which character set is used in a binary stream. %%---------------------------------------------------------------------- -detect_charset(<<>>, #xmerl_sax_parser_state{continuation_fun = undefined} = _) -> - {error, "Can't detect character encoding due to no indata"}; -detect_charset(<<>>, #xmerl_sax_parser_state{continuation_fun = CFun, - continuation_state = CState} = State) -> - case CFun(CState) of - {<<>>, _} -> - {error, "Can't detect character encoding due to lack of indata"}; - {NewBytes, NewContState} -> - detect_charset(NewBytes, State#xmerl_sax_parser_state{continuation_state = NewContState}) - end; +detect_charset(<<>>, #xmerl_sax_parser_state{continuation_fun = undefined} = State) -> + ?fatal_error(State, "Can't detect character encoding due to lack of indata"); +detect_charset(<<>>, State) -> + cf(<<>>, State, fun detect_charset/2); detect_charset(Bytes, State) -> case unicode:bom_to_encoding(Bytes) of {latin1, 0} -> @@ -249,25 +243,47 @@ detect_charset(Bytes, State) -> {RealBytes, State#xmerl_sax_parser_state{encoding=Enc}} end. +detect_charset_1(<<16#00>> = Xml, State) -> + cf(Xml, State, fun detect_charset_1/2); +detect_charset_1(<<16#00, 16#3C>> = Xml, State) -> + cf(Xml, State, fun detect_charset_1/2); +detect_charset_1(<<16#00, 16#3C, 16#00>> = Xml, State) -> + cf(Xml, State, fun detect_charset_1/2); detect_charset_1(<<16#00, 16#3C, 16#00, 16#3F, _/binary>> = Xml, State) -> {Xml, State#xmerl_sax_parser_state{encoding={utf16, big}}}; +detect_charset_1(<<16#3C>> = Xml, State) -> + cf(Xml, State, fun detect_charset_1/2); +detect_charset_1(<<16#3C, 16#00>> = Xml, State) -> + cf(Xml, State, fun detect_charset_1/2); +detect_charset_1(<<16#3C, 16#00, 16#3F>> = Xml, State) -> + cf(Xml, State, fun detect_charset_1/2); detect_charset_1(<<16#3C, 16#00, 16#3F, 16#00, _/binary>> = Xml, State) -> {Xml, State#xmerl_sax_parser_state{encoding={utf16, little}}}; -detect_charset_1(<<16#3C, 16#3F, 16#78, 16#6D, 16#6C, Xml2/binary>> = Xml, State) -> - case parse_xml_directive(Xml2) of +detect_charset_1(<<16#3C>> = Xml, State) -> + cf(Xml, State, fun detect_charset_1/2); +detect_charset_1(<<16#3C, 16#3F>> = Xml, State) -> + cf(Xml, State, fun detect_charset_1/2); +detect_charset_1(<<16#3C, 16#3F, 16#78>> = Xml, State) -> + cf(Xml, State, fun detect_charset_1/2); +detect_charset_1(<<16#3C, 16#3F, 16#78, 16#6D>> = Xml, State) -> + cf(Xml, State, fun detect_charset_1/2); +detect_charset_1(<<16#3C, 16#3F, 16#78, 16#6D, 16#6C, Xml2/binary>>, State) -> + {Xml3, State1} = read_until_end_of_xml_directive(Xml2, State), + case parse_xml_directive(Xml3) of {error, Reason} -> - {error, Reason}; + ?fatal_error(State, Reason); AttrList -> case lists:keysearch("encoding", 1, AttrList) of {value, {_, E}} -> case convert_encoding(E) of {error, Reason} -> - {error, Reason}; + ?fatal_error(State, Reason); Enc -> - {Xml, State#xmerl_sax_parser_state{encoding=Enc}} + {<<16#3C, 16#3F, 16#78, 16#6D, 16#6C, Xml3/binary>>, + State1#xmerl_sax_parser_state{encoding=Enc}} end; _ -> - {Xml, State} + {<<16#3C, 16#3F, 16#78, 16#6D, 16#6C, Xml3/binary>>, State1} end end; detect_charset_1(Xml, State) -> @@ -377,7 +393,7 @@ parse_value_1(<>, Stop, Acc) -> parse_value_1(Rest, Stop, [C |Acc]). %%====================================================================== -%%Default functions +%% Default functions %%====================================================================== %%---------------------------------------------------------------------- %% Function: default_event_cb(Event, LineNo, State) -> Result @@ -393,7 +409,7 @@ default_event_cb(_Event, _LineNo, State) -> %%---------------------------------------------------------------------- %% Function: default_continuation_cb(IoDevice) -> Result %% IoDevice = iodevice() -%% Output: Result = {[char()], State} +%% Output: Result = {binary(), IoDevice} %% Description: Default continuation callback reading blocks. %%---------------------------------------------------------------------- default_continuation_cb(IoDevice) -> @@ -403,3 +419,82 @@ default_continuation_cb(IoDevice) -> {ok, FileBin} -> {FileBin, IoDevice} end. + +%%---------------------------------------------------------------------- +%% Function: read_until_end_of_xml_directive(Rest, State) -> Result +%% Rest = binary() +%% Output: Result = {binary(), State} +%% Description: Reads a utf8 or latin1 until it finds '?>' +%%---------------------------------------------------------------------- +read_until_end_of_xml_directive(Rest, State) -> + case binary:match(Rest, <<"?>">>) of + nomatch -> + case cf(Rest, State) of + {<<>>, _} -> + ?fatal_error(State, "Can't detect character encoding due to lack of indata"); + {NewBytes, NewState} -> + read_until_end_of_xml_directive(NewBytes, NewState) + end; + _ -> + {Rest, State} + end. + + +%%---------------------------------------------------------------------- +%% Function : cf(Rest, State) -> Result +%% Parameters: Rest = binary() +%% State = #xmerl_sax_parser_state{} +%% NextCall = fun() +%% Result : {Rest, State} +%% Description: Function that uses provided fun to read another chunk from +%% input stream and calls the fun in NextCall. +%%---------------------------------------------------------------------- +cf(_Rest, #xmerl_sax_parser_state{continuation_fun = undefined} = State) -> + ?fatal_error(State, "Continuation function undefined"); +cf(Rest, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = CState} = State) -> + Result = + try + CFun(CState) + catch + throw:ErrorTerm -> + ?fatal_error(State, ErrorTerm); + exit:Reason -> + ?fatal_error(State, {'EXIT', Reason}) + end, + case Result of + {<<>>, _} -> + ?fatal_error(State, "Can't detect character encoding due to lack of indata"); + {NewBytes, NewContState} -> + {<>, + State#xmerl_sax_parser_state{continuation_state = NewContState}} + end. + +%%---------------------------------------------------------------------- +%% Function : cf(Rest, State, NextCall) -> Result +%% Parameters: Rest = binary() +%% State = #xmerl_sax_parser_state{} +%% NextCall = fun() +%% Result : {Rest, State} +%% Description: Function that uses provided fun to read another chunk from +%% input stream and calls the fun in NextCall. +%%---------------------------------------------------------------------- +cf(_Rest, #xmerl_sax_parser_state{continuation_fun = undefined} = State, _) -> + ?fatal_error(State, "Continuation function undefined"); +cf(Rest, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = CState} = State, + NextCall) -> + Result = + try + CFun(CState) + catch + throw:ErrorTerm -> + ?fatal_error(State, ErrorTerm); + exit:Reason -> + ?fatal_error(State, {'EXIT', Reason}) + end, + case Result of + {<<>>, _} -> + ?fatal_error(State, "Can't detect character encoding due to lack of indata"); + {NewBytes, NewContState} -> + NextCall(<>, + State#xmerl_sax_parser_state{continuation_state = NewContState}) + end. diff --git a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc index f3470b2809..1dca9608cb 100644 --- a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc @@ -64,38 +64,42 @@ %% Description: Parsing XML from input stream. %%---------------------------------------------------------------------- parse(Xml, State) -> - RefTable = ets:new(xmerl_sax_entity_refs, [private]), - - State1 = event_callback(startDocument, State), - - case catch parse_document(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}) of - {ok, Rest, State2} -> - State3 = event_callback(endDocument, State2), - ets:delete(RefTable), - case check_if_rest_ok(State3#xmerl_sax_parser_state.input_type, Rest) of - true -> - {ok, State3#xmerl_sax_parser_state.event_state, Rest}; - false -> - format_error(fatal_error, State3, "Input found after legal document") - end; - {fatal_error, {State2, Reason}} -> - State3 = event_callback(endDocument, State2), - ets:delete(RefTable), - format_error(fatal_error, State3, Reason); - {event_receiver_error, State2, {Tag, Reason}} -> - State3 = event_callback(endDocument, State2), - ets:delete(RefTable), - format_error(Tag, State3, Reason); - {endDocument, Rest, State2} -> - State3 = event_callback(endDocument, State2), - ets:delete(RefTable), - {ok, State3#xmerl_sax_parser_state.event_state, Rest}; - Other -> - _State2 = event_callback(endDocument, State1), - ets:delete(RefTable), - {fatal_error, Other} + RefTable = maps:new(), + + try + State1 = event_callback(startDocument, State), + Result = parse_document(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}), + handle_end_document(Result) + catch + throw:Exception -> + handle_end_document(Exception); + _:OtherError -> + handle_end_document({other, OtherError, State}) end. + % case catch parse_document(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}) of + % {ok, Rest, State2} -> + % State3 = event_callback(endDocument, State2), + % case check_if_rest_ok(State3#xmerl_sax_parser_state.input_type, Rest) of + % true -> + % {ok, State3#xmerl_sax_parser_state.event_state, Rest}; + % false -> + % format_error(fatal_error, State3, "Input found after legal document") + % end; + % {fatal_error, {State2, Reason}} -> + % State3 = event_callback(endDocument, State2), + % format_error(fatal_error, State3, Reason); + % {event_receiver_error, State2, {Tag, Reason}} -> + % State3 = event_callback(endDocument, State2), + % format_error(Tag, State3, Reason); + % {endDocument, Rest, State2} -> + % State3 = event_callback(endDocument, State2), + % {ok, State3#xmerl_sax_parser_state.event_state, Rest}; + % Other -> + % _State2 = event_callback(endDocument, State1), + % {fatal_error, Other} + % end. + %%---------------------------------------------------------------------- %% Function: parse_dtd(Xml, State) -> Result %% Input: Xml = string() | binary() @@ -105,37 +109,119 @@ parse(Xml, State) -> %% Description: Parsing XML DTD from input stream. %%---------------------------------------------------------------------- parse_dtd(Xml, State) -> - RefTable = ets:new(xmerl_sax_entity_refs, [private]), - - State1 = event_callback(startDocument, State), - - case catch parse_external_entity_1(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}) of - {fatal_error, {State2, Reason}} -> - State3 = event_callback(endDocument, State2), - ets:delete(RefTable), - format_error(fatal_error, State3, Reason); - {event_receiver_error, State2, {Tag, Reason}} -> - State3 = event_callback(endDocument, State2), - format_error(Tag, State3, Reason); - {Rest, State2} when is_record(State2, xmerl_sax_parser_state) -> - State3 = event_callback(endDocument, State2), - ets:delete(RefTable), - {ok, State3#xmerl_sax_parser_state.event_state, Rest}; - {endDocument, Rest, State2} when is_record(State2, xmerl_sax_parser_state) -> - State3 = event_callback(endDocument, State2), - ets:delete(RefTable), - {ok, State3#xmerl_sax_parser_state.event_state, Rest}; - Other -> - _State2 = event_callback(endDocument, State1), - ets:delete(RefTable), - {fatal_error, Other} + RefTable = maps:new(), + + try + State1 = event_callback(startDocument, State), + Result = parse_external_entity_1(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}), + handle_end_document(Result) + catch + throw:Exception -> + handle_end_document(Exception); + _:OtherError -> + handle_end_document({other, OtherError, State}) end. + % case catch parse_external_entity_1(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}) of + % {fatal_error, {State2, Reason}} -> + % State3 = event_callback(endDocument, State2), + % format_error(fatal_error, State3, Reason); + % {event_receiver_error, State2, {Tag, Reason}} -> + % State3 = event_callback(endDocument, State2), + % format_error(Tag, State3, Reason); + % {Rest, State2} when is_record(State2, xmerl_sax_parser_state) -> + % State3 = event_callback(endDocument, State2), + % {ok, State3#xmerl_sax_parser_state.event_state, Rest}; + % {endDocument, Rest, State2} when is_record(State2, xmerl_sax_parser_state) -> + % State3 = event_callback(endDocument, State2), + % {ok, State3#xmerl_sax_parser_state.event_state, Rest}; + % Other -> + % _State2 = event_callback(endDocument, State1), + % {fatal_error, Other} + % end. + + %%====================================================================== %% Internal functions %%====================================================================== +%%---------------------------------------------------------------------- +%% Function: handle_end_document(ParserResult) -> Result +%% Input: ParseResult = term() +%% Output: Result = {ok, Rest, EventState} | +%% EventState = term() +%% Description: Ends the parsing and formats output +%%---------------------------------------------------------------------- +handle_end_document({ok, Rest, State}) -> + %%ok case from parse + try + State1 = event_callback(endDocument, State), + case check_if_rest_ok(State1#xmerl_sax_parser_state.input_type, Rest) of + true -> + {ok, State1#xmerl_sax_parser_state.event_state, Rest}; + false -> + format_error(fatal_error, State1, "Input found after legal document") + end + catch + throw:{event_receiver_error, State2, {Tag, Reason}} -> + format_error(Tag, State2, Reason); + _:Other -> + {fatal_error, Other} + end; +handle_end_document({endDocument, Rest, State}) -> + %% ok case from parse and parse_dtd + try + State1 = event_callback(endDocument, State), + {ok, State1#xmerl_sax_parser_state.event_state, Rest} + catch + throw:{event_receiver_error, State2, {Tag, Reason}} -> + format_error(Tag, State2, Reason); + _:Other -> + {fatal_error, Other} + end; +handle_end_document({fatal_error, {State, Reason}}) -> + try + State1 = event_callback(endDocument, State), + format_error(fatal_error, State1, Reason) + catch + throw:{event_receiver_error, State2, {Tag, Reason}} -> + format_error(Tag, State2, Reason); + _:Other -> + {fatal_error, Other} + end; +handle_end_document({event_receiver_error, State, {Tag, Reason}}) -> + try + State1 = event_callback(endDocument, State), + format_error(Tag, State1, Reason) + catch + throw:{event_receiver_error, State2, {Tag, Reason}} -> + format_error(Tag, State2, Reason); + _:Other -> + {fatal_error, Other} + end; +handle_end_document({Rest, State}) when is_record(State, xmerl_sax_parser_state) -> + %%ok case from parse_dtd + try + State1 = event_callback(endDocument, State), + {ok, State1#xmerl_sax_parser_state.event_state, Rest} + catch + throw:{event_receiver_error, State2, {Tag, Reason}} -> + format_error(Tag, State2, Reason); + _:Other -> + {fatal_error, Other} + end; +handle_end_document({other, Error, State}) -> + try + _State1 = event_callback(endDocument, State), + {fatal_error, Error} + catch + throw:{event_receiver_error, State2, {Tag, Reason}} -> + format_error(Tag, State2, Reason); + _:Other -> + {fatal_error, Other} + end. + %%---------------------------------------------------------------------- %% Function: parse_document(Rest, State) -> Result %% Input: Rest = string() | binary() @@ -439,6 +525,7 @@ check_if_rest_ok(stream, _) -> check_if_rest_ok(_, _) -> false. + %%---------------------------------------------------------------------- %% Function: parse_pi_1(Rest, State) -> Result %% Input: Rest = string() | binary() @@ -883,11 +970,11 @@ send_end_prefix_mapping_event([{Prefix, _Uri} |Ns], State) -> parse_eq(?STRING_EMPTY, State) -> cf(?STRING_EMPTY, State, fun parse_eq/2); parse_eq(?STRING_REST("=", Rest), State) -> - {Rest, State}; + {Rest, State}; parse_eq(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) -> - {_WS, Rest, State1} = - whitespace(Bytes, State, []), - parse_eq(Rest, State1); + {_WS, Rest, State1} = + whitespace(Bytes, State, []), + parse_eq(Rest, State1); parse_eq(Bytes, State) -> unicode_incomplete_check([Bytes, State, fun parse_eq/2], "expecting = or whitespace"). @@ -905,11 +992,11 @@ parse_eq(Bytes, State) -> parse_att_value(?STRING_EMPTY, State) -> cf(?STRING_EMPTY, State, fun parse_att_value/2); parse_att_value(?STRING_UNBOUND_REST(C, Rest), State) when C == $'; C == $" -> - parse_att_value(Rest, State, C, []); + parse_att_value(Rest, State, C, []); parse_att_value(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) -> - {_WS, Rest, State1} = - whitespace(Bytes, State, []), - parse_att_value(Rest, State1); + {_WS, Rest, State1} = + whitespace(Bytes, State, []), + parse_att_value(Rest, State1); parse_att_value(Bytes, State) -> unicode_incomplete_check([Bytes, State, fun parse_att_value/2], "\', \" or whitespace expected"). @@ -1360,17 +1447,20 @@ parse_pe_reference_1(Bytes, State, Name) -> %%---------------------------------------------------------------------- -%% Function: insert_reference(Reference, State) -> Result -%% Parameters: Reference = string() +%% Function: insert_reference(Name, Ref, State) -> Result +%% Parameters: Name = string() +%% Ref = {Type, Value} +%% Type = atom() +%% Value = term() %% State = #xmerl_sax_parser_state{} %% Result : %%---------------------------------------------------------------------- -insert_reference({Name, Type, Value}, Table) -> - case ets:lookup(Table, Name) of - [{Name, _, _}] -> - ok; +insert_reference(Name, Value, #xmerl_sax_parser_state{ref_table = Map} = State) -> + case maps:find(Name, Map) of + error -> + State#xmerl_sax_parser_state{ref_table = maps:put(Name, Value, Map)}; _ -> - ets:insert(Table, {Name, Type, Value}) + State end. @@ -1391,8 +1481,8 @@ look_up_reference("apos", _, _) -> look_up_reference("quot", _, _) -> {internal_general, "quot", "\""}; look_up_reference(Name, HaveToExist, State) -> - case ets:lookup(State#xmerl_sax_parser_state.ref_table, Name) of - [{Name, Type, Value}] -> + case maps:find(Name, State#xmerl_sax_parser_state.ref_table) of + {ok, {Type, Value}} -> {Type, Name, Value}; _ -> case HaveToExist of @@ -1474,7 +1564,7 @@ parse_system_litteral(?STRING_EMPTY, State, Stop, Acc) -> parse_system_litteral(?STRING_UNBOUND_REST(Stop, Rest), State, Stop, Acc) -> {lists:reverse(Acc), Rest, State}; parse_system_litteral(?STRING_UNBOUND_REST(C, Rest), State, Stop, Acc) -> - parse_system_litteral(Rest, State, Stop, [C |Acc]); + parse_system_litteral(Rest, State, Stop, [C |Acc]); parse_system_litteral(Bytes, State, Stop, Acc) -> unicode_incomplete_check([Bytes, State, Stop, Acc, fun parse_system_litteral/4], undefined). @@ -1646,9 +1736,11 @@ parse_external_entity(State, _PubId, SysId) -> end_tags = []}, - EventState = handle_external_entity(ExtRef, State1), + {EventState, RefTable} = handle_external_entity(ExtRef, State1), - NewState = event_callback({endEntity, SysId}, SaveState#xmerl_sax_parser_state{event_state=EventState}), + NewState = event_callback({endEntity, SysId}, + SaveState#xmerl_sax_parser_state{event_state=EventState, + ref_table=RefTable}), NewState#xmerl_sax_parser_state{file_type=normal}. @@ -1675,7 +1767,8 @@ handle_external_entity({file, FileToOpen}, State) -> entity=filename:basename(FileToOpen), input_type=file}), ok = file:close(FD), - EntityState#xmerl_sax_parser_state.event_state + {EntityState#xmerl_sax_parser_state.event_state, + EntityState#xmerl_sax_parser_state.ref_table} end; handle_external_entity({http, Url}, State) -> @@ -1695,7 +1788,9 @@ handle_external_entity({http, Url}, State) -> input_type=file}), ok = file:close(FD), ok = file:delete(TmpFile), - EntityState#xmerl_sax_parser_state.event_state + {EntityState#xmerl_sax_parser_state.event_state, + EntityState#xmerl_sax_parser_state.ref_table} + end catch throw:{error, Error} -> @@ -1716,7 +1811,7 @@ handle_external_entity({Tag, _Url}, State) -> parse_external_entity_1(?STRING_EMPTY, #xmerl_sax_parser_state{file_type=Type} = State) -> case catch cf(?STRING_EMPTY, State, fun parse_external_entity_1/2) of {Rest, State1} when is_record(State1, xmerl_sax_parser_state) -> - {Rest, State}; + {Rest, State1}; {fatal_error, {State1, "No more bytes"}} when Type == dtd; Type == entity -> {?STRING_EMPTY, State1}; Other -> @@ -2442,24 +2537,24 @@ parse_entity_def(?STRING_EMPTY, State, Name) -> cf(?STRING_EMPTY, State, Name, fun parse_entity_def/3); parse_entity_def(?STRING_UNBOUND_REST(C, Rest), State, Name) when C == $'; C == $" -> {Value, Rest1, State1} = parse_entity_value(Rest, State, C, []), - insert_reference({Name, internal_general, Value}, State1#xmerl_sax_parser_state.ref_table), - State2 = event_callback({internalEntityDecl, Name, Value}, State1), - {_WS, Rest2, State3} = whitespace(Rest1, State2, []), - parse_def_end(Rest2, State3); + State2 = insert_reference(Name, {internal_general, Value}, State1), + State3 = event_callback({internalEntityDecl, Name, Value}, State2), + {_WS, Rest2, State4} = whitespace(Rest1, State3, []), + parse_def_end(Rest2, State4); parse_entity_def(?STRING_UNBOUND_REST(C, _) = Rest, State, Name) when C == $S; C == $P -> {PubId, SysId, Rest1, State1} = parse_external_id(Rest, State, false), {Ndata, Rest2, State2} = parse_ndata(Rest1, State1), case Ndata of undefined -> - insert_reference({Name, external_general, {PubId, SysId}}, - State2#xmerl_sax_parser_state.ref_table), - State3 = event_callback({externalEntityDecl, Name, PubId, SysId}, State2), - {Rest2, State3}; + State3 = insert_reference(Name, {external_general, {PubId, SysId}}, + State2), + State4 = event_callback({externalEntityDecl, Name, PubId, SysId}, State3), + {Rest2, State4}; _ -> - insert_reference({Name, unparsed, {PubId, SysId, Ndata}}, - State2#xmerl_sax_parser_state.ref_table), - State3 = event_callback({unparsedEntityDecl, Name, PubId, SysId, Ndata}, State2), - {Rest2, State3} + State3 = insert_reference(Name, {unparsed, {PubId, SysId, Ndata}}, + State2), + State4 = event_callback({unparsedEntityDecl, Name, PubId, SysId, Ndata}, State3), + {Rest2, State4} end; parse_entity_def(Bytes, State, Name) -> unicode_incomplete_check([Bytes, State, Name, fun parse_entity_def/3], @@ -2636,19 +2731,19 @@ parse_pe_def(?STRING_EMPTY, State, Name) -> parse_pe_def(?STRING_UNBOUND_REST(C, Rest), State, Name) when C == $'; C == $" -> {Value, Rest1, State1} = parse_entity_value(Rest, State, C, []), Name1 = "%" ++ Name, - insert_reference({Name1, internal_parameter, Value}, - State1#xmerl_sax_parser_state.ref_table), - State2 = event_callback({internalEntityDecl, Name1, Value}, State1), - {_WS, Rest2, State3} = whitespace(Rest1, State2, []), - parse_def_end(Rest2, State3); + State2 = insert_reference(Name1, {internal_parameter, Value}, + State1), + State3 = event_callback({internalEntityDecl, Name1, Value}, State2), + {_WS, Rest2, State4} = whitespace(Rest1, State3, []), + parse_def_end(Rest2, State4); parse_pe_def(?STRING_UNBOUND_REST(C, _) = Bytes, State, Name) when C == $S; C == $P -> {PubId, SysId, Rest1, State1} = parse_external_id(Bytes, State, false), Name1 = "%" ++ Name, - insert_reference({Name1, external_parameter, {PubId, SysId}}, - State1#xmerl_sax_parser_state.ref_table), - State2 = event_callback({externalEntityDecl, Name1, PubId, SysId}, State1), - {_WS, Rest2, State3} = whitespace(Rest1, State2, []), - parse_def_end(Rest2, State3); + State2 = insert_reference(Name1, {external_parameter, {PubId, SysId}}, + State1), + State3 = event_callback({externalEntityDecl, Name1, PubId, SysId}, State2), + {_WS, Rest2, State4} = whitespace(Rest1, State3, []), + parse_def_end(Rest2, State4); parse_pe_def(Bytes, State, Name) -> unicode_incomplete_check([Bytes, State, Name, fun parse_pe_def/3], "\", \', SYSTEM or PUBLIC expected"). -- cgit v1.2.3 From f8ab425800ba3b01251616eb73d5faf22bc63387 Mon Sep 17 00:00:00 2001 From: Lars Thorsen Date: Wed, 24 May 2017 07:30:27 +0200 Subject: [xmerl] Fix endDocument bug in output backends * Fix problem of unexpected endDocument in the old_dom and simple output backends --- lib/xmerl/src/xmerl_sax_old_dom.erl | 7 ++++--- lib/xmerl/src/xmerl_sax_simple_dom.erl | 5 +++-- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'lib/xmerl/src') diff --git a/lib/xmerl/src/xmerl_sax_old_dom.erl b/lib/xmerl/src/xmerl_sax_old_dom.erl index fefcf03fce..411121370f 100644 --- a/lib/xmerl/src/xmerl_sax_old_dom.erl +++ b/lib/xmerl/src/xmerl_sax_old_dom.erl @@ -127,9 +127,10 @@ build_dom(endDocument, State#xmerl_sax_old_dom_state{dom=[Decl, Current#xmlElement{ content=lists:reverse(C) }]}; - _ -> - %%?dbg("~p\n", [D]), - ?error("we're not at end the document when endDocument event is encountered.") + _ -> + %% endDocument is also sent by the parser when a fault occur to tell + %% the event receiver that no more input will be sent + State end; %% Element diff --git a/lib/xmerl/src/xmerl_sax_simple_dom.erl b/lib/xmerl/src/xmerl_sax_simple_dom.erl index 7eb3afd499..d842bd982b 100644 --- a/lib/xmerl/src/xmerl_sax_simple_dom.erl +++ b/lib/xmerl/src/xmerl_sax_simple_dom.erl @@ -129,8 +129,9 @@ build_dom(endDocument, State#xmerl_sax_simple_dom_state{dom=[Decl, {Tag, Attributes, lists:reverse(Content)}]}; _ -> - ?dbg("~p\n", [D]), - ?error("we're not at end the document when endDocument event is encountered.") + %% endDocument is also sent by the parser when a fault occur to tell + %% the event receiver that no more input will be sent + State end; %% Element -- cgit v1.2.3 From 43718d3b81d7f3d08e25047e22d579801bbe5044 Mon Sep 17 00:00:00 2001 From: Hans Nilsson Date: Wed, 14 Jun 2017 15:36:21 +0200 Subject: Update copyright year --- lib/xmerl/src/xmerl_sax_old_dom.erl | 2 +- lib/xmerl/src/xmerl_sax_simple_dom.erl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/xmerl/src') diff --git a/lib/xmerl/src/xmerl_sax_old_dom.erl b/lib/xmerl/src/xmerl_sax_old_dom.erl index 411121370f..6d0d836487 100644 --- a/lib/xmerl/src/xmerl_sax_old_dom.erl +++ b/lib/xmerl/src/xmerl_sax_old_dom.erl @@ -2,7 +2,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2009-2016. All Rights Reserved. +%% Copyright Ericsson AB 2009-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. diff --git a/lib/xmerl/src/xmerl_sax_simple_dom.erl b/lib/xmerl/src/xmerl_sax_simple_dom.erl index d842bd982b..7b15cd92dc 100644 --- a/lib/xmerl/src/xmerl_sax_simple_dom.erl +++ b/lib/xmerl/src/xmerl_sax_simple_dom.erl @@ -2,7 +2,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2009-2016. All Rights Reserved. +%% Copyright Ericsson AB 2009-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. -- cgit v1.2.3 From 9afbb2d2bc4b0680e7ac3b33465b0b6e36631faf Mon Sep 17 00:00:00 2001 From: Andreas Lappe Date: Wed, 21 Mar 2018 16:57:06 +0100 Subject: Fix a simple typo in xmerl documentation --- lib/xmerl/src/xmerl_xsd.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/xmerl/src') diff --git a/lib/xmerl/src/xmerl_xsd.erl b/lib/xmerl/src/xmerl_xsd.erl index a89b3159ec..d727084175 100644 --- a/lib/xmerl/src/xmerl_xsd.erl +++ b/lib/xmerl/src/xmerl_xsd.erl @@ -18,7 +18,7 @@ %% %CopyrightEnd% %% -%% @doc Interface module for XML Schema vlidation. +%% @doc Interface module for XML Schema validation. %% It handles the W3.org %% specifications %% of XML Schema second edition 28 october 2004. For an introduction to -- cgit v1.2.3 From 5ca92e2eac1e84fd22f60e7abc3aa2b0ff1cb42b Mon Sep 17 00:00:00 2001 From: Henrik Nord Date: Mon, 18 Jun 2018 14:51:18 +0200 Subject: Update copyright year --- lib/xmerl/src/xmerl_xsd.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/xmerl/src') diff --git a/lib/xmerl/src/xmerl_xsd.erl b/lib/xmerl/src/xmerl_xsd.erl index d727084175..2836bb0e5b 100644 --- a/lib/xmerl/src/xmerl_xsd.erl +++ b/lib/xmerl/src/xmerl_xsd.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2006-2016. All Rights Reserved. +%% Copyright Ericsson AB 2006-2018. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. -- cgit v1.2.3 From 6f94995aa58125a3652fbb2523de348a77ce2440 Mon Sep 17 00:00:00 2001 From: YeJun Su Date: Fri, 20 Jul 2018 12:26:27 +0800 Subject: Fix typo in xmerl_scan:string/1 --- lib/xmerl/src/xmerl_scan.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/xmerl/src') diff --git a/lib/xmerl/src/xmerl_scan.erl b/lib/xmerl/src/xmerl_scan.erl index a1f6ad4e2c..a7538180e6 100644 --- a/lib/xmerl/src/xmerl_scan.erl +++ b/lib/xmerl/src/xmerl_scan.erl @@ -279,7 +279,7 @@ int_file_decl(F, Options,_ExtCharset) -> %% @spec string(Text::list()) -> {xmlElement(),Rest} %% Rest = list() -%% @equiv string(Test, []) +%% @equiv string(Text, []) string(Str) -> string(Str, []). -- cgit v1.2.3 From d04b53936d5e9f84a3066ecd466993671b1428bf Mon Sep 17 00:00:00 2001 From: Henrik Nord Date: Fri, 21 Sep 2018 12:23:25 +0200 Subject: Update copyright year --- lib/xmerl/src/xmerl_scan.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/xmerl/src') diff --git a/lib/xmerl/src/xmerl_scan.erl b/lib/xmerl/src/xmerl_scan.erl index a7538180e6..e543a5a11e 100644 --- a/lib/xmerl/src/xmerl_scan.erl +++ b/lib/xmerl/src/xmerl_scan.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2003-2017. All Rights Reserved. +%% Copyright Ericsson AB 2003-2018. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. -- cgit v1.2.3