diff options
Diffstat (limited to 'lib/xmerl/src')
39 files changed, 971 insertions, 522 deletions
diff --git a/lib/xmerl/src/Makefile b/lib/xmerl/src/Makefile index f8f0320f46..51d9190797 100644 --- a/lib/xmerl/src/Makefile +++ b/lib/xmerl/src/Makefile @@ -1,7 +1,7 @@ # # %CopyrightBegin% # -# Copyright Ericsson AB 2003-2012. All Rights Reserved. +# Copyright Ericsson AB 2003-2016. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/lib/xmerl/src/xmerl.appup.src b/lib/xmerl/src/xmerl.appup.src index 388e4d8a86..e8012d8f36 100644 --- a/lib/xmerl/src/xmerl.appup.src +++ b/lib/xmerl/src/xmerl.appup.src @@ -1,7 +1,7 @@ %% -*- erlang -*- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2014. All Rights Reserved. +%% Copyright Ericsson AB 2014-2016. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. diff --git a/lib/xmerl/src/xmerl.erl b/lib/xmerl/src/xmerl.erl index 7c8c6c56f4..32dad69166 100644 --- a/lib/xmerl/src/xmerl.erl +++ b/lib/xmerl/src/xmerl.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2003-2011. All Rights Reserved. +%% Copyright Ericsson AB 2003-2016. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. diff --git a/lib/xmerl/src/xmerl_b64Bin.yrl b/lib/xmerl/src/xmerl_b64Bin.yrl index 46cf23ef34..7028cc079c 100644 --- a/lib/xmerl/src/xmerl_b64Bin.yrl +++ b/lib/xmerl/src/xmerl_b64Bin.yrl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2006-2009. All Rights Reserved. +%% Copyright Ericsson AB 2006-2016. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. diff --git a/lib/xmerl/src/xmerl_b64Bin_scan.erl b/lib/xmerl/src/xmerl_b64Bin_scan.erl index 50ac5c05c2..9afe49fffe 100644 --- a/lib/xmerl/src/xmerl_b64Bin_scan.erl +++ b/lib/xmerl/src/xmerl_b64Bin_scan.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2006-2009. All Rights Reserved. +%% Copyright Ericsson AB 2006-2016. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. diff --git a/lib/xmerl/src/xmerl_eventp.erl b/lib/xmerl/src/xmerl_eventp.erl index 243f47f159..8d7ea25e24 100644 --- a/lib/xmerl/src/xmerl_eventp.erl +++ b/lib/xmerl/src/xmerl_eventp.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2003-2009. All Rights Reserved. +%% Copyright Ericsson AB 2003-2016. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -25,6 +25,90 @@ %% Each contain more elaborate settings of xmerl_scan that makes usage of %% the customization functions. %% +%% @type xmlElement() = #xmlElement{}. +%% +%% @type option_list(). <p>Options allow to customize the behaviour of the +%% scanner. +%% See also <a href="xmerl_examples.html">tutorial</a> on customization +%% functions. +%% </p> +%% <p> +%% Possible options are: +%% </p> +%% <dl> +%% <dt><code>{acc_fun, Fun}</code></dt> +%% <dd>Call back function to accumulate contents of entity.</dd> +%% <dt><code>{continuation_fun, Fun} | +%% {continuation_fun, Fun, ContinuationState}</code></dt> +%% <dd>Call back function to decide what to do if the scanner runs into EOF +%% before the document is complete.</dd> +%% <dt><code>{event_fun, Fun} | +%% {event_fun, Fun, EventState}</code></dt> +%% <dd>Call back function to handle scanner events.</dd> +%% <dt><code>{fetch_fun, Fun} | +%% {fetch_fun, Fun, FetchState}</code></dt> +%% <dd>Call back function to fetch an external resource.</dd> +%% <dt><code>{hook_fun, Fun} | +%% {hook_fun, Fun, HookState}</code></dt> +%% <dd>Call back function to process the document entities once +%% identified.</dd> +%% <dt><code>{close_fun, Fun}</code></dt> +%% <dd>Called when document has been completely parsed.</dd> +%% <dt><code>{rules, ReadFun, WriteFun, RulesState} | +%% {rules, Rules}</code></dt> +%% <dd>Handles storing of scanner information when parsing.</dd> +%% <dt><code>{user_state, UserState}</code></dt> +%% <dd>Global state variable accessible from all customization functions</dd> +%% +%% <dt><code>{fetch_path, PathList}</code></dt> +%% <dd>PathList is a list of +%% directories to search when fetching files. If the file in question +%% is not in the fetch_path, the URI will be used as a file +%% name.</dd> +%% <dt><code>{space, Flag}</code></dt> +%% <dd>'preserve' (default) to preserve spaces, 'normalize' to +%% accumulate consecutive whitespace and replace it with one space.</dd> +%% <dt><code>{line, Line}</code></dt> +%% <dd>To specify starting line for scanning in document which contains +%% fragments of XML.</dd> +%% <dt><code>{namespace_conformant, Flag}</code></dt> +%% <dd>Controls whether to behave as a namespace conformant XML parser, +%% 'false' (default) to not otherwise 'true'.</dd> +%% <dt><code>{validation, Flag}</code></dt> +%% <dd>Controls whether to process as a validating XML parser: +%% 'off' (default) no validation, or validation 'dtd' by DTD or 'schema' +%% by XML Schema. 'false' and 'true' options are obsolete +%% (i.e. they may be removed in a future release), if used 'false' +%% equals 'off' and 'true' equals 'dtd'.</dd> +%% <dt><code>{schemaLocation, [{Namespace,Link}|...]}</code></dt> +%% <dd>Tells explicitly which XML Schema documents to use to validate +%% the XML document. Used together with the +%% <code>{validation,schema}</code> option.</dd> +%% <dt><code>{quiet, Flag}</code></dt> +%% <dd>Set to 'true' if xmerl should behave quietly and not output any +%% information to standard output (default 'false').</dd> +%% <dt><code>{doctype_DTD, DTD}</code></dt> +%% <dd>Allows to specify DTD name when it isn't available in the XML +%% document. This option has effect only together with +%% <code>{validation,'dtd'</code> option.</dd> +%% <dt><code>{xmlbase, Dir}</code></dt> +%% <dd>XML Base directory. If using string/1 default is current directory. +%% If using file/1 default is directory of given file.</dd> +%% <dt><code>{encoding, Enc}</code></dt> +%% <dd>Set default character set used (default UTF-8). +%% This character set is used only if not explicitly given by the XML +%% declaration. </dd> +%% <dt><code>{document, Flag}</code></dt> +%% <dd>Set to 'true' if xmerl should return a complete XML document +%% as an xmlDocument record (default 'false').</dd> +%% <dt><code>{comments, Flag}</code></dt> +%% <dd>Set to 'false' if xmerl should skip comments otherwise they will +%% be returned as xmlComment records (default 'true').</dd> +%% <dt><code>{default_attrs, Flag}</code></dt> +%% <dd>Set to 'true' if xmerl should add to elements missing attributes +%% with a defined default value (default 'false').</dd> +%% </dl> +%% -module(xmerl_eventp). -vsn('0.19'). -date('03-09-17'). @@ -199,7 +283,7 @@ cont2(F, Exception, Sofar, Fd, Fname, T, S) -> find_good_split(list_to_binary([Sofar,Bin]), F,Exception,Fd,Fname,T,S); eof -> - file:close(Fd), + ok = file:close(Fd), NewS = xmerl_scan:cont_state([{Fname, eof}|T], S), F(binary_to_list(Sofar), NewS); Error -> @@ -319,7 +403,7 @@ close(S) -> [{_Fname, eof}|T] -> xmerl_scan:cont_state(T, S); [{_Sofar, _Fname, Fd}|T] -> - file:close(Fd), + ok = file:close(Fd), xmerl_scan:cont_state(T, S) end. diff --git a/lib/xmerl/src/xmerl_html.erl b/lib/xmerl/src/xmerl_html.erl index 1c38ed111a..cb72eb4185 100644 --- a/lib/xmerl/src/xmerl_html.erl +++ b/lib/xmerl/src/xmerl_html.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2003-2009. All Rights Reserved. +%% Copyright Ericsson AB 2003-2016. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. diff --git a/lib/xmerl/src/xmerl_internal.hrl b/lib/xmerl/src/xmerl_internal.hrl index cbdfa4f673..e913159967 100644 --- a/lib/xmerl/src/xmerl_internal.hrl +++ b/lib/xmerl/src/xmerl_internal.hrl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2009. All Rights Reserved. +%% Copyright Ericsson AB 2009-2016. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. diff --git a/lib/xmerl/src/xmerl_lib.erl b/lib/xmerl/src/xmerl_lib.erl index a2d6a88c18..be0e28e62d 100644 --- a/lib/xmerl/src/xmerl_lib.erl +++ b/lib/xmerl/src/xmerl_lib.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2003-2011. All Rights Reserved. +%% Copyright Ericsson AB 2003-2016. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. diff --git a/lib/xmerl/src/xmerl_otpsgml.erl b/lib/xmerl/src/xmerl_otpsgml.erl index bd6ac33997..cbd13263f6 100644 --- a/lib/xmerl/src/xmerl_otpsgml.erl +++ b/lib/xmerl/src/xmerl_otpsgml.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% Copyright Ericsson AB 2004-2016. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. diff --git a/lib/xmerl/src/xmerl_regexp.erl b/lib/xmerl/src/xmerl_regexp.erl index 2b4696f8d2..1bf8496673 100644 --- a/lib/xmerl/src/xmerl_regexp.erl +++ b/lib/xmerl/src/xmerl_regexp.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2006-2009. All Rights Reserved. +%% Copyright Ericsson AB 2006-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -1154,7 +1154,7 @@ comp_crs([], Last) -> [{Last,maxchar}]. %% build_dfa(NFA, NfaStartState) -> {DFA,DfaStartState}. %% Build a DFA from an NFA using "subset construction". The major %% difference from the book is that we keep the marked and unmarked -%% DFA states in seperate lists. New DFA states are added to the +%% DFA states in separate lists. New DFA states are added to the %% unmarked list and states are marked by moving them to the marked %% list. We assume that the NFA accepting state numbers are in %% ascending order for the rules and use ordsets to keep this order. diff --git a/lib/xmerl/src/xmerl_sax_old_dom.erl b/lib/xmerl/src/xmerl_sax_old_dom.erl index 2a1e5f6df8..6d0d836487 100644 --- a/lib/xmerl/src/xmerl_sax_old_dom.erl +++ b/lib/xmerl/src/xmerl_sax_old_dom.erl @@ -2,7 +2,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2009. All Rights Reserved. +%% Copyright Ericsson AB 2009-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -127,9 +127,10 @@ build_dom(endDocument, State#xmerl_sax_old_dom_state{dom=[Decl, Current#xmlElement{ content=lists:reverse(C) }]}; - _ -> - %%?dbg("~p\n", [D]), - ?error("we're not at end the document when endDocument event is encountered.") + _ -> + %% endDocument is also sent by the parser when a fault occur to tell + %% the event receiver that no more input will be sent + State end; %% Element diff --git a/lib/xmerl/src/xmerl_sax_old_dom.hrl b/lib/xmerl/src/xmerl_sax_old_dom.hrl index 04c9253e83..923b59cd10 100644 --- a/lib/xmerl/src/xmerl_sax_old_dom.hrl +++ b/lib/xmerl/src/xmerl_sax_old_dom.hrl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2009. All Rights Reserved. +%% Copyright Ericsson AB 2009-2016. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. diff --git a/lib/xmerl/src/xmerl_sax_parser.erl b/lib/xmerl/src/xmerl_sax_parser.erl index 6924db6c15..fe836fd8cd 100644 --- a/lib/xmerl/src/xmerl_sax_parser.erl +++ b/lib/xmerl/src/xmerl_sax_parser.erl @@ -1,8 +1,8 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2008-2012. All Rights Reserved. -%% +%% +%% Copyright Ericsson AB 2008-2018. All Rights Reserved. +%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,13 +14,13 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% +%% %% %CopyrightEnd% %%---------------------------------------------------------------------- %% File : xmerl_sax_parser.erl %% Description : XML SAX parse API module. %% -%% Created : 4 Jun 2008 +%% Created : 4 Jun 2008 %%---------------------------------------------------------------------- -module(xmerl_sax_parser). @@ -32,17 +32,14 @@ %%---------------------------------------------------------------------- %% External exports %%---------------------------------------------------------------------- --export([ - file/2, - stream/2 - ]). +-export([file/2, + stream/3, + stream/2]). %%---------------------------------------------------------------------- %% Internal exports %%---------------------------------------------------------------------- --export([ - default_continuation_cb/1 - ]). +-export([default_continuation_cb/1]). %%---------------------------------------------------------------------- %% Macros @@ -67,7 +64,7 @@ %% Description: Parse file containing an XML document. %%---------------------------------------------------------------------- file(Name,Options) -> - case file:open(Name, [raw, read,binary]) of + case file:open(Name, [raw, read_ahead, read,binary]) of {error, Reason} -> {error,{Name, file:format_error(Reason)}}; {ok, FD} -> @@ -75,13 +72,14 @@ file(Name,Options) -> CL = filename:absname(Dir), File = filename:basename(Name), ContinuationFun = fun default_continuation_cb/1, - Res = stream(<<>>, - [{continuation_fun, ContinuationFun}, - {continuation_state, FD}, - {current_location, CL}, - {entity, File} - |Options]), - file:close(FD), + Res = stream(<<>>, + [{continuation_fun, ContinuationFun}, + {continuation_state, FD}, + {current_location, CL}, + {entity, File} + |Options], + file), + ok = file:close(FD), Res end. @@ -96,44 +94,48 @@ file(Name,Options) -> %% EventState = term() %% Description: Parse a stream containing an XML document. %%---------------------------------------------------------------------- -stream(Xml, Options) when is_list(Xml), is_list(Options) -> +stream(Xml, Options) -> + stream(Xml, Options, stream). + +stream(Xml, Options, InputType) when is_list(Xml), is_list(Options) -> State = parse_options(Options, initial_state()), - case State#xmerl_sax_parser_state.file_type of + case State#xmerl_sax_parser_state.file_type of dtd -> - xmerl_sax_parser_list:parse_dtd(Xml, + xmerl_sax_parser_list:parse_dtd(Xml, State#xmerl_sax_parser_state{encoding = list, - input_type = stream}); + input_type = InputType}); normal -> - xmerl_sax_parser_list:parse(Xml, + xmerl_sax_parser_list:parse(Xml, State#xmerl_sax_parser_state{encoding = list, - input_type = stream}) + input_type = InputType}) end; -stream(Xml, Options) when is_binary(Xml), is_list(Options) -> - case parse_options(Options, initial_state()) of +stream(Xml, Options, InputType) when is_binary(Xml), is_list(Options) -> + case parse_options(Options, initial_state()) of {error, Reason} -> {error, Reason}; State -> - ParseFunction = + ParseFunction = case State#xmerl_sax_parser_state.file_type of dtd -> parse_dtd; normal -> parse end, - case detect_charset(Xml, State) of - {error, Reason} -> {fatal_error, - { - State#xmerl_sax_parser_state.current_location, - State#xmerl_sax_parser_state.entity, - 1 - }, - Reason, - [], - State#xmerl_sax_parser_state.event_state}; - {Xml1, State1} -> - parse_binary(Xml1, - State1#xmerl_sax_parser_state{input_type = stream}, - ParseFunction) - end + try + {Xml1, State1} = detect_charset(Xml, State), + parse_binary(Xml1, + State1#xmerl_sax_parser_state{input_type = InputType}, + ParseFunction) + catch + throw:{fatal_error, {State2, Reason}} -> + {fatal_error, + { + State2#xmerl_sax_parser_state.current_location, + State2#xmerl_sax_parser_state.entity, + 1 + }, + Reason, [], + State2#xmerl_sax_parser_state.event_state} + end end. %%---------------------------------------------------------------------- @@ -155,8 +157,8 @@ parse_binary(Xml, #xmerl_sax_parser_state{encoding={utf16,big}}=State, F) -> xmerl_sax_parser_utf16be:F(Xml, State); parse_binary(Xml, #xmerl_sax_parser_state{encoding=latin1}=State, F) -> xmerl_sax_parser_latin1:F(Xml, State); -parse_binary(_, #xmerl_sax_parser_state{encoding=Enc}, _) -> - {error, lists:flatten(io_lib:format("Charcter set ~p not supported", [Enc]))}. +parse_binary(_, #xmerl_sax_parser_state{encoding=Enc}, State) -> + ?fatal_error(State, lists:flatten(io_lib:format("Charcter set ~p not supported", [Enc]))). %%---------------------------------------------------------------------- %% Function: initial_state/0 @@ -175,9 +177,9 @@ initial_state() -> %%---------------------------------------------------------------------- %% Function: parse_options(Options, State) %% Input: Options = [Option] -%% Option = {event_state, term()} | {event_fun, fun()} | +%% Option = {event_state, term()} | {event_fun, fun()} | %% {continuation_state, term()} | {continuation_fun, fun()} | -%% {encoding, Encoding} | {file_type, FT} +%% {encoding, Encoding} | {file_type, FT} %% FT = normal | dtd %% Encoding = utf8 | utf16le | utf16be | list | iso8859 %% State = #xmerl_sax_parser_state{} @@ -198,7 +200,7 @@ parse_options([{file_type, FT} |Options], State) when FT==normal; FT==dtd -> parse_options(Options, State#xmerl_sax_parser_state{file_type = FT}); parse_options([{encoding, E} |Options], State) -> case check_encoding_option(E) of - {error, Reason} -> + {error, Reason} -> {error, Reason}; Enc -> parse_options(Options, State#xmerl_sax_parser_state{encoding = Enc}) @@ -210,8 +212,7 @@ parse_options([{entity, Entity} |Options], State) -> parse_options([skip_external_dtd |Options], State) -> parse_options(Options, State#xmerl_sax_parser_state{skip_external_dtd = true}); parse_options([O |_], _State) -> - {error, - lists:flatten(io_lib:format("Option: ~p not supported", [O]))}. + {error, lists:flatten(io_lib:format("Option: ~p not supported", [O]))}. check_encoding_option(E) when E==utf8; E=={utf16,little}; E=={utf16,big}; @@ -229,16 +230,10 @@ check_encoding_option(E) -> %% Output: {utf8|utf16le|utf16be|iso8859, Xml, State} %% Description: Detects which character set is used in a binary stream. %%---------------------------------------------------------------------- -detect_charset(<<>>, #xmerl_sax_parser_state{continuation_fun = undefined} = _) -> - throw({error, "Can't detect character encoding due to no indata"}); -detect_charset(<<>>, #xmerl_sax_parser_state{continuation_fun = CFun, - continuation_state = CState} = State) -> - case CFun(CState) of - {<<>>, _} -> - throw({error, "Can't detect character encoding due to lack of indata"}); - {NewBytes, NewContState} -> - detect_charset(NewBytes, State#xmerl_sax_parser_state{continuation_state = NewContState}) - end; +detect_charset(<<>>, #xmerl_sax_parser_state{continuation_fun = undefined} = State) -> + ?fatal_error(State, "Can't detect character encoding due to lack of indata"); +detect_charset(<<>>, State) -> + cf(<<>>, State, fun detect_charset/2); detect_charset(Bytes, State) -> case unicode:bom_to_encoding(Bytes) of {latin1, 0} -> @@ -248,26 +243,40 @@ detect_charset(Bytes, State) -> {RealBytes, State#xmerl_sax_parser_state{encoding=Enc}} end. +detect_charset_1(<<16#00>> = Xml, State) -> + cf(Xml, State, fun detect_charset_1/2); +detect_charset_1(<<16#00, 16#3C>> = Xml, State) -> + cf(Xml, State, fun detect_charset_1/2); +detect_charset_1(<<16#00, 16#3C, 16#00>> = Xml, State) -> + cf(Xml, State, fun detect_charset_1/2); detect_charset_1(<<16#00, 16#3C, 16#00, 16#3F, _/binary>> = Xml, State) -> {Xml, State#xmerl_sax_parser_state{encoding={utf16, big}}}; +detect_charset_1(<<16#3C>> = Xml, State) -> + cf(Xml, State, fun detect_charset_1/2); +detect_charset_1(<<16#3C, 16#00>> = Xml, State) -> + cf(Xml, State, fun detect_charset_1/2); +detect_charset_1(<<16#3C, 16#00, 16#3F>> = Xml, State) -> + cf(Xml, State, fun detect_charset_1/2); detect_charset_1(<<16#3C, 16#00, 16#3F, 16#00, _/binary>> = Xml, State) -> {Xml, State#xmerl_sax_parser_state{encoding={utf16, little}}}; -detect_charset_1(<<16#3C, 16#3F, 16#78, 16#6D, 16#6C, Xml2/binary>> = Xml, State) -> - case parse_xml_directive(Xml2) of - {error, Reason} -> - {error, Reason}; - AttrList -> - case lists:keysearch("encoding", 1, AttrList) of - {value, {_, E}} -> - case convert_encoding(E) of - {error, Reason} -> - {error, Reason}; - Enc -> - {Xml, State#xmerl_sax_parser_state{encoding=Enc}} - end; - _ -> - {Xml, State} - end +detect_charset_1(<<16#3C>> = Xml, State) -> + cf(Xml, State, fun detect_charset_1/2); +detect_charset_1(<<16#3C, 16#3F>> = Xml, State) -> + cf(Xml, State, fun detect_charset_1/2); +detect_charset_1(<<16#3C, 16#3F, 16#78>> = Xml, State) -> + cf(Xml, State, fun detect_charset_1/2); +detect_charset_1(<<16#3C, 16#3F, 16#78, 16#6D>> = Xml, State) -> + cf(Xml, State, fun detect_charset_1/2); +detect_charset_1(<<16#3C, 16#3F, 16#78, 16#6D, 16#6C, Xml2/binary>>, State) -> + {Xml3, State1} = read_until_end_of_xml_directive(Xml2, State), + AttrList = parse_xml_directive(Xml3, State), + case lists:keysearch("encoding", 1, AttrList) of + {value, {_, E}} -> + Enc = convert_encoding(E, State), + {<<16#3C, 16#3F, 16#78, 16#6D, 16#6C, Xml3/binary>>, + State1#xmerl_sax_parser_state{encoding=Enc}}; + _ -> + {<<16#3C, 16#3F, 16#78, 16#6D, 16#6C, Xml3/binary>>, State1} end; detect_charset_1(Xml, State) -> {Xml, State}. @@ -278,7 +287,7 @@ detect_charset_1(Xml, State) -> %% Output: utf8 | iso8859 %% Description: Converting 7,8 bit and utf8 encoding strings to internal format. %%---------------------------------------------------------------------- -convert_encoding(Enc) -> %% Just for 7,8 bit + utf8 +convert_encoding(Enc, State) -> %% Just for 7,8 bit + utf8 case string:to_lower(Enc) of "utf-8" -> utf8; "us-ascii" -> utf8; @@ -292,19 +301,19 @@ convert_encoding(Enc) -> %% Just for 7,8 bit + utf8 "iso-8859-7" -> latin1; "iso-8859-8" -> latin1; "iso-8859-9" -> latin1; - _ -> {error, "Unknown encoding: " ++ Enc} + _ -> ?fatal_error(State, "Unknown encoding: " ++ Enc) end. %%---------------------------------------------------------------------- %% Function: parse_xml_directive(Xml) %% Input: Xml = binary() %% Acc = list() -%% Output: +%% Output: %% Description: Parsing the xml declaration from the input stream. %%---------------------------------------------------------------------- -parse_xml_directive(<<C, Rest/binary>>) when ?is_whitespace(C) -> - parse_xml_directive_1(Rest, []). - +parse_xml_directive(<<C, Rest/binary>>, State) when ?is_whitespace(C) -> + parse_xml_directive_1(Rest, [], State). + %%---------------------------------------------------------------------- %% Function: parse_xml_directive_1(Xml, Acc) -> [{Name, Value}] %% Input: Xml = binary() @@ -314,20 +323,20 @@ parse_xml_directive(<<C, Rest/binary>>) when ?is_whitespace(C) -> %% Output: see above %% Description: Parsing the xml declaration from the input stream. %%---------------------------------------------------------------------- -parse_xml_directive_1(<<C, Rest/binary>>, Acc) when ?is_whitespace(C) -> - parse_xml_directive_1(Rest, Acc); -parse_xml_directive_1(<<"?>", _/binary>>, Acc) -> +parse_xml_directive_1(<<C, Rest/binary>>, Acc, State) when ?is_whitespace(C) -> + parse_xml_directive_1(Rest, Acc, State); +parse_xml_directive_1(<<"?>", _/binary>>, Acc, _State) -> Acc; -parse_xml_directive_1(<<C, Rest/binary>>, Acc) when 97 =< C, C =< 122 -> +parse_xml_directive_1(<<C, Rest/binary>>, Acc, State) when 97 =< C, C =< 122 -> {Name, Rest1} = parse_name(Rest, [C]), - Rest2 = parse_eq(Rest1), - {Value, Rest3} = parse_value(Rest2), - parse_xml_directive_1(Rest3, [{Name, Value} |Acc]); -parse_xml_directive_1(_, _) -> - {error, "Unknown attribute in xml directive"}. + Rest2 = parse_eq(Rest1, State), + {Value, Rest3} = parse_value(Rest2, State), + parse_xml_directive_1(Rest3, [{Name, Value} |Acc], State); +parse_xml_directive_1(_, _, State) -> + ?fatal_error(State, "Unknown attribute in xml directive"). %%---------------------------------------------------------------------- -%% Function: parse_xml_directive_1(Xml, Acc) -> Name +%% Function: parse_name(Xml, Acc) -> Name %% Input: Xml = binary() %% Acc = string() %% Output: Name = string() @@ -344,10 +353,12 @@ parse_name(Rest, Acc) -> %% Output: Rest = binary() %% Description: Reads an '=' from the stream. %%---------------------------------------------------------------------- -parse_eq(<<C, Rest/binary>>) when ?is_whitespace(C) -> - parse_eq(Rest); -parse_eq(<<"=", Rest/binary>>) -> - Rest. +parse_eq(<<C, Rest/binary>>, State) when ?is_whitespace(C) -> + parse_eq(Rest, State); +parse_eq(<<"=", Rest/binary>>, _State) -> + Rest; +parse_eq(_, State) -> + ?fatal_error(State, "expecting = or whitespace"). %%---------------------------------------------------------------------- %% Function: parse_value(Xml) -> {Value, Rest} @@ -356,10 +367,12 @@ parse_eq(<<"=", Rest/binary>>) -> %% Rest = binary() %% Description: Parsing an attribute value from the stream. %%---------------------------------------------------------------------- -parse_value(<<C, Rest/binary>>) when ?is_whitespace(C) -> - parse_value(Rest); -parse_value(<<C, Rest/binary>>) when C == $'; C == $" -> - parse_value_1(Rest, C, []). +parse_value(<<C, Rest/binary>>, State) when ?is_whitespace(C) -> + parse_value(Rest, State); +parse_value(<<C, Rest/binary>>, _State) when C == $'; C == $" -> + parse_value_1(Rest, C, []); +parse_value(_, State) -> + ?fatal_error(State, "\', \" or whitespace expected"). %%---------------------------------------------------------------------- %% Function: parse_value_1(Xml, Stop, Acc) -> {Value, Rest} @@ -376,7 +389,7 @@ parse_value_1(<<C, Rest/binary>>, Stop, Acc) -> parse_value_1(Rest, Stop, [C |Acc]). %%====================================================================== -%%Default functions +%% Default functions %%====================================================================== %%---------------------------------------------------------------------- %% Function: default_event_cb(Event, LineNo, State) -> Result @@ -392,7 +405,7 @@ default_event_cb(_Event, _LineNo, State) -> %%---------------------------------------------------------------------- %% Function: default_continuation_cb(IoDevice) -> Result %% IoDevice = iodevice() -%% Output: Result = {[char()], State} +%% Output: Result = {binary(), IoDevice} %% Description: Default continuation callback reading blocks. %%---------------------------------------------------------------------- default_continuation_cb(IoDevice) -> @@ -402,3 +415,82 @@ default_continuation_cb(IoDevice) -> {ok, FileBin} -> {FileBin, IoDevice} end. + +%%---------------------------------------------------------------------- +%% Function: read_until_end_of_xml_directive(Rest, State) -> Result +%% Rest = binary() +%% Output: Result = {binary(), State} +%% Description: Reads a utf8 or latin1 until it finds '?>' +%%---------------------------------------------------------------------- +read_until_end_of_xml_directive(Rest, State) -> + case binary:match(Rest, <<"?>">>) of + nomatch -> + case cf(Rest, State) of + {<<>>, _} -> + ?fatal_error(State, "Can't detect character encoding due to lack of indata"); + {NewBytes, NewState} -> + read_until_end_of_xml_directive(NewBytes, NewState) + end; + _ -> + {Rest, State} + end. + + +%%---------------------------------------------------------------------- +%% Function : cf(Rest, State) -> Result +%% Parameters: Rest = binary() +%% State = #xmerl_sax_parser_state{} +%% NextCall = fun() +%% Result : {Rest, State} +%% Description: Function that uses provided fun to read another chunk from +%% input stream and calls the fun in NextCall. +%%---------------------------------------------------------------------- +cf(_Rest, #xmerl_sax_parser_state{continuation_fun = undefined} = State) -> + ?fatal_error(State, "Continuation function undefined"); +cf(Rest, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = CState} = State) -> + Result = + try + CFun(CState) + catch + throw:ErrorTerm -> + ?fatal_error(State, ErrorTerm); + exit:Reason -> + ?fatal_error(State, {'EXIT', Reason}) + end, + case Result of + {<<>>, _} -> + ?fatal_error(State, "Can't detect character encoding due to lack of indata"); + {NewBytes, NewContState} -> + {<<Rest/binary, NewBytes/binary>>, + State#xmerl_sax_parser_state{continuation_state = NewContState}} + end. + +%%---------------------------------------------------------------------- +%% Function : cf(Rest, State, NextCall) -> Result +%% Parameters: Rest = binary() +%% State = #xmerl_sax_parser_state{} +%% NextCall = fun() +%% Result : {Rest, State} +%% Description: Function that uses provided fun to read another chunk from +%% input stream and calls the fun in NextCall. +%%---------------------------------------------------------------------- +cf(_Rest, #xmerl_sax_parser_state{continuation_fun = undefined} = State, _) -> + ?fatal_error(State, "Continuation function undefined"); +cf(Rest, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = CState} = State, + NextCall) -> + Result = + try + CFun(CState) + catch + throw:ErrorTerm -> + ?fatal_error(State, ErrorTerm); + exit:Reason -> + ?fatal_error(State, {'EXIT', Reason}) + end, + case Result of + {<<>>, _} -> + ?fatal_error(State, "Can't detect character encoding due to lack of indata"); + {NewBytes, NewContState} -> + NextCall(<<Rest/binary, NewBytes/binary>>, + State#xmerl_sax_parser_state{continuation_state = NewContState}) + end. diff --git a/lib/xmerl/src/xmerl_sax_parser.hrl b/lib/xmerl/src/xmerl_sax_parser.hrl index e7067e7629..56a3a42e5f 100644 --- a/lib/xmerl/src/xmerl_sax_parser.hrl +++ b/lib/xmerl/src/xmerl_sax_parser.hrl @@ -1,7 +1,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2009. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -88,14 +88,7 @@ current_location, % Location of the currently parsed XML entity entity, % Parsed XML entity skip_external_dtd = false,% If true the external DTD is skipped during parsing - input_type % Source type: file | stream. - % This field is a preparation for an fix in R17 of a bug in - % the conformance against the standard. - % Today a file which contains two XML documents will be considered - % well-formed and the second is placed in the rest part of the - % return tuple, according to the conformance tests this should fail. - % In the future this will fail if xmerl_sax_aprser:file/2 is used but - % left to the user in the xmerl_sax_aprser:stream/2 case. + input_type % Source type: file | stream }). diff --git a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc index 2bbe0eea1a..1dca9608cb 100644 --- a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc @@ -1,7 +1,7 @@ %%-*-erlang-*- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2012. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -29,12 +29,10 @@ %%---------------------------------------------------------------------- %% External exports %%---------------------------------------------------------------------- --export([ - parse/2, +-export([parse/2, parse_dtd/2, is_name_char/1, - is_name_start/1 - ]). + is_name_start/1]). %%---------------------------------------------------------------------- %% Internal exports @@ -66,29 +64,42 @@ %% Description: Parsing XML from input stream. %%---------------------------------------------------------------------- parse(Xml, State) -> - RefTable = ets:new(xmerl_sax_entity_refs, [private]), - - State1 = event_callback(startDocument, State), - - case catch parse_document(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}) of - {ok, Rest, State2} -> - State3 = event_callback(endDocument, State2), - ets:delete(RefTable), - {ok, State3#xmerl_sax_parser_state.event_state, Rest}; - {fatal_error, {State2, Reason}} -> - State3 = event_callback(endDocument, State2), - ets:delete(RefTable), - format_error(fatal_error, State3, Reason); - {event_receiver_error, State2, {Tag, Reason}} -> - State3 = event_callback(endDocument, State2), - ets:delete(RefTable), - format_error(Tag, State3, Reason); - Other -> - _State2 = event_callback(endDocument, State1), - ets:delete(RefTable), - throw(Other) + RefTable = maps:new(), + + try + State1 = event_callback(startDocument, State), + Result = parse_document(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}), + handle_end_document(Result) + catch + throw:Exception -> + handle_end_document(Exception); + _:OtherError -> + handle_end_document({other, OtherError, State}) end. + % case catch parse_document(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}) of + % {ok, Rest, State2} -> + % State3 = event_callback(endDocument, State2), + % case check_if_rest_ok(State3#xmerl_sax_parser_state.input_type, Rest) of + % true -> + % {ok, State3#xmerl_sax_parser_state.event_state, Rest}; + % false -> + % format_error(fatal_error, State3, "Input found after legal document") + % end; + % {fatal_error, {State2, Reason}} -> + % State3 = event_callback(endDocument, State2), + % format_error(fatal_error, State3, Reason); + % {event_receiver_error, State2, {Tag, Reason}} -> + % State3 = event_callback(endDocument, State2), + % format_error(Tag, State3, Reason); + % {endDocument, Rest, State2} -> + % State3 = event_callback(endDocument, State2), + % {ok, State3#xmerl_sax_parser_state.event_state, Rest}; + % Other -> + % _State2 = event_callback(endDocument, State1), + % {fatal_error, Other} + % end. + %%---------------------------------------------------------------------- %% Function: parse_dtd(Xml, State) -> Result %% Input: Xml = string() | binary() @@ -98,38 +109,120 @@ parse(Xml, State) -> %% Description: Parsing XML DTD from input stream. %%---------------------------------------------------------------------- parse_dtd(Xml, State) -> - RefTable = ets:new(xmerl_sax_entity_refs, [private]), - - State1 = event_callback(startDocument, State), - - case catch parse_external_entity_1(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}) of - {fatal_error, {State2, Reason}} -> - State3 = event_callback(endDocument, State2), - ets:delete(RefTable), - format_error(fatal_error, State3, Reason); - {event_receiver_error, State2, {Tag, Reason}} -> - State3 = event_callback(endDocument, State2), - format_error(Tag, State3, Reason); - {Rest, State2} when is_record(State2, xmerl_sax_parser_state) -> - State3 = event_callback(endDocument, State2), - ets:delete(RefTable), - {ok, State3#xmerl_sax_parser_state.event_state, Rest}; - {endDocument, Rest, State2} when is_record(State2, xmerl_sax_parser_state) -> - State3 = event_callback(endDocument, State2), - ets:delete(RefTable), - {ok, State3#xmerl_sax_parser_state.event_state, Rest}; - Other -> - _State2 = event_callback(endDocument, State1), - ets:delete(RefTable), - throw(Other) + RefTable = maps:new(), + + try + State1 = event_callback(startDocument, State), + Result = parse_external_entity_1(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}), + handle_end_document(Result) + catch + throw:Exception -> + handle_end_document(Exception); + _:OtherError -> + handle_end_document({other, OtherError, State}) end. + % case catch parse_external_entity_1(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}) of + % {fatal_error, {State2, Reason}} -> + % State3 = event_callback(endDocument, State2), + % format_error(fatal_error, State3, Reason); + % {event_receiver_error, State2, {Tag, Reason}} -> + % State3 = event_callback(endDocument, State2), + % format_error(Tag, State3, Reason); + % {Rest, State2} when is_record(State2, xmerl_sax_parser_state) -> + % State3 = event_callback(endDocument, State2), + % {ok, State3#xmerl_sax_parser_state.event_state, Rest}; + % {endDocument, Rest, State2} when is_record(State2, xmerl_sax_parser_state) -> + % State3 = event_callback(endDocument, State2), + % {ok, State3#xmerl_sax_parser_state.event_state, Rest}; + % Other -> + % _State2 = event_callback(endDocument, State1), + % {fatal_error, Other} + % end. + + %%====================================================================== %% Internal functions %%====================================================================== %%---------------------------------------------------------------------- +%% Function: handle_end_document(ParserResult) -> Result +%% Input: ParseResult = term() +%% Output: Result = {ok, Rest, EventState} | +%% EventState = term() +%% Description: Ends the parsing and formats output +%%---------------------------------------------------------------------- +handle_end_document({ok, Rest, State}) -> + %%ok case from parse + try + State1 = event_callback(endDocument, State), + case check_if_rest_ok(State1#xmerl_sax_parser_state.input_type, Rest) of + true -> + {ok, State1#xmerl_sax_parser_state.event_state, Rest}; + false -> + format_error(fatal_error, State1, "Input found after legal document") + end + catch + throw:{event_receiver_error, State2, {Tag, Reason}} -> + format_error(Tag, State2, Reason); + _:Other -> + {fatal_error, Other} + end; +handle_end_document({endDocument, Rest, State}) -> + %% ok case from parse and parse_dtd + try + State1 = event_callback(endDocument, State), + {ok, State1#xmerl_sax_parser_state.event_state, Rest} + catch + throw:{event_receiver_error, State2, {Tag, Reason}} -> + format_error(Tag, State2, Reason); + _:Other -> + {fatal_error, Other} + end; +handle_end_document({fatal_error, {State, Reason}}) -> + try + State1 = event_callback(endDocument, State), + format_error(fatal_error, State1, Reason) + catch + throw:{event_receiver_error, State2, {Tag, Reason}} -> + format_error(Tag, State2, Reason); + _:Other -> + {fatal_error, Other} + end; +handle_end_document({event_receiver_error, State, {Tag, Reason}}) -> + try + State1 = event_callback(endDocument, State), + format_error(Tag, State1, Reason) + catch + throw:{event_receiver_error, State2, {Tag, Reason}} -> + format_error(Tag, State2, Reason); + _:Other -> + {fatal_error, Other} + end; +handle_end_document({Rest, State}) when is_record(State, xmerl_sax_parser_state) -> + %%ok case from parse_dtd + try + State1 = event_callback(endDocument, State), + {ok, State1#xmerl_sax_parser_state.event_state, Rest} + catch + throw:{event_receiver_error, State2, {Tag, Reason}} -> + format_error(Tag, State2, Reason); + _:Other -> + {fatal_error, Other} + end; +handle_end_document({other, Error, State}) -> + try + _State1 = event_callback(endDocument, State), + {fatal_error, Error} + catch + throw:{event_receiver_error, State2, {Tag, Reason}} -> + format_error(Tag, State2, Reason); + _:Other -> + {fatal_error, Other} + end. + +%%---------------------------------------------------------------------- %% Function: parse_document(Rest, State) -> Result %% Input: Rest = string() | binary() %% State = #xmerl_sax_parser_state{} @@ -138,10 +231,11 @@ parse_dtd(Xml, State) -> %% [1] document ::= prolog element Misc* %%---------------------------------------------------------------------- parse_document(Rest, State) when is_record(State, xmerl_sax_parser_state) -> - {Rest1, State1} = parse_xml_decl(Rest, State), + {Rest1, State1} = parse_byte_order_mark(Rest, State), {Rest2, State2} = parse_misc(Rest1, State1, true), {ok, Rest2, State2}. +?PARSE_BYTE_ORDER_MARK(Bytes, State). %%---------------------------------------------------------------------- %% Function: parse_xml_decl(Rest, State) -> Result @@ -154,12 +248,6 @@ parse_document(Rest, State) when is_record(State, xmerl_sax_parser_state) -> %%---------------------------------------------------------------------- parse_xml_decl(?STRING_EMPTY, State) -> cf(?STRING_EMPTY, State, fun parse_xml_decl/2); -parse_xml_decl(?BYTE_ORDER_MARK_1, State) -> - cf(?BYTE_ORDER_MARK_1, State, fun parse_xml_decl/2); -parse_xml_decl(?BYTE_ORDER_MARK_2, State) -> - cf(?BYTE_ORDER_MARK_2, State, fun parse_xml_decl/2); -parse_xml_decl(?BYTE_ORDER_MARK_REST(Rest), State) -> - cf(Rest, State, fun parse_xml_decl/2); parse_xml_decl(?STRING("<") = Bytes, State) -> cf(Bytes, State, fun parse_xml_decl/2); parse_xml_decl(?STRING("<?") = Bytes, State) -> @@ -171,31 +259,19 @@ parse_xml_decl(?STRING("<?xm") = Bytes, State) -> parse_xml_decl(?STRING("<?xml") = Bytes, State) -> cf(Bytes, State, fun parse_xml_decl/2); parse_xml_decl(?STRING_REST("<?xml", Rest1), State) -> - parse_xml_decl_1(Rest1, State); -parse_xml_decl(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State) when is_binary(Bytes) -> - case unicode:characters_to_list(Bytes, Enc) of - {incomplete, _, _} -> - cf(Bytes, State, fun parse_xml_decl/2); - {error, _Encoded, _Rest} -> - ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))); - _ -> - parse_prolog(Bytes, State) - end; -parse_xml_decl(Bytes, State) -> - parse_prolog(Bytes, State). - + parse_xml_decl_rest(Rest1, State); +?PARSE_XML_DECL(Bytes, State). -parse_xml_decl_1(?STRING_UNBOUND_REST(C, Rest) = Bytes, State) -> +parse_xml_decl_rest(?STRING_UNBOUND_REST(C, Rest) = Bytes, State) -> if ?is_whitespace(C) -> {_XmlAttributes, Rest1, State1} = parse_version_info(Rest, State, []), - %State2 = event_callback({processingInstruction, "xml", XmlAttributes}, State1),% The XML decl. should not be reported as a PI parse_prolog(Rest1, State1); true -> parse_prolog(?STRING_REST("<?xml", Bytes), State) end; -parse_xml_decl_1(Bytes, State) -> - unicode_incomplete_check([Bytes, State, fun parse_xml_decl_1/2], undefined). +parse_xml_decl_rest(Bytes, State) -> + unicode_incomplete_check([Bytes, State, fun parse_xml_decl_rest/2], undefined). @@ -217,8 +293,6 @@ parse_prolog(?STRING_REST("<?", Rest), State) -> parse_prolog(Rest1, State1); {endDocument, Rest1, State1} -> parse_prolog(Rest1, State1) - % IValue = ?TO_INPUT_FORMAT("<?"), - % {?APPEND_STRING(IValue, Rest1), State1} end; parse_prolog(?STRING_REST("<!", Rest), State) -> parse_prolog_1(Rest, State); @@ -231,7 +305,6 @@ parse_prolog(Bytes, State) -> unicode_incomplete_check([Bytes, State, fun parse_prolog/2], "expecting < or whitespace"). - parse_prolog_1(?STRING_EMPTY, State) -> cf(?STRING_EMPTY, State, fun parse_prolog_1/2); parse_prolog_1(?STRING("D") = Bytes, State) -> @@ -443,6 +516,16 @@ check_if_new_doc_allowed(stream, []) -> check_if_new_doc_allowed(_, _) -> false. +check_if_rest_ok(file, []) -> + true; +check_if_rest_ok(file, <<>>) -> + true; +check_if_rest_ok(stream, _) -> + true; +check_if_rest_ok(_, _) -> + false. + + %%---------------------------------------------------------------------- %% Function: parse_pi_1(Rest, State) -> Result %% Input: Rest = string() | binary() @@ -887,11 +970,11 @@ send_end_prefix_mapping_event([{Prefix, _Uri} |Ns], State) -> parse_eq(?STRING_EMPTY, State) -> cf(?STRING_EMPTY, State, fun parse_eq/2); parse_eq(?STRING_REST("=", Rest), State) -> - {Rest, State}; + {Rest, State}; parse_eq(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) -> - {_WS, Rest, State1} = - whitespace(Bytes, State, []), - parse_eq(Rest, State1); + {_WS, Rest, State1} = + whitespace(Bytes, State, []), + parse_eq(Rest, State1); parse_eq(Bytes, State) -> unicode_incomplete_check([Bytes, State, fun parse_eq/2], "expecting = or whitespace"). @@ -909,11 +992,11 @@ parse_eq(Bytes, State) -> parse_att_value(?STRING_EMPTY, State) -> cf(?STRING_EMPTY, State, fun parse_att_value/2); parse_att_value(?STRING_UNBOUND_REST(C, Rest), State) when C == $'; C == $" -> - parse_att_value(Rest, State, C, []); + parse_att_value(Rest, State, C, []); parse_att_value(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) -> - {_WS, Rest, State1} = - whitespace(Bytes, State, []), - parse_att_value(Rest, State1); + {_WS, Rest, State1} = + whitespace(Bytes, State, []), + parse_att_value(Rest, State1); parse_att_value(Bytes, State) -> unicode_incomplete_check([Bytes, State, fun parse_att_value/2], "\', \" or whitespace expected"). @@ -1025,16 +1108,21 @@ parse_etag(Bytes, State) -> unicode_incomplete_check([Bytes, State, fun parse_etag/2], undefined). - parse_etag_1(?STRING_REST(">", Rest), #xmerl_sax_parser_state{end_tags=[{_ETag, Uri, LocalName, QName, OldNsList, NewNsList} - |RestOfETags]} = State, _Tag) -> + |RestOfETags], + input_type=InputType} = State, _Tag) -> State1 = event_callback({endElement, Uri, LocalName, QName}, State), State2 = send_end_prefix_mapping_event(NewNsList, State1), - parse_content(Rest, - State2#xmerl_sax_parser_state{end_tags=RestOfETags, - ns = OldNsList}, - [], true); + case check_if_new_doc_allowed(InputType, RestOfETags) of + true -> + throw({endDocument, Rest, State2#xmerl_sax_parser_state{ns = OldNsList}}); + false -> + parse_content(Rest, + State2#xmerl_sax_parser_state{end_tags=RestOfETags, + ns = OldNsList}, + [], true) + end; parse_etag_1(?STRING_UNBOUND_REST(_C, _), State, Tag) -> {P,TN} = Tag, ?fatal_error(State, "Bad EndTag: " ++ P ++ ":" ++ TN); @@ -1052,21 +1140,26 @@ parse_etag_1(Bytes, State, Tag) -> %% Description: Parsing the content part of tags %% [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* %%---------------------------------------------------------------------- - parse_content(?STRING_EMPTY, State, Acc, IgnorableWS) -> - case catch cf(?STRING_EMPTY, State, Acc, IgnorableWS, fun parse_content/4) of - {Rest, State1} when is_record(State1, xmerl_sax_parser_state) -> - {Rest, State1}; - {fatal_error, {State1, Msg}} -> - case check_if_document_complete(State1, Msg) of - true -> - State2 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State1), - {?STRING_EMPTY, State2}; - false -> - ?fatal_error(State1, Msg) - end; - Other -> - throw(Other) + case check_if_document_complete(State, "No more bytes") of + true -> + State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State), + {?STRING_EMPTY, State1}; + false -> + case catch cf(?STRING_EMPTY, State, Acc, IgnorableWS, fun parse_content/4) of + {Rest, State1} when is_record(State1, xmerl_sax_parser_state) -> + {Rest, State1}; + {fatal_error, {State1, Msg}} -> + case check_if_document_complete(State1, Msg) of + true -> + State2 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State1), + {?STRING_EMPTY, State2}; + false -> + ?fatal_error(State1, Msg) + end; + Other -> + throw(Other) + end end; parse_content(?STRING("\r") = Bytes, State, Acc, IgnorableWS) -> cf(Bytes, State, Acc, IgnorableWS, fun parse_content/4); @@ -1095,7 +1188,7 @@ parse_content(?STRING_REST("<?", Rest), State, Acc, IgnorableWS) -> parse_content(?STRING_REST("<!", Rest1) = Rest, #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) -> case ET of [] -> - {Rest, State}; %%LATH : Skicka ignorable WS ??? + {Rest, State}; %% Skicka ignorable WS ??? _ -> State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State), parse_cdata(Rest1, State1) @@ -1103,7 +1196,7 @@ parse_content(?STRING_REST("<!", Rest1) = Rest, #xmerl_sax_parser_state{end_tags parse_content(?STRING_REST("<", Rest1) = Rest, #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) -> case ET of [] -> - {Rest, State}; %%LATH : Skicka ignorable WS ??? + {Rest, State}; %% Skicka ignorable WS ??? _ -> State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State), parse_stag(Rest1, State1) @@ -1230,16 +1323,7 @@ whitespace(?STRING_REST("\r", Rest), State, Acc) -> whitespace(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf |Acc]); whitespace(?STRING_UNBOUND_REST(C, Rest), State, Acc) when ?is_whitespace(C) -> whitespace(Rest, State, [C|Acc]); -whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) -> - {lists:reverse(Acc), Bytes, State}; -whitespace(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State, Acc) when is_binary(Bytes) -> - case unicode:characters_to_list(Bytes, Enc) of - {incomplete, _, _} -> - cf(Bytes, State, Acc, fun whitespace/3); - {error, _Encoded, _Rest} -> - ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))) - end. - +?WHITESPACE(Bytes, State, Acc). %%---------------------------------------------------------------------- %% Function: parse_reference(Rest, State, HaveToExist) -> Result @@ -1362,23 +1446,24 @@ parse_pe_reference_1(Bytes, State, Name) -> "missing ; after reference " ++ Name). - %%---------------------------------------------------------------------- -%% Function: insert_reference(Reference, State) -> Result -%% Parameters: Reference = string() +%% Function: insert_reference(Name, Ref, State) -> Result +%% Parameters: Name = string() +%% Ref = {Type, Value} +%% Type = atom() +%% Value = term() %% State = #xmerl_sax_parser_state{} %% Result : %%---------------------------------------------------------------------- -insert_reference({Name, Type, Value}, Table) -> - case ets:lookup(Table, Name) of - [{Name, _, _}] -> - ok; +insert_reference(Name, Value, #xmerl_sax_parser_state{ref_table = Map} = State) -> + case maps:find(Name, Map) of + error -> + State#xmerl_sax_parser_state{ref_table = maps:put(Name, Value, Map)}; _ -> - ets:insert(Table, {Name, Type, Value}) + State end. - %%---------------------------------------------------------------------- %% Function: look_up_reference(Reference, State) -> Result %% Parameters: Reference = string() @@ -1396,8 +1481,8 @@ look_up_reference("apos", _, _) -> look_up_reference("quot", _, _) -> {internal_general, "quot", "\""}; look_up_reference(Name, HaveToExist, State) -> - case ets:lookup(State#xmerl_sax_parser_state.ref_table, Name) of - [{Name, Type, Value}] -> + case maps:find(Name, State#xmerl_sax_parser_state.ref_table) of + {ok, {Type, Value}} -> {Type, Name, Value}; _ -> case HaveToExist of @@ -1479,7 +1564,7 @@ parse_system_litteral(?STRING_EMPTY, State, Stop, Acc) -> parse_system_litteral(?STRING_UNBOUND_REST(Stop, Rest), State, Stop, Acc) -> {lists:reverse(Acc), Rest, State}; parse_system_litteral(?STRING_UNBOUND_REST(C, Rest), State, Stop, Acc) -> - parse_system_litteral(Rest, State, Stop, [C |Acc]); + parse_system_litteral(Rest, State, Stop, [C |Acc]); parse_system_litteral(Bytes, State, Stop, Acc) -> unicode_incomplete_check([Bytes, State, Stop, Acc, fun parse_system_litteral/4], undefined). @@ -1651,9 +1736,11 @@ parse_external_entity(State, _PubId, SysId) -> end_tags = []}, - EventState = handle_external_entity(ExtRef, State1), + {EventState, RefTable} = handle_external_entity(ExtRef, State1), - NewState = event_callback({endEntity, SysId}, SaveState#xmerl_sax_parser_state{event_state=EventState}), + NewState = event_callback({endEntity, SysId}, + SaveState#xmerl_sax_parser_state{event_state=EventState, + ref_table=RefTable}), NewState#xmerl_sax_parser_state{file_type=normal}. @@ -1679,8 +1766,9 @@ handle_external_entity({file, FileToOpen}, State) -> current_location=filename:dirname(FileToOpen), entity=filename:basename(FileToOpen), input_type=file}), - file:close(FD), - EntityState#xmerl_sax_parser_state.event_state + ok = file:close(FD), + {EntityState#xmerl_sax_parser_state.event_state, + EntityState#xmerl_sax_parser_state.ref_table} end; handle_external_entity({http, Url}, State) -> @@ -1693,14 +1781,16 @@ handle_external_entity({http, Url}, State) -> ++ file:format_error(Reason)); {ok, FD} -> {?STRING_EMPTY, EntityState} = - parse_external_entity_1(<<>>, + parse_external_entity_byte_order_mark(<<>>, State#xmerl_sax_parser_state{continuation_state=FD, current_location=filename:dirname(Url), entity=filename:basename(Url), input_type=file}), - file:close(FD), - file:delete(TmpFile), - EntityState#xmerl_sax_parser_state.event_state + ok = file:close(FD), + ok = file:delete(TmpFile), + {EntityState#xmerl_sax_parser_state.event_state, + EntityState#xmerl_sax_parser_state.ref_table} + end catch throw:{error, Error} -> @@ -1709,6 +1799,8 @@ handle_external_entity({http, Url}, State) -> handle_external_entity({Tag, _Url}, State) -> ?fatal_error(State, "Unsupported URI type: " ++ atom_to_list(Tag)). +?PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State). + %%---------------------------------------------------------------------- %% Function : parse_external_entity_1(Rest, State) -> Result %% Parameters: Rest = string() | binary() @@ -1719,18 +1811,12 @@ handle_external_entity({Tag, _Url}, State) -> parse_external_entity_1(?STRING_EMPTY, #xmerl_sax_parser_state{file_type=Type} = State) -> case catch cf(?STRING_EMPTY, State, fun parse_external_entity_1/2) of {Rest, State1} when is_record(State1, xmerl_sax_parser_state) -> - {Rest, State}; + {Rest, State1}; {fatal_error, {State1, "No more bytes"}} when Type == dtd; Type == entity -> {?STRING_EMPTY, State1}; Other -> throw(Other) end; -parse_external_entity_1(?BYTE_ORDER_MARK_1, State) -> - cf(?BYTE_ORDER_MARK_1, State, fun parse_external_entity_1/2); -parse_external_entity_1(?BYTE_ORDER_MARK_2, State) -> - cf(?BYTE_ORDER_MARK_2, State, fun parse_external_entity_1/2); -parse_external_entity_1(?BYTE_ORDER_MARK_REST(Rest), State) -> - parse_external_entity_1(Rest, State); parse_external_entity_1(?STRING("<") = Bytes, State) -> cf(Bytes, State, fun parse_external_entity_1/2); parse_external_entity_1(?STRING("<?") = Bytes, State) -> @@ -2451,24 +2537,24 @@ parse_entity_def(?STRING_EMPTY, State, Name) -> cf(?STRING_EMPTY, State, Name, fun parse_entity_def/3); parse_entity_def(?STRING_UNBOUND_REST(C, Rest), State, Name) when C == $'; C == $" -> {Value, Rest1, State1} = parse_entity_value(Rest, State, C, []), - insert_reference({Name, internal_general, Value}, State1#xmerl_sax_parser_state.ref_table), - State2 = event_callback({internalEntityDecl, Name, Value}, State1), - {_WS, Rest2, State3} = whitespace(Rest1, State2, []), - parse_def_end(Rest2, State3); + State2 = insert_reference(Name, {internal_general, Value}, State1), + State3 = event_callback({internalEntityDecl, Name, Value}, State2), + {_WS, Rest2, State4} = whitespace(Rest1, State3, []), + parse_def_end(Rest2, State4); parse_entity_def(?STRING_UNBOUND_REST(C, _) = Rest, State, Name) when C == $S; C == $P -> {PubId, SysId, Rest1, State1} = parse_external_id(Rest, State, false), {Ndata, Rest2, State2} = parse_ndata(Rest1, State1), case Ndata of undefined -> - insert_reference({Name, external_general, {PubId, SysId}}, - State2#xmerl_sax_parser_state.ref_table), - State3 = event_callback({externalEntityDecl, Name, PubId, SysId}, State2), - {Rest2, State3}; + State3 = insert_reference(Name, {external_general, {PubId, SysId}}, + State2), + State4 = event_callback({externalEntityDecl, Name, PubId, SysId}, State3), + {Rest2, State4}; _ -> - insert_reference({Name, unparsed, {PubId, SysId, Ndata}}, - State2#xmerl_sax_parser_state.ref_table), - State3 = event_callback({unparsedEntityDecl, Name, PubId, SysId, Ndata}, State2), - {Rest2, State3} + State3 = insert_reference(Name, {unparsed, {PubId, SysId, Ndata}}, + State2), + State4 = event_callback({unparsedEntityDecl, Name, PubId, SysId, Ndata}, State3), + {Rest2, State4} end; parse_entity_def(Bytes, State, Name) -> unicode_incomplete_check([Bytes, State, Name, fun parse_entity_def/3], @@ -2645,19 +2731,19 @@ parse_pe_def(?STRING_EMPTY, State, Name) -> parse_pe_def(?STRING_UNBOUND_REST(C, Rest), State, Name) when C == $'; C == $" -> {Value, Rest1, State1} = parse_entity_value(Rest, State, C, []), Name1 = "%" ++ Name, - insert_reference({Name1, internal_parameter, Value}, - State1#xmerl_sax_parser_state.ref_table), - State2 = event_callback({internalEntityDecl, Name1, Value}, State1), - {_WS, Rest2, State3} = whitespace(Rest1, State2, []), - parse_def_end(Rest2, State3); + State2 = insert_reference(Name1, {internal_parameter, Value}, + State1), + State3 = event_callback({internalEntityDecl, Name1, Value}, State2), + {_WS, Rest2, State4} = whitespace(Rest1, State3, []), + parse_def_end(Rest2, State4); parse_pe_def(?STRING_UNBOUND_REST(C, _) = Bytes, State, Name) when C == $S; C == $P -> {PubId, SysId, Rest1, State1} = parse_external_id(Bytes, State, false), Name1 = "%" ++ Name, - insert_reference({Name1, external_parameter, {PubId, SysId}}, - State1#xmerl_sax_parser_state.ref_table), - State2 = event_callback({externalEntityDecl, Name1, PubId, SysId}, State1), - {_WS, Rest2, State3} = whitespace(Rest1, State2, []), - parse_def_end(Rest2, State3); + State2 = insert_reference(Name1, {external_parameter, {PubId, SysId}}, + State1), + State3 = event_callback({externalEntityDecl, Name1, PubId, SysId}, State2), + {_WS, Rest2, State4} = whitespace(Rest1, State3, []), + parse_def_end(Rest2, State4); parse_pe_def(Bytes, State, Name) -> unicode_incomplete_check([Bytes, State, Name, fun parse_pe_def/3], "\", \', SYSTEM or PUBLIC expected"). @@ -3289,7 +3375,7 @@ cf(Rest, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = C catch throw:ErrorTerm -> ?fatal_error(State, ErrorTerm); - exit:Reason -> + exit:Reason -> ?fatal_error(State, {'EXIT', Reason}) end, case Result of @@ -3468,17 +3554,17 @@ http_get_file(Host, Port, Key) -> receive_msg(Socket, FD, true, SendTimeout) catch throw:{error, Error} -> - file:close(FD), - file:delete(Filename), + ok = file:close(FD), + ok = file:delete(Filename), throw({error, Error}) end; {error, _Reason} -> - file:close(FD), - file:delete(Filename), + ok = file:close(FD), + ok = file:delete(Filename), throw({error, lists:flatten(io_lib:format("Couldn't fetch http://~s:~p/~s", [Host, Port, Key]))}) end, - file:close(FD), + ok = file:close(FD), Filename. %%---------------------------------------------------------------------- @@ -3495,11 +3581,11 @@ receive_msg(Socket, FD, WaitForHeader, Timeout) -> {tcp_closed, Socket} -> ok; {tcp, Socket, Response} when WaitForHeader == false -> - file:write(FD, Response), + ok = file:write(FD, Response), receive_msg(Socket, FD, WaitForHeader, Timeout); {tcp, Socket, Response} -> MsgBody = remove_header(Response), - file:write(FD, MsgBody), + ok = file:write(FD, MsgBody), receive_msg(Socket, FD, false, Timeout); {tcp_error, Socket, _Reason} -> gen_tcp:close(Socket), diff --git a/lib/xmerl/src/xmerl_sax_parser_latin1.erlsrc b/lib/xmerl/src/xmerl_sax_parser_latin1.erlsrc index 3361ae837c..6e59347fb8 100644 --- a/lib/xmerl/src/xmerl_sax_parser_latin1.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_latin1.erlsrc @@ -2,7 +2,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2009. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -34,8 +34,36 @@ -define(APPEND_STRING(Rest, New), <<Rest/binary, New/binary>>). -define(TO_INPUT_FORMAT(Val), unicode:characters_to_binary(Val, unicode, latin1)). -%% STRING_REST and STRING_UNBOUND_REST is only different in the list case -define(STRING_UNBOUND_REST(MatchChar, Rest), <<MatchChar, Rest/binary>>). --define(BYTE_ORDER_MARK_1, undefined_bom1). --define(BYTE_ORDER_MARK_2, undefined_bom2). --define(BYTE_ORDER_MARK_REST(Rest), <<undefined, Rest/binary>>). + +-define(PARSE_BYTE_ORDER_MARK(Bytes, State), + parse_byte_order_mark(Bytes, State) -> + parse_xml_decl(Bytes, State)). + +-define(PARSE_XML_DECL(Bytes, State), + parse_xml_decl(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, fun parse_xml_decl/2); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))); + _ -> + parse_prolog(Bytes, State) + end; + parse_xml_decl(Bytes, State) -> + parse_prolog(Bytes, State)). + +-define(WHITESPACE(Bytes, State, Acc), + whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) -> + {lists:reverse(Acc), Bytes, State}; + whitespace(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State, Acc) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, Acc, fun whitespace/3); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))) + end). + +-define(PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State), + parse_external_entity_byte_order_mark(Bytes, State) -> + parse_external_entity_1(Bytes, State)). diff --git a/lib/xmerl/src/xmerl_sax_parser_list.erlsrc b/lib/xmerl/src/xmerl_sax_parser_list.erlsrc index ce0fdc4e7b..ac89896215 100644 --- a/lib/xmerl/src/xmerl_sax_parser_list.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_list.erlsrc @@ -2,7 +2,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2009. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -36,6 +36,19 @@ %% In the list case we can't use a '++' when matchin against an unbound variable -define(STRING_UNBOUND_REST(MatchChar, Rest), [MatchChar | Rest]). --define(BYTE_ORDER_MARK_1, undefined_bom1). --define(BYTE_ORDER_MARK_2, undefined_bom2). --define(BYTE_ORDER_MARK_REST(Rest), [undefined|Rest]). + +-define(PARSE_BYTE_ORDER_MARK(Bytes, State), + parse_byte_order_mark(Bytes, State) -> + parse_xml_decl(Bytes, State)). + +-define(PARSE_XML_DECL(Bytes, State), + parse_xml_decl(Bytes, State) -> + parse_prolog(Bytes, State)). + +-define(WHITESPACE(Bytes, State, Acc), + whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) -> + {lists:reverse(Acc), Bytes, State}). + +-define(PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State), + parse_external_entity_byte_order_mark(Bytes, State) -> + parse_external_entity_1(Bytes, State)). diff --git a/lib/xmerl/src/xmerl_sax_parser_utf16be.erlsrc b/lib/xmerl/src/xmerl_sax_parser_utf16be.erlsrc index 8b1d36eae3..ec89024729 100644 --- a/lib/xmerl/src/xmerl_sax_parser_utf16be.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_utf16be.erlsrc @@ -2,7 +2,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2011. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -34,8 +34,50 @@ -define(APPEND_STRING(Rest, New), <<Rest/binary, New/binary>>). -define(TO_INPUT_FORMAT(Val), unicode:characters_to_binary(Val, unicode, {utf16, big})). -%% STRING_REST and STRING_UNBOUND_REST is only different in the list case -define(STRING_UNBOUND_REST(MatchChar, Rest), <<MatchChar/big-utf16, Rest/binary>>). --define(BYTE_ORDER_MARK_1, undefined_bom1). --define(BYTE_ORDER_MARK_2, <<16#FE>>). +-define(BYTE_ORDER_MARK_1, <<16#FE>>). -define(BYTE_ORDER_MARK_REST(Rest), <<16#FE, 16#FF, Rest/binary>>). + +-define(PARSE_BYTE_ORDER_MARK(Bytes, State), + parse_byte_order_mark(?STRING_EMPTY, State) -> + cf(?STRING_EMPTY, State, fun parse_byte_order_mark/2); + parse_byte_order_mark(?BYTE_ORDER_MARK_1, State) -> + cf(?BYTE_ORDER_MARK_1, State, fun parse_byte_order_mark/2); + parse_byte_order_mark(?BYTE_ORDER_MARK_REST(Rest), State) -> + parse_xml_decl(Rest, State); + parse_byte_order_mark(Bytes, State) -> + parse_xml_decl(Bytes, State)). + +-define(PARSE_XML_DECL(Bytes, State), + parse_xml_decl(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, fun parse_xml_decl/2); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))); + _ -> + parse_prolog(Bytes, State) + end; + parse_xml_decl(Bytes, State) -> + parse_prolog(Bytes, State)). + +-define(WHITESPACE(Bytes, State, Acc), + whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) -> + {lists:reverse(Acc), Bytes, State}; + whitespace(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State, Acc) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, Acc, fun whitespace/3); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))) + end). + +-define(PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State), + parse_external_entity_byte_order_mark(?STRING_EMPTY, State) -> + cf(?STRING_EMPTY, State, fun parse_external_entity_byte_order_mark/2); + parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_1, State) -> + cf(?BYTE_ORDER_MARK_1, State, fun parse_external_entity_byte_order_mark/2); + parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_REST(Rest), State) -> + parse_external_entity_1(Rest, State); + parse_external_entity_byte_order_mark(Bytes, State) -> + parse_external_entity_1(Bytes, State)). diff --git a/lib/xmerl/src/xmerl_sax_parser_utf16le.erlsrc b/lib/xmerl/src/xmerl_sax_parser_utf16le.erlsrc index 63cfa11bf4..566333a045 100644 --- a/lib/xmerl/src/xmerl_sax_parser_utf16le.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_utf16le.erlsrc @@ -2,7 +2,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2011. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -34,8 +34,50 @@ -define(APPEND_STRING(Rest, New), <<Rest/binary, New/binary>>). -define(TO_INPUT_FORMAT(Val), unicode:characters_to_binary(Val, unicode, {utf16, little})). -%% STRING_REST and STRING_UNBOUND_REST is only different in the list case -define(STRING_UNBOUND_REST(MatchChar, Rest), <<MatchChar/little-utf16, Rest/binary>>). --define(BYTE_ORDER_MARK_1, undefined_bom1). --define(BYTE_ORDER_MARK_2, <<16#FF>>). +-define(BYTE_ORDER_MARK_1, <<16#FF>>). -define(BYTE_ORDER_MARK_REST(Rest), <<16#FF, 16#FE, Rest/binary>>). + +-define(PARSE_BYTE_ORDER_MARK(Bytes, State), + parse_byte_order_mark(?STRING_EMPTY, State) -> + cf(?STRING_EMPTY, State, fun parse_byte_order_mark/2); + parse_byte_order_mark(?BYTE_ORDER_MARK_1, State) -> + cf(?BYTE_ORDER_MARK_1, State, fun parse_byte_order_mark/2); + parse_byte_order_mark(?BYTE_ORDER_MARK_REST(Rest), State) -> + parse_xml_decl(Rest, State); + parse_byte_order_mark(Bytes, State) -> + parse_xml_decl(Bytes, State)). + +-define(PARSE_XML_DECL(Bytes, State), + parse_xml_decl(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, fun parse_xml_decl/2); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))); + _ -> + parse_prolog(Bytes, State) + end; + parse_xml_decl(Bytes, State) -> + parse_prolog(Bytes, State)). + +-define(WHITESPACE(Bytes, State, Acc), + whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) -> + {lists:reverse(Acc), Bytes, State}; + whitespace(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State, Acc) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, Acc, fun whitespace/3); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))) + end). + +-define(PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State), + parse_external_entity_byte_order_mark(?STRING_EMPTY, State) -> + cf(?STRING_EMPTY, State, fun parse_external_entity_byte_order_mark/2); + parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_1, State) -> + cf(?BYTE_ORDER_MARK_1, State, fun parse_external_entity_byte_order_mark/2); + parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_REST(Rest), State) -> + parse_external_entity_1(Rest, State); + parse_external_entity_byte_order_mark(Bytes, State) -> + parse_external_entity_1(Bytes, State)). diff --git a/lib/xmerl/src/xmerl_sax_parser_utf8.erlsrc b/lib/xmerl/src/xmerl_sax_parser_utf8.erlsrc index b827086a2e..f41d06d013 100644 --- a/lib/xmerl/src/xmerl_sax_parser_utf8.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_utf8.erlsrc @@ -2,7 +2,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2009. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -34,11 +34,55 @@ -define(APPEND_STRING(Rest, New), <<Rest/binary, New/binary>>). -define(TO_INPUT_FORMAT(Val), unicode:characters_to_binary(Val, unicode, utf8)). - -%% STRING_REST and STRING_UNBOUND_REST is only different in the list case -define(STRING_UNBOUND_REST(MatchChar, Rest), <<MatchChar/utf8, Rest/binary>>). -define(BYTE_ORDER_MARK_1, <<16#EF>>). -define(BYTE_ORDER_MARK_2, <<16#EF, 16#BB>>). -define(BYTE_ORDER_MARK_REST(Rest), <<16#EF, 16#BB, 16#BF, Rest/binary>>). +-define(PARSE_BYTE_ORDER_MARK(Bytes, State), + parse_byte_order_mark(?STRING_EMPTY, State) -> + cf(?STRING_EMPTY, State, fun parse_byte_order_mark/2); + parse_byte_order_mark(?BYTE_ORDER_MARK_1, State) -> + cf(?BYTE_ORDER_MARK_1, State, fun parse_byte_order_mark/2); + parse_byte_order_mark(?BYTE_ORDER_MARK_2, State) -> + cf(?BYTE_ORDER_MARK_2, State, fun parse_byte_order_mark/2); + parse_byte_order_mark(?BYTE_ORDER_MARK_REST(Rest), State) -> + parse_xml_decl(Rest, State); + parse_byte_order_mark(Bytes, State) -> + parse_xml_decl(Bytes, State)). + +-define(PARSE_XML_DECL(Bytes, State), + parse_xml_decl(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, fun parse_xml_decl/2); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))); + _ -> + parse_prolog(Bytes, State) + end; + parse_xml_decl(Bytes, State) -> + parse_prolog(Bytes, State)). + +-define(WHITESPACE(Bytes, State, Acc), + whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) -> + {lists:reverse(Acc), Bytes, State}; + whitespace(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State, Acc) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, Acc, fun whitespace/3); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))) + end). +-define(PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State), + parse_external_entity_byte_order_mark(?STRING_EMPTY, State) -> + cf(?STRING_EMPTY, State, fun parse_external_entity_byte_order_mark/2); + parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_1, State) -> + cf(?BYTE_ORDER_MARK_1, State, fun parse_external_entity_byte_order_mark/2); + parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_2, State) -> + cf(?BYTE_ORDER_MARK_2, State, fun parse_external_entity_byte_order_mark/2); + parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_REST(Rest), State) -> + parse_external_entity_1(Rest, State); + parse_external_entity_byte_order_mark(Bytes, State) -> + parse_external_entity_1(Bytes, State)). diff --git a/lib/xmerl/src/xmerl_sax_simple_dom.erl b/lib/xmerl/src/xmerl_sax_simple_dom.erl index 2ce2f03827..7b15cd92dc 100644 --- a/lib/xmerl/src/xmerl_sax_simple_dom.erl +++ b/lib/xmerl/src/xmerl_sax_simple_dom.erl @@ -2,7 +2,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2009. All Rights Reserved. +%% Copyright Ericsson AB 2009-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -129,8 +129,9 @@ build_dom(endDocument, State#xmerl_sax_simple_dom_state{dom=[Decl, {Tag, Attributes, lists:reverse(Content)}]}; _ -> - ?dbg("~p\n", [D]), - ?error("we're not at end the document when endDocument event is encountered.") + %% endDocument is also sent by the parser when a fault occur to tell + %% the event receiver that no more input will be sent + State end; %% Element diff --git a/lib/xmerl/src/xmerl_scan.erl b/lib/xmerl/src/xmerl_scan.erl index 2bf3172d87..e543a5a11e 100644 --- a/lib/xmerl/src/xmerl_scan.erl +++ b/lib/xmerl/src/xmerl_scan.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2003-2013. All Rights Reserved. +%% Copyright Ericsson AB 2003-2018. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -111,13 +111,16 @@ %% <dd>Set to 'true' if xmerl should add to elements missing attributes %% with a defined default value (default 'false').</dd> %% </dl> +%% @type xmlElement() = #xmlElement{}. +%% The record definition is found in xmerl.hrl. +%% @type xmlDocument() = #xmlDocument{}. +%% The record definition is found in xmerl.hrl. %% @type document() = xmlElement() | xmlDocument(). <p> %% The document returned by <tt>xmerl_scan:string/[1,2]</tt> and %% <tt>xmerl_scan:file/[1,2]</tt>. The type of the returned record depends on %% the value of the document option passed to the function. %% </p> - -module(xmerl_scan). -vsn('0.20'). -date('03-09-16'). @@ -276,7 +279,7 @@ int_file_decl(F, Options,_ExtCharset) -> %% @spec string(Text::list()) -> {xmlElement(),Rest} %% Rest = list() -%% @equiv string(Test, []) +%% @equiv string(Text, []) string(Str) -> string(Str, []). @@ -471,8 +474,8 @@ event(_X, S) -> %% into multiple objects (in which case {Acc',Pos',S'} should be returned.) %% If {Acc',S'} is returned, Pos will be incremented by 1 by default. %% Below is an example of an acceptable operation -acc(X = #xmlText{value = Text}, Acc, S) -> - {[X#xmlText{value = Text}|Acc], S}; +acc(#xmlText{value = Text}, [X = #xmlText{value = AccText}], S) -> + {[X#xmlText{value = AccText ++ Text}], S}; acc(X, Acc, S) -> {[X|Acc], S}. @@ -605,7 +608,7 @@ scan_document(Str0, S=#xmerl_scanner{event_fun = Event, schema -> case schemaLocations(Res, S5) of {ok, Schemas} -> - cleanup(S5), + _ = cleanup(S5), %%?dbg("Schemas: ~p~nRes: ~p~ninhertih_options(S): ~p~n", %% [Schemas,Res,inherit_options(S5)]), XSDRes = xmerl_xsd:process_validate(Schemas, Res, @@ -1010,7 +1013,7 @@ scan_optional_version(T,S) -> scan_enc_name([], S=#xmerl_scanner{continuation_fun = F}) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_enc_name(MoreBytes, S1) end, - fun(S1) -> ?fatal(expected_encoding_name, S1) end, + fatal_fun(expected_encoding_name), S); scan_enc_name([H|T], S0) when H >= $"; H =< $' -> ?bump_col(1), @@ -1020,7 +1023,7 @@ scan_enc_name([H|T], S0) when H >= $"; H =< $' -> scan_enc_name([], S=#xmerl_scanner{continuation_fun = F}, Delim, Acc) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_enc_name(MoreBytes, S1, Delim, Acc) end, - fun(S1) -> ?fatal(expected_encoding_name, S1) end, + fatal_fun(expected_encoding_name), S); scan_enc_name([H|T], S0, Delim, Acc) when H >= $a, H =< $z -> ?bump_col(1), @@ -1034,7 +1037,7 @@ scan_enc_name([H|_T],S,_Delim,_Acc) -> scan_enc_name2([], S=#xmerl_scanner{continuation_fun = F}, Delim, Acc) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_enc_name2(MoreBytes, S1, Delim, Acc) end, - fun(S1) -> ?fatal(expected_encoding_name, S1) end, + fatal_fun(expected_encoding_name), S); scan_enc_name2([H|T], S0, H, Acc) -> ?bump_col(1), @@ -1058,7 +1061,7 @@ scan_enc_name2([H|T], S0, Delim, Acc) when H == $.; H == $_; H == $- -> scan_xml_vsn([], S=#xmerl_scanner{continuation_fun = F}) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_xml_vsn(MoreBytes, S1) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_xml_vsn([H|T], S) when H==$"; H==$'-> xml_vsn(T, S#xmerl_scanner{col = S#xmerl_scanner.col+1}, H, []). @@ -1066,7 +1069,7 @@ scan_xml_vsn([H|T], S) when H==$"; H==$'-> xml_vsn([], S=#xmerl_scanner{continuation_fun = F}, Delim, Acc) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> xml_vsn(MoreBytes, S1, Delim, Acc) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); xml_vsn([H|T], S=#xmerl_scanner{col = C}, H, Acc) -> {lists:reverse(Acc), T, S#xmerl_scanner{col = C+1}}; @@ -1089,7 +1092,7 @@ xml_vsn([H|T], S=#xmerl_scanner{col = C}, Delim, Acc) -> scan_pi([], S=#xmerl_scanner{continuation_fun = F}, Pos, Ps) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_pi(MoreBytes, S1, Pos, Ps) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_pi(Str = [H1,H2,H3 | T],S0=#xmerl_scanner{line = L, col = C}, Pos, Ps) when H1==$x;H1==$X -> @@ -1125,7 +1128,7 @@ scan_pi([], S=#xmerl_scanner{continuation_fun = F}, Target, ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_pi(MoreBytes, S1, Target, L, C, Pos, Ps, Acc) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_pi("?>" ++ T, S0 = #xmerl_scanner{hook_fun = Hook, event_fun = Event}, @@ -1152,7 +1155,7 @@ scan_pi2([], S=#xmerl_scanner{continuation_fun = F}, Target, ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_pi2(MoreBytes, S1, Target, L, C, Pos, Ps, Acc) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_pi2("?>" ++ T, S0 = #xmerl_scanner{hook_fun = Hook, event_fun = Event}, @@ -1180,7 +1183,7 @@ scan_pi2(Str, S0, Target, L, C, Pos, Ps, Acc) -> scan_doctype([], S=#xmerl_scanner{continuation_fun = F}) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_doctype(MoreBytes, S1) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_doctype(T, S) -> {_,T1,S1} = mandatory_strip(T,S), @@ -1194,7 +1197,7 @@ scan_doctype(T, S) -> scan_doctype1([], S=#xmerl_scanner{continuation_fun = F}) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_doctype1(MoreBytes, S1) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_doctype1("PUBLIC" ++ T, S0) -> ?bump_col(6), @@ -1217,7 +1220,7 @@ scan_doctype1(T, S) -> scan_doctype2([], S=#xmerl_scanner{continuation_fun = F},DTD) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_doctype2(MoreBytes, S1, DTD) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_doctype2("[" ++ T, S0, DTD) -> ?bump_col(1), @@ -1237,7 +1240,7 @@ scan_doctype2(_T,S,_DTD) -> scan_doctype3([], S=#xmerl_scanner{continuation_fun = F},DTD) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_doctype3(MoreBytes, S1,DTD) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_doctype3("%" ++ T, S0, DTD) -> ?bump_col(1), @@ -1549,7 +1552,7 @@ scan_decl_sep(T,S) -> scan_conditional_sect([], S=#xmerl_scanner{continuation_fun = F}) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_conditional_sect(MoreBytes, S1) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_conditional_sect("IGNORE" ++ T, S0) -> ?bump_col(6), @@ -1582,7 +1585,7 @@ scan_ignore(Str,S) -> scan_ignore([], S=#xmerl_scanner{continuation_fun = F},Level) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_ignore(MoreBytes, S1,Level) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_ignore("<![" ++ T, S0,Level) -> %% nested conditional section. Topmost condition is ignore, though @@ -1603,7 +1606,7 @@ scan_ignore([_H|T],S0,Level) -> scan_include([], S=#xmerl_scanner{continuation_fun = F}) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_include(MoreBytes, S1) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_include("]]>" ++ T, S0) -> ?bump_col(3), @@ -1745,7 +1748,7 @@ update_attributes1([],Acc) -> scan_attdef([], S=#xmerl_scanner{continuation_fun = F}) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_attdef(MoreBytes, S1) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_attdef(T, S) -> scan_attdef(T, S, _AttrAcc = []). @@ -1754,7 +1757,7 @@ scan_attdef(T, S) -> scan_attdef([], S=#xmerl_scanner{continuation_fun = F}, Attrs) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_attdef(MoreBytes, S1, Attrs) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_attdef(">" ++ T, S0, Attrs) -> ?bump_col(1), @@ -1798,7 +1801,7 @@ scan_attdef2(T, S, Attrs) -> scan_att_type([], S=#xmerl_scanner{continuation_fun = F}) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_att_type(MoreBytes, S1) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_att_type("CDATA" ++ T, S0) -> ?bump_col(5), @@ -1856,7 +1859,7 @@ scan_att_type("%" ++ T, S0) -> scan_notation_type([], S=#xmerl_scanner{continuation_fun = F}, Acc) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_notation_type(MoreBytes, S1, Acc) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_notation_type(")" ++ T, S0, Acc) -> ?bump_col(1), @@ -1889,7 +1892,7 @@ notation_exists(Name, #xmerl_scanner{rules_read_fun = Read, scan_enumeration([], S=#xmerl_scanner{continuation_fun = F}, Acc) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_enumeration(MoreBytes, S1, Acc) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_enumeration(")" ++ T, S0, Acc) -> ?bump_col(1), @@ -1907,7 +1910,7 @@ scan_enumeration("|" ++ T, S0, Acc) -> scan_default_decl([], S=#xmerl_scanner{continuation_fun = F}, Type) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_default_decl(MoreBytes, S1, Type) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_default_decl("#REQUIRED" ++ T, S0, _Type) -> ?bump_col(9), @@ -1936,7 +1939,7 @@ default_value(T, S, Type) -> scan_entity([], S=#xmerl_scanner{continuation_fun = F}) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_entity(MoreBytes, S1) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_entity("%" ++ T, #xmerl_scanner{rules_write_fun = Write} = S0) -> %% parameter entity @@ -1974,7 +1977,7 @@ scan_entity_completion(T,S) -> scan_entity_def([], S=#xmerl_scanner{continuation_fun = F}, EName) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_entity_def(MoreBytes, S1, EName) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_entity_def("'" ++ T, S0, EName) -> ?bump_col(1), @@ -2015,7 +2018,7 @@ scan_entity_def(Str, S, EName) -> scan_ndata_decl([], S=#xmerl_scanner{continuation_fun = F}) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_ndata_decl(MoreBytes, S1) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_ndata_decl(Str = ">"++_T, S) -> {[], Str, S}; @@ -2062,7 +2065,7 @@ scan_element("/", S=#xmerl_scanner{continuation_fun = F}, F(fun(MoreBytes, S1) -> scan_element("/" ++ MoreBytes, S1, Pos, Name, StartL, StartC, Attrs, Lang,Parents,NSI,NS,SpaceDefault) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_element([], S=#xmerl_scanner{continuation_fun = F}, Pos, Name, StartL, StartC, Attrs, Lang, Parents, @@ -2071,7 +2074,7 @@ scan_element([], S=#xmerl_scanner{continuation_fun = F}, F(fun(MoreBytes, S1) -> scan_element(MoreBytes, S1, Pos, Name, StartL, StartC, Attrs, Lang,Parents,NSI,NS,SpaceDefault) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_element("/>" ++ T, S0 = #xmerl_scanner{hook_fun = Hook, event_fun = Event, @@ -2099,7 +2102,7 @@ scan_element(">", S=#xmerl_scanner{continuation_fun = F}, F(fun(MoreBytes, S1) -> scan_element(">" ++ MoreBytes, S1, Pos, Name, StartL, StartC, Attrs, Lang,Parents,NSI,NS,SpaceDefault) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_element(">" ++ T, S0 = #xmerl_scanner{event_fun = Event, hook_fun = Hook, @@ -2222,16 +2225,18 @@ processed_whole_element(S=#xmerl_scanner{hook_fun = _Hook, AllAttrs = case S#xmerl_scanner.default_attrs of true -> - [ #xmlAttribute{name = AttName, - parents = [{Name, Pos} | Parents], - language = Lang, - nsinfo = NSI, - namespace = Namespace, - value = AttValue, - normalized = true} || - {AttName, AttValue} <- get_default_attrs(S, Name), - AttValue =/= no_value, - not lists:keymember(AttName, #xmlAttribute.name, Attrs) ]; + DefaultAttrs = + [ #xmlAttribute{name = AttName, + parents = [{Name, Pos} | Parents], + language = Lang, + nsinfo = NSI, + namespace = Namespace, + value = AttValue, + normalized = true} || + {AttName, AttValue} <- get_default_attrs(S, Name), + AttValue =/= no_value, + not lists:keymember(AttName, #xmlAttribute.name, Attrs) ], + lists:append(Attrs, DefaultAttrs); false -> Attrs end, @@ -2304,7 +2309,9 @@ expanded_name(Name, [], #xmlNamespace{default = URI}, S) -> expanded_name(Name, N = {"xmlns", Local}, #xmlNamespace{nodes = Ns}, S) -> {_, Value} = lists:keyfind(Local, 1, Ns), case Name of - 'xmlns:xml' when Value =/= 'http://www.w3.org/XML/1998/namespace' -> + 'xmlns:xml' when Value =:= 'http://www.w3.org/XML/1998/namespace' -> + N; + 'xmlns:xml' when Value =/= 'http://www.w3.org/XML/1998/namespace' -> ?fatal({xml_prefix_cannot_be_redeclared, Value}, S); 'xmlns:xmlns' -> ?fatal({xmlns_prefix_cannot_be_declared, Value}, S); @@ -2318,6 +2325,8 @@ expanded_name(Name, N = {"xmlns", Local}, #xmlNamespace{nodes = Ns}, S) -> N end end; +expanded_name(_Name, {"xml", Local}, _NS, _S) -> + {'http://www.w3.org/XML/1998/namespace', list_to_atom(Local)}; expanded_name(_Name, {Prefix, Local}, #xmlNamespace{nodes = Ns}, S) -> case lists:keysearch(Prefix, 1, Ns) of {value, {_, URI}} -> @@ -2328,9 +2337,6 @@ expanded_name(_Name, {Prefix, Local}, #xmlNamespace{nodes = Ns}, S) -> ?fatal({namespace_prefix_not_declared, Prefix}, S) end. - - - keyreplaceadd(K, Pos, [H|T], Obj) when K == element(Pos, H) -> [Obj|T]; keyreplaceadd(K, Pos, [H|T], Obj) -> @@ -2344,7 +2350,7 @@ keyreplaceadd(_K, _Pos, [], Obj) -> scan_att_value([], S=#xmerl_scanner{continuation_fun = F},AT) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_att_value(MoreBytes, S1, AT) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_att_value("%"++_T,S=#xmerl_scanner{environment=prolog},_AttType) -> ?fatal({error,{wfc_PEs_In_Internal_Subset}},S); @@ -2385,7 +2391,7 @@ scan_att_chars([],S=#xmerl_scanner{continuation_fun=F},H,Acc,TmpAcc,AT,IsNorm)-> F(fun(MoreBytes, S1) -> scan_att_chars(MoreBytes, S1, H, Acc,TmpAcc,AT,IsNorm) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_att_chars([H|T], S0, H, Acc, TmpAcc,AttType,IsNorm) -> % End quote ?bump_col(1), @@ -2518,7 +2524,7 @@ scan_content("<", S= #xmerl_scanner{continuation_fun = F}, F(fun(MoreBytes, S1) -> scan_content("<" ++ MoreBytes, S1, Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,[]) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_content([], S=#xmerl_scanner{environment={external,{entity,_}}}, _Pos, _Name, _Attrs, _Space, _Lang, _Parents, _NS, Acc,_) -> @@ -2532,7 +2538,7 @@ scan_content([], S=#xmerl_scanner{continuation_fun = F}, F(fun(MoreBytes, S1) -> scan_content(MoreBytes, S1, Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,[]) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_content("</" ++ T, S0, _Pos, Name, _Attrs, _Space, _Lang, _Parents, _NS, Acc,[]) -> @@ -2636,7 +2642,7 @@ scan_content_markup([], S=#xmerl_scanner{continuation_fun = F}, F(fun(MoreBytes, S1) -> scan_content_markup( MoreBytes,S1,Pos,Name, Attrs,Space,Lang,Parents,NS) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_content_markup("![CDATA[" ++ T, S0, Pos, _Name, _Attrs, _Space, _Lang, Parents, _NS) -> @@ -2664,7 +2670,7 @@ scan_char_data([], S=#xmerl_scanner{environment=internal_parsed_entity}, scan_char_data([], S=#xmerl_scanner{continuation_fun = F}, Space, _MUD,Acc) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_char_data(MoreBytes,S1,Space,_MUD,Acc) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_char_data([$&|T], S,Space,"&",Acc) -> scan_char_data(T, S, Space,[], [$&|Acc]); @@ -2716,7 +2722,7 @@ scan_cdata(Str, S, Pos, Parents) -> scan_cdata([], S=#xmerl_scanner{continuation_fun = F}, Pos, Parents, Acc) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_cdata(MoreBytes, S1, Pos, Parents, Acc) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_cdata("]]>" ++ T, S0, Pos, Parents, Acc) -> ?bump_col(3), @@ -2741,7 +2747,7 @@ scan_cdata(Str, S0, Pos, Parents, Acc) -> scan_reference([], S=#xmerl_scanner{continuation_fun = F}) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_reference(MoreBytes, S1) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_reference("#x" ++ T, S0) -> %% [66] CharRef @@ -2783,7 +2789,7 @@ scan_reference(T, S) -> scan_entity_ref([], S=#xmerl_scanner{continuation_fun = F}) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_entity_ref(MoreBytes, S1) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_entity_ref("amp;" ++ T, S0) -> ?bump_col(4), @@ -2868,7 +2874,7 @@ expand_reference(Name, #xmerl_scanner{rules_read_fun = Read} = S) -> scan_char_ref_dec([], S=#xmerl_scanner{continuation_fun = F}, Acc) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_char_ref_dec(MoreBytes, S1, Acc) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_char_ref_dec([H|T], S0, Acc) when H >= $0, H =< $9 -> ?bump_col(1), @@ -2883,7 +2889,7 @@ scan_char_ref_dec(";" ++ T, S0, Acc) -> scan_char_ref_hex([], S=#xmerl_scanner{continuation_fun = F}, Acc) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_char_ref_hex(MoreBytes, S1, Acc) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_char_ref_hex([H|T], S0, Acc) when H >= $0, H =< $9 -> ?bump_col(1), @@ -2957,7 +2963,7 @@ scan_name_no_colons(Str, S) -> scan_name([], S=#xmerl_scanner{continuation_fun = F}) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_name(MoreBytes, S1) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_name(Str = [$:|T], S0 = #xmerl_scanner{namespace_conformant = NSC}) -> if NSC == false -> @@ -3007,7 +3013,7 @@ scan_nmtoken(Str, S, Acc, NSC) -> scan_nmtoken([], S=#xmerl_scanner{continuation_fun = F}) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_nmtoken(MoreBytes, S1) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_nmtoken("%"++T, S0=#xmerl_scanner{environment={external,_}}) -> ?bump_col(1), @@ -3084,7 +3090,7 @@ isLatin1(_,_) -> scan_system_literal([], S=#xmerl_scanner{continuation_fun = F}) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_system_literal(MoreBytes, S1) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_system_literal("\"" ++ T, S) -> scan_system_literal(T, S, $", []); @@ -3096,7 +3102,7 @@ scan_system_literal([], S=#xmerl_scanner{continuation_fun = F}, Delimiter, Acc) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_system_literal(MoreBytes,S1,Delimiter,Acc) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_system_literal([H|T], S, H, Acc) -> {lists:reverse(Acc), T, S#xmerl_scanner{col = S#xmerl_scanner.col+1}}; @@ -3114,7 +3120,7 @@ scan_system_literal(Str, S, Delimiter, Acc) -> scan_pubid_literal([], S=#xmerl_scanner{continuation_fun = F}) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_pubid_literal(MoreBytes, S1) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_pubid_literal([H|T], S) when H == $"; H == $' -> scan_pubid_literal(T, S#xmerl_scanner{col = S#xmerl_scanner.col+1}, H, []); @@ -3126,7 +3132,7 @@ scan_pubid_literal([], S=#xmerl_scanner{continuation_fun = F}, Delimiter, Acc) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_pubid_literal(MoreBytes,S1,Delimiter,Acc) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_pubid_literal([H|T], S, H, Acc) -> {lists:reverse(Acc), T, S#xmerl_scanner{col = S#xmerl_scanner.col+1}}; @@ -3161,7 +3167,7 @@ is_pubid_char(X) -> scan_contentspec([], S=#xmerl_scanner{continuation_fun = F}) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_contentspec(MoreBytes, S1) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_contentspec("EMPTY" ++ T, S0) -> ?bump_col(5), @@ -3195,7 +3201,7 @@ scan_elem_content([], S=#xmerl_scanner{continuation_fun = F}, Context, Mode, Acc) -> ?dbg("cont()...~n", []), F(fun(MoreBytes,S1) -> scan_elem_content(MoreBytes,S1,Context,Mode,Acc) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_elem_content(")" ++ T, S0, Context, Mode0, Acc0) -> ?bump_col(1), @@ -3282,7 +3288,7 @@ format_elem_content(Other) -> Other. scan_occurrence([], S=#xmerl_scanner{continuation_fun = F}) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_occurrence(MoreBytes, S1) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_occurrence([$?|T], S0) -> ?bump_col(1), @@ -3433,7 +3439,7 @@ wfc_whitespace_betw_attrs([$> |_]=L,S) -> wfc_whitespace_betw_attrs([],S=#xmerl_scanner{continuation_fun = F}) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> wfc_whitespace_betw_attrs(MoreBytes, S1) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); wfc_whitespace_betw_attrs(_,S) -> ?fatal({whitespace_required_between_attributes},S). @@ -3477,7 +3483,7 @@ vc_Element_valid(_,_) -> scan_pe_def([], S=#xmerl_scanner{continuation_fun = F}, PEName) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_pe_def(MoreBytes, S1, PEName) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_pe_def("'" ++ T, S0, PEName) -> ?bump_col(1), @@ -3510,7 +3516,7 @@ scan_notation_decl(T, #xmerl_scanner{rules_write_fun = Write, scan_notation_decl1([], S=#xmerl_scanner{continuation_fun = F}) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_notation_decl1(MoreBytes, S1) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_notation_decl1("SYSTEM" ++ T, S0) -> ?bump_col(6), @@ -3536,7 +3542,7 @@ scan_notation_decl1("PUBLIC" ++ T, S0) -> scan_external_id([], S=#xmerl_scanner{continuation_fun = F}) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_external_id(MoreBytes, S1) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_external_id("SYSTEM" ++ T, S0) -> ?bump_col(6), @@ -3582,7 +3588,7 @@ scan_entity_value([], S=#xmerl_scanner{continuation_fun = F}, scan_entity_value(MoreBytes,S1, Delim,Acc,PEName,Namespace,PENesting) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_entity_value([Delim|T], S=#xmerl_scanner{validation=dtd}, Delim,_Acc,PEName,_NS,PENesting) when length(PENesting) /= 0 -> @@ -3850,7 +3856,7 @@ scan_comment1([], S=#xmerl_scanner{continuation_fun = F}, Pos, Comment, Acc) -> ?dbg("cont()...~n", []), F(fun(MoreBytes, S1) -> scan_comment1(MoreBytes, S1, Pos, Comment, Acc) end, - fun(S1) -> ?fatal(unexpected_end, S1) end, + fatal_fun(unexpected_end), S); scan_comment1("-->" ++ T, S0 = #xmerl_scanner{col = C, event_fun = Event, @@ -4100,6 +4106,13 @@ handle_schema_result({error,Reason},S5) -> %%% Helper functions +-compile({inline, [fatal_fun/1]}). + +-spec fatal_fun(_) -> fun((_) -> no_return()). + +fatal_fun(Reason) -> + fun(S) -> ?fatal(Reason, S) end. + fatal(Reason, S) -> exit({fatal, {Reason, {file,S#xmerl_scanner.filename}, diff --git a/lib/xmerl/src/xmerl_sgml.erl b/lib/xmerl/src/xmerl_sgml.erl index 6c0ee55448..0d73df1e02 100644 --- a/lib/xmerl/src/xmerl_sgml.erl +++ b/lib/xmerl/src/xmerl_sgml.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% Copyright Ericsson AB 2004-2016. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. diff --git a/lib/xmerl/src/xmerl_simple.erl b/lib/xmerl/src/xmerl_simple.erl index 32479066cd..4ad939abe0 100644 --- a/lib/xmerl/src/xmerl_simple.erl +++ b/lib/xmerl/src/xmerl_simple.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2003-2009. All Rights Reserved. +%% Copyright Ericsson AB 2003-2016. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. diff --git a/lib/xmerl/src/xmerl_text.erl b/lib/xmerl/src/xmerl_text.erl index 459c7d4562..eb1df6e5d3 100644 --- a/lib/xmerl/src/xmerl_text.erl +++ b/lib/xmerl/src/xmerl_text.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2003-2009. All Rights Reserved. +%% Copyright Ericsson AB 2003-2016. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. diff --git a/lib/xmerl/src/xmerl_ucs.erl b/lib/xmerl/src/xmerl_ucs.erl index f93d1d15c4..4b1fc30089 100644 --- a/lib/xmerl/src/xmerl_ucs.erl +++ b/lib/xmerl/src/xmerl_ucs.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2005-2011. All Rights Reserved. +%% Copyright Ericsson AB 2005-2016. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. diff --git a/lib/xmerl/src/xmerl_uri.erl b/lib/xmerl/src/xmerl_uri.erl index 78278fba63..aa5a09ce56 100644 --- a/lib/xmerl/src/xmerl_uri.erl +++ b/lib/xmerl/src/xmerl_uri.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2005-2012. All Rights Reserved. +%% Copyright Ericsson AB 2005-2016. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. diff --git a/lib/xmerl/src/xmerl_validate.erl b/lib/xmerl/src/xmerl_validate.erl index 87b9a43ce9..8b4f5b91a2 100644 --- a/lib/xmerl/src/xmerl_validate.erl +++ b/lib/xmerl/src/xmerl_validate.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2003-2011. All Rights Reserved. +%% Copyright Ericsson AB 2003-2016. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. diff --git a/lib/xmerl/src/xmerl_xlate.erl b/lib/xmerl/src/xmerl_xlate.erl index 6bfd9595f0..181faa343d 100644 --- a/lib/xmerl/src/xmerl_xlate.erl +++ b/lib/xmerl/src/xmerl_xlate.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2003-2009. All Rights Reserved. +%% Copyright Ericsson AB 2003-2016. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. diff --git a/lib/xmerl/src/xmerl_xml.erl b/lib/xmerl/src/xmerl_xml.erl index 914bbad6be..28802666a4 100644 --- a/lib/xmerl/src/xmerl_xml.erl +++ b/lib/xmerl/src/xmerl_xml.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2003-2009. All Rights Reserved. +%% Copyright Ericsson AB 2003-2016. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. diff --git a/lib/xmerl/src/xmerl_xpath.erl b/lib/xmerl/src/xmerl_xpath.erl index 97ae3a1971..6146feba49 100644 --- a/lib/xmerl/src/xmerl_xpath.erl +++ b/lib/xmerl/src/xmerl_xpath.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2003-2011. All Rights Reserved. +%% Copyright Ericsson AB 2003-2016. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -43,13 +43,27 @@ %% </pre> %% %% @type nodeEntity() = -%% xmlElement() -%% | xmlAttribute() -%% | xmlText() -%% | xmlPI() -%% | xmlComment() -%% | xmlNsNode() -%% | xmlDocument() +%% #xmlElement{} +%% | #xmlAttribute{} +%% | #xmlText{} +%% | #xmlPI{} +%% | #xmlComment{} +%% | #xmlNsNode{} +%% | #xmlDocument{} +%% +%% @type docNodes() = #xmlElement{} +%% | #xmlAttribute{} +%% | #xmlText{} +%% | #xmlPI{} +%% | #xmlComment{} +%% | #xmlNsNode{} +%% +%% @type docEntity() = #xmlDocument{} | [docNodes()] +%% +%% @type xPathString() = string() +%% +%% @type parentList() = [{atom(), integer()}] +%% %% @type option_list(). <p>Options allows to customize the behaviour of the %% XPath scanner. %% </p> @@ -115,7 +129,7 @@ string(Str, Doc, Options) -> %% Parents = parentList() %% Doc = nodeEntity() %% Options = option_list() -%% Scalar = xmlObj +%% Scalar = #xmlObj{} %% @doc Extracts the nodes from the parsed XML tree according to XPath. %% xmlObj is a record with fields type and value, %% where type is boolean | number | string diff --git a/lib/xmerl/src/xmerl_xpath_lib.erl b/lib/xmerl/src/xmerl_xpath_lib.erl index 873fc41c4b..1660d7e91f 100644 --- a/lib/xmerl/src/xmerl_xpath_lib.erl +++ b/lib/xmerl/src/xmerl_xpath_lib.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2011. All Rights Reserved. +%% Copyright Ericsson AB 2008-2016. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. diff --git a/lib/xmerl/src/xmerl_xpath_parse.yrl b/lib/xmerl/src/xmerl_xpath_parse.yrl index 14a8c27352..1926e7ee84 100644 --- a/lib/xmerl/src/xmerl_xpath_parse.yrl +++ b/lib/xmerl/src/xmerl_xpath_parse.yrl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2003-2011. All Rights Reserved. +%% Copyright Ericsson AB 2003-2016. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. diff --git a/lib/xmerl/src/xmerl_xpath_pred.erl b/lib/xmerl/src/xmerl_xpath_pred.erl index 582a4e4091..7105fb8bbf 100644 --- a/lib/xmerl/src/xmerl_xpath_pred.erl +++ b/lib/xmerl/src/xmerl_xpath_pred.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2003-2011. All Rights Reserved. +%% Copyright Ericsson AB 2003-2016. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. diff --git a/lib/xmerl/src/xmerl_xpath_scan.erl b/lib/xmerl/src/xmerl_xpath_scan.erl index 53d6032475..9032a01eca 100644 --- a/lib/xmerl/src/xmerl_xpath_scan.erl +++ b/lib/xmerl/src/xmerl_xpath_scan.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2003-2011. All Rights Reserved. +%% Copyright Ericsson AB 2003-2016. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -286,13 +286,13 @@ strip_ws(T) -> T. -special_token('@') -> true; +%% special_token('@') -> true; special_token('::') -> true; special_token(',') -> true; special_token('(') -> true; special_token('[') -> true; special_token('/') -> true; -special_token('//') -> true; +%% special_token('//') -> true; special_token('|') -> true; special_token('+') -> true; special_token('-') -> true; @@ -306,5 +306,4 @@ special_token('and') -> true; special_token('or') -> true; special_token('mod') -> true; special_token('div') -> true; -special_token(_) -> - false. +special_token(_) -> false. diff --git a/lib/xmerl/src/xmerl_xs.erl b/lib/xmerl/src/xmerl_xs.erl index 2a8e221e16..1ce76cfa41 100644 --- a/lib/xmerl/src/xmerl_xs.erl +++ b/lib/xmerl/src/xmerl_xs.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2003-2009. All Rights Reserved. +%% Copyright Ericsson AB 2003-2016. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -45,7 +45,6 @@ % XSLT package which is written i C++. % See also the <a href="xmerl_xs_examples.html">Tutorial</a>. % </p> - -module(xmerl_xs). -export([xslapply/2, value_of/1, select/2, built_in_rules/2 ]). @@ -71,15 +70,13 @@ %% xslapply(fun template/1, E), %% "</h1>"]; %% </pre> - xslapply(Fun, EList) when is_list(EList) -> - lists:map( Fun, EList); + lists:map(Fun, EList); xslapply(Fun, E = #xmlElement{})-> lists:map( Fun, E#xmlElement.content). - %% @spec value_of(E) -> List -%% E = unknown() +%% E = term() %% %% @doc Concatenates all text nodes within the tree. %% diff --git a/lib/xmerl/src/xmerl_xsd.erl b/lib/xmerl/src/xmerl_xsd.erl index 3038a54ee6..2836bb0e5b 100644 --- a/lib/xmerl/src/xmerl_xsd.erl +++ b/lib/xmerl/src/xmerl_xsd.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2006-2013. All Rights Reserved. +%% Copyright Ericsson AB 2006-2018. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -18,7 +18,7 @@ %% %CopyrightEnd% %% -%% @doc Interface module for XML Schema vlidation. +%% @doc Interface module for XML Schema validation. %% It handles the W3.org %% <a href="http://www.w3.org/XML/Schema#dev">specifications</a> %% of XML Schema second edition 28 october 2004. For an introduction to @@ -49,6 +49,7 @@ %% <dd>It is possible by this option to provide a state with process %% information from an earlier validation.</dd> %% </dl> +%% @type filename() = string() %% @end %%%------------------------------------------------------------------- -module(xmerl_xsd). @@ -64,12 +65,10 @@ %%---------------------------------------------------------------------- %% External exports %%---------------------------------------------------------------------- --export([ - validate/2,validate/3,process_validate/2,process_validate/3, +-export([validate/2,validate/3,process_validate/2,process_validate/3, process_schema/1,process_schema/2, process_schemas/1,process_schemas/2, - state2file/1,state2file/2,file2state/1,format_error/1 - ]). + state2file/1,state2file/2,file2state/1,format_error/1]). %%---------------------------------------------------------------------- %% Internal exports @@ -140,7 +139,7 @@ state2file(S=#xsd_state{schema_name=SN}) -> %% @spec state2file(State,FileName) -> ok | {error,Reason} %% State = global_state() -%% FileName = filename() +%% FileName = string() %% @doc Saves the schema state with all information of the processed %% schema in a file. You can provide the file name for the saved %% state. FileName is saved with the <code>.xss</code> extension @@ -155,7 +154,7 @@ state2file(S,FileName) when is_record(S,xsd_state) -> %% @spec file2state(FileName) -> {ok,State} | {error,Reason} %% State = global_state() -%% FileName = filename() +%% FileName = string() %% @doc Reads the schema state with all information of the processed %% schema from a file created with <code>state2file/[1,2]</code>. The %% format of this file is internal. The state can then be used @@ -204,7 +203,7 @@ xmerl_xsd_vsn_check(S=#xsd_state{vsn=MD5_VSN}) -> process_validate(Schema,Xml) -> process_validate(Schema,Xml,[]). %% @spec process_validate(Schema,Element,Options) -> Result -%% Schema = filename() +%% Schema = string() %% Element = XmlElement %% Options = option_list() %% Result = {ValidXmlElement,State} | {error,Reason} @@ -284,7 +283,7 @@ validate3(_,_,S) -> process_schema(Schema) -> process_schema(Schema,[]). %% @spec process_schema(Schema,Options) -> Result -%% Schema = filename() +%% Schema = string() %% Result = {ok,State} | {error,Reason} %% State = global_state() %% Reason = [ErrorReason] | ErrorReason @@ -326,7 +325,7 @@ process_schema2({SE,_},State,_Schema) -> process_schemas(Schemas) -> process_schemas(Schemas,[]). %% @spec process_schemas(Schemas,Options) -> Result -%% Schemas = [{NameSpace,filename()}|Schemas] | [] +%% Schemas = [{NameSpace,string()}|Schemas] | [] %% Result = {ok,State} | {error,Reason} %% Reason = [ErrorReason] | ErrorReason %% Options = option_list() @@ -550,7 +549,7 @@ element_content({attribute,S=#xsd_state{scope=Scope}},Att,Env) -> {AttRef,add_ref(S,AttRef)}; Name -> {AttrType,S2} = attribute_type(Att,[Name|Env],S), - S3 = check_cm(attribute,allowed_content(attribute,Env),AttrType,S2), + S3 = check_cm(attribute,allowed_content(attribute,Env),AttrType,S2), {Attr,S4} = attribute_properties(Att#xmlElement.attributes, #schema_attribute{type=AttrType},S3), Object = {attribute, @@ -568,7 +567,7 @@ element_content({element,S},El,Env) -> %% 3.3.3 bullet 2.2 S3 = element_forbidden_properties(El,S2), S4 = element_forbidden_content(El#xmlElement.content,S3), - ElRef = + ElRef = {element, {get_QName(Ref,El#xmlElement.namespace,reset_scope(S)), Occ}}, @@ -813,7 +812,6 @@ element_content({restriction,S},R,Env) -> %% base (resolved by base_type/1) or the type defined in content. {CM,S2} = type(R#xmlElement.content,S,[restriction|Env]), S3 = check_cm(restriction,allowed_content(restriction,Env),CM,S2), - {BaseTypeName,CM2,S4} = restriction_base_type(R,CM,S3), %% a QName %% S5 = add_circularity_mark(BaseTypeName,S4), BaseTypeType = base_type_type(Env), @@ -1178,7 +1176,7 @@ rename_redef_group(Name={LN,Scope,NS},S) -> NewName = {LN,['#redefine'|Scope],NS}, case resolve({group,NewName},S) of {SG=#schema_group{name=Name},_} -> - save_object({group,SG#schema_group{name=NewName}},S), + _ = save_object({group,SG#schema_group{name=NewName}},S), NewName; _ -> failed @@ -1735,20 +1733,20 @@ allowed_content(SorC,_Parents) when SorC==sequence;SorC==choice -> {choice,{1,1}},{sequence,{1,1}}, {any,{1,1}}], occurance={0,unbounded}}]}; -allowed_content(E,_Parents) - when E==any;E==selector;E==field;E==notation;E==include;E==import; - E==anyAttribute -> - {annotation,{0,1}}; -allowed_content(UKK,_Parents) when UKK==unique;UKK==key;UKK==keyref-> - #chain{content= - [{annotation,{0,1}}, - #chain{content= - [{selector,{1,1}},{selector,{1,unbounded}}]}]}; -allowed_content(annotation,_Parents) -> - #alternative{content=[{appinfo,{1,1}},{documentation,{1,1}}], - occurance={0,unbounded}}; -allowed_content(E,_Parents) when E==appinfo;E==documentation -> - {any,{0,unbounded}}; +%% allowed_content(E,_Parents) +%% when E==any;E==selector;E==field;E==notation;E==include;E==import; +%% E==anyAttribute -> +%% {annotation,{0,1}}; +%% allowed_content(UKK,_Parents) when UKK==unique;UKK==key;UKK==keyref-> +%% #chain{content= +%% [{annotation,{0,1}}, +%% #chain{content= +%% [{selector,{1,1}},{selector,{1,unbounded}}]}]}; +%% allowed_content(annotation,_Parents) -> +%% #alternative{content=[{appinfo,{1,1}},{documentation,{1,1}}], +%% occurance={0,unbounded}}; +%% allowed_content(E,_Parents) when E==appinfo;E==documentation -> +%% {any,{0,unbounded}}; allowed_content(simpleType,_Parents) -> #chain{content= [{annotation,{0,1}}, @@ -1768,22 +1766,22 @@ allowed_content(restriction,Parents) -> end; allowed_content(LU,_Parent) when LU==list;LU==union -> #chain{content=[{annotation,{0,1}},{simpleType,{0,1}}]}; -allowed_content(schema,_) -> - #chain{content= - [#alternative{content= - [{include,{1,1}},{import,{1,1}}, - {redefine,{1,1}},{annotation,{1,1}}], - occurance={0,1}}, - #chain{content= - [#alternative{content= - [#alternative{content= - [{simpleType,{1,1}},{complexType,{1,1}}, - {group,{1,1}},{attributeGroup,{1,1}}]}, - {element,{1,1}}, - {attribute,{1,1}}, - {notation,{1,1}}]}, - {annotation,{0,unbounded}}], - occurance={0,unbounded}}]}; +%% allowed_content(schema,_) -> +%% #chain{content= +%% [#alternative{content= +%% [{include,{1,1}},{import,{1,1}}, +%% {redefine,{1,1}},{annotation,{1,1}}], +%% occurance={0,1}}, +%% #chain{content= +%% [#alternative{content= +%% [#alternative{content= +%% [{simpleType,{1,1}},{complexType,{1,1}}, +%% {group,{1,1}},{attributeGroup,{1,1}}]}, +%% {element,{1,1}}, +%% {attribute,{1,1}}, +%% {notation,{1,1}}]}, +%% {annotation,{0,unbounded}}], +%% occurance={0,unbounded}}]}; allowed_content(redefine,_Parents) -> #alternative{content= [{annotation,{1,1}}, @@ -1803,31 +1801,31 @@ allowed_content(extension,Parents) -> allowed_content2(extension,simpleContent); _ -> allowed_content2(extension,complexContent) - end; -allowed_content(minExclusive,_Parents) -> - []; -allowed_content(minInclusive,_Parents) -> - []; -allowed_content(maxExclusive,_Parents) -> - []; -allowed_content(maxInclusive,_Parents) -> - []; -allowed_content(totalDigits,_Parents) -> - []; -allowed_content(fractionDigits,_Parents) -> - []; -allowed_content(length,_Parents) -> - []; -allowed_content(minLength,_Parents) -> - []; -allowed_content(maxLength,_Parents) -> - []; -allowed_content(enumeration,_Parents) -> - []; -allowed_content(whiteSpace,_Parents) -> - []; -allowed_content(pattern,_Parents) -> - []. + end. +%% allowed_content(minExclusive,_Parents) -> +%% []; +%% allowed_content(minInclusive,_Parents) -> +%% []; +%% allowed_content(maxExclusive,_Parents) -> +%% []; +%% allowed_content(maxInclusive,_Parents) -> +%% []; +%% allowed_content(totalDigits,_Parents) -> +%% []; +%% allowed_content(fractionDigits,_Parents) -> +%% []; +%% allowed_content(length,_Parents) -> +%% []; +%% allowed_content(minLength,_Parents) -> +%% []; +%% allowed_content(maxLength,_Parents) -> +%% []; +%% allowed_content(enumeration,_Parents) -> +%% []; +%% allowed_content(whiteSpace,_Parents) -> +%% []; +%% allowed_content(pattern,_Parents) -> +%% []. @@ -1905,9 +1903,9 @@ set_occurance(Ch = #chain{},Occ) -> set_occurance(Alt = #alternative{},Occ) -> Alt#alternative{occurance=Occ}; set_occurance({Name,_},Occ) when is_atom(Name) -> - {Name,Occ}; -set_occurance(CM,_) -> - CM. + {Name,Occ}. +%% set_occurance(CM,_) -> +%% CM. process_external_schema_once(E,Namespace,S) when is_record(E,xmlElement) -> @@ -3436,7 +3434,7 @@ check_keys([Key=#id_constraint{selector={selector,SelectorPath}, {L,S1} when length(L)==length(TargetNodeSet) -> %% Part1: 3.11.4.4.2.1 S2 = key_sequence_uniqueness(L,XMLEl,S1), - save_key(Key#id_constraint{key_sequence=L},S2), + _ = save_key(Key#id_constraint{key_sequence=L},S2), S2; {Err,S1} -> acc_errs(S1,{error_path(XMLEl,XMLEl#xmlElement.name),?MODULE, @@ -4014,7 +4012,7 @@ merge_derived_types(XSDType,InstType,Blocks,Mode,S) -> {error,S2} -> {InstType,S2}; {MergedType,S2} -> - save_merged_type(MergedType,S2), + _ = save_merged_type(MergedType,S2), {MergedType,S2} end. @@ -4970,7 +4968,7 @@ save_schema_element(CM,S=#xsd_state{elementFormDefault = EFD, undefined -> []; _ -> TN end, - save_in_table({schema,TN2},Schema2,S), + _ = save_in_table({schema,TN2},Schema2,S), save_to_file(S). %% other_global_elements(S,ElementList) -> @@ -5006,13 +5004,13 @@ save_to_file(S=#xsd_state{tab2file=TF}) -> {ok,IO}=file:open(filename:rootname(S#xsd_state.schema_name)++".tab", [write]), io:format(IO,"~p~n",[catch ets:tab2list(S#xsd_state.table)]), - file:close(IO); + ok = file:close(IO); false -> ok; IOFile -> {ok,IO}=file:open(IOFile,[write]), io:format(IO,"~p~n",[catch ets:tab2list(S#xsd_state.table)]), - file:close(IO) + ok = file:close(IO) end. save_merged_type(Type=#schema_simple_type{},S) -> @@ -5034,25 +5032,25 @@ save_idc(unique,IDConstr,S) -> save_unique(IDConstr,S). save_key(Key,S) -> - save_object({key,Key},S), + _ = save_object({key,Key},S), S. save_keyref(KeyRef=#id_constraint{category=keyref},S) -> S1 = add_keyref(KeyRef,S), - save_object({keyref,KeyRef},S1), + _ = save_object({keyref,KeyRef},S1), S1; save_keyref(_,S) -> S. save_unique(Unique,S) -> - save_object({unique,Unique},S), + _ = save_object({unique,Unique},S), S. save_substitutionGroup([],S) -> S; save_substitutionGroup([{Head,Members}|SGs],S) -> %% save {head,[members]} - save_in_table({substitutionGroup,Head},Members,S), + _ = save_in_table({substitutionGroup,Head},Members,S), %% save {member,head}, an element can only be a member in one %% substitutionGroup lists:foreach(fun(X)->save_in_table({substitutionGroup_member,X},Head,S) end,Members), @@ -5429,7 +5427,7 @@ add_key_once(Key,N,El,L) -> %% {filename:join([[io_lib:format("/~w(~w)",[X,Y])||{X,Y}<-Parents],Type]),Pos}. %% @spec format_error(Errors) -> Result -%% Errors = error_tuple() | [error_tuple()] +%% Errors = tuple() | [tuple()] %% Result = string() | [string()] %% @doc Formats error descriptions to human readable strings. format_error(L) when is_list(L) -> diff --git a/lib/xmerl/src/xmerl_xsd_type.erl b/lib/xmerl/src/xmerl_xsd_type.erl index 2cd7e85edd..3ee5961522 100644 --- a/lib/xmerl/src/xmerl_xsd_type.erl +++ b/lib/xmerl/src/xmerl_xsd_type.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2006-2011. All Rights Reserved. +%% Copyright Ericsson AB 2006-2016. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -229,11 +229,13 @@ check_float(Value) -> %% {Mantissa,Exponent}=lists:splitwith(Pred,Value), %% SkipEe = fun([]) -> [];(L) -> tl(L) end, case string:tokens(Value,"eE") of - [Mantissa,Exponent] -> - {ok,_} = check_decimal(Mantissa), - {ok,_} = check_integer(Exponent); - [Mantissa] -> - check_decimal(Mantissa) + [Mantissa,Exponent] -> + {ok,_} = check_decimal(Mantissa), + {ok,_} = check_integer(Exponent), + ok; + [Mantissa] -> + {ok,_} = check_decimal(Mantissa), + ok end, {ok,Value}. %% case {check_decimal(Mantissa), @@ -367,7 +369,7 @@ check_dateTime("+"++_DateTime) -> check_dateTime(DateTime) -> [Date,Time] = string:tokens(DateTime,"T"), [Y,M,D] = string:tokens(Date,"-"), - check_year(Y), + {ok,_} = check_year(Y), {ok,_} = check_positive_integer(M), {ok,_} = check_positive_integer(D), check_time(Time). |