diff options
Diffstat (limited to 'lib/xmerl/src')
-rw-r--r-- | lib/xmerl/src/xmerl_eventp.erl | 84 | ||||
-rw-r--r-- | lib/xmerl/src/xmerl_regexp.erl | 4 | ||||
-rw-r--r-- | lib/xmerl/src/xmerl_sax_old_dom.erl | 9 | ||||
-rw-r--r-- | lib/xmerl/src/xmerl_sax_parser.erl | 198 | ||||
-rw-r--r-- | lib/xmerl/src/xmerl_sax_parser.hrl | 11 | ||||
-rw-r--r-- | lib/xmerl/src/xmerl_sax_parser_base.erlsrc | 411 | ||||
-rw-r--r-- | lib/xmerl/src/xmerl_sax_parser_latin1.erlsrc | 38 | ||||
-rw-r--r-- | lib/xmerl/src/xmerl_sax_parser_list.erlsrc | 21 | ||||
-rw-r--r-- | lib/xmerl/src/xmerl_sax_parser_utf16be.erlsrc | 50 | ||||
-rw-r--r-- | lib/xmerl/src/xmerl_sax_parser_utf16le.erlsrc | 50 | ||||
-rw-r--r-- | lib/xmerl/src/xmerl_sax_parser_utf8.erlsrc | 50 | ||||
-rw-r--r-- | lib/xmerl/src/xmerl_sax_simple_dom.erl | 7 | ||||
-rw-r--r-- | lib/xmerl/src/xmerl_scan.erl | 42 | ||||
-rw-r--r-- | lib/xmerl/src/xmerl_xpath.erl | 30 | ||||
-rw-r--r-- | lib/xmerl/src/xmerl_xs.erl | 7 | ||||
-rw-r--r-- | lib/xmerl/src/xmerl_xsd.erl | 13 |
16 files changed, 738 insertions, 287 deletions
diff --git a/lib/xmerl/src/xmerl_eventp.erl b/lib/xmerl/src/xmerl_eventp.erl index 2cb76abc6e..8d7ea25e24 100644 --- a/lib/xmerl/src/xmerl_eventp.erl +++ b/lib/xmerl/src/xmerl_eventp.erl @@ -25,6 +25,90 @@ %% Each contain more elaborate settings of xmerl_scan that makes usage of %% the customization functions. %% +%% @type xmlElement() = #xmlElement{}. +%% +%% @type option_list(). <p>Options allow to customize the behaviour of the +%% scanner. +%% See also <a href="xmerl_examples.html">tutorial</a> on customization +%% functions. +%% </p> +%% <p> +%% Possible options are: +%% </p> +%% <dl> +%% <dt><code>{acc_fun, Fun}</code></dt> +%% <dd>Call back function to accumulate contents of entity.</dd> +%% <dt><code>{continuation_fun, Fun} | +%% {continuation_fun, Fun, ContinuationState}</code></dt> +%% <dd>Call back function to decide what to do if the scanner runs into EOF +%% before the document is complete.</dd> +%% <dt><code>{event_fun, Fun} | +%% {event_fun, Fun, EventState}</code></dt> +%% <dd>Call back function to handle scanner events.</dd> +%% <dt><code>{fetch_fun, Fun} | +%% {fetch_fun, Fun, FetchState}</code></dt> +%% <dd>Call back function to fetch an external resource.</dd> +%% <dt><code>{hook_fun, Fun} | +%% {hook_fun, Fun, HookState}</code></dt> +%% <dd>Call back function to process the document entities once +%% identified.</dd> +%% <dt><code>{close_fun, Fun}</code></dt> +%% <dd>Called when document has been completely parsed.</dd> +%% <dt><code>{rules, ReadFun, WriteFun, RulesState} | +%% {rules, Rules}</code></dt> +%% <dd>Handles storing of scanner information when parsing.</dd> +%% <dt><code>{user_state, UserState}</code></dt> +%% <dd>Global state variable accessible from all customization functions</dd> +%% +%% <dt><code>{fetch_path, PathList}</code></dt> +%% <dd>PathList is a list of +%% directories to search when fetching files. If the file in question +%% is not in the fetch_path, the URI will be used as a file +%% name.</dd> +%% <dt><code>{space, Flag}</code></dt> +%% <dd>'preserve' (default) to preserve spaces, 'normalize' to +%% accumulate consecutive whitespace and replace it with one space.</dd> +%% <dt><code>{line, Line}</code></dt> +%% <dd>To specify starting line for scanning in document which contains +%% fragments of XML.</dd> +%% <dt><code>{namespace_conformant, Flag}</code></dt> +%% <dd>Controls whether to behave as a namespace conformant XML parser, +%% 'false' (default) to not otherwise 'true'.</dd> +%% <dt><code>{validation, Flag}</code></dt> +%% <dd>Controls whether to process as a validating XML parser: +%% 'off' (default) no validation, or validation 'dtd' by DTD or 'schema' +%% by XML Schema. 'false' and 'true' options are obsolete +%% (i.e. they may be removed in a future release), if used 'false' +%% equals 'off' and 'true' equals 'dtd'.</dd> +%% <dt><code>{schemaLocation, [{Namespace,Link}|...]}</code></dt> +%% <dd>Tells explicitly which XML Schema documents to use to validate +%% the XML document. Used together with the +%% <code>{validation,schema}</code> option.</dd> +%% <dt><code>{quiet, Flag}</code></dt> +%% <dd>Set to 'true' if xmerl should behave quietly and not output any +%% information to standard output (default 'false').</dd> +%% <dt><code>{doctype_DTD, DTD}</code></dt> +%% <dd>Allows to specify DTD name when it isn't available in the XML +%% document. This option has effect only together with +%% <code>{validation,'dtd'</code> option.</dd> +%% <dt><code>{xmlbase, Dir}</code></dt> +%% <dd>XML Base directory. If using string/1 default is current directory. +%% If using file/1 default is directory of given file.</dd> +%% <dt><code>{encoding, Enc}</code></dt> +%% <dd>Set default character set used (default UTF-8). +%% This character set is used only if not explicitly given by the XML +%% declaration. </dd> +%% <dt><code>{document, Flag}</code></dt> +%% <dd>Set to 'true' if xmerl should return a complete XML document +%% as an xmlDocument record (default 'false').</dd> +%% <dt><code>{comments, Flag}</code></dt> +%% <dd>Set to 'false' if xmerl should skip comments otherwise they will +%% be returned as xmlComment records (default 'true').</dd> +%% <dt><code>{default_attrs, Flag}</code></dt> +%% <dd>Set to 'true' if xmerl should add to elements missing attributes +%% with a defined default value (default 'false').</dd> +%% </dl> +%% -module(xmerl_eventp). -vsn('0.19'). -date('03-09-17'). diff --git a/lib/xmerl/src/xmerl_regexp.erl b/lib/xmerl/src/xmerl_regexp.erl index fc89b80ff1..1bf8496673 100644 --- a/lib/xmerl/src/xmerl_regexp.erl +++ b/lib/xmerl/src/xmerl_regexp.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2006-2016. All Rights Reserved. +%% Copyright Ericsson AB 2006-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -1154,7 +1154,7 @@ comp_crs([], Last) -> [{Last,maxchar}]. %% build_dfa(NFA, NfaStartState) -> {DFA,DfaStartState}. %% Build a DFA from an NFA using "subset construction". The major %% difference from the book is that we keep the marked and unmarked -%% DFA states in seperate lists. New DFA states are added to the +%% DFA states in separate lists. New DFA states are added to the %% unmarked list and states are marked by moving them to the marked %% list. We assume that the NFA accepting state numbers are in %% ascending order for the rules and use ordsets to keep this order. diff --git a/lib/xmerl/src/xmerl_sax_old_dom.erl b/lib/xmerl/src/xmerl_sax_old_dom.erl index fefcf03fce..6d0d836487 100644 --- a/lib/xmerl/src/xmerl_sax_old_dom.erl +++ b/lib/xmerl/src/xmerl_sax_old_dom.erl @@ -2,7 +2,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2009-2016. All Rights Reserved. +%% Copyright Ericsson AB 2009-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -127,9 +127,10 @@ build_dom(endDocument, State#xmerl_sax_old_dom_state{dom=[Decl, Current#xmlElement{ content=lists:reverse(C) }]}; - _ -> - %%?dbg("~p\n", [D]), - ?error("we're not at end the document when endDocument event is encountered.") + _ -> + %% endDocument is also sent by the parser when a fault occur to tell + %% the event receiver that no more input will be sent + State end; %% Element diff --git a/lib/xmerl/src/xmerl_sax_parser.erl b/lib/xmerl/src/xmerl_sax_parser.erl index 318a0cf7f4..e383c4c349 100644 --- a/lib/xmerl/src/xmerl_sax_parser.erl +++ b/lib/xmerl/src/xmerl_sax_parser.erl @@ -1,7 +1,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -33,6 +33,7 @@ %% External exports %%---------------------------------------------------------------------- -export([file/2, + stream/3, stream/2]). %%---------------------------------------------------------------------- @@ -63,7 +64,7 @@ %% Description: Parse file containing an XML document. %%---------------------------------------------------------------------- file(Name,Options) -> - case file:open(Name, [raw, read,binary]) of + case file:open(Name, [raw, read_ahead, read,binary]) of {error, Reason} -> {error,{Name, file:format_error(Reason)}}; {ok, FD} -> @@ -72,11 +73,12 @@ file(Name,Options) -> File = filename:basename(Name), ContinuationFun = fun default_continuation_cb/1, Res = stream(<<>>, - [{continuation_fun, ContinuationFun}, - {continuation_state, FD}, - {current_location, CL}, - {entity, File} - |Options]), + [{continuation_fun, ContinuationFun}, + {continuation_state, FD}, + {current_location, CL}, + {entity, File} + |Options], + file), ok = file:close(FD), Res end. @@ -92,19 +94,22 @@ file(Name,Options) -> %% EventState = term() %% Description: Parse a stream containing an XML document. %%---------------------------------------------------------------------- -stream(Xml, Options) when is_list(Xml), is_list(Options) -> +stream(Xml, Options) -> + stream(Xml, Options, stream). + +stream(Xml, Options, InputType) when is_list(Xml), is_list(Options) -> State = parse_options(Options, initial_state()), - case State#xmerl_sax_parser_state.file_type of + case State#xmerl_sax_parser_state.file_type of dtd -> xmerl_sax_parser_list:parse_dtd(Xml, State#xmerl_sax_parser_state{encoding = list, - input_type = stream}); + input_type = InputType}); normal -> xmerl_sax_parser_list:parse(Xml, State#xmerl_sax_parser_state{encoding = list, - input_type = stream}) + input_type = InputType}) end; -stream(Xml, Options) when is_binary(Xml), is_list(Options) -> +stream(Xml, Options, InputType) when is_binary(Xml), is_list(Options) -> case parse_options(Options, initial_state()) of {error, Reason} -> {error, Reason}; State -> @@ -115,21 +120,22 @@ stream(Xml, Options) when is_binary(Xml), is_list(Options) -> normal -> parse end, - case detect_charset(Xml, State) of - {error, Reason} -> {fatal_error, - { - State#xmerl_sax_parser_state.current_location, - State#xmerl_sax_parser_state.entity, - 1 - }, - Reason, - [], - State#xmerl_sax_parser_state.event_state}; - {Xml1, State1} -> - parse_binary(Xml1, - State1#xmerl_sax_parser_state{input_type = stream}, - ParseFunction) - end + try + {Xml1, State1} = detect_charset(Xml, State), + parse_binary(Xml1, + State1#xmerl_sax_parser_state{input_type = InputType}, + ParseFunction) + catch + throw:{fatal_error, {State2, Reason}} -> + {fatal_error, + { + State2#xmerl_sax_parser_state.current_location, + State2#xmerl_sax_parser_state.entity, + 1 + }, + Reason, [], + State2#xmerl_sax_parser_state.event_state} + end end. %%---------------------------------------------------------------------- @@ -151,8 +157,8 @@ parse_binary(Xml, #xmerl_sax_parser_state{encoding={utf16,big}}=State, F) -> xmerl_sax_parser_utf16be:F(Xml, State); parse_binary(Xml, #xmerl_sax_parser_state{encoding=latin1}=State, F) -> xmerl_sax_parser_latin1:F(Xml, State); -parse_binary(_, #xmerl_sax_parser_state{encoding=Enc}, _) -> - {error, lists:flatten(io_lib:format("Charcter set ~p not supported", [Enc]))}. +parse_binary(_, #xmerl_sax_parser_state{encoding=Enc}, State) -> + ?fatal_error(State, lists:flatten(io_lib:format("Charcter set ~p not supported", [Enc]))). %%---------------------------------------------------------------------- %% Function: initial_state/0 @@ -206,8 +212,7 @@ parse_options([{entity, Entity} |Options], State) -> parse_options([skip_external_dtd |Options], State) -> parse_options(Options, State#xmerl_sax_parser_state{skip_external_dtd = true}); parse_options([O |_], _State) -> - {error, - lists:flatten(io_lib:format("Option: ~p not supported", [O]))}. + {error, lists:flatten(io_lib:format("Option: ~p not supported", [O]))}. check_encoding_option(E) when E==utf8; E=={utf16,little}; E=={utf16,big}; @@ -225,16 +230,10 @@ check_encoding_option(E) -> %% Output: {utf8|utf16le|utf16be|iso8859, Xml, State} %% Description: Detects which character set is used in a binary stream. %%---------------------------------------------------------------------- -detect_charset(<<>>, #xmerl_sax_parser_state{continuation_fun = undefined} = _) -> - throw({error, "Can't detect character encoding due to no indata"}); -detect_charset(<<>>, #xmerl_sax_parser_state{continuation_fun = CFun, - continuation_state = CState} = State) -> - case CFun(CState) of - {<<>>, _} -> - throw({error, "Can't detect character encoding due to lack of indata"}); - {NewBytes, NewContState} -> - detect_charset(NewBytes, State#xmerl_sax_parser_state{continuation_state = NewContState}) - end; +detect_charset(<<>>, #xmerl_sax_parser_state{continuation_fun = undefined} = State) -> + ?fatal_error(State, "Can't detect character encoding due to lack of indata"); +detect_charset(<<>>, State) -> + cf(<<>>, State, fun detect_charset/2); detect_charset(Bytes, State) -> case unicode:bom_to_encoding(Bytes) of {latin1, 0} -> @@ -244,25 +243,47 @@ detect_charset(Bytes, State) -> {RealBytes, State#xmerl_sax_parser_state{encoding=Enc}} end. +detect_charset_1(<<16#00>> = Xml, State) -> + cf(Xml, State, fun detect_charset_1/2); +detect_charset_1(<<16#00, 16#3C>> = Xml, State) -> + cf(Xml, State, fun detect_charset_1/2); +detect_charset_1(<<16#00, 16#3C, 16#00>> = Xml, State) -> + cf(Xml, State, fun detect_charset_1/2); detect_charset_1(<<16#00, 16#3C, 16#00, 16#3F, _/binary>> = Xml, State) -> {Xml, State#xmerl_sax_parser_state{encoding={utf16, big}}}; +detect_charset_1(<<16#3C>> = Xml, State) -> + cf(Xml, State, fun detect_charset_1/2); +detect_charset_1(<<16#3C, 16#00>> = Xml, State) -> + cf(Xml, State, fun detect_charset_1/2); +detect_charset_1(<<16#3C, 16#00, 16#3F>> = Xml, State) -> + cf(Xml, State, fun detect_charset_1/2); detect_charset_1(<<16#3C, 16#00, 16#3F, 16#00, _/binary>> = Xml, State) -> {Xml, State#xmerl_sax_parser_state{encoding={utf16, little}}}; -detect_charset_1(<<16#3C, 16#3F, 16#78, 16#6D, 16#6C, Xml2/binary>> = Xml, State) -> - case parse_xml_directive(Xml2) of +detect_charset_1(<<16#3C>> = Xml, State) -> + cf(Xml, State, fun detect_charset_1/2); +detect_charset_1(<<16#3C, 16#3F>> = Xml, State) -> + cf(Xml, State, fun detect_charset_1/2); +detect_charset_1(<<16#3C, 16#3F, 16#78>> = Xml, State) -> + cf(Xml, State, fun detect_charset_1/2); +detect_charset_1(<<16#3C, 16#3F, 16#78, 16#6D>> = Xml, State) -> + cf(Xml, State, fun detect_charset_1/2); +detect_charset_1(<<16#3C, 16#3F, 16#78, 16#6D, 16#6C, Xml2/binary>>, State) -> + {Xml3, State1} = read_until_end_of_xml_directive(Xml2, State), + case parse_xml_directive(Xml3) of {error, Reason} -> - {error, Reason}; + ?fatal_error(State, Reason); AttrList -> case lists:keysearch("encoding", 1, AttrList) of {value, {_, E}} -> case convert_encoding(E) of {error, Reason} -> - {error, Reason}; + ?fatal_error(State, Reason); Enc -> - {Xml, State#xmerl_sax_parser_state{encoding=Enc}} + {<<16#3C, 16#3F, 16#78, 16#6D, 16#6C, Xml3/binary>>, + State1#xmerl_sax_parser_state{encoding=Enc}} end; _ -> - {Xml, State} + {<<16#3C, 16#3F, 16#78, 16#6D, 16#6C, Xml3/binary>>, State1} end end; detect_charset_1(Xml, State) -> @@ -372,7 +393,7 @@ parse_value_1(<<C, Rest/binary>>, Stop, Acc) -> parse_value_1(Rest, Stop, [C |Acc]). %%====================================================================== -%%Default functions +%% Default functions %%====================================================================== %%---------------------------------------------------------------------- %% Function: default_event_cb(Event, LineNo, State) -> Result @@ -388,7 +409,7 @@ default_event_cb(_Event, _LineNo, State) -> %%---------------------------------------------------------------------- %% Function: default_continuation_cb(IoDevice) -> Result %% IoDevice = iodevice() -%% Output: Result = {[char()], State} +%% Output: Result = {binary(), IoDevice} %% Description: Default continuation callback reading blocks. %%---------------------------------------------------------------------- default_continuation_cb(IoDevice) -> @@ -398,3 +419,82 @@ default_continuation_cb(IoDevice) -> {ok, FileBin} -> {FileBin, IoDevice} end. + +%%---------------------------------------------------------------------- +%% Function: read_until_end_of_xml_directive(Rest, State) -> Result +%% Rest = binary() +%% Output: Result = {binary(), State} +%% Description: Reads a utf8 or latin1 until it finds '?>' +%%---------------------------------------------------------------------- +read_until_end_of_xml_directive(Rest, State) -> + case binary:match(Rest, <<"?>">>) of + nomatch -> + case cf(Rest, State) of + {<<>>, _} -> + ?fatal_error(State, "Can't detect character encoding due to lack of indata"); + {NewBytes, NewState} -> + read_until_end_of_xml_directive(NewBytes, NewState) + end; + _ -> + {Rest, State} + end. + + +%%---------------------------------------------------------------------- +%% Function : cf(Rest, State) -> Result +%% Parameters: Rest = binary() +%% State = #xmerl_sax_parser_state{} +%% NextCall = fun() +%% Result : {Rest, State} +%% Description: Function that uses provided fun to read another chunk from +%% input stream and calls the fun in NextCall. +%%---------------------------------------------------------------------- +cf(_Rest, #xmerl_sax_parser_state{continuation_fun = undefined} = State) -> + ?fatal_error(State, "Continuation function undefined"); +cf(Rest, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = CState} = State) -> + Result = + try + CFun(CState) + catch + throw:ErrorTerm -> + ?fatal_error(State, ErrorTerm); + exit:Reason -> + ?fatal_error(State, {'EXIT', Reason}) + end, + case Result of + {<<>>, _} -> + ?fatal_error(State, "Can't detect character encoding due to lack of indata"); + {NewBytes, NewContState} -> + {<<Rest/binary, NewBytes/binary>>, + State#xmerl_sax_parser_state{continuation_state = NewContState}} + end. + +%%---------------------------------------------------------------------- +%% Function : cf(Rest, State, NextCall) -> Result +%% Parameters: Rest = binary() +%% State = #xmerl_sax_parser_state{} +%% NextCall = fun() +%% Result : {Rest, State} +%% Description: Function that uses provided fun to read another chunk from +%% input stream and calls the fun in NextCall. +%%---------------------------------------------------------------------- +cf(_Rest, #xmerl_sax_parser_state{continuation_fun = undefined} = State, _) -> + ?fatal_error(State, "Continuation function undefined"); +cf(Rest, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = CState} = State, + NextCall) -> + Result = + try + CFun(CState) + catch + throw:ErrorTerm -> + ?fatal_error(State, ErrorTerm); + exit:Reason -> + ?fatal_error(State, {'EXIT', Reason}) + end, + case Result of + {<<>>, _} -> + ?fatal_error(State, "Can't detect character encoding due to lack of indata"); + {NewBytes, NewContState} -> + NextCall(<<Rest/binary, NewBytes/binary>>, + State#xmerl_sax_parser_state{continuation_state = NewContState}) + end. diff --git a/lib/xmerl/src/xmerl_sax_parser.hrl b/lib/xmerl/src/xmerl_sax_parser.hrl index 932ab0cec5..56a3a42e5f 100644 --- a/lib/xmerl/src/xmerl_sax_parser.hrl +++ b/lib/xmerl/src/xmerl_sax_parser.hrl @@ -1,7 +1,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -88,14 +88,7 @@ current_location, % Location of the currently parsed XML entity entity, % Parsed XML entity skip_external_dtd = false,% If true the external DTD is skipped during parsing - input_type % Source type: file | stream. - % This field is a preparation for an fix in R17 of a bug in - % the conformance against the standard. - % Today a file which contains two XML documents will be considered - % well-formed and the second is placed in the rest part of the - % return tuple, according to the conformance tests this should fail. - % In the future this will fail if xmerl_sax_aprser:file/2 is used but - % left to the user in the xmerl_sax_aprser:stream/2 case. + input_type % Source type: file | stream }). diff --git a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc index 4d75805b9b..1dca9608cb 100644 --- a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc @@ -1,7 +1,7 @@ %%-*-erlang-*- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -64,29 +64,42 @@ %% Description: Parsing XML from input stream. %%---------------------------------------------------------------------- parse(Xml, State) -> - RefTable = ets:new(xmerl_sax_entity_refs, [private]), - - State1 = event_callback(startDocument, State), - - case catch parse_document(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}) of - {ok, Rest, State2} -> - State3 = event_callback(endDocument, State2), - ets:delete(RefTable), - {ok, State3#xmerl_sax_parser_state.event_state, Rest}; - {fatal_error, {State2, Reason}} -> - State3 = event_callback(endDocument, State2), - ets:delete(RefTable), - format_error(fatal_error, State3, Reason); - {event_receiver_error, State2, {Tag, Reason}} -> - State3 = event_callback(endDocument, State2), - ets:delete(RefTable), - format_error(Tag, State3, Reason); - Other -> - _State2 = event_callback(endDocument, State1), - ets:delete(RefTable), - throw(Other) + RefTable = maps:new(), + + try + State1 = event_callback(startDocument, State), + Result = parse_document(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}), + handle_end_document(Result) + catch + throw:Exception -> + handle_end_document(Exception); + _:OtherError -> + handle_end_document({other, OtherError, State}) end. + % case catch parse_document(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}) of + % {ok, Rest, State2} -> + % State3 = event_callback(endDocument, State2), + % case check_if_rest_ok(State3#xmerl_sax_parser_state.input_type, Rest) of + % true -> + % {ok, State3#xmerl_sax_parser_state.event_state, Rest}; + % false -> + % format_error(fatal_error, State3, "Input found after legal document") + % end; + % {fatal_error, {State2, Reason}} -> + % State3 = event_callback(endDocument, State2), + % format_error(fatal_error, State3, Reason); + % {event_receiver_error, State2, {Tag, Reason}} -> + % State3 = event_callback(endDocument, State2), + % format_error(Tag, State3, Reason); + % {endDocument, Rest, State2} -> + % State3 = event_callback(endDocument, State2), + % {ok, State3#xmerl_sax_parser_state.event_state, Rest}; + % Other -> + % _State2 = event_callback(endDocument, State1), + % {fatal_error, Other} + % end. + %%---------------------------------------------------------------------- %% Function: parse_dtd(Xml, State) -> Result %% Input: Xml = string() | binary() @@ -96,38 +109,120 @@ parse(Xml, State) -> %% Description: Parsing XML DTD from input stream. %%---------------------------------------------------------------------- parse_dtd(Xml, State) -> - RefTable = ets:new(xmerl_sax_entity_refs, [private]), - - State1 = event_callback(startDocument, State), - - case catch parse_external_entity_1(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}) of - {fatal_error, {State2, Reason}} -> - State3 = event_callback(endDocument, State2), - ets:delete(RefTable), - format_error(fatal_error, State3, Reason); - {event_receiver_error, State2, {Tag, Reason}} -> - State3 = event_callback(endDocument, State2), - format_error(Tag, State3, Reason); - {Rest, State2} when is_record(State2, xmerl_sax_parser_state) -> - State3 = event_callback(endDocument, State2), - ets:delete(RefTable), - {ok, State3#xmerl_sax_parser_state.event_state, Rest}; - {endDocument, Rest, State2} when is_record(State2, xmerl_sax_parser_state) -> - State3 = event_callback(endDocument, State2), - ets:delete(RefTable), - {ok, State3#xmerl_sax_parser_state.event_state, Rest}; - Other -> - _State2 = event_callback(endDocument, State1), - ets:delete(RefTable), - throw(Other) + RefTable = maps:new(), + + try + State1 = event_callback(startDocument, State), + Result = parse_external_entity_1(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}), + handle_end_document(Result) + catch + throw:Exception -> + handle_end_document(Exception); + _:OtherError -> + handle_end_document({other, OtherError, State}) end. + % case catch parse_external_entity_1(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}) of + % {fatal_error, {State2, Reason}} -> + % State3 = event_callback(endDocument, State2), + % format_error(fatal_error, State3, Reason); + % {event_receiver_error, State2, {Tag, Reason}} -> + % State3 = event_callback(endDocument, State2), + % format_error(Tag, State3, Reason); + % {Rest, State2} when is_record(State2, xmerl_sax_parser_state) -> + % State3 = event_callback(endDocument, State2), + % {ok, State3#xmerl_sax_parser_state.event_state, Rest}; + % {endDocument, Rest, State2} when is_record(State2, xmerl_sax_parser_state) -> + % State3 = event_callback(endDocument, State2), + % {ok, State3#xmerl_sax_parser_state.event_state, Rest}; + % Other -> + % _State2 = event_callback(endDocument, State1), + % {fatal_error, Other} + % end. + + %%====================================================================== %% Internal functions %%====================================================================== %%---------------------------------------------------------------------- +%% Function: handle_end_document(ParserResult) -> Result +%% Input: ParseResult = term() +%% Output: Result = {ok, Rest, EventState} | +%% EventState = term() +%% Description: Ends the parsing and formats output +%%---------------------------------------------------------------------- +handle_end_document({ok, Rest, State}) -> + %%ok case from parse + try + State1 = event_callback(endDocument, State), + case check_if_rest_ok(State1#xmerl_sax_parser_state.input_type, Rest) of + true -> + {ok, State1#xmerl_sax_parser_state.event_state, Rest}; + false -> + format_error(fatal_error, State1, "Input found after legal document") + end + catch + throw:{event_receiver_error, State2, {Tag, Reason}} -> + format_error(Tag, State2, Reason); + _:Other -> + {fatal_error, Other} + end; +handle_end_document({endDocument, Rest, State}) -> + %% ok case from parse and parse_dtd + try + State1 = event_callback(endDocument, State), + {ok, State1#xmerl_sax_parser_state.event_state, Rest} + catch + throw:{event_receiver_error, State2, {Tag, Reason}} -> + format_error(Tag, State2, Reason); + _:Other -> + {fatal_error, Other} + end; +handle_end_document({fatal_error, {State, Reason}}) -> + try + State1 = event_callback(endDocument, State), + format_error(fatal_error, State1, Reason) + catch + throw:{event_receiver_error, State2, {Tag, Reason}} -> + format_error(Tag, State2, Reason); + _:Other -> + {fatal_error, Other} + end; +handle_end_document({event_receiver_error, State, {Tag, Reason}}) -> + try + State1 = event_callback(endDocument, State), + format_error(Tag, State1, Reason) + catch + throw:{event_receiver_error, State2, {Tag, Reason}} -> + format_error(Tag, State2, Reason); + _:Other -> + {fatal_error, Other} + end; +handle_end_document({Rest, State}) when is_record(State, xmerl_sax_parser_state) -> + %%ok case from parse_dtd + try + State1 = event_callback(endDocument, State), + {ok, State1#xmerl_sax_parser_state.event_state, Rest} + catch + throw:{event_receiver_error, State2, {Tag, Reason}} -> + format_error(Tag, State2, Reason); + _:Other -> + {fatal_error, Other} + end; +handle_end_document({other, Error, State}) -> + try + _State1 = event_callback(endDocument, State), + {fatal_error, Error} + catch + throw:{event_receiver_error, State2, {Tag, Reason}} -> + format_error(Tag, State2, Reason); + _:Other -> + {fatal_error, Other} + end. + +%%---------------------------------------------------------------------- %% Function: parse_document(Rest, State) -> Result %% Input: Rest = string() | binary() %% State = #xmerl_sax_parser_state{} @@ -136,10 +231,11 @@ parse_dtd(Xml, State) -> %% [1] document ::= prolog element Misc* %%---------------------------------------------------------------------- parse_document(Rest, State) when is_record(State, xmerl_sax_parser_state) -> - {Rest1, State1} = parse_xml_decl(Rest, State), + {Rest1, State1} = parse_byte_order_mark(Rest, State), {Rest2, State2} = parse_misc(Rest1, State1, true), {ok, Rest2, State2}. +?PARSE_BYTE_ORDER_MARK(Bytes, State). %%---------------------------------------------------------------------- %% Function: parse_xml_decl(Rest, State) -> Result @@ -150,15 +246,8 @@ parse_document(Rest, State) when is_record(State, xmerl_sax_parser_state) -> %% [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? %% [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' %%---------------------------------------------------------------------- --dialyzer({[no_fail_call, no_match], parse_xml_decl/2}). parse_xml_decl(?STRING_EMPTY, State) -> cf(?STRING_EMPTY, State, fun parse_xml_decl/2); -parse_xml_decl(?BYTE_ORDER_MARK_1, State) -> - cf(?BYTE_ORDER_MARK_1, State, fun parse_xml_decl/2); -parse_xml_decl(?BYTE_ORDER_MARK_2, State) -> - cf(?BYTE_ORDER_MARK_2, State, fun parse_xml_decl/2); -parse_xml_decl(?BYTE_ORDER_MARK_REST(Rest), State) -> - cf(Rest, State, fun parse_xml_decl/2); parse_xml_decl(?STRING("<") = Bytes, State) -> cf(Bytes, State, fun parse_xml_decl/2); parse_xml_decl(?STRING("<?") = Bytes, State) -> @@ -170,31 +259,19 @@ parse_xml_decl(?STRING("<?xm") = Bytes, State) -> parse_xml_decl(?STRING("<?xml") = Bytes, State) -> cf(Bytes, State, fun parse_xml_decl/2); parse_xml_decl(?STRING_REST("<?xml", Rest1), State) -> - parse_xml_decl_1(Rest1, State); -parse_xml_decl(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State) when is_binary(Bytes) -> - case unicode:characters_to_list(Bytes, Enc) of - {incomplete, _, _} -> - cf(Bytes, State, fun parse_xml_decl/2); - {error, _Encoded, _Rest} -> - ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))); - _ -> - parse_prolog(Bytes, State) - end; -parse_xml_decl(Bytes, State) -> - parse_prolog(Bytes, State). - + parse_xml_decl_rest(Rest1, State); +?PARSE_XML_DECL(Bytes, State). -parse_xml_decl_1(?STRING_UNBOUND_REST(C, Rest) = Bytes, State) -> +parse_xml_decl_rest(?STRING_UNBOUND_REST(C, Rest) = Bytes, State) -> if ?is_whitespace(C) -> {_XmlAttributes, Rest1, State1} = parse_version_info(Rest, State, []), - %State2 = event_callback({processingInstruction, "xml", XmlAttributes}, State1),% The XML decl. should not be reported as a PI parse_prolog(Rest1, State1); true -> parse_prolog(?STRING_REST("<?xml", Bytes), State) end; -parse_xml_decl_1(Bytes, State) -> - unicode_incomplete_check([Bytes, State, fun parse_xml_decl_1/2], undefined). +parse_xml_decl_rest(Bytes, State) -> + unicode_incomplete_check([Bytes, State, fun parse_xml_decl_rest/2], undefined). @@ -216,8 +293,6 @@ parse_prolog(?STRING_REST("<?", Rest), State) -> parse_prolog(Rest1, State1); {endDocument, Rest1, State1} -> parse_prolog(Rest1, State1) - % IValue = ?TO_INPUT_FORMAT("<?"), - % {?APPEND_STRING(IValue, Rest1), State1} end; parse_prolog(?STRING_REST("<!", Rest), State) -> parse_prolog_1(Rest, State); @@ -230,7 +305,6 @@ parse_prolog(Bytes, State) -> unicode_incomplete_check([Bytes, State, fun parse_prolog/2], "expecting < or whitespace"). - parse_prolog_1(?STRING_EMPTY, State) -> cf(?STRING_EMPTY, State, fun parse_prolog_1/2); parse_prolog_1(?STRING("D") = Bytes, State) -> @@ -442,6 +516,16 @@ check_if_new_doc_allowed(stream, []) -> check_if_new_doc_allowed(_, _) -> false. +check_if_rest_ok(file, []) -> + true; +check_if_rest_ok(file, <<>>) -> + true; +check_if_rest_ok(stream, _) -> + true; +check_if_rest_ok(_, _) -> + false. + + %%---------------------------------------------------------------------- %% Function: parse_pi_1(Rest, State) -> Result %% Input: Rest = string() | binary() @@ -886,11 +970,11 @@ send_end_prefix_mapping_event([{Prefix, _Uri} |Ns], State) -> parse_eq(?STRING_EMPTY, State) -> cf(?STRING_EMPTY, State, fun parse_eq/2); parse_eq(?STRING_REST("=", Rest), State) -> - {Rest, State}; + {Rest, State}; parse_eq(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) -> - {_WS, Rest, State1} = - whitespace(Bytes, State, []), - parse_eq(Rest, State1); + {_WS, Rest, State1} = + whitespace(Bytes, State, []), + parse_eq(Rest, State1); parse_eq(Bytes, State) -> unicode_incomplete_check([Bytes, State, fun parse_eq/2], "expecting = or whitespace"). @@ -908,11 +992,11 @@ parse_eq(Bytes, State) -> parse_att_value(?STRING_EMPTY, State) -> cf(?STRING_EMPTY, State, fun parse_att_value/2); parse_att_value(?STRING_UNBOUND_REST(C, Rest), State) when C == $'; C == $" -> - parse_att_value(Rest, State, C, []); + parse_att_value(Rest, State, C, []); parse_att_value(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) -> - {_WS, Rest, State1} = - whitespace(Bytes, State, []), - parse_att_value(Rest, State1); + {_WS, Rest, State1} = + whitespace(Bytes, State, []), + parse_att_value(Rest, State1); parse_att_value(Bytes, State) -> unicode_incomplete_check([Bytes, State, fun parse_att_value/2], "\', \" or whitespace expected"). @@ -1024,16 +1108,21 @@ parse_etag(Bytes, State) -> unicode_incomplete_check([Bytes, State, fun parse_etag/2], undefined). - parse_etag_1(?STRING_REST(">", Rest), #xmerl_sax_parser_state{end_tags=[{_ETag, Uri, LocalName, QName, OldNsList, NewNsList} - |RestOfETags]} = State, _Tag) -> + |RestOfETags], + input_type=InputType} = State, _Tag) -> State1 = event_callback({endElement, Uri, LocalName, QName}, State), State2 = send_end_prefix_mapping_event(NewNsList, State1), - parse_content(Rest, - State2#xmerl_sax_parser_state{end_tags=RestOfETags, - ns = OldNsList}, - [], true); + case check_if_new_doc_allowed(InputType, RestOfETags) of + true -> + throw({endDocument, Rest, State2#xmerl_sax_parser_state{ns = OldNsList}}); + false -> + parse_content(Rest, + State2#xmerl_sax_parser_state{end_tags=RestOfETags, + ns = OldNsList}, + [], true) + end; parse_etag_1(?STRING_UNBOUND_REST(_C, _), State, Tag) -> {P,TN} = Tag, ?fatal_error(State, "Bad EndTag: " ++ P ++ ":" ++ TN); @@ -1051,21 +1140,26 @@ parse_etag_1(Bytes, State, Tag) -> %% Description: Parsing the content part of tags %% [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* %%---------------------------------------------------------------------- - parse_content(?STRING_EMPTY, State, Acc, IgnorableWS) -> - case catch cf(?STRING_EMPTY, State, Acc, IgnorableWS, fun parse_content/4) of - {Rest, State1} when is_record(State1, xmerl_sax_parser_state) -> - {Rest, State1}; - {fatal_error, {State1, Msg}} -> - case check_if_document_complete(State1, Msg) of - true -> - State2 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State1), - {?STRING_EMPTY, State2}; - false -> - ?fatal_error(State1, Msg) - end; - Other -> - throw(Other) + case check_if_document_complete(State, "No more bytes") of + true -> + State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State), + {?STRING_EMPTY, State1}; + false -> + case catch cf(?STRING_EMPTY, State, Acc, IgnorableWS, fun parse_content/4) of + {Rest, State1} when is_record(State1, xmerl_sax_parser_state) -> + {Rest, State1}; + {fatal_error, {State1, Msg}} -> + case check_if_document_complete(State1, Msg) of + true -> + State2 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State1), + {?STRING_EMPTY, State2}; + false -> + ?fatal_error(State1, Msg) + end; + Other -> + throw(Other) + end end; parse_content(?STRING("\r") = Bytes, State, Acc, IgnorableWS) -> cf(Bytes, State, Acc, IgnorableWS, fun parse_content/4); @@ -1094,7 +1188,7 @@ parse_content(?STRING_REST("<?", Rest), State, Acc, IgnorableWS) -> parse_content(?STRING_REST("<!", Rest1) = Rest, #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) -> case ET of [] -> - {Rest, State}; %%LATH : Skicka ignorable WS ??? + {Rest, State}; %% Skicka ignorable WS ??? _ -> State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State), parse_cdata(Rest1, State1) @@ -1102,7 +1196,7 @@ parse_content(?STRING_REST("<!", Rest1) = Rest, #xmerl_sax_parser_state{end_tags parse_content(?STRING_REST("<", Rest1) = Rest, #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) -> case ET of [] -> - {Rest, State}; %%LATH : Skicka ignorable WS ??? + {Rest, State}; %% Skicka ignorable WS ??? _ -> State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State), parse_stag(Rest1, State1) @@ -1204,7 +1298,6 @@ send_character_event(_, true, String, State) -> %% Description: Parse whitespaces. %% [3] S ::= (#x20 | #x9 | #xD | #xA)+ %%---------------------------------------------------------------------- --dialyzer({no_fail_call, whitespace/3}). whitespace(?STRING_EMPTY, State, Acc) -> case cf(?STRING_EMPTY, State, Acc, fun whitespace/3) of {?STRING_EMPTY, State} -> @@ -1230,16 +1323,7 @@ whitespace(?STRING_REST("\r", Rest), State, Acc) -> whitespace(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf |Acc]); whitespace(?STRING_UNBOUND_REST(C, Rest), State, Acc) when ?is_whitespace(C) -> whitespace(Rest, State, [C|Acc]); -whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) -> - {lists:reverse(Acc), Bytes, State}; -whitespace(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State, Acc) when is_binary(Bytes) -> - case unicode:characters_to_list(Bytes, Enc) of - {incomplete, _, _} -> - cf(Bytes, State, Acc, fun whitespace/3); - {error, _Encoded, _Rest} -> - ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))) - end. - +?WHITESPACE(Bytes, State, Acc). %%---------------------------------------------------------------------- %% Function: parse_reference(Rest, State, HaveToExist) -> Result @@ -1362,23 +1446,24 @@ parse_pe_reference_1(Bytes, State, Name) -> "missing ; after reference " ++ Name). - %%---------------------------------------------------------------------- -%% Function: insert_reference(Reference, State) -> Result -%% Parameters: Reference = string() +%% Function: insert_reference(Name, Ref, State) -> Result +%% Parameters: Name = string() +%% Ref = {Type, Value} +%% Type = atom() +%% Value = term() %% State = #xmerl_sax_parser_state{} %% Result : %%---------------------------------------------------------------------- -insert_reference({Name, Type, Value}, Table) -> - case ets:lookup(Table, Name) of - [{Name, _, _}] -> - ok; +insert_reference(Name, Value, #xmerl_sax_parser_state{ref_table = Map} = State) -> + case maps:find(Name, Map) of + error -> + State#xmerl_sax_parser_state{ref_table = maps:put(Name, Value, Map)}; _ -> - ets:insert(Table, {Name, Type, Value}) + State end. - %%---------------------------------------------------------------------- %% Function: look_up_reference(Reference, State) -> Result %% Parameters: Reference = string() @@ -1396,8 +1481,8 @@ look_up_reference("apos", _, _) -> look_up_reference("quot", _, _) -> {internal_general, "quot", "\""}; look_up_reference(Name, HaveToExist, State) -> - case ets:lookup(State#xmerl_sax_parser_state.ref_table, Name) of - [{Name, Type, Value}] -> + case maps:find(Name, State#xmerl_sax_parser_state.ref_table) of + {ok, {Type, Value}} -> {Type, Name, Value}; _ -> case HaveToExist of @@ -1479,7 +1564,7 @@ parse_system_litteral(?STRING_EMPTY, State, Stop, Acc) -> parse_system_litteral(?STRING_UNBOUND_REST(Stop, Rest), State, Stop, Acc) -> {lists:reverse(Acc), Rest, State}; parse_system_litteral(?STRING_UNBOUND_REST(C, Rest), State, Stop, Acc) -> - parse_system_litteral(Rest, State, Stop, [C |Acc]); + parse_system_litteral(Rest, State, Stop, [C |Acc]); parse_system_litteral(Bytes, State, Stop, Acc) -> unicode_incomplete_check([Bytes, State, Stop, Acc, fun parse_system_litteral/4], undefined). @@ -1651,9 +1736,11 @@ parse_external_entity(State, _PubId, SysId) -> end_tags = []}, - EventState = handle_external_entity(ExtRef, State1), + {EventState, RefTable} = handle_external_entity(ExtRef, State1), - NewState = event_callback({endEntity, SysId}, SaveState#xmerl_sax_parser_state{event_state=EventState}), + NewState = event_callback({endEntity, SysId}, + SaveState#xmerl_sax_parser_state{event_state=EventState, + ref_table=RefTable}), NewState#xmerl_sax_parser_state{file_type=normal}. @@ -1680,7 +1767,8 @@ handle_external_entity({file, FileToOpen}, State) -> entity=filename:basename(FileToOpen), input_type=file}), ok = file:close(FD), - EntityState#xmerl_sax_parser_state.event_state + {EntityState#xmerl_sax_parser_state.event_state, + EntityState#xmerl_sax_parser_state.ref_table} end; handle_external_entity({http, Url}, State) -> @@ -1693,14 +1781,16 @@ handle_external_entity({http, Url}, State) -> ++ file:format_error(Reason)); {ok, FD} -> {?STRING_EMPTY, EntityState} = - parse_external_entity_1(<<>>, + parse_external_entity_byte_order_mark(<<>>, State#xmerl_sax_parser_state{continuation_state=FD, current_location=filename:dirname(Url), entity=filename:basename(Url), input_type=file}), ok = file:close(FD), ok = file:delete(TmpFile), - EntityState#xmerl_sax_parser_state.event_state + {EntityState#xmerl_sax_parser_state.event_state, + EntityState#xmerl_sax_parser_state.ref_table} + end catch throw:{error, Error} -> @@ -1709,6 +1799,8 @@ handle_external_entity({http, Url}, State) -> handle_external_entity({Tag, _Url}, State) -> ?fatal_error(State, "Unsupported URI type: " ++ atom_to_list(Tag)). +?PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State). + %%---------------------------------------------------------------------- %% Function : parse_external_entity_1(Rest, State) -> Result %% Parameters: Rest = string() | binary() @@ -1716,22 +1808,15 @@ handle_external_entity({Tag, _Url}, State) -> %% Result : {Rest, State} %% Description: Parse the external entity. %%---------------------------------------------------------------------- --dialyzer({[no_fail_call, no_match], parse_external_entity_1/2}). parse_external_entity_1(?STRING_EMPTY, #xmerl_sax_parser_state{file_type=Type} = State) -> case catch cf(?STRING_EMPTY, State, fun parse_external_entity_1/2) of {Rest, State1} when is_record(State1, xmerl_sax_parser_state) -> - {Rest, State}; + {Rest, State1}; {fatal_error, {State1, "No more bytes"}} when Type == dtd; Type == entity -> {?STRING_EMPTY, State1}; Other -> throw(Other) end; -parse_external_entity_1(?BYTE_ORDER_MARK_1, State) -> - cf(?BYTE_ORDER_MARK_1, State, fun parse_external_entity_1/2); -parse_external_entity_1(?BYTE_ORDER_MARK_2, State) -> - cf(?BYTE_ORDER_MARK_2, State, fun parse_external_entity_1/2); -parse_external_entity_1(?BYTE_ORDER_MARK_REST(Rest), State) -> - parse_external_entity_1(Rest, State); parse_external_entity_1(?STRING("<") = Bytes, State) -> cf(Bytes, State, fun parse_external_entity_1/2); parse_external_entity_1(?STRING("<?") = Bytes, State) -> @@ -2452,24 +2537,24 @@ parse_entity_def(?STRING_EMPTY, State, Name) -> cf(?STRING_EMPTY, State, Name, fun parse_entity_def/3); parse_entity_def(?STRING_UNBOUND_REST(C, Rest), State, Name) when C == $'; C == $" -> {Value, Rest1, State1} = parse_entity_value(Rest, State, C, []), - insert_reference({Name, internal_general, Value}, State1#xmerl_sax_parser_state.ref_table), - State2 = event_callback({internalEntityDecl, Name, Value}, State1), - {_WS, Rest2, State3} = whitespace(Rest1, State2, []), - parse_def_end(Rest2, State3); + State2 = insert_reference(Name, {internal_general, Value}, State1), + State3 = event_callback({internalEntityDecl, Name, Value}, State2), + {_WS, Rest2, State4} = whitespace(Rest1, State3, []), + parse_def_end(Rest2, State4); parse_entity_def(?STRING_UNBOUND_REST(C, _) = Rest, State, Name) when C == $S; C == $P -> {PubId, SysId, Rest1, State1} = parse_external_id(Rest, State, false), {Ndata, Rest2, State2} = parse_ndata(Rest1, State1), case Ndata of undefined -> - insert_reference({Name, external_general, {PubId, SysId}}, - State2#xmerl_sax_parser_state.ref_table), - State3 = event_callback({externalEntityDecl, Name, PubId, SysId}, State2), - {Rest2, State3}; + State3 = insert_reference(Name, {external_general, {PubId, SysId}}, + State2), + State4 = event_callback({externalEntityDecl, Name, PubId, SysId}, State3), + {Rest2, State4}; _ -> - insert_reference({Name, unparsed, {PubId, SysId, Ndata}}, - State2#xmerl_sax_parser_state.ref_table), - State3 = event_callback({unparsedEntityDecl, Name, PubId, SysId, Ndata}, State2), - {Rest2, State3} + State3 = insert_reference(Name, {unparsed, {PubId, SysId, Ndata}}, + State2), + State4 = event_callback({unparsedEntityDecl, Name, PubId, SysId, Ndata}, State3), + {Rest2, State4} end; parse_entity_def(Bytes, State, Name) -> unicode_incomplete_check([Bytes, State, Name, fun parse_entity_def/3], @@ -2646,19 +2731,19 @@ parse_pe_def(?STRING_EMPTY, State, Name) -> parse_pe_def(?STRING_UNBOUND_REST(C, Rest), State, Name) when C == $'; C == $" -> {Value, Rest1, State1} = parse_entity_value(Rest, State, C, []), Name1 = "%" ++ Name, - insert_reference({Name1, internal_parameter, Value}, - State1#xmerl_sax_parser_state.ref_table), - State2 = event_callback({internalEntityDecl, Name1, Value}, State1), - {_WS, Rest2, State3} = whitespace(Rest1, State2, []), - parse_def_end(Rest2, State3); + State2 = insert_reference(Name1, {internal_parameter, Value}, + State1), + State3 = event_callback({internalEntityDecl, Name1, Value}, State2), + {_WS, Rest2, State4} = whitespace(Rest1, State3, []), + parse_def_end(Rest2, State4); parse_pe_def(?STRING_UNBOUND_REST(C, _) = Bytes, State, Name) when C == $S; C == $P -> {PubId, SysId, Rest1, State1} = parse_external_id(Bytes, State, false), Name1 = "%" ++ Name, - insert_reference({Name1, external_parameter, {PubId, SysId}}, - State1#xmerl_sax_parser_state.ref_table), - State2 = event_callback({externalEntityDecl, Name1, PubId, SysId}, State1), - {_WS, Rest2, State3} = whitespace(Rest1, State2, []), - parse_def_end(Rest2, State3); + State2 = insert_reference(Name1, {external_parameter, {PubId, SysId}}, + State1), + State3 = event_callback({externalEntityDecl, Name1, PubId, SysId}, State2), + {_WS, Rest2, State4} = whitespace(Rest1, State3, []), + parse_def_end(Rest2, State4); parse_pe_def(Bytes, State, Name) -> unicode_incomplete_check([Bytes, State, Name, fun parse_pe_def/3], "\", \', SYSTEM or PUBLIC expected"). @@ -3290,7 +3375,7 @@ cf(Rest, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = C catch throw:ErrorTerm -> ?fatal_error(State, ErrorTerm); - exit:Reason -> + exit:Reason -> ?fatal_error(State, {'EXIT', Reason}) end, case Result of diff --git a/lib/xmerl/src/xmerl_sax_parser_latin1.erlsrc b/lib/xmerl/src/xmerl_sax_parser_latin1.erlsrc index 961806bf4c..6e59347fb8 100644 --- a/lib/xmerl/src/xmerl_sax_parser_latin1.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_latin1.erlsrc @@ -2,7 +2,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -34,8 +34,36 @@ -define(APPEND_STRING(Rest, New), <<Rest/binary, New/binary>>). -define(TO_INPUT_FORMAT(Val), unicode:characters_to_binary(Val, unicode, latin1)). -%% STRING_REST and STRING_UNBOUND_REST is only different in the list case -define(STRING_UNBOUND_REST(MatchChar, Rest), <<MatchChar, Rest/binary>>). --define(BYTE_ORDER_MARK_1, undefined_bom1). --define(BYTE_ORDER_MARK_2, undefined_bom2). --define(BYTE_ORDER_MARK_REST(Rest), <<undefined, Rest/binary>>). + +-define(PARSE_BYTE_ORDER_MARK(Bytes, State), + parse_byte_order_mark(Bytes, State) -> + parse_xml_decl(Bytes, State)). + +-define(PARSE_XML_DECL(Bytes, State), + parse_xml_decl(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, fun parse_xml_decl/2); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))); + _ -> + parse_prolog(Bytes, State) + end; + parse_xml_decl(Bytes, State) -> + parse_prolog(Bytes, State)). + +-define(WHITESPACE(Bytes, State, Acc), + whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) -> + {lists:reverse(Acc), Bytes, State}; + whitespace(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State, Acc) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, Acc, fun whitespace/3); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))) + end). + +-define(PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State), + parse_external_entity_byte_order_mark(Bytes, State) -> + parse_external_entity_1(Bytes, State)). diff --git a/lib/xmerl/src/xmerl_sax_parser_list.erlsrc b/lib/xmerl/src/xmerl_sax_parser_list.erlsrc index 624a621d92..ac89896215 100644 --- a/lib/xmerl/src/xmerl_sax_parser_list.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_list.erlsrc @@ -2,7 +2,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -36,6 +36,19 @@ %% In the list case we can't use a '++' when matchin against an unbound variable -define(STRING_UNBOUND_REST(MatchChar, Rest), [MatchChar | Rest]). --define(BYTE_ORDER_MARK_1, undefined_bom1). --define(BYTE_ORDER_MARK_2, undefined_bom2). --define(BYTE_ORDER_MARK_REST(Rest), [undefined|Rest]). + +-define(PARSE_BYTE_ORDER_MARK(Bytes, State), + parse_byte_order_mark(Bytes, State) -> + parse_xml_decl(Bytes, State)). + +-define(PARSE_XML_DECL(Bytes, State), + parse_xml_decl(Bytes, State) -> + parse_prolog(Bytes, State)). + +-define(WHITESPACE(Bytes, State, Acc), + whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) -> + {lists:reverse(Acc), Bytes, State}). + +-define(PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State), + parse_external_entity_byte_order_mark(Bytes, State) -> + parse_external_entity_1(Bytes, State)). diff --git a/lib/xmerl/src/xmerl_sax_parser_utf16be.erlsrc b/lib/xmerl/src/xmerl_sax_parser_utf16be.erlsrc index ff84ece97a..ec89024729 100644 --- a/lib/xmerl/src/xmerl_sax_parser_utf16be.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_utf16be.erlsrc @@ -2,7 +2,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -34,8 +34,50 @@ -define(APPEND_STRING(Rest, New), <<Rest/binary, New/binary>>). -define(TO_INPUT_FORMAT(Val), unicode:characters_to_binary(Val, unicode, {utf16, big})). -%% STRING_REST and STRING_UNBOUND_REST is only different in the list case -define(STRING_UNBOUND_REST(MatchChar, Rest), <<MatchChar/big-utf16, Rest/binary>>). --define(BYTE_ORDER_MARK_1, undefined_bom1). --define(BYTE_ORDER_MARK_2, <<16#FE>>). +-define(BYTE_ORDER_MARK_1, <<16#FE>>). -define(BYTE_ORDER_MARK_REST(Rest), <<16#FE, 16#FF, Rest/binary>>). + +-define(PARSE_BYTE_ORDER_MARK(Bytes, State), + parse_byte_order_mark(?STRING_EMPTY, State) -> + cf(?STRING_EMPTY, State, fun parse_byte_order_mark/2); + parse_byte_order_mark(?BYTE_ORDER_MARK_1, State) -> + cf(?BYTE_ORDER_MARK_1, State, fun parse_byte_order_mark/2); + parse_byte_order_mark(?BYTE_ORDER_MARK_REST(Rest), State) -> + parse_xml_decl(Rest, State); + parse_byte_order_mark(Bytes, State) -> + parse_xml_decl(Bytes, State)). + +-define(PARSE_XML_DECL(Bytes, State), + parse_xml_decl(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, fun parse_xml_decl/2); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))); + _ -> + parse_prolog(Bytes, State) + end; + parse_xml_decl(Bytes, State) -> + parse_prolog(Bytes, State)). + +-define(WHITESPACE(Bytes, State, Acc), + whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) -> + {lists:reverse(Acc), Bytes, State}; + whitespace(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State, Acc) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, Acc, fun whitespace/3); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))) + end). + +-define(PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State), + parse_external_entity_byte_order_mark(?STRING_EMPTY, State) -> + cf(?STRING_EMPTY, State, fun parse_external_entity_byte_order_mark/2); + parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_1, State) -> + cf(?BYTE_ORDER_MARK_1, State, fun parse_external_entity_byte_order_mark/2); + parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_REST(Rest), State) -> + parse_external_entity_1(Rest, State); + parse_external_entity_byte_order_mark(Bytes, State) -> + parse_external_entity_1(Bytes, State)). diff --git a/lib/xmerl/src/xmerl_sax_parser_utf16le.erlsrc b/lib/xmerl/src/xmerl_sax_parser_utf16le.erlsrc index a330fce8d0..566333a045 100644 --- a/lib/xmerl/src/xmerl_sax_parser_utf16le.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_utf16le.erlsrc @@ -2,7 +2,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -34,8 +34,50 @@ -define(APPEND_STRING(Rest, New), <<Rest/binary, New/binary>>). -define(TO_INPUT_FORMAT(Val), unicode:characters_to_binary(Val, unicode, {utf16, little})). -%% STRING_REST and STRING_UNBOUND_REST is only different in the list case -define(STRING_UNBOUND_REST(MatchChar, Rest), <<MatchChar/little-utf16, Rest/binary>>). --define(BYTE_ORDER_MARK_1, undefined_bom1). --define(BYTE_ORDER_MARK_2, <<16#FF>>). +-define(BYTE_ORDER_MARK_1, <<16#FF>>). -define(BYTE_ORDER_MARK_REST(Rest), <<16#FF, 16#FE, Rest/binary>>). + +-define(PARSE_BYTE_ORDER_MARK(Bytes, State), + parse_byte_order_mark(?STRING_EMPTY, State) -> + cf(?STRING_EMPTY, State, fun parse_byte_order_mark/2); + parse_byte_order_mark(?BYTE_ORDER_MARK_1, State) -> + cf(?BYTE_ORDER_MARK_1, State, fun parse_byte_order_mark/2); + parse_byte_order_mark(?BYTE_ORDER_MARK_REST(Rest), State) -> + parse_xml_decl(Rest, State); + parse_byte_order_mark(Bytes, State) -> + parse_xml_decl(Bytes, State)). + +-define(PARSE_XML_DECL(Bytes, State), + parse_xml_decl(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, fun parse_xml_decl/2); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))); + _ -> + parse_prolog(Bytes, State) + end; + parse_xml_decl(Bytes, State) -> + parse_prolog(Bytes, State)). + +-define(WHITESPACE(Bytes, State, Acc), + whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) -> + {lists:reverse(Acc), Bytes, State}; + whitespace(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State, Acc) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, Acc, fun whitespace/3); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))) + end). + +-define(PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State), + parse_external_entity_byte_order_mark(?STRING_EMPTY, State) -> + cf(?STRING_EMPTY, State, fun parse_external_entity_byte_order_mark/2); + parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_1, State) -> + cf(?BYTE_ORDER_MARK_1, State, fun parse_external_entity_byte_order_mark/2); + parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_REST(Rest), State) -> + parse_external_entity_1(Rest, State); + parse_external_entity_byte_order_mark(Bytes, State) -> + parse_external_entity_1(Bytes, State)). diff --git a/lib/xmerl/src/xmerl_sax_parser_utf8.erlsrc b/lib/xmerl/src/xmerl_sax_parser_utf8.erlsrc index d46d60d237..f41d06d013 100644 --- a/lib/xmerl/src/xmerl_sax_parser_utf8.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_utf8.erlsrc @@ -2,7 +2,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -34,11 +34,55 @@ -define(APPEND_STRING(Rest, New), <<Rest/binary, New/binary>>). -define(TO_INPUT_FORMAT(Val), unicode:characters_to_binary(Val, unicode, utf8)). - -%% STRING_REST and STRING_UNBOUND_REST is only different in the list case -define(STRING_UNBOUND_REST(MatchChar, Rest), <<MatchChar/utf8, Rest/binary>>). -define(BYTE_ORDER_MARK_1, <<16#EF>>). -define(BYTE_ORDER_MARK_2, <<16#EF, 16#BB>>). -define(BYTE_ORDER_MARK_REST(Rest), <<16#EF, 16#BB, 16#BF, Rest/binary>>). +-define(PARSE_BYTE_ORDER_MARK(Bytes, State), + parse_byte_order_mark(?STRING_EMPTY, State) -> + cf(?STRING_EMPTY, State, fun parse_byte_order_mark/2); + parse_byte_order_mark(?BYTE_ORDER_MARK_1, State) -> + cf(?BYTE_ORDER_MARK_1, State, fun parse_byte_order_mark/2); + parse_byte_order_mark(?BYTE_ORDER_MARK_2, State) -> + cf(?BYTE_ORDER_MARK_2, State, fun parse_byte_order_mark/2); + parse_byte_order_mark(?BYTE_ORDER_MARK_REST(Rest), State) -> + parse_xml_decl(Rest, State); + parse_byte_order_mark(Bytes, State) -> + parse_xml_decl(Bytes, State)). + +-define(PARSE_XML_DECL(Bytes, State), + parse_xml_decl(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, fun parse_xml_decl/2); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))); + _ -> + parse_prolog(Bytes, State) + end; + parse_xml_decl(Bytes, State) -> + parse_prolog(Bytes, State)). + +-define(WHITESPACE(Bytes, State, Acc), + whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) -> + {lists:reverse(Acc), Bytes, State}; + whitespace(Bytes, #xmerl_sax_parser_state{encoding=Enc} = State, Acc) when is_binary(Bytes) -> + case unicode:characters_to_list(Bytes, Enc) of + {incomplete, _, _} -> + cf(Bytes, State, Acc, fun whitespace/3); + {error, _Encoded, _Rest} -> + ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc]))) + end). +-define(PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State), + parse_external_entity_byte_order_mark(?STRING_EMPTY, State) -> + cf(?STRING_EMPTY, State, fun parse_external_entity_byte_order_mark/2); + parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_1, State) -> + cf(?BYTE_ORDER_MARK_1, State, fun parse_external_entity_byte_order_mark/2); + parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_2, State) -> + cf(?BYTE_ORDER_MARK_2, State, fun parse_external_entity_byte_order_mark/2); + parse_external_entity_byte_order_mark(?BYTE_ORDER_MARK_REST(Rest), State) -> + parse_external_entity_1(Rest, State); + parse_external_entity_byte_order_mark(Bytes, State) -> + parse_external_entity_1(Bytes, State)). diff --git a/lib/xmerl/src/xmerl_sax_simple_dom.erl b/lib/xmerl/src/xmerl_sax_simple_dom.erl index 7eb3afd499..7b15cd92dc 100644 --- a/lib/xmerl/src/xmerl_sax_simple_dom.erl +++ b/lib/xmerl/src/xmerl_sax_simple_dom.erl @@ -2,7 +2,7 @@ %%-------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2009-2016. All Rights Reserved. +%% Copyright Ericsson AB 2009-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -129,8 +129,9 @@ build_dom(endDocument, State#xmerl_sax_simple_dom_state{dom=[Decl, {Tag, Attributes, lists:reverse(Content)}]}; _ -> - ?dbg("~p\n", [D]), - ?error("we're not at end the document when endDocument event is encountered.") + %% endDocument is also sent by the parser when a fault occur to tell + %% the event receiver that no more input will be sent + State end; %% Element diff --git a/lib/xmerl/src/xmerl_scan.erl b/lib/xmerl/src/xmerl_scan.erl index 2147a46a13..a1f6ad4e2c 100644 --- a/lib/xmerl/src/xmerl_scan.erl +++ b/lib/xmerl/src/xmerl_scan.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2003-2016. All Rights Reserved. +%% Copyright Ericsson AB 2003-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -111,13 +111,16 @@ %% <dd>Set to 'true' if xmerl should add to elements missing attributes %% with a defined default value (default 'false').</dd> %% </dl> +%% @type xmlElement() = #xmlElement{}. +%% The record definition is found in xmerl.hrl. +%% @type xmlDocument() = #xmlDocument{}. +%% The record definition is found in xmerl.hrl. %% @type document() = xmlElement() | xmlDocument(). <p> %% The document returned by <tt>xmerl_scan:string/[1,2]</tt> and %% <tt>xmerl_scan:file/[1,2]</tt>. The type of the returned record depends on %% the value of the document option passed to the function. %% </p> - -module(xmerl_scan). -vsn('0.20'). -date('03-09-16'). @@ -471,8 +474,8 @@ event(_X, S) -> %% into multiple objects (in which case {Acc',Pos',S'} should be returned.) %% If {Acc',S'} is returned, Pos will be incremented by 1 by default. %% Below is an example of an acceptable operation -acc(X = #xmlText{value = Text}, Acc, S) -> - {[X#xmlText{value = Text}|Acc], S}; +acc(#xmlText{value = Text}, [X = #xmlText{value = AccText}], S) -> + {[X#xmlText{value = AccText ++ Text}], S}; acc(X, Acc, S) -> {[X|Acc], S}. @@ -2222,16 +2225,18 @@ processed_whole_element(S=#xmerl_scanner{hook_fun = _Hook, AllAttrs = case S#xmerl_scanner.default_attrs of true -> - [ #xmlAttribute{name = AttName, - parents = [{Name, Pos} | Parents], - language = Lang, - nsinfo = NSI, - namespace = Namespace, - value = AttValue, - normalized = true} || - {AttName, AttValue} <- get_default_attrs(S, Name), - AttValue =/= no_value, - not lists:keymember(AttName, #xmlAttribute.name, Attrs) ]; + DefaultAttrs = + [ #xmlAttribute{name = AttName, + parents = [{Name, Pos} | Parents], + language = Lang, + nsinfo = NSI, + namespace = Namespace, + value = AttValue, + normalized = true} || + {AttName, AttValue} <- get_default_attrs(S, Name), + AttValue =/= no_value, + not lists:keymember(AttName, #xmlAttribute.name, Attrs) ], + lists:append(Attrs, DefaultAttrs); false -> Attrs end, @@ -2304,7 +2309,9 @@ expanded_name(Name, [], #xmlNamespace{default = URI}, S) -> expanded_name(Name, N = {"xmlns", Local}, #xmlNamespace{nodes = Ns}, S) -> {_, Value} = lists:keyfind(Local, 1, Ns), case Name of - 'xmlns:xml' when Value =/= 'http://www.w3.org/XML/1998/namespace' -> + 'xmlns:xml' when Value =:= 'http://www.w3.org/XML/1998/namespace' -> + N; + 'xmlns:xml' when Value =/= 'http://www.w3.org/XML/1998/namespace' -> ?fatal({xml_prefix_cannot_be_redeclared, Value}, S); 'xmlns:xmlns' -> ?fatal({xmlns_prefix_cannot_be_declared, Value}, S); @@ -2318,6 +2325,8 @@ expanded_name(Name, N = {"xmlns", Local}, #xmlNamespace{nodes = Ns}, S) -> N end end; +expanded_name(_Name, {"xml", Local}, _NS, _S) -> + {'http://www.w3.org/XML/1998/namespace', list_to_atom(Local)}; expanded_name(_Name, {Prefix, Local}, #xmlNamespace{nodes = Ns}, S) -> case lists:keysearch(Prefix, 1, Ns) of {value, {_, URI}} -> @@ -2328,9 +2337,6 @@ expanded_name(_Name, {Prefix, Local}, #xmlNamespace{nodes = Ns}, S) -> ?fatal({namespace_prefix_not_declared, Prefix}, S) end. - - - keyreplaceadd(K, Pos, [H|T], Obj) when K == element(Pos, H) -> [Obj|T]; keyreplaceadd(K, Pos, [H|T], Obj) -> diff --git a/lib/xmerl/src/xmerl_xpath.erl b/lib/xmerl/src/xmerl_xpath.erl index bbebda1030..6146feba49 100644 --- a/lib/xmerl/src/xmerl_xpath.erl +++ b/lib/xmerl/src/xmerl_xpath.erl @@ -43,13 +43,27 @@ %% </pre> %% %% @type nodeEntity() = -%% xmlElement() -%% | xmlAttribute() -%% | xmlText() -%% | xmlPI() -%% | xmlComment() -%% | xmlNsNode() -%% | xmlDocument() +%% #xmlElement{} +%% | #xmlAttribute{} +%% | #xmlText{} +%% | #xmlPI{} +%% | #xmlComment{} +%% | #xmlNsNode{} +%% | #xmlDocument{} +%% +%% @type docNodes() = #xmlElement{} +%% | #xmlAttribute{} +%% | #xmlText{} +%% | #xmlPI{} +%% | #xmlComment{} +%% | #xmlNsNode{} +%% +%% @type docEntity() = #xmlDocument{} | [docNodes()] +%% +%% @type xPathString() = string() +%% +%% @type parentList() = [{atom(), integer()}] +%% %% @type option_list(). <p>Options allows to customize the behaviour of the %% XPath scanner. %% </p> @@ -115,7 +129,7 @@ string(Str, Doc, Options) -> %% Parents = parentList() %% Doc = nodeEntity() %% Options = option_list() -%% Scalar = xmlObj +%% Scalar = #xmlObj{} %% @doc Extracts the nodes from the parsed XML tree according to XPath. %% xmlObj is a record with fields type and value, %% where type is boolean | number | string diff --git a/lib/xmerl/src/xmerl_xs.erl b/lib/xmerl/src/xmerl_xs.erl index 3e9f6622b8..1ce76cfa41 100644 --- a/lib/xmerl/src/xmerl_xs.erl +++ b/lib/xmerl/src/xmerl_xs.erl @@ -45,7 +45,6 @@ % XSLT package which is written i C++. % See also the <a href="xmerl_xs_examples.html">Tutorial</a>. % </p> - -module(xmerl_xs). -export([xslapply/2, value_of/1, select/2, built_in_rules/2 ]). @@ -71,15 +70,13 @@ %% xslapply(fun template/1, E), %% "</h1>"]; %% </pre> - xslapply(Fun, EList) when is_list(EList) -> - lists:map( Fun, EList); + lists:map(Fun, EList); xslapply(Fun, E = #xmlElement{})-> lists:map( Fun, E#xmlElement.content). - %% @spec value_of(E) -> List -%% E = unknown() +%% E = term() %% %% @doc Concatenates all text nodes within the tree. %% diff --git a/lib/xmerl/src/xmerl_xsd.erl b/lib/xmerl/src/xmerl_xsd.erl index 4b5efae8dd..a89b3159ec 100644 --- a/lib/xmerl/src/xmerl_xsd.erl +++ b/lib/xmerl/src/xmerl_xsd.erl @@ -49,6 +49,7 @@ %% <dd>It is possible by this option to provide a state with process %% information from an earlier validation.</dd> %% </dl> +%% @type filename() = string() %% @end %%%------------------------------------------------------------------- -module(xmerl_xsd). @@ -138,7 +139,7 @@ state2file(S=#xsd_state{schema_name=SN}) -> %% @spec state2file(State,FileName) -> ok | {error,Reason} %% State = global_state() -%% FileName = filename() +%% FileName = string() %% @doc Saves the schema state with all information of the processed %% schema in a file. You can provide the file name for the saved %% state. FileName is saved with the <code>.xss</code> extension @@ -153,7 +154,7 @@ state2file(S,FileName) when is_record(S,xsd_state) -> %% @spec file2state(FileName) -> {ok,State} | {error,Reason} %% State = global_state() -%% FileName = filename() +%% FileName = string() %% @doc Reads the schema state with all information of the processed %% schema from a file created with <code>state2file/[1,2]</code>. The %% format of this file is internal. The state can then be used @@ -202,7 +203,7 @@ xmerl_xsd_vsn_check(S=#xsd_state{vsn=MD5_VSN}) -> process_validate(Schema,Xml) -> process_validate(Schema,Xml,[]). %% @spec process_validate(Schema,Element,Options) -> Result -%% Schema = filename() +%% Schema = string() %% Element = XmlElement %% Options = option_list() %% Result = {ValidXmlElement,State} | {error,Reason} @@ -282,7 +283,7 @@ validate3(_,_,S) -> process_schema(Schema) -> process_schema(Schema,[]). %% @spec process_schema(Schema,Options) -> Result -%% Schema = filename() +%% Schema = string() %% Result = {ok,State} | {error,Reason} %% State = global_state() %% Reason = [ErrorReason] | ErrorReason @@ -324,7 +325,7 @@ process_schema2({SE,_},State,_Schema) -> process_schemas(Schemas) -> process_schemas(Schemas,[]). %% @spec process_schemas(Schemas,Options) -> Result -%% Schemas = [{NameSpace,filename()}|Schemas] | [] +%% Schemas = [{NameSpace,string()}|Schemas] | [] %% Result = {ok,State} | {error,Reason} %% Reason = [ErrorReason] | ErrorReason %% Options = option_list() @@ -5426,7 +5427,7 @@ add_key_once(Key,N,El,L) -> %% {filename:join([[io_lib:format("/~w(~w)",[X,Y])||{X,Y}<-Parents],Type]),Pos}. %% @spec format_error(Errors) -> Result -%% Errors = error_tuple() | [error_tuple()] +%% Errors = tuple() | [tuple()] %% Result = string() | [string()] %% @doc Formats error descriptions to human readable strings. format_error(L) when is_list(L) -> |