From 84adefa331c4159d432d22840663c38f155cd4c1 Mon Sep 17 00:00:00 2001 From: Erlang/OTP Date: Fri, 20 Nov 2009 14:54:40 +0000 Subject: The R13B03 release. --- lib/xmerl/src/xmerl_scan.erl | 4088 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 4088 insertions(+) create mode 100644 lib/xmerl/src/xmerl_scan.erl (limited to 'lib/xmerl/src/xmerl_scan.erl') diff --git a/lib/xmerl/src/xmerl_scan.erl b/lib/xmerl/src/xmerl_scan.erl new file mode 100644 index 0000000000..4e5cc59d8f --- /dev/null +++ b/lib/xmerl/src/xmerl_scan.erl @@ -0,0 +1,4088 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2003-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%% Description : Simgle-pass XML scanner. See xmerl.hrl for data defs. + +%% @doc This module is the interface to the XML parser, it handles XML 1.0. +%% The XML parser is activated through +%% xmerl_scan:string/[1,2] or +%% xmerl_scan:file/[1,2]. +%% It returns records of the type defined in xmerl.hrl. +%% See also tutorial on customization +%% functions. +%% @type global_state().

+%% The global state of the scanner, represented by the #xmerl_scanner{} record. +%%

+%% @type option_list().

Options allow to customize the behaviour of the +%% scanner. +%% See also tutorial on customization +%% functions. +%%

+%% Possible options are: +%%
+%%
{acc_fun, Fun}
+%%
Call back function to accumulate contents of entity.
+%%
{continuation_fun, Fun} | +%% {continuation_fun, Fun, ContinuationState}
+%%
Call back function to decide what to do if the scanner runs into EOF +%% before the document is complete.
+%%
{event_fun, Fun} | +%% {event_fun, Fun, EventState}
+%%
Call back function to handle scanner events.
+%%
{fetch_fun, Fun} | +%% {fetch_fun, Fun, FetchState}
+%%
Call back function to fetch an external resource.
+%%
{hook_fun, Fun} | +%% {hook_fun, Fun, HookState}
+%%
Call back function to process the document entities once +%% identified.
+%%
{close_fun, Fun}
+%%
Called when document has been completely parsed.
+%%
{rules, ReadFun, WriteFun, RulesState} | +%% {rules, Rules}
+%%
Handles storing of scanner information when parsing.
+%%
{user_state, UserState}
+%%
Global state variable accessible from all customization functions
+%% +%%
{fetch_path, PathList}
+%%
PathList is a list of +%% directories to search when fetching files. If the file in question +%% is not in the fetch_path, the URI will be used as a file +%% name.
+%%
{space, Flag}
+%%
'preserve' (default) to preserve spaces, 'normalize' to +%% accumulate consecutive whitespace and replace it with one space.
+%%
{line, Line}
+%%
To specify starting line for scanning in document which contains +%% fragments of XML.
+%%
{namespace_conformant, Flag}
+%%
Controls whether to behave as a namespace conformant XML parser, +%% 'false' (default) to not otherwise 'true'.
+%%
{validation, Flag}
+%%
Controls whether to process as a validating XML parser: +%% 'off' (default) no validation, or validation 'dtd' by DTD or 'schema' +%% by XML Schema. 'false' and 'true' options are obsolete +%% (i.e. they may be removed in a future release), if used 'false' +%% equals 'off' and 'true' equals 'dtd'.
+%%
{schemaLocation, [{Namespace,Link}|...]}
+%%
Tells explicitly which XML Schema documents to use to validate +%% the XML document. Used together with the +%% {validation,schema} option.
+%%
{quiet, Flag}
+%%
Set to 'true' if xmerl should behave quietly and not output any +%% information to standard output (default 'false').
+%%
{doctype_DTD, DTD}
+%%
Allows to specify DTD name when it isn't available in the XML +%% document. This option has effect only together with +%% {validation,'dtd' option.
+%%
{xmlbase, Dir}
+%%
XML Base directory. If using string/1 default is current directory. +%% If using file/1 default is directory of given file.
+%%
{encoding, Enc}
+%%
Set default character set used (default UTF-8). +%% This character set is used only if not explicitly given by the XML +%% declaration.
+%%
+ +-module(xmerl_scan). +-vsn('0.20'). +-date('03-09-16'). + +%% main API +-export([string/1, string/2, + file/1, file/2]). + +%% access functions for various states +-export([user_state/1, user_state/2, + event_state/1, event_state/2, + hook_state/1, hook_state/2, + rules_state/1, rules_state/2, + fetch_state/1, fetch_state/2, + cont_state/1, cont_state/2]). + +%% helper functions. To xmerl_lib ?? +-export([accumulate_whitespace/4]). + +%-define(debug, 1). +-include("xmerl.hrl"). % record def, macros +-include("xmerl_internal.hrl"). +-include_lib("kernel/include/file.hrl"). + + +-define(fatal(Reason, S), + if + S#xmerl_scanner.quiet -> + ok; + true -> + ok=io:format("~p- fatal: ~p~n", [?LINE, Reason]) + end, + fatal(Reason, S)). + + +-define(ustate(U, S), S#xmerl_scanner{user_state = U}). + + +%% Functions to access the various states + +%%% @spec user_state(S::global_state()) -> global_state() +%%% @equiv user_state(UserState,S) +user_state(#xmerl_scanner{user_state = S}) -> S. + +%%% @spec event_state(S::global_state()) -> global_state() +%%% @equiv event_state(EventState,S) +event_state(#xmerl_scanner{fun_states = #xmerl_fun_states{event = S}}) -> S. + +%%% @spec hook_state(S::global_state()) -> global_state() +%%% @equiv hook_state(HookState,S) +hook_state(#xmerl_scanner{fun_states = #xmerl_fun_states{hook = S}}) -> S. + +%%% @spec rules_state(S::global_state()) -> global_state() +%%% @equiv rules_state(RulesState,S) +rules_state(#xmerl_scanner{fun_states = #xmerl_fun_states{rules = S}}) -> S. + +%%% @spec fetch_state(S::global_state()) -> global_state() +%%% @equiv fetch_state(FetchState,S) +fetch_state(#xmerl_scanner{fun_states = #xmerl_fun_states{fetch = S}}) -> S. + +%%% @spec cont_state(S::global_state()) -> global_state() +%%% @equiv cont_state(ContinuationState,S) +cont_state(#xmerl_scanner{fun_states = #xmerl_fun_states{cont = S}}) -> S. + + +%%%% Functions to modify the various states + +%%% @spec user_state(UserState, S::global_state()) -> global_state() +%%% @doc For controlling the UserState, to be used in a user function. +%%% See tutorial on customization functions. +user_state(X, S) -> + S#xmerl_scanner{user_state = X}. + +%%% @spec event_state(EventState, S::global_state()) -> global_state() +%%% @doc For controlling the EventState, to be used in an event +%%% function, and called at the beginning and at the end of a parsed entity. +%%% See tutorial on customization functions. +event_state(X, S=#xmerl_scanner{fun_states = FS}) -> + FS1 = FS#xmerl_fun_states{event = X}, + S#xmerl_scanner{fun_states = FS1}. + +%%% @spec hook_state(HookState, S::global_state()) -> global_state() +%%% @doc For controlling the HookState, to be used in a hook +%%% function, and called when the parser has parsed a complete entity. +%%% See tutorial on customization functions. +hook_state(X, S=#xmerl_scanner{fun_states = FS}) -> + FS1 = FS#xmerl_fun_states{hook = X}, + S#xmerl_scanner{fun_states = FS1}. + +%%% @spec rules_state(RulesState, S::global_state()) -> global_state() +%%% @doc For controlling the RulesState, to be used in a rules +%%% function, and called when the parser store scanner information in a rules +%%% database. +%%% See tutorial on customization functions. +rules_state(X, S=#xmerl_scanner{fun_states = FS}) -> + FS1 = FS#xmerl_fun_states{rules = X}, + S#xmerl_scanner{fun_states = FS1}. + +%%% @spec fetch_state(FetchState, S::global_state()) -> global_state() +%%% @doc For controlling the FetchState, to be used in a fetch +%%% function, and called when the parser fetch an external resource (eg. a DTD). +%%% See tutorial on customization functions. +fetch_state(X, S=#xmerl_scanner{fun_states = FS}) -> + FS1 = FS#xmerl_fun_states{fetch = X}, + S#xmerl_scanner{fun_states = FS1}. + +%%% @spec cont_state(ContinuationState, S::global_state()) -> global_state() +%%% @doc For controlling the ContinuationState, to be used in a continuation +%%% function, and called when the parser encounters the end of the byte stream. +%%% See tutorial on customization functions. +cont_state(X, S=#xmerl_scanner{fun_states = FS}) -> + FS1 = FS#xmerl_fun_states{cont = X}, + S#xmerl_scanner{fun_states = FS1}. + + +%% @spec file(Filename::string()) -> {xmlElement(),Rest} +%% Rest = list() +%% @equiv file(Filename, []) +file(F) -> + file(F, []). + +%% @spec file(Filename::string(), Options::option_list()) -> {xmlElement(),Rest} +%% Rest = list() +%%% @doc Parse file containing an XML document +file(F, Options) -> + ExtCharset=case lists:keysearch(encoding,1,Options) of + {value,{_,Val}} -> Val; + false -> undefined + end, + case int_file(F,Options,ExtCharset) of + {Res, Tail,S=#xmerl_scanner{close_fun=Close}} -> + Close(S), % for side effects only - final state is dropped + {Res,Tail}; + {error, Reason} -> + {error, Reason} + end. + +int_file(F, Options,_ExtCharset) -> + %%io:format("int_file F=~p~n",[F]), + case file:read_file(F) of + {ok, Bin} -> + int_string(binary_to_list(Bin), Options, filename:dirname(F),F); + Error -> + Error + end. + +int_file_decl(F, Options,_ExtCharset) -> +% io:format("int_file_decl F=~p~n",[F]), + case file:read_file(F) of + {ok, Bin} -> + int_string_decl(binary_to_list(Bin), Options, filename:dirname(F),F); + Error -> + Error + end. + +%% @spec string(Text::list()) -> {xmlElement(),Rest} +%% Rest = list() +%% @equiv string(Test, []) +string(Str) -> + string(Str, []). + +%% @spec string(Text::list(),Options::option_list()) -> {xmlElement(),Rest} +%% Rest = list() +%%% @doc Parse string containing an XML document +string(Str, Options) -> + {Res, Tail, S=#xmerl_scanner{close_fun = Close}} = + int_string(Str, Options,file_name_unknown), + Close(S), % for side effects only - final state is dropped + {Res,Tail}. + +int_string(Str, Options,FileName) -> + {ok, XMLBase} = file:get_cwd(), + int_string(Str, Options, XMLBase, FileName). + +int_string(Str, Options, XMLBase, FileName) -> + S0=initial_state0(Options,XMLBase), + S = S0#xmerl_scanner{filename=FileName}, + %%io:format("int_string1, calling xmerl_lib:detect_charset~n",[]), + + %% In case of no encoding attribute in document utf-8 is default, but + %% another character set may be detected with help of Byte Order Marker or + %% with help of the encoding of the first 4 bytes. + case xmerl_lib:detect_charset(S#xmerl_scanner.encoding,Str) of + {auto,'iso-10646-utf-1',Str2} -> + scan_document(Str2, S#xmerl_scanner{encoding="iso-10646-utf-1"}); + {external,'iso-10646-utf-1',Str2} -> + scan_document(Str2, S#xmerl_scanner{encoding="iso-10646-utf-1"}); + {undefined,undefined,Str2} -> %% no auto detection + scan_document(Str2, S); + {external,ExtCharset,Str2} -> + %% no auto detection, ExtCharset is an explicitly provided + %% 7 bit,8 bit or utf-8 encoding + scan_document(Str2, S#xmerl_scanner{encoding=atom_to_list(ExtCharset)}) + end. + +int_string_decl(Str, Options, XMLBase, FileName) -> + S0=initial_state0(Options,XMLBase), + S = S0#xmerl_scanner{filename=FileName}, + case xmerl_lib:detect_charset(S#xmerl_scanner.encoding,Str) of + {auto,'iso-10646-utf-1',Str2} -> + scan_decl(Str2, S#xmerl_scanner{encoding="iso-10646-utf-1"}); + {external,'iso-10646-utf-1',Str2} -> + scan_decl(Str2, S#xmerl_scanner{encoding="iso-10646-utf-1"}); + {undefined,undefined,Str2} -> + scan_decl(Str2, S); + {external,ExtCharset,Str2} -> + scan_decl(Str2, S#xmerl_scanner{encoding=atom_to_list(ExtCharset)}) + end. + + + +initial_state0(Options,XMLBase) -> + CommonData = common_data(), + initial_state(Options, #xmerl_scanner{ + event_fun = fun event/2, + hook_fun = fun hook/2, + acc_fun = fun acc/3, + fetch_fun = fun fetch/2, + close_fun = fun close/1, + continuation_fun = fun cont/3, + rules_read_fun = fun rules_read/3, + rules_write_fun = fun rules_write/4, + rules_delete_fun= fun rules_delete/3, + xmlbase = XMLBase, + common_data = CommonData + }). + +initial_state([{event_fun, F}|T], S) -> + initial_state(T, S#xmerl_scanner{event_fun = F}); +initial_state([{event_fun, F, ES}|T], S) -> + S1 = event_state(ES, S#xmerl_scanner{event_fun = F}), + initial_state(T, S1); +initial_state([{acc_fun, F}|T], S) -> + initial_state(T, S#xmerl_scanner{acc_fun = F}); +initial_state([{hook_fun, F}|T], S) -> + initial_state(T, S#xmerl_scanner{hook_fun = F}); +initial_state([{hook_fun, F, HS}|T], S) -> + S1 = hook_state(HS, S#xmerl_scanner{hook_fun = F}), + initial_state(T, S1); +initial_state([{close_fun, F}|T], S) -> + initial_state(T, S#xmerl_scanner{close_fun = F}); +initial_state([{fetch_fun, F}|T], S) -> + initial_state(T, S#xmerl_scanner{fetch_fun = F}); +initial_state([{fetch_fun, F, FS}|T], S) -> + S1 = fetch_state(FS, S#xmerl_scanner{fetch_fun = F}), + initial_state(T, S1); +initial_state([{fetch_path, P}|T], S) -> + initial_state(T, S#xmerl_scanner{fetch_path = P}); +initial_state([{continuation_fun, F}|T], S) -> + initial_state(T, S#xmerl_scanner{continuation_fun = F}); +initial_state([{continuation_fun, F, CS}|T], S) -> + S1 = cont_state(CS, S#xmerl_scanner{continuation_fun = F}), + initial_state(T, S1); +initial_state([{rules, R}|T], S) -> + initial_state(T, S#xmerl_scanner{rules = R, + keep_rules = true}); +initial_state([{rules, Read, Write, RS}|T], S) -> + S1 = rules_state(RS, S#xmerl_scanner{rules_read_fun = Read, + rules_write_fun = Write, + keep_rules = true}), + initial_state(T, S1); +initial_state([{user_state, F}|T], S) -> + initial_state(T, S#xmerl_scanner{user_state = F}); +initial_state([{space, L}|T], S) -> + initial_state(T, S#xmerl_scanner{space = L}); +initial_state([{line, L}|T], S) -> + initial_state(T, S#xmerl_scanner{line = L}); +initial_state([{namespace_conformant, F}|T], S) when F==true; F==false -> + initial_state(T, S#xmerl_scanner{namespace_conformant = F}); +initial_state([{validation, F}|T], S) + when F==off; F==dtd; F==schema; F==true; F==false -> + initial_state(T, S#xmerl_scanner{validation = validation_value(F)}); +initial_state([{schemaLocation, SL}|T], S) when is_list(SL) -> + initial_state(T, S#xmerl_scanner{schemaLocation=SL}); +initial_state([{quiet, F}|T], S) when F==true; F==false -> + initial_state(T, S#xmerl_scanner{quiet = F}); +initial_state([{doctype_DTD,DTD}|T], S) -> + initial_state(T,S#xmerl_scanner{doctype_DTD = DTD}); +initial_state([{text_decl,Bool}|T], S) -> + initial_state(T,S#xmerl_scanner{text_decl=Bool}); +initial_state([{environment,Env}|T], S) -> + initial_state(T,S#xmerl_scanner{environment=Env}); +initial_state([{xmlbase, D}|T], S) -> + initial_state(T, S#xmerl_scanner{xmlbase = D}); +initial_state([{encoding, Enc}|T], S) -> + initial_state(T, S#xmerl_scanner{encoding = Enc}); +initial_state([], S=#xmerl_scanner{rules = undefined}) -> + Tab = ets:new(rules, [set, public]), + S#xmerl_scanner{rules = Tab}; +initial_state([], S) -> + S. + +validation_value(true) -> + dtd; +validation_value(false) -> + off; +validation_value(F) -> + F. + +%% Used for compacting (some) indentations. +%% See also fast_accumulate_whitespace(). +common_data() -> + {comdata(lists:duplicate(60, $\s), []), + comdata(lists:duplicate(15, $\t), []), + "\n"}. + +comdata([], CD)-> + list_to_tuple(CD); +comdata([_ | T]=L, CD) -> + comdata(T, [[$\n | L] | CD]). + +%%% ----------------------------------------------------- +%%% Default modifier functions + +%%% Hooks: +%%% - {element, Line, Name, Attrs, Content} +%%% - {processing_instruction, Line, Data} + +hook(X, State) -> + {X, State}. + +%%% Events: +%%% +%%% #xmerl_event{event : started | ended, +%%% line : integer(), +%%% col : integer(), +%%% data} +%%% +%%% Data Events +%%% document started, ended +%%% #xmlElement started, ended +%%% #xmlAttribute ended +%%% #xmlPI ended +%%% #xmlComment ended +%%% #xmlText ended +event(_X, S) -> + S. + +%% The acc/3 function can return either {Acc´, S'} or {Acc', Pos', S'}, +%% where Pos' can be derived from X#xmlElement.pos, X#xmlText.pos, or +%% X#xmlAttribute.pos (whichever is the current object type.) +%% The acc/3 function is not allowed to redefine the type of object +%% being defined, but _is_ allowed to either ignore it or split it +%% into multiple objects (in which case {Acc',Pos',S'} should be returned.) +%% If {Acc',S'} is returned, Pos will be incremented by 1 by default. +%% Below is an example of an acceptable operation +acc(X = #xmlText{value = Text}, Acc, S) -> + {[X#xmlText{value = Text}|Acc], S}; +acc(X, Acc, S) -> + {[X|Acc], S}. + +fetch({system, URI}, S) -> + fetch_URI(URI, S); +fetch({public, _PublicID, URI}, S) -> + fetch_URI(URI, S). + +%%% Always assume an external resource can be found locally! Thus +%%% don't bother fetching with e.g. HTTP. Returns the path where the +%%% resource is found. The path to the external resource is given by +%%% URI directly or the option fetch_path (additional paths) or +%%% directory (base path to external resource) +fetch_URI(URI, S) -> + %% assume URI is a filename + Split = filename:split(URI), + Filename = fun([])->[];(X)->lists:last(X) end (Split), + Fullname = + case Split of %% how about Windows systems? + ["file:"|Name]-> %% absolute path, see RFC2396 sect 3 + %% file:/dtd_name + filename:join(["/"|Name]); + ["/"|Rest] when Rest /= [] -> + %% absolute path name + URI; + ["http:"|_Rest] -> + {http,URI}; + [] -> %% empty systemliteral + []; + _ -> + filename:join(S#xmerl_scanner.xmlbase, URI) + end, + Path = path_locate(S#xmerl_scanner.fetch_path, Filename, Fullname), + ?dbg("fetch(~p) -> {file, ~p}.~n", [URI, Path]), + {ok, Path, S}. + +path_locate(_, _, {http,_}=URI) -> + URI; +path_locate(_, _, []) -> + []; +path_locate([Dir|Dirs], FN, FullName) -> + F = filename:join(Dir, FN), + case file:read_file_info(F) of + {ok, #file_info{type = regular}} -> + {file,F}; + _ -> + path_locate(Dirs, FN, FullName) + end; +path_locate([], _FN, FullName) -> + {file,FullName}. + + +cont(_F, Exception, US) -> + Exception(US). + +close(S) -> + S. + + +%%% ----------------------------------------------------- +%%% Scanner + +%%% [1] document ::= prolog element Misc* +scan_document(Str0, S=#xmerl_scanner{event_fun = Event, + line = L, col = C, + environment=Env, + encoding=Charset, + validation=ValidateResult}) -> + S1 = Event(#xmerl_event{event = started, + line = L, + col = C, + data = document}, S), + + %% Transform to given character set. + %% Note that if another character set is given in the encoding + %% attribute in a XML declaration that one will be used later + Str=if + Charset == "utf-8" -> + Str0; + Charset=/=undefined -> % Default character set is UTF-8 + xmerl_ucs:to_unicode(Str0,list_to_atom(Charset)); + true -> %% Charset is undefined if no external input is + %% given, and no auto detection of character + %% encoding was made. + Str0 + end, +%% M1 = erlang:memory(), +%% io:format("Memory status before prolog: ~p~n",[M1]), + {T1, S2} = scan_prolog(Str, S1, _StartPos = 1), +%% M2 = erlang:memory(), +%% io:format("Memory status after prolog: ~p~n",[M2]), + %%io:format("scan_document 2, prolog parsed~n",[]), + T2 = scan_mandatory("<",T1,1,S2,expected_element_start_tag), +%% M3 = erlang:memory(), +%% io:format("Memory status before element: ~p~n",[M3]), + {Res, T3, S3} =scan_element(T2,S2,_StartPos = 1), +%% M4 = erlang:memory(), +%% io:format("Memory status after element: ~p~n",[M4]), + {Tail, S4}=scan_misc(T3, S3, _StartPos = 1), +%% M5 = erlang:memory(), +%% io:format("Memory status after misc: ~p~n",[M5]), + + S5 = #xmerl_scanner{} = Event(#xmerl_event{event = ended, + line = S4#xmerl_scanner.line, + col = S4#xmerl_scanner.col, + data = document}, S4), + + {Res2,S6} = case validation_mode(ValidateResult) of + off -> + {Res,cleanup(S5)}; + dtd when Env == element; Env == prolog -> + check_decl2(S5), + case xmerl_validate:validate(S5,Res) of + {'EXIT',{error,Reason}} -> + S5b=cleanup(S5), + ?fatal({failed_validation,Reason}, S5b); + {'EXIT',Reason} -> + S5b=cleanup(S5), + ?fatal({failed_validation,Reason}, S5b); + {error,Reason} -> + S5b=cleanup(S5), + ?fatal({failed_validation,Reason}, S5b); + {error,Reason,_Next} -> + S5b=cleanup(S5), + ?fatal({failed_validation,Reason}, S5b); + _XML -> + {Res,cleanup(S5)} + end; + schema -> + case schemaLocations(Res,S5) of + {ok,Schemas} -> + cleanup(S5), + %%io:format("Schemas: ~p~nRes: ~p~ninhertih_options(S): ~p~n", + %% [Schemas,Res,inherit_options(S5)]), + XSDRes = xmerl_xsd:process_validate(Schemas,Res, + inherit_options(S5)), + handle_schema_result(XSDRes,S5); + _ -> + {Res,cleanup(S5)} + end; + _ -> + {Res,cleanup(S5)} + end, + + {Res2, Tail, S6}. + + +scan_decl(Str, S=#xmerl_scanner{event_fun = Event, + line = L, col = C, + environment=_Env, + encoding=_Charset, + validation=_ValidateResult}) -> + S1 = Event(#xmerl_event{event = started, + line = L, + col = C, + data = document}, S), + + case scan_prolog(Str, S1, _StartPos = 1) of + {T2="<"++_, S2} -> + {{S2#xmerl_scanner.user_state,T2},[],S2}; + {[], S2}-> + {[],[],S2}; + {T2, S2} -> + {_,_,S3} = scan_content(T2,S2,[],_Attrs=[],S2#xmerl_scanner.space, + _Lang=[],_Parents=[],#xmlNamespace{}), + {T2,[],S3} + end. + + +%%% [22] Prolog +%%% prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? +%%% +%% empty text declarations are handled by the first function clause. +scan_prolog([], S=#xmerl_scanner{continuation_fun = F}, Pos) -> + ?dbg("cont()...~n", []), + F(fun(MoreBytes, S1) -> scan_prolog(MoreBytes, S1, Pos) end, + fun(S1) -> {[], S1} end, + S); +scan_prolog(" + {Charset,T3, S3}= + if + Col==1,L==1,S0#xmerl_scanner.text_decl==true -> + ?dbg("prolog(\" + ?dbg("prolog(\" + ?fatal({xml_declaration_must_be_first_in_doc,Col,L},S0) + end, + %% Charset0 is either (1) 'iso-10646-utf-1' (transformation by + %% auto detection), (2) undefined (no auto detection and no + %% external encoding), (3) any other encoding format that must be + %% conformant to the internal explicitly given encoding. The two + %% former cases implies that the explicit internal encoding + %% (Charset) may be different from Charset0. + + %% Now transform to declared character set. + if + Charset==Charset0 -> % Document already transformed to this charset! + scan_prolog(T3, S3, Pos); + Charset0=/=undefined -> + %% For example may an external entity + %% have the BOM for utf-16 and the internal + %% explicit encoding='utf-16', then it will be auto + %% detected and transformed, Charset0 will be + %% 'iso-10646-utf-1', and Charset will be 'utf-16', all + %% legal. + %% + scan_prolog(T3,S3#xmerl_scanner{encoding=Charset0},Pos); + Charset == "utf-8" -> + scan_prolog(T3, S3, Pos); + Charset=/=undefined -> % Document not previously transformed + T4=xmerl_ucs:to_unicode(T3,list_to_atom(Charset)), + scan_prolog(T4, S3, Pos); + true -> % No encoding info given + scan_prolog(T3, S3, Pos) + end; +scan_prolog(" + ?dbg("prolog(\" xmerl_ucs:to_unicode(T,'utf-8'); + true -> T + end, + {T2, S1} = scan_doctype(T1, S), + scan_misc(T2, S1, Pos); +scan_prolog(Str="%"++_T,S=#xmerl_scanner{environment={external,_}},_Pos) -> + scan_ext_subset(Str,S); +scan_prolog(Str, S0 = #xmerl_scanner{user_state=_US,encoding=_Charset},Pos) -> + ?dbg("prolog(\"<\")~n", []), + + %% Check for Comments, PI before possible DOCTYPE declaration + ?bump_col(1), + %% If no known character set assume it is UTF-8 + T=if +%% Charset==undefined -> xmerl_ucs:to_unicode(Str,'utf-8'); + true -> Str + end, + {T1, S1}=scan_misc(T, S, Pos), + scan_prolog2(T1,S1,Pos). + + + +scan_prolog2([], S=#xmerl_scanner{continuation_fun = F}, Pos) -> + ?dbg("cont()...~n", []), + F(fun(MoreBytes, S1) -> scan_prolog2(MoreBytes, S1, Pos) end, + fun(S1) -> {[], S1} end, + S); +scan_prolog2(" + ?dbg("prolog(\" + ?dbg("prolog(\" + ?dbg("prolog(\"<\")~n", []), + + %% Here we consider the DTD provided by doctype_DTD option, + S1 = + case S0 of + #xmerl_scanner{validation=dtd,doctype_DTD=DTD} when is_list(DTD) -> + S=fetch_DTD(undefined,S0), + check_decl(S), + S; + _ -> S0 + end, + %% Check for more Comments and PI after DOCTYPE declaration +% ?bump_col(1), + scan_misc(Str, S1, Pos). + + + + +%%% [27] Misc ::= Comment | PI | S +%% Note: +%% - Neither of Comment and PI are returned in the resulting parsed +%% structure. +%% - scan_misc/3 implements Misc* as that is how the rule is always used +scan_misc([], S=#xmerl_scanner{continuation_fun = F}, Pos) -> + ?dbg("cont()...~n", []), + F(fun(MoreBytes, S1) -> scan_misc(MoreBytes, S1, Pos) end, + fun(S1) -> {[], S1} end, + S); +scan_misc(""++ T,S0,Delim,Acc,PEName, parameter=NS,PENesting) -> + ?bump_col(3), + scan_entity_value(T,S,Delim,["-->"|Acc],PEName,NS, + pe_pop("-->",PENesting,S)); +%% Stop delimeter for ConditionalSection +scan_entity_value("]]>"++ T,S0,Delim,Acc,PEName, parameter=NS,PENesting) -> + ?bump_col(3), + scan_entity_value(T,S,Delim,["]]>"|Acc],PEName,NS, + pe_pop("]]>",PENesting,S)); +%% Stop delimeter added to match a content start delimeter included +scan_entity_value("/>"++ T,S0,Delim,Acc,PEName, parameter=NS,PENesting) -> + ?bump_col(2), + scan_entity_value(T,S,Delim,["/>"|Acc],PEName,NS, + pe_pop("/>",PENesting,S)); +scan_entity_value(")"++ T,S0,Delim,Acc,PEName, parameter=NS,PENesting) -> + ?bump_col(1), + scan_entity_value(T,S,Delim,[")"|Acc],PEName,NS, + pe_pop(")",PENesting,S)); +scan_entity_value("\n"++T, S, Delim, Acc, PEName,Namespace,PENesting) -> + scan_entity_value(T, S#xmerl_scanner{line=S#xmerl_scanner.line+1}, + Delim, ["\n"|Acc], PEName,Namespace,PENesting); +scan_entity_value(Str, S0, Delim, Acc, PEName,Namespace,PENesting) -> + {Ch,T} = to_ucs(S0#xmerl_scanner.encoding,Str), + case xmerl_lib:is_char(Ch) of + true -> + ?bump_col(1), + scan_entity_value(T, S, Delim, [Ch|Acc], PEName,Namespace,PENesting); + false -> + ?fatal({unexpected_char,Ch}, S0) + end. + + + +save_refed_entity_name(Name,PEName,S) -> + case predefined_entity(Name) of + true -> + S; + _ -> + save_refed_entity_name1(Name,PEName,S) + end. + +save_refed_entity_name1(Name,PEName, + S=#xmerl_scanner{entity_references=ERefs}) -> + case lists:keysearch(PEName,1,ERefs) of + {value,{_,Refs}} -> + NewRefs = + case lists:member(Name,Refs) of + true ->Refs; + _ -> [Name|Refs] + end, + S#xmerl_scanner{entity_references=lists:keyreplace(PEName,1,ERefs, + {PEName,NewRefs}) + }; + _ -> + S#xmerl_scanner{entity_references=[{PEName,[Name]}|ERefs]} + end. + + + +pe_push(Tok,Stack,_S) when Tok==" + [Tok|Stack]; +pe_push(Tok,Stack,#xmerl_scanner{validation=dtd}) + when Tok==")";Tok==">";Tok=="?>";Tok=="]]>";Tok=="-->";Tok=="/>"-> + [Tok|Stack]; +pe_push(_,Stack,_S) -> + Stack. + +pe_pop(">",[" Rest; +pe_pop("?>",[" Rest; +pe_pop("-->",[""++_T,parameter,dtd) -> "-->"; +pe_nesting_token("]]>"++_T,parameter,dtd) -> "]]>"; +pe_nesting_token(")"++_T,parameter,dtd) -> ")"; +pe_nesting_token("/>"++_T,parameter,dtd) -> "/>"; +pe_nesting_token(_,_,_) -> false. + +predefined_entity(amp) -> true; +predefined_entity(lt) -> true; +predefined_entity(gt) -> true; +predefined_entity(apos) -> true; +predefined_entity(quot) -> true; +predefined_entity(_) -> false. + +check_entity_recursion(EName, + S=#xmerl_scanner{entity_references=EntityRefList}) -> + Set = sofs:family(EntityRefList), + case catch sofs:family_to_digraph(Set, [acyclic]) of + {'EXIT',{cyclic,_}} -> + ?fatal({illegal_recursion_in_Entity, EName}, S); + DG -> + digraph:delete(DG), + ok + end. + + + + +%%%%%%% [15] Comment +scan_comment(Str, S) -> + scan_comment(Str, S, _Pos = undefined, _Parents = [], _Lang = []). + +scan_comment(Str,S=#xmerl_scanner{col=C,event_fun=Event}, Pos, Parents, Lang) -> + Comment = #xmlComment{pos = Pos, + parents = Parents, + language = Lang, + value = undefined}, + S1 = #xmerl_scanner{} = Event(#xmerl_event{event = started, + line = S#xmerl_scanner.line, + col = C, + pos = Pos, + data = Comment}, S), + + scan_comment1(Str, S1, Pos, Comment, _Acc = []). + +scan_comment1([], S=#xmerl_scanner{continuation_fun = F}, + Pos, Comment, Acc) -> + ?dbg("cont()...~n", []), + F(fun(MoreBytes, S1) -> scan_comment1(MoreBytes, S1, Pos, Comment, Acc) end, + fun(S1) -> ?fatal(unexpected_end, S1) end, + S); +scan_comment1("-->" ++ T, S0 = #xmerl_scanner{col = C, + event_fun = Event, + hook_fun = Hook}, + _Pos, Comment, Acc) -> + ?bump_col(3), + Comment1 = Comment#xmlComment{value = lists:reverse(Acc)}, + S1=#xmerl_scanner{}=Event(#xmerl_event{event = ended, + line=S#xmerl_scanner.line, + col = C, + data = Comment1}, S), + {Ret, S2} = Hook(Comment1, S1), + {_,T3,S3}=strip(T,S2), + {Ret,T3,S3}; +scan_comment1("--"++T,S,_Pos,_Comment,_Acc) -> + ?fatal({invalid_comment,"--"++[hd(T)]}, S); +scan_comment1("\n" ++ T, S=#xmerl_scanner{line = L}, Pos, Cmt, Acc) -> + scan_comment1(T, S#xmerl_scanner{line=L+1,col=1},Pos, Cmt, "\n" ++ Acc); +scan_comment1("\r\n" ++ T, S=#xmerl_scanner{line = L}, Pos, Cmt, Acc) -> + %% CR followed by LF is read as a single LF + scan_comment1(T, S#xmerl_scanner{line=L+1,col=1}, Pos, Cmt, "\n" ++ Acc); +scan_comment1("\r" ++ T, S=#xmerl_scanner{line = L}, Pos, Cmt, Acc) -> + %% CR not followed by LF is read as a LF + scan_comment1(T, S#xmerl_scanner{line=L+1,col=1}, Pos, Cmt, "\n" ++ Acc); +scan_comment1(Str, S=#xmerl_scanner{col = C}, Pos, Cmt, Acc) -> + {Ch,T} = wfc_legal_char(Str,S), + scan_comment1(T, S#xmerl_scanner{col=C+1}, Pos, Cmt, [Ch|Acc]). + +%%%%%%% + +scan_markup_completion_gt([$>|_R]=T,S) -> + {T,S}; +scan_markup_completion_gt([$%|T],S0) -> + ?bump_col(1), + {Name,T1,S1} = scan_pe_reference(T,S), + ExpandedRef = expand_pe_reference(Name,S1,as_PE), + {_,T2,S2} = strip(ExpandedRef++T1,S1), + scan_markup_completion_gt(T2,S2); +scan_markup_completion_gt(T,S) -> + ?fatal({error,{malformed_syntax_entity_completion,T}},S). + + +scan_mandatory(Pattern,T,N,S,ErrorMsg) -> + case lists:prefix(Pattern,T) of + true -> + lists:nthtail(N,T); + _ -> + ?fatal(ErrorMsg,S) + end. + + +strip(Str,S) -> + strip(Str,S,all). + +strip([], S=#xmerl_scanner{continuation_fun = F},_) -> + ?dbg("cont()... stripping whitespace~n", []), + F(fun(MoreBytes, S1) -> strip(MoreBytes, S1) end, + fun(S1) -> {[], [], S1} end, + S); +strip("\s" ++ T, S=#xmerl_scanner{col = C},Lim) -> + strip(T, S#xmerl_scanner{col = C+1},Lim); +strip("\t" ++ _T, S ,no_tab) -> + ?fatal({error,{no_tab_allowed}},S); +strip("\t" ++ T, S=#xmerl_scanner{col = C},Lim) -> + strip(T, S#xmerl_scanner{col = expand_tab(C)},Lim); +strip("\n" ++ T, S=#xmerl_scanner{line = L},Lim) -> + strip(T, S#xmerl_scanner{line = L+1, col = 1},Lim); +strip("\r\n" ++ T, S=#xmerl_scanner{line = L},Lim) -> + %% CR followed by LF is read as a single LF + strip(T, S#xmerl_scanner{line = L+1, col = 1},Lim); +strip("\r" ++ T, S=#xmerl_scanner{line = L},Lim) -> + %% CR not followed by LF is read as a LF + strip(T, S#xmerl_scanner{line = L+1, col = 1},Lim); +strip(Str, S,_Lim) -> + {[], Str, S}. + +%% demands a whitespace, though a parameter entity is ok, it will +%% expand with a whitespace on each side. +mandatory_strip([],S) -> + ?fatal({error,{whitespace_was_expected}},S); +mandatory_strip(T,S) when ?whitespace(hd(T)) -> + strip(T,S,all); +mandatory_strip([$%|T],S) when ?whitespace(hd(T)) -> %this is not a PERefence, but an PEDeclaration + ?fatal({error,{whitespace_was_expected}},S); +mandatory_strip([$%|_T]=T,S) -> + {[],T,S}; +mandatory_strip(_T,S) -> + ?fatal({error,{whitespace_was_expected}},S). + +%% strip but don't accept tab +pub_id_strip(Str, S) -> + strip(Str,S,no_tab). + + +normalize("&"++T,S,IsNorm) -> + case scan_reference(T, S) of + {ExpRef, T1, S1} when ?whitespace(hd(ExpRef)) -> + ExpRef2 = string_to_char_set(S#xmerl_scanner.encoding,ExpRef), + normalize(ExpRef2++T1,S1,IsNorm); + _ -> + {"&"++T,S,IsNorm} + end; +normalize(T,S,IsNorm) -> + case strip(T,S) of + {_,T,S} -> + {T,S,IsNorm}; + {_,T1,S1} -> + {T1,S1,true} + end. + + +%% Optimization: +%% - avoid building list of spaces or tabs; +%% - avoid reverse; +%% - compact two common indentation patterns. +%% Note: only to be called when a \n was found. +fast_accumulate_whitespace(" " ++ T, S, _) -> + fast_acc_spaces(T, S, 1); +fast_accumulate_whitespace("\t"++T, S, _) -> + fast_acc_tabs(T, S, 1); +fast_accumulate_whitespace("<"++_=R, S, _T) -> + #xmerl_scanner{common_data = CD, line = Line} = S, + {done, {element(3, CD), R, S#xmerl_scanner{col = 1, line = Line + 1}}}; +fast_accumulate_whitespace(_, S, T) -> + accumulate_whitespace(T, S, []). + +fast_acc_spaces(" " ++ T, S, N) -> + fast_acc_spaces(T, S, N + 1); +fast_acc_spaces(T, S, N) -> + fast_acc_end(T, S, N, N, $\s, 1). + +fast_acc_tabs("\t" ++ T, S, N) -> + fast_acc_tabs(T, S, N + 1); +fast_acc_tabs(T, S, N) -> + fast_acc_end(T, S, N, N * 8 + 1, $\t, 2). + +fast_acc_end(T, S, N, Col, C, CD_I) -> + #xmerl_scanner{common_data = CD, line = Line0} = S, + Line = Line0 + 1, + try + $< = hd(T), + {done,{element(N, element(CD_I, CD)), T, + S#xmerl_scanner{col = Col, line = Line}}} + catch _:_ -> + accumulate_whitespace(T, S, Line, Col, lists:duplicate(N, C)++"\n") + end. + + +%%% @spec accumulate_whitespace(T::string(),S::global_state(), +%%% atom(),Acc::string()) -> {Acc, T1, S1} +%%% +%%% @doc Function to accumulate and normalize whitespace. +accumulate_whitespace(T, S, preserve, Acc) -> + accumulate_whitespace(T, S, Acc); +accumulate_whitespace(T, S, normalize, Acc) -> + {_WsAcc, T1, S1} = accumulate_whitespace(T, S, []), + {[$\s|Acc], T1, S1}. + +accumulate_whitespace(T, S, Acc) -> + #xmerl_scanner{line = Line, col = Col} = S, + accumulate_whitespace(T, S, Line, Col, Acc). + +accumulate_whitespace([], S0, Line, Col, Acc) -> + #xmerl_scanner{continuation_fun = F} = S0, + S = S0#xmerl_scanner{line = Line, col = Col}, + ?dbg("cont()...~n", []), + F(fun(MoreBytes, S1) -> accumulate_whitespace(MoreBytes, S1, Acc) end, + fun(S1) -> {Acc, [], S1} end, + S); +accumulate_whitespace("\s" ++ T, S, Line, Col, Acc) -> + accumulate_whitespace(T, S, Line, Col+1, [$\s|Acc]); +accumulate_whitespace("\t" ++ T, S, Line, Col, Acc) -> + accumulate_whitespace(T, S, Line, expand_tab(Col), [$\t|Acc]); +accumulate_whitespace("\n" ++ T, S, Line, _Col, Acc) -> + accumulate_whitespace(T, S, Line+1, 1, [$\n|Acc]); +accumulate_whitespace("\r\n" ++ T, S, Line, _Col, Acc) -> + %% CR followed by LF is read as a single LF + accumulate_whitespace(T, S, Line+1, 1, [$\n|Acc]); +accumulate_whitespace("\r" ++ T, S, Line, _Col, Acc) -> + %% CR not followed by LF is read as a LF + accumulate_whitespace(T, S, Line+1, 1, [$\n|Acc]); +accumulate_whitespace(Str, S, Line, Col, Acc) -> + {Acc, Str, S#xmerl_scanner{line = Line, col = Col}}. + +expand_tab(Col) -> + Rem = (Col-1) rem 8, + _NewCol = Col + 8 - Rem. + +%% validation_mode(Validation) +%% Validation = off | dtd | schema | true | false +%% true and false are obsolete +validation_mode(false) -> + off; +validation_mode(true) -> + dtd; +validation_mode(Other) -> + Other. + + +schemaLocations(El,#xmerl_scanner{schemaLocation=[]}) -> + schemaLocations(El); +schemaLocations(El,#xmerl_scanner{schemaLocation=SL}) -> + case SL of + [{_,_}|_] -> + {ok,SL}; + _ -> + schemaLocations(El) + end. + +schemaLocations(#xmlElement{attributes=Atts,xmlbase=_Base}) -> + Pred = fun(#xmlAttribute{name=schemaLocation}) -> false; + (#xmlAttribute{namespace={_,"schemaLocation"}}) -> false; + (_) -> true + end, + case lists:dropwhile(Pred,Atts) of + [#xmlAttribute{value=Paths}|_] -> + + case string:tokens(Paths," ") of + L when length(L) > 0 -> + case length(L) rem 2 of + 0 -> + PairList = + fun([],_Fun) -> + []; + ([SLNS,SLLoc|Rest],Fun) -> + [{SLNS,SLLoc}|Fun(Rest,Fun)] + end, + {ok,PairList(L,PairList)}; + _ -> + {error,{schemaLocation_attribute,namespace_location_not_in_pair}} + end; + _ -> + {error,{missing_schemaLocation}} + end; + [] -> + {error,{missing_schemaLocation}} + end. + +inherit_options(S) -> + %%io:format("xsdbase: ~p~n",[S#xmerl_scanner.xmlbase]), + [{xsdbase,S#xmerl_scanner.xmlbase}]. + +handle_schema_result({XSDRes=#xmlElement{},_},S5) -> + {XSDRes,S5}; +handle_schema_result({error,Reason},S5) -> + ?fatal({failed_schema_validation,Reason},S5). + +%%% Helper functions + +fatal(Reason, S) -> + exit({fatal, {Reason, + {file,S#xmerl_scanner.filename}, + {line,S#xmerl_scanner.line}, + {col,S#xmerl_scanner.col}}}). + +%% preformat formats tokens in L1 and L2, L2 separated by Sep into a +%% list +preformat(L1,L2,Sep) -> + Format1= lists:flatten(lists:duplicate(length(L1)-1,"~s ")++"~s"), + Format2 = lists:flatten(lists:duplicate(length(L2)-1, + " ~s"++Sep)++" ~s"), + + lists:flatten(io_lib:format(Format1++Format2,L1++L2)). + + +%% BUG when we are many balise none attributes has save in rules +rules_write(Context, Name, Value, #xmerl_scanner{rules = T} = S) -> + case ets:lookup(T, {Context, Name}) of + [] -> + ets:insert(T, {{Context, Name}, Value}); + _ -> + ok + end, + S. + + +rules_read(Context, Name, #xmerl_scanner{rules = T}) -> + case ets:lookup(T, {Context, Name}) of + [] -> + undefined; + [{_, V}] -> + V + end. + +rules_delete(Context,Name,#xmerl_scanner{rules = T}) -> + ets:delete(T,{Context,Name}). + +to_ucs(Encoding, Chars) when Encoding=="utf-8"; Encoding == undefined -> + utf8_2_ucs(Chars); +to_ucs(_,[C|Rest]) -> + {C,Rest}. + +utf8_2_ucs([A,B,C,D|Rest]) when A band 16#f8 =:= 16#f0, + B band 16#c0 =:= 16#80, + C band 16#c0 =:= 16#80, + D band 16#c0 =:= 16#80 -> + %% 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv + case ((D band 16#3f) bor ((C band 16#3f) bsl 6) bor + ((B band 16#3f) bsl 12) bor ((A band 16#07) bsl 18)) of + Ch when Ch >= 16#10000 -> + {Ch,Rest}; + Ch -> + {{error,{bad_character,Ch}},Rest} + end; +utf8_2_ucs([A,B,C|Rest]) when A band 16#f0 =:= 16#e0, + B band 16#c0 =:= 16#80, + C band 16#c0 =:= 16#80 -> + %% 1110vvvv 10vvvvvv 10vvvvvv + case ((C band 16#3f) bor ((B band 16#3f) bsl 6) bor + ((A band 16#0f) bsl 12)) of + Ch when Ch >= 16#800 -> + {Ch,Rest}; + Ch -> + {{error,{bad_character,Ch}},Rest} + end; +utf8_2_ucs([A,B|Rest]) when A band 16#e0 =:= 16#c0, + B band 16#c0 =:= 16#80 -> + %% 110vvvvv 10vvvvvv + case ((B band 16#3f) bor ((A band 16#1f) bsl 6)) of + Ch when Ch >= 16#80 -> + {Ch,Rest}; + Ch -> + {{error,{bad_character,Ch}},Rest} + end; +utf8_2_ucs([A|Rest]) when A < 16#80 -> + {A,Rest}; +utf8_2_ucs([A|Rest]) -> + {{error,{bad_character,A}},Rest}. + +to_char_set("iso-10646-utf-1",Ch) -> + [Ch]; +to_char_set(UTF8,Ch) when UTF8 =:= "utf-8"; UTF8 =:= undefined -> + ucs_2_utf8(Ch); +to_char_set(_,Ch) -> + [Ch]. + +ucs_2_utf8(Ch) when Ch < 128 -> + %% 0vvvvvvv + [Ch]; +ucs_2_utf8(Ch) when Ch < 16#0800 -> + %% Ch: -----vvv vvvvvvvv + %% 110vvvvv 10vvvvvv + %% O1 = (Ch band 16#07c0) bsr 6, + %% O2 = (Ch band 16#003f), + [((Ch band 16#07c0) bsr 6) bor 16#c0,(Ch band 16#003f) bor 16#80]; +ucs_2_utf8(Ch) when Ch < 16#10000 -> + %% Ch: vvvvvvvv vvvvvvvv + %% 1110vvvv 10vvvvvv 10vvvvvv + %% O1 = (Ch band 16#f000) bsr 12 + %% O2 = (Ch band 16#0fc0) bsr 6 + %% O3 = (Ch band 16#003f) + [((Ch band 16#f000) bsr 12) bor 16#e0, + ((Ch band 16#0fc0) bsr 6) bor 16#80, + (Ch band 16#003f) bor 16#80]; +ucs_2_utf8(Ch) when Ch < 16#200000 -> + %% Ch: ---vvvvv vvvvvvvv vvvvvvvv + %% 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv + %% O1 = (Ch band 16#1c0000) bsr 18 + %% O2 = (Ch band 16#03f000) bsr 12 + %% O3 = (Ch band 16#000fc0) bsr 6 + %% O4 = (Ch band 16#00003f) + [((Ch band 16#1c0000) bsr 18) bor 16#f0, + ((Ch band 16#03f000) bsr 12) bor 16#80, + ((Ch band 16#000fc0) bsr 6) bor 16#80, + (Ch band 16#00003f) bor 16#80]. + + +string_to_char_set(Enc,Str) when Enc =:= "utf-8"; Enc =:= undefined -> + lists:flatten([ucs_2_utf8(X)||X <- Str]); +string_to_char_set(_,Str) -> + Str. + +%% diagnose(Line) -> +%% Mem=erlang:memory(), +%% {OldTot,OldLine} = get_total(), +%% NewTot = +%% case {lists:keysearch(total,1,Mem),OldTot*1.1} of +%% {{_,{_,Tot}},Tot110} when Tot > Tot110 -> +%% io:format("From ~p to ~p, total memory: ~p (~p)~n",[OldLine,Line,Tot,OldTot]), +%% Tot; +%% {{_,{_,Tot}},_} -> +%% Tot +%% end, +%% put_total({NewTot,Line}). + +%% get_total() -> +%% case get(xmerl_mem) of +%% undefined -> +%% put(xmerl_mem,{0,0}), +%% {0,0}; +%% M -> M +%% end. + +%% put_total(M) -> +%% put(xmerl_mem,M). -- cgit v1.2.3