diff options
Diffstat (limited to 'lib/xmerl/src/xmerl_eventp.erl')
-rw-r--r-- | lib/xmerl/src/xmerl_eventp.erl | 366 |
1 files changed, 366 insertions, 0 deletions
diff --git a/lib/xmerl/src/xmerl_eventp.erl b/lib/xmerl/src/xmerl_eventp.erl new file mode 100644 index 0000000000..ad5c3cbc47 --- /dev/null +++ b/lib/xmerl/src/xmerl_eventp.erl @@ -0,0 +1,366 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2003-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%%% Description : Simple event-based processor (front-end to xmerl_scan) + +%% @doc Simple event-based front-ends to xmerl_scan for processing +%% of XML documents in streams and for parsing in SAX style. +%% Each contain more elaborate settings of xmerl_scan that makes usage of +%% the customization functions. +%% +-module(xmerl_eventp). +-vsn('0.19'). +-date('03-09-17'). + +-export([stream/2,stream_sax/4, file_sax/4, string_sax/4]). + +% -export([cont/3, rules_read/3,rules_write/4,fetch/2,close/1]). + +-include("xmerl.hrl"). +-include("xmerl_internal.hrl"). +-include_lib("kernel/include/file.hrl"). + +%% @spec stream(Fname::string(), Options::option_list()) -> xmlElement() +%% +%% @doc Parse file containing an XML document as a stream, DOM style. +%% Wrapper for a call to the XML parser <code>xmerl_scan</code> with a +%% <code>continuation_fun</code> for handling streams of XML data. +%% Note that the <code>continuation_fun</code>, <code>acc_fun</code>, +%% <code>fetch_fun</code>, <code>rules</code> and <code>close_fun</code> +%% options cannot be user defined using this parser. +stream(Fname, Options) -> + AccF = fun(X, Acc, S) -> acc(X,Acc,S) end, + case file:open(Fname, [read, raw, binary]) of + {ok, Fd} -> + B0 = list_to_binary([]), + ContS = [{B0, Fname, Fd}], + Opts=scanner_options(Options, + [{continuation_fun, fun cont/3, ContS}, + {acc_fun, AccF}, + {fetch_fun, fun fetch/2}, + {rules,fun rules_read/3,fun rules_write/4,""}, + {close_fun, fun close/1}]), + xmerl_scan:string([], Opts); + Error -> + Error + end. + + +%% @spec stream_sax(Fname,CallBackModule,UserState,Options) -> xmlElement() +%% Fname = string() +%% CallBackModule = atom() +%% Options = option_list() +%% +%% @doc Parse file containing an XML document as a stream, SAX style. +%% Wrapper for a call to the XML parser <code>xmerl_scan</code> with a +%% <code>continuation_fun</code> for handling streams of XML data. +%% Note that the <code>continuation_fun</code>, <code>acc_fun</code>, +%% <code>fetch_fun</code>, <code>rules</code>, <code>hook_fun</code>, +%% <code>close_fun</code> and <code>user_state</code> options cannot be user +%% defined using this parser. +stream_sax(Fname, CallBack, UserState,Options) -> + US={xmerl:callbacks(CallBack), UserState}, + AccF = fun(X, Acc, S) -> acc(X,Acc,S) end, + HookF= + fun(ParsedEntity, S) -> + {CBs,Arg}=xmerl_scan:user_state(S), +% io:format("stream_sax Arg=~p~n",[Arg]), + case ParsedEntity of + #xmlComment{} -> % Toss away comments... + {[],S}; + _ -> % Use callback module for the rest +% io:format("stream_sax ParsedEntity=~p~n",[ParsedEntity]), + case xmerl:export_element(ParsedEntity,CBs,Arg) of + {error,Reason} -> + throw({error,Reason}); + Resp -> +% io:format("stream_sax Resp=~p~n",[Resp]), + {Resp,xmerl_scan:user_state({CBs,Resp},S)} + end + end + end, + case file:open(Fname, [read, raw, binary]) of + {ok, Fd} -> + B0 = list_to_binary([]), + ContS = [{B0, Fname, Fd}], + Opts=scanner_options(Options, + [{acc_fun, AccF}, + {close_fun, fun close/1}, + {continuation_fun, fun cont/3, ContS}, + {fetch_fun, fun fetch/2}, + {hook_fun,HookF}, + {rules,fun rules_read/3,fun rules_write/4,""}, + {user_state,US}]), + xmerl_scan:string([], Opts); + Error -> + Error + end. + + +%% @spec file_sax(Fname::string(), CallBackModule::atom(), UserState, +%% Options::option_list()) -> NewUserState +%% +%% @doc Parse file containing an XML document, SAX style. +%% Wrapper for a call to the XML parser <code>xmerl_scan</code> with a +%% <code>hook_fun</code> for using xmerl export functionality directly after +%% an entity is parsed. +file_sax(Fname,CallBack, UserState, Options) -> + US={xmerl:callbacks(CallBack), UserState}, + AccF=fun(X,Acc,S) -> {[X|Acc], S} end, + HookF= + fun(ParsedEntity, S) -> + {CBs,Arg}=xmerl_scan:user_state(S), + case ParsedEntity of + #xmlComment{} -> % Toss away comments... + {[],S}; + _ -> % Use callback module for the rest + case xmerl:export_element(ParsedEntity,CBs,Arg) of + {error,Reason} -> + throw({error,Reason}); + Resp -> + {Resp,xmerl_scan:user_state({CBs,Resp},S)} + end + end + end, + + Opts=scanner_options(Options,[{acc_fun, AccF}, + {hook_fun,HookF}, + {user_state,US}]), + xmerl_scan:file(Fname,Opts). + + +%% @spec string_sax(String::list(), CallBackModule::atom(), UserState, +%% Options::option_list()) -> +%% xmlElement() +%% +%% @doc Parse file containing an XML document, SAX style. +%% Wrapper for a call to the XML parser <code>xmerl_scan</code> with a +%% <code>hook_fun</code> for using xmerl export functionality directly after +%% an entity is parsed. +string_sax(String,CallBack, UserState, Options) -> + US={xmerl:callbacks(CallBack), UserState}, + AccF=fun(X,Acc,S) -> {[X|Acc], S} end, + HookF= + fun(ParsedEntity, S) -> + {CBs,Arg}=xmerl_scan:user_state(S), + case ParsedEntity of + #xmlComment{} -> % Toss away comments... + {[],S}; + _ -> % Use callback module for the rest + case xmerl:export_element(ParsedEntity,CBs,Arg) of + {error,Reason} -> + throw({error,Reason}); + Resp -> + {Resp,xmerl_scan:user_state({CBs,Resp},S)} + end + end + end, + + Opts=scanner_options(Options,[{acc_fun, AccF}, + {hook_fun,HookF}, + {user_state,US}]), + xmerl_scan:string(String,Opts). + + + +%%% ---------------------------------------------------------------------------- +%%% Streaming support functions + +%%% Continuation callback function for xmerl_scan +cont(F, Exception, S) -> + case xmerl_scan:cont_state(S) of + [{_Fname, eof}|_] -> + Exception(S); + [{Sofar, Fname, Fd}|T] -> + cont2(F, Exception, Sofar, Fd, Fname, T, S) + end. + + +cont2(F, Exception, Sofar, Fd, Fname, T, S) -> + case catch read_chunk(Fd, Fname, Sofar) of + {ok, Bin} -> + find_good_split(list_to_binary([Sofar,Bin]), + F,Exception,Fd,Fname,T,S); + eof -> + file:close(Fd), + NewS = xmerl_scan:cont_state([{Fname, eof}|T], S), + F(binary_to_list(Sofar), NewS); + Error -> + exit(Error) + end. + +read_chunk(Fd, _Fname, _Sofar) -> + file:read(Fd, 8192). + +-ifndef(no_bitsyntax). + +find_good_split(Bin, F, Exception, Fd, Fname, T, S) -> + find_good_split(size(Bin)-1, Bin, F, Exception, Fd, Fname, T, S). + +find_good_split(0, B, F, Exception, Fd, Fname, T, S) -> + cont2(F, Exception, B, Fd, Fname, T, S); +find_good_split(Size, B, F, Exception, Fd, Fname, T, S) -> + case B of + <<_Bytes:Size/binary, H/integer, Tail/binary>> when ?whitespace(H) -> + {SubB,_} = split_binary(B, Size+1), + NewS = xmerl_scan:cont_state([{Tail, Fname, Fd}|T], S), + F(binary_to_list(SubB), NewS); + _ -> + find_good_split(Size-1, B, F, Exception, Fd, Fname, T, S) + end. + +-else. + +find_good_split(Bin, F, Exception, Fd, Fname, T, S) -> + find_good_split(size(Bin), Bin, F, Exception, Fd, Fname, T, S). + +find_good_split(0, B, F, Exception, Fd, Fname, T, S) -> + cont2(F, Exception, B, Fd, Fname, T, S); +find_good_split(Size, B, F, Exception, Fd, Fname, T, S) -> + case binary_to_list(B, Size, Size) of + [H] when ?whitespace(H) -> + {SubB,Tail} = split_binary(B, Size), + NewS = xmerl_scan:cont_state([{Tail, Fname, Fd}|T], S), + F(binary_to_list(SubB), NewS); + _ -> + find_good_split(Size-1, B, F, Exception, Fd, Fname, T, S) + end. + +-endif. + + + +%%% Accumulator callback function for xmerl_scan +acc(X = #xmlText{value = Text}, Acc, S) -> + case detect_nul_text(Text) of + ok-> + {[X#xmlText{value = lists:flatten(Text)}|Acc], S}; + nok-> + {Acc,S} + end; +acc(X, Acc, S) -> + {[X|Acc], S}. + +%%% don't acc xmlText when text contains only " " , "\n" and "\t". +detect_nul_text([H|T]) when H==10; H==32; H==9-> + detect_nul_text(T); +detect_nul_text([]) -> + nok; +detect_nul_text(_)-> + ok. + + + +%%% Fetch callback function for xmerl_scan +fetch({system, URI}, S) -> + fetch_URI(URI, S); +fetch({public, _PublicID, URI}, S) -> + fetch_URI(URI, S). + +fetch_URI(URI, S) -> + %% assume URI is a filename + Split = filename:split(URI), + Filename = lists:last(Split), + Fullname = + case Split of + ["/", _|_] -> + %% absolute path name + URI; + ["file:",Name]-> + %% file:/dtd_name + filename:join(S#xmerl_scanner.xmlbase, Name); + _ -> + filename:join(S#xmerl_scanner.xmlbase, URI) + end, + File = path_locate(S#xmerl_scanner.fetch_path, Filename, Fullname), + ?dbg("fetch(~p) -> {file, ~p}.~n", [URI, File]), + case file:open(File, [read, raw, binary]) of + {ok, Fd} -> + ContS=xmerl_scan:cont_state(S), + NewS=xmerl_scan:cont_state([{list_to_binary([]),File,Fd}|ContS],S), + {ok, {string, []}, NewS}; + _Error -> + ?dbg("ERROR fetch(~p) -> ~p~n", [URI, _Error]), + {ok, not_fetched, S} + end. + +path_locate([Dir|Dirs], FN, FullName) -> + F = filename:join(Dir, FN), + case file:read_file_info(F) of + {ok, #file_info{type = regular}} -> + F; + _ -> + path_locate(Dirs, FN, FullName) + end; +path_locate([], _FN, FullName) -> + FullName. + +%%% Close callback function for xmerl_scan +close(S) -> + ContS = xmerl_scan:cont_state(S), + case ContS of + [{_Fname, eof}|T] -> + xmerl_scan:cont_state(T, S); + [{_Sofar, _Fname, Fd}|T] -> + file:close(Fd), + xmerl_scan:cont_state(T, S) + end. + + +%%% Rules callback functions for xmerl_scan +rules_write(Context, Name, Value, #xmerl_scanner{rules = undefined}=S) -> + Tab = ets:new(rules, [set, public]), + rules_write(Context, Name, Value, S#xmerl_scanner{rules = Tab}); +rules_write(Context, Name, Value, #xmerl_scanner{rules = T} = S) -> + ets:insert(T, {{Context, Name}, Value}), + S. + +rules_read(_Context, _Name, #xmerl_scanner{rules = undefined}) -> + undefined; +rules_read(Context, Name, #xmerl_scanner{rules = T}) -> + case ets:lookup(T, {Context, Name}) of + [] -> + undefined; + [{_K, V}] -> + V + end. + + + +%%% ---------------------------------------------------------------------------- +%%% Generic helper functions + +scanner_options([H|T], Opts) -> + case catch keyreplace(H, 1, Opts) of + false -> + scanner_options(T, [H|Opts]); + NewOpts -> + scanner_options(T, NewOpts) + end; +scanner_options([], Opts) -> + Opts. + +keyreplace(X, Pos, [H|T]) when element(Pos, X) == element(Pos, H) -> + [X|T]; +keyreplace(X, Pos, [H|T]) -> + [H|keyreplace(X, Pos, T)]; +keyreplace(_, _Pos, []) -> + throw(false). + + |