aboutsummaryrefslogtreecommitdiffstats
path: root/lib/xmerl/src/xmerl_eventp.erl
diff options
context:
space:
mode:
Diffstat (limited to 'lib/xmerl/src/xmerl_eventp.erl')
-rw-r--r--lib/xmerl/src/xmerl_eventp.erl366
1 files changed, 366 insertions, 0 deletions
diff --git a/lib/xmerl/src/xmerl_eventp.erl b/lib/xmerl/src/xmerl_eventp.erl
new file mode 100644
index 0000000000..ad5c3cbc47
--- /dev/null
+++ b/lib/xmerl/src/xmerl_eventp.erl
@@ -0,0 +1,366 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2003-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%% Description : Simple event-based processor (front-end to xmerl_scan)
+
+%% @doc Simple event-based front-ends to xmerl_scan for processing
+%% of XML documents in streams and for parsing in SAX style.
+%% Each front-end configures xmerl_scan with a more elaborate set of options
+%% that make use of its customization functions.
+%%
+-module(xmerl_eventp).
+-vsn('0.19').
+-date('03-09-17').
+
+-export([stream/2,stream_sax/4, file_sax/4, string_sax/4]).
+
+% -export([cont/3, rules_read/3,rules_write/4,fetch/2,close/1]).
+
+-include("xmerl.hrl").
+-include("xmerl_internal.hrl").
+-include_lib("kernel/include/file.hrl").
+
+%% @spec stream(Fname::string(), Options::option_list()) -> xmlElement()
+%%
+%% @doc Parse file containing an XML document as a stream, DOM style.
+%% Wrapper for a call to the XML parser <code>xmerl_scan</code> with a
+%% <code>continuation_fun</code> for handling streams of XML data.
+%% The file is opened in raw binary mode and handed to the scanner through
+%% the continuation state. If the file cannot be opened, the result of
+%% <code>file:open/2</code> is returned unchanged instead of a parse result.
+%% Note that the <code>continuation_fun</code>, <code>acc_fun</code>,
+%% <code>fetch_fun</code>, <code>rules</code> and <code>close_fun</code>
+%% options cannot be user defined using this parser.
+stream(Fname, Options) ->
+    case file:open(Fname, [read, raw, binary]) of
+        {ok, Fd} ->
+            %% Continuation state is a stack of {UnconsumedBinary, FileName, Fd};
+            %% nothing has been read yet, so the pending binary is empty.
+            ContS = [{list_to_binary([]), Fname, Fd}],
+            %% NOTE(review): scanner_options/2 lets an entry in Options with
+            %% the same tag replace the corresponding default below - confirm
+            %% that this is acceptable given the doc text above.
+            Opts = scanner_options(Options,
+                                   [{continuation_fun, fun cont/3, ContS},
+                                    {acc_fun, fun acc/3},
+                                    {fetch_fun, fun fetch/2},
+                                    {rules, fun rules_read/3, fun rules_write/4, ""},
+                                    {close_fun, fun close/1}]),
+            try
+                xmerl_scan:string([], Opts)
+            after
+                %% cont2/7 (at eof) and close/1 are the only other places that
+                %% close Fd; neither runs if the scan is aborted by an
+                %% exception. Closing here covers that path. When Fd is
+                %% already closed the call just returns an error tuple,
+                %% which is deliberately ignored.
+                _ = file:close(Fd)
+            end;
+        Error ->
+            Error
+    end.
+
+
+%% @spec stream_sax(Fname,CallBackModule,UserState,Options) -> xmlElement()
+%%       Fname = string()
+%%       CallBackModule = atom()
+%%       Options = option_list()
+%%
+%% @doc Parse file containing an XML document as a stream, SAX style.
+%% Wrapper for a call to the XML parser <code>xmerl_scan</code> with a
+%% <code>continuation_fun</code> for handling streams of XML data.
+%% Every parsed entity except comments is passed to
+%% <code>xmerl:export_element/3</code> together with the callbacks of
+%% <code>CallBackModule</code> and the current user state; the value returned
+%% by the export replaces the user state for the next entity. An
+%% <code>{error,Reason}</code> result from the export is thrown.
+%% If the file cannot be opened, the result of <code>file:open/2</code> is
+%% returned unchanged.
+%% Note that the <code>continuation_fun</code>, <code>acc_fun</code>,
+%% <code>fetch_fun</code>, <code>rules</code>, <code>hook_fun</code>,
+%% <code>close_fun</code> and <code>user_state</code> options cannot be user
+%% defined using this parser.
+stream_sax(Fname, CallBack, UserState, Options) ->
+    US = {xmerl:callbacks(CallBack), UserState},
+    HookF =
+        fun(ParsedEntity, S) ->
+                {CBs, Arg} = xmerl_scan:user_state(S),
+                case ParsedEntity of
+                    #xmlComment{} ->
+                        %% Toss away comments...
+                        {[], S};
+                    _ ->
+                        %% Use callback module for the rest
+                        case xmerl:export_element(ParsedEntity, CBs, Arg) of
+                            {error, Reason} ->
+                                throw({error, Reason});
+                            Resp ->
+                                %% The export result becomes the argument
+                                %% handed to the next export call.
+                                {Resp, xmerl_scan:user_state({CBs, Resp}, S)}
+                        end
+                end
+        end,
+    case file:open(Fname, [read, raw, binary]) of
+        {ok, Fd} ->
+            %% Continuation state is a stack of {UnconsumedBinary, FileName, Fd}.
+            ContS = [{list_to_binary([]), Fname, Fd}],
+            Opts = scanner_options(Options,
+                                   [{acc_fun, fun acc/3},
+                                    {close_fun, fun close/1},
+                                    {continuation_fun, fun cont/3, ContS},
+                                    {fetch_fun, fun fetch/2},
+                                    {hook_fun, HookF},
+                                    {rules, fun rules_read/3, fun rules_write/4, ""},
+                                    {user_state, US}]),
+            try
+                xmerl_scan:string([], Opts)
+            after
+                %% cont2/7 (at eof) and close/1 are the only other places that
+                %% close Fd; neither runs if the scan or the hook aborts with
+                %% an exception. Closing an already closed Fd only returns an
+                %% error tuple, which is deliberately ignored.
+                _ = file:close(Fd)
+            end;
+        Error ->
+            Error
+    end.
+
+
+%% @spec file_sax(Fname::string(), CallBackModule::atom(), UserState,
+%%        Options::option_list()) -> NewUserState
+%%
+%% @doc Parse file containing an XML document, SAX style.
+%% Calls <code>xmerl_scan:file/2</code> with a <code>hook_fun</code> that runs
+%% the xmerl export functionality on each entity as soon as it is parsed.
+%% Comments are discarded; for everything else the value returned by
+%% <code>xmerl:export_element/3</code> replaces the user state, and an
+%% <code>{error,Reason}</code> result is thrown.
+file_sax(Fname, CallBack, UserState, Options) ->
+    InitialState = {xmerl:callbacks(CallBack), UserState},
+    Defaults = [{acc_fun, fun(X, Acc, S) -> {[X|Acc], S} end},
+                {hook_fun, fun file_sax_hook/2},
+                {user_state, InitialState}],
+    xmerl_scan:file(Fname, scanner_options(Options, Defaults)).
+
+%% Hook for file_sax/4: fetch the callbacks and the current argument from the
+%% scanner's user state, then dispatch on the kind of entity.
+file_sax_hook(ParsedEntity, S) ->
+    {CBs, Arg} = xmerl_scan:user_state(S),
+    file_sax_export(ParsedEntity, CBs, Arg, S).
+
+%% Toss away comments, use the callback module for the rest.
+file_sax_export(#xmlComment{}, _CBs, _Arg, S) ->
+    {[], S};
+file_sax_export(ParsedEntity, CBs, Arg, S) ->
+    case xmerl:export_element(ParsedEntity, CBs, Arg) of
+        {error, Reason} ->
+            throw({error, Reason});
+        Resp ->
+            {Resp, xmerl_scan:user_state({CBs, Resp}, S)}
+    end.
+
+
+%% @spec string_sax(String::list(), CallBackModule::atom(), UserState,
+%%        Options::option_list()) ->
+%%      xmlElement()
+%%
+%% @doc Parse a string containing an XML document, SAX style.
+%% Calls <code>xmerl_scan:string/2</code> with a <code>hook_fun</code> that
+%% runs the xmerl export functionality on each entity as soon as it is parsed.
+%% Comments are discarded; for everything else the value returned by
+%% <code>xmerl:export_element/3</code> replaces the user state, and an
+%% <code>{error,Reason}</code> result is thrown.
+string_sax(String, CallBack, UserState, Options) ->
+    InitialState = {xmerl:callbacks(CallBack), UserState},
+    Collect = fun(X, Acc, S) -> {[X|Acc], S} end,
+    Hook =
+        fun(ParsedEntity, S) ->
+                {CBs, Arg} = xmerl_scan:user_state(S),
+                case is_record(ParsedEntity, xmlComment) of
+                    true ->
+                        %% Toss away comments...
+                        {[], S};
+                    false ->
+                        %% Use callback module for the rest
+                        string_sax_export(
+                          xmerl:export_element(ParsedEntity, CBs, Arg), CBs, S)
+                end
+        end,
+    Defaults = [{acc_fun, Collect},
+                {hook_fun, Hook},
+                {user_state, InitialState}],
+    xmerl_scan:string(String, scanner_options(Options, Defaults)).
+
+%% Turn the result of an export into the hook's return value: errors are
+%% thrown, anything else is stored as the new callback argument.
+string_sax_export({error, Reason}, _CBs, _S) ->
+    throw({error, Reason});
+string_sax_export(Resp, CBs, S) ->
+    {Resp, xmerl_scan:user_state({CBs, Resp}, S)}.
+
+
+
+%%% ----------------------------------------------------------------------------
+%%% Streaming support functions
+
+%%% Continuation callback function for xmerl_scan.
+%%% Looks at the top entry of the continuation stack kept in the scanner
+%%% state: an open file ({Sofar, Fname, Fd}) is read further via cont2/7,
+%%% while a file already marked {Fname, eof} makes the scanner-supplied
+%%% Exception fun run instead.
+cont(F, Exception, S) ->
+    ContStack = xmerl_scan:cont_state(S),
+    case ContStack of
+        [{Sofar, Fname, Fd}|Rest] ->
+            cont2(F, Exception, Sofar, Fd, Fname, Rest, S);
+        [{_Fname, eof}|_] ->
+            Exception(S)
+    end.
+
+
+%%% Read the next chunk from Fd and continue scanning.
+%%% New data is appended to the unconsumed bytes in Sofar and passed to
+%%% find_good_split/7. At end of file the descriptor is closed, the stack
+%%% entry is replaced by {Fname, eof} and whatever is left in Sofar is given
+%%% to the continuation F. Any other read result terminates the process with
+%%% that result as exit reason.
+cont2(F, Exception, Sofar, Fd, Fname, T, S) ->
+    ReadResult = (catch read_chunk(Fd, Fname, Sofar)),
+    case ReadResult of
+        eof ->
+            file:close(Fd),
+            AtEof = xmerl_scan:cont_state([{Fname, eof}|T], S),
+            F(binary_to_list(Sofar), AtEof);
+        {ok, Chunk} ->
+            Pending = list_to_binary([Sofar, Chunk]),
+            find_good_split(Pending, F, Exception, Fd, Fname, T, S);
+        Failure ->
+            exit(Failure)
+    end.
+
+%%% Read the next block of at most 8192 bytes from Fd. The file name and the
+%%% bytes read so far are accepted but not used.
+read_chunk(Fd, _Fname, _Sofar) ->
+    ChunkSize = 8192,
+    file:read(Fd, ChunkSize).
+
+-ifndef(no_bitsyntax).
+
+%%% Split Bin after its last whitespace character, so that the scanner is
+%%% handed data ending in whitespace. The part up to and including
+%%% that whitespace is passed to the continuation F; the remainder is stored
+%%% in the continuation state as the new unconsumed data. The search starts at
+%%% the last byte and moves towards the front.
+find_good_split(Bin, F, Exception, Fd, Fname, T, S) ->
+    find_good_split(byte_size(Bin) - 1, Bin, F, Exception, Fd, Fname, T, S).
+
+%%% Size is the offset of the byte currently examined. When offset 0 is
+%%% reached without finding whitespace, more data is read via cont2/7 (the
+%%% byte at offset 0 itself is not examined). The =< guard also covers an
+%%% empty Bin, whose start offset is -1 and would otherwise never hit a
+%%% terminating clause.
+find_good_split(Size, B, F, Exception, Fd, Fname, T, S) when Size =< 0 ->
+    cont2(F, Exception, B, Fd, Fname, T, S);
+find_good_split(Size, B, F, Exception, Fd, Fname, T, S) ->
+    case B of
+        <<_Bytes:Size/binary, H/integer, Tail/binary>> when ?whitespace(H) ->
+            %% Keep the whitespace character with the part handed to F.
+            {SubB, _} = split_binary(B, Size + 1),
+            NewS = xmerl_scan:cont_state([{Tail, Fname, Fd}|T], S),
+            F(binary_to_list(SubB), NewS);
+        _ ->
+            find_good_split(Size - 1, B, F, Exception, Fd, Fname, T, S)
+    end.
+
+-else.
+
+%%% Variant used when no_bitsyntax is defined: split Bin after its last
+%%% whitespace character using binary_to_list/3 instead of binary matching.
+find_good_split(Bin, F, Exception, Fd, Fname, T, S) ->
+    LastPos = size(Bin),
+    find_good_split(LastPos, Bin, F, Exception, Fd, Fname, T, S).
+
+%%% Pos is the 1-based position of the byte currently examined. Reaching
+%%% position 0 means no whitespace was found, so more data is read via cont2/7.
+find_good_split(0, B, F, Exception, Fd, Fname, T, S) ->
+    cont2(F, Exception, B, Fd, Fname, T, S);
+find_good_split(Pos, B, F, Exception, Fd, Fname, T, S) ->
+    case binary_to_list(B, Pos, Pos) of
+        [Char] when ?whitespace(Char) ->
+            %% Everything up to and including the whitespace goes to F; the
+            %% rest is kept as unconsumed data in the continuation state.
+            {Head, Rest} = split_binary(B, Pos),
+            Updated = xmerl_scan:cont_state([{Rest, Fname, Fd}|T], S),
+            F(binary_to_list(Head), Updated);
+        _ ->
+            find_good_split(Pos - 1, B, F, Exception, Fd, Fname, T, S)
+    end.
+
+-endif.
+
+
+
+%%% Accumulator callback function for xmerl_scan.
+%%% Text nodes are stored with their value flattened; text nodes consisting
+%%% only of " ", "\n" and "\t" are dropped. Every other kind of node is put on
+%%% the accumulator unchanged. Returns {NewAcc, S}.
+acc(X = #xmlText{value = Text}, Acc, S) ->
+    %% Flatten before testing, so that whitespace-only text given as a deep
+    %% list (e.g. [" ", "\n"]) is recognised as empty as well.
+    Flat = lists:flatten(Text),
+    case detect_nul_text(Flat) of
+        ok ->
+            {[X#xmlText{value = Flat}|Acc], S};
+        nok ->
+            {Acc, S}
+    end;
+acc(X, Acc, S) ->
+    {[X|Acc], S}.
+
+%%% don't acc xmlText when text contains only " " , "\n" and "\t".
+%%% Returns nok for such text (including the empty list) and ok as soon as
+%%% anything else is found.
+detect_nul_text([H|T]) when H==10; H==32; H==9->
+    detect_nul_text(T);
+detect_nul_text([]) ->
+    nok;
+detect_nul_text(_)->
+    ok.
+
+
+
+%%% Fetch callback function for xmerl_scan.
+%%% Both system and public external identifiers are resolved through their
+%%% URI part; the public identifier itself is ignored.
+fetch({public, _PubId, URI}, S) ->
+    fetch_URI(URI, S);
+fetch({system, URI}, S) ->
+    fetch_URI(URI, S).
+
+%%% Locate and open the external entity named by URI, which is assumed to be
+%%% a file name. On success a new {EmptyBinary, File, Fd} entry is pushed on
+%%% the continuation stack so that the entity's content is read through the
+%%% continuation fun, and {ok, {string, []}, NewS} is returned. If the file
+%%% cannot be opened, or the URI has no path components at all, the result is
+%%% {ok, not_fetched, S}.
+fetch_URI(URI, S) ->
+    %% assume URI is a filename
+    case filename:split(URI) of
+        [] ->
+            %% An empty URI has no last component to look up; report it like
+            %% any other entity that cannot be fetched instead of crashing
+            %% in lists:last/1.
+            ?dbg("ERROR fetch(~p) -> empty URI~n", [URI]),
+            {ok, not_fetched, S};
+        Split ->
+            fetch_split_URI(URI, Split, S)
+    end.
+
+%%% Resolve the (non-empty) split URI to a file name and try to open it.
+fetch_split_URI(URI, Split, S) ->
+    Filename = lists:last(Split),
+    Fullname =
+        case Split of
+            ["/", _|_] ->
+                %% absolute path name
+                URI;
+            ["file:",Name]->
+                %% file:/dtd_name
+                filename:join(S#xmerl_scanner.xmlbase, Name);
+            _ ->
+                filename:join(S#xmerl_scanner.xmlbase, URI)
+        end,
+    %% A file with the same base name found in the fetch path takes precedence
+    %% over the name derived from the URI.
+    File = path_locate(S#xmerl_scanner.fetch_path, Filename, Fullname),
+    ?dbg("fetch(~p) -> {file, ~p}.~n", [URI, File]),
+    case file:open(File, [read, raw, binary]) of
+        {ok, Fd} ->
+            ContS=xmerl_scan:cont_state(S),
+            NewS=xmerl_scan:cont_state([{list_to_binary([]),File,Fd}|ContS],S),
+            {ok, {string, []}, NewS};
+        _Error ->
+            ?dbg("ERROR fetch(~p) -> ~p~n", [URI, _Error]),
+            {ok, not_fetched, S}
+    end.
+
+%%% Search the given directories, in order, for a regular file named FN and
+%%% return the first match (directory joined with FN). When none of the
+%%% directories contains such a file, FullName is returned as fallback.
+path_locate([], _FN, FullName) ->
+    FullName;
+path_locate([Dir|Dirs], FN, FullName) ->
+    Candidate = filename:join(Dir, FN),
+    case filelib:is_regular(Candidate) of
+        true ->
+            Candidate;
+        false ->
+            path_locate(Dirs, FN, FullName)
+    end.
+
+%%% Close callback function for xmerl_scan.
+%%% Pops the top entry of the continuation stack. An entry that still holds an
+%%% open file descriptor has it closed first; an entry already marked eof is
+%%% just removed. Returns the scanner state with the shortened stack.
+close(S) ->
+    case xmerl_scan:cont_state(S) of
+        [{_Sofar, _Fname, Fd}|Rest] ->
+            file:close(Fd),
+            xmerl_scan:cont_state(Rest, S);
+        [{_Fname, eof}|Rest] ->
+            xmerl_scan:cont_state(Rest, S)
+    end.
+
+
+%%% Rules callback functions for xmerl_scan
+%%% rules_write/4 stores Value under the key {Context, Name} in the ETS table
+%%% held in the scanner's rules field. The table (a public set named rules) is
+%%% created on first use, when that field is still undefined. Returns the
+%%% scanner state, which carries the table after the first write.
+rules_write(Context, Name, Value, #xmerl_scanner{rules = Rules} = S0) ->
+    S = case Rules of
+            undefined ->
+                S0#xmerl_scanner{rules = ets:new(rules, [set, public])};
+            _ ->
+                S0
+        end,
+    ets:insert(S#xmerl_scanner.rules, {{Context, Name}, Value}),
+    S.
+
+%%% Look up the value stored under {Context, Name} in the scanner's rules
+%%% table. Returns undefined when no table has been created yet or when the
+%%% key is not present.
+rules_read(Context, Name, #xmerl_scanner{rules = Rules}) ->
+    case Rules of
+        undefined ->
+            undefined;
+        _ ->
+            case ets:lookup(Rules, {Context, Name}) of
+                [{_Key, Value}] ->
+                    Value;
+                [] ->
+                    undefined
+            end
+    end.
+
+
+
+%%% ----------------------------------------------------------------------------
+%%% Generic helper functions
+
+%%% Merge the user supplied options (first argument) into the option list
+%%% Opts. An option whose first element equals that of an entry already in
+%%% Opts replaces that entry in place; any other option (including options
+%%% that are not tuples) is put at the front of the list.
+scanner_options([H|T], Opts) ->
+    %% keyreplace/3 signals "no entry with this key" by throwing false. Only
+    %% that throw is handled here; with a plain catch a crash inside
+    %% keyreplace/3 would be mistaken for the new option list.
+    try keyreplace(H, 1, Opts) of
+        NewOpts ->
+            scanner_options(T, NewOpts)
+    catch
+        throw:false ->
+            scanner_options(T, [H|Opts])
+    end;
+scanner_options([], Opts) ->
+    Opts.
+
+%%% Replace the first element of the list whose Pos:th element equals the
+%%% Pos:th element of New, keeping the order of the list. Throws false when
+%%% the list holds no such element. Elements for which element/2 fails
+%%% (non-tuples, too short tuples) never match, and neither does anything
+%%% when New itself has no Pos:th element.
+keyreplace(_New, _Pos, []) ->
+    throw(false);
+keyreplace(New, Pos, [Old|Rest]) when element(Pos, New) == element(Pos, Old) ->
+    [New|Rest];
+keyreplace(New, Pos, [Other|Rest]) ->
+    [Other|keyreplace(New, Pos, Rest)].
+
+