From 5b2375ff231353c059a340d95b317d998df279fc Mon Sep 17 00:00:00 2001 From: Lars Thorsen Date: Fri, 28 Oct 2011 10:37:49 +0200 Subject: Fix bugs and add a flag to skip comments Add flag {comments, Flag} to xmerl_scan for filtering of comments. Default (true) is that xmlComment records are returned from the scanner and this flag should be set to false if one don't want comments in the output. Fix some bugs to get the test cases to run clean. --- lib/xmerl/src/xmerl_scan.erl | 110 ++++++++++++++++++++++++++----------------- 1 file changed, 67 insertions(+), 43 deletions(-) (limited to 'lib/xmerl/src/xmerl_scan.erl') diff --git a/lib/xmerl/src/xmerl_scan.erl b/lib/xmerl/src/xmerl_scan.erl index 5f61fe2bb1..ec7ea534d6 100644 --- a/lib/xmerl/src/xmerl_scan.erl +++ b/lib/xmerl/src/xmerl_scan.erl @@ -103,6 +103,9 @@ %%
{document, Flag}
%%
Set to 'true' if xmerl should return a complete XML document %% as an xmlDocument record (default 'false').
+%%
{comments, Flag}
+%%
Set to 'false' if xmerl should skip comments otherwise they will +%% be returned as xmlComment records (default 'true').
%%
{default_attrs, Flag}
%%
Set to 'true' if xmerl should add to elements missing attributes %% with a defined default value (default 'false').
@@ -394,6 +397,8 @@ initial_state([{doctype_DTD,DTD}|T], S) -> initial_state(T,S#xmerl_scanner{doctype_DTD = DTD}); initial_state([{document, F}|T], S) when is_boolean(F) -> initial_state(T,S#xmerl_scanner{document = F}); +initial_state([{comments, F}|T], S) when is_boolean(F) -> + initial_state(T,S#xmerl_scanner{comments = F}); initial_state([{default_attrs, F}|T], S) when is_boolean(F) -> initial_state(T,S#xmerl_scanner{default_attrs = F}); initial_state([{text_decl,Bool}|T], S) -> @@ -546,8 +551,8 @@ scan_document(Str0, S=#xmerl_scanner{event_fun = Event, Str=if Charset == "utf-8" -> Str0; - Charset=/=undefined -> % Default character set is UTF-8 - xmerl_ucs:to_unicode(Str0,list_to_atom(Charset)); + Charset =/= undefined -> % Default character set is UTF-8 + xmerl_ucs:to_unicode(Str0, list_to_atom(Charset)); true -> %% Charset is undefined if no external input is %% given, and no auto detection of character %% encoding was made. @@ -559,10 +564,10 @@ scan_document(Str0, S=#xmerl_scanner{event_fun = Event, %% M2 = erlang:memory(), %% io:format("Memory status after prolog: ~p~n",[M2]), %%io:format("scan_document 2, prolog parsed~n",[]), - T2 = scan_mandatory("<",T1,1,S2,expected_element_start_tag), + T2 = scan_mandatory("<", T1, 1, S2, expected_element_start_tag), %% M3 = erlang:memory(), %% io:format("Memory status before element: ~p~n",[M3]), - {Res, T3, S3} =scan_element(T2,S2,Pos), + {Res, T3, S3} = scan_element(T2,S2,Pos), %% M4 = erlang:memory(), %% io:format("Memory status after element: ~p~n",[M4]), {Misc, _Pos1, Tail, S4}=scan_misc(T3, S3, Pos + 1), @@ -574,41 +579,41 @@ scan_document(Str0, S=#xmerl_scanner{event_fun = Event, col = S4#xmerl_scanner.col, data = document}, S4), - {Res2,S6} = case validation_mode(ValidateResult) of + {Res2, S6} = case validation_mode(ValidateResult) of off -> - {Res,cleanup(S5)}; + {Res, cleanup(S5)}; dtd when Env == element; Env == prolog -> check_decl2(S5), - case xmerl_validate:validate(S5,Res) of - {'EXIT',{error,Reason}} -> - S5b=cleanup(S5), - ?fatal({failed_validation,Reason}, S5b); - {'EXIT',Reason} -> - S5b=cleanup(S5), - ?fatal({failed_validation,Reason}, S5b); - {error,Reason} -> - S5b=cleanup(S5), - ?fatal({failed_validation,Reason}, S5b); - {error,Reason,_Next} -> - S5b=cleanup(S5), - ?fatal({failed_validation,Reason}, S5b); + case xmerl_validate:validate(S5, Res) of + {'EXIT', {error, Reason}} -> + S5b = cleanup(S5), + ?fatal({failed_validation, Reason}, S5b); + {'EXIT', Reason} -> + S5b = cleanup(S5), + ?fatal({failed_validation, Reason}, S5b); + {error, Reason} -> + S5b = cleanup(S5), + ?fatal({failed_validation, Reason}, S5b); + {error, Reason, _Next} -> + S5b = cleanup(S5), + ?fatal({failed_validation, Reason}, S5b); _XML -> - {Res,cleanup(S5)} + {Res, cleanup(S5)} end; schema -> - case schemaLocations(Res,S5) of - {ok,Schemas} -> + case schemaLocations(Res, S5) of + {ok, Schemas} -> cleanup(S5), %%io:format("Schemas: ~p~nRes: ~p~ninhertih_options(S): ~p~n", %% [Schemas,Res,inherit_options(S5)]), - XSDRes = xmerl_xsd:process_validate(Schemas,Res, + XSDRes = xmerl_xsd:process_validate(Schemas, Res, inherit_options(S5)), - handle_schema_result(XSDRes,S5); + handle_schema_result(XSDRes, S5); _ -> - {Res,cleanup(S5)} + {Res, cleanup(S5)} end; _ -> - {Res,cleanup(S5)} + {Res, cleanup(S5)} end, Res3 = @@ -617,7 +622,7 @@ scan_document(Str0, S=#xmerl_scanner{event_fun = Event, Content = lists:reverse(Prolog, [Res2 | lists:reverse(Misc)]), #xmlDocument{content = Content}; false -> - Res2#xmlElement{pos = 1} + Res2 end, {Res3, Tail, S6}. @@ -633,11 +638,11 @@ scan_decl(Str, S=#xmerl_scanner{event_fun = Event, data = document}, S), case scan_prolog(Str, S1, _StartPos = 1) of - {T2="<"++_, S2} -> + {_,_,T2="<"++_, S2} -> {{S2#xmerl_scanner.user_state,T2},[],S2}; - {[], S2}-> + {_,_,[], S2}-> {[],[],S2}; - {T2, S2} -> + {_,_,T2, S2} -> {_,_,S3} = scan_content(T2,S2,[],_Attrs=[],S2#xmerl_scanner.space, _Lang=[],_Parents=[],#xmlNamespace{}), {T2,[],S3} @@ -783,16 +788,22 @@ scan_misc([], S=#xmerl_scanner{continuation_fun = F}, Pos, Acc) -> F(fun(MoreBytes, S1) -> scan_misc(MoreBytes, S1, Pos, Acc) end, fun(S1) -> {Acc, Pos, [], S1} end, S); -scan_misc("