diff options
Diffstat (limited to 'lib/xmerl/src/xmerl_scan.erl')
-rw-r--r-- | lib/xmerl/src/xmerl_scan.erl | 110 |
1 files changed, 67 insertions, 43 deletions
diff --git a/lib/xmerl/src/xmerl_scan.erl b/lib/xmerl/src/xmerl_scan.erl index 5f61fe2bb1..ec7ea534d6 100644 --- a/lib/xmerl/src/xmerl_scan.erl +++ b/lib/xmerl/src/xmerl_scan.erl @@ -103,6 +103,9 @@ %% <dt><code>{document, Flag}</code></dt> %% <dd>Set to 'true' if xmerl should return a complete XML document %% as an xmlDocument record (default 'false').</dd> +%% <dt><code>{comments, Flag}</code></dt> +%% <dd>Set to 'false' if xmerl should skip comments otherwise they will +%% be returned as xmlComment records (default 'true').</dd> %% <dt><code>{default_attrs, Flag}</code></dt> %% <dd>Set to 'true' if xmerl should add to elements missing attributes %% with a defined default value (default 'false').</dd> @@ -394,6 +397,8 @@ initial_state([{doctype_DTD,DTD}|T], S) -> initial_state(T,S#xmerl_scanner{doctype_DTD = DTD}); initial_state([{document, F}|T], S) when is_boolean(F) -> initial_state(T,S#xmerl_scanner{document = F}); +initial_state([{comments, F}|T], S) when is_boolean(F) -> + initial_state(T,S#xmerl_scanner{comments = F}); initial_state([{default_attrs, F}|T], S) when is_boolean(F) -> initial_state(T,S#xmerl_scanner{default_attrs = F}); initial_state([{text_decl,Bool}|T], S) -> @@ -546,8 +551,8 @@ scan_document(Str0, S=#xmerl_scanner{event_fun = Event, Str=if Charset == "utf-8" -> Str0; - Charset=/=undefined -> % Default character set is UTF-8 - xmerl_ucs:to_unicode(Str0,list_to_atom(Charset)); + Charset =/= undefined -> % Default character set is UTF-8 + xmerl_ucs:to_unicode(Str0, list_to_atom(Charset)); true -> %% Charset is undefined if no external input is %% given, and no auto detection of character %% encoding was made. @@ -559,10 +564,10 @@ scan_document(Str0, S=#xmerl_scanner{event_fun = Event, %% M2 = erlang:memory(), %% io:format("Memory status after prolog: ~p~n",[M2]), %%io:format("scan_document 2, prolog parsed~n",[]), - T2 = scan_mandatory("<",T1,1,S2,expected_element_start_tag), + T2 = scan_mandatory("<", T1, 1, S2, expected_element_start_tag), %% M3 = erlang:memory(), %% io:format("Memory status before element: ~p~n",[M3]), - {Res, T3, S3} =scan_element(T2,S2,Pos), + {Res, T3, S3} = scan_element(T2,S2,Pos), %% M4 = erlang:memory(), %% io:format("Memory status after element: ~p~n",[M4]), {Misc, _Pos1, Tail, S4}=scan_misc(T3, S3, Pos + 1), @@ -574,41 +579,41 @@ scan_document(Str0, S=#xmerl_scanner{event_fun = Event, col = S4#xmerl_scanner.col, data = document}, S4), - {Res2,S6} = case validation_mode(ValidateResult) of + {Res2, S6} = case validation_mode(ValidateResult) of off -> - {Res,cleanup(S5)}; + {Res, cleanup(S5)}; dtd when Env == element; Env == prolog -> check_decl2(S5), - case xmerl_validate:validate(S5,Res) of - {'EXIT',{error,Reason}} -> - S5b=cleanup(S5), - ?fatal({failed_validation,Reason}, S5b); - {'EXIT',Reason} -> - S5b=cleanup(S5), - ?fatal({failed_validation,Reason}, S5b); - {error,Reason} -> - S5b=cleanup(S5), - ?fatal({failed_validation,Reason}, S5b); - {error,Reason,_Next} -> - S5b=cleanup(S5), - ?fatal({failed_validation,Reason}, S5b); + case xmerl_validate:validate(S5, Res) of + {'EXIT', {error, Reason}} -> + S5b = cleanup(S5), + ?fatal({failed_validation, Reason}, S5b); + {'EXIT', Reason} -> + S5b = cleanup(S5), + ?fatal({failed_validation, Reason}, S5b); + {error, Reason} -> + S5b = cleanup(S5), + ?fatal({failed_validation, Reason}, S5b); + {error, Reason, _Next} -> + S5b = cleanup(S5), + ?fatal({failed_validation, Reason}, S5b); _XML -> - {Res,cleanup(S5)} + {Res, cleanup(S5)} end; schema -> - case schemaLocations(Res,S5) of - {ok,Schemas} -> + case schemaLocations(Res, S5) of + {ok, Schemas} -> cleanup(S5), %%io:format("Schemas: ~p~nRes: ~p~ninhertih_options(S): ~p~n", %% [Schemas,Res,inherit_options(S5)]), - XSDRes = xmerl_xsd:process_validate(Schemas,Res, + XSDRes = xmerl_xsd:process_validate(Schemas, Res, inherit_options(S5)), - handle_schema_result(XSDRes,S5); + handle_schema_result(XSDRes, S5); _ -> - {Res,cleanup(S5)} + {Res, cleanup(S5)} end; _ -> - {Res,cleanup(S5)} + {Res, cleanup(S5)} end, Res3 = @@ -617,7 +622,7 @@ scan_document(Str0, S=#xmerl_scanner{event_fun = Event, Content = lists:reverse(Prolog, [Res2 | lists:reverse(Misc)]), #xmlDocument{content = Content}; false -> - Res2#xmlElement{pos = 1} + Res2 end, {Res3, Tail, S6}. @@ -633,11 +638,11 @@ scan_decl(Str, S=#xmerl_scanner{event_fun = Event, data = document}, S), case scan_prolog(Str, S1, _StartPos = 1) of - {T2="<"++_, S2} -> + {_,_,T2="<"++_, S2} -> {{S2#xmerl_scanner.user_state,T2},[],S2}; - {[], S2}-> + {_,_,[], S2}-> {[],[],S2}; - {T2, S2} -> + {_,_,T2, S2} -> {_,_,S3} = scan_content(T2,S2,[],_Attrs=[],S2#xmerl_scanner.space, _Lang=[],_Parents=[],#xmlNamespace{}), {T2,[],S3} @@ -783,16 +788,22 @@ scan_misc([], S=#xmerl_scanner{continuation_fun = F}, Pos, Acc) -> F(fun(MoreBytes, S1) -> scan_misc(MoreBytes, S1, Pos, Acc) end, fun(S1) -> {Acc, Pos, [], S1} end, S); -scan_misc("<!--" ++ T, S0=#xmerl_scanner{acc_fun = F}, Pos, Acc) -> % Comment +scan_misc("<!--" ++ T, S0=#xmerl_scanner{acc_fun = F, comments=CF}, Pos, Acc) -> % Comment ?bump_col(4), {C, T1, S1} = scan_comment(T, S, Pos, _Parents = [], _Lang = []), - {Acc2, Pos2, S3} = case F(C, Acc, S1) of - {Acc1, S2} -> - {Acc1, Pos + 1, S2}; - {Acc1, Pos1, S2} -> - {Acc1, Pos1, S2} - end, - scan_misc(T1,S3,Pos2,Acc2); + case CF of + true -> + {Acc2, Pos2, S3} = + case F(C, Acc, S1) of + {Acc1, S2} -> + {Acc1, Pos + 1, S2}; + {Acc1, Pos1, S2} -> + {Acc1, Pos1, S2} + end, + scan_misc(T1, S3, Pos2, Acc2); + false -> + scan_misc(T1, S1, Pos, Acc) + end; scan_misc("<?" ++ T, S0=#xmerl_scanner{acc_fun = F}, Pos, Acc) -> % PI ?dbg("prolog(\"<?\")~n", []), ?bump_col(2), @@ -2553,6 +2564,23 @@ scan_content("&" ++ T, S0, Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,[]) - _ -> scan_content(string_to_char_set(S1#xmerl_scanner.encoding,ExpRef)++T1,S1,Pos,Name,Attrs,Space,Lang,Parents,NS,Acc,[]) end; +scan_content("<!--" ++ T, S0=#xmerl_scanner{acc_fun = F, comments=CF}, Pos, Name, Attrs, Space, + Lang, Parents, NS, Acc,[]) -> + ?bump_col(4), + {C, T1, S1} = scan_comment(T, S, Pos, Parents, Lang), + case CF of + true -> + {Acc2, Pos2, S3} = + case F(C, Acc, S1) of + {Acc1, S2} -> + {Acc1, Pos + 1, S2}; + {Acc1, Pos1, S2} -> + {Acc1, Pos1, S2} + end, + scan_content(T1, S3, Pos2, Name, Attrs, Space, Lang, Parents, NS, Acc2,[]); + false -> + scan_content(T1, S1, Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,[]) + end; scan_content("<" ++ T, S0, Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,[]) -> ?bump_col(1), {Markup, T1, S1} = @@ -2612,10 +2640,6 @@ scan_content_markup("![CDATA[" ++ T, S0, Pos, _Name, _Attrs, scan_content_markup("?"++T,S0,Pos,_Name,_Attrs,_Space,_Lang,Parents,_NS) -> ?bump_col(1), scan_pi(T, S, Pos, Parents); -scan_content_markup("!--" ++ T, S0, Pos, _Name, _Attrs, - _Space, Lang, Parents, _NS) -> - ?bump_col(1), - scan_comment(T, S, Pos, Parents, Lang); scan_content_markup(T, S, Pos, _Name, _Attrs, Space, Lang, Parents, NS) -> scan_element(T, S, Pos, Space, Lang, Parents, NS). @@ -4033,7 +4057,7 @@ schemaLocations(El,#xmerl_scanner{schemaLocation=SL}) -> schemaLocations(#xmlElement{attributes=Atts,xmlbase=_Base}) -> Pred = fun(#xmlAttribute{name=schemaLocation}) -> false; - (#xmlAttribute{namespace={_,"schemaLocation"}}) -> false; + (#xmlAttribute{nsinfo={_,"schemaLocation"}}) -> false; (_) -> true end, case lists:dropwhile(Pred,Atts) of |