aboutsummaryrefslogtreecommitdiffstats
path: root/lib/xmerl/src/xmerl_scan.erl
diff options
context:
space:
mode:
authorLars Thorsen <[email protected]>2011-10-28 10:37:49 +0200
committerLars Thorsen <[email protected]>2011-11-11 12:07:04 +0100
commit5b2375ff231353c059a340d95b317d998df279fc (patch)
treed8ef0bf3b9c3be19f5569920793f2e3443ce9ee4 /lib/xmerl/src/xmerl_scan.erl
parentef8ba559efdef989a5c608ac40635410500395df (diff)
downloadotp-5b2375ff231353c059a340d95b317d998df279fc.tar.gz
otp-5b2375ff231353c059a340d95b317d998df279fc.tar.bz2
otp-5b2375ff231353c059a340d95b317d998df279fc.zip
Fix bugs and add a flag to skip comments
Add flag {comments, Flag} to xmerl_scan for filtering of comments. Default (true) is that xmlComment records are returned from the scanner and this flag should be set to false if one don't want comments in the output. Fix some bugs to get the test cases to run clean.
Diffstat (limited to 'lib/xmerl/src/xmerl_scan.erl')
-rw-r--r--lib/xmerl/src/xmerl_scan.erl110
1 files changed, 67 insertions, 43 deletions
diff --git a/lib/xmerl/src/xmerl_scan.erl b/lib/xmerl/src/xmerl_scan.erl
index 5f61fe2bb1..ec7ea534d6 100644
--- a/lib/xmerl/src/xmerl_scan.erl
+++ b/lib/xmerl/src/xmerl_scan.erl
@@ -103,6 +103,9 @@
%% <dt><code>{document, Flag}</code></dt>
%% <dd>Set to 'true' if xmerl should return a complete XML document
%% as an xmlDocument record (default 'false').</dd>
+%% <dt><code>{comments, Flag}</code></dt>
+%% <dd>Set to 'false' if xmerl should skip comments otherwise they will
+%% be returned as xmlComment records (default 'true').</dd>
%% <dt><code>{default_attrs, Flag}</code></dt>
%% <dd>Set to 'true' if xmerl should add to elements missing attributes
%% with a defined default value (default 'false').</dd>
@@ -394,6 +397,8 @@ initial_state([{doctype_DTD,DTD}|T], S) ->
initial_state(T,S#xmerl_scanner{doctype_DTD = DTD});
initial_state([{document, F}|T], S) when is_boolean(F) ->
initial_state(T,S#xmerl_scanner{document = F});
+initial_state([{comments, F}|T], S) when is_boolean(F) ->
+ initial_state(T,S#xmerl_scanner{comments = F});
initial_state([{default_attrs, F}|T], S) when is_boolean(F) ->
initial_state(T,S#xmerl_scanner{default_attrs = F});
initial_state([{text_decl,Bool}|T], S) ->
@@ -546,8 +551,8 @@ scan_document(Str0, S=#xmerl_scanner{event_fun = Event,
Str=if
Charset == "utf-8" ->
Str0;
- Charset=/=undefined -> % Default character set is UTF-8
- xmerl_ucs:to_unicode(Str0,list_to_atom(Charset));
+ Charset =/= undefined -> % Default character set is UTF-8
+ xmerl_ucs:to_unicode(Str0, list_to_atom(Charset));
true -> %% Charset is undefined if no external input is
%% given, and no auto detection of character
%% encoding was made.
@@ -559,10 +564,10 @@ scan_document(Str0, S=#xmerl_scanner{event_fun = Event,
%% M2 = erlang:memory(),
%% io:format("Memory status after prolog: ~p~n",[M2]),
%%io:format("scan_document 2, prolog parsed~n",[]),
- T2 = scan_mandatory("<",T1,1,S2,expected_element_start_tag),
+ T2 = scan_mandatory("<", T1, 1, S2, expected_element_start_tag),
%% M3 = erlang:memory(),
%% io:format("Memory status before element: ~p~n",[M3]),
- {Res, T3, S3} =scan_element(T2,S2,Pos),
+ {Res, T3, S3} = scan_element(T2,S2,Pos),
%% M4 = erlang:memory(),
%% io:format("Memory status after element: ~p~n",[M4]),
{Misc, _Pos1, Tail, S4}=scan_misc(T3, S3, Pos + 1),
@@ -574,41 +579,41 @@ scan_document(Str0, S=#xmerl_scanner{event_fun = Event,
col = S4#xmerl_scanner.col,
data = document}, S4),
- {Res2,S6} = case validation_mode(ValidateResult) of
+ {Res2, S6} = case validation_mode(ValidateResult) of
off ->
- {Res,cleanup(S5)};
+ {Res, cleanup(S5)};
dtd when Env == element; Env == prolog ->
check_decl2(S5),
- case xmerl_validate:validate(S5,Res) of
- {'EXIT',{error,Reason}} ->
- S5b=cleanup(S5),
- ?fatal({failed_validation,Reason}, S5b);
- {'EXIT',Reason} ->
- S5b=cleanup(S5),
- ?fatal({failed_validation,Reason}, S5b);
- {error,Reason} ->
- S5b=cleanup(S5),
- ?fatal({failed_validation,Reason}, S5b);
- {error,Reason,_Next} ->
- S5b=cleanup(S5),
- ?fatal({failed_validation,Reason}, S5b);
+ case xmerl_validate:validate(S5, Res) of
+ {'EXIT', {error, Reason}} ->
+ S5b = cleanup(S5),
+ ?fatal({failed_validation, Reason}, S5b);
+ {'EXIT', Reason} ->
+ S5b = cleanup(S5),
+ ?fatal({failed_validation, Reason}, S5b);
+ {error, Reason} ->
+ S5b = cleanup(S5),
+ ?fatal({failed_validation, Reason}, S5b);
+ {error, Reason, _Next} ->
+ S5b = cleanup(S5),
+ ?fatal({failed_validation, Reason}, S5b);
_XML ->
- {Res,cleanup(S5)}
+ {Res, cleanup(S5)}
end;
schema ->
- case schemaLocations(Res,S5) of
- {ok,Schemas} ->
+ case schemaLocations(Res, S5) of
+ {ok, Schemas} ->
cleanup(S5),
%%io:format("Schemas: ~p~nRes: ~p~ninhertih_options(S): ~p~n",
%% [Schemas,Res,inherit_options(S5)]),
- XSDRes = xmerl_xsd:process_validate(Schemas,Res,
+ XSDRes = xmerl_xsd:process_validate(Schemas, Res,
inherit_options(S5)),
- handle_schema_result(XSDRes,S5);
+ handle_schema_result(XSDRes, S5);
_ ->
- {Res,cleanup(S5)}
+ {Res, cleanup(S5)}
end;
_ ->
- {Res,cleanup(S5)}
+ {Res, cleanup(S5)}
end,
Res3 =
@@ -617,7 +622,7 @@ scan_document(Str0, S=#xmerl_scanner{event_fun = Event,
Content = lists:reverse(Prolog, [Res2 | lists:reverse(Misc)]),
#xmlDocument{content = Content};
false ->
- Res2#xmlElement{pos = 1}
+ Res2
end,
{Res3, Tail, S6}.
@@ -633,11 +638,11 @@ scan_decl(Str, S=#xmerl_scanner{event_fun = Event,
data = document}, S),
case scan_prolog(Str, S1, _StartPos = 1) of
- {T2="<"++_, S2} ->
+ {_,_,T2="<"++_, S2} ->
{{S2#xmerl_scanner.user_state,T2},[],S2};
- {[], S2}->
+ {_,_,[], S2}->
{[],[],S2};
- {T2, S2} ->
+ {_,_,T2, S2} ->
{_,_,S3} = scan_content(T2,S2,[],_Attrs=[],S2#xmerl_scanner.space,
_Lang=[],_Parents=[],#xmlNamespace{}),
{T2,[],S3}
@@ -783,16 +788,22 @@ scan_misc([], S=#xmerl_scanner{continuation_fun = F}, Pos, Acc) ->
F(fun(MoreBytes, S1) -> scan_misc(MoreBytes, S1, Pos, Acc) end,
fun(S1) -> {Acc, Pos, [], S1} end,
S);
-scan_misc("<!--" ++ T, S0=#xmerl_scanner{acc_fun = F}, Pos, Acc) -> % Comment
+scan_misc("<!--" ++ T, S0=#xmerl_scanner{acc_fun = F, comments=CF}, Pos, Acc) -> % Comment
?bump_col(4),
{C, T1, S1} = scan_comment(T, S, Pos, _Parents = [], _Lang = []),
- {Acc2, Pos2, S3} = case F(C, Acc, S1) of
- {Acc1, S2} ->
- {Acc1, Pos + 1, S2};
- {Acc1, Pos1, S2} ->
- {Acc1, Pos1, S2}
- end,
- scan_misc(T1,S3,Pos2,Acc2);
+ case CF of
+ true ->
+ {Acc2, Pos2, S3} =
+ case F(C, Acc, S1) of
+ {Acc1, S2} ->
+ {Acc1, Pos + 1, S2};
+ {Acc1, Pos1, S2} ->
+ {Acc1, Pos1, S2}
+ end,
+ scan_misc(T1, S3, Pos2, Acc2);
+ false ->
+ scan_misc(T1, S1, Pos, Acc)
+ end;
scan_misc("<?" ++ T, S0=#xmerl_scanner{acc_fun = F}, Pos, Acc) -> % PI
?dbg("prolog(\"<?\")~n", []),
?bump_col(2),
@@ -2553,6 +2564,23 @@ scan_content("&" ++ T, S0, Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,[]) -
_ ->
scan_content(string_to_char_set(S1#xmerl_scanner.encoding,ExpRef)++T1,S1,Pos,Name,Attrs,Space,Lang,Parents,NS,Acc,[])
end;
+scan_content("<!--" ++ T, S0=#xmerl_scanner{acc_fun = F, comments=CF}, Pos, Name, Attrs, Space,
+ Lang, Parents, NS, Acc,[]) ->
+ ?bump_col(4),
+ {C, T1, S1} = scan_comment(T, S, Pos, Parents, Lang),
+ case CF of
+ true ->
+ {Acc2, Pos2, S3} =
+ case F(C, Acc, S1) of
+ {Acc1, S2} ->
+ {Acc1, Pos + 1, S2};
+ {Acc1, Pos1, S2} ->
+ {Acc1, Pos1, S2}
+ end,
+ scan_content(T1, S3, Pos2, Name, Attrs, Space, Lang, Parents, NS, Acc2,[]);
+ false ->
+ scan_content(T1, S1, Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,[])
+ end;
scan_content("<" ++ T, S0, Pos, Name, Attrs, Space, Lang, Parents, NS, Acc,[]) ->
?bump_col(1),
{Markup, T1, S1} =
@@ -2612,10 +2640,6 @@ scan_content_markup("![CDATA[" ++ T, S0, Pos, _Name, _Attrs,
scan_content_markup("?"++T,S0,Pos,_Name,_Attrs,_Space,_Lang,Parents,_NS) ->
?bump_col(1),
scan_pi(T, S, Pos, Parents);
-scan_content_markup("!--" ++ T, S0, Pos, _Name, _Attrs,
- _Space, Lang, Parents, _NS) ->
- ?bump_col(1),
- scan_comment(T, S, Pos, Parents, Lang);
scan_content_markup(T, S, Pos, _Name, _Attrs, Space, Lang, Parents, NS) ->
scan_element(T, S, Pos, Space, Lang, Parents, NS).
@@ -4033,7 +4057,7 @@ schemaLocations(El,#xmerl_scanner{schemaLocation=SL}) ->
schemaLocations(#xmlElement{attributes=Atts,xmlbase=_Base}) ->
Pred = fun(#xmlAttribute{name=schemaLocation}) -> false;
- (#xmlAttribute{namespace={_,"schemaLocation"}}) -> false;
+ (#xmlAttribute{nsinfo={_,"schemaLocation"}}) -> false;
(_) -> true
end,
case lists:dropwhile(Pred,Atts) of