diff options
Diffstat (limited to 'lib/xmerl/src/xmerl_validate.erl')
-rw-r--r-- | lib/xmerl/src/xmerl_validate.erl | 663 |
1 files changed, 663 insertions, 0 deletions
diff --git a/lib/xmerl/src/xmerl_validate.erl b/lib/xmerl/src/xmerl_validate.erl new file mode 100644 index 0000000000..893e23ca34 --- /dev/null +++ b/lib/xmerl/src/xmerl_validate.erl @@ -0,0 +1,663 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2003-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(xmerl_validate). + +-export([validate/2]). + + +-include("xmerl.hrl"). % record def, macros + + + +%% +type validate(xmerl_scanner(),xmlElement())-> +%% xmlElment() | {error,tuple()}. +validate(#xmerl_scanner{doctype_name=DTName,doctype_DTD=OpProv}, + #xmlElement{name=Name}) + when DTName=/=Name,OpProv=/=option_provided-> + {error, {mismatched_root_element,Name,DTName}}; +validate(#xmerl_scanner{rules=Rules}=S, + XML=#xmlElement{name=Name})-> + catch do_validation(read_rules(Rules,Name),XML,Rules,S); +validate(_, XML) -> + {error, {no_xml_element, XML}}. + + + +%% +type validate(rules(),xmlElement())-> +%% {ok,xmlElement()} | {error,tuple()}. +do_validation(undefined,#xmlElement{name=Name}, _Rules,_S) -> + {error,{unknown_element,Name}}; +do_validation(El_Rule,XML,Rules,S)-> + case catch valid_attributes(El_Rule#xmlElement.attributes, + XML#xmlElement.attributes,S) of + {'EXIT',Reason} -> + {error,Reason}; + {error,Reason} -> + {error,Reason}; + Attr_2-> +% XML_=XML#xmlElement{attributes=Attr_2}, + El_Rule_Cont = El_Rule#xmlElement.content, + WSActionMode = ws_action_mode(El_Rule#xmlElement.elementdef, + El_Rule_Cont,S), + XML_Cont = XML#xmlElement.content, + check_direct_ws_SDD(XML_Cont,WSActionMode), + case valid_contents(El_Rule_Cont, + XML_Cont,Rules,S,WSActionMode) of + {error,Reason}-> + {error,Reason}; + {error,Reason,N}-> + {error,Reason,N}; + XMLS -> + XML#xmlElement{attributes=Attr_2,content=XMLS} + end + end. + +check_direct_ws_SDD(XML,always_preserve) -> + case XML of + [#xmlText{}|_Rest] -> + exit({error,{illegal_whitespace_standalone_doc,XML}}); + _ -> ok + end, + case lists:reverse(XML) of + [#xmlText{}|_Rest2] -> + exit({error,{illegal_whitespace_standalone_doc,XML}}); + _ -> ok + end; +check_direct_ws_SDD(_,_) -> ok. + +ws_action_mode({external,_},Content,#xmerl_scanner{standalone=yes}) -> + case element_content(Content) of + children -> + always_preserve; + _ -> + preserve + end; +ws_action_mode(_,_,_) -> + preserve. + +element_content(A) when is_atom(A),A /= any, A /= empty -> + children; +element_content({choice,L}) when is_list(L) -> + element_content(L); +element_content({seq,L}) when is_list(L) -> + element_content(L); +element_content(['#PCDATA'|_T]) -> + mixed; +element_content('#PCDATA') -> + mixed; +element_content({'*',Rest}) -> + element_content(Rest); +element_content(_) -> children. + +%% +type read_rules(DTD::atom(),Element_Name::atom())-> +%% undefined | xmlElement(). +read_rules(_, pcdata) -> + pcdata; +read_rules(T, Name) -> + case ets:lookup(T, {elem_def, Name}) of + [] -> + undefined; + [{_K, V}] -> + V + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%% Attributes Validation %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +deftype attribute_rule() = {Attr_Name::atom(),attribute_type(), +%% attribute_priority()}. + +%% +type valid_attributes([attribute_rule()],[xmlAttribute()])-> +%% [xmlAttribute()] | {error,attribute_unknow}. +valid_attributes(All_Attr,[#xmlAttribute{}|_T]=Attr,S)-> + single_ID_definition(All_Attr), + vc_Name_Token_IDREFS(All_Attr,Attr), + lists:foreach(fun(#xmlAttribute{name=Name})-> + case is_attribute_exist(Name,All_Attr) of + true -> + ok; + false -> + exit({error,{attribute_unknown,Name}}) + end + end, + Attr), + lists:flatten(lists:foldl(fun({Name,DataType,IF,DefDecl,Env},Attr_2)-> + Attr_2++ + [valid_attribute(Name,DataType,IF, + DefDecl,Attr,Env,S)] + end,[],All_Attr)); +valid_attributes([],[],_) -> + []; +valid_attributes(All_Attr,[],S) -> + single_ID_definition(All_Attr), + lists:flatten(lists:foldl(fun({Name,DataType,IF,DefDecl,Env},Attr_2)-> + Attr_2++[valid_attribute(Name, + DataType,IF, + DefDecl, + [], + Env,S)] + end,[],All_Attr)). + +%%%% [60] DefaultDecl::= +%%%% '#REQUIRED' | '#IMPLIED' +%%%% | (('#FIXED' S)? AttValue) +%% +deftype attribute_priority = '#REQUIRED'|'#FIXED'|'#IMPLIED'. + +%% +type valid_attribute(Name::atom(),DataType::attribute_value(), +%% IF::attribute_priority(),[xmlAttribute()])-> +%% [xmlAttribute()] | exit(). +valid_attribute(Name,DataType,IF,DefaultDecl,List_of_Attributes,Env,S)-> + SA = S#xmerl_scanner.standalone, + Attr=search_attr(Name,List_of_Attributes), + check_SDD_validity(SA,Env,Attr,IF), + case {DefaultDecl,IF,Attr} of + {'#REQUIRED',_,no_attribute}-> + exit({error,{Name,is_required}}); + {'#IMPLIED',_,no_attribute}-> + []; %% and no default value + {'#FIXED',DefVal,#xmlAttribute{value=DefVal}=Attr} -> + Attr; + {'#FIXED',A,no_attribute} -> + #xmlAttribute{name=Name,value=A}; % FIXED declare value becomes default. + {'#FIXED',A,B} -> + exit({error,{fixed_default_value_missmatch,A,B}}); + {_,Value,no_attribute} when is_list(Value)-> + #xmlAttribute{name=Name,value=Value}; + {_,_,#xmlAttribute{}=Attr}-> + %% do test data value, and default_value + test_attribute_value(DataType,Attr,IF,S); + {DefDecl,Else,XML} -> + exit({error,{unknow_attribute_type,DefDecl,Else,XML}}) + end. + +vc_Name_Token_IDREFS([{Name,Type,_,_,_}|Rest],Attrs) + when Type=='NMTOKEN';Type=='NMTOKENS'-> + case lists:keysearch(Name,#xmlAttribute.name,Attrs) of + {value,A} -> + valid_nmtoken_value(A#xmlAttribute.value,Type); + _ -> ok + end, + vc_Name_Token_IDREFS(Rest,Attrs); +vc_Name_Token_IDREFS([{Name,Type,_,_,_}|Rest],Attrs) + when Type=='IDREFS'-> + case lists:keysearch(Name,#xmlAttribute.name,Attrs) of + {value,A} -> + valid_IDREFS(A#xmlAttribute.value,Type); + _ -> ok + end, + vc_Name_Token_IDREFS(Rest,Attrs); +vc_Name_Token_IDREFS([_H|Rest],Attrs) -> + vc_Name_Token_IDREFS(Rest,Attrs); +vc_Name_Token_IDREFS([],_) -> ok. + +valid_nmtoken_value([],'NMTOKENS') -> + exit({error,{at_least_one_Nmtoken_required}}); +% valid_nmtoken_value([H|_T] = L,'NMTOKENS') when is_list(H) -> +% ValidChar = +% fun(X) -> +% case xmerl_lib:is_namechar(X) of +% false -> +% exit({error,{invalid_character_in_Nmtoken,X}}); +% _ -> ok +% end +% end, +% ValidCharList = +% fun([Nmtok|T],F) -> +% lists:foreach(ValidChar,Nmtok), +% F(T,F); +% ([],_) -> ok +% end, +% ValidCharList(L,ValidChar); +valid_nmtoken_value(Nmtok,_) -> + ValidChar = + fun(X) when ?whitespace(X),Nmtok=='NMTOKENS' -> + ok; + (X) -> + case xmerl_lib:is_namechar(X) of + false -> + exit({error,{invalid_character_in_Nmtoken,X}}); + _ -> ok + end + end, + lists:foreach(ValidChar,Nmtok). + +valid_IDREFS([],'IDREFS') -> + exit({error,{at_least_one_IDREF_Name_required}}); +valid_IDREFS(_Str,'IDREFS') -> + ok. + +single_ID_definition([{_,'ID',_,_,_}=Att1|Rest]) -> + case lists:keysearch('ID',2,Rest) of + {value,Att2} -> + exit({error,{just_one_ID_definition_allowed,Att1,Att2}}); + _ -> ok + end; +single_ID_definition([_H|T]) -> + single_ID_definition(T); +single_ID_definition([]) -> + ok. + +check_SDD_validity(yes,{external,_},#xmlAttribute{name=Name,normalized=true},_) -> + exit({error,{externally_defed_attribute_normalized_in_standalone_doc,Name}}); +check_SDD_validity(yes,{external,_},no_attribute,V) when V /= no_value-> + exit({error,{externally_defed_attribute_with_default_value_missing_in_standalone_doc}}); +check_SDD_validity(_,_,_,_) -> + ok. + +search_attr(Name,[#xmlAttribute{name=Name}=H|_T])-> + H; +search_attr(Name,[#xmlAttribute{}|T])-> + search_attr(Name,T); +search_attr(_Name,_T) -> + no_attribute. + +is_attribute_exist(Name,[{Name,_,_,_,_}|_T])-> + true; +is_attribute_exist(Name,[{_Attr,_,_,_,_}|T]) -> + is_attribute_exist(Name,T); +is_attribute_exist(_Name,[]) -> + false. + +%%%%[54] AttType::= StringType | TokenizedType | EnumeratedType +%%%%[55] StringType::= 'CDATA' +%%%%[56] TokenizedType::= 'ID'|'IDREF'| 'IDREFS'|'ENTITY'| 'ENTITIES' +%%%% | 'NMTOKEN'| 'NMTOKENS' +%%%%[57] EnumeratedType::= NotationType | Enumeration +%%%%[58] NotationType::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' +%%%%[59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' + +%% +deftype attribute_type()-> 'CDATA' | 'ID'|'IDREF'| 'IDREFS'|'ENTITY'| +%% 'ENTITIES'| 'NMTOKEN'| 'NMTOKENS' +%% {enumeration,[List_of_value::atom()]}. + +%% +type test_attribute_value(attribute_type(),xmlAttribute())-> +%% xmlAttribute()| exit. +%%%% test the constraint validity of Attribute value. +test_attribute_value('CDATA',#xmlAttribute{}=Attr,_,_) -> + Attr; +test_attribute_value('NMTOKEN',#xmlAttribute{name=Name,value=V}=Attr, + Default,_S) -> + Fun = + fun (X)-> + case xmerl_lib:is_namechar(X) of + true-> + ok; + false-> + %%io:format("Warning*** nmtoken,value_incorrect: ~p~n",[V]), + exit({error,{invalid_value_nmtoken,Name,V}}) + end + end, + lists:foreach(Fun,V), + if + is_list(Default) -> + lists:foreach(Fun,Default); + true -> ok + end, + Attr; +test_attribute_value('NMTOKENS',#xmlAttribute{name=Name,value=V}=Attr, + Default,_S) -> + Fun = + fun (X)-> + case xmerl_lib:is_namechar(X) of + true-> + ok; + false when ?whitespace(X)-> + ok; + false -> + exit({error,{invalid_value_nmtokens,Name,V}}) + end + end, + lists:foreach(Fun,V), + if + is_list(Default) -> + lists:foreach(Fun,Default); + true -> ok + end, + Attr; +test_attribute_value(Ent,#xmlAttribute{name=_Name,value=V}=Attr,_Default, + S=#xmerl_scanner{rules_read_fun=Read}) + when Ent == 'ENTITY'; Ent == 'ENTITIES'-> + %% The default value is already checked + NameListFun = + fun([],Acc,_) -> + lists:reverse(Acc); + (Str,Acc,Fun) -> + {N,Str2} = scan_name(Str,[]), + Fun(Str2,[N|Acc],Fun) + end, + NameList = NameListFun(V,[],NameListFun), + VC_Entity_Name = + fun(X) -> + case Read(entity,X,S) of + {_,external,{_,{ndata,_}}} -> + ok; + _ -> exit({error,{vc_Entity_Name,X,V}}) + end + end, + lists:foreach(VC_Entity_Name,NameList), + Attr; +test_attribute_value({Type,L},#xmlAttribute{value=Value}=Attr,Default,_S) + when Type == enumeration; Type == notation -> + ValidDefault = + if + is_atom(Default) -> true; + true -> lists:member(list_to_atom(Default),L) + end, + NoDuplicatesFun = + fun(_,_,notation) -> true; + ([],_,_) -> true; + ([H|T],F,Enum) -> + case lists:member(H,T) of + true -> false; + _ -> F(T,F,Enum) + end + end, + NoDuplicates = NoDuplicatesFun(L,NoDuplicatesFun,Type), + case {lists:member(list_to_atom(Value),L),ValidDefault,NoDuplicates} of + {true,true,true}-> + Attr; + {false,_,_} -> + exit({error,{attribute_value_unknow,Value,{list,L}}}); + {_,false,_} -> + exit({error,{attribute_default_value_unknow,Default,{list,L}}}); + {_,_,false} -> + exit({error,{duplicate_tokens_not_allowed,{list,L}}}) + end; +test_attribute_value(_Rule,Attr,_,_) -> +% io:format("Attr Value*****~nRule~p~nValue~p~n",[Rule,Attr]), + Attr. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%% Contents Validation %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%Element-content Models +%%%%[47] children::= (choice | seq) ('?' | '*' | '+')? +%%%%[48] cp::= (Name | choice | seq) ('?' | '*' | '+')? +%%%%[49] choice::= '(' S? cp ( S? '|' S? cp )+ S? ')' +%%%%[50] seq::= '(' S? cp ( S? ',' S? cp )* S? ')' +%%%%[51] Mixed::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' +%%%% | '(' S? '#PCDATA' S? ')' + + +%% +type valid_contents([rule()],[xmlElement()])-> +%% [xmlElement() | {error,???}. +valid_contents(Rule,XMLS,Rules,S,WSActionMode)-> + case parse(Rule,XMLS,Rules,WSActionMode,S) of + {XML_N,[]}-> + lists:flatten(XML_N); + {_,[#xmlElement{name=Name}|_T]} -> + exit({error,{element,Name,isnt_comprise_in_the_rule,Rule}}); + {_,[#xmlText{}=Txt|_T]} -> + exit({error,{element,text,Txt,isnt_comprise_in_the_rule,Rule}}); + {error,Reason} -> + {error,Reason}; + {error,Reason,N} -> + {error,Reason,N} + end. + +parse({'*',SubRule},XMLS,Rules,WSaction,S)-> + star(SubRule,XMLS,Rules,WSaction,[],S); +parse({'+',SubRule},XMLS,Rules,WSaction,S) -> + plus(SubRule,XMLS,Rules,WSaction,S); +parse({choice,CHOICE},XMLS,Rules,WSaction,S)-> +% case XMLS of +% [] -> +% io:format("~p~n",[{choice,CHOICE,[]}]); +% [#xmlElement{name=Name,pos=Pos}|_] -> +% io:format("~p~n",[{choice,CHOICE,{Name,Pos}}]); +% [#xmlText{value=V}|_] -> +% io:format("~p~n",[{choice,CHOICE,{text,V}}]) +% end, + choice(CHOICE,XMLS,Rules,WSaction,S); +parse(empty,[],_Rules,_WSaction,_S) -> + {[],[]}; +parse({'?',SubRule},XMLS,Rules,_WSaction,S)-> + question(SubRule,XMLS,Rules,S); +parse({seq,List},XMLS,Rules,WSaction,S) -> + seq(List,XMLS,Rules,WSaction,S); +parse(El_Name,[#xmlElement{name=El_Name}=XML|T],Rules,_WSaction,S) + when is_atom(El_Name)-> + case do_validation(read_rules(Rules,El_Name),XML,Rules,S) of + {error,R} -> +% {error,R}; + exit(R); + {error,R,_N}-> +% {error,R,N}; + exit(R); + XML_-> + {[XML_],T} + end; +parse(any,Cont,Rules,_WSaction,S) -> + case catch parse_any(Cont,Rules,S) of + Err = {error,_} -> Err; + ValidContents -> {ValidContents,[]} + end; +parse(El_Name,[#xmlElement{name=Name}|_T]=S,_Rules,_WSa,_S) when is_atom(El_Name)-> + {error, + {element_seq_not_conform,{wait,El_Name},{is,Name}}, + {{next,S},{act,[]}} }; +parse(_El_Name,[#xmlPI{}=H|T],_Rules,_WSa,_S) -> + {[H],T}; +parse('#PCDATA',XML,_Rules,_WSa,_S)-> + %%% PCDATA it is 0 , 1 or more #xmlText{}. + parse_pcdata(XML); +parse(El_Name,[#xmlText{}|_T]=S,_Rules,_WSa,_S)-> + {error, + {text_in_place_of,El_Name}, + {{next,S},{act,[]}}}; +parse([],_,_,_,_) -> + {error,no_rule}; +parse(Rule,[],_,_,_) -> + {error,{no_xml_element,Rule}}. + +parse_any([],_Rules,_S) -> + []; +parse_any([H|T],Rules,S) -> + case parse_any(H,Rules,S) of + [Cont] -> + [Cont|parse_any(T,Rules,S)]; + Err -> throw(Err) + end; +parse_any(#xmlElement{}=XML,Rules,S) -> + case do_validation(read_rules(Rules,el_name(XML)),XML,Rules,S) of + {error,R} -> + {error,R}; + {error,R,N}-> + {error,R,N}; + XML_-> + [XML_] + end; +parse_any(El,_Rules,_S) -> + [El]. + + + +%% XXX remove first function clause +% choice(_Choice,[#xmlText{}=T|R],_Rules) -> +% {[T],R}; +choice([CH|CHS],[_XML|_T]=XMLS,Rules,WSaction,S)-> + {WS,XMLS1} = whitespace_action(XMLS,ws_action(WSaction,remove)), + case parse(CH,XMLS1,Rules,ws_action(WSaction,remove),S) of + {error,_R} -> + choice(CHS,XMLS,Rules,WSaction,S); + {error,_R,_N} -> + choice(CHS,XMLS,Rules,WSaction,S); %% XXX add a case {[],XML} + {[],XMLS1} -> %% Maybe a sequence with * or ? elements that + %% didn't match + case CHS of + [] -> % choice has succeded but without matching XMLS1 + {[],XMLS1}; + _ -> % there are more choice alternatives to try with + choice(CHS,XMLS1,Rules,WSaction,S) + end; +%% choice(CHS,XMLS1,Rules,WSaction,S); + {Tree,XMLS2}-> + {WS2,XMLS3} = whitespace_action(XMLS2,ws_action(WSaction,remove)), + {WS2++[Tree]++WS,XMLS3} + end; +choice([],XMLS,_,WSaction,_S)-> + case whitespace_action(XMLS,ws_action(WSaction,remove)) of + Res={_,[]} -> Res; + _ -> + {error,element_unauthorize_in_choice,{{next,XMLS},{act,[]}}} + end; +choice(_,[],_,_,_S) -> + {[],[]}. + +plus(Rule,XMLS,Rules,WSaction,S) -> + %% 1 or more + {WS,XMLS1}=whitespace_action(XMLS,WSaction), + case parse(Rule,XMLS1,Rules,WSaction,S) of + {error, Reason,_XML} -> + {error, Reason}; + {error, X} -> + {error, X}; + {Tree, XMLS2} -> + case star(Rule, XMLS2,Rules,WSaction,[],S) of + {[], _} -> + {WS++[Tree], XMLS2}; + {Tree_1, XMLS3} -> + {WS++[Tree]++Tree_1, XMLS3} + end + end. + +star(_Rule,XML,_Rules,_WSa,Tree,_S) when length(XML)==0-> + {[Tree],[]}; +star(Rule,XMLS,Rules,WSaction,Tree,S) -> + {WS,XMLS1} = whitespace_action(XMLS,WSaction), + case parse(Rule,XMLS1,Rules,WSaction,S) of + {error, _E, {{next,N},{act,A}}}-> + %%io:format("Error~p~n",[_E]), + {WS++Tree++A,N}; + {error, _E}-> + %%io:format("Error~p~n",[_E]), +% {WS++[Tree],[]}; + case whitespace_action(XMLS,ws_action(WSaction,remove)) of + {[],_} -> + {WS++[Tree],XMLS}; + {WS2,XMLS2} -> + {WS2++[Tree],XMLS2} + end; + {Tree1,XMLS2}-> + star(Rule,XMLS2,Rules,WSaction,Tree++WS++[Tree1],S) + end. + +question(_Rule, [],_Rules,_S) -> + {[],[]}; +question(Rule, Toks,Rules,S) -> + %% 0 or 1 + case parse(Rule, Toks,Rules,preserve,S) of + {error, _E, _Next}-> + {[],Toks}; + {error, _E} -> + {[], Toks}; + {T,Toks1} -> + {T, Toks1} + end. + +seq(H,Toks,Rules,WSaction,S)-> + case seq2(H,Toks,Rules,[],WSaction,S) of + {error,E}-> + {error,E}; + {error,R,N}-> + {error,R,N}; + {Tree,Toks2}-> + {Tree,Toks2} + end. + +seq2([],[],_,Tree,_WSa,_S)-> + {Tree,[]}; +% seq2([],[#xmlElement{name=Name}|_T]=XMLS,_,Tree,_WSa,_S)-> +% {error,{sequence_finish,Name,isnt_in_the_right_place}, +% {{next,XMLS},{act,Tree}}}; +seq2([],[#xmlText{}]=XML,_,Tree,_WSa,_S)-> + case whitespace_action(XML,remove) of + {[],_} -> + {error,sequence_finish,{{next,XML},{act,Tree}}}; + {WS,Rest} -> + {WS++Tree,Rest} + end; +seq2([],Rest,_,Tree,_WSa,_S) -> + {WS,Rest2}=whitespace_action(Rest,remove), + {WS++Tree,Rest2}; +seq2([H|T],Toks,Rules,Tree,WSaction,S) -> + {WS,Toks1} = whitespace_action(Toks,ws_action(WSaction,remove)), + case parse(H,Toks1,Rules,remove,S) of %% H maybe only match parts of Toks + {error,Reason,_XML}-> + {error,Reason}; + {error,E}-> + {error,E}; + {[],Toks2}-> + seq2(T,Toks2,Rules,Tree,WSaction,S); + {Tree1,Toks2} when is_list(Tree1)-> + seq2(T,Toks2,Rules,Tree++WS++Tree1,WSaction,S); + {Tree1,Toks2}-> + seq2(T,Toks2,Rules,Tree++WS++[Tree1],WSaction,S) + end. + +el_name(#xmlElement{name=Name})-> + Name. + +parse_pcdata([#xmlText{}=H|T])-> + parse_pcdata(T,[H]); +parse_pcdata(H) -> + {[],H}. + +parse_pcdata([#xmlText{}=H|T],Acc)-> + parse_pcdata(T,Acc++[H]); +parse_pcdata(H,Acc) -> + {Acc,H}. + +whitespace([]) -> + true; +whitespace([H|T]) when ?whitespace(H) -> + whitespace(T); +whitespace(_) -> + false. + +whitespace_action(XML,remove) -> + whitespace_remove(XML,[]); +whitespace_action(XML,_) -> + {[],XML}. + +whitespace_remove([#xmlText{value=V,type=text}=T|R]=L,Acc) -> + case whitespace(V) of + true -> + whitespace_remove(R,[T|Acc]); + _ -> + {lists:reverse(Acc),L} + end; +whitespace_remove(L,Acc) -> + {lists:reverse(Acc),L}. + +ws_action(always_preserve=A,_) -> + A; +ws_action(_,B) -> + B. + +scan_name(N,_) when is_atom(N) -> + N; +scan_name([$\s|T],Acc) -> + {list_to_atom(lists:reverse(Acc)),T}; +scan_name([H|T],Acc) -> + scan_name(T,[H|Acc]); +scan_name("",Acc) -> + {list_to_atom(lists:reverse(Acc)),[]}. |