From 1a5796cd12061ebb21e7e51a0b7bdf05ed4786a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 2 Sep 2010 16:56:23 +0200 Subject: xmerl: Add doc/examples directory Needed by the test suite. --- lib/xmerl/doc/examples/xml/xmerl.xml | 523 +++++++++++++++++++++++++++++++++++ 1 file changed, 523 insertions(+) create mode 100755 lib/xmerl/doc/examples/xml/xmerl.xml (limited to 'lib/xmerl/doc/examples/xml/xmerl.xml') diff --git a/lib/xmerl/doc/examples/xml/xmerl.xml b/lib/xmerl/doc/examples/xml/xmerl.xml new file mode 100755 index 0000000000..f02282dbef --- /dev/null +++ b/lib/xmerl/doc/examples/xml/xmerl.xml @@ -0,0 +1,523 @@ + + + +
+ + XMerL - XML processing tools for Erlang + Reference Manual + + + Ulf + Wiger + + + + + 1.02003-02-04 + Converted xml from html + + + + XMerL tools contains xmerl_scan; a non-validating XML + processor, xmerl_xpath; an XPath implementation, xmerl for export + of XML trees to HTML, XML or text and xmerl_xs for XSLT-like + transforms in Erlang. + + + + +
+ xmerl_scan - the XML processor + The (non-validating) XML processor is activated through + xmerl_scan:string/[1,2] or + xmerl_scan:file/[1,2]. + It returns records of the type defined in xmerl.hrl. + + + As far as I can tell, xmerl_scan implements the complete XML + 1.0 spec, including: + + entity expansion + fetching and parsing external DTDs + conditional processing + Unicode + XML Names + + +xmerl_scan:string(Text [ , Options ]) -> #xmlElement{}. +xmerl_scan:file(Filename [ , Options ]) -> #xmlElement{}. + + The Options are basically to specify the behaviour of the + scanner. See the source code for details, but you can specify + funs to handle scanner events (event_fun), process the document + entities once identified (hook_fun), and decide what to do if the + scanner runs into eof before the document is complete + (continuation_fun). + + You can also specify a path (fetch_path) as a list of + directories to search when fetching files. If the file in question + is not in the fetch_path, the URI will be used as a file + name. + + +
+ Customization functions + The XML processor offers a number of hooks for + customization. These hooks are defined as function objects, and + can be provided by the caller. + + The following customization functions are available. If + they also have access to their own state variable, the access + function for this state is identified within parentheses: + + + + event function ( + xmerl_scan:event_state/[1,2] + ) + + hook function ( + xmerl_scan:hook_state/[1,2] + ) + + fetch function ( + xmerl_scan:fetch_state/[1,2] ) + + + continuation function ( + xmerl_scan:cont_state/[1,2] ) + + + rules function ( + xmerl_scan:rules_state/[1,2] ) + + + accumulator function + + close function + + + + For all of the above state access functions, the function + with one argument + (e.g. event_state(GlobalState)) + will read the state variable, while the function with two + arguments (e.g.: event_state(NewStateData, + GlobalState)) will modify it. + + For each function, the description starts with the syntax + for specifying the function in the + Options list. The general forms + are {Tag, Fun}, or + {Tag, Fun, LocalState}. The + second form can be used to initialize the state variable in + question. + +
+ User State + + All customization functions are free to access a + "User state" variable. Care must of course be taken + to coordinate the use of this state. It is recommended that + functions, which do not really have anything to contribute to + the "global" user state, use their own state + variable instead. Another option (used in + e.g. xmerl_eventp.erl) is for + customization functions to share one of the local states (in + xmerl_eventp.erl, the + continuation function and the fetch function both access the + cont_state.) + + Functions to access user state: + + + + + xmerl_scan:user_state(GlobalState) + + + xmerl_scan:user_state(UserState', + GlobalState) + + + +
+
+ Event Function + + {event_fun, fun()} | {event_fun, fun(), + LocalState} + + The event function is called at the beginning and at the + end of a parsed entity. It has the following format and + semantics: + + + + EventState = xmerl_scan:event_state(GlobalState), + EventState' = foo(Event, EventState), + GlobalState' = xmerl_scan:event_state(EventState', GlobalState) +end. +]]> + +
+
+ Hook Function + {hook_fun, fun()} | {hook_fun, fun(), + LocalState} + + + +The hook function is called when the processor has parsed a complete +entity. Format and semantics: + + + + HookState = xmerl_scan:hook_state(GlobalState), + {TransformedEntity, HookState'} = foo(Entity, HookState), + GlobalState' = xmerl_scan:hook_state(HookState', GlobalState), + {TransformedEntity, GlobalState'} +end. +]]> + + The relationship between the event function, the hook + function and the accumulator function is as follows: + + + The event function is first called with an + 'ended' event for the parsed entity. + + The hook function is called, possibly + re-formatting the entity. + + The acc function is called in order to + (optionally) add the re-formatted entity to the contents of + its parent element. + + + +
+
+ Fetch Function + +{fetch_fun, fun()} | {fetch_fun, fun(), LocalState} + +The fetch function is called in order to fetch an external resource +(e.g. a DTD). + +The fetch function can respond with three different return values: + + + + +Format and semantics: + + + + FetchState = xmerl_scan:fetch_state(GlobalState), + Result = foo(URI, FetchState). % Result being one of the above +end. +]]> + +
+
+ Continuation Function + +{continuation_fun, fun()} | {continuation_fun, fun(), LocalState} + +The continuation function is called when the parser encounters the end +of the byte stream. Format and semantics: + + + + ContState = xmerl_scan:cont_state(GlobalState), + {Result, ContState'} = get_more_bytes(ContState), + GlobalState' = xmerl_scan:cont_state(ContState', GlobalState), + case Result of + [] -> + GlobalState' = xmerl_scan:cont_state(ContState', GlobalState), + Exception(GlobalState'); + MoreBytes -> + {MoreBytes', Rest} = end_on_whitespace_char(MoreBytes), + ContState'' = update_cont_state(Rest, ContState'), + GlobalState' = xmerl_scan:cont_state(ContState'', GlobalState), + Continue(MoreBytes', GlobalState') + end +end. +]]> +
+
+ Rules Functions + + +{rules, ReadFun : fun(), WriteFun : fun(), LocalState} | +{rules, Table : ets()} + + The rules functions take care of storing scanner + information in a rules database. User-provided rules functions + may opt to store the information in mnesia, or perhaps in the + user_state(LocalState). + + The following modes exist: + + + + If the user doesn't specify an option, the + scanner creates an ets table, and uses built-in functions to + read and write data to it. When the scanner is done, the ets + table is deleted. + + If the user specifies an ets table via the + {rules, Table} option, the + scanner uses this table. When the scanner is done, it does + not delete the table. + + If the user specifies read and write + functions, the scanner will use them instead. + + + + The formats of the read and write functions are as + follows: + + + +WriteFun(Context, Name, Definition, ScannerState) -> NewScannerState. +ReadFun(Context, Name, ScannerState) -> Definition | undefined. +]]> + + Here is a summary of the data objects currently being + written by the scanner: + + + Scanner data objects + + + + Context + Key Value + Definition + + + + + notation + NotationName + {system, SL} | {public, PIDL, SL} + + + elem_def + ElementName + #xmlElement{content = ContentSpec} + + + parameter_entity + PEName + PEDef + + + entity + EntityName + EntityDef + + + +
+ + + + + When <Elem> is not wrapped with +<Occurrence>, (Occurrence = once) is implied. + +
+
+ Accumulator Function + {acc_fun, fun()} | {acc_fun, fun(), + LocalState} + + The accumulator function is called to accumulate the + contents of an entity. When parsing very large files, it may + not be desirable to do so. In this case, an acc function can + be provided that simply doesn't accumulate. + + Note that it is possible to even modify the parsed + entity before accumulating it, but this must be done with + care. xmerl_scan performs + post-processing of the element for namespace management. Thus, + the element must keep its original structure for this to + work. + + The acc function has the following format and + semantics: + + + + {[X|Acc], GlobalState}. + +%% non-accumulating acc fun +fun(ParsedEntity, Acc, GlobalState) -> + {Acc, GlobalState}. +]]> +
+
+ Close Function + + The close function is called when a document (either the + main document or an external DTD) has been completely + parsed. When xmerl_scan was started using + xmerl_scan:file/[1,2], the + file will be read in full, and closed immediately, before the + parsing starts, so when the close function is called, it will + not need to actually close the file. In this case, the close + function will be a good place to modify the state + variables. + + Format and semantics: + + + + GlobalState' = .... % state variables may be altered +]]> +
+ +
+ +
+ +
+ XPATH + + + + [DocEntity] + +DocEntity : #xmlElement{} + | #xmlAttribute{} + | #xmlText{} + | #xmlPI{} + | #xmlComment{} +]]> + + The xmerl_xpath module does seem to handle the entire XPATH + 1.0 spec, but I haven't tested that much yet. The grammar is + defined in + xmerl_xpath_parse.yrl. The core + functions are defined in + xmerl_xpath_pred.erl. +
+
+ Some useful shell commands for debugging the XPath parser + + + -1")). +xmerl_xpath_parse:parse(xmerl_xpath_scan:tokens("5 * 6 div 2")). +xmerl_xpath_parse:parse(xmerl_xpath_scan:tokens("5 + 6 mod 2")). +xmerl_xpath_parse:parse(xmerl_xpath_scan:tokens("5 * 6")). +xmerl_xpath_parse:parse(xmerl_xpath_scan:tokens("5 * 6")). +xmerl_xpath_parse:parse(xmerl_xpath_scan:tokens("-----6")). +xmerl_xpath_parse:parse(xmerl_xpath_scan:tokens("parent::node()")). +xmerl_xpath_parse:parse(xmerl_xpath_scan:tokens("descendant-or-self::node()")). +xmerl_xpath_parse:parse(xmerl_xpath_scan:tokens("parent::processing-instruction('foo')")).]]> +
+
+ Erlang Data Structure Export + + The idea is as follows: + + The Erlang data structure should look like this: + + + + Some short forms are allowed: + + {Tag, [], Content} +Tag -> {Tag, [], []} +]]> + + Note that content lists must be flat, but strings can be + deep. + + It is also allowed to include normal + #xml... elements in the simple + format. + + xmerl:export_simple(Data, + Callback) takes the above data structure and + exports it, using the callback module + Callback. + + The callback module should contain hook functions for all + tags present in the data structure. The hook function must have + the format: + Tag(Data, Attrs, Parents, E) + + + where E is an #xmlElement{} + record (see xmerl.hrl). + + Attrs is converted from the simple [{Key, + Value}] to + [#xmlAttribute{}] + + Parents is a list of [{ParentTag, + ParentTagPosition}]. + + The hook function should return either the Data to be + exported, or the tuple {'#xml-redefine#', + NewStructure}, where + NewStructure is an element (which + can be simple), or a (simple-) content list wrapped in a 1-tuple + as {NewContent}. + + The callback module can inherit definitions from other + callback modules, through the required function + '#xml-inheritance#'() -> + [ModuleName]. + + As long as a tag is represented in one of the callback + modules, things will work. It is of course also possible to + redefine a tag. +
+ XSLT like transforms + See separate document xmerl_xs.html + . +
+
+ +
-- cgit v1.2.3