diff options
author | Erlang/OTP <[email protected]> | 2009-11-20 14:54:40 +0000 |
---|---|---|
committer | Erlang/OTP <[email protected]> | 2009-11-20 14:54:40 +0000 |
commit | 84adefa331c4159d432d22840663c38f155cd4c1 (patch) | |
tree | bff9a9c66adda4df2106dfd0e5c053ab182a12bd /lib/parsetools/doc | |
download | otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.gz otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.bz2 otp-84adefa331c4159d432d22840663c38f155cd4c1.zip |
The R13B03 release. OTP_R13B03
Diffstat (limited to 'lib/parsetools/doc')
-rw-r--r-- | lib/parsetools/doc/html/.gitignore | 0 | ||||
-rw-r--r-- | lib/parsetools/doc/man3/.gitignore | 0 | ||||
-rw-r--r-- | lib/parsetools/doc/pdf/.gitignore | 0 | ||||
-rw-r--r-- | lib/parsetools/doc/src/Makefile | 119 | ||||
-rw-r--r-- | lib/parsetools/doc/src/book.xml | 46 | ||||
-rw-r--r-- | lib/parsetools/doc/src/fascicules.xml | 15 | ||||
-rw-r--r-- | lib/parsetools/doc/src/leex.xml | 455 | ||||
-rw-r--r-- | lib/parsetools/doc/src/make.dep | 21 | ||||
-rw-r--r-- | lib/parsetools/doc/src/note.gif | bin | 0 -> 1539 bytes | |||
-rw-r--r-- | lib/parsetools/doc/src/notes.xml | 308 | ||||
-rw-r--r-- | lib/parsetools/doc/src/notes_history.xml | 87 | ||||
-rw-r--r-- | lib/parsetools/doc/src/part_notes.xml | 43 | ||||
-rw-r--r-- | lib/parsetools/doc/src/ref_man.xml | 42 | ||||
-rw-r--r-- | lib/parsetools/doc/src/user_guide.gif | bin | 0 -> 1581 bytes | |||
-rw-r--r-- | lib/parsetools/doc/src/warning.gif | bin | 0 -> 1498 bytes | |||
-rw-r--r-- | lib/parsetools/doc/src/yecc.xml | 529 |
16 files changed, 1665 insertions, 0 deletions
diff --git a/lib/parsetools/doc/html/.gitignore b/lib/parsetools/doc/html/.gitignore new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/lib/parsetools/doc/html/.gitignore diff --git a/lib/parsetools/doc/man3/.gitignore b/lib/parsetools/doc/man3/.gitignore new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/lib/parsetools/doc/man3/.gitignore diff --git a/lib/parsetools/doc/pdf/.gitignore b/lib/parsetools/doc/pdf/.gitignore new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/lib/parsetools/doc/pdf/.gitignore diff --git a/lib/parsetools/doc/src/Makefile b/lib/parsetools/doc/src/Makefile new file mode 100644 index 0000000000..6e693e0cf0 --- /dev/null +++ b/lib/parsetools/doc/src/Makefile @@ -0,0 +1,119 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 1997-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. 
+# +# %CopyrightEnd% +# + +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include ../../vsn.mk +VSN=$(PARSETOOLS_VSN) +APPLICATION=parsetools + +# ---------------------------------------------------- +# Release directory specification +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/lib/$(APPLICATION)-$(VSN) + +# ---------------------------------------------------- +# Target Specs +# ---------------------------------------------------- +XML_APPLICATION_FILES = ref_man.xml +XML_REF3_FILES = yecc.xml leex.xml + +XML_PART_FILES = part_notes.xml +XML_CHAPTER_FILES = notes.xml + +BOOK_FILES = book.xml + +XML_FILES = \ + $(BOOK_FILES) $(XML_CHAPTER_FILES) \ + $(XML_PART_FILES) $(XML_REF3_FILES) $(XML_APPLICATION_FILES) + +GIF_FILES = \ + note.gif + +XML_HTML_FILES = \ + notes_history.xml + +# ---------------------------------------------------- + +HTML_FILES = $(XML_APPLICATION_FILES:%.xml=$(HTMLDIR)/%.html) \ + $(XML_HTML_FILES:%.xml=$(HTMLDIR)/%.html) \ + $(XML_PART_FILES:%.xml=$(HTMLDIR)/%.html) + +INFO_FILE = ../../info + +MAN3_FILES = $(XML_REF3_FILES:%.xml=$(MAN3DIR)/%.3) + +HTML_REF_MAN_FILE = $(HTMLDIR)/index.html + +TOP_PDF_FILE = $(PDFDIR)/$(APPLICATION)-$(VSN).pdf + +# ---------------------------------------------------- +# FLAGS +# ---------------------------------------------------- +XML_FLAGS += +DVIPS_FLAGS += + +# ---------------------------------------------------- +# Targets +# ---------------------------------------------------- +$(HTMLDIR)/%.gif: %.gif + $(INSTALL_DATA) $< $@ + +docs: pdf html man + +$(TOP_PDF_FILE): $(XML_FILES) + +pdf: $(TOP_PDF_FILE) + +html: gifs $(HTML_REF_MAN_FILE) + +clean clean_docs: + rm -rf $(HTMLDIR)/* + rm -f $(MAN3DIR)/* + rm -f $(TOP_PDF_FILE) $(TOP_PDF_FILE:%.pdf=%.fo) + rm -f errs core *~ + +man: $(MAN3_FILES) + +gifs: 
$(GIF_FILES:%=$(HTMLDIR)/%) + +debug opt: + +# ---------------------------------------------------- +# Release Target +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +release_docs_spec: docs + $(INSTALL_DIR) $(RELSYSDIR)/doc/pdf + $(INSTALL_DATA) $(TOP_PDF_FILE) $(RELSYSDIR)/doc/pdf + $(INSTALL_DIR) $(RELSYSDIR)/doc/html + $(INSTALL_DATA) $(HTMLDIR)/* \ + $(RELSYSDIR)/doc/html + $(INSTALL_DATA) $(INFO_FILE) $(RELSYSDIR) + $(INSTALL_DIR) $(RELEASE_PATH)/man/man3 + $(INSTALL_DATA) $(MAN3DIR)/* $(RELEASE_PATH)/man/man3 + + +release_spec: + diff --git a/lib/parsetools/doc/src/book.xml b/lib/parsetools/doc/src/book.xml new file mode 100644 index 0000000000..93a107b798 --- /dev/null +++ b/lib/parsetools/doc/src/book.xml @@ -0,0 +1,46 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE book SYSTEM "book.dtd"> + +<book xmlns:xi="http://www.w3.org/2001/XInclude"> + <header titlestyle="normal"> + <copyright> + <year>1997</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. 
+ + </legalnotice> + + <title>Parse Tools</title> + <prepared>Carl Velin</prepared> + <docno></docno> + <date>1997-05-02</date> + <rev>1.0</rev> + <file>book.sgml</file> + </header> + <insidecover> + </insidecover> + <pagetext>Parse Tools</pagetext> + <preamble> + <contents level="2"></contents> + </preamble> + <applications> + <xi:include href="ref_man.xml"/> + </applications> + <releasenotes> + <xi:include href="notes.xml"/> + </releasenotes> + <listofterms></listofterms> + <index></index> +</book> + diff --git a/lib/parsetools/doc/src/fascicules.xml b/lib/parsetools/doc/src/fascicules.xml new file mode 100644 index 0000000000..43090b4aed --- /dev/null +++ b/lib/parsetools/doc/src/fascicules.xml @@ -0,0 +1,15 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE fascicules SYSTEM "fascicules.dtd"> + +<fascicules> + <fascicule file="ref_man" href="ref_man_frame.html" entry="yes"> + Reference Manual + </fascicule> + <fascicule file="part_notes" href="part_notes_frame.html" entry="no"> + Release Notes + </fascicule> + <fascicule file="" href="../../../../doc/print.html" entry="no"> + Off-Print + </fascicule> +</fascicules> + diff --git a/lib/parsetools/doc/src/leex.xml b/lib/parsetools/doc/src/leex.xml new file mode 100644 index 0000000000..c113b586df --- /dev/null +++ b/lib/parsetools/doc/src/leex.xml @@ -0,0 +1,455 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE erlref SYSTEM "erlref.dtd"> + +<erlref> + <header> + <copyright> + <year>2009</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + Copyright (c) 2008,2009 Robert Virding. All rights reserved. 
+ </legalnotice> + + <title>leex</title> + <prepared>Robert Virding</prepared> + <responsible>nobody</responsible> + <docno></docno> + <approved>nobody</approved> + <checked></checked> + <date>2009-05-07</date> + <rev>A</rev> + <file>leex.xml</file> + </header> + <module>leex</module> + <modulesummary>Lexical analyzer generator for Erlang</modulesummary> + <description> + <p>A regular expression based lexical analyzer generator for + Erlang, similar to lex or flex.</p> + <note><p>The Leex module should be considered experimental + as it will be subject to changes in future releases.</p></note> + </description> + <section> + <title>DATA TYPES</title> + <code type="none"> +ErrorInfo = {ErrorLine,module(),error_descriptor()} +ErrorLine = integer() +Token = tuple()</code> + </section> + <funcs> + <func> + <name>file(FileName) -> ok | error</name> + <name>file(FileName, Options) -> ok | error</name> + <fsummary>Generate a lexical analyzer</fsummary> + <type> + <v>FileName = filename()</v> + <v>Options = Option | [Option]</v> + <v>Option = - see below -</v> + <v>FileReturn = {ok, Scannerfile} + | {ok, Scannerfile, Warnings} + | error + | {error, Warnings, Errors}</v> + <v>Scannerfile = filename()</v> + <v>Warnings = Errors = [{filename(), [ErrorInfo]}]</v> + </type> + <desc> + <p>Generates a lexical analyzer from the definition in the input + file. The input file has the extension <c>.xrl</c>. This is + added to the filename if it is not given. 
The resulting module + is the Xrl filename without the <c>.xrl</c> extension.</p> + + <p>The current options are:</p> + <taglist> + <tag><c>dfa_graph</c></tag> + <item><p>Generates a <c>.dot</c> file which contains a + description of the DFA in a format which can be viewed with + Graphviz, <c>www.graphviz.com</c>.</p> + </item> + <tag><c>{includefile,Includefile}</c></tag> + <item><p>Uses a specific or customised prologue file + instead of default + <c>lib/parsetools/include/leexinc.hrl</c> which is + otherwise included.</p> + </item> + <tag><c>{report_errors, bool()}</c></tag> + <item><p>Causes errors to be printed as they occur. Default is + <c>true</c>.</p> + </item> + <tag><c>{report_warnings, bool()}</c></tag> + <item><p>Causes warnings to be printed as they occur. Default is + <c>true</c>.</p> + </item> + <tag><c>{report, bool()}</c></tag> + <item><p>This is a short form for both <c>report_errors</c> and + <c>report_warnings</c>.</p> + </item> + <tag><c>{return_errors, bool()}</c></tag> + <item><p>If this flag is set, <c>{error, Errors, Warnings}</c> + is returned when there are errors. Default is <c>false</c>.</p> + </item> + <tag><c>{return_warnings, bool()}</c></tag> + <item><p>If this flag is set, an extra field containing + <c>Warnings</c> is added to the tuple returned upon + success. Default is <c>false</c>.</p> + </item> + <tag><c>{return, bool()}</c></tag> + <item><p>This is a short form for both <c>return_errors</c> and + <c>return_warnings</c>.</p> + </item> + <tag><c>{scannerfile, Scannerfile}</c></tag> + <item><p><c>Scannerfile</c> is the name of the file that + will contain the Erlang scanner code that is generated. 
+ The default (<c>""</c>) is to add the extension + <c>.erl</c> to <c>FileName</c> stripped of the + <c>.xrl</c> extension.</p> + </item> + <tag><c>{verbose, bool()}</c></tag> + <item><p>Outputs information from parsing the input file and + generating the internal tables.</p> + </item> + </taglist> + <p>Any of the Boolean options can be set to <c>true</c> by + stating the name of the option. For example, <c>verbose</c> + is equivalent to <c>{verbose, true}</c>.</p> + <p>Leex will add the extension <c>.hrl</c> to the + <c>Includefile</c> name and the extension <c>.erl</c> to the + <c>Scannerfile</c> name, unless the extension is already + there.</p> + </desc> + </func> + <func> + <name>format_error(ErrorInfo) -> Chars</name> + <fsummary>Return an English description of an error tuple.</fsummary> + <type> + <v>Chars = [char() | Chars]</v> + </type> + <desc> + <p>Returns a string which describes the error + <c>ErrorInfo</c> returned when there is an error in a + regular expression.</p> + </desc> + </func> + </funcs> + + + <section> + <title>GENERATED SCANNER EXPORTS</title> + <p>The following functions are exported by the generated scanner.</p> + </section> + + <funcs> + <func> + <name>string(String) -> StringRet</name> + <name>string(String, StartLine) -> StringRet</name> + <fsummary>Generated by Leex</fsummary> + <type> + <v>String = string()</v> + <v>StringRet = {ok,Tokens,EndLine} | ErrorInfo</v> + <v>Tokens = [Token]</v> + <v>EndLine = StartLine = integer()</v> + </type> + <desc> + <p>Scans <c>String</c> and returns all the tokens in it, or an + error.</p> + <note><p>It is an error if not all of the characters in + <c>String</c> are consumed.</p></note> + </desc> + </func> + + <func> + <name>token(Cont, Chars) -> {more,Cont1} | {done,TokenRet,RestChars} + </name> + <name>token(Cont, Chars, StartLine) -> {more,Cont1} + | {done,TokenRet,RestChars} + </name> + <fsummary>Generated by Leex</fsummary> + <type> + <v>Cont = [] | Cont1</v> + <v>Cont1 = tuple()</v> + 
<v>Chars = RestChars = string() | eof</v> + <v>TokenRet = {ok, Token, EndLine} + | {eof, EndLine} + | ErrorInfo</v> + <v>StartLine = EndLine = integer()</v> + </type> + <desc> + <p>This is a re-entrant call to try and scan one token from + <c>Chars</c>. If there are enough characters in <c>Chars</c> + to either scan a token or detect an error then this will be + returned with <c>{done,...}</c>. Otherwise + <c>{more,Cont1}</c> will be returned where <c>Cont1</c> is + used in the next call to <c>token()</c> with more characters + to try and scan the token. This is continued until a token + has been scanned. <c>Cont</c> is initially <c>[]</c>.</p> + + <p>It is not designed to be called directly by an application + but used through the i/o system where it can typically be + called in an application by:</p> + <code> +io:request(InFile, {get_until,Prompt,Module,token,[Line]}) + -> TokenRet</code> + </desc> + </func> + + <func> + <name>tokens(Cont, Chars) -> {more,Cont1} | {done,TokensRet,RestChars} + </name> + <name>tokens(Cont, Chars, StartLine) -> + {more,Cont1} | {done,TokensRet,RestChars} + </name> + <fsummary>Generated by Leex</fsummary> + <type> + <v>Cont = [] | Cont1</v> + <v>Cont1 = tuple()</v> + <v>Chars = RestChars = string() | eof</v> + <v>TokensRet = {ok, Tokens, EndLine} + | {eof, EndLine} + | ErrorInfo</v> + <v>Tokens = [Token]</v> + <v>StartLine = EndLine = integer()</v> + </type> + <desc> + <p>This is a re-entrant call to try and scan tokens from + <c>Chars</c>. If there are enough characters in <c>Chars</c> + to either scan tokens or detect an error then this will be + returned with <c>{done,...}</c>. Otherwise + <c>{more,Cont1}</c> will be returned where <c>Cont1</c> is + used in the next call to <c>tokens()</c> with more + characters to try and scan the tokens. This is continued + until all tokens have been scanned. 
<c>Cont</c> is initially + <c>[]</c>.</p> + + <p>This function differs from <c>token</c> in that it will + continue to scan tokens up to and including the first + <c>{end_token,Token}</c> (see next + section). It will then return all the tokens. This is + typically used for scanning grammars like Erlang where there + is an explicit end token, <c>'.'</c>. If no end token is + found then the whole file will be scanned and returned. If + an error occurs then all tokens up to and including the next + end token will be skipped.</p> + + <p>It is not designed to be called directly by an application + but used through the i/o system where it can typically be + called in an application by:</p> + <code> +io:request(InFile, {get_until,Prompt,Module,tokens,[Line]}) + -> TokensRet</code> + </desc> + </func> + </funcs> + + <section> + <title>Input File Format</title> + <p>Erlang style comments starting with a <c>%</c> are allowed in + scanner files. A definition file has the following format:</p> + <code> +<Header> + +Definitions. + +<Macro Definitions> + +Rules. + +<Token Rules> + +Erlang code. + +<Erlang code></code> + + <p>The "Definitions.", "Rules." and "Erlang code." headings are + mandatory and must occur at the beginning of a source line. The + <Header>, <Macro Definitions> and <Erlang code> + sections may be empty but there must be at least one rule.</p> + + <p>Macro definitions have the following format:</p> + + <code> +NAME = VALUE</code> + + <p>and there must be spaces around <c>=</c>. 
Macros can be used in + the regular expressions of rules by writing <c>{NAME}</c>.</p> + + <note><p>When macros are expanded in expressions the macro calls + are replaced by the macro value without any form of quoting or + enclosing in parentheses.</p></note> + + <p>Rules have the following format:</p> + + <code> +<Regexp> : <Erlang code>.</code> + + <p>The <Regexp> must occur at the start of a line and not + include any blanks; use <c>\\t</c> and <c>\\s</c> to include TAB + and SPACE characters in the regular expression. If <Regexp> + matches then the corresponding <Erlang code> is evaluated to + generate a token. With the Erlang code the following predefined + variables are available:</p> + + <taglist> + <tag><c>TokenChars</c></tag> + <item><p>A list of the characters in the matched token.</p> + </item> + <tag><c>TokenLen</c></tag> + <item><p>The number of characters in the matched token.</p> + </item> + <tag><c>TokenLine</c></tag> + <item><p>The line number where the token occurred.</p> + </item> + </taglist> + + <p>The code must return:</p> + + <taglist> + <tag><c>{token,Token}</c></tag> + <item><p>Return <c>Token</c> to the caller.</p> + </item> + <tag><c>{end_token,Token}</c></tag> + <item><p>Return <c>Token</c> and is last token in a tokens call.</p> + </item> + <tag><c>skip_token</c></tag> + <item><p>Skip this token completely.</p> + </item> + <tag><c>{error,ErrString}</c></tag> + <item><p>An error in the token, <c>ErrString</c> is a string + describing the error.</p> + </item> + </taglist> + + <p>It is also possible to push back characters into the input + characters with the following returns:</p> + + <list> + <item><c>{token,Token,PushBackList}</c></item> + <item><c>{end_token,Token,PushBackList}</c></item> + <item><c>{skip_token,PushBackList}</c></item> + </list> + + <p>These have the same meanings as the normal returns but the + characters in <c>PushBackList</c> will be prepended to the input + characters and scanned for the next token. 
Note that pushing + back a newline will mean the line numbering will no longer be + correct.</p> + + <note><p>Pushing back characters gives you unexpected + possibilities to cause the scanner to loop!</p></note> + + <p>The following example would match a simple Erlang integer or + float and return a token which could be sent to the Erlang + parser:</p> + <code> +D = [0-9] + +{D}+ : + {token,{integer,TokenLine,list_to_integer(TokenChars)}}. + +{D}+\\.{D}+((E|e)(\\+|\\-)?{D}+)? : + {token,{float,TokenLine,list_to_float(TokenChars)}}.</code> + + <p>The Erlang code in the "Erlang code." section is written into + the output file directly after the module declaration and + predefined exports declaration so it is possible to add extra + exports, define imports and other attributes which are then + visible in the whole file.</p> + </section> + + <section> + <title>Regular Expressions</title> + + <p>The regular expressions allowed here is a subset of the set + found in <c>egrep</c> and in the AWK programming language, as + defined in the book, The AWK Programming Language, by A. V. Aho, + B. W. Kernighan, P. J. Weinberger. They are composed of the + following characters:</p> + + <taglist> + <tag><c>c</c></tag> + <item><p>Matches the non-metacharacter c.</p> + </item> + <tag><c>\\c</c></tag> + <item><p>Matches the escape sequence or literal character c.</p> + </item> + <tag><c>.</c></tag> + <item><p>Matches any character.</p> + </item> + <tag><c>^</c></tag> + <item><p>Matches the beginning of a string.</p> + </item> + <tag><c>$</c></tag> + <item><p>Matches the end of a string.</p></item> + <tag><c>[abc...]</c></tag> + <item><p>Character class, which matches any of the characters + <c>abc...</c>. Character ranges are specified by a pair of + characters separated by a <c>-</c>.</p> + </item> + <tag><c>[^abc...]</c></tag> + <item><p>Negated character class, which matches any character + except <c>abc...</c>.</p> + </item> + <tag><c>r1 | r2</c></tag> + <item><p>Alternation. 
It matches either <c>r1</c> or <c>r2</c>.</p> + </item> + <tag><c>r1r2</c></tag> + <item><p>Concatenation. It matches <c>r1</c> and then <c>r2</c>.</p> + </item> + <tag><c>r+</c></tag> + <item><p>Matches one or more <c>rs</c>.</p> + </item> + <tag><c>r*</c></tag> + <item><p>Matches zero or more <c>rs</c>.</p> + </item> + <tag><c>r?</c></tag> + <item><p>Matches zero or one <c>rs</c>.</p> + </item> + <tag><c>(r)</c></tag> + <item><p>Grouping. It matches <c>r</c>.</p> + </item> + </taglist> + + <p>The escape sequences allowed are the same as for Erlang strings:</p> + + <taglist> + <tag><c>\\b</c></tag> + <item><p>Backspace.</p></item> + <tag><c>\\f</c></tag> + <item><p>Form feed.</p></item> + <tag><c>\\n</c></tag> + <item><p>Newline (line feed).</p></item> + <tag><c>\\r</c></tag> + <item><p>Carriage return.</p></item> + <tag><c>\\t</c></tag> + <item><p>Tab.</p></item> + <tag><c>\\e</c></tag> + <item><p>Escape.</p></item> + <tag><c>\\v</c></tag> + <item><p>Vertical tab.</p></item> + <tag><c>\\s</c></tag> + <item><p>Space.</p></item> + <tag><c>\\d</c></tag> + <item><p>Delete.</p></item> + <tag><c>\\ddd</c></tag> + <item><p>The octal value <c>ddd</c>.</p></item> + <tag><c>\\xhh</c></tag> + <item><p>The hexadecimal value <c>hh</c>.</p></item> + <tag><c>\\x{h...}</c></tag> + <item><p>The hexadecimal value <c>h...</c>.</p></item> + <tag><c>\\c</c></tag> + <item><p>Any other character literally, for example <c>\\\\</c> for + backslash, <c>\\"</c> for <c>"</c>.</p> + </item> + </taglist> + + <p>The following examples define Erlang data types:</p> + <code> +Atoms [a-z][0-9a-zA-Z_]* + +Variables [A-Z_][0-9a-zA-Z_]* + +Floats (\\+|-)?[0-9]+\\.[0-9]+((E|e)(\\+|-)?[0-9]+)?</code> + + <note><p>Anchoring a regular expression with <c>^</c> and <c>$</c> + is not implemented in the current version of Leex and just + generates a parse error.</p></note> + </section> +</erlref> diff --git a/lib/parsetools/doc/src/make.dep b/lib/parsetools/doc/src/make.dep new file mode 100644 index 
0000000000..3a09ecdedd --- /dev/null +++ b/lib/parsetools/doc/src/make.dep @@ -0,0 +1,21 @@ +# ---------------------------------------------------- +# >>>> Do not edit this file <<<< +# This file was automatically generated by +# /home/otp/bin/docdepend +# ---------------------------------------------------- + + +# ---------------------------------------------------- +# TeX files that the DVI file depends on +# ---------------------------------------------------- + +book.dvi: book.tex leex.tex ref_man.tex yecc.tex + +# ---------------------------------------------------- +# Source inlined when transforming from source to LaTeX +# ---------------------------------------------------- + +book.tex: ref_man.xml + +ref_man.tex: ../../../../system/doc/definitions/term.defs + diff --git a/lib/parsetools/doc/src/note.gif b/lib/parsetools/doc/src/note.gif Binary files differ new file mode 100644 index 0000000000..6fffe30419 --- /dev/null +++ b/lib/parsetools/doc/src/note.gif diff --git a/lib/parsetools/doc/src/notes.xml b/lib/parsetools/doc/src/notes.xml new file mode 100644 index 0000000000..2947517717 --- /dev/null +++ b/lib/parsetools/doc/src/notes.xml @@ -0,0 +1,308 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE chapter SYSTEM "chapter.dtd"> + +<chapter> + <header> + <copyright> + <year>1997</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. 
+ + </legalnotice> + + <title>Parsetools Release Notes</title> + <prepared>otp_appnotes</prepared> + <docno>nil</docno> + <date>nil</date> + <rev>nil</rev> + <file>notes.xml</file> + </header> + <p>This document describes the changes made to the Parsetools application.</p> + +<section><title>Parsetools 2.0.1</title> + + <section><title>Improvements and New Features</title> + <list> + <item> + <p>Leex no longer uses the deprecated <c>regexp</c> + module. (Thanks to Robert Virding.).</p> + <p> + Own Id: OTP-8231</p> + </item> + </list> + </section> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p>A minor bug in <c>leex(3)</c> has been fixed.</p> + <p> + Own Id: OTP-8197</p> + </item> + </list> + </section> + +</section> + +<section><title>Parsetools 2.0</title> + + <section><title>Improvements and New Features</title> + <list> + <item> + <p>Leex, a lexical analyzer generator for Erlang, + has been added to Parsetools. This initial version + should be considered experimental; it is known + that there will be changes and additions. 
+ (Thanks to Robert Virding.).</p> + <p> + Own Id: OTP-8013</p> + </item> + </list> + </section> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p>The parsers generated by Yecc now report correct + error lines when possible.</p> + <p> + Own Id: OTP-7969</p> + </item> + </list> + </section> + +</section> + +<section><title>Parsetools 1.4.7</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p>A bug in yeccpre.hrl introduced in R13A has been + fixed.</p> + <p> + Own Id: OTP-7945</p> + </item> + </list> + </section> + +</section> + +<section><title>Parsetools 1.4.6</title> + + <section><title>Improvements and New Features</title> + <list> + <item> + <p>Updated file headers.</p> + <p> + Own Id: OTP-7798</p> + </item> + </list> + </section> + +</section> + +<section><title>Parsetools 1.4.5</title> + + <section><title>Improvements and New Features</title> + <list> + <item> + <p>The <c>yecc</c> grammar has been augmented with an + optional header section. (Thanks to Richard + Carlsson.)</p> + <p> + Own Id: OTP-7292</p> + </item> + </list> + </section> + +</section> + + +<section><title>Parsetools 1.4.4</title> + + <section><title>Improvements and New Features</title> + <list> + <item> + <p>The size of the code generated by Yecc has + been reduced. The code is also faster.</p> + <p>Macros can now be used in actions.</p> + <p> + Own Id: OTP-7224</p> + </item> + </list> + </section> + +</section> + +<section><title>Parsetools 1.4.3</title> + + <section><title>Improvements and New Features</title> + <list> + <item> + <p><c>tuple_size/1</c> and <c>byte_size/1</c> have been + substituted for <c>size/1</c>.</p> + <p> + Own Id: OTP-7009</p> + </item> + </list> + </section> + +</section> + +<section><title>Parsetools 1.4.2</title> + + <section><title>Improvements and New Features</title> + <list> + <item> + <p>The size of the code generated by yecc has been + reduced. 
</p> + <p>A note regarding the <c>includefile</c> option: + although yecc can cope with includefiles based on some + earlier <c>yeccpre.hrl</c> it is recommended for + efficiency reasons to update includefiles as to follow + the pattern in the latest <c>yeccpre.hrl</c>.</p> + <p> + Own Id: OTP-6851</p> + </item> + </list> + </section> + +</section> + + <section> + <title>Parsetools 1.4.1.1</title> + + <section> + <title>Improvements and New Features</title> + <list type="bulleted"> + <item> + <p>Minor Makefile changes.</p> + <p>Own Id: OTP-6689</p> + </item> + </list> + </section> + </section> + + <section> + <title>Parsetools 1.4.1</title> + + <section> + <title>Fixed Bugs and Malfunctions</title> + <list type="bulleted"> + <item> + <p>A bug concerning precedence declarations of + non-terminals "one level up" has been fixed in yecc.</p> + <p>Own Id: OTP-6362</p> + </item> + </list> + </section> + </section> + + <section> + <title>Parsetools 1.4</title> + + <section> + <title>Improvements and Fixed Bugs</title> + <p>Several modifications of Yecc have been made:</p> + <list type="bulleted"> + <item> + <p>The new functions <c>file/1,2</c> take the + role of the old functions <c>yecc/2,3,4</c>. The + latter functions are no longer documented but are + kept for backward compatibility.</p> + </item> + <item> + <p>More checks of the grammar file have been + implemented. Examples are warnings for unused + non-terminals and duplicated declarations.</p> + </item> + <item> + <p>Invalid pseudo variables are no longer + replaced by <c>'$undefined'</c> but cause a failure.</p> + </item> + <item> + <p>Reserved words no longer need to be quoted + when used as terminals or non-terminals.</p> + </item> + <item> + <p>When compiling the generated parser file errors + and warnings concerning user code refer to + the grammar file, not the parser file.</p> + </item> + <item> + <p>Yecc emits a warning if there are conflicts + in the grammar. 
The new declaration <c>Expect</c> can + be used to suppress this warning.</p> + </item> + <item> + <p>The new operator precedence declaration + <c>Nonassoc</c> can be used to declare operators with + no associativity.</p> + </item> + <item> + <p>Precedence can be given to more than one operator + with one single operator precedence declaration.</p> + </item> + <item> + <p>The function <c>parse_and_scan/1</c> in the + default includefile accepts <c>{Function, A}</c> + as well as {{M,F}, A} as tokenizer function. + Exceptions in the tokenizer are never caught.</p> + </item> + <item> + <p>The functions <c>yecc:file/1,2</c> can be accessed + from the Erlang shell via the new functions <c>c:y/1,2</c> + in STDLIB.</p> + </item> + </list> + <p>See yecc(3) for further details.</p> + <p>Own Id: OTP-5366</p> + </section> + </section> + + <section> + <title>Parsetools 1.3.2</title> + + <section> + <title>Fixed Bugs and Malfunctions</title> + <list type="bulleted"> + <item> + <p>A bug in <c>Yecc</c> that was introduced in R9B has been + removed. Another bug concerning precedence declaration + "one level up" has been fixed.</p> + <p>Own Id: OTP-5461</p> + </item> + </list> + </section> + </section> + + <section> + <title>Parsetools 1.3.1</title> + + <section> + <title>Fixed Bugs and Malfunctions</title> + <list type="bulleted"> + <item> + <p>A bug in the file <c>parsetools/include/yeccpre.hrl</c> + caused <c>yecc:parse_and_scan/1</c> to always report a + parse failure when the lexer reported end-of-file. 
This + problem has been fixed.</p> + <p>Own Id: OTP-5369</p> + </item> + </list> + </section> + </section> +</chapter> + diff --git a/lib/parsetools/doc/src/notes_history.xml b/lib/parsetools/doc/src/notes_history.xml new file mode 100644 index 0000000000..6a63812bcb --- /dev/null +++ b/lib/parsetools/doc/src/notes_history.xml @@ -0,0 +1,87 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE chapter SYSTEM "chapter.dtd"> + +<chapter> + <header> + <copyright> + <year>2006</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + </legalnotice> + + <title>Parsetools Release Notes</title> + <prepared>Hans Bolinder</prepared> + <responsible></responsible> + <docno></docno> + <approved></approved> + <checked></checked> + <date>06-02-20</date> + <rev>A</rev> + <file>notes_history.sgml</file> + </header> + + <section> + <title>Parsetools 1.3</title> + + <section> + <title>Improvements and New Features</title> + <list type="bulleted"> + <item> + <p>The source code was cleaned up from unused variables to + eliminate compiler warnings. 
No other changes.</p> + <p>Own Id: OTP-5185</p> + </item> + </list> + </section> + </section> + + <section> + <title>Parsetools 1.2</title> + <p>No release notes.</p> + </section> + + <section> + <title>Parsetools 1.1</title> + <p>No release notes.</p> + </section> + + <section> + <title>Parsetools 1.0.1</title> + + <section> + <title>Fixed Bugs and malfunctions</title> + <list type="bulleted"> + <item> + <p>Correction in <c>yeccpre.hrl</c> to give correct syntax + error info when the offending token was of the form + <c>{Class, Line, Value}</c>.</p> + <p>Own Id: OTP-1881</p> + </item> + <item> + <p>The <c>yecc</c> function does now accept atoms in the + <c>Grammarfile</c>, <c>Parserfile</c> and <c>Includefile</c> + arguments.</p> + <p>Own Id: OTP-1405</p> + </item> + </list> + </section> + </section> + + <section> + <title>Parsetools 1.0</title> + <p>There are no changes since Erlang 4.3.</p> + </section> +</chapter> + diff --git a/lib/parsetools/doc/src/part_notes.xml b/lib/parsetools/doc/src/part_notes.xml new file mode 100644 index 0000000000..308fc95e35 --- /dev/null +++ b/lib/parsetools/doc/src/part_notes.xml @@ -0,0 +1,43 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE part SYSTEM "part.dtd"> + +<part xmlns:xi="http://www.w3.org/2001/XInclude"> + <header> + <copyright> + <year>1997</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. 
+ + </legalnotice> + + <title>PARSETOOLS Release Notes</title> + <prepared>Carl Velin</prepared> + <docno></docno> + <date>1997-04-28</date> + <rev>1.0</rev> + <file>part_notes.sgml</file> + </header> + <description> + <p>The <em>Parsetools</em> application contains utilities for + parsing and scanning. Yecc is an <term id="LALR-1"></term>parser + generator for Erlang, similar to yacc. Yecc takes a <term + id="BNF"></term>grammar definition as input, and produces Erlang + code for a parser as output. Leex is a regular expression based + lexical analyzer generator for Erlang, similar to lex or flex.</p> + <p>There are also release notes for + <seealso marker="notes_history">older versions</seealso>.</p> + </description> + <xi:include href="notes.xml"/> +</part> + diff --git a/lib/parsetools/doc/src/ref_man.xml b/lib/parsetools/doc/src/ref_man.xml new file mode 100644 index 0000000000..52f1c687da --- /dev/null +++ b/lib/parsetools/doc/src/ref_man.xml @@ -0,0 +1,42 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE application SYSTEM "application.dtd"> + +<application xmlns:xi="http://www.w3.org/2001/XInclude"> + <header> + <copyright> + <year>1997</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. 
+ + </legalnotice> + + <title>Parsetools Reference Manual</title> + <prepared>Carl Velin</prepared> + <docno></docno> + <date>1997-04-28</date> + <rev>1.0</rev> + <file>application.sgml</file> + </header> + <description> + <p>The <em>Parsetools</em> application contains utilities for + parsing and scanning. Yecc is an <term id="LALR-1"></term>parser + generator for Erlang, similar to yacc. Yecc takes a <term + id="BNF"></term>grammar definition as input, and produces Erlang + code for a parser as output. Leex is a regular expression based + lexical analyzer generator for Erlang, similar to lex or flex.</p> + </description> + <xi:include href="yecc.xml"/> + <xi:include href="leex.xml"/> +</application> + diff --git a/lib/parsetools/doc/src/user_guide.gif b/lib/parsetools/doc/src/user_guide.gif Binary files differnew file mode 100644 index 0000000000..e6275a803d --- /dev/null +++ b/lib/parsetools/doc/src/user_guide.gif diff --git a/lib/parsetools/doc/src/warning.gif b/lib/parsetools/doc/src/warning.gif Binary files differnew file mode 100644 index 0000000000..96af52360e --- /dev/null +++ b/lib/parsetools/doc/src/warning.gif diff --git a/lib/parsetools/doc/src/yecc.xml b/lib/parsetools/doc/src/yecc.xml new file mode 100644 index 0000000000..81f1550b0a --- /dev/null +++ b/lib/parsetools/doc/src/yecc.xml @@ -0,0 +1,529 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE erlref SYSTEM "erlref.dtd"> + +<erlref> + <header> + <copyright> + <year>1996</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. 
+ + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + </legalnotice> + + <title>yecc</title> + <prepared>Carl Wilhelm Welin</prepared> + <responsible>Carl Wilhelm Welin</responsible> + <docno></docno> + <approved>Bjarne Däcker</approved> + <checked></checked> + <date>1997-01-27</date> + <rev>B</rev> + <file>yecc.sgml</file> + </header> + <module>yecc</module> + <modulesummary>LALR-1 Parser Generator</modulesummary> + <description> + <p>An LALR-1 parser generator for Erlang, similar to <c>yacc</c>. + Takes a BNF grammar definition as input, and produces Erlang code + for a parser. </p> + <p>To understand this text, you also have to + look at the <c>yacc</c> documentation in the UNIX(TM) manual. This + is most probably necessary in order to understand the idea of a + parser generator, and the principle and problems of LALR parsing + with finite look-ahead.</p> + </description> + <funcs> + <func> + <name>file(Grammarfile [, Options]) -> YeccRet</name> + <fsummary>Give information about resolved and unresolved parse action conflicts.</fsummary> + <type> + <v>Grammarfile = filename()</v> + <v>Options = Option | [Option]</v> + <v>Option = - see below -</v> + <v>YeccRet = {ok, Parserfile} | {ok, Parserfile, Warnings} | error | {error, Errors, Warnings}</v> + <v>Parserfile = filename()</v> + <v>Warnings = Errors = [{filename(), [ErrorInfo]}]</v> + <v>ErrorInfo = {ErrorLine, module(), Reason}</v> + <v>ErrorLine = integer()</v> + <v>Reason = - formatable by format_error/1 -</v> + </type> + <desc> + <p><c>Grammarfile</c> is the file of declarations and grammar + rules. Returns <c>{ok, Parserfile}</c> upon success, or <c>error</c> if + there are errors. An Erlang file containing the parser is + created if there are no errors. 
The options are: + </p> + <taglist> + <tag><c>{parserfile, Parserfile}</c>.</tag> + <item><c>Parserfile</c> is the name of the file that will + contain the Erlang parser code that is generated. The + default (<c>""</c>) is to add the extension <c>.erl</c> + to <c>Grammarfile</c> stripped of the <c>.yrl</c> + extension. + </item> + <tag><c>{includefile, Includefile}</c>.</tag> + <item>Indicates a customized prologue file which the user + may want to use instead of the default file + <c>lib/parsetools/include/yeccpre.hrl</c> which is + otherwise included at the beginning of the resulting + parser file. <em>N.B.</em> The <c>Includefile</c> is + included 'as is' in the parser file, so it must not have a + module declaration of its own, and it should not be + compiled. It must, however, contain the necessary export + declarations. The default is indicated by <c>""</c>. + </item> + <tag><c>{report_errors, bool()}</c>.</tag> + <item>Causes errors to be printed as they occur. Default is + <c>true</c>. + </item> + <tag><c>{report_warnings, bool()}</c>.</tag> + <item>Causes warnings to be printed as they occur. Default is + <c>true</c>. + </item> + <tag><c>{report, bool()}</c>.</tag> + <item>This is a short form for both <c>report_errors</c> and + <c>report_warnings</c>. + </item> + <tag><c>{return_errors, bool()}</c>.</tag> + <item>If this flag is set, <c>{error, Errors, Warnings}</c> + is returned when there are errors. Default is + <c>false</c>. + </item> + <tag><c>{return_warnings, bool()}</c>.</tag> + <item>If this flag is set, an extra field containing + <c>Warnings</c> is added to the tuple returned upon + success. Default is <c>false</c>. + </item> + <tag><c>{return, bool()}</c>.</tag> + <item>This is a short form for both <c>return_errors</c> and + <c>return_warnings</c>. + </item> + <tag><c>{verbose, bool()}</c>. 
</tag> + <item>Determines whether the parser generator should give + full information about resolved and unresolved parse + action conflicts (<c>true</c>), or only about those + conflicts that prevent a parser from being generated from + the input grammar (<c>false</c>, the default). + </item> + </taglist> + <p>Any of the Boolean options can be set to <c>true</c> by + stating the name of the option. For example, <c>verbose</c> + is equivalent to <c>{verbose, true}</c>. + </p> + <p>The value of the <c>Parserfile</c> option stripped of the + <c>.erl</c> extension is used by Yecc as the module name of + the generated parser file.</p> + <p>Yecc will add the extension <c>.yrl</c> to the + <c>Grammarfile</c> name, the extension <c>.hrl</c> to the + <c>Includefile</c> name, and the extension <c>.erl</c> to + the <c>Parserfile</c> name, unless the extension is already + there.</p> + </desc> + </func> + <func> + <name>format_error(Reason) -> Chars</name> + <fsummary>Return an English description of an error tuple.</fsummary> + <type> + <v>Reason = - as returned by yecc:file/1,2 -</v> + <v>Chars = [char() | Chars]</v> + </type> + <desc> + <p>Returns a descriptive string in English of an error tuple + returned by <c>yecc:file/1,2</c>. This function is mainly + used by the compiler invoking Yecc.</p> + </desc> + </func> + </funcs> + + <section> + <title>Pre-Processing</title> + <p>A <c>scanner</c> to pre-process the text (program, etc.) to be + parsed is not provided in the <c>yecc</c> module. The scanner + serves as a kind of lexicon look-up routine. It is possible to + write a grammar that uses only character tokens as terminal + symbols, thereby eliminating the need for a scanner, but this + would make the parser larger and slower.</p> + <p>The user should implement a scanner that segments the input + text, and turns it into one or more lists of tokens. Each token + should be a tuple containing information about syntactic + category, position in the text (e.g. 
line number), and the + actual terminal symbol found in the text: <c>{Category, LineNumber, Symbol}</c>.</p> + <p>If a terminal symbol is the only member of a category, and the + symbol name is identical to the category name, the token format + may be <c>{Symbol, LineNumber}</c>.</p> + <p>A list of tokens produced by the scanner should end with a + special <c>end_of_input</c> tuple which the parser is looking + for. The format of this tuple should be <c>{Endsymbol, LastLineNumber}</c>, where <c>Endsymbol</c> is an identifier + that is distinguished from all the terminal and non-terminal + categories of the syntax rules. The <c>Endsymbol</c> may be + declared in the grammar file (see below).</p> + <p>The simplest case is to segment the input string into a list of + identifiers (atoms) and use those atoms both as categories and + values of the tokens. For example, the input string <c>aaa bbb 777, X</c> may be scanned (tokenized) as:</p> + <code type="none"> +[{aaa, 1}, {bbb, 1}, {777, 1}, {',' , 1}, {'X', 1}, + {'$end', 1}]. </code> + <p>This assumes that this is the first line of the input text, and + that <c>'$end'</c> is the distinguished <c>end_of_input</c> + symbol.</p> + <p>The Erlang scanner in the <c>io</c> module can be used as a + starting point when writing a new scanner. Study + <c>yeccscan.erl</c> in order to see how a filter can be added on + top of <c>io:scan_erl_form/3</c> to provide a scanner for + Yecc that tokenizes grammar files before parsing them + with the Yecc parser. A more general approach to scanner + implementation is to use a scanner generator. 
A scanner + generator in Erlang called <c>leex</c> is under development.</p> + </section> + + <section> + <title>Grammar Definition Format</title> + <p>Erlang style <c>comments</c>, starting with a <c>'%'</c>, are + allowed in grammar files.</p> + <p>Each <c>declaration</c> or <c>rule</c> ends with a dot (the + character <c>'.'</c>).</p> + <p>The grammar starts with an optional <c>header</c> section. The + header is put first in the generated file, before the module + declaration. The purpose of the header is to provide a means to + make the documentation generated by <c>EDoc</c> look nicer. Each + header line should be enclosed in double quotes, and newlines + will be inserted between the lines. For example:</p> + <code> +Header "%% Copyright (C)" +"%% @private" +"%% @Author John"</code> + <p>Next comes a declaration of the <c>nonterminal categories</c> + to be used in the rules. For example:</p> + <code type="none"> +Nonterminals sentence nounphrase verbphrase. </code> + <p>A non-terminal category can be used at the left hand side (= + <c>lhs</c>, or <c>head</c>) of a grammar rule. It can also + appear at the right hand side of rules.</p> + <p>Next comes a declaration of the <c>terminal categories</c>, + which are the categories of tokens produced by the scanner. For + example:</p> + <code type="none"> +Terminals article adjective noun verb. </code> + <p>Terminal categories may only appear in the right hand sides (= + <c>rhs</c>) of grammar rules.</p> + <p>Next comes a declaration of the <c>rootsymbol</c>, or start + category of the grammar. For example:</p> + <code type="none"> +Rootsymbol sentence. </code> + <p>This symbol should appear in the lhs of at least one grammar + rule. This is the most general syntactic category which the + parser ultimately will parse every input string into.</p> + <p>After the rootsymbol declaration comes an optional declaration + of the <c>end_of_input</c> symbol that your scanner is expected + to use. 
For example:</p> + <code type="none"> +Endsymbol '$end'. </code> + <p>Next comes one or more declarations of <c>operator precedences</c>, if needed. These are used to resolve + shift/reduce conflicts (see <c>yacc</c> documentation).</p> + <p>Examples of operator declarations:</p> + <code type="none"> +Right 100 '='. +Nonassoc 200 '==' '=/='. +Left 300 '+'. +Left 400 '*'. +Unary 500 '-'. </code> + <p>These declarations mean that <c>'='</c> is defined as a + <c>right associative binary</c> operator with precedence 100, + <c>'=='</c> and <c>'=/='</c> are operators with <c>no associativity</c>, <c>'+'</c> and <c>'*'</c> are <c>left associative binary</c> operators, where <c>'*'</c> takes + precedence over <c>'+'</c> (the normal case), and <c>'-'</c> is + a <c>unary</c> operator of higher precedence than <c>'*'</c>. + The fact that '==' has no associativity means that an expression + like <c>a == b == c</c> is considered a syntax error.</p> + <p>Certain rules are assigned precedence: each rule gets its + precedence from the last terminal symbol mentioned in the right + hand side of the rule. It is also possible to declare precedence + for non-terminals, "one level up". This is practical when an + operator is overloaded (see also example 3 below).</p> + <p>Next come the <c>grammar rules</c>. Each rule has the general + form</p> + <code type="none"> +Left_hand_side -> Right_hand_side : Associated_code. </code> + <p>The left hand side is a non-terminal category. The right hand + side is a sequence of one or more non-terminal or terminal + symbols with spaces between. The associated code is a sequence + of zero or more Erlang expressions (with commas <c>','</c> as + separators). If the associated code is empty, the separating + colon <c>':'</c> is also omitted. A final dot marks the end of + the rule.</p> + <p>Symbols such as <c>'{'</c>, <c>'.'</c>, etc., have to be + enclosed in single quotes when used as terminal or non-terminal + symbols in grammar rules. 
The use of the symbols + <c>'$empty'</c>, <c>'$end'</c>, and <c>'$undefined'</c> should + be avoided.</p> + <p>The last part of the grammar file is an optional section with + Erlang code (= function definitions) which is included 'as is' + in the resulting parser file. This section must start with the + pseudo declaration, or key words</p> + <code type="none"> +Erlang code. </code> + <p>No syntax rule definitions or other declarations may follow + this section. To avoid conflicts with internal variables, do not + use variable names beginning with two underscore characters + ('__') in the Erlang code in this section, or in the code + associated with the individual syntax rules.</p> + <p>The optional <c>expect</c> declaration can be placed anywhere + before the last optional section with Erlang code. It is used + for suppressing the warning about conflicts that is ordinarily + given if the grammar is ambiguous. An example:</p> + <code type="none"> +Expect 2. </code> + <p>The warning is given if the number of shift/reduce conflicts + differs from 2, or if there are reduce/reduce conflicts. + </p> + </section> + + <section> + <title>Examples</title> + <p>A grammar to parse list expressions (with empty associated + code):</p> + <code type="none"> +Nonterminals list elements element. +Terminals atom '(' ')'. +Rootsymbol list. +list -> '(' ')'. +list -> '(' elements ')'. +elements -> element. +elements -> element elements. +element -> atom. +element -> list. 
</code> + <p>This grammar can be used to generate a parser which parses list + expressions, such as <c>(), (a), (peter charles), (a (b c) d (())), ...</c> provided that your scanner tokenizes, for + example, the input <c>(peter charles)</c> as follows:</p> + <code type="none"> +[{'(', 1} , {atom, 1, peter}, {atom, 1, charles}, {')', 1}, + {'$end', 1}] </code> + <p>When a grammar rule is used by the parser to parse (part of) + the input string as a grammatical phrase, the associated code is + evaluated, and the value of the last expression becomes the + value of the parsed phrase. This value may be used by the parser + later to build structures that are values of higher phrases of + which the current phrase is a part. The values initially + associated with terminal category phrases, i.e. input tokens, + are the token tuples themselves.</p> + <p>Below is an example of the grammar above with structure + building code added:</p> + <code type="none"> +list -> '(' ')' : nil. +list -> '(' elements ')' : '$2'. +elements -> element : {cons, '$1', nil}. +elements -> element elements : {cons, '$1', '$2'}. +element -> atom : '$1'. +element -> list : '$1'. </code> + <p>With this code added to the grammar rules, the parser produces + the following value (structure) when parsing the input string + <c>(a b c).</c>. This still assumes that this was the first + input line that the scanner tokenized:</p> + <code type="none"> +{cons, {atom, 1, a}, {cons, {atom, 1, b}, + {cons, {atom, 1, c}, nil}}} </code> + <p>The associated code contains <c>pseudo variables</c> <c>'$1'</c>, <c>'$2'</c>, <c>'$3'</c>, etc. which refer to (are + bound to) the values associated previously by the parser with + the symbols of the right hand side of the rule. 
When these + symbols are terminal categories, the values are token tuples of + the input string (see above).</p> + <p>The associated code may not only be used to build structures + associated with phrases, but may also be used for syntactic and + semantic tests, printout actions (for example for tracing), etc. + during the parsing process. Since tokens contain positional + (line number) information, it is possible to produce error + messages which contain line numbers. If there is no associated + code after the right hand side of the rule, the value + <c>'$undefined'</c> is associated with the phrase.</p> + <p>The right hand side of a grammar rule may be empty. This is + indicated by using the special symbol <c>'$empty'</c> as rhs. + Then the list grammar above may be simplified to:</p> + <code type="none"> +list -> '(' elements ')' : '$2'. +elements -> element elements : {cons, '$1', '$2'}. +elements -> '$empty' : nil. +element -> atom : '$1'. +element -> list : '$1'. </code> + </section> + + <section> + <title>Generating a Parser</title> + <p>To call the parser generator, use the following command:</p> + <code type="none"> +yecc:file(Grammarfile). </code> + <p>An error message from Yecc will be shown if the grammar + is not of the LALR type (for example too ambiguous). + Shift/reduce conflicts are resolved in favor of shifting if + there are no operator precedence declarations. Refer to the + <c>yacc</c> documentation on the use of operator precedence.</p> + <p>The output file contains Erlang source code for a parser module + with module name equal to the <c>Parserfile</c> parameter. 
After + compilation, the parser can be called as follows (the module + name is assumed to be <c>myparser</c>):</p> + <code type="none"> +myparser:parse(myscanner:scan(Inport)) </code> + <p>The call format may be different if a customized prologue file + has been included when generating the parser instead of the + default file <c>lib/parsetools/include/yeccpre.hrl</c>.</p> + <p>With the standard prologue, this call will return either + <c>{ok, Result}</c>, where <c>Result</c> is a structure that the + Erlang code of the grammar file has built, or <c>{error, {Line_number, Module, Message}}</c> if there was a syntax error + in the input.</p> + <p><c>Message</c> is something which may be converted into a + string by calling <c>Module:format_error(Message)</c> + and printed with <c>io:format/3</c>.</p> + <note> + <p>By default, the parser that was generated will not print out + error messages to the screen. The user will have to do this + either by printing the returned error messages, or by + inserting tests and print instructions in the Erlang code + associated with the syntax rules of the grammar file.</p> + </note> + <p>It is also possible to make the parser ask for more input + tokens when needed if the following call format is used:</p> + <code type="none"> +myparser:parse_and_scan({Function, Args}) +myparser:parse_and_scan({Mod, Tokenizer, Args}) </code> + <p>The tokenizer <c>Function</c> is either a fun or a tuple + <c>{Mod, Tokenizer}</c>. The call <c>apply(Function, Args)</c> + or <c>apply({Mod, Tokenizer}, Args)</c> is executed whenever a + new token is needed. 
This, for example, makes it possible to + parse from a file, token by token.</p> + <p>The tokenizer used above has to be implemented so as to return + one of the following:</p> + <code type="none"> +{ok, Tokens, Endline} +{eof, Endline} +{error, Error_description, Endline} </code> + <p>This conforms to the format used by the scanner in the Erlang + <c>io</c> library module.</p> + <p>If <c>{eof, Endline}</c> is returned immediately, the call to + <c>parse_and_scan/1</c> returns <c>{ok, eof}</c>. If <c>{eof, Endline}</c> is returned before the parser expects end of input, + <c>parse_and_scan/1</c> will, of course, return an error message + (see above). Otherwise <c>{ok, Result}</c> is returned.</p> + </section> + + <section> + <title>More Examples</title> + <p>1. A grammar for parsing infix arithmetic expressions into + prefix notation, without operator precedence:</p> + <code type="none"> +Nonterminals E T F. +Terminals '+' '*' '(' ')' number. +Rootsymbol E. +E -> E '+' T: ['$1', '$2', '$3']. +E -> T : '$1'. +T -> T '*' F: ['$1', '$2', '$3']. +T -> F : '$1'. +F -> '(' E ')' : '$2'. +F -> number : '$1'. </code> + <p>2. The same with operator precedence becomes simpler:</p> + <code type="none"> +Nonterminals E. +Terminals '+' '*' '(' ')' number. +Rootsymbol E. +Left 100 '+'. +Left 200 '*'. +E -> E '+' E : ['$1', '$2', '$3']. +E -> E '*' E : ['$1', '$2', '$3']. +E -> '(' E ')' : '$2'. +E -> number : '$1'. </code> + <p>3. An overloaded minus operator:</p> + <code type="none"> +Nonterminals E uminus. +Terminals '*' '-' number. +Rootsymbol E. + +Left 100 '-'. +Left 200 '*'. +Unary 300 uminus. + +E -> E '-' E. +E -> E '*' E. +E -> uminus. +E -> number. + +uminus -> '-' E. </code> + <p>4. The Yecc grammar that is used for parsing grammar + files, including itself:</p> + <code type="none"> +Nonterminals +grammar declaration rule head symbol symbols attached_code +token tokens. +Terminals +atom float integer reserved_symbol reserved_word string char var +'->' ':' dot. 
+Rootsymbol grammar. +Endsymbol '$end'. +grammar -> declaration : '$1'. +grammar -> rule : '$1'. +declaration -> symbol symbols dot: {'$1', '$2'}. +rule -> head '->' symbols attached_code dot: {rule, ['$1' | '$3'], + '$4'}. +head -> symbol : '$1'. +symbols -> symbol : ['$1']. +symbols -> symbol symbols : ['$1' | '$2']. +attached_code -> ':' tokens : {erlang_code, '$2'}. +attached_code -> '$empty' : {erlang_code, + [{atom, 0, '$undefined'}]}. +tokens -> token : ['$1']. +tokens -> token tokens : ['$1' | '$2']. +symbol -> var : value_of('$1'). +symbol -> atom : value_of('$1'). +symbol -> integer : value_of('$1'). +symbol -> reserved_word : value_of('$1'). +token -> var : '$1'. +token -> atom : '$1'. +token -> float : '$1'. +token -> integer : '$1'. +token -> string : '$1'. +token -> char : '$1'. +token -> reserved_symbol : {value_of('$1'), line_of('$1')}. +token -> reserved_word : {value_of('$1'), line_of('$1')}. +token -> '->' : {'->', line_of('$1')}. +token -> ':' : {':', line_of('$1')}. +Erlang code. +value_of(Token) -> + element(3, Token). +line_of(Token) -> + element(2, Token). </code> + <note> + <p>The symbols <c>'->'</c>, and <c>':'</c> have to be treated in + a special way, as they are meta symbols of the grammar + notation, as well as terminal symbols of the Yecc + grammar.</p> + </note> + <p>5. The file <c>erl_parse.yrl</c> in the <c>lib/stdlib/src</c> + directory contains the grammar for Erlang.</p> + <note> + <p>Syntactic tests are used in the code associated with some + rules, and an error is thrown (and caught by the generated + parser to produce an error message) when a test fails. 
The + same effect can be achieved with a call to + <c>return_error(Error_line, Message_string)</c>, which is + defined in the <c>yeccpre.hrl</c> default header file.</p> + </note> + </section> + + <section> + <title>Files</title> + <code type="none"> +lib/parsetools/include/yeccpre.hrl </code> + </section> + + <section> + <title>See Also</title> + <p>Aho & Johnson: 'LR Parsing', ACM Computing Surveys, vol. 6:2, 1974.</p> + </section> +</erlref> + |