The R13B03 release.OTP_R13B03

author: Erlang/OTP <[email protected]> 2009-11-20 14:54:40 +0000
committer: Erlang/OTP <[email protected]> 2009-11-20 14:54:40 +0000
commit: 84adefa331c4159d432d22840663c38f155cd4c1 (patch)
tree: bff9a9c66adda4df2106dfd0e5c053ab182a12bd /lib/parsetools/doc/src
download: otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.gz
otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.bz2
otp-84adefa331c4159d432d22840663c38f155cd4c1.zip
13 files changed, 1665 insertions, 0 deletions
diff --git a/lib/parsetools/doc/src/Makefile b/lib/parsetools/doc/src/Makefile
new file mode 100644
index 0000000000..6e693e0cf0
--- /dev/null
+++ b/lib/parsetools/doc/src/Makefile
@@ -0,0 +1,119 @@
+#
+# %CopyrightBegin%
+# 
+# Copyright Ericsson AB 1997-2009. All Rights Reserved.
+# 
+# The contents of this file are subject to the Erlang Public License,
+# Version 1.1, (the "License"); you may not use this file except in
+# compliance with the License. You should have received a copy of the
+# Erlang Public License along with this software. If not, it can be
+# retrieved online at http://www.erlang.org/.
+# 
+# Software distributed under the License is distributed on an "AS IS"
+# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+# the License for the specific language governing rights and limitations
+# under the License.
+# 
+# %CopyrightEnd%
+#
+
+include $(ERL_TOP)/make/target.mk
+include $(ERL_TOP)/make/$(TARGET)/otp.mk
+
+# ----------------------------------------------------
+# Application version
+# ----------------------------------------------------
+include ../../vsn.mk
+VSN=$(PARSETOOLS_VSN)
+APPLICATION=parsetools
+
+# ----------------------------------------------------
+# Release directory specification
+# ----------------------------------------------------
+RELSYSDIR = $(RELEASE_PATH)/lib/$(APPLICATION)-$(VSN)
+
+# ----------------------------------------------------
+# Target Specs
+# ----------------------------------------------------
+XML_APPLICATION_FILES = ref_man.xml
+XML_REF3_FILES = yecc.xml leex.xml
+
+XML_PART_FILES = part_notes.xml
+XML_CHAPTER_FILES = notes.xml
+
+BOOK_FILES = book.xml
+
+XML_FILES = \
+	$(BOOK_FILES) $(XML_CHAPTER_FILES) \
+	$(XML_PART_FILES) $(XML_REF3_FILES) $(XML_APPLICATION_FILES)
+
+GIF_FILES = \
+	note.gif
+
+XML_HTML_FILES = \
+	notes_history.xml
+
+# ----------------------------------------------------
+
+HTML_FILES = $(XML_APPLICATION_FILES:%.xml=$(HTMLDIR)/%.html) \
+	$(XML_HTML_FILES:%.xml=$(HTMLDIR)/%.html) \
+	$(XML_PART_FILES:%.xml=$(HTMLDIR)/%.html)
+
+INFO_FILE = ../../info
+
+MAN3_FILES = $(XML_REF3_FILES:%.xml=$(MAN3DIR)/%.3)
+
+HTML_REF_MAN_FILE = $(HTMLDIR)/index.html
+
+TOP_PDF_FILE = $(PDFDIR)/$(APPLICATION)-$(VSN).pdf
+
+# ----------------------------------------------------
+# FLAGS 
+# ----------------------------------------------------
+XML_FLAGS += 
+DVIPS_FLAGS += 
+
+# ----------------------------------------------------
+# Targets
+# ----------------------------------------------------
+$(HTMLDIR)/%.gif: %.gif
+	$(INSTALL_DATA) $< $@
+
+docs: pdf html man
+
+$(TOP_PDF_FILE): $(XML_FILES)
+
+pdf: $(TOP_PDF_FILE)
+
+html: gifs $(HTML_REF_MAN_FILE)
+
+clean clean_docs:
+	rm -rf $(HTMLDIR)/*
+	rm -f $(MAN3DIR)/*
+	rm -f $(TOP_PDF_FILE) $(TOP_PDF_FILE:%.pdf=%.fo)
+	rm -f errs core *~
+
+man: $(MAN3_FILES)
+
+gifs: $(GIF_FILES:%=$(HTMLDIR)/%)
+
+debug opt: 
+
+# ----------------------------------------------------
+# Release Target
+# ---------------------------------------------------- 
+include $(ERL_TOP)/make/otp_release_targets.mk
+
+release_docs_spec: docs
+	$(INSTALL_DIR) $(RELSYSDIR)/doc/pdf
+	$(INSTALL_DATA) $(TOP_PDF_FILE) $(RELSYSDIR)/doc/pdf
+	$(INSTALL_DIR) $(RELSYSDIR)/doc/html
+	$(INSTALL_DATA) $(HTMLDIR)/* \
+		$(RELSYSDIR)/doc/html
+	$(INSTALL_DATA) $(INFO_FILE) $(RELSYSDIR)
+	$(INSTALL_DIR) $(RELEASE_PATH)/man/man3
+	$(INSTALL_DATA) $(MAN3DIR)/* $(RELEASE_PATH)/man/man3
+
+
+release_spec:
+
diff --git a/lib/parsetools/doc/src/book.xml b/lib/parsetools/doc/src/book.xml
new file mode 100644
index 0000000000..93a107b798
--- /dev/null
+++ b/lib/parsetools/doc/src/book.xml
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE book SYSTEM "book.dtd">
+
+<book xmlns:xi="http://www.w3.org/2001/XInclude">
+  <header titlestyle="normal">
+    <copyright>
+      <year>1997</year><year>2009</year>
+      <holder>Ericsson AB. All Rights Reserved.</holder>
+    </copyright>
+    <legalnotice>
+      The contents of this file are subject to the Erlang Public License,
+      Version 1.1, (the "License"); you may not use this file except in
+      compliance with the License. You should have received a copy of the
+      Erlang Public License along with this software. If not, it can be
+      retrieved online at http://www.erlang.org/.
+    
+      Software distributed under the License is distributed on an "AS IS"
+      basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+      the License for the specific language governing rights and limitations
+      under the License.
+    
+    </legalnotice>
+
+    <title>Parse Tools</title>
+    <prepared>Carl Velin</prepared>
+    <docno></docno>
+    <date>1997-05-02</date>
+    <rev>1.0</rev>
+    <file>book.sgml</file>
+  </header>
+  <insidecover>
+  </insidecover>
+  <pagetext>Parse Tools</pagetext>
+  <preamble>
+    <contents level="2"></contents>
+  </preamble>
+  <applications>
+    <xi:include href="ref_man.xml"/>
+  </applications>
+  <releasenotes>
+    <xi:include href="notes.xml"/>
+  </releasenotes>
+  <listofterms></listofterms>
+  <index></index>
+</book>
+
diff --git a/lib/parsetools/doc/src/fascicules.xml b/lib/parsetools/doc/src/fascicules.xml
new file mode 100644
index 0000000000..43090b4aed
--- /dev/null
+++ b/lib/parsetools/doc/src/fascicules.xml
@@ -0,0 +1,15 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE fascicules SYSTEM "fascicules.dtd">
+
+<fascicules>
+  <fascicule file="ref_man" href="ref_man_frame.html" entry="yes">
+    Reference Manual
+  </fascicule>
+  <fascicule file="part_notes" href="part_notes_frame.html" entry="no">
+    Release Notes
+  </fascicule>
+  <fascicule file="" href="../../../../doc/print.html" entry="no">
+    Off-Print
+  </fascicule>
+</fascicules>
+
diff --git a/lib/parsetools/doc/src/leex.xml b/lib/parsetools/doc/src/leex.xml
new file mode 100644
index 0000000000..c113b586df
--- /dev/null
+++ b/lib/parsetools/doc/src/leex.xml
@@ -0,0 +1,455 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE erlref SYSTEM "erlref.dtd">
+
+<erlref>
+  <header>
+    <copyright>
+      <year>2009</year><year>2009</year>
+      <holder>Ericsson AB. All Rights Reserved.</holder>
+    </copyright>
+    <legalnotice>
+      Copyright (c) 2008,2009 Robert Virding. All rights reserved.
+    </legalnotice>
+
+    <title>leex</title>
+    <prepared>Robert Virding</prepared>
+    <responsible>nobody</responsible>
+    <docno></docno>
+    <approved>nobody</approved>
+    <checked></checked>
+    <date>2009-05-07</date>
+    <rev>A</rev>
+    <file>leex.xml</file>
+  </header>
+  <module>leex</module>
+  <modulesummary>Lexical analyzer generator for Erlang</modulesummary>
+  <description>
+    <p>A regular expression based lexical analyzer generator for
+      Erlang, similar to lex or flex.</p>
+    <note><p>The Leex module should be considered experimental
+      as it will be subject to changes in future releases.</p></note>
+  </description>
+  <section>
+    <title>DATA TYPES</title>
+    <code type="none">
+ErrorInfo = {ErrorLine,module(),error_descriptor()}
+ErrorLine = integer()
+Token = tuple()</code>
+  </section>
+  <funcs>
+    <func>
+      <name>file(FileName) -> FileReturn</name>
+      <name>file(FileName, Options) -> FileReturn</name>
+      <fsummary>Generate a lexical analyzer</fsummary>
+      <type>
+        <v>FileName = filename()</v>
+        <v>Options = Option | [Option]</v>
+        <v>Option =&nbsp;-&nbsp;see below&nbsp;-</v>
+        <v>FileReturn = {ok, Scannerfile} 
+                      | {ok, Scannerfile, Warnings}
+                      | error
+                      | {error, Errors, Warnings}</v>
+        <v>Scannerfile = filename()</v>
+        <v>Warnings = Errors = [{filename(), [ErrorInfo]}]</v>
+      </type>
+      <desc>
+        <p>Generates a lexical analyzer from the definition in the input
+          file. The input file has the extension <c>.xrl</c>. This is
+          added to the filename if it is not given. The resulting module
+          is the Xrl filename without the <c>.xrl</c> extension.</p>
+
+        <p>The current options are:</p>
+          <taglist>
+            <tag><c>dfa_graph</c></tag>
+            <item><p>Generates a <c>.dot</c> file which contains a
+              description of the DFA in a format which can be viewed with
+              Graphviz, <c>www.graphviz.com</c>.</p>
+            </item>
+            <tag><c>{includefile,Includefile}</c></tag>
+            <item><p>Uses a specific or customised prologue file
+              instead of default
+              <c>lib/parsetools/include/leexinc.hrl</c> which is
+              otherwise included.</p>
+            </item>
+            <tag><c>{report_errors, bool()}</c></tag>
+            <item><p>Causes errors to be printed as they occur. Default is
+              <c>true</c>.</p>
+            </item>
+            <tag><c>{report_warnings, bool()}</c></tag>
+            <item><p>Causes warnings to be printed as they occur. Default is
+              <c>true</c>.</p>
+            </item>
+            <tag><c>{report, bool()}</c></tag>
+            <item><p>This is a short form for both <c>report_errors</c> and
+              <c>report_warnings</c>.</p>
+            </item>
+            <tag><c>{return_errors, bool()}</c></tag>
+            <item><p>If this flag is set, <c>{error, Errors, Warnings}</c>
+              is returned when there are errors. Default is <c>false</c>.</p>
+            </item>
+            <tag><c>{return_warnings, bool()}</c></tag>
+            <item><p>If this flag is set, an extra field containing
+              <c>Warnings</c> is added to the tuple returned upon
+               success. Default is <c>false</c>.</p>
+            </item>
+            <tag><c>{return, bool()}</c></tag>
+            <item><p>This is a short form for both <c>return_errors</c> and
+              <c>return_warnings</c>.</p>
+            </item>
+            <tag><c>{scannerfile, Scannerfile}</c></tag>
+            <item><p><c>Scannerfile</c> is the name of the file that
+             will contain the Erlang scanner code that is generated.
+             The default (<c>""</c>) is to add the extension
+             <c>.erl</c> to <c>FileName</c> stripped of the
+             <c>.xrl</c> extension.</p>
+            </item>
+            <tag><c>{verbose, bool()}</c></tag>
+            <item><p>Outputs information from parsing the input file and
+              generating the internal tables.</p>
+            </item>
+          </taglist>
+        <p>Any of the Boolean options can be set to <c>true</c> by 
+          stating the name of the option. For example, <c>verbose</c>
+          is equivalent to <c>{verbose, true}</c>.</p>
+        <p>Leex will add the extension <c>.hrl</c> to the 
+          <c>Includefile</c> name and the extension <c>.erl</c> to the
+          <c>Scannerfile</c> name, unless the extension is already
+          there.</p>
+      </desc>
+    </func>
+    <func>
+      <name>format_error(ErrorInfo) -> Chars</name>
+      <fsummary>Return an English description of an error tuple.</fsummary>
+      <type>
+        <v>Chars = [char() | Chars]</v>
+      </type>
+      <desc>
+        <p>Returns a string which describes the error
+          <c>ErrorInfo</c> returned when there is an error in a
+          regular expression.</p>
+      </desc>
+    </func>
+  </funcs>
+  
+
+  <section>
+    <title>GENERATED SCANNER EXPORTS</title>
+    <p>The following functions are exported by the generated scanner.</p>
+  </section>
+
+  <funcs>
+    <func>
+      <name>string(String) -> StringRet</name>
+      <name>string(String, StartLine) -> StringRet</name>
+      <fsummary>Generated by Leex</fsummary>
+      <type>
+        <v>String = string()</v>
+        <v>StringRet = {ok,Tokens,EndLine} | ErrorInfo</v>
+        <v>Tokens = [Token]</v>
+        <v>EndLine = StartLine = integer()</v>
+      </type>
+      <desc>
+        <p>Scans <c>String</c> and returns all the tokens in it, or an
+          error.</p>
+        <note><p>It is an error if not all of the characters in
+          <c>String</c> are consumed.</p></note>
+      </desc>
+    </func>
+
+    <func>
+      <name>token(Cont, Chars) -> {more,Cont1} | {done,TokenRet,RestChars}
+      </name>
+      <name>token(Cont, Chars, StartLine) -> {more,Cont1} 
+             | {done,TokenRet,RestChars}
+      </name>
+      <fsummary>Generated by Leex</fsummary>
+      <type>
+        <v>Cont = [] | Cont1</v>
+        <v>Cont1 = tuple()</v>
+        <v>Chars = RestChars = string() | eof</v>
+        <v>TokenRet = {ok, Token, EndLine} 
+                    | {eof, EndLine}
+                    | ErrorInfo</v>
+        <v>StartLine = EndLine = integer()</v>
+      </type>
+      <desc>
+        <p>This is a re-entrant call to try and scan one token from
+          <c>Chars</c>. If there are enough characters in <c>Chars</c>
+          to either scan a token or detect an error then this will be
+          returned with <c>{done,...}</c>. Otherwise
+          <c>{more,Cont1}</c> will be returned where <c>Cont1</c> is
+          used in the next call to <c>token()</c> with more characters
+          to try and scan the token. This is continued until a token
+          has been scanned. <c>Cont</c> is initially <c>[]</c>.</p>
+ 
+        <p>It is not designed to be called directly by an application
+          but used through the i/o system where it can typically be
+          called in an application by:</p>
+        <code>
+io:request(InFile, {get_until,Prompt,Module,token,[Line]})
+  -> TokenRet</code>
+      </desc>
+    </func>
+
+    <func>
+      <name>tokens(Cont, Chars) -> {more,Cont1} | {done,TokensRet,RestChars}
+      </name>
+      <name>tokens(Cont, Chars, StartLine) -> 
+               {more,Cont1} | {done,TokensRet,RestChars}
+      </name>
+      <fsummary>Generated by Leex</fsummary>
+      <type>
+        <v>Cont = [] | Cont1</v>
+        <v>Cont1 = tuple()</v>
+        <v>Chars = RestChars = string() | eof</v>
+        <v>TokensRet = {ok, Tokens, EndLine} 
+                     | {eof, EndLine}
+                     | ErrorInfo</v>
+        <v>Tokens = [Token]</v>
+        <v>StartLine = EndLine = integer()</v>
+      </type>
+      <desc>
+        <p>This is a re-entrant call to try and scan tokens from
+          <c>Chars</c>. If there are enough characters in <c>Chars</c>
+          to either scan tokens or detect an error then this will be
+          returned with <c>{done,...}</c>. Otherwise
+          <c>{more,Cont1}</c> will be returned where <c>Cont1</c> is
+          used in the next call to <c>tokens()</c> with more
+          characters to try and scan the tokens. This is continued
+          until all tokens have been scanned. <c>Cont</c> is initially
+          <c>[]</c>.</p>
+ 
+        <p>This function differs from <c>token</c> in that it will
+          continue to scan tokens up to and including the point where an
+          <c>{end_token,Token}</c> has been scanned (see next
+          section). It will then return all the tokens. This is
+          typically used for scanning grammars like Erlang where there
+          is an explicit end token, <c>'.'</c>. If no end token is
+          found then the whole file will be scanned and returned. If
+          an error occurs then all tokens up to and including the next
+          end token will be skipped.</p>
+ 
+        <p>It is not designed to be called directly by an application
+          but used through the i/o system where it can typically be
+          called in an application by:</p>
+        <code>
+io:request(InFile, {get_until,Prompt,Module,tokens,[Line]})
+  -> TokensRet</code>
+      </desc>
+    </func>
+  </funcs>
+
+  <section>
+    <title>Input File Format</title>
+    <p>Erlang style comments starting with a <c>%</c> are allowed in
+      scanner files. A definition file has the following format:</p>
+    <code>
+&lt;Header>
+
+Definitions.
+
+&lt;Macro Definitions>
+
+Rules.
+
+&lt;Token Rules>
+
+Erlang code.
+
+&lt;Erlang code></code>
+ 
+    <p>The "Definitions.", "Rules." and "Erlang code." headings are
+      mandatory and must occur at the beginning of a source line. The
+      &lt;Header>, &lt;Macro Definitions> and &lt;Erlang code>
+      sections may be empty but there must be at least one rule.</p>
+ 
+    <p>Macro definitions have the following format:</p>
+
+    <code>
+NAME = VALUE</code>
+
+    <p>and there must be spaces around <c>=</c>. Macros can be used in
+      the regular expressions of rules by writing <c>{NAME}</c>.</p>
+
+    <note><p>When macros are expanded in expressions the macro calls
+      are replaced by the macro value without any form of quoting or
+      enclosing in parentheses.</p></note>
+ 
+    <p>Rules have the following format:</p>
+
+    <code>
+&lt;Regexp> : &lt;Erlang code>.</code>
+ 
+    <p>The &lt;Regexp> must occur at the start of a line and not
+      include any blanks; use <c>\\t</c> and <c>\\s</c> to include TAB
+      and SPACE characters in the regular expression. If &lt;Regexp>
+      matches then the corresponding &lt;Erlang code> is evaluated to
+      generate a token. Within the Erlang code the following predefined
+      variables are available:</p>
+ 
+    <taglist>
+      <tag><c>TokenChars</c></tag>
+      <item><p>A list of the characters in the matched token.</p>
+      </item>
+      <tag><c>TokenLen</c></tag>
+      <item><p>The number of characters in the matched token.</p>
+      </item>
+      <tag><c>TokenLine</c></tag>
+      <item><p>The line number where the token occurred.</p>
+      </item>
+    </taglist>
+ 
+    <p>The code must return:</p>
+ 
+    <taglist>
+      <tag><c>{token,Token}</c></tag>
+      <item><p>Return <c>Token</c> to the caller.</p>
+      </item>
+      <tag><c>{end_token,Token}</c></tag>
+      <item><p>Return <c>Token</c>, which is the last token in a tokens call.</p>
+      </item>
+      <tag><c>skip_token</c></tag>
+      <item><p>Skip this token completely.</p>
+      </item>
+      <tag><c>{error,ErrString}</c></tag>
+      <item><p>An error in the token, <c>ErrString</c> is a string
+         describing the error.</p>
+      </item>
+    </taglist>
+ 
+    <p>It is also possible to push back characters into the input
+      characters with the following returns:</p>
+ 
+    <list>
+      <item><c>{token,Token,PushBackList}</c></item>
+      <item><c>{end_token,Token,PushBackList}</c></item>
+      <item><c>{skip_token,PushBackList}</c></item>
+    </list>
+ 
+    <p>These have the same meanings as the normal returns but the
+      characters in <c>PushBackList</c> will be prepended to the input
+      characters and scanned for the next token. Note that pushing
+      back a newline will mean the line numbering will no longer be
+      correct.</p>
+
+    <note><p>Pushing back characters gives you unexpected
+      possibilities to cause the scanner to loop!</p></note>
+ 
+    <p>The following example would match a simple Erlang integer or
+      float and return a token which could be sent to the Erlang
+      parser:</p>
+    <code>
+D = [0-9]
+
+{D}+ :
+  {token,{integer,TokenLine,list_to_integer(TokenChars)}}.
+
+{D}+\\.{D}+((E|e)(\\+|\\-)?{D}+)? :
+  {token,{float,TokenLine,list_to_float(TokenChars)}}.</code>
+ 
+    <p>The Erlang code in the "Erlang code." section is written into
+      the output file directly after the module declaration and
+      predefined exports declaration so it is possible to add extra
+      exports, define imports and other attributes which are then
+      visible in the whole file.</p>
+  </section>
+  
+  <section>
+    <title>Regular Expressions</title>
+ 
+    <p>The regular expressions allowed here are a subset of the set
+      found in <c>egrep</c> and in the AWK programming language, as
+      defined in the book, The AWK Programming Language, by A. V. Aho,
+      B. W. Kernighan, P. J. Weinberger. They are composed of the
+      following characters:</p>
+ 
+    <taglist>
+      <tag><c>c</c></tag>
+      <item><p>Matches the non-metacharacter c.</p>
+      </item>
+      <tag><c>\\c</c></tag>
+      <item><p>Matches the escape sequence or literal character c.</p>
+      </item>
+      <tag><c>.</c></tag>
+      <item><p>Matches any character.</p>
+      </item>
+      <tag><c>^</c></tag>
+      <item><p>Matches the beginning of a string.</p>
+      </item>
+      <tag><c>$</c></tag>
+      <item><p>Matches the end of a string.</p></item>
+      <tag><c>[abc...]</c></tag>
+      <item><p>Character class, which matches any of the characters
+        <c>abc...</c>. Character ranges are specified by a pair of
+        characters separated by a <c>-</c>.</p>
+      </item>
+      <tag><c>[^abc...]</c></tag>
+      <item><p>Negated character class, which matches any character
+        except <c>abc...</c>.</p>
+      </item>
+      <tag><c>r1 | r2</c></tag>
+      <item><p>Alternation. It matches either <c>r1</c> or <c>r2</c>.</p>
+      </item>
+      <tag><c>r1r2</c></tag>
+      <item><p>Concatenation. It matches <c>r1</c> and then <c>r2</c>.</p>
+      </item>
+      <tag><c>r+</c></tag>
+      <item><p>Matches one or more <c>rs</c>.</p>
+      </item>
+      <tag><c>r*</c></tag>
+      <item><p>Matches zero or more <c>rs</c>.</p>
+      </item>
+      <tag><c>r?</c></tag>
+      <item><p>Matches zero or one <c>rs</c>.</p>
+      </item>
+      <tag><c>(r)</c></tag>
+      <item><p>Grouping. It matches <c>r</c>.</p>
+      </item>
+    </taglist>
+ 
+    <p>The escape sequences allowed are the same as for Erlang strings:</p>
+
+    <taglist>
+      <tag><c>\\b</c></tag>
+      <item><p>Backspace.</p></item>
+      <tag><c>\\f</c></tag>
+      <item><p>Form feed.</p></item>
+      <tag><c>\\n</c></tag>
+      <item><p>Newline (line feed).</p></item>
+      <tag><c>\\r</c></tag>
+      <item><p>Carriage return.</p></item>
+      <tag><c>\\t</c></tag>
+      <item><p>Tab.</p></item>
+      <tag><c>\\e</c></tag>
+      <item><p>Escape.</p></item>
+      <tag><c>\\v</c></tag>
+      <item><p>Vertical tab.</p></item>
+      <tag><c>\\s</c></tag>
+      <item><p>Space.</p></item>
+      <tag><c>\\d</c></tag>
+      <item><p>Delete.</p></item>
+      <tag><c>\\ddd</c></tag>
+      <item><p>The octal value <c>ddd</c>.</p></item>
+      <tag><c>\\xhh</c></tag>
+      <item><p>The hexadecimal value <c>hh</c>.</p></item>
+      <tag><c>\\x{h...}</c></tag>
+      <item><p>The hexadecimal value <c>h...</c>.</p></item>
+      <tag><c>\\c</c></tag>
+      <item><p>Any other character literally, for example <c>\\\\</c> for
+        backslash, <c>\\"</c> for <c>"</c>.</p>
+      </item>
+    </taglist>
+ 
+    <p>The following examples define Erlang data types:</p>
+    <code> 
+Atoms [a-z][0-9a-zA-Z_]*
+
+Variables [A-Z_][0-9a-zA-Z_]*
+
+Floats (\\+|-)?[0-9]+\\.[0-9]+((E|e)(\\+|-)?[0-9]+)?</code>
+ 
+    <note><p>Anchoring a regular expression with <c>^</c> and <c>$</c>
+      is not implemented in the current version of Leex and just
+      generates a parse error.</p></note>
+  </section>
+</erlref>
diff --git a/lib/parsetools/doc/src/make.dep b/lib/parsetools/doc/src/make.dep
new file mode 100644
index 0000000000..3a09ecdedd
--- /dev/null
+++ b/lib/parsetools/doc/src/make.dep
@@ -0,0 +1,21 @@
+# ----------------------------------------------------
+# >>>> Do not edit this file <<<<
+# This file was automaticly generated by
+# /home/otp/bin/docdepend
+# ----------------------------------------------------
+
+
+# ----------------------------------------------------
+# TeX files that the DVI file depend on
+# ----------------------------------------------------
+
+book.dvi: book.tex leex.tex ref_man.tex yecc.tex
+
+# ----------------------------------------------------
+# Source inlined when transforming from source to LaTeX
+# ----------------------------------------------------
+
+book.tex: ref_man.xml
+
+ref_man.tex: ../../../../system/doc/definitions/term.defs
+
diff --git a/lib/parsetools/doc/src/note.gif b/lib/parsetools/doc/src/note.gif
new file mode 100644
index 0000000000..6fffe30419
--- /dev/null
+++ b/lib/parsetools/doc/src/note.gif
diff --git a/lib/parsetools/doc/src/notes.xml b/lib/parsetools/doc/src/notes.xml
new file mode 100644
index 0000000000..2947517717
--- /dev/null
+++ b/lib/parsetools/doc/src/notes.xml
@@ -0,0 +1,308 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE chapter SYSTEM "chapter.dtd">
+
+<chapter>
+  <header>
+    <copyright>
+      <year>1997</year><year>2009</year>
+      <holder>Ericsson AB. All Rights Reserved.</holder>
+    </copyright>
+    <legalnotice>
+      The contents of this file are subject to the Erlang Public License,
+      Version 1.1, (the "License"); you may not use this file except in
+      compliance with the License. You should have received a copy of the
+      Erlang Public License along with this software. If not, it can be
+      retrieved online at http://www.erlang.org/.
+    
+      Software distributed under the License is distributed on an "AS IS"
+      basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+      the License for the specific language governing rights and limitations
+      under the License.
+    
+    </legalnotice>
+
+    <title>Parsetools Release Notes</title>
+    <prepared>otp_appnotes</prepared>
+    <docno>nil</docno>
+    <date>nil</date>
+    <rev>nil</rev>
+    <file>notes.xml</file>
+  </header>
+  <p>This document describes the changes made to the Parsetools application.</p>
+
+<section><title>Parsetools 2.0.1</title>
+
+    <section><title>Improvements and New Features</title>
+      <list>
+        <item>
+            <p>Leex no longer uses the deprecated <c>regexp</c>
+		module. (Thanks to Robert Virding.)</p>
+          <p>
+	    Own Id: OTP-8231</p>
+        </item>
+      </list>
+    </section>
+
+    <section><title>Fixed Bugs and Malfunctions</title>
+      <list>
+        <item>
+            <p>A minor bug in <c>leex(3)</c> has been fixed.</p>
+          <p>
+	    Own Id: OTP-8197</p>
+        </item>
+      </list>
+    </section>
+
+</section>
+
+<section><title>Parsetools 2.0</title>
+
+    <section><title>Improvements and New Features</title>
+      <list>
+        <item>
+	    <p>Leex, a lexical analyzer generator for Erlang,
+		has been added to Parsetools. This initial version
+		should be considered experimental; it is known
+		that there will be changes and additions.
+		(Thanks to Robert Virding.)</p>
+          <p>
+	    Own Id: OTP-8013</p>
+        </item>
+      </list>
+    </section>
+
+    <section><title>Fixed Bugs and Malfunctions</title>
+      <list>
+        <item>
+            <p>The parsers generated by Yecc now report correct
+            error lines when possible.</p>
+          <p>
+	    Own Id: OTP-7969</p>
+        </item>
+      </list>
+    </section>
+
+</section>
+
+<section><title>Parsetools 1.4.7</title>
+
+    <section><title>Fixed Bugs and Malfunctions</title>
+      <list>
+        <item>
+	    <p>A bug in yeccpre.hrl introduced in R13A has been
+	    fixed.</p>
+          <p>
+	    Own Id: OTP-7945</p>
+        </item>
+      </list>
+    </section>
+
+</section>
+
+<section><title>Parsetools 1.4.6</title>
+
+    <section><title>Improvements and New Features</title>
+      <list>
+        <item>
+	    <p>Updated file headers.</p>
+          <p>
+	    Own Id: OTP-7798</p>
+        </item>
+      </list>
+    </section>
+
+</section>
+
+<section><title>Parsetools 1.4.5</title>
+
+    <section><title>Improvements and New Features</title>
+      <list>
+        <item>
+	    <p>The <c>yecc</c> grammar has been augmented with an
+	    optional header section. (Thanks to Richard
+	    Carlsson.)</p>
+          <p>
+	    Own Id: OTP-7292</p>
+        </item>
+      </list>
+    </section>
+
+</section>
+
+
+<section><title>Parsetools 1.4.4</title>
+
+    <section><title>Improvements and New Features</title>
+      <list>
+        <item>
+	    <p>The size of the code generated by Yecc has 
+	    been reduced. The  code is also faster.</p>
+	    <p>Macros can now be used in actions.</p>
+          <p>
+	    Own Id: OTP-7224</p>
+        </item>
+      </list>
+    </section>
+
+</section>
+
+<section><title>Parsetools 1.4.3</title>
+
+    <section><title>Improvements and New Features</title>
+      <list>
+        <item>
+	    <p><c>tuple_size/1</c> and <c>byte_size/1</c> have been
+	    substituted for <c>size/1</c>.</p>
+          <p>
+	    Own Id: OTP-7009</p>
+        </item>
+      </list>
+    </section>
+
+</section>
+
+<section><title>Parsetools 1.4.2</title>
+
+    <section><title>Improvements and New Features</title>
+      <list>
+        <item>
+	    <p>The size of the code generated by yecc has been
+	    reduced. </p>
+	    <p>A note regarding the <c>includefile</c> option:
+	    although yecc can cope with includefiles based on some
+	    earlier <c>yeccpre.hrl</c> it is recommended for
+	    efficiency reasons to update includefiles as to follow
+	    the pattern in the latest <c>yeccpre.hrl</c>.</p>
+          <p>
+	    Own Id: OTP-6851</p>
+        </item>
+      </list>
+    </section>
+
+</section>
+
+  <section>
+    <title>Parsetools 1.4.1.1</title>
+
+    <section>
+      <title>Improvements and New Features</title>
+      <list type="bulleted">
+        <item>
+          <p>Minor Makefile changes.</p>
+          <p>Own Id: OTP-6689</p>
+        </item>
+      </list>
+    </section>
+  </section>
+
+  <section>
+    <title>Parsetools 1.4.1</title>
+
+    <section>
+      <title>Fixed Bugs and Malfunctions</title>
+      <list type="bulleted">
+        <item>
+          <p>A bug concerning precedence declarations of
+            non-terminals "one level up" has been fixed in yecc.</p>
+          <p>Own Id: OTP-6362</p>
+        </item>
+      </list>
+    </section>
+  </section>
+
+  <section>
+    <title>Parsetools 1.4</title>
+
+    <section>
+      <title>Improvements and Fixed Bugs</title>
+      <p>Several modifications of Yecc have been made:</p>
+      <list type="bulleted">
+        <item>
+          <p>The new functions <c>file/1,2</c> take the
+            role of the old functions <c>yecc/2,3,4</c>. The
+            latter functions are no longer documented but are
+            kept for backward compatibility.</p>
+        </item>
+        <item>
+          <p>More checks of the grammar file have been 
+            implemented. Examples are warnings for unused 
+            non-terminals and duplicated declarations.</p>
+        </item>
+        <item>
+          <p>Invalid pseudo variables are no longer 
+            replaced by <c>'$undefined'</c> but cause a failure.</p>
+        </item>
+        <item>
+          <p>Reserved words no longer need to be quoted
+            when used as terminals or non-terminals.</p>
+        </item>
+        <item>
+          <p>When compiling the generated parser file errors
+            and warnings concerning user code refer to 
+            the grammar file, not the parser file.</p>
+        </item>
+        <item>
+          <p>Yecc emits a warning if there are conflicts
+            in the grammar. The new declaration <c>Expect</c> can
+            be used to suppress this warning.</p>
+        </item>
+        <item>
+          <p>The new operator precedence declaration
+            <c>Nonassoc</c> can be used to declare operators with
+            no associativity.</p>
+        </item>
+        <item>
+          <p>Precedence can be given to more than one operator
+            with one single operator precedence declaration.</p>
+        </item>
+        <item>
+          <p>The function <c>parse_and_scan/1</c> in the
+            default includefile accepts <c>{Function, A}</c>
+            as well as <c>{{M,F}, A}</c> as tokenizer function.
+            Exceptions in the tokenizer are never caught.</p>
+        </item>
+        <item>
+          <p>The functions <c>yecc:file/1,2</c> can be accessed
+            from the Erlang shell via the new functions <c>c:y/1,2</c> 
+            in STDLIB.</p>
+        </item>
+      </list>
+      <p>See yecc(3) for further details.</p>
+      <p>Own Id: OTP-5366</p>
+    </section>
+  </section>
+
+  <section>
+    <title>Parsetools 1.3.2</title>
+
+    <section>
+      <title>Fixed Bugs and Malfunctions</title>
+      <list type="bulleted">
+        <item>
+          <p>A bug in <c>Yecc</c> that was introduced in R9B has been
+            removed. Another bug concerning precedence declaration
+            "one level up" has been fixed.</p>
+          <p>Own Id: OTP-5461</p>
+        </item>
+      </list>
+    </section>
+  </section>
+
+  <section>
+    <title>Parsetools 1.3.1</title>
+
+    <section>
+      <title>Fixed Bugs and Malfunctions</title>
+      <list type="bulleted">
+        <item>
+          <p>A bug in the file <c>parsetools/include/yeccpre.hrl</c>
+            caused <c>yecc:parse_and_scan/1</c> to always report a
+            parse failure when the lexer reported end-of-file. This
+            problem has been fixed.</p>
+          <p>Own Id: OTP-5369</p>
+        </item>
+      </list>
+    </section>
+  </section>
+</chapter>
+
diff --git a/lib/parsetools/doc/src/notes_history.xml b/lib/parsetools/doc/src/notes_history.xml
new file mode 100644
index 0000000000..6a63812bcb
--- /dev/null
+++ b/lib/parsetools/doc/src/notes_history.xml
@@ -0,0 +1,87 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE chapter SYSTEM "chapter.dtd">
+
+<chapter>
+  <header>
+    <copyright>
+      <year>2006</year><year>2009</year>
+      <holder>Ericsson AB. All Rights Reserved.</holder>
+    </copyright>
+    <legalnotice>
+      The contents of this file are subject to the Erlang Public License,
+      Version 1.1, (the "License"); you may not use this file except in
+      compliance with the License. You should have received a copy of the
+      Erlang Public License along with this software. If not, it can be
+      retrieved online at http://www.erlang.org/.
+    
+      Software distributed under the License is distributed on an "AS IS"
+      basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+      the License for the specific language governing rights and limitations
+      under the License.
+    
+    </legalnotice>
+
+    <title>Parsetools Release Notes</title>
+    <prepared>Hans Bolinder</prepared>
+    <responsible></responsible>
+    <docno></docno>
+    <approved></approved>
+    <checked></checked>
+    <date>06-02-20</date>
+    <rev>A</rev>
+    <file>notes_history.sgml</file>
+  </header>
+
+  <section>
+    <title>Parsetools 1.3</title>
+
+    <section>
+      <title>Improvements and New Features</title>
+      <list type="bulleted">
+        <item>
+          <p>The source code was cleaned up from unused variables to
+            eliminate compiler warnings. No other changes.</p>
+          <p>Own Id: OTP-5185</p>
+        </item>
+      </list>
+    </section>
+  </section>
+
+  <section>
+    <title>Parsetools 1.2</title>
+    <p>No release notes.</p>
+  </section>
+
+  <section>
+    <title>Parsetools 1.1</title>
+    <p>No release notes.</p>
+  </section>
+
+  <section>
+    <title>Parsetools 1.0.1</title>
+
+    <section>
+      <title>Fixed Bugs and Malfunctions</title>
+      <list type="bulleted">
+        <item>
+          <p>Correction in <c>yeccpre.hrl</c> to give correct syntax 
+            error info when the offending token was of the form 
+            <c>{Class, Line, Value}</c>.</p>
+          <p>Own Id: OTP-1881</p>
+        </item>
+        <item>
+          <p>The <c>yecc</c> function now accepts atoms in the
+            <c>Grammarfile</c>, <c>Parserfile</c> and <c>Includefile</c> 
+            arguments.</p>
+          <p>Own Id: OTP-1405</p>
+        </item>
+      </list>
+    </section>
+  </section>
+
+  <section>
+    <title>Parsetools 1.0</title>
+    <p>There are no changes since Erlang 4.3.</p>
+  </section>
+</chapter>
+
diff --git a/lib/parsetools/doc/src/part_notes.xml b/lib/parsetools/doc/src/part_notes.xml
new file mode 100644
index 0000000000..308fc95e35
--- /dev/null
+++ b/lib/parsetools/doc/src/part_notes.xml
@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE part SYSTEM "part.dtd">
+
+<part xmlns:xi="http://www.w3.org/2001/XInclude">
+  <header>
+    <copyright>
+      <year>1997</year><year>2009</year>
+      <holder>Ericsson AB. All Rights Reserved.</holder>
+    </copyright>
+    <legalnotice>
+      The contents of this file are subject to the Erlang Public License,
+      Version 1.1, (the "License"); you may not use this file except in
+      compliance with the License. You should have received a copy of the
+      Erlang Public License along with this software. If not, it can be
+      retrieved online at http://www.erlang.org/.
+    
+      Software distributed under the License is distributed on an "AS IS"
+      basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+      the License for the specific language governing rights and limitations
+      under the License.
+    
+    </legalnotice>
+
+    <title>PARSETOOLS Release Notes</title>
+    <prepared>Carl Velin</prepared>
+    <docno></docno>
+    <date>1997-04-28</date>
+    <rev>1.0</rev>
+    <file>part_notes.sgml</file>
+  </header>
+  <description>
+    <p>The <em>Parsetools</em> application contains utilities for
+      parsing and scanning. Yecc is an <term id="LALR-1"></term>parser
+      generator for Erlang, similar to yacc. Yecc takes a <term
+      id="BNF"></term>grammar definition as input, and produces Erlang
+      code for a parser as output. Leex is a regular expression based
+      lexical analyzer generator for Erlang, similar to lex or flex.</p>
+    <p>There are also release notes for
+      <seealso marker="notes_history">older versions</seealso>.</p>
+  </description>
+  <xi:include href="notes.xml"/>
+</part>
+
diff --git a/lib/parsetools/doc/src/ref_man.xml b/lib/parsetools/doc/src/ref_man.xml
new file mode 100644
index 0000000000..52f1c687da
--- /dev/null
+++ b/lib/parsetools/doc/src/ref_man.xml
@@ -0,0 +1,42 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE application SYSTEM "application.dtd">
+
+<application xmlns:xi="http://www.w3.org/2001/XInclude">
+  <header>
+    <copyright>
+      <year>1997</year><year>2009</year>
+      <holder>Ericsson AB. All Rights Reserved.</holder>
+    </copyright>
+    <legalnotice>
+      The contents of this file are subject to the Erlang Public License,
+      Version 1.1, (the "License"); you may not use this file except in
+      compliance with the License. You should have received a copy of the
+      Erlang Public License along with this software. If not, it can be
+      retrieved online at http://www.erlang.org/.
+    
+      Software distributed under the License is distributed on an "AS IS"
+      basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+      the License for the specific language governing rights and limitations
+      under the License.
+    
+    </legalnotice>
+
+    <title>Parsetools Reference Manual</title>
+    <prepared>Carl Velin</prepared>
+    <docno></docno>
+    <date>1997-04-28</date>
+    <rev>1.0</rev>
+    <file>application.sgml</file>
+  </header>
+  <description>
+    <p>The <em>Parsetools</em> application contains utilities for
+      parsing and scanning. Yecc is an <term id="LALR-1"></term>parser
+      generator for Erlang, similar to yacc. Yecc takes a <term
+      id="BNF"></term>grammar definition as input, and produces Erlang
+      code for a parser as output. Leex is a regular expression based
+      lexical analyzer generator for Erlang, similar to lex or flex.</p>
+  </description>
+  <xi:include href="yecc.xml"/>
+  <xi:include href="leex.xml"/>
+</application>
+
diff --git a/lib/parsetools/doc/src/user_guide.gif b/lib/parsetools/doc/src/user_guide.gif
new file mode 100644
index 0000000000..e6275a803d
--- /dev/null
+++ b/lib/parsetools/doc/src/user_guide.gif
diff --git a/lib/parsetools/doc/src/warning.gif b/lib/parsetools/doc/src/warning.gif
new file mode 100644
index 0000000000..96af52360e
--- /dev/null
+++ b/lib/parsetools/doc/src/warning.gif
diff --git a/lib/parsetools/doc/src/yecc.xml b/lib/parsetools/doc/src/yecc.xml
new file mode 100644
index 0000000000..81f1550b0a
--- /dev/null
+++ b/lib/parsetools/doc/src/yecc.xml
@@ -0,0 +1,529 @@
+<?xml version="1.0" encoding="latin1" ?>
+<!DOCTYPE erlref SYSTEM "erlref.dtd">
+
+<erlref>
+  <header>
+    <copyright>
+      <year>1996</year><year>2009</year>
+      <holder>Ericsson AB. All Rights Reserved.</holder>
+    </copyright>
+    <legalnotice>
+      The contents of this file are subject to the Erlang Public License,
+      Version 1.1, (the "License"); you may not use this file except in
+      compliance with the License. You should have received a copy of the
+      Erlang Public License along with this software. If not, it can be
+      retrieved online at http://www.erlang.org/.
+    
+      Software distributed under the License is distributed on an "AS IS"
+      basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+      the License for the specific language governing rights and limitations
+      under the License.
+    
+    </legalnotice>
+
+    <title>yecc</title>
+    <prepared>Carl Wilhelm Welin</prepared>
+    <responsible>Carl Wilhelm Welin</responsible>
+    <docno></docno>
+    <approved>Bjarne D&auml;cker</approved>
+    <checked></checked>
+    <date>1997-01-27</date>
+    <rev>B</rev>
+    <file>yecc.sgml</file>
+  </header>
+  <module>yecc</module>
+  <modulesummary>LALR-1 Parser Generator</modulesummary>
+  <description>
+    <p>An LALR-1 parser generator for Erlang, similar to <c>yacc</c>.
+      Takes a BNF grammar definition as input, and produces Erlang code
+      for a parser. </p>
+    <p>To understand this text, you also have to
+      look at the <c>yacc</c> documentation in the UNIX(TM) manual. This
+      is most probably necessary in order to understand the idea of a
+      parser generator, and the principle and problems of LALR parsing
+      with finite look-ahead.</p>
+  </description>
+  <funcs>
+    <func>
+      <name>file(Grammarfile [, Options]) -> YeccRet</name>
+      <fsummary>Generate a parser from a grammar file.</fsummary>
+      <type>
+        <v>Grammarfile = filename()</v>
+        <v>Options = Option | [Option]</v>
+        <v>Option =&nbsp;-&nbsp;see below&nbsp;-</v>
+        <v>YeccRet = {ok, Parserfile}  | {ok, Parserfile, Warnings}  | error  | {error, Errors, Warnings}</v>
+        <v>Parserfile = filename()</v>
+        <v>Warnings = Errors = [{filename(), [ErrorInfo]}]</v>
+        <v>ErrorInfo  = {ErrorLine, module(), Reason}</v>
+        <v>ErrorLine = integer()</v>
+        <v>Reason =&nbsp;-&nbsp;formatable by format_error/1&nbsp;-</v>
+      </type>
+      <desc>
+        <p><c>Grammarfile</c> is the file of declarations and grammar
+          rules. Returns <c>{ok, Parserfile}</c> upon success, or
+          <c>error</c> if there are errors. An Erlang file containing
+          the parser is created if there are no errors. The options are:
+          </p>
+        <taglist>
+          <tag><c>{parserfile, Parserfile}</c>.</tag>
+          <item><c>Parserfile</c> is the name of the file that will
+           contain the Erlang parser code that is generated. The
+           default (<c>""</c>) is to add the extension <c>.erl</c>
+           to <c>Grammarfile</c> stripped of the <c>.yrl</c>
+           extension.
+          </item>
+          <tag><c>{includefile, Includefile}</c>.</tag>
+          <item>Indicates a customized prologue file which the user
+           may want to use instead of the default file
+          <c>lib/parsetools/include/yeccpre.hrl</c> which is
+           otherwise included at the beginning of the resulting
+           parser file. <em>N.B.</em> The <c>Includefile</c> is
+           included 'as is' in the parser file, so it must not have a
+           module declaration of its own, and it should not be
+           compiled. It must, however, contain the necessary export
+           declarations. The default is indicated by <c>""</c>.
+          </item>
+          <tag><c>{report_errors, bool()}</c>.</tag>
+          <item>Causes errors to be printed as they occur. Default is
+          <c>true</c>.
+          </item>
+          <tag><c>{report_warnings, bool()}</c>.</tag>
+          <item>Causes warnings to be printed as they occur. Default is
+          <c>true</c>.
+          </item>
+          <tag><c>{report, bool()}</c>.</tag>
+          <item>This is a short form for both <c>report_errors</c> and
+          <c>report_warnings</c>.
+          </item>
+          <tag><c>{return_errors, bool()}</c>.</tag>
+          <item>If this flag is set, <c>{error, Errors, Warnings}</c>
+           is returned when there are errors. Default is
+          <c>false</c>.
+          </item>
+          <tag><c>{return_warnings, bool()}</c>.</tag>
+          <item>If this flag is set, an extra field containing
+          <c>Warnings</c> is added to the tuple returned upon
+           success. Default is <c>false</c>.
+          </item>
+          <tag><c>{return, bool()}</c>.</tag>
+          <item>This is a short form for both <c>return_errors</c> and
+          <c>return_warnings</c>.
+          </item>
+          <tag><c>{verbose, bool()}</c>. </tag>
+          <item>Determines whether the parser generator should give
+           full information about resolved and unresolved parse
+           action conflicts (<c>true</c>), or only about those
+           conflicts that prevent a parser from being generated from
+           the input grammar (<c>false</c>, the default).
+          </item>
+        </taglist>
+        <p>Any of the Boolean options can be set to <c>true</c> by 
+          stating the name of the option. For example, <c>verbose</c>
+          is equivalent to <c>{verbose, true}</c>.
+          </p>
+        <p>The value of the <c>Parserfile</c> option stripped of the
+          <c>.erl</c> extension is used by Yecc as the module name of
+          the generated parser file.</p>
+        <p>Yecc will add the extension <c>.yrl</c> to the
+          <c>Grammarfile</c> name, the extension <c>.hrl</c> to the
+          <c>Includefile</c> name, and the extension <c>.erl</c> to
+          the <c>Parserfile</c> name, unless the extension is already
+          there.</p>
+      </desc>
+    </func>
+    <func>
+      <name>format_error(Reason) -> Chars</name>
+      <fsummary>Return an English description of an error tuple.</fsummary>
+      <type>
+        <v>Reason =&nbsp;-&nbsp;as returned by yecc:file/1,2&nbsp;-</v>
+        <v>Chars = [char() | Chars]</v>
+      </type>
+      <desc>
+        <p>Returns a descriptive string in English of an error tuple
+          returned by <c>yecc:file/1,2</c>. This function is mainly
+          used by the compiler invoking Yecc.</p>
+      </desc>
+    </func>
+  </funcs>
+
+  <section>
+    <title>Pre-Processing</title>
+    <p>A <c>scanner</c> to pre-process the text (program, etc.) to be
+      parsed is not provided in the <c>yecc</c> module. The scanner
+      serves as a kind of lexicon look-up routine. It is possible to
+      write a grammar that uses only character tokens as terminal
+      symbols, thereby eliminating the need for a scanner, but this
+      would make the parser larger and slower.</p>
+    <p>The user should implement a scanner that segments the input
+      text, and turns it into one or more lists of tokens. Each token
+      should be a tuple containing information about syntactic
+      category, position in the text (e.g. line number), and the
+      actual terminal symbol found in the text: <c>{Category, LineNumber, Symbol}</c>.</p>
+    <p>If a terminal symbol is the only member of a category, and the
+      symbol name is identical to the category name, the token format
+      may be <c>{Symbol, LineNumber}</c>.</p>
+    <p>A list of tokens produced by the scanner should end with a
+      special <c>end_of_input</c> tuple which the parser is looking
+      for. The format of this tuple should be <c>{Endsymbol, LastLineNumber}</c>, where <c>Endsymbol</c> is an identifier
+      that is distinguished from all the terminal and non-terminal
+      categories of the syntax rules. The <c>Endsymbol</c> may be
+      declared in the grammar file (see below).</p>
+    <p>The simplest case is to segment the input string into a list of
+      identifiers (atoms) and use those atoms both as categories and
+      values of the tokens. For example, the input string <c>aaa bbb 777, X</c> may be scanned (tokenized) as:</p>
+    <code type="none">
+[{aaa, 1}, {bbb, 1}, {777, 1}, {',' , 1}, {'X', 1},
+ {'$end', 1}].    </code>
+    <p>This assumes that this is the first line of the input text, and
+      that <c>'$end'</c> is the distinguished <c>end_of_input</c>
+      symbol.</p>
+    <p>The Erlang scanner in the <c>io</c> module can be used as a
+      starting point when writing a new scanner. Study
+      <c>yeccscan.erl</c> in order to see how a filter can be added on
+      top of <c>io:scan_erl_form/3</c> to provide a scanner for
+      Yecc that tokenizes grammar files before parsing them
+      with the Yecc parser. A more general approach to scanner
+      implementation is to use a scanner generator, such as
+      <c>leex</c>, which is included in the Parsetools application.</p>
+  </section>
+
+  <section>
+    <title>Grammar Definition Format</title>
+    <p>Erlang style <c>comments</c>, starting with a <c>'%'</c>, are
+      allowed in grammar files.</p>
+    <p>Each <c>declaration</c> or <c>rule</c> ends with a dot (the
+      character <c>'.'</c>).</p>
+    <p>The grammar starts with an optional <c>header</c> section. The
+      header is put first in the generated file, before the module
+      declaration. The purpose of the header is to provide a means to
+      make the documentation generated by <c>EDoc</c> look nicer. Each
+      header line should be enclosed in double quotes, and newlines
+      will be inserted between the lines. For example:</p>
+    <code>
+Header "%% Copyright (C)"
+"%% @private"
+"%% @Author John"</code>
+    <p>Next comes a declaration of the <c>nonterminal categories</c>
+      to be used in the rules. For example:</p>
+    <code type="none">
+Nonterminals sentence nounphrase verbphrase.    </code>
+    <p>A non-terminal category can be used at the left hand side (=
+      <c>lhs</c>, or <c>head</c>) of a grammar rule. It can also
+      appear at the right hand side of rules.</p>
+    <p>Next comes a declaration of the <c>terminal categories</c>,
+      which are the categories of tokens produced by the scanner. For
+      example:</p>
+    <code type="none">
+Terminals article adjective noun verb.    </code>
+    <p>Terminal categories may only appear in the right hand sides (=
+      <c>rhs</c>) of grammar rules.</p>
+    <p>Next comes a declaration of the <c>rootsymbol</c>, or start
+      category of the grammar. For example:</p>
+    <code type="none">
+Rootsymbol sentence.    </code>
+    <p>This symbol should appear in the lhs of at least one grammar
+      rule. This is the most general syntactic category which the
+      parser ultimately will parse every input string into.</p>
+    <p>After the rootsymbol declaration comes an optional declaration
+      of the <c>end_of_input</c> symbol that your scanner is expected
+      to use. For example:</p>
+    <code type="none">
+Endsymbol '$end'.    </code>
+    <p>Next comes one or more declarations of <c>operator precedences</c>, if needed. These are used to resolve
+      shift/reduce conflicts (see <c>yacc</c> documentation).</p>
+    <p>Examples of operator declarations:</p>
+    <code type="none">
+Right 100 '='.
+Nonassoc 200 '==' '=/='.
+Left 300 '+'.
+Left 400 '*'.
+Unary 500 '-'.    </code>
+    <p>These declarations mean that <c>'='</c> is defined as a
+      <c>right associative binary</c> operator with precedence 100,
+      <c>'=='</c> and <c>'=/='</c> are operators with <c>no associativity</c>, <c>'+'</c> and <c>'*'</c> are <c>left associative binary</c> operators, where <c>'*'</c> takes
+      precedence over <c>'+'</c> (the normal case), and <c>'-'</c> is
+      a <c>unary</c> operator of higher precedence than <c>'*'</c>.
+      The fact that <c>'=='</c> has no associativity means that an expression
+      like <c>a == b == c</c> is considered a syntax error.</p>
+    <p>Certain rules are assigned precedence: each rule gets its
+      precedence from the last terminal symbol mentioned in the right
+      hand side of the rule. It is also possible to declare precedence
+      for non-terminals, "one level up". This is practical when an
+      operator is overloaded (see also example 3 below).</p>
+    <p>Next come the <c>grammar rules</c>. Each rule has the general
+      form</p>
+    <code type="none">
+Left_hand_side -> Right_hand_side : Associated_code.    </code>
+    <p>The left hand side is a non-terminal category. The right hand
+      side is a sequence of one or more non-terminal or terminal
+      symbols with spaces between. The associated code is a sequence
+      of zero or more Erlang expressions (with commas <c>','</c> as
+      separators). If the associated code is empty, the separating
+      colon <c>':'</c> is also omitted. A final dot marks the end of
+      the rule.</p>
+    <p>Symbols such as <c>'{'</c>, <c>'.'</c>, etc., have to be
+      enclosed in single quotes when used as terminal or non-terminal
+      symbols in grammar rules. The use of the symbols
+      <c>'$empty'</c>, <c>'$end'</c>, and <c>'$undefined'</c> should
+      be avoided.</p>
+    <p>The last part of the grammar file is an optional section with
+      Erlang code (= function definitions) which is included 'as is'
+      in the resulting parser file. This section must start with the
+      pseudo declaration, or key words</p>
+    <code type="none">
+Erlang code.    </code>
+    <p>No syntax rule definitions or other declarations may follow
+      this section. To avoid conflicts with internal variables, do not
+      use variable names beginning with two underscore characters
+      ('__') in the Erlang code in this section, or in the code
+      associated with the individual syntax rules.</p>
+    <p>The optional <c>expect</c> declaration can be placed anywhere
+      before the last optional section with Erlang code. It is used
+      for suppressing the warning about conflicts that is ordinarily
+      given if the grammar is ambiguous. An example:</p>
+    <code type="none">
+Expect 2.    </code>
+    <p>The warning is given if the number of shift/reduce conflicts
+      differs from 2, or if there are reduce/reduce conflicts.
+      </p>
+  </section>
+
+  <section>
+    <title>Examples</title>
+    <p>A grammar to parse list expressions (with empty associated
+      code):</p>
+    <code type="none">
+Nonterminals list elements element.
+Terminals atom '(' ')'.
+Rootsymbol list.
+list -> '(' ')'.
+list -> '(' elements ')'.
+elements -> element.
+elements -> element elements.
+element -> atom.
+element -> list.    </code>
+    <p>This grammar can be used to generate a parser which parses list
+      expressions, such as <c>(), (a), (peter charles), (a (b c) d (())), ...</c> provided that your scanner tokenizes, for
+      example, the input <c>(peter charles)</c> as follows:</p>
+    <code type="none">
+[{'(', 1} , {atom, 1, peter}, {atom, 1, charles}, {')', 1}, 
+ {'$end', 1}]    </code>
+    <p>When a grammar rule is used by the parser to parse (part of)
+      the input string as a grammatical phrase, the associated code is
+      evaluated, and the value of the last expression becomes the
+      value of the parsed phrase. This value may be used by the parser
+      later to build structures that are values of higher phrases of
+      which the current phrase is a part. The values initially
+      associated with terminal category phrases, i.e. input tokens,
+      are the token tuples themselves.</p>
+    <p>Below is an example of the grammar above with structure
+      building code added:</p>
+    <code type="none">
+list -> '(' ')' : nil.
+list -> '(' elements ')' : '$2'.
+elements -> element : {cons, '$1', nil}.
+elements -> element elements : {cons, '$1', '$2'}.
+element -> atom : '$1'.
+element -> list : '$1'.    </code>
+    <p>With this code added to the grammar rules, the parser produces
+      the following value (structure) when parsing the input string
+      <c>(a b c).</c>. This still assumes that this was the first
+      input line that the scanner tokenized:</p>
+    <code type="none">
+{cons, {atom, 1, a}, {cons, {atom, 1, b},
+                            {cons, {atom, 1, c}, nil}}}    </code>
+    <p>The associated code contains <c>pseudo variables</c> <c>'$1'</c>, <c>'$2'</c>, <c>'$3'</c>, etc. which refer to (are
+      bound to) the values associated previously by the parser with
+      the symbols of the right hand side of the rule. When these
+      symbols are terminal categories, the values are token tuples of
+      the input string (see above).</p>
+    <p>The associated code may not only be used to build structures
+      associated with phrases, but may also be used for syntactic and
+      semantic tests, printout actions (for example for tracing), etc.
+      during the parsing process. Since tokens contain positional
+      (line number) information, it is possible to produce error
+      messages which contain line numbers. If there is no associated
+      code after the right hand side of the rule, the value
+      <c>'$undefined'</c> is associated with the phrase.</p>
+    <p>The right hand side of a grammar rule may be empty. This is
+      indicated by using the special symbol <c>'$empty'</c> as rhs.
+      Then the list grammar above may be simplified to:</p>
+    <code type="none">
+list -> '(' elements ')' : '$2'.
+elements -> element elements : {cons, '$1', '$2'}.
+elements -> '$empty' : nil.
+element -> atom : '$1'.
+element -> list : '$1'.    </code>
+  </section>
+
+  <section>
+    <title>Generating a Parser</title>
+    <p>To call the parser generator, use the following command:</p>
+    <code type="none">
+yecc:file(Grammarfile).    </code>
+    <p>An error message from Yecc will be shown if the grammar
+      is not of the LALR type (for example too ambiguous).
+      Shift/reduce conflicts are resolved in favor of shifting if
+      there are no operator precedence declarations. Refer to the
+      <c>yacc</c> documentation on the use of operator precedence.</p>
+    <p>The output file contains Erlang source code for a parser module
+      with module name equal to the <c>Parserfile</c> parameter. After
+      compilation, the parser can be called as follows (the module
+      name is assumed to be <c>myparser</c>):</p>
+    <code type="none">
+myparser:parse(myscanner:scan(Inport))    </code>
+    <p>The call format may be different if a customized prologue file
+      has been included when generating the parser instead of the
+      default file <c>lib/parsetools/include/yeccpre.hrl</c>.</p>
+    <p>With the standard prologue, this call will return either
+      <c>{ok, Result}</c>, where <c>Result</c> is a structure that the
+      Erlang code of the grammar file has built, or <c>{error, {Line_number, Module, Message}}</c> if there was a syntax error
+      in the input.</p>
+    <p><c>Message</c> is something which may be converted into a
+      string by calling <c>Module:format_error(Message)</c>
+      and printed with <c>io:format/3</c>.</p>
+    <note>
+      <p>By default, the parser that was generated will not print out
+        error messages to the screen. The user will have to do this
+        either by printing the returned error messages, or by
+        inserting tests and print instructions in the Erlang code
+        associated with the syntax rules of the grammar file.</p>
+    </note>
+    <p>It is also possible to make the parser ask for more input
+      tokens when needed if the following call format is used:</p>
+    <code type="none">
+myparser:parse_and_scan({Function, Args})
+myparser:parse_and_scan({Mod, Tokenizer, Args})    </code>
+    <p>The tokenizer <c>Function</c> is either a fun or a tuple
+      <c>{Mod, Tokenizer}</c>. The call <c>apply(Function, Args)</c>
+      or <c>apply({Mod, Tokenizer}, Args)</c> is executed whenever a
+      new token is needed. This, for example, makes it possible to
+      parse from a file, token by token.</p>
+    <p>The tokenizer used above has to be implemented so as to return
+      one of the following:</p>
+    <code type="none">
+{ok, Tokens, Endline}
+{eof, Endline}
+{error, Error_description, Endline}    </code>
+    <p>This conforms to the format used by the scanner in the Erlang
+      <c>io</c> library module.</p>
+    <p>If <c>{eof, Endline}</c> is returned immediately, the call to
+      <c>parse_and_scan/1</c> returns <c>{ok, eof}</c>. If <c>{eof, Endline}</c> is returned before the parser expects end of input,
+      <c>parse_and_scan/1</c> will, of course, return an error message
+      (see above). Otherwise <c>{ok, Result}</c> is returned.</p>
+  </section>
+
+  <section>
+    <title>More Examples</title>
+    <p>1. A grammar for parsing infix arithmetic expressions into
+      prefix notation, without operator precedence:</p>
+    <code type="none">
+Nonterminals E T F.
+Terminals '+' '*' '(' ')' number.
+Rootsymbol E.
+E -> E '+' T: ['$2', '$1', '$3'].
+E -> T : '$1'.
+T -> T '*' F: ['$2', '$1', '$3'].
+T -> F : '$1'.
+F -> '(' E ')' : '$2'.
+F -> number : '$1'.    </code>
+    <p>2. The same with operator precedence becomes simpler:</p>
+    <code type="none">
+Nonterminals E.
+Terminals '+' '*' '(' ')' number.
+Rootsymbol E.
+Left 100 '+'.
+Left 200 '*'.
+E -> E '+' E : ['$2', '$1', '$3'].
+E -> E '*' E : ['$2', '$1', '$3'].
+E -> '(' E ')' : '$2'.
+E -> number : '$1'.    </code>
+    <p>3. An overloaded minus operator:</p>
+    <code type="none">
+Nonterminals E uminus.
+Terminals '*' '-' number.
+Rootsymbol E.
+
+Left 100 '-'.
+Left 200 '*'.
+Unary 300 uminus.
+
+E -> E '-' E.
+E -> E '*' E.
+E -> uminus.
+E -> number.
+
+uminus -> '-' E.    </code>
+    <p>4. The Yecc grammar that is used for parsing grammar
+      files, including itself:</p>
+    <code type="none">
+Nonterminals
+grammar declaration rule head symbol symbols attached_code
+token tokens.
+Terminals
+atom float integer reserved_symbol reserved_word string char var
+'->' ':' dot.
+Rootsymbol grammar.
+Endsymbol '$end'.
+grammar -> declaration : '$1'.
+grammar -> rule : '$1'.
+declaration -> symbol symbols dot: {'$1', '$2'}.
+rule -> head '->' symbols attached_code dot: {rule, ['$1' | '$3'], 
+        '$4'}.
+head -> symbol : '$1'.
+symbols -> symbol : ['$1'].
+symbols -> symbol symbols : ['$1' | '$2'].
+attached_code -> ':' tokens : {erlang_code, '$2'}.
+attached_code -> '$empty' : {erlang_code, 
+                 [{atom, 0, '$undefined'}]}.
+tokens -> token : ['$1'].
+tokens -> token tokens : ['$1' | '$2'].
+symbol -> var : value_of('$1').
+symbol -> atom : value_of('$1').
+symbol -> integer : value_of('$1').
+symbol -> reserved_word : value_of('$1').
+token -> var : '$1'.
+token -> atom : '$1'.
+token -> float : '$1'.
+token -> integer : '$1'.
+token -> string : '$1'.
+token -> char : '$1'.
+token -> reserved_symbol : {value_of('$1'), line_of('$1')}.
+token -> reserved_word : {value_of('$1'), line_of('$1')}.
+token -> '->' : {'->', line_of('$1')}.
+token -> ':' : {':', line_of('$1')}.
+Erlang code.
+value_of(Token) ->
+    element(3, Token).
+line_of(Token) ->
+    element(2, Token).    </code>
+    <note>
+      <p>The symbols <c>'->'</c>, and <c>':'</c> have to be treated in
+        a special way, as they are meta symbols of the grammar
+        notation, as well as terminal symbols of the Yecc
+        grammar.</p>
+    </note>
+    <p>5. The file <c>erl_parse.yrl</c> in the <c>lib/stdlib/src</c>
+      directory contains the grammar for Erlang.</p>
+    <note>
+      <p>Syntactic tests are used in the code associated with some
+        rules, and an error is thrown (and caught by the generated
+        parser to produce an error message) when a test fails. The
+        same effect can be achieved with a call to
+        <c>return_error(Error_line, Message_string)</c>, which is
+        defined in the <c>yeccpre.hrl</c> default header file.</p>
+    </note>
+  </section>
+
+  <section>
+    <title>Files</title>
+    <code type="none">
+lib/parsetools/include/yeccpre.hrl    </code>
+  </section>
+
+  <section>
+    <title>See Also</title>
+    <p>Aho &amp; Johnson: 'LR Parsing', ACM Computing Surveys, vol. 6:2, 1974.</p>
+  </section>
+</erlref>
+
author	Erlang/OTP <[email protected]>	2009-11-20 14:54:40 +0000
committer	Erlang/OTP <[email protected]>	2009-11-20 14:54:40 +0000
commit	84adefa331c4159d432d22840663c38f155cd4c1 (patch)
tree	bff9a9c66adda4df2106dfd0e5c053ab182a12bd /lib/parsetools/doc/src
download	otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.gz otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.bz2 otp-84adefa331c4159d432d22840663c38f155cd4c1.zip