author:    Erlang/OTP <[email protected]>  2009-11-20 14:54:40 +0000
committer: Erlang/OTP <[email protected]>  2009-11-20 14:54:40 +0000
commit:    84adefa331c4159d432d22840663c38f155cd4c1 (patch)
tree:      bff9a9c66adda4df2106dfd0e5c053ab182a12bd /lib/parsetools
The R13B03 release. (tag: OTP_R13B03)
Diffstat (limited to 'lib/parsetools')
32 files changed, 7588 insertions, 0 deletions
diff --git a/lib/parsetools/AUTHORS b/lib/parsetools/AUTHORS new file mode 100644 index 0000000000..608bc32b04 --- /dev/null +++ b/lib/parsetools/AUTHORS @@ -0,0 +1,10 @@ +Yecc was written by Carl Wilhelm Welin +Håkan Mattsson made it faster by using ets tables. + +Leex was written by Robert Virding. + +Original Authors and Contributors: + +Carl Wilhelm Welin +Håkan Mattsson +Hans Bolinder diff --git a/lib/parsetools/Makefile b/lib/parsetools/Makefile new file mode 100644 index 0000000000..ae915551fe --- /dev/null +++ b/lib/parsetools/Makefile @@ -0,0 +1,38 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 1996-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# + +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Macros +# ---------------------------------------------------- + +SUB_DIRECTORIES = src doc/src + +include vsn.mk +VSN = $(PARSETOOLS_VSN) + +SPECIAL_TARGETS = + +# ---------------------------------------------------- +# Default Subdir Targets +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_subdir.mk + diff --git a/lib/parsetools/doc/html/.gitignore b/lib/parsetools/doc/html/.gitignore new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/lib/parsetools/doc/html/.gitignore diff --git a/lib/parsetools/doc/man3/.gitignore b/lib/parsetools/doc/man3/.gitignore new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/lib/parsetools/doc/man3/.gitignore diff --git a/lib/parsetools/doc/pdf/.gitignore b/lib/parsetools/doc/pdf/.gitignore new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/lib/parsetools/doc/pdf/.gitignore diff --git a/lib/parsetools/doc/src/Makefile b/lib/parsetools/doc/src/Makefile new file mode 100644 index 0000000000..6e693e0cf0 --- /dev/null +++ b/lib/parsetools/doc/src/Makefile @@ -0,0 +1,119 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 1997-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License.
+# +# %CopyrightEnd% +# + +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include ../../vsn.mk +VSN=$(PARSETOOLS_VSN) +APPLICATION=parsetools + +# ---------------------------------------------------- +# Release directory specification +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/lib/$(APPLICATION)-$(VSN) + +# ---------------------------------------------------- +# Target Specs +# ---------------------------------------------------- +XML_APPLICATION_FILES = ref_man.xml +XML_REF3_FILES = yecc.xml leex.xml + +XML_PART_FILES = part_notes.xml +XML_CHAPTER_FILES = notes.xml + +BOOK_FILES = book.xml + +XML_FILES = \ + $(BOOK_FILES) $(XML_CHAPTER_FILES) \ + $(XML_PART_FILES) $(XML_REF3_FILES) $(XML_APPLICATION_FILES) + +GIF_FILES = \ + note.gif + +XML_HTML_FILES = \ + notes_history.xml + +# ---------------------------------------------------- + +HTML_FILES = $(XML_APPLICATION_FILES:%.xml=$(HTMLDIR)/%.html) \ + $(XML_HTML_FILES:%.xml=$(HTMLDIR)/%.html) \ + $(XML_PART_FILES:%.xml=$(HTMLDIR)/%.html) + +INFO_FILE = ../../info + +MAN3_FILES = $(XML_REF3_FILES:%.xml=$(MAN3DIR)/%.3) + +HTML_REF_MAN_FILE = $(HTMLDIR)/index.html + +TOP_PDF_FILE = $(PDFDIR)/$(APPLICATION)-$(VSN).pdf + +# ---------------------------------------------------- +# FLAGS +# ---------------------------------------------------- +XML_FLAGS += +DVIPS_FLAGS += + +# ---------------------------------------------------- +# Targets +# ---------------------------------------------------- +$(HTMLDIR)/%.gif: %.gif + $(INSTALL_DATA) $< $@ + +docs: pdf html man + +$(TOP_PDF_FILE): $(XML_FILES) + +pdf: $(TOP_PDF_FILE) + +html: gifs $(HTML_REF_MAN_FILE) + +clean clean_docs: + rm -rf $(HTMLDIR)/* + rm -f $(MAN3DIR)/* + rm -f $(TOP_PDF_FILE) $(TOP_PDF_FILE:%.pdf=%.fo) + rm -f errs core *~ + +man: $(MAN3_FILES) + +gifs: $(GIF_FILES:%=$(HTMLDIR)/%) + +debug opt: + +# ---------------------------------------------------- +# Release Target +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +release_docs_spec: docs + $(INSTALL_DIR) $(RELSYSDIR)/doc/pdf + $(INSTALL_DATA) $(TOP_PDF_FILE) $(RELSYSDIR)/doc/pdf + $(INSTALL_DIR) $(RELSYSDIR)/doc/html + $(INSTALL_DATA) $(HTMLDIR)/* \ + $(RELSYSDIR)/doc/html + $(INSTALL_DATA) $(INFO_FILE) $(RELSYSDIR) + $(INSTALL_DIR) $(RELEASE_PATH)/man/man3 + $(INSTALL_DATA) $(MAN3DIR)/* $(RELEASE_PATH)/man/man3 + + +release_spec: + diff --git a/lib/parsetools/doc/src/book.xml b/lib/parsetools/doc/src/book.xml new file mode 100644 index 0000000000..93a107b798 --- /dev/null +++ b/lib/parsetools/doc/src/book.xml @@ -0,0 +1,46 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE book SYSTEM "book.dtd"> + +<book xmlns:xi="http://www.w3.org/2001/XInclude"> + <header titlestyle="normal"> + <copyright> + <year>1997</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See + the License for the specific language governing rights and limitations + under the License. + + </legalnotice> + + <title>Parse Tools</title> + <prepared>Carl Velin</prepared> + <docno></docno> + <date>1997-05-02</date> + <rev>1.0</rev> + <file>book.sgml</file> + </header> + <insidecover> + </insidecover> + <pagetext>Parse Tools</pagetext> + <preamble> + <contents level="2"></contents> + </preamble> + <applications> + <xi:include href="ref_man.xml"/> + </applications> + <releasenotes> + <xi:include href="notes.xml"/> + </releasenotes> + <listofterms></listofterms> + <index></index> +</book> + diff --git a/lib/parsetools/doc/src/fascicules.xml b/lib/parsetools/doc/src/fascicules.xml new file mode 100644 index 0000000000..43090b4aed --- /dev/null +++ b/lib/parsetools/doc/src/fascicules.xml @@ -0,0 +1,15 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE fascicules SYSTEM "fascicules.dtd"> + +<fascicules> + <fascicule file="ref_man" href="ref_man_frame.html" entry="yes"> + Reference Manual + </fascicule> + <fascicule file="part_notes" href="part_notes_frame.html" entry="no"> + Release Notes + </fascicule> + <fascicule file="" href="../../../../doc/print.html" entry="no"> + Off-Print + </fascicule> +</fascicules> + diff --git a/lib/parsetools/doc/src/leex.xml b/lib/parsetools/doc/src/leex.xml new file mode 100644 index 0000000000..c113b586df --- /dev/null +++ b/lib/parsetools/doc/src/leex.xml @@ -0,0 +1,455 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE erlref SYSTEM "erlref.dtd"> + +<erlref> + <header> + <copyright> + <year>2009</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + Copyright (c) 2008,2009 Robert Virding. All rights reserved. + </legalnotice> + + <title>leex</title> + <prepared>Robert Virding</prepared> + <responsible>nobody</responsible> + <docno></docno> + <approved>nobody</approved> + <checked></checked> + <date>2009-05-07</date> + <rev>A</rev> + <file>leex.xml</file> + </header> + <module>leex</module> + <modulesummary>Lexical analyzer generator for Erlang</modulesummary> + <description> + <p>A regular expression based lexical analyzer generator for + Erlang, similar to lex or flex.</p> + <note><p>The Leex module should be considered experimental + as it will be subject to changes in future releases.</p></note> + </description> + <section> + <title>DATA TYPES</title> + <code type="none"> +ErrorInfo = {ErrorLine,module(),error_descriptor()} +ErrorLine = integer() +Token = tuple()</code> + </section> + <funcs> + <func> + <name>file(FileName) -> FileReturn</name> + <name>file(FileName, Options) -> FileReturn</name> + <fsummary>Generate a lexical analyzer</fsummary> + <type> + <v>FileName = filename()</v> + <v>Options = Option | [Option]</v> + <v>Option = - see below -</v> + <v>FileReturn = {ok, Scannerfile} + | {ok, Scannerfile, Warnings} + | error + | {error, Warnings, Errors}</v> + <v>Scannerfile = filename()</v> + <v>Warnings = Errors = [{filename(), [ErrorInfo]}]</v> + </type> + <desc> + <p>Generates a lexical analyzer from the definition in the input + file. The input file has the extension <c>.xrl</c>. This is + added to the filename if it is not given.
The resulting module + is the Xrl filename without the <c>.xrl</c> extension.</p> + + <p>The current options are:</p> + <taglist> + <tag><c>dfa_graph</c></tag> + <item><p>Generates a <c>.dot</c> file which contains a + description of the DFA in a format which can be viewed with + Graphviz, <c>www.graphviz.org</c>.</p> + </item> + <tag><c>{includefile,Includefile}</c></tag> + <item><p>Uses a specific or customised prologue file + instead of the default + <c>lib/parsetools/include/leexinc.hrl</c> which is + otherwise included.</p> + </item> + <tag><c>{report_errors, bool()}</c></tag> + <item><p>Causes errors to be printed as they occur. Default is + <c>true</c>.</p> + </item> + <tag><c>{report_warnings, bool()}</c></tag> + <item><p>Causes warnings to be printed as they occur. Default is + <c>true</c>.</p> + </item> + <tag><c>{report, bool()}</c></tag> + <item><p>This is a short form for both <c>report_errors</c> and + <c>report_warnings</c>.</p> + </item> + <tag><c>{return_errors, bool()}</c></tag> + <item><p>If this flag is set, <c>{error, Errors, Warnings}</c> + is returned when there are errors. Default is <c>false</c>.</p> + </item> + <tag><c>{return_warnings, bool()}</c></tag> + <item><p>If this flag is set, an extra field containing + <c>Warnings</c> is added to the tuple returned upon + success. Default is <c>false</c>.</p> + </item> + <tag><c>{return, bool()}</c></tag> + <item><p>This is a short form for both <c>return_errors</c> and + <c>return_warnings</c>.</p> + </item> + <tag><c>{scannerfile, Scannerfile}</c></tag> + <item><p><c>Scannerfile</c> is the name of the file that + will contain the Erlang scanner code that is generated. + The default (<c>""</c>) is to add the extension + <c>.erl</c> to <c>FileName</c> stripped of the + <c>.xrl</c> extension.</p> + </item> + <tag><c>{verbose, bool()}</c></tag> + <item><p>Outputs information from parsing the input file and + generating the internal tables.</p> + </item> + </taglist> + <p>Any of the Boolean options can be set to <c>true</c> by + stating the name of the option.
For example, <c>verbose</c> + is equivalent to <c>{verbose, true}</c>.</p> + <p>Leex will add the extension <c>.hrl</c> to the + <c>Includefile</c> name and the extension <c>.erl</c> to the + <c>Scannerfile</c> name, unless the extension is already + there.</p> + </desc> + </func> + <func> + <name>format_error(ErrorInfo) -> Chars</name> + <fsummary>Return an English description of an error tuple.</fsummary> + <type> + <v>Chars = [char() | Chars]</v> + </type> + <desc> + <p>Returns a string which describes the error + <c>ErrorInfo</c> returned when there is an error in a + regular expression.</p> + </desc> + </func> + </funcs> + + + <section> + <title>GENERATED SCANNER EXPORTS</title> + <p>The following functions are exported by the generated scanner.</p> + </section> + + <funcs> + <func> + <name>string(String) -> StringRet</name> + <name>string(String, StartLine) -> StringRet</name> + <fsummary>Generated by Leex</fsummary> + <type> + <v>String = string()</v> + <v>StringRet = {ok,Tokens,EndLine} | ErrorInfo</v> + <v>Tokens = [Token]</v> + <v>EndLine = StartLine = integer()</v> + </type> + <desc> + <p>Scans <c>String</c> and returns all the tokens in it, or an + error.</p> + <note><p>It is an error if not all of the characters in + <c>String</c> are consumed.</p></note> + </desc> + </func> + + <func> + <name>token(Cont, Chars) -> {more,Cont1} | {done,TokenRet,RestChars} + </name> + <name>token(Cont, Chars, StartLine) -> {more,Cont1} + | {done,TokenRet,RestChars} + </name> + <fsummary>Generated by Leex</fsummary> + <type> + <v>Cont = [] | Cont1</v> + <v>Cont1 = tuple()</v> + <v>Chars = RestChars = string() | eof</v> + <v>TokenRet = {ok, Token, EndLine} + | {eof, EndLine} + | ErrorInfo</v> + <v>StartLine = EndLine = integer()</v> + </type> + <desc> + <p>This is a re-entrant call to try and scan one token from + <c>Chars</c>. If there are enough characters in <c>Chars</c> + to either scan a token or detect an error then this will be + returned with <c>{done,...}</c>. Otherwise + <c>{more,Cont1}</c> will be returned, where <c>Cont1</c> is + used in the next call to <c>token()</c> with more characters + to try and scan the token. This is continued until a token + has been scanned. <c>Cont</c> is initially <c>[]</c>.</p> + + <p>It is not designed to be called directly by an application + but used through the i/o system where it can typically be + called in an application by:</p> + <code> +io:request(InFile, {get_until,Prompt,Module,token,[Line]}) + -> TokenRet</code> + </desc> + </func> + + <func> + <name>tokens(Cont, Chars) -> {more,Cont1} | {done,TokensRet,RestChars} + </name> + <name>tokens(Cont, Chars, StartLine) -> + {more,Cont1} | {done,TokensRet,RestChars} + </name> + <fsummary>Generated by Leex</fsummary> + <type> + <v>Cont = [] | Cont1</v> + <v>Cont1 = tuple()</v> + <v>Chars = RestChars = string() | eof</v> + <v>TokensRet = {ok, Tokens, EndLine} + | {eof, EndLine} + | ErrorInfo</v> + <v>Tokens = [Token]</v> + <v>StartLine = EndLine = integer()</v> + </type> + <desc> + <p>This is a re-entrant call to try and scan tokens from + <c>Chars</c>. If there are enough characters in <c>Chars</c> + to either scan tokens or detect an error then this will be + returned with <c>{done,...}</c>. Otherwise + <c>{more,Cont1}</c> will be returned, where <c>Cont1</c> is + used in the next call to <c>tokens()</c> with more + characters to try and scan the tokens. This is continued + until all tokens have been scanned.
<c>Cont</c> is initially + <c>[]</c>.</p> + + <p>This function differs from <c>token</c> in that it will + continue to scan tokens until an + <c>{end_token,Token}</c> has been scanned (see next + section). It will then return all the tokens. This is + typically used for scanning grammars like Erlang where there + is an explicit end token, <c>'.'</c>. If no end token is + found then the whole file will be scanned and returned. If + an error occurs then all tokens up to and including the next + end token will be skipped.</p> + + <p>It is not designed to be called directly by an application + but used through the i/o system where it can typically be + called in an application by:</p> + <code> +io:request(InFile, {get_until,Prompt,Module,tokens,[Line]}) + -> TokensRet</code> + </desc> + </func> + </funcs> + + <section> + <title>Input File Format</title> + <p>Erlang style comments starting with a <c>%</c> are allowed in + scanner files. A definition file has the following format:</p> + <code> +<Header> + +Definitions. + +<Macro Definitions> + +Rules. + +<Token Rules> + +Erlang code. + +<Erlang code></code> + + <p>The "Definitions.", "Rules." and "Erlang code." headings are + mandatory and must occur at the beginning of a source line. The + <Header>, <Macro Definitions> and <Erlang code> + sections may be empty but there must be at least one rule.</p> + + <p>Macro definitions have the following format:</p> + + <code> +NAME = VALUE</code> + + <p>and there must be spaces around <c>=</c>. Macros can be used in + the regular expressions of rules by writing <c>{NAME}</c>.</p> + + <note><p>When macros are expanded in expressions the macro calls + are replaced by the macro value without any form of quoting or + enclosing in parentheses.</p></note> + + <p>Rules have the following format:</p> + + <code> +<Regexp> : <Erlang code>.</code> + + <p>The <Regexp> must occur at the start of a line and not + include any blanks; use <c>\\t</c> and <c>\\s</c> to include TAB + and SPACE characters in the regular expression. If <Regexp> + matches then the corresponding <Erlang code> is evaluated to + generate a token. In the Erlang code the following predefined + variables are available:</p> + + <taglist> + <tag><c>TokenChars</c></tag> + <item><p>A list of the characters in the matched token.</p> + </item> + <tag><c>TokenLen</c></tag> + <item><p>The number of characters in the matched token.</p> + </item> + <tag><c>TokenLine</c></tag> + <item><p>The line number where the token occurred.</p> + </item> + </taglist> + + <p>The code must return:</p> + + <taglist> + <tag><c>{token,Token}</c></tag> + <item><p>Return <c>Token</c> to the caller.</p> + </item> + <tag><c>{end_token,Token}</c></tag> + <item><p>Return <c>Token</c>; it is the last token in a tokens call.</p> + </item> + <tag><c>skip_token</c></tag> + <item><p>Skip this token completely.</p> + </item> + <tag><c>{error,ErrString}</c></tag> + <item><p>An error in the token; <c>ErrString</c> is a string + describing the error.</p> + </item> + </taglist> + + <p>It is also possible to push back characters into the input + characters with the following returns:</p> + + <list> + <item><c>{token,Token,PushBackList}</c></item> + <item><c>{end_token,Token,PushBackList}</c></item> + <item><c>{skip_token,PushBackList}</c></item> + </list> + + <p>These have the same meanings as the normal returns but the + characters in <c>PushBackList</c> will be prepended to the input + characters and scanned for the next token.
Note that pushing + back a newline will mean the line numbering will no longer be + correct.</p> + + <note><p>Pushing back characters makes it easy to inadvertently + cause the scanner to loop!</p></note> + + <p>The following example would match a simple Erlang integer or + float and return a token which could be sent to the Erlang + parser:</p> + <code> +D = [0-9] + +{D}+ : + {token,{integer,TokenLine,list_to_integer(TokenChars)}}. + +{D}+\\.{D}+((E|e)(\\+|\\-)?{D}+)? : + {token,{float,TokenLine,list_to_float(TokenChars)}}.</code> + + <p>The Erlang code in the "Erlang code." section is written into + the output file directly after the module declaration and + predefined exports declaration so it is possible to add extra + exports, define imports and other attributes which are then + visible in the whole file.</p> + </section> + + <section> + <title>Regular Expressions</title> + + <p>The regular expressions allowed here are a subset of the set + found in <c>egrep</c> and in the AWK programming language, as + defined in the book, The AWK Programming Language, by A. V. Aho, + B. W. Kernighan, and P. J. Weinberger. They are composed of the + following characters:</p> + + <taglist> + <tag><c>c</c></tag> + <item><p>Matches the non-metacharacter c.</p> + </item> + <tag><c>\\c</c></tag> + <item><p>Matches the escape sequence or literal character c.</p> + </item> + <tag><c>.</c></tag> + <item><p>Matches any character.</p> + </item> + <tag><c>^</c></tag> + <item><p>Matches the beginning of a string.</p> + </item> + <tag><c>$</c></tag> + <item><p>Matches the end of a string.</p></item> + <tag><c>[abc...]</c></tag> + <item><p>Character class, which matches any of the characters + <c>abc...</c>. Character ranges are specified by a pair of + characters separated by a <c>-</c>.</p> + </item> + <tag><c>[^abc...]</c></tag> + <item><p>Negated character class, which matches any character + except <c>abc...</c>.</p> + </item> + <tag><c>r1 | r2</c></tag> + <item><p>Alternation. It matches either <c>r1</c> or <c>r2</c>.</p> + </item> + <tag><c>r1r2</c></tag> + <item><p>Concatenation. It matches <c>r1</c> and then <c>r2</c>.</p> + </item> + <tag><c>r+</c></tag> + <item><p>Matches one or more <c>rs</c>.</p> + </item> + <tag><c>r*</c></tag> + <item><p>Matches zero or more <c>rs</c>.</p> + </item> + <tag><c>r?</c></tag> + <item><p>Matches zero or one <c>rs</c>.</p> + </item> + <tag><c>(r)</c></tag> + <item><p>Grouping.
It matches <c>r</c>.</p> + </item> + </taglist> + + <p>The escape sequences allowed are the same as for Erlang strings:</p> + + <taglist> + <tag><c>\\b</c></tag> + <item><p>Backspace.</p></item> + <tag><c>\\f</c></tag> + <item><p>Form feed.</p></item> + <tag><c>\\n</c></tag> + <item><p>Newline (line feed).</p></item> + <tag><c>\\r</c></tag> + <item><p>Carriage return.</p></item> + <tag><c>\\t</c></tag> + <item><p>Tab.</p></item> + <tag><c>\\e</c></tag> + <item><p>Escape.</p></item> + <tag><c>\\v</c></tag> + <item><p>Vertical tab.</p></item> + <tag><c>\\s</c></tag> + <item><p>Space.</p></item> + <tag><c>\\d</c></tag> + <item><p>Delete.</p></item> + <tag><c>\\ddd</c></tag> + <item><p>The octal value <c>ddd</c>.</p></item> + <tag><c>\\xhh</c></tag> + <item><p>The hexadecimal value <c>hh</c>.</p></item> + <tag><c>\\x{h...}</c></tag> + <item><p>The hexadecimal value <c>h...</c>.</p></item> + <tag><c>\\c</c></tag> + <item><p>Any other character literally, for example <c>\\\\</c> for + backslash, <c>\\"</c> for <c>"</c>.</p> + </item> + </taglist> + + <p>The following examples define Erlang data types:</p> + <code> +Atoms [a-z][0-9a-zA-Z_]* + +Variables [A-Z_][0-9a-zA-Z_]* + +Floats (\\+|-)?[0-9]+\\.[0-9]+((E|e)(\\+|-)?[0-9]+)?</code> + + <note><p>Anchoring a regular expression with <c>^</c> and <c>$</c> + is not implemented in the current version of Leex and just + generates a parse error.</p></note> + </section> +</erlref> diff --git a/lib/parsetools/doc/src/make.dep b/lib/parsetools/doc/src/make.dep new file mode 100644 index 0000000000..3a09ecdedd --- /dev/null +++ b/lib/parsetools/doc/src/make.dep @@ -0,0 +1,21 @@ +# ---------------------------------------------------- +# >>>> Do not edit this file <<<< +# This file was automaticly generated by +# /home/otp/bin/docdepend +# ---------------------------------------------------- + + +# ---------------------------------------------------- +# TeX files that the DVI file depend on +# ---------------------------------------------------- + +book.dvi: book.tex leex.tex ref_man.tex yecc.tex + +# ---------------------------------------------------- +# Source inlined when transforming from source to LaTeX +# ---------------------------------------------------- + +book.tex: ref_man.xml + +ref_man.tex: ../../../../system/doc/definitions/term.defs + diff --git a/lib/parsetools/doc/src/note.gif b/lib/parsetools/doc/src/note.gif Binary files differnew file mode 100644 index 0000000000..6fffe30419 --- /dev/null +++ b/lib/parsetools/doc/src/note.gif diff --git a/lib/parsetools/doc/src/notes.xml b/lib/parsetools/doc/src/notes.xml new file mode 100644 index 0000000000..2947517717 --- /dev/null +++ b/lib/parsetools/doc/src/notes.xml @@ -0,0 +1,308 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE chapter SYSTEM "chapter.dtd"> + +<chapter> + <header> + <copyright> + <year>1997</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. 
+ + </legalnotice> + + <title>Parsetools Release Notes</title> + <prepared>otp_appnotes</prepared> + <docno>nil</docno> + <date>nil</date> + <rev>nil</rev> + <file>notes.xml</file> + </header> + <p>This document describes the changes made to the Parsetools application.</p> + +<section><title>Parsetools 2.0.1</title> + + <section><title>Improvements and New Features</title> + <list> + <item> + <p>Leex no longer uses the deprecated <c>regexp</c> + module. (Thanks to Robert Virding.).</p> + <p> + Own Id: OTP-8231</p> + </item> + </list> + </section> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p>A minor bug in <c>leex(3)</c> has been fixed.</p> + <p> + Own Id: OTP-8197</p> + </item> + </list> + </section> + +</section> + +<section><title>Parsetools 2.0</title> + + <section><title>Improvements and New Features</title> + <list> + <item> + <p>Leex, a lexical analyzer generator for Erlang, + has been added to Parsetools. This initial version + should be considered experimental; it is known + that there will be changes and additions. + (Thanks to Robert Virding.).</p> + <p> + Own Id: OTP-8013</p> + </item> + </list> + </section> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p>The parsers generated by Yecc now report correct + error lines when possible.</p> + <p> + Own Id: OTP-7969</p> + </item> + </list> + </section> + +</section> + +<section><title>Parsetools 1.4.7</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p>A bug in yeccpre.hrl introduced in R13A has been + fixed.</p> + <p> + Own Id: OTP-7945</p> + </item> + </list> + </section> + +</section> + +<section><title>Parsetools 1.4.6</title> + + <section><title>Improvements and New Features</title> + <list> + <item> + <p>Updated file headers.</p> + <p> + Own Id: OTP-7798</p> + </item> + </list> + </section> + +</section> + +<section><title>Parsetools 1.4.5</title> + + <section><title>Improvements and New Features</title> + <list> + <item> + <p>The <c>yecc</c> grammar has been augmented with an + optional header section. (Thanks to Richard + Carlsson.)</p> + <p> + Own Id: OTP-7292</p> + </item> + </list> + </section> + +</section> + + +<section><title>Parsetools 1.4.4</title> + + <section><title>Improvements and New Features</title> + <list> + <item> + <p>The size of the code generated by Yecc has + been reduced. The code is also faster.</p> + <p>Macros can now be used in actions.</p> + <p> + Own Id: OTP-7224</p> + </item> + </list> + </section> + +</section> + +<section><title>Parsetools 1.4.3</title> + + <section><title>Improvements and New Features</title> + <list> + <item> + <p><c>tuple_size/1</c> and <c>byte_size/1</c> have been + substituted for <c>size/1</c>.</p> + <p> + Own Id: OTP-7009</p> + </item> + </list> + </section> + +</section> + +<section><title>Parsetools 1.4.2</title> + + <section><title>Improvements and New Features</title> + <list> + <item> + <p>The size of the code generated by yecc has been + reduced. 
</p> + <p>A note regarding the <c>includefile</c> option: + although yecc can cope with includefiles based on some + earlier <c>yeccpre.hrl</c> it is recommended for + efficiency reasons to update includefiles as to follow + the pattern in the latest <c>yeccpre.hrl</c>.</p> + <p> + Own Id: OTP-6851</p> + </item> + </list> + </section> + +</section> + + <section> + <title>Parsetools 1.4.1.1</title> + + <section> + <title>Improvements and New Features</title> + <list type="bulleted"> + <item> + <p>Minor Makefile changes.</p> + <p>Own Id: OTP-6689</p> + </item> + </list> + </section> + </section> + + <section> + <title>Parsetools 1.4.1</title> + + <section> + <title>Fixed Bugs and Malfunctions</title> + <list type="bulleted"> + <item> + <p>A bug concerning precedence declarations of + non-terminals "one level up" has been fixed in yecc.</p> + <p>Own Id: OTP-6362</p> + </item> + </list> + </section> + </section> + + <section> + <title>Parsetools 1.4</title> + + <section> + <title>Improvements and Fixed Bugs</title> + <p>Several modifications of Yecc have been made:</p> + <list type="bulleted"> + <item> + <p>The new functions <c>file/1,2</c> take the + role of the old functions <c>yecc/2,3,4</c>. The + latter functions are no longer documented but are + kept for backward compatibility.</p> + </item> + <item> + <p>More checks of the grammar file have been + implemented. Examples are warnings for unused + non-terminals and duplicated declarations.</p> + </item> + <item> + <p>Invalid pseudo variables are no longer + replaced by <c>'$undefined'</c> but cause a failure.</p> + </item> + <item> + <p>Reserved words no longer need to be quoted + when used as terminals or non-terminals.</p> + </item> + <item> + <p>When compiling the generated parser file errors + and warnings concerning user code refer to + the grammar file, not the parser file.</p> + </item> + <item> + <p>Yecc emits a warning if there are conflicts + in the grammar. The new declaration <c>Expect</c> can + be used to suppress this warning.</p> + </item> + <item> + <p>The new operator precedence declaration + <c>Nonassoc</c> can be used to declare operators with + no associativity.</p> + </item> + <item> + <p>Precedence can be given to more than one operator + with one single operator precedence declaration.</p> + </item> + <item> + <p>The function <c>parse_and_scan/1</c> in the + default includefile accepts <c>{Function, A}</c> + as well as {{M,F}, A} as tokenizer function. + Exceptions in the tokenizer are never caught.</p> + </item> + <item> + <p>The functions <c>yecc:file/1,2</c> can be accessed + from the Erlang shell via the new functions <c>c:y/1,2</c> + in STDLIB.</p> + </item> + </list> + <p>See yecc(3) for further details.</p> + <p>Own Id: OTP-5366</p> + </section> + </section> + + <section> + <title>Parsetools 1.3.2</title> + + <section> + <title>Fixed Bugs and Malfunctions</title> + <list type="bulleted"> + <item> + <p>A bug in <c>Yecc</c> that was introduced in R9B has been + removed. Another bug concerning precedence declaration + "one level up" has been fixed.</p> + <p>Own Id: OTP-5461</p> + </item> + </list> + </section> + </section> + + <section> + <title>Parsetools 1.3.1</title> + + <section> + <title>Fixed Bugs and Malfunctions</title> + <list type="bulleted"> + <item> + <p>A bug in the file <c>parsetools/include/yeccpre.hrl</c> + caused <c>yecc:parse_and_scan/1</c> to always report a + parse failure when the lexer reported end-of-file. 
This + problem has been fixed.</p> + <p>Own Id: OTP-5369</p> + </item> + </list> + </section> + </section> +</chapter> + diff --git a/lib/parsetools/doc/src/notes_history.xml b/lib/parsetools/doc/src/notes_history.xml new file mode 100644 index 0000000000..6a63812bcb --- /dev/null +++ b/lib/parsetools/doc/src/notes_history.xml @@ -0,0 +1,87 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE chapter SYSTEM "chapter.dtd"> + +<chapter> + <header> + <copyright> + <year>2006</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + </legalnotice> + + <title>Parsetools Release Notes</title> + <prepared>Hans Bolinder</prepared> + <responsible></responsible> + <docno></docno> + <approved></approved> + <checked></checked> + <date>06-02-20</date> + <rev>A</rev> + <file>notes_history.sgml</file> + </header> + + <section> + <title>Parsetools 1.3</title> + + <section> + <title>Improvements and New Features</title> + <list type="bulleted"> + <item> + <p>The source code was cleaned up from unused variables to + eliminate compiler warnings. No other changes.</p> + <p>Own Id: OTP-5185</p> + </item> + </list> + </section> + </section> + + <section> + <title>Parsetools 1.2</title> + <p>No release notes.</p> + </section> + + <section> + <title>Parsetools 1.1</title> + <p>No release notes.</p> + </section> + + <section> + <title>Parsetools 1.0.1</title> + + <section> + <title>Fixed Bugs and malfunctions</title> + <list type="bulleted"> + <item> + <p>Correction in <c>yeccpre.hrl</c> to give correct syntax + error info when the offending token was of the form + <c>{Class, Line, Value}</c>.</p> + <p>Own Id: OTP-1881</p> + </item> + <item> + <p>The <c>yecc</c> function does now accept atoms in the + <c>Grammarfile</c>, <c>Parserfile</c> and <c>Includefile</c> + arguments.</p> + <p>Own Id: OTP-1405</p> + </item> + </list> + </section> + </section> + + <section> + <title>Parsetools 1.0</title> + <p>There are no changes since Erlang 4.3.</p> + </section> +</chapter> + diff --git a/lib/parsetools/doc/src/part_notes.xml b/lib/parsetools/doc/src/part_notes.xml new file mode 100644 index 0000000000..308fc95e35 --- /dev/null +++ b/lib/parsetools/doc/src/part_notes.xml @@ -0,0 +1,43 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE part SYSTEM "part.dtd"> + +<part xmlns:xi="http://www.w3.org/2001/XInclude"> + <header> + <copyright> + <year>1997</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See + the License for the specific language governing rights and limitations + under the License. + + </legalnotice> + + <title>PARSETOOLS Release Notes</title> + <prepared>Carl Velin</prepared> + <docno></docno> + <date>1997-04-28</date> + <rev>1.0</rev> + <file>part_notes.sgml</file> + </header> + <description> + <p>The <em>Parsetools</em> application contains utilities for + parsing and scanning. Yecc is an <term id="LALR-1"></term>parser + generator for Erlang, similar to yacc. Yecc takes a <term + id="BNF"></term>grammar definition as input, and produces Erlang + code for a parser as output. Leex is a regular expression based + lexical analyzer generator for Erlang, similar to lex or flex.</p> + <p>There are also release notes for + <seealso marker="notes_history">older versions</seealso>.</p> + </description> + <xi:include href="notes.xml"/> +</part> + diff --git a/lib/parsetools/doc/src/ref_man.xml b/lib/parsetools/doc/src/ref_man.xml new file mode 100644 index 0000000000..52f1c687da --- /dev/null +++ b/lib/parsetools/doc/src/ref_man.xml @@ -0,0 +1,42 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE application SYSTEM "application.dtd"> + +<application xmlns:xi="http://www.w3.org/2001/XInclude"> + <header> + <copyright> + <year>1997</year><year>2009</year> + <holder>Ericsson AB. All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + </legalnotice> + + <title>Parsetools Reference Manual</title> + <prepared>Carl Velin</prepared> + <docno></docno> + <date>1997-04-28</date> + <rev>1.0</rev> + <file>application.sgml</file> + </header> + <description> + <p>The <em>Parsetools</em> application contains utilities for + parsing and scanning. Yecc is an <term id="LALR-1"></term>parser + generator for Erlang, similar to yacc. Yecc takes a <term + id="BNF"></term>grammar definition as input, and produces Erlang + code for a parser as output. Leex is a regular expression based + lexical analyzer generator for Erlang, similar to lex or flex.</p> + </description> + <xi:include href="yecc.xml"/> + <xi:include href="leex.xml"/> +</application> + diff --git a/lib/parsetools/doc/src/user_guide.gif b/lib/parsetools/doc/src/user_guide.gif Binary files differ new file mode 100644 index 0000000000..e6275a803d --- /dev/null +++ b/lib/parsetools/doc/src/user_guide.gif diff --git a/lib/parsetools/doc/src/warning.gif b/lib/parsetools/doc/src/warning.gif Binary files differ new file mode 100644 index 0000000000..96af52360e --- /dev/null +++ b/lib/parsetools/doc/src/warning.gif diff --git a/lib/parsetools/doc/src/yecc.xml b/lib/parsetools/doc/src/yecc.xml new file mode 100644 index 0000000000..81f1550b0a --- /dev/null +++ b/lib/parsetools/doc/src/yecc.xml @@ -0,0 +1,529 @@ +<?xml version="1.0" encoding="latin1" ?> +<!DOCTYPE erlref SYSTEM "erlref.dtd"> + +<erlref> + <header> + <copyright> + <year>1996</year><year>2009</year> + <holder>Ericsson AB.
All Rights Reserved.</holder> + </copyright> + <legalnotice> + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + </legalnotice> + + <title>yecc</title> + <prepared>Carl Wilhelm Welin</prepared> + <responsible>Carl Wilhelm Welin</responsible> + <docno></docno> + <approved>Bjarne Däcker</approved> + <checked></checked> + <date>1997-01-27</date> + <rev>B</rev> + <file>yecc.sgml</file> + </header> + <module>yecc</module> + <modulesummary>LALR-1 Parser Generator</modulesummary> + <description> + <p>An LALR-1 parser generator for Erlang, similar to <c>yacc</c>. + Takes a BNF grammar definition as input, and produces Erlang code + for a parser. </p> + <p>To understand this text, you also have to + look at the <c>yacc</c> documentation in the UNIX(TM) manual. This + is most probably necessary in order to understand the idea of a + parser generator, and the principle and problems of LALR parsing + with finite look-ahead.</p> + </description> + <funcs> + <func> + <name>file(Grammarfile [, Options]) -> YeccRet</name> + <fsummary>Generate an LALR-1 parser.</fsummary> + <type> + <v>Grammarfile = filename()</v> + <v>Options = Option | [Option]</v> + <v>Option = - see below -</v> + <v>YeccRet = {ok, Parserfile} | {ok, Parserfile, Warnings} | error | {error, Warnings, Errors}</v> + <v>Parserfile = filename()</v> + <v>Warnings = Errors = [{filename(), [ErrorInfo]}]</v> + <v>ErrorInfo = {ErrorLine, module(), Reason}</v> + <v>ErrorLine = integer()</v> + <v>Reason = - formattable by format_error/1 -</v> + </type> + <desc> + <p><c>Grammarfile</c> is the file of declarations and grammar + rules. Returns <c>ok</c> upon success, or <c>error</c> if + there are errors. An Erlang file containing the parser is + created if there are no errors. The options are: + </p> + <taglist> + <tag><c>{parserfile, Parserfile}</c>.</tag> + <item><c>Parserfile</c> is the name of the file that will + contain the Erlang parser code that is generated. The + default (<c>""</c>) is to add the extension <c>.erl</c> + to <c>Grammarfile</c> stripped of the <c>.yrl</c> + extension. + </item> + <tag><c>{includefile, Includefile}</c>.</tag> + <item>Indicates a customized prologue file which the user + may want to use instead of the default file + <c>lib/parsetools/include/yeccpre.hrl</c> which is + otherwise included at the beginning of the resulting + parser file. <em>N.B.</em> The <c>Includefile</c> is + included 'as is' in the parser file, so it must not have a + module declaration of its own, and it should not be + compiled. It must, however, contain the necessary export + declarations. The default is indicated by <c>""</c>. + </item> + <tag><c>{report_errors, bool()}</c>.</tag> + <item>Causes errors to be printed as they occur. Default is + <c>true</c>. + </item> + <tag><c>{report_warnings, bool()}</c>.</tag> + <item>Causes warnings to be printed as they occur. Default is + <c>true</c>.
+ </item> + <tag><c>{report, bool()}</c>.</tag> + <item>This is a short form for both <c>report_errors</c> and + <c>report_warnings</c>. + </item> + <tag><c>{return_errors, bool()}</c>.</tag> + <item>If this flag is set, <c>{error, Errors, Warnings}</c> + is returned when there are errors. Default is + <c>false</c>. + </item> + <tag><c>{return_warnings, bool()}</c>.</tag> + <item>If this flag is set, an extra field containing + <c>Warnings</c> is added to the tuple returned upon + success. Default is <c>false</c>. + </item> + <tag><c>{return, bool()}</c>.</tag> + <item>This is a short form for both <c>return_errors</c> and + <c>return_warnings</c>. + </item> + <tag><c>{verbose, bool()}</c>. </tag> + <item>Determines whether the parser generator should give + full information about resolved and unresolved parse + action conflicts (<c>true</c>), or only about those + conflicts that prevent a parser from being generated from + the input grammar (<c>false</c>, the default). + </item> + </taglist> + <p>Any of the Boolean options can be set to <c>true</c> by + stating the name of the option. For example, <c>verbose</c> + is equivalent to <c>{verbose, true}</c>. + </p> + <p>The value of the <c>Parserfile</c> option stripped of the + <c>.erl</c> extension is used by Yecc as the module name of + the generated parser file.</p> + <p>Yecc will add the extension <c>.yrl</c> to the + <c>Grammarfile</c> name, the extension <c>.hrl</c> to the + <c>Includefile</c> name, and the extension <c>.erl</c> to + the <c>Parserfile</c> name, unless the extension is already + there.</p> + </desc> + </func> + <func> + <name>format_error(Reason) -> Chars</name> + <fsummary>Return an English description of an error tuple.</fsummary> + <type> + <v>Reason = - as returned by yecc:file/1,2 -</v> + <v>Chars = [char() | Chars]</v> + </type> + <desc> + <p>Returns a descriptive string in English of an error tuple + returned by <c>yecc:file/1,2</c>. This function is mainly + used by the compiler invoking Yecc.</p> + </desc> + </func> + </funcs> + + <section> + <title>Pre-Processing</title> + <p>A <c>scanner</c> to pre-process the text (program, etc.) to be + parsed is not provided in the <c>yecc</c> module. The scanner + serves as a kind of lexicon look-up routine. It is possible to + write a grammar that uses only character tokens as terminal + symbols, thereby eliminating the need for a scanner, but this + would make the parser larger and slower.</p> + <p>The user should implement a scanner that segments the input + text, and turns it into one or more lists of tokens. Each token + should be a tuple containing information about syntactic + category, position in the text (e.g. line number), and the + actual terminal symbol found in the text: <c>{Category, LineNumber, Symbol}</c>.</p> + <p>If a terminal symbol is the only member of a category, and the + symbol name is identical to the category name, the token format + may be <c>{Symbol, LineNumber}</c>.</p> + <p>A list of tokens produced by the scanner should end with a + special <c>end_of_input</c> tuple which the parser is looking + for. The format of this tuple should be <c>{Endsymbol, LastLineNumber}</c>, where <c>Endsymbol</c> is an identifier + that is distinguished from all the terminal and non-terminal + categories of the syntax rules. The <c>Endsymbol</c> may be + declared in the grammar file (see below).</p> + <p>The simplest case is to segment the input string into a list of + identifiers (atoms) and use those atoms both as categories and + values of the tokens.
For example, the input string <c>aaa bbb 777, X</c> may be scanned (tokenized) as:</p> + <code type="none"> +[{aaa, 1}, {bbb, 1}, {777, 1}, {',' , 1}, {'X', 1}, + {'$end', 1}]. </code> + <p>This assumes that this is the first line of the input text, and + that <c>'$end'</c> is the distinguished <c>end_of_input</c> + symbol.</p> + <p>The Erlang scanner in the <c>io</c> module can be used as a + starting point when writing a new scanner. Study + <c>yeccscan.erl</c> in order to see how a filter can be added on + top of <c>io:scan_erl_form/3</c> to provide a scanner for + Yecc that tokenizes grammar files before parsing them + with the Yecc parser. A more general approach to scanner + implementation is to use a scanner generator. A scanner + generator in Erlang called <c>leex</c> is under development.</p> + </section> + + <section> + <title>Grammar Definition Format</title> + <p>Erlang style <c>comments</c>, starting with a <c>'%'</c>, are + allowed in grammar files.</p> + <p>Each <c>declaration</c> or <c>rule</c> ends with a dot (the + character <c>'.'</c>).</p> + <p>The grammar starts with an optional <c>header</c> section. The + header is put first in the generated file, before the module + declaration. The purpose of the header is to provide a means to + make the documentation generated by <c>EDoc</c> look nicer. Each + header line should be enclosed in double quotes, and newlines + will be inserted between the lines. For example:</p> + <code> +Header "%% Copyright (C)" +"%% @private" +"%% @Author John"</code> + <p>Next comes a declaration of the <c>nonterminal categories</c> + to be used in the rules. For example:</p> + <code type="none"> +Nonterminals sentence nounphrase verbphrase. </code> + <p>A non-terminal category can be used at the left hand side (= + <c>lhs</c>, or <c>head</c>) of a grammar rule. It can also + appear at the right hand side of rules.</p> + <p>Next comes a declaration of the <c>terminal categories</c>, + which are the categories of tokens produced by the scanner. For + example:</p> + <code type="none"> +Terminals article adjective noun verb. </code> + <p>Terminal categories may only appear in the right hand sides (= + <c>rhs</c>) of grammar rules.</p> + <p>Next comes a declaration of the <c>rootsymbol</c>, or start + category of the grammar. For example:</p> + <code type="none"> +Rootsymbol sentence. </code> + <p>This symbol should appear in the lhs of at least one grammar + rule. This is the most general syntactic category which the + parser ultimately will parse every input string into.</p> + <p>After the rootsymbol declaration comes an optional declaration + of the <c>end_of_input</c> symbol that your scanner is expected + to use. For example:</p> + <code type="none"> +Endsymbol '$end'. </code> + <p>Next comes one or more declarations of <c>operator precedences</c>, if needed. These are used to resolve + shift/reduce conflicts (see <c>yacc</c> documentation).</p> + <p>Examples of operator declarations:</p> + <code type="none"> +Right 100 '='. +Nonassoc 200 '==' '=/='. +Left 300 '+'. +Left 400 '*'. +Unary 500 '-'. </code> + <p>These declarations mean that <c>'='</c> is defined as a + <c>right associative binary</c> operator with precedence 100, + <c>'=='</c> and <c>'=/='</c> are operators with <c>no associativity</c>, <c>'+'</c> and <c>'*'</c> are <c>left associative binary</c> operators, where <c>'*'</c> takes + precedence over <c>'+'</c> (the normal case), and <c>'-'</c> is + a <c>unary</c> operator of higher precedence than <c>'*'</c>. 
+ The fact that '==' has no associativity means that an expression + like <c>a == b == c</c> is considered a syntax error.</p> + <p>Certain rules are assigned precedence: each rule gets its + precedence from the last terminal symbol mentioned in the right + hand side of the rule. It is also possible to declare precedence + for non-terminals, "one level up". This is practical when an + operator is overloaded (see also example 3 below).</p> + <p>Next come the <c>grammar rules</c>. Each rule has the general + form</p> + <code type="none"> +Left_hand_side -> Right_hand_side : Associated_code. </code> + <p>The left hand side is a non-terminal category. The right hand + side is a sequence of one or more non-terminal or terminal + symbols with spaces between. The associated code is a sequence + of zero or more Erlang expressions (with commas <c>','</c> as + separators). If the associated code is empty, the separating + colon <c>':'</c> is also omitted. A final dot marks the end of + the rule.</p> + <p>Symbols such as <c>'{'</c>, <c>'.'</c>, etc., have to be + enclosed in single quotes when used as terminal or non-terminal + symbols in grammar rules. The use of the symbols + <c>'$empty'</c>, <c>'$end'</c>, and <c>'$undefined'</c> should + be avoided.</p> + <p>The last part of the grammar file is an optional section with + Erlang code (= function definitions) which is included 'as is' + in the resulting parser file. This section must start with the + pseudo declaration, or key words</p> + <code type="none"> +Erlang code. </code> + <p>No syntax rule definitions or other declarations may follow + this section. To avoid conflicts with internal variables, do not + use variable names beginning with two underscore characters + ('__') in the Erlang code in this section, or in the code + associated with the individual syntax rules.</p> + <p>The optional <c>expect</c> declaration can be placed anywhere + before the last optional section with Erlang code. It is used + for suppressing the warning about conflicts that is ordinarily + given if the grammar is ambiguous. An example:</p> + <code type="none"> +Expect 2. </code> + <p>The warning is given if the number of shift/reduce conflicts + differs from 2, or if there are reduce/reduce conflicts. + </p> + </section> + + <section> + <title>Examples</title> + <p>A grammar to parse list expressions (with empty associated + code):</p> + <code type="none"> +Nonterminals list elements element. +Terminals atom '(' ')'. +Rootsymbol list. +list -> '(' ')'. +list -> '(' elements ')'. +elements -> element. +elements -> element elements. +element -> atom. +element -> list. </code> + <p>This grammar can be used to generate a parser which parses list + expressions, such as <c>(), (a), (peter charles), (a (b c) d (())), ...</c> provided that your scanner tokenizes, for + example, the input <c>(peter charles)</c> as follows:</p> + <code type="none"> +[{'(', 1} , {atom, 1, peter}, {atom, 1, charles}, {')', 1}, + {'$end', 1}] </code> + <p>When a grammar rule is used by the parser to parse (part of) + the input string as a grammatical phrase, the associated code is + evaluated, and the value of the last expression becomes the + value of the parsed phrase. This value may be used by the parser + later to build structures that are values of higher phrases of + which the current phrase is a part. The values initially + associated with terminal category phrases, i.e. 
input tokens, + are the token tuples themselves.</p> + <p>Below is an example of the grammar above with structure + building code added:</p> + <code type="none"> +list -> '(' ')' : nil. +list -> '(' elements ')' : '$2'. +elements -> element : {cons, '$1', nil}. +elements -> element elements : {cons, '$1', '$2'}. +element -> atom : '$1'. +element -> list : '$1'. </code> + <p>With this code added to the grammar rules, the parser produces + the following value (structure) when parsing the input string + <c>(a b c).</c>. This still assumes that this was the first + input line that the scanner tokenized:</p> + <code type="none"> +{cons, {atom, 1, a}, {cons, {atom, 1, b}, + {cons, {atom, 1, c}, nil}}} </code> + <p>The associated code contains <c>pseudo variables</c> <c>'$1'</c>, <c>'$2'</c>, <c>'$3'</c>, etc. which refer to (are + bound to) the values associated previously by the parser with + the symbols of the right hand side of the rule. When these + symbols are terminal categories, the values are token tuples of + the input string (see above).</p> + <p>The associated code may not only be used to build structures + associated with phrases, but may also be used for syntactic and + semantic tests, printout actions (for example for tracing), etc. + during the parsing process. Since tokens contain positional + (line number) information, it is possible to produce error + messages which contain line numbers. If there is no associated + code after the right hand side of the rule, the value + <c>'$undefined'</c> is associated with the phrase.</p> + <p>The right hand side of a grammar rule may be empty. This is + indicated by using the special symbol <c>'$empty'</c> as rhs. + Then the list grammar above may be simplified to:</p> + <code type="none"> +list -> '(' elements ')' : '$2'. +elements -> element elements : {cons, '$1', '$2'}. +elements -> '$empty' : nil. +element -> atom : '$1'. +element -> list : '$1'. </code> + </section> + + <section> + <title>Generating a Parser</title> + <p>To call the parser generator, use the following command:</p> + <code type="none"> +yecc:file(Grammarfile). </code> + <p>An error message from Yecc will be shown if the grammar + is not of the LALR type (for example too ambiguous). + Shift/reduce conflicts are resolved in favor of shifting if + there are no operator precedence declarations. Refer to the + <c>yacc</c> documentation on the use of operator precedence.</p> + <p>The output file contains Erlang source code for a parser module + with module name equal to the <c>Parserfile</c> parameter. After + compilation, the parser can be called as follows (the module + name is assumed to be <c>myparser</c>):</p> + <code type="none"> +myparser:parse(myscanner:scan(Inport)) </code> + <p>The call format may be different if a customized prologue file + has been included when generating the parser instead of the + default file <c>lib/parsetools/include/yeccpre.hrl</c>.</p> + <p>With the standard prologue, this call will return either + <c>{ok, Result}</c>, where <c>Result</c> is a structure that the + Erlang code of the grammar file has built, or <c>{error, {Line_number, Module, Message}}</c> if there was a syntax error + in the input.</p> + <p><c>Message</c> is something which may be converted into a + string by calling <c>Module:format_error(Message)</c> + and printed with <c>io:format/3</c>.</p> + <note> + <p>By default, the parser that was generated will not print out + error messages to the screen.
The user will have to do this + either by printing the returned error messages, or by + inserting tests and print instructions in the Erlang code + associated with the syntax rules of the grammar file.</p> + </note> + <p>It is also possible to make the parser ask for more input + tokens when needed if the following call format is used:</p> + <code type="none"> +myparser:parse_and_scan({Function, Args}) +myparser:parse_and_scan({Mod, Tokenizer, Args}) </code> + <p>The tokenizer <c>Function</c> is either a fun or a tuple + <c>{Mod, Tokenizer}</c>. The call <c>apply(Function, Args)</c> + or <c>apply({Mod, Tokenizer}, Args)</c> is executed whenever a + new token is needed. This, for example, makes it possible to + parse from a file, token by token.</p> + <p>The tokenizer used above has to be implemented so as to return + one of the following:</p> + <code type="none"> +{ok, Tokens, Endline} +{eof, Endline} +{error, Error_description, Endline} </code> + <p>This conforms to the format used by the scanner in the Erlang + <c>io</c> library module.</p> + <p>If <c>{eof, Endline}</c> is returned immediately, the call to + <c>parse_and_scan/1</c> returns <c>{ok, eof}</c>. If <c>{eof, Endline}</c> is returned before the parser expects end of input, + <c>parse_and_scan/1</c> will, of course, return an error message + (see above). Otherwise <c>{ok, Result}</c> is returned.</p> + </section> + + <section> + <title>More Examples</title> + <p>1. A grammar for parsing infix arithmetic expressions into + prefix notation, without operator precedence:</p> + <code type="none"> +Nonterminals E T F. +Terminals '+' '*' '(' ')' number. +Rootsymbol E. +E -> E '+' T: ['$1', '$2', '$3']. +E -> T : '$1'. +T -> T '*' F: ['$1', '$2', '$3']. +T -> F : '$1'. +F -> '(' E ')' : '$2'. +F -> number : '$1'. </code> + <p>2. The same with operator precedence becomes simpler:</p> + <code type="none"> +Nonterminals E. +Terminals '+' '*' '(' ')' number. +Rootsymbol E. +Left 100 '+'. +Left 200 '*'. +E -> E '+' E : ['$1', '$2', '$3']. +E -> E '*' E : ['$1', '$2', '$3']. +E -> '(' E ')' : '$2'. +E -> number : '$1'. </code> + <p>3. An overloaded minus operator:</p> + <code type="none"> +Nonterminals E uminus. +Terminals '*' '-' number. +Rootsymbol E. + +Left 100 '-'. +Left 200 '*'. +Unary 300 uminus. + +E -> E '-' E. +E -> E '*' E. +E -> uminus. +E -> number. + +uminus -> '-' E. </code> + <p>4. The Yecc grammar that is used for parsing grammar + files, including itself:</p> + <code type="none"> +Nonterminals +grammar declaration rule head symbol symbols attached_code +token tokens. +Terminals +atom float integer reserved_symbol reserved_word string char var +'->' ':' dot. +Rootsymbol grammar. +Endsymbol '$end'. +grammar -> declaration : '$1'. +grammar -> rule : '$1'. +declaration -> symbol symbols dot: {'$1', '$2'}. +rule -> head '->' symbols attached_code dot: {rule, ['$1' | '$3'], + '$4'}. +head -> symbol : '$1'. +symbols -> symbol : ['$1']. +symbols -> symbol symbols : ['$1' | '$2']. +attached_code -> ':' tokens : {erlang_code, '$2'}. +attached_code -> '$empty' : {erlang_code, + [{atom, 0, '$undefined'}]}. +tokens -> token : ['$1']. +tokens -> token tokens : ['$1' | '$2']. +symbol -> var : value_of('$1'). +symbol -> atom : value_of('$1'). +symbol -> integer : value_of('$1'). +symbol -> reserved_word : value_of('$1'). +token -> var : '$1'. +token -> atom : '$1'. +token -> float : '$1'. +token -> integer : '$1'. +token -> string : '$1'. +token -> char : '$1'. +token -> reserved_symbol : {value_of('$1'), line_of('$1')}. 
+token -> reserved_word : {value_of('$1'), line_of('$1')}.
+token -> '->' : {'->', line_of('$1')}.
+token -> ':' : {':', line_of('$1')}.
+Erlang code.
+value_of(Token) ->
+    element(3, Token).
+line_of(Token) ->
+    element(2, Token).    </code>
+    <note>
+      <p>The symbols <c>'->'</c> and <c>':'</c> have to be treated in
+        a special way, as they are meta symbols of the grammar
+        notation, as well as terminal symbols of the Yecc
+        grammar.</p>
+    </note>
+    <p>5. The file <c>erl_parse.yrl</c> in the <c>lib/stdlib/src</c>
+      directory contains the grammar for Erlang.</p>
+    <note>
+      <p>Syntactic tests are used in the code associated with some
+        rules, and an error is thrown (and caught by the generated
+        parser to produce an error message) when a test fails. The
+        same effect can be achieved with a call to
+        <c>return_error(Error_line, Message_string)</c>, which is
+        defined in the <c>yeccpre.hrl</c> default header file.</p>
+    </note>
+  </section>
+
+  <section>
+    <title>Files</title>
+    <code type="none">
+lib/parsetools/include/yeccpre.hrl    </code>
+  </section>
+
+  <section>
+    <title>See Also</title>
+    <p>Aho &amp; Johnson: 'LR Parsing', ACM Computing Surveys, vol. 6:2, 1974.</p>
+  </section>
+</erlref>
+
diff --git a/lib/parsetools/ebin/.gitignore b/lib/parsetools/ebin/.gitignore
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/lib/parsetools/ebin/.gitignore
diff --git a/lib/parsetools/include/leexinc.hrl b/lib/parsetools/include/leexinc.hrl
new file mode 100644
index 0000000000..dbbb688d2d
--- /dev/null
+++ b/lib/parsetools/include/leexinc.hrl
@@ -0,0 +1,281 @@
+%% The source of this file is part of the leex distribution; as such it
+%% has the same Copyright as the other files in the leex
+%% distribution. The Copyright is defined in the accompanying file
+%% COPYRIGHT. However, the resultant scanner generated by leex is the
+%% property of the creator of the scanner and is not covered by that
+%% Copyright.
+
+##module
+
+-export([string/1,string/2,token/2,token/3,tokens/2,tokens/3]).
+-export([format_error/1]).
+
+%% User code. This is placed here to allow extra attributes.
+##code
+
+format_error({illegal,S}) -> ["illegal characters ",io_lib:write_string(S)];
+format_error({user,S}) -> S.
+
+string(String) -> string(String, 1).
+
+string(String, Line) -> string(String, Line, String, []).
+
+%% string(InChars, Line, TokenChars, Tokens) ->
+%%     {ok,Tokens,Line} | {error,ErrorInfo,Line}.
+%% Note that the line number going into yystate, L0, is the line of the
+%% token start, while the line number returned is the line of the token
+%% end. We want the line of the token start.
+
+string([], L, [], Ts) ->                        % No partial tokens!
+    {ok,yyrev(Ts),L};
+string(Ics0, L0, Tcs, Ts) ->
+    case yystate(yystate(), Ics0, L0, 0, reject, 0) of
+        {A,Alen,Ics1,L1} ->                     % Accepting end state
+            string_cont(Ics1, L1, yyaction(A, Alen, Tcs, L0), Ts);
+        {A,Alen,Ics1,L1,_S1} ->                 % Accepting transition state
+            string_cont(Ics1, L1, yyaction(A, Alen, Tcs, L0), Ts);
+        {reject,_Alen,Tlen,_Ics1,L1,_S1} ->     % After a non-accepting state
+            {error,{L0,?MODULE,{illegal,yypre(Tcs, Tlen+1)}},L1};
+        {A,Alen,_Tlen,_Ics1,L1,_S1} ->
+            string_cont(yysuf(Tcs, Alen), L1, yyaction(A, Alen, Tcs, L0), Ts)
+    end.
+
+%% string_cont(RestChars, Line, Token, Tokens)
+%% Test for and remove the end token wrapper. Push back characters
+%% are prepended to RestChars.
+ +string_cont(Rest, Line, {token,T}, Ts) -> + string(Rest, Line, Rest, [T|Ts]); +string_cont(Rest, Line, {token,T,Push}, Ts) -> + NewRest = Push ++ Rest, + string(NewRest, Line, NewRest, [T|Ts]); +string_cont(Rest, Line, {end_token,T}, Ts) -> + string(Rest, Line, Rest, [T|Ts]); +string_cont(Rest, Line, {end_token,T,Push}, Ts) -> + NewRest = Push ++ Rest, + string(NewRest, Line, NewRest, [T|Ts]); +string_cont(Rest, Line, skip_token, Ts) -> + string(Rest, Line, Rest, Ts); +string_cont(Rest, Line, {skip_token,Push}, Ts) -> + NewRest = Push ++ Rest, + string(NewRest, Line, NewRest, Ts); +string_cont(_Rest, Line, {error,S}, _Ts) -> + {error,{Line,?MODULE,{user,S}},Line}. + +%% token(Continuation, Chars) -> +%% token(Continuation, Chars, Line) -> +%% {more,Continuation} | {done,ReturnVal,RestChars}. +%% Must be careful when re-entering to append the latest characters to the +%% after characters in an accept. The continuation is: +%% {token,State,CurrLine,TokenChars,TokenLen,TokenLine,AccAction,AccLen} + +token(Cont, Chars) -> token(Cont, Chars, 1). + +token([], Chars, Line) -> + token(yystate(), Chars, Line, Chars, 0, Line, reject, 0); +token({token,State,Line,Tcs,Tlen,Tline,Action,Alen}, Chars, _) -> + token(State, Chars, Line, Tcs ++ Chars, Tlen, Tline, Action, Alen). + +%% token(State, InChars, Line, TokenChars, TokenLen, TokenLine, +%% AcceptAction, AcceptLen) -> +%% {more,Continuation} | {done,ReturnVal,RestChars}. +%% The argument order is chosen to be more efficient. + +token(S0, Ics0, L0, Tcs, Tlen0, Tline, A0, Alen0) -> + case yystate(S0, Ics0, L0, Tlen0, A0, Alen0) of + %% Accepting end state, we have a token. + {A1,Alen1,Ics1,L1} -> + token_cont(Ics1, L1, yyaction(A1, Alen1, Tcs, Tline)); + %% Accepting transition state, can take more chars. + {A1,Alen1,[],L1,S1} -> % Need more chars to check + {more,{token,S1,L1,Tcs,Alen1,Tline,A1,Alen1}}; + {A1,Alen1,Ics1,L1,_S1} -> % Take what we got + token_cont(Ics1, L1, yyaction(A1, Alen1, Tcs, Tline)); + %% After a non-accepting state, maybe reach accept state later. + {A1,Alen1,Tlen1,[],L1,S1} -> % Need more chars to check + {more,{token,S1,L1,Tcs,Tlen1,Tline,A1,Alen1}}; + {reject,_Alen1,Tlen1,eof,L1,_S1} -> % No token match + %% Check for partial token which is error. + Ret = if Tlen1 > 0 -> {error,{Tline,?MODULE, + %% Skip eof tail in Tcs. + {illegal,yypre(Tcs, Tlen1)}},L1}; + true -> {eof,L1} + end, + {done,Ret,eof}; + {reject,_Alen1,Tlen1,Ics1,L1,_S1} -> % No token match + Error = {Tline,?MODULE,{illegal,yypre(Tcs, Tlen1+1)}}, + {done,{error,Error,L1},Ics1}; + {A1,Alen1,_Tlen1,_Ics1,L1,_S1} -> % Use last accept match + token_cont(yysuf(Tcs, Alen1), L1, yyaction(A1, Alen1, Tcs, Tline)) + end. + +%% token_cont(RestChars, Line, Token) +%% If we have a token or error then return done, else if we have a +%% skip_token then continue. + +token_cont(Rest, Line, {token,T}) -> + {done,{ok,T,Line},Rest}; +token_cont(Rest, Line, {token,T,Push}) -> + NewRest = Push ++ Rest, + {done,{ok,T,Line},NewRest}; +token_cont(Rest, Line, {end_token,T}) -> + {done,{ok,T,Line},Rest}; +token_cont(Rest, Line, {end_token,T,Push}) -> + NewRest = Push ++ Rest, + {done,{ok,T,Line},NewRest}; +token_cont(Rest, Line, skip_token) -> + token(yystate(), Rest, Line, Rest, 0, Line, reject, 0); +token_cont(Rest, Line, {skip_token,Push}) -> + NewRest = Push ++ Rest, + token(yystate(), NewRest, Line, NewRest, 0, Line, reject, 0); +token_cont(Rest, Line, {error,S}) -> + {done,{error,{Line,?MODULE,{user,S}},Line},Rest}. 
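+
+%% Illustrative sketch, not part of the generated scanner: driving
+%% token/2 re-entrantly when the input arrives in chunks. The names
+%% my_scanner and next_chunk/0 are hypothetical.
+%%
+%% scan_one(Cont) ->
+%%     case my_scanner:token(Cont, next_chunk()) of
+%%         {more,Cont1} -> scan_one(Cont1);
+%%         {done,{ok,T,_Line},_Rest} -> {ok,T};
+%%         {done,{eof,_Line},_Rest} -> eof;
+%%         {done,{error,ErrorInfo,_Line},_Rest} -> {error,ErrorInfo}
+%%     end.
+%%
+%% The first call passes [] as Cont; next_chunk() is assumed to return
+%% a list of characters, or the atom eof once the input is exhausted.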
+ +%% tokens(Continuation, Chars, Line) -> +%% {more,Continuation} | {done,ReturnVal,RestChars}. +%% Must be careful when re-entering to append the latest characters to the +%% after characters in an accept. The continuation is: +%% {tokens,State,CurrLine,TokenChars,TokenLen,TokenLine,Tokens,AccAction,AccLen} +%% {skip_tokens,State,CurrLine,TokenChars,TokenLen,TokenLine,Error,AccAction,AccLen} + +tokens(Cont, Chars) -> tokens(Cont, Chars, 1). + +tokens([], Chars, Line) -> + tokens(yystate(), Chars, Line, Chars, 0, Line, [], reject, 0); +tokens({tokens,State,Line,Tcs,Tlen,Tline,Ts,Action,Alen}, Chars, _) -> + tokens(State, Chars, Line, Tcs ++ Chars, Tlen, Tline, Ts, Action, Alen); +tokens({skip_tokens,State,Line,Tcs,Tlen,Tline,Error,Action,Alen}, Chars, _) -> + skip_tokens(State, Chars, Line, Tcs ++ Chars, Tlen, Tline, Error, Action, Alen). + +%% tokens(State, InChars, Line, TokenChars, TokenLen, TokenLine, Tokens, +%% AcceptAction, AcceptLen) -> +%% {more,Continuation} | {done,ReturnVal,RestChars}. + +tokens(S0, Ics0, L0, Tcs, Tlen0, Tline, Ts, A0, Alen0) -> + case yystate(S0, Ics0, L0, Tlen0, A0, Alen0) of + %% Accepting end state, we have a token. + {A1,Alen1,Ics1,L1} -> + tokens_cont(Ics1, L1, yyaction(A1, Alen1, Tcs, Tline), Ts); + %% Accepting transition state, can take more chars. + {A1,Alen1,[],L1,S1} -> % Need more chars to check + {more,{tokens,S1,L1,Tcs,Alen1,Tline,Ts,A1,Alen1}}; + {A1,Alen1,Ics1,L1,_S1} -> % Take what we got + tokens_cont(Ics1, L1, yyaction(A1, Alen1, Tcs, Tline), Ts); + %% After a non-accepting state, maybe reach accept state later. + {A1,Alen1,Tlen1,[],L1,S1} -> % Need more chars to check + {more,{tokens,S1,L1,Tcs,Tlen1,Tline,Ts,A1,Alen1}}; + {reject,_Alen1,Tlen1,eof,L1,_S1} -> % No token match + %% Check for partial token which is error, no need to skip here. + Ret = if Tlen1 > 0 -> {error,{Tline,?MODULE, + %% Skip eof tail in Tcs. + {illegal,yypre(Tcs, Tlen1)}},L1}; + Ts == [] -> {eof,L1}; + true -> {ok,yyrev(Ts),L1} + end, + {done,Ret,eof}; + {reject,_Alen1,Tlen1,_Ics1,L1,_S1} -> + %% Skip rest of tokens. + Error = {L1,?MODULE,{illegal,yypre(Tcs, Tlen1+1)}}, + skip_tokens(yysuf(Tcs, Tlen1+1), L1, Error); + {A1,Alen1,_Tlen1,_Ics1,L1,_S1} -> + Token = yyaction(A1, Alen1, Tcs, Tline), + tokens_cont(yysuf(Tcs, Alen1), L1, Token, Ts) + end. + +%% tokens_cont(RestChars, Line, Token, Tokens) +%% If we have an end_token or error then return done, else if we have +%% a token then save it and continue, else if we have a skip_token +%% just continue. + +tokens_cont(Rest, Line, {token,T}, Ts) -> + tokens(yystate(), Rest, Line, Rest, 0, Line, [T|Ts], reject, 0); +tokens_cont(Rest, Line, {token,T,Push}, Ts) -> + NewRest = Push ++ Rest, + tokens(yystate(), NewRest, Line, NewRest, 0, Line, [T|Ts], reject, 0); +tokens_cont(Rest, Line, {end_token,T}, Ts) -> + {done,{ok,yyrev(Ts, [T]),Line},Rest}; +tokens_cont(Rest, Line, {end_token,T,Push}, Ts) -> + NewRest = Push ++ Rest, + {done,{ok,yyrev(Ts, [T]),Line},NewRest}; +tokens_cont(Rest, Line, skip_token, Ts) -> + tokens(yystate(), Rest, Line, Rest, 0, Line, Ts, reject, 0); +tokens_cont(Rest, Line, {skip_token,Push}, Ts) -> + NewRest = Push ++ Rest, + tokens(yystate(), NewRest, Line, NewRest, 0, Line, Ts, reject, 0); +tokens_cont(Rest, Line, {error,S}, _Ts) -> + skip_tokens(Rest, Line, {Line,?MODULE,{user,S}}). + +%%skip_tokens(InChars, Line, Error) -> {done,{error,Error,Line},Ics}. +%% Skip tokens until an end token, junk everything and return the error. 
+
+skip_tokens(Ics, Line, Error) ->
+    skip_tokens(yystate(), Ics, Line, Ics, 0, Line, Error, reject, 0).
+
+%% skip_tokens(State, InChars, Line, TokenChars, TokenLen, TokenLine, Error,
+%%             AcceptAction, AcceptLen) ->
+%%     {more,Continuation} | {done,ReturnVal,RestChars}.
+
+skip_tokens(S0, Ics0, L0, Tcs, Tlen0, Tline, Error, A0, Alen0) ->
+    case yystate(S0, Ics0, L0, Tlen0, A0, Alen0) of
+        {A1,Alen1,Ics1,L1} ->                   % Accepting end state
+            skip_cont(Ics1, L1, yyaction(A1, Alen1, Tcs, Tline), Error);
+        {A1,Alen1,[],L1,S1} ->                  % After an accepting state
+            {more,{skip_tokens,S1,L1,Tcs,Alen1,Tline,Error,A1,Alen1}};
+        {A1,Alen1,Ics1,L1,_S1} ->
+            skip_cont(Ics1, L1, yyaction(A1, Alen1, Tcs, Tline), Error);
+        {A1,Alen1,Tlen1,[],L1,S1} ->            % After a non-accepting state
+            {more,{skip_tokens,S1,L1,Tcs,Tlen1,Tline,Error,A1,Alen1}};
+        {reject,_Alen1,_Tlen1,eof,L1,_S1} ->
+            {done,{error,Error,L1},eof};
+        {reject,_Alen1,Tlen1,_Ics1,L1,_S1} ->
+            skip_tokens(yysuf(Tcs, Tlen1+1), L1, Error);
+        {A1,Alen1,_Tlen1,_Ics1,L1,_S1} ->
+            Token = yyaction(A1, Alen1, Tcs, Tline),
+            skip_cont(yysuf(Tcs, Alen1), L1, Token, Error)
+    end.
+
+%% skip_cont(RestChars, Line, Token, Error)
+%% Skip tokens until we have an end_token or error, then return done
+%% with the original error.
+
+skip_cont(Rest, Line, {token,_T}, Error) ->
+    skip_tokens(yystate(), Rest, Line, Rest, 0, Line, Error, reject, 0);
+skip_cont(Rest, Line, {token,_T,Push}, Error) ->
+    NewRest = Push ++ Rest,
+    skip_tokens(yystate(), NewRest, Line, NewRest, 0, Line, Error, reject, 0);
+skip_cont(Rest, Line, {end_token,_T}, Error) ->
+    {done,{error,Error,Line},Rest};
+skip_cont(Rest, Line, {end_token,_T,Push}, Error) ->
+    NewRest = Push ++ Rest,
+    {done,{error,Error,Line},NewRest};
+skip_cont(Rest, Line, skip_token, Error) ->
+    skip_tokens(yystate(), Rest, Line, Rest, 0, Line, Error, reject, 0);
+skip_cont(Rest, Line, {skip_token,Push}, Error) ->
+    NewRest = Push ++ Rest,
+    skip_tokens(yystate(), NewRest, Line, NewRest, 0, Line, Error, reject, 0);
+skip_cont(Rest, Line, {error,_S}, Error) ->
+    skip_tokens(yystate(), Rest, Line, Rest, 0, Line, Error, reject, 0).
+
+yyrev(List) -> lists:reverse(List).
+yyrev(List, Tail) -> lists:reverse(List, Tail).
+yypre(List, N) -> lists:sublist(List, N).
+yysuf(List, N) -> lists:nthtail(N, List).
+
+%% yystate() -> InitialState.
+%% yystate(State, InChars, Line, CurrTokLen, AcceptAction, AcceptLen) ->
+%%     {Action, AcceptLen, RestChars, Line} |
+%%     {Action, AcceptLen, RestChars, Line, State} |
+%%     {reject, AcceptLen, CurrTokLen, RestChars, Line, State} |
+%%     {Action, AcceptLen, CurrTokLen, RestChars, Line, State}.
+%% Generated state transition functions. The non-accepting end states
+%% signal either an unrecognised character or the end of the current
+%% input.
+
+##dfa
+
+%% yyaction(Action, TokenLength, TokenChars, TokenLine) ->
+%%     {token,Token} | {end_token, Token} | skip_token | {error,String}.
+%% Generated action function.
+
+##actions
+
diff --git a/lib/parsetools/include/yeccpre.hrl b/lib/parsetools/include/yeccpre.hrl
new file mode 100644
index 0000000000..2ffa13d6a7
--- /dev/null
+++ b/lib/parsetools/include/yeccpre.hrl
@@ -0,0 +1,180 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software.
If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% The parser generator will insert appropriate declarations before this line.% + +-type(yecc_ret() :: {'error', _} | {'ok', _}). + +-spec parse(Tokens :: list()) -> yecc_ret(). +parse(Tokens) -> + yeccpars0(Tokens, {no_func, no_line}, 0, [], []). + +-spec(parse_and_scan/1 :: + ({function() | {atom(), atom()}, [_]} | {atom(), atom(), [_]}) -> + yecc_ret()). +parse_and_scan({F, A}) -> % Fun or {M, F} + yeccpars0([], {{F, A}, no_line}, 0, [], []); +parse_and_scan({M, F, A}) -> + yeccpars0([], {{{M, F}, A}, no_line}, 0, [], []). + +-spec(format_error/1 :: (any()) -> [char() | list()]). +format_error(Message) -> + case io_lib:deep_char_list(Message) of + true -> + Message; + _ -> + io_lib:write(Message) + end. + +% To be used in grammar files to throw an error message to the parser +% toplevel. Doesn't have to be exported! +-compile({nowarn_unused_function, return_error/2}). +-spec(return_error/2 :: (integer(), any()) -> no_return()). +return_error(Line, Message) -> + throw({error, {Line, ?MODULE, Message}}). + +-define(CODE_VERSION, "1.4"). + +yeccpars0(Tokens, Tzr, State, States, Vstack) -> + try yeccpars1(Tokens, Tzr, State, States, Vstack) + catch + error: Error -> + Stacktrace = erlang:get_stacktrace(), + try yecc_error_type(Error, Stacktrace) of + {syntax_error, Token} -> + yeccerror(Token); + {missing_in_goto_table=Tag, Symbol, State} -> + Desc = {Symbol, State, Tag}, + erlang:raise(error, {yecc_bug, ?CODE_VERSION, Desc}, + Stacktrace) + catch _:_ -> erlang:raise(error, Error, Stacktrace) + end; + %% Probably thrown from return_error/2: + throw: {error, {_Line, ?MODULE, _M}} = Error -> + Error + end. + +yecc_error_type(function_clause, [{?MODULE,F,[State,_,_,_,Token,_,_]} | _]) -> + case atom_to_list(F) of + "yeccpars2" ++ _ -> + {syntax_error, Token}; + "yeccgoto_" ++ SymbolL -> + {ok,[{atom,_,Symbol}],_} = erl_scan:string(SymbolL), + {missing_in_goto_table, Symbol, State} + end. + +yeccpars1([Token | Tokens], Tzr, State, States, Vstack) -> + yeccpars2(State, element(1, Token), States, Vstack, Token, Tokens, Tzr); +yeccpars1([], {{F, A},_Line}, State, States, Vstack) -> + case apply(F, A) of + {ok, Tokens, Endline} -> + yeccpars1(Tokens, {{F, A}, Endline}, State, States, Vstack); + {eof, Endline} -> + yeccpars1([], {no_func, Endline}, State, States, Vstack); + {error, Descriptor, _Endline} -> + {error, Descriptor} + end; +yeccpars1([], {no_func, no_line}, State, States, Vstack) -> + Line = 999999, + yeccpars2(State, '$end', States, Vstack, yecc_end(Line), [], + {no_func, Line}); +yeccpars1([], {no_func, Endline}, State, States, Vstack) -> + yeccpars2(State, '$end', States, Vstack, yecc_end(Endline), [], + {no_func, Endline}). + +%% yeccpars1/7 is called from generated code. +%% +%% When using the {includefile, Includefile} option, make sure that +%% yeccpars1/7 can be found by parsing the file without following +%% include directives. yecc will otherwise assume that an old +%% yeccpre.hrl is included (one which defines yeccpars1/5). 
+yeccpars1(State1, State, States, Vstack, Token0, [Token | Tokens], Tzr) -> + yeccpars2(State, element(1, Token), [State1 | States], + [Token0 | Vstack], Token, Tokens, Tzr); +yeccpars1(State1, State, States, Vstack, Token0, [], {{_F,_A}, _Line}=Tzr) -> + yeccpars1([], Tzr, State, [State1 | States], [Token0 | Vstack]); +yeccpars1(State1, State, States, Vstack, Token0, [], {no_func, no_line}) -> + Line = yecctoken_end_location(Token0), + yeccpars2(State, '$end', [State1 | States], [Token0 | Vstack], + yecc_end(Line), [], {no_func, Line}); +yeccpars1(State1, State, States, Vstack, Token0, [], {no_func, Line}) -> + yeccpars2(State, '$end', [State1 | States], [Token0 | Vstack], + yecc_end(Line), [], {no_func, Line}). + +% For internal use only. +yecc_end({Line,_Column}) -> + {'$end', Line}; +yecc_end(Line) -> + {'$end', Line}. + +yecctoken_end_location(Token) -> + try + {text, Str} = erl_scan:token_info(Token, text), + {line, Line} = erl_scan:token_info(Token, line), + Parts = re:split(Str, "\n"), + Dline = length(Parts) - 1, + Yline = Line + Dline, + case erl_scan:token_info(Token, column) of + {column, Column} -> + Col = byte_size(lists:last(Parts)), + {Yline, Col + if Dline =:= 0 -> Column; true -> 1 end}; + undefined -> + Yline + end + catch _:_ -> + yecctoken_location(Token) + end. + +yeccerror(Token) -> + Text = yecctoken_to_string(Token), + Location = yecctoken_location(Token), + {error, {Location, ?MODULE, ["syntax error before: ", Text]}}. + +yecctoken_to_string(Token) -> + case catch erl_scan:token_info(Token, text) of + {text, Txt} -> Txt; + _ -> yecctoken2string(Token) + end. + +yecctoken_location(Token) -> + case catch erl_scan:token_info(Token, location) of + {location, Loc} -> Loc; + _ -> element(2, Token) + end. + +yecctoken2string({atom, _, A}) -> io_lib:write(A); +yecctoken2string({integer,_,N}) -> io_lib:write(N); +yecctoken2string({float,_,F}) -> io_lib:write(F); +yecctoken2string({char,_,C}) -> io_lib:write_char(C); +yecctoken2string({var,_,V}) -> io_lib:format("~s", [V]); +yecctoken2string({string,_,S}) -> io_lib:write_unicode_string(S); +yecctoken2string({reserved_symbol, _, A}) -> io_lib:write(A); +yecctoken2string({_Cat, _, Val}) -> io_lib:write(Val); +yecctoken2string({dot, _}) -> "'.'"; +yecctoken2string({'$end', _}) -> + []; +yecctoken2string({Other, _}) when is_atom(Other) -> + io_lib:write(Other); +yecctoken2string(Other) -> + io_lib:write(Other). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + diff --git a/lib/parsetools/info b/lib/parsetools/info new file mode 100644 index 0000000000..bcfb652de0 --- /dev/null +++ b/lib/parsetools/info @@ -0,0 +1,2 @@ +group: tools +short: A set of parsing and lexical analysis tools diff --git a/lib/parsetools/src/Makefile b/lib/parsetools/src/Makefile new file mode 100644 index 0000000000..89e079e411 --- /dev/null +++ b/lib/parsetools/src/Makefile @@ -0,0 +1,100 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 1996-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include ../vsn.mk +VSN=$(PARSETOOLS_VSN) + +# ---------------------------------------------------- +# Release directory specification +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/lib/parsetools-$(VSN) + +# ---------------------------------------------------- +# Target Specs +# ---------------------------------------------------- +MODULES = \ + leex \ + yecc \ + yeccparser \ + yeccscan + +HRL_FILES = ../include/yeccpre.hrl ../include/leexinc.hrl + +ERL_FILES = $(MODULES:%=%.erl) + +TARGET_FILES = $(MODULES:%=$(EBIN)/%.$(EMULATOR)) $(APP_TARGET) $(APPUP_TARGET) + +APP_FILE = parsetools.app + +APP_SRC = $(APP_FILE).src +APP_TARGET = $(EBIN)/$(APP_FILE) + +APPUP_FILE= parsetools.appup + +APPUP_SRC= $(APPUP_FILE).src +APPUP_TARGET= $(EBIN)/$(APPUP_FILE) + +# ---------------------------------------------------- +# FLAGS +# ---------------------------------------------------- +ERL_COMPILE_FLAGS += +warn_obsolete_guard -I$(ERL_TOP)/lib/stdlib/include + +# ---------------------------------------------------- +# Targets +# ---------------------------------------------------- + +debug opt: $(TARGET_FILES) + +clean: + rm -f $(TARGET_FILES) + rm -f core + +docs: + + +# ---------------------------------------------------- +# Special Build Targets +# ---------------------------------------------------- + +$(APP_TARGET): $(APP_SRC) ../vsn.mk + sed -e 's;%VSN%;$(VSN);' $< > $@ + +$(APPUP_TARGET): $(APPUP_SRC) ../vsn.mk + sed -e 's;%VSN%;$(VSN);' $< > $@ + +# ---------------------------------------------------- +# Release Target +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +release_spec: opt + $(INSTALL_DIR) $(RELSYSDIR)/src + $(INSTALL_DATA) $(ERL_FILES) $(RELSYSDIR)/src + $(INSTALL_DIR) $(RELSYSDIR)/ebin + $(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin + $(INSTALL_DIR) $(RELSYSDIR)/include + $(INSTALL_DATA) $(HRL_FILES) $(RELSYSDIR)/include + +release_docs_spec: + diff --git a/lib/parsetools/src/esyntax.yrl b/lib/parsetools/src/esyntax.yrl new file mode 100644 index 0000000000..1ecb54f0a7 --- /dev/null +++ b/lib/parsetools/src/esyntax.yrl @@ -0,0 +1,360 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. 
+%% +%% %CopyrightEnd% +%% +Nonterminals +add_op attribute basic_type bif_test +case_expr clause_body +clause_guard clause_head comp_op cr_clause cr_clauses expr expr_tail +exprs farity farity_list form formal_parameter_list function +function_call function_clause guard guard_call guard_expr +guard_expr_list guard_exprs guard_expr_tail guard_expr_tuple +guard_parameter_list +guard_tests guard_test if_clause if_clauses if_expr list match_expr +mult_op parameter_list pattern patterns pattern_list pattern_tail pattern_tuple +prefix_op receive_expr send_expr tuple. + +Terminals +'!' '(' ')' '*' '+' ',' '-' '->' '/' '/=' ':' ';' '<' '=' '=/=' '=:=' +'=<' '==' '>' '>=' '[' ']' 'after' 'band' 'begin' 'bnot' +'bor' 'bsl' 'bsr' 'bxor' 'case' 'catch' 'div' 'end' 'if' 'of' +'receive' 'rem' 'when' '{' '|' '}' atom float integer string var. +% 'receive' 'rem' 'true' 'when' '{' '|' '}' atom float integer string var. + +Rootsymbol form. + +Endsymbol dot. + +Unary 0 'catch'. +Right 200 '='. +Right 200 '!'. +Left 300 add_op. +Left 400 mult_op. +Unary 500 prefix_op. + + +add_op -> '+' : '$1'. +add_op -> '-' : '$1'. +add_op -> 'bor' : '$1'. +add_op -> 'bxor' : '$1'. +add_op -> 'bsl' : '$1'. +add_op -> 'bsr' : '$1'. + +comp_op -> '==' : '$1'. +comp_op -> '/=' : '$1'. +comp_op -> '=<' : '$1'. +comp_op -> '<' : '$1'. +comp_op -> '>=' : '$1'. +comp_op -> '>' : '$1'. +comp_op -> '=:=' : '$1'. +comp_op -> '=/=' : '$1'. + +mult_op -> '*' : '$1'. +mult_op -> '/' : '$1'. +mult_op -> 'div' : '$1'. +mult_op -> 'rem' : '$1'. +mult_op -> 'band' : '$1'. + +prefix_op -> '+' : '$1'. +prefix_op -> '-' : '$1'. +prefix_op -> 'bnot' : '$1'. + + +basic_type -> atom : '$1'. +basic_type -> float : '$1'. +basic_type -> integer : '$1'. +basic_type -> string : '$1'. +basic_type -> var : '$1'. +% basic_type -> 'true' : {atom, element(2, '$1'), 'true'}. + + +pattern -> basic_type : '$1'. +pattern -> pattern_list : '$1'. +pattern -> pattern_tuple : '$1'. + +pattern_list -> '[' ']' : {nil, element(2, '$1')}. +pattern_list -> '[' pattern pattern_tail ']' : + {cons, element(2, '$1'), '$2', '$3'}. + +pattern_tail -> '|' pattern : '$2'. +pattern_tail -> ',' pattern pattern_tail : + {cons, element(2, '$2'), '$2', '$3'}. +pattern_tail -> '$empty' : {nil, 0}. + +pattern_tuple -> '{' '}' : {tuple, element(2, '$1'), []}. +pattern_tuple -> '{' patterns '}' : {tuple, element(2, '$1'), '$2'}. + +patterns -> pattern : ['$1']. +patterns -> pattern ',' patterns : ['$1' | '$3']. + + +expr -> basic_type : '$1'. +expr -> list : '$1'. +expr -> tuple : '$1'. +expr -> function_call : '$1'. + +expr -> expr add_op expr : + {Op, Pos} = '$2', + {arith, Pos, Op, '$1', '$3'}. +expr -> expr mult_op expr : + {Op, Pos} = '$2', + {arith, Pos, Op, '$1', '$3'}. +expr -> prefix_op expr: + case '$2' of + {float, Pos, N} -> + case '$1' of + {'-', _} -> + {float, Pos, -N}; + {'+', _} -> + {float, Pos, N}; + {Op, Pos1} -> + {arith, Pos1, Op, {float, Pos, N}} + end; + {integer, Pos, N} -> + case '$1' of + {'-', _} -> + {integer, Pos, -N}; + {'+', _} -> + {integer, Pos, N}; + {Op, Pos1} -> + {arith, Pos1, Op, {integer, Pos, N}} + end; + _ -> + {Op, Pos} = '$1', + {arith, Pos, Op, '$2'} + end. + +expr -> '(' expr ')' : '$2'. +expr -> 'begin' exprs 'end' : {block, element(2, '$1'), '$2'}. +expr -> 'catch' expr : {'catch', element(2, '$1'), '$2'}. + +expr -> case_expr : '$1'. +expr -> if_expr : '$1'. +expr -> receive_expr : '$1'. +expr -> match_expr : '$1'. +expr -> send_expr : '$1'. + + +list -> '[' ']' : {nil, element(2, '$1')}. 
+list -> '[' expr expr_tail ']' : {cons, element(2, '$1'), '$2', '$3'}. + +expr_tail -> '|' expr : '$2'. +expr_tail -> ',' expr expr_tail : {cons, element(2, '$2'), '$2', '$3'}. +expr_tail -> '$empty' : {nil, 0}. + +tuple -> '{' '}' : {tuple, element(2, '$1'), []}. +tuple -> '{' exprs '}' : {tuple, element(2, '$1'), '$2'}. + + +function_call -> atom '(' parameter_list ')' : + case erl_parse:erlang_bif(element(3, '$1'), length('$3')) of + true -> + {bif, element(2, '$1'), element(3, '$1'), '$3'}; + false -> + {call, element(2, '$1'), [], element(3, '$1'), '$3'} + end. +function_call -> atom ':' atom '(' parameter_list ')' : + {call, element(2, '$1'), element(3, '$1'), element(3, '$3'), '$5'}. + +parameter_list -> exprs : '$1'. +parameter_list -> '$empty' : []. + + +case_expr -> 'case' expr 'of' cr_clauses 'end' : + {'case', element(2, '$1'), '$2', '$4'}. + +cr_clause -> pattern clause_guard clause_body : + {clause, element(2, '$1'), ['$1'], '$2', '$3'}. + +cr_clauses -> cr_clause : ['$1']. +cr_clauses -> cr_clause ';' cr_clauses : ['$1' | '$3']. + +if_expr -> 'if' if_clauses 'end' : {'if', element(2, '$1'), '$2'}. + +if_clause -> guard clause_body : {clause, element(2, hd('$2')), '$1', '$2'}. + +if_clauses -> if_clause : ['$1']. +if_clauses -> if_clause ';' if_clauses : ['$1' | '$3']. + +receive_expr -> 'receive' 'after' expr clause_body 'end' : + {'receive', element(2, '$1'), [], '$3', '$4'}. +receive_expr -> 'receive' cr_clauses 'end' : + {'receive', element(2, '$1'), '$2'}. +receive_expr -> 'receive' cr_clauses 'after' expr clause_body 'end' : + {'receive', element(2, '$1'), '$2', '$4', '$5'}. + + +match_expr -> expr '=' expr : + case erl_parse:is_term('$1') of + true -> + {match, element(2, '$1'), '$1', '$3'}; + false -> + throw({error, {element(2, '$1'), yecc, "illegal lhs in match **"}}) + end. + +send_expr -> expr '!' expr : + Pos = element(2, '$1'), + {send, Pos, '$1', '$3'}. + + +exprs -> expr : ['$1']. +exprs -> expr ',' exprs : ['$1' | '$3']. + + +guard_expr -> basic_type : '$1'. +guard_expr -> guard_expr_list : '$1'. +guard_expr -> guard_expr_tuple : '$1'. +guard_expr -> guard_call : '$1'. +guard_expr -> '(' guard_expr ')' : '$2'. +guard_expr -> guard_expr add_op guard_expr : + {Op, Pos} = '$2', + {arith, Pos, Op, '$1', '$3'}. +guard_expr -> guard_expr mult_op guard_expr : + {Op, Pos} = '$2', + {arith, Pos, Op, '$1', '$3'}. +guard_expr -> prefix_op guard_expr: + case '$2' of + {float, Pos, N} -> + case '$1' of + {'-', _} -> + {float, Pos, -N}; + {'+', _} -> + {float, Pos, N}; + {Op, Pos1} -> + {arith, Pos1, Op, {float, Pos, N}} + end; + {integer, Pos, N} -> + case '$1' of + {'-', _} -> + {integer, Pos, -N}; + {'+', _} -> + {integer, Pos, N}; + {Op, Pos1} -> + {arith, Pos1, Op, {integer, Pos, N}} + end; + _ -> + {Op, Pos} = '$1', + {arith, Pos, Op, '$2'} + end. + +guard_expr_list -> '[' ']' : {nil, element(2, '$1')}. +guard_expr_list -> '[' guard_expr guard_expr_tail ']' : + {cons, element(2, '$1'), '$2', '$3'}. + +guard_expr_tail -> '|' guard_expr : '$2'. +guard_expr_tail -> ',' guard_expr guard_expr_tail : + {cons, element(2, '$2'), '$2', '$3'}. +guard_expr_tail -> '$empty' : {nil, 0}. + +guard_expr_tuple -> '{' '}' : {tuple, element(2, '$1'), []}. +guard_expr_tuple -> '{' guard_exprs '}' : {tuple, element(2, '$1'), '$2'}. + +guard_exprs -> guard_expr : ['$1']. +guard_exprs -> guard_expr ',' guard_exprs : ['$1' | '$3']. 
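+
+%% Note on the prefix_op actions above (illustration, not a grammar rule):
+%% a unary + or - applied directly to a numeric literal is folded into
+%% the literal itself, while other cases build an arith node, e.g.
+%%
+%%     "-4"      parses to  {integer, Pos, -4}
+%%     "bnot 4"  parses to  {arith, Pos, 'bnot', {integer, Pos, 4}}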
+ + +guard_call -> atom '(' guard_parameter_list ')' : + case erl_parse:erlang_guard_bif(element(3, '$1'), length('$3')) of + true -> + {bif, element(2, '$1'), element(3, '$1'), '$3'}; + false -> + throw({error, {element(2, '$1'), yecc, "illegal test in guard **"}}) + end. + +guard_parameter_list -> guard_exprs : '$1'. +guard_parameter_list -> '$empty' : []. + + +bif_test -> atom '(' guard_parameter_list ')' : + case erl_parse:erlang_guard_test(element(3, '$1'), length('$3')) of + true -> + {test, element(2, '$1'), element(3, '$1'), '$3'}; + false -> + throw({error, {element(2, '$1'), yecc, "illegal test in guard **"}}) + end. + + +guard_test -> bif_test : '$1'. +guard_test -> guard_expr comp_op guard_expr : + {Op, Pos} = '$2', + {comp, Pos, Op, '$1', '$3'}. + +guard_tests -> guard_test : ['$1']. +guard_tests -> guard_test ',' guard_tests : ['$1' | '$3']. + +% guard -> 'true' : []. +guard -> atom : + case '$1' of + {atom, _, true} -> + []; + _ -> + throw({error, {element(2, '$1'), yecc, "illegal test in guard **"}}) + end. +guard -> guard_tests : '$1'. + + +function_clause -> clause_head clause_guard clause_body : + {Name, Line, Arity, Parameters} = '$1', + {function, Line, Name, Arity, + [{clause, element(2, hd('$3')), Parameters, '$2', '$3'}]}. + +clause_head -> atom '(' formal_parameter_list ')' : + {element(3, '$1'), element(2, '$1'), length('$3'), '$3'}. + +formal_parameter_list -> patterns : '$1'. +formal_parameter_list -> '$empty' : []. + +clause_guard -> 'when' guard : '$2'. +clause_guard -> '$empty' : []. + +clause_body -> '->' exprs: '$2'. + + +function -> function_clause : '$1'. +function -> function_clause ';' function : + case '$1' of + {function, Pos1, Name1, Arity1, [Clause]} -> + case '$3' of + {function, _, Name1, Arity2, Clauses} -> + if + Arity1 /= Arity2 -> + throw({error, {Pos1, yecc, + io_lib:format('arity conflict in definition of ~w', + [Name1])}}); + true -> + {function, Pos1, Name1, Arity1, [Clause | Clauses]} + end; + _ -> + throw({error, {Pos1, yecc, + io_lib:format('missing final dot in def of ~w/~w', + [Name1, Arity1])}}) + end + end. + + +attribute -> atom : element(3, '$1'). +attribute -> '[' farity_list ']' : '$2'. + +farity_list -> farity : ['$1']. +farity_list -> farity ',' farity_list : ['$1' | '$3']. + +farity -> atom '/' integer : {element(3, '$1'), element(3, '$3')}. + + +form -> '-' atom '(' attribute ')' : + {attribute, element(2, '$2'), element(3, '$2'), '$4'}. +form -> function : '$1'. diff --git a/lib/parsetools/src/leex.erl b/lib/parsetools/src/leex.erl new file mode 100644 index 0000000000..fd494eaf06 --- /dev/null +++ b/lib/parsetools/src/leex.erl @@ -0,0 +1,1608 @@ +%% Copyright (c) 2008,2009 Robert Virding. All rights reserved. +%% +%% Redistribution and use in source and binary forms, with or without +%% modification, are permitted provided that the following conditions +%% are met: +%% +%% 1. Redistributions of source code must retain the above copyright +%% notice, this list of conditions and the following disclaimer. +%% 2. Redistributions in binary form must reproduce the above copyright +%% notice, this list of conditions and the following disclaimer in the +%% documentation and/or other materials provided with the distribution. +%% +%% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +%% "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +%% LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +%% FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +%% COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +%% INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +%% BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +%% LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +%% CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +%% LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +%% ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +%% POSSIBILITY OF SUCH DAMAGE. + +%%% A Lexical Analyser Generator for Erlang. +%%% +%%% Most of the algorithms used here are taken pretty much as +%%% described in the "Dragon Book" by Aho, Sethi and Ullman. Some +%%% completing details were taken from "Compiler Design in C" by +%%% Hollub. + +-module(leex). + +-export([compile/3,file/1,file/2,format_error/1]). + +-import(lists, [member/2,reverse/1,sort/1,delete/2, + keysearch/3,keysort/2,keydelete/3,keyfind/3, + map/2,foldl/3,foreach/2,flatmap/2]). +-import(string, [substr/2,substr/3,span/2]). +-import(ordsets, [is_element/2,add_element/2,union/2]). +-import(orddict, [store/3]). + +-include("erl_compile.hrl"). +%%-include_lib("stdlib/include/erl_compile.hrl"). + +-define(LEEXINC, "leexinc.hrl"). % Include file +-define(LEEXLIB, parsetools). % Leex is in lib parsetools +%%-define(LEEXLIB, leex). % Leex is in lib leex + +-define(DEFS_HEAD, "Definitions."). +-define(RULE_HEAD, "Rules."). +-define(CODE_HEAD, "Erlang code."). + +-record(leex, {xfile=[], % Xrl file + efile=[], % Erl file + ifile=[], % Include file + gfile=[], % Graph file + module, % Module name + opts=[], % Options + posix=false, % POSIX regular expressions + errors=[], + warnings=[] + }). + +-record(nfa_state, {no,edges=[],accept=noaccept}). +-record(dfa_state, {no,nfa=[],trans=[],accept=noaccept}). + +%%% +%%% Exported functions +%%% + +%%% Interface to erl_compile. + +compile(Input0, Output0, + #options{warning = WarnLevel, verbose=Verbose, includes=Includes}) -> + Input = assure_extension(shorten_filename(Input0), ".xrl"), + Output = assure_extension(shorten_filename(Output0), ".erl"), + Includefile = lists:sublist(Includes, 1), + Opts = [{scannerfile,Output},{includefile,Includefile},{verbose,Verbose}, + {report_errors,true},{report_warnings,WarnLevel > 0}], + case file(Input, Opts) of + {ok, _} -> + ok; + error -> + error + end. + +%% file(File) -> ok | error. +%% file(File, Options) -> ok | error. + +file(File) -> file(File, []). + +file(File, Opts0) -> + case is_filename(File) of + no -> erlang:error(badarg, [File,Opts0]); + _ -> ok + end, + Opts = case options(Opts0) of + badarg -> + erlang:error(badarg, [File,Opts0]); + Options -> + Options + end, + St0 = #leex{}, + St1 = filenames(File, Opts, St0), % Get all the filenames + St = try + {ok,REAs,Actions,Code,St2} = parse_file(St1), + {DFA,DF} = make_dfa(REAs, St2), + St3 = out_file(St2, DFA, DF, Actions, Code), + case lists:member(dfa_graph, St3#leex.opts) of + true -> out_dfa_graph(St3, DFA, DF); + false -> St3 + end + catch #leex{}=St4 -> + St4 + end, + leex_ret(St). 
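+
+%% Illustrative usage sketch (file name hypothetical):
+%%
+%%     case leex:file("my_tokens.xrl", [verbose, dfa_graph]) of
+%%         {ok,Scannerfile} ->
+%%             %% Scannerfile is "my_tokens.erl"; dfa_graph also wrote
+%%             %% "my_tokens.dot", a dot(1) description of the minimised
+%%             %% DFA (see the out_dfa_graph/3 call in file/2 above).
+%%             compile:file(Scannerfile);
+%%         error ->
+%%             error                % already reported on standard output
+%%     end.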
+ +format_error({file_error, Reason}) -> + io_lib:fwrite("~s",[file:format_error(Reason)]); +format_error(missing_defs) -> "missing Definitions"; +format_error(missing_rules) -> "missing Rules"; +format_error(missing_code) -> "missing Erlang code"; +format_error(empty_rules) -> "no rules"; +format_error(bad_rule) -> "bad rule"; +format_error({regexp,E})-> + Es = case E of + {interval_range,_} -> "interval range"; + {unterminated,Cs} -> + "unterminated " ++ Cs; + {illegal_char,Cs} -> + "illegal character " ++ Cs; + {posix_cc,What} -> + ["illegal POSIX character class ",io_lib:write_string(What)]; + {char_class,What} -> + ["illegal character class ",io_lib:write_string(What)] + end, + ["bad regexp `",Es,"'"]; +format_error(ignored_characters) -> + "ignored characters". + +%%% +%%% Local functions +%%% + +assure_extension(File, Ext) -> + lists:concat([strip_extension(File, Ext), Ext]). + +%% Assumes File is a filename. +strip_extension(File, Ext) -> + case filename:extension(File) of + Ext -> filename:rootname(File); + _Other -> File + end. + +options(Options0) when is_list(Options0) -> + try + Options = flatmap(fun(return) -> short_option(return, true); + (report) -> short_option(report, true); + ({return,T}) -> short_option(return, T); + ({report,T}) -> short_option(report, T); + (T) -> [T] + end, Options0), + options(Options, [scannerfile,includefile,report_errors, + report_warnings,return_errors,return_warnings, + verbose,dfa_graph], []) + catch error: _ -> badarg + end; +options(Option) -> + options([Option]). + +short_option(return, T) -> + [{return_errors,T}, {return_warnings,T}]; +short_option(report, T) -> + [{report_errors,T}, {report_warnings,T}]. + +options(Options0, [Key|Keys], L) when is_list(Options0) -> + Options = case member(Key, Options0) of + true -> + [atom_option(Key)|delete(Key, Options0)]; + false -> + Options0 + end, + V = case keysearch(Key, 1, Options) of + {value, {Key, Filename0}} when Key =:= includefile; + Key =:= scannerfile -> + case is_filename(Filename0) of + no -> + badarg; + Filename -> + {ok,[{Key,Filename}]} + end; + {value,{Key,Bool}} when Bool; not Bool -> + {ok,[{Key, Bool}]}; + {value,{Key, _}} -> + badarg; + false -> + {ok,[{Key,default_option(Key)}]} + end, + case V of + badarg -> + badarg; + {ok,KeyValueL} -> + NewOptions = keydelete(Key, 1, Options), + options(NewOptions, Keys, KeyValueL ++ L) + end; +options([], [], L) -> + foldl(fun({_,false}, A) -> A; + ({Tag,true}, A) -> [Tag|A]; + (F,A) -> [F|A] + end, [], L); +options(_Options, _, _L) -> + badarg. + +default_option(dfa_graph) -> false; +default_option(includefile) -> []; +default_option(report_errors) -> true; +default_option(report_warnings) -> true; +default_option(return_errors) -> false; +default_option(return_warnings) -> false; +default_option(scannerfile) -> []; +default_option(verbose) -> false. + +atom_option(dfa_graph) -> {dfa_graph,true}; +atom_option(report_errors) -> {report_errors,true}; +atom_option(report_warnings) -> {report_warnings,true}; +atom_option(return_errors) -> {return_errors,true}; +atom_option(return_warnings) -> {return_warnings,true}; +atom_option(verbose) -> {verbose,true}; +atom_option(Key) -> Key. + +is_filename(T) -> + try filename:flatten(T) of + Filename -> Filename + catch error: _ -> no + end. + +shorten_filename(Name0) -> + {ok,Cwd} = file:get_cwd(), + case lists:prefix(Cwd, Name0) of + false -> Name0; + true -> + case lists:nthtail(length(Cwd), Name0) of + "/"++N -> N; + N -> N + end + end. 
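+
+%% For reference (cwd hypothetical): with current directory "/home/u/src",
+%% shorten_filename("/home/u/src/lex.xrl") -> "lex.xrl" and
+%% shorten_filename("/tmp/lex.xrl") -> "/tmp/lex.xrl" (unchanged).
+%% Similarly, options/1 above expands the atom return into return_errors
+%% and return_warnings, report into report_errors and report_warnings,
+%% and any unrecognised option makes file/2 exit with badarg.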
+ +leex_ret(St) -> + report_errors(St), + report_warnings(St), + Es = pack_errors(St#leex.errors), + Ws = pack_warnings(St#leex.warnings), + if + Es =:= [] -> + case member(return_warnings, St#leex.opts) of + true -> {ok, St#leex.efile, Ws}; + false -> {ok, St#leex.efile} + end; + true -> + case member(return_errors, St#leex.opts) of + true -> {error, Es, Ws}; + false -> error + end + end. + +pack_errors([{File,_} | _] = Es) -> + [{File, flatmap(fun({_,E}) -> [E] end, sort(Es))}]; +pack_errors([]) -> + []. + +pack_warnings([{File,_} | _] = Ws) -> + [{File, flatmap(fun({_,W}) -> [W] end, sort(Ws))}]; +pack_warnings([]) -> + []. + +report_errors(St) -> + when_opt(fun () -> + foreach(fun({File,{none,Mod,E}}) -> + io:fwrite("~s: ~s\n", + [File,Mod:format_error(E)]); + ({File,{Line,Mod,E}}) -> + io:fwrite("~s:~w: ~s\n", + [File,Line,Mod:format_error(E)]) + end, sort(St#leex.errors)) + end, report_errors, St#leex.opts). + +report_warnings(St) -> + when_opt(fun () -> + foreach(fun({File,{none,Mod,W}}) -> + io:fwrite("~s: Warning: ~s\n", + [File,Mod:format_error(W)]); + ({File,{Line,Mod,W}}) -> + io:fwrite("~s:~w: Warning: ~s\n", + [File,Line,Mod:format_error(W)]) + end, sort(St#leex.warnings)) + end, report_warnings, St#leex.opts). + +add_error(E, St) -> + add_error(St#leex.xfile, E, St). + +add_error(File, Error, St) -> + throw(St#leex{errors = [{File,Error}|St#leex.errors]}). + +add_warning(Line, W, St) -> + St#leex{warnings = [{St#leex.xfile,{Line,leex,W}}|St#leex.warnings]}. + +%% filenames(File, Options, State) -> State. +%% The default output dir is the current directory unless an +%% explicit one has been given in the options. + +filenames(File, Opts, St0) -> + Dir = filename:dirname(File), + Base = filename:basename(File, ".xrl"), + Xfile = filename:join(Dir, Base ++ ".xrl"), + Efile = Base ++ ".erl", + Gfile = Base ++ ".dot", + Module = list_to_atom(Base), + St1 = St0#leex{xfile=Xfile, + opts=Opts, + module=Module}, + {value,{includefile,Ifile0}} = keysearch(includefile, 1, Opts), + Ifile = inc_file_name(Ifile0), + %% Test for explicit scanner file. + {value,{scannerfile,Ofile}} = keysearch(scannerfile, 1, Opts), + if + Ofile =:= [] -> + St1#leex{efile=filename:join(Dir, Efile), + ifile=Ifile, + gfile=filename:join(Dir, Gfile)}; + true -> + D = filename:dirname(Ofile), + St1#leex{efile=Ofile, + ifile=Ifile, + gfile=filename:join(D, Gfile)} + end. + +when_opt(Do, Opt, Opts) -> + case member(Opt, Opts) of + true -> Do(); + false -> ok + end. + +verbose_print(St, Format, Args) -> + when_opt(fun () -> io:fwrite(Format, Args) end, verbose, St#leex.opts). + +%% parse_file(State) -> {ok,[REA],[Action],Code,NewState} | throw(NewState) +%% when +%% REA = {RegExp,ActionNo}; +%% Action = {ActionNo,ActionString}; +%% Code = {StartLine, StartPos, NumOfLines}. Where the Erlang code is. +%% +%% Read and parse the file Xfile. +%% After each section of the file has been parsed we directly call the +%% next section. This is done when we detect a line we don't recognise +%% in the current section. The file format is very simple and Erlang +%% token based, we allow empty lines and Erlang style comments. + +parse_file(St0) -> + case file:open(St0#leex.xfile, [read]) of + {ok,Xfile} -> + try + verbose_print(St0, "Parsing file ~s, ", [St0#leex.xfile]), + %% We KNOW that errors throw so we can ignore them here. 
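+            %% For orientation, the .xrl file parsed below has this
+            %% overall shape (illustrative sketch, not real input):
+            %%
+            %%     Definitions.
+            %%     D = [0-9]
+            %%     Rules.
+            %%     {D}+ : {token,{integer,TokenLine,list_to_integer(TokenChars)}}.
+            %%     Erlang code.
+            %%     ...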
+            {ok,Line1,St1} = parse_head(Xfile, St0),
+            {ok,Line2,Macs,St2} = parse_defs(Xfile, Line1, St1),
+            {ok,Line3,REAs,Actions,St3} =
+                parse_rules(Xfile, Line2, Macs, St2),
+            {ok,Code,St4} = parse_code(Xfile, Line3, St3),
+            verbose_print(St1, "contained ~w rules.~n", [length(REAs)]),
+            {ok,REAs,Actions,Code,St4}
+        after file:close(Xfile)
+        end;
+        {error,Error} ->
+            add_error({none,leex,{file_error,Error}}, St0)
+    end.
+
+%% parse_head(File, State) -> {ok,NextLine,State}.
+%% Parse the head of the file. Skip all comments and blank lines.
+
+parse_head(Ifile, St) -> {ok,nextline(Ifile, 0),St}.
+
+%% parse_defs(File, Line, State) -> {ok,NextLine,Macros,State}.
+%% Parse the macro definition section of a file. This must exist.
+%% The section is ended by a non-blank line which is not a macro def.
+
+parse_defs(Ifile, {ok,?DEFS_HEAD ++ Rest,L}, St) ->
+    St1 = warn_ignored_chars(L, Rest, St),
+    parse_defs(Ifile, nextline(Ifile, L), [], St1);
+parse_defs(_, {ok,_,L}, St) ->
+    add_error({L,leex,missing_defs}, St);
+parse_defs(_, {eof,L}, St) ->
+    add_error({L,leex,missing_defs}, St).
+
+parse_defs(Ifile, {ok,Chars,L}=Line, Ms, St) ->
+    %% This little beauty matches out a macro definition, RE's are so clear.
+    MS = "^[ \t]*([A-Z_][A-Za-z0-9_]*)[ \t]*=[ \t]*([^ \t\r\n]*)[ \t\r\n]*\$",
+    case re:run(Chars, MS, [{capture,all_but_first,list}]) of
+        {match,[Name,Def]} ->
+            %%io:fwrite("~p = ~p\n", [Name,Def]),
+            parse_defs(Ifile, nextline(Ifile, L), [{Name,Def}|Ms], St);
+        _ -> {ok,Line,Ms,St}                    % Anything else
+    end;
+parse_defs(_, Line, Ms, St) ->
+    {ok,Line,Ms,St}.
+
+%% parse_rules(File, Line, Macros, State) -> {ok,NextLine,REAs,Actions,State}.
+%% Parse the RE rules section of the file. This must exist.
+
+parse_rules(Ifile, {ok,?RULE_HEAD ++ Rest,L}, Ms, St) ->
+    St1 = warn_ignored_chars(L, Rest, St),
+    parse_rules(Ifile, nextline(Ifile, L), Ms, [], [], 0, St1);
+parse_rules(_, {ok,_,L}, _, St) ->
+    add_error({L,leex,missing_rules}, St);
+parse_rules(_, {eof,L}, _, St) ->
+    add_error({L,leex,missing_rules}, St).
+
+%% parse_rules(File, Result, Macros, RegExpActions, Actions, Acount, State) ->
+%%     {ok,NextLine,RegExpActions,Actions,NewState} | throw(NewState)
+
+parse_rules(Ifile, NextLine, Ms, REAs, As, N, St) ->
+    case NextLine of
+        {ok,?CODE_HEAD ++ _Rest,_} ->
+            parse_rules_end(Ifile, NextLine, REAs, As, St);
+        {ok,Chars,L0} ->
+            %%io:fwrite("~w: ~p~n", [L0,Chars]),
+            case collect_rule(Ifile, Chars, L0) of
+                {ok,Re,Atoks,L1} ->
+                    {ok,REA,A,St1} = parse_rule(Re, L0, Atoks, Ms, N, St),
+                    parse_rules(Ifile, nextline(Ifile, L1), Ms,
+                                [REA|REAs], [A|As], N+1, St1);
+                {error,E} -> add_error(E, St)
+            end;
+        {eof,_} ->
+            parse_rules_end(Ifile, NextLine, REAs, As, St)
+    end.
+
+parse_rules_end(_, {ok,_,L}, [], [], St) ->
+    add_error({L,leex,empty_rules}, St);
+parse_rules_end(_, {eof,L}, [], [], St) ->
+    add_error({L,leex,empty_rules}, St);
+parse_rules_end(_, NextLine, REAs, As, St) ->
+    %% Must be *VERY* careful to put rules in correct order!
+    {ok,NextLine,reverse(REAs),reverse(As),St}.
+
+%% collect_rule(File, Line, Lineno) ->
+%%     {ok,RegExp,ActionTokens,NewLineno} | {error,E}.
+%% Collect a complete rule by reading lines until the regexp and
+%% action have been read. Keep track of line number.
+
+collect_rule(Ifile, Chars, L0) ->
+    %% Erlang strings are 1 based, but re 0 :-(
+    {match,[{St0,Len}|_]} = re:run(Chars, "[^ \t\r\n]+"),
+    St = St0 + 1,
+    %%io:fwrite("RE = ~p~n", [substr(Chars, St, Len)]),
+    case collect_action(Ifile, substr(Chars, St+Len), L0, []) of
+        {ok,[{':',_}|Toks],L1} -> {ok,substr(Chars, St, Len),Toks,L1};
+        {ok,_,_} -> {error,{L0,leex,bad_rule}};
+        {eof,L1} -> {error,{L1,leex,bad_rule}};
+        {error,E,_} -> {error,E}
+    end.
+
+collect_action(Ifile, Chars, L0, Cont0) ->
+    case erl_scan:tokens(Cont0, Chars, L0) of
+        {done,{ok,Toks,_},_} -> {ok,Toks,L0};
+        {done,{eof,_},_} -> {eof,L0};
+        {done,{error,E,_},_} -> {error,E,L0};
+        {more,Cont1} ->
+            collect_action(Ifile, io:get_line(Ifile, leex), L0+1, Cont1)
+    end.
+
+%% parse_rule(RegExpString, RegExpLine, ActionTokens, Macros, Counter, State) ->
+%%     {ok,{RE,Action},ActionData,State}.
+%% Parse one regexp after performing macro substitution.
+
+parse_rule(S, Line, [{dot,_}], Ms, N, St) ->
+    case parse_rule_regexp(S, Ms, St) of
+        {ok,R} ->
+            {ok,{R,N},{N,empty_action},St};
+        {error,E} ->
+            add_error({Line,leex,E}, St)
+    end;
+parse_rule(S, Line, Atoks, Ms, N, St) ->
+    case parse_rule_regexp(S, Ms, St) of
+        {ok,R} ->
+            %%io:fwrite("RE = ~p~n", [R]),
+            %% Check for token variables.
+            TokenChars = var_used('TokenChars', Atoks),
+            TokenLen = var_used('TokenLen', Atoks),
+            TokenLine = var_used('TokenLine', Atoks),
+            {ok,{R,N},{N,Atoks,TokenChars,TokenLen,TokenLine},St};
+        {error,E} ->
+            add_error({Line,leex,E}, St)
+    end.
+
+var_used(Name, Toks) ->
+    case keyfind(Name, 3, Toks) of
+        {var,_,Name} -> true;                   %It's the var we want
+        _ -> false
+    end.
+
+%% parse_rule_regexp(RegExpString, Macros, State) ->
+%%     {ok,RegExp} | {error,Error}.
+%% Substitute in macros and parse RegExpString. Cannot use re:replace
+%% here as it gives special meaning to & in the replacement string.
+
+parse_rule_regexp(RE0, [{M,Exp}|Ms], St) ->
+    Split= re:split(RE0, "\\{" ++ M ++ "\\}", [{return,list}]),
+    RE1 = string:join(Split, Exp),
+    parse_rule_regexp(RE1, Ms, St);
+parse_rule_regexp(RE, [], St) ->
+    %%io:fwrite("RE = ~p~n", [RE]),
+    case re_parse(RE, St) of
+        {ok,R} -> {ok,R};
+        {error,E} -> {error,{regexp,E}}
+    end.
+
+%% parse_code(File, Line, State) -> {ok,Code,NewState}.
+%% Finds the line and the position where the code section of the file
+%% begins. This must exist.
+
+parse_code(Ifile, {ok,?CODE_HEAD ++ Rest,CodeL}, St) ->
+    St1 = warn_ignored_chars(CodeL, Rest, St),
+    {ok, CodePos} = file:position(Ifile, cur),
+    %% Just count the lines; copy the code from file to file later.
+    NCodeLines = count_lines(Ifile, 0),
+    {ok,{CodeL,CodePos,NCodeLines},St1};
+parse_code(_, {ok,_,L}, St) ->
+    add_error({L,leex,missing_code}, St);
+parse_code(_, {eof,L}, St) ->
+    add_error({L,leex,missing_code}, St).
+
+count_lines(File, N) ->
+    case io:get_line(File, leex) of
+        eof -> N;
+        _Line -> count_lines(File, N+1)
+    end.
+
+%% nextline(InputFile, PrevLineNo) -> {ok,Chars,LineNo} | {eof,LineNo}.
+%% Get the next line skipping comment lines and blank lines.
+
+nextline(Ifile, L) ->
+    case io:get_line(Ifile, leex) of
+        eof -> {eof,L};
+        Chars ->
+            case substr(Chars, span(Chars, " \t\n")+1) of
+                [$%|_Rest] -> nextline(Ifile, L+1);
+                [] -> nextline(Ifile, L+1);
+                _Other -> {ok,Chars,L+1}
+            end
+    end.
+
+warn_ignored_chars(Line, S, St) ->
+    case non_white(S) of
+        [] -> St;
+        _ -> add_warning(Line, ignored_characters, St)
+    end.
+
+non_white(S) ->
+    [C || C <- S, C > $\s, C < $\200 orelse C > $\240].
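+
+%% As an illustration of parse_rule_regexp/3 above: with the macro list
+%% Ms = [{"D","[0-9]"}], the rule regexp "{D}+" is expanded like this
+%% before being parsed:
+%%
+%%     re:split("{D}+", "\\{D\\}", [{return,list}]) -> ["","+"]
+%%     string:join(["","+"], "[0-9]")               -> "[0-9]+"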
It is equivalent to the +%% one used in AWK, except that we allow ^ $ to be used anywhere and fail +%% in the matching. +%% +%% reg -> alt : '$1'. +%% alt -> seq "|" seq ... : {alt,['$1','$2'...]}. +%% seq -> repeat repeat ... : {seq,['$1','$2'...]}. +%% repeat -> repeat "*" : {kclosure,'$1'}. +%% repeat -> repeat "+" : {pclosure,'$1'}. +%% repeat -> repeat "?" : {optional,'$1'}. +%% repeat -> repeat "{" [Min],[Max] "}" : {interval,'$1',Min,Max} +%% repeat -> single : '$1'. +%% single -> "(" reg ")" : {sub,'$2',Number}. +%% single -> "^" : bos/bol. +%% single -> "$" : eos/eol. +%% single -> "." : any. +%% single -> "[" class "]" : {char_class,char_class('$2')} +%% single -> "[" "^" class "]" : {comp_class,char_class('$3')}. +%% single -> "\"" chars "\"" : {lit,'$2'}. +%% single -> "\\" char : {lit,['$2']}. +%% single -> char : {lit,['$1']}. +%% single -> empty : epsilon. +%% The grammar of the current regular expressions. The actual parser +%% is a recursive descent implementation of the grammar. + +%% re_parse(Chars, State) -> {ok,RegExp} | {error,Error}. + +re_parse(Cs0, St) -> + case catch re_reg(Cs0, 0, St) of + {RE,_,[]} -> {ok,RE}; + {_,_,[C|_]} -> {error,{illegal_char,[C]}}; + {parse_error,E} -> {error,E} + end. + +parse_error(E) -> throw({parse_error,E}). + +re_reg(Cs, Sn, St) -> re_alt(Cs, Sn, St). + +re_alt(Cs0, Sn0, St) -> + {L,Sn1,Cs1} = re_seq(Cs0, Sn0, St), + case re_alt1(Cs1, Sn1, St) of + {[],Sn2,Cs2} -> {L,Sn2,Cs2}; + {Rs,Sn2,Cs2} -> {{alt,[L|Rs]},Sn2,Cs2} + end. + +re_alt1([$||Cs0], Sn0, St) -> + {L,Sn1,Cs1} = re_seq(Cs0, Sn0, St), + {Rs,Sn2,Cs2} = re_alt1(Cs1, Sn1, St), + {[L|Rs],Sn2,Cs2}; +re_alt1(Cs, Sn, _) -> {[],Sn,Cs}. + +%% Parse a sequence of regexps. Don't allow the empty sequence. +%% re_seq(Cs0, Sn0, St) -> +%% {L,Sn1,Cs1} = repeat(Cs0, Sn0, St), +%% case re_seq1(Cs1, Sn1, St) of +%% {[],Sn2,Cs2} -> {L,Sn2,Cs2}; +%% {Rs,Sn2,Cs2} -> {{seq,[L|Rs]},Sn2,Cs2} +%% end. + +%% re_seq(Chars, SubNumber, State) -> {RegExp,SubNumber,Chars}. +%% Parse a sequence of regexps. Allow the empty sequence, returns epsilon. + +re_seq(Cs0, Sn0, St) -> + case re_seq1(Cs0, Sn0, St) of + {[],Sn1,Cs1} -> {epsilon,Sn1,Cs1}; + {[R],Sn1,Cs1} -> {R,Sn1,Cs1}; + {Rs,Sn1,Cs1} -> {{seq,Rs},Sn1,Cs1} + end. + +re_seq1([C|_]=Cs0, Sn0, St) when C /= $|, C /= $) -> + {L,Sn1,Cs1} = re_repeat(Cs0, Sn0, St), + {Rs,Sn2,Cs2} = re_seq1(Cs1, Sn1, St), + {[L|Rs],Sn2,Cs2}; +re_seq1(Cs, Sn, _) -> {[],Sn,Cs}. + +%% re_repeat(Chars, SubNumber, State) -> {RegExp,SubNumber,Chars}. + +re_repeat(Cs0, Sn0, St) -> + {S,Sn1,Cs1} = re_single(Cs0, Sn0, St), + re_repeat1(Cs1, Sn1, S, St). + +re_repeat1([$*|Cs], Sn, S, St) -> re_repeat1(Cs, Sn, {kclosure,S}, St); +re_repeat1([$+|Cs], Sn, S, St) -> re_repeat1(Cs, Sn, {pclosure,S}, St); +re_repeat1([$?|Cs], Sn, S, St) -> re_repeat1(Cs, Sn, {optional,S}, St); +%% { only starts interval when ere is true, otherwise normal character. +re_repeat1([${|Cs0], Sn, S, #leex{posix=true}=St) -> % $} + case re_interval_range(Cs0) of + {Min,Max,[$}|Cs1]} when is_integer(Min), is_integer(Max), Min =< Max -> + re_repeat1(Cs1, Sn, {interval,S,Min,Max}, St); + {Min,Max,[$}|Cs1]} when is_integer(Min), is_atom(Max) -> + re_repeat1(Cs1, Sn, {interval,S,Min,Max}, St); + {_,_,Cs1} -> parse_error({interval_range,string_between([${|Cs0], Cs1)}) + end; +re_repeat1(Cs, Sn, S, _) -> {S,Sn,Cs}. + +%% re_single(Chars, SubNumber, State) -> {RegExp,SubNumber,Chars}. +%% Parse a re_single regexp. 
+ +re_single([$(|Cs0], Sn0, St) -> % $) + Sn1 = Sn0 + 1, % Keep track of sub count anyway + case re_reg(Cs0, Sn1, St) of + {S,Sn2,[$)|Cs1]} -> {S,Sn2,Cs1}; + %%{S,Sn2,[$)|Cs1]} -> {{sub,S,Sn1},Sn2,Cs1}; + _ -> parse_error({unterminated,"("}) + end; +%% These are not legal inside a regexp. +%% re_single([$^|Cs], Sn, St) -> {bos,Sn,Cs}; +%% re_single([$$|Cs], Sn, St) -> {eos,Sn,Cs}; +%% re_single([$.|Cs], Sn, St) -> {any,Sn,Cs}; +re_single([$.|Cs], Sn, _) -> {{comp_class,"\n"},Sn,Cs}; % Do this here? +re_single("[^" ++ Cs0, Sn, St) -> + case re_char_class(Cs0, St) of + {Cc,[$]|Cs1]} -> {{comp_class,Cc},Sn,Cs1}; + _ -> parse_error({unterminated,"["}) + end; +re_single([$[|Cs0], Sn, St) -> + case re_char_class(Cs0, St) of + {Cc,[$]|Cs1]} -> {{char_class,Cc},Sn,Cs1}; + _ -> parse_error({unterminated,"["}) + end; +re_single([$\\|Cs0], Sn, _) -> + {C,Cs1} = re_char($\\, Cs0), + {{lit,[C]},Sn,Cs1}; +re_single([C|Cs0], Sn, St) -> + case special_char(C, St) of + true -> parse_error({illegal_char,[C]}); + false -> + {C,Cs1} = re_char(C, Cs0), + {{lit,[C]},Sn,Cs1} + end. + +-define(IS_HEX(C), C >= $0 andalso C =< $9 orelse + C >= $A andalso C =< $F orelse + C >= $a andalso C =< $f). + +%% re_char(Char, Chars) -> {CharValue,Chars}. +%% Reads one character value from the input list, it knows about escapes. + +re_char($\\, [O1,O2,O3|S]) when + O1 >= $0, O1 =< $7, O2 >= $0, O2 =< $7, O3 >= $0, O3 =< $7 -> + {(O1*8 + O2)*8 + O3 - 73*$0,S}; +re_char($\\, [$x,H1,H2|S]) when ?IS_HEX(H1), ?IS_HEX(H2) -> + {erlang:list_to_integer([H1,H2], 16),S}; +re_char($\\,[$x,${|S0]) -> + re_hex(S0, []); +re_char($\\,[$x|_]) -> + parse_error({illegal_char,"\\x"}); +re_char($\\, [C|S]) -> {escape_char(C),S}; +re_char($\\, []) -> parse_error({unterminated,"\\"}); +re_char(C, S) -> {C,S}. % Just this character + +re_hex([C|Cs], L) when ?IS_HEX(C) -> re_hex(Cs, [C|L]); +re_hex([$}|S], L0) -> + L = lists:reverse(L0), + case erlang:list_to_integer(L, 16) of + C when C =< 16#10FFFF -> {C,S}; + _ -> parse_error({illegal_char,[$\\,$x,${|L]++"}"}) + end; +re_hex(_, _) -> parse_error({unterminated,"\\x{"}). + +%% special_char(Char, State) -> bool(). +%% These are the special characters for an ERE. +%% N.B. ]}) are only special in the context after [{(. + +special_char($^, _) -> true; +special_char($., _) -> true; +special_char($[, _) -> true; +special_char($$, _) -> true; +special_char($(, _) -> true; +special_char($), _) -> true; +special_char($|, _) -> true; +special_char($*, _) -> true; +special_char($+, _) -> true; +special_char($?, _) -> true; +special_char(${, #leex{posix=true}) -> true; % Only when POSIX set +special_char($\\, _) -> true; +special_char(_, _) -> false. + +%% re_char_class(Chars, State) -> {CharClass,Chars}. +%% Parse a character class. + +re_char_class([$]|Cs], St) -> % Must special case this. + re_char_class(Cs, [$]], St); +re_char_class(Cs, St) -> re_char_class(Cs, [], St). + +re_char_class("[:" ++ Cs0, Cc, #leex{posix=true}=St) -> + %% POSIX char class only. + case posix_cc(Cs0) of + {Pcl,":]" ++ Cs1} -> re_char_class(Cs1, [{posix,Pcl}|Cc], St); + {_,Cs1} -> parse_error({posix_cc,string_between(Cs0, Cs1)}) + end; +re_char_class([C1|Cs0], Cc, St) when C1 /= $] -> + case re_char(C1, Cs0) of + {Cf,[$-,C2|Cs1]} when C2 /= $] -> + case re_char(C2, Cs1) of + {Cl,Cs2} when Cf < Cl -> + re_char_class(Cs2, [{range,Cf,Cl}|Cc], St); + {_,Cs2} -> + parse_error({char_class,string_between([C1|Cs0], Cs2)}) + end; + {C,Cs1} -> re_char_class(Cs1, [C|Cc], St) + end; +re_char_class(Cs, Cc, _) -> {reverse(Cc),Cs}. 
% Preserve order
+
+%% posix_cc(String) -> {PosixClass,RestString}.
+%% Handle POSIX character classes.
+
+posix_cc("alnum" ++ Cs) -> {alnum,Cs};
+posix_cc("alpha" ++ Cs) -> {alpha,Cs};
+posix_cc("blank" ++ Cs) -> {blank,Cs};
+posix_cc("cntrl" ++ Cs) -> {cntrl,Cs};
+posix_cc("digit" ++ Cs) -> {digit,Cs};
+posix_cc("graph" ++ Cs) -> {graph,Cs};
+posix_cc("lower" ++ Cs) -> {lower,Cs};
+posix_cc("print" ++ Cs) -> {print,Cs};
+posix_cc("punct" ++ Cs) -> {punct,Cs};
+posix_cc("space" ++ Cs) -> {space,Cs};
+posix_cc("upper" ++ Cs) -> {upper,Cs};
+posix_cc("xdigit" ++ Cs) -> {xdigit,Cs};
+posix_cc(Cs) -> parse_error({posix_cc,substr(Cs, 1, 5)}).
+
+escape_char($n) -> $\n;                         % \n = LF
+escape_char($r) -> $\r;                         % \r = CR
+escape_char($t) -> $\t;                         % \t = TAB
+escape_char($v) -> $\v;                         % \v = VT
+escape_char($b) -> $\b;                         % \b = BS
+escape_char($f) -> $\f;                         % \f = FF
+escape_char($e) -> $\e;                         % \e = ESC
+escape_char($s) -> $\s;                         % \s = SPACE
+escape_char($d) -> $\d;                         % \d = DEL
+escape_char(C) -> C.                            % Pass it straight through
+
+%% re_interval_range(Chars) -> {Min,Max,RestChars}.
+%% NoInt     -> none,none
+%% Int       -> Int,none
+%% Int,      -> Int,any
+%% Int1,Int2 -> Int1,Int2
+
+re_interval_range(Cs0) ->
+    case re_number(Cs0) of
+        {none,Cs1} -> {none,none,Cs1};
+        {N,[$,|Cs1]} ->
+            case re_number(Cs1) of
+                {none,Cs2} -> {N,any,Cs2};
+                {M,Cs2} -> {N,M,Cs2}
+            end;
+        {N,Cs1} -> {N,none,Cs1}
+    end.
+
+re_number([C|Cs]) when C >= $0, C =< $9 ->
+    re_number(Cs, C - $0);
+re_number(Cs) -> {none,Cs}.
+
+re_number([C|Cs], Acc) when C >= $0, C =< $9 ->
+    re_number(Cs, 10*Acc + (C - $0));
+re_number(Cs, Acc) -> {Acc,Cs}.
+
+string_between(Cs1, Cs2) ->
+    substr(Cs1, 1, length(Cs1)-length(Cs2)).
+
+%% We use standard methods, Thompson's construction and subset
+%% construction, to create first an NFA and then a DFA from the
+%% regexps. A non-standard feature is that we work with sets of
+%% character ranges (crs) instead of sets of characters. This is most
+%% noticeable when constructing DFAs. The major benefit is that we can
+%% handle characters from any set, not just limited ASCII or 8859,
+%% but even 16/32 bit Unicode.
+%%
+%% The whole range of characters is 0-maxchar, where maxchar is a BIG
+%% number. We don't make any assumptions about the size of maxchar, it
+%% is just bigger than any character.
+%%
+%% Using character ranges makes describing many regexps very simple,
+%% for example the regexp "." just becomes the range
+%% [{0-9},{11-maxchar}].
+
+%% make_dfa(RegExpActions, State) -> {DFA,DfaFirstState}.
+%% Build a DFA from a list of {RegExp,Action} via a combined NFA built
+%% with build_combined_nfa/1 below. The accept field of an NFA state
+%% has the values {accept,Action} | noaccept.
+
+make_dfa(REAs, St) ->
+    {NFA,NF} = build_combined_nfa(REAs),
+    verbose_print(St, "NFA contains ~w states, ", [tuple_size(NFA)]),
+    {DFA0,DF0} = build_dfa(NFA, NF),
+    verbose_print(St, "DFA contains ~w states, ", [length(DFA0)]),
+    {DFA,DF} = minimise_dfa(DFA0, DF0),
+    verbose_print(St, "minimised to ~w states.~n", [length(DFA)]),
+    %%io:fwrite("~p\n", [{NF,NFA}]),
+    %%io:fwrite("~p\n", [{DF0,DFA0}]),
+    %%io:fwrite("~p\n", [{DF,DFA}]),
+    {DFA,DF}.
+
+%% build_combined_nfa(RegExpActionList) -> {NFA,FirstState}.
+%% Build the combined NFA using Thompson's construction straight out
+%% of the book. Build the separate NFAs in the same order as the
+%% rules so that the accepting states have ascending state numbers.
+%% Start numbering the states from 1 as we put the states in a tuple
+%% with the state number as the index.
+%%
+%% The edges from a state are a list of {CharRange,State} | {epsilon,State}.
+
+build_combined_nfa(REAs) ->
+    {NFA0,Firsts,Free} = build_nfa_list(REAs, [], [], 1),
+    F = #nfa_state{no=Free,edges=epsilon_trans(Firsts)},
+    {list_to_tuple(keysort(#nfa_state.no, [F|NFA0])),Free}.
+
+build_nfa_list([{RE,Action}|REAs], NFA0, Firsts, Free0) ->
+    {NFA1,Free1,First} = build_nfa(RE, Free0, Action),
+    build_nfa_list(REAs, NFA1 ++ NFA0, [First|Firsts], Free1);
+build_nfa_list([], NFA, Firsts, Free) ->
+    {NFA,reverse(Firsts),Free}.
+
+epsilon_trans(Firsts) -> [ {epsilon,F} || F <- Firsts ].
+
+%% build_nfa(RegExp, NextState, Action) -> {NFA,NextState,FirstState}.
+%% When building the NFA states for a regexp we don't build the end
+%% state, just allocate a State for it and return this state's
+%% number. This allows us to avoid building unnecessary states for
+%% concatenation which would then have to be removed by overwriting
+%% an existing state.
+
+build_nfa(RE, N0, Action) ->
+    {NFA,N1,E} = build_nfa(RE, N0+1, N0, []),
+    {[#nfa_state{no=E,accept={accept,Action}}|NFA],N1,N0}.
+
+%% build_nfa(RegExp, NextState, FirstState, NFA) -> {NFA,NextState,EndState}.
+%% Build an NFA from the RegExp. NFA is a list of #nfa_state{} in no
+%% predefined order. NextState is the number of the next free state
+%% to use, FirstState is the state which must be the start for this
+%% regexp as a previous regexp refers to it, and EndState is the
+%% state to which this NFA will exit. The number of the returned
+%% EndState is already allocated!
+
+build_nfa({alt,REs}, N, F, NFA) ->
+    build_nfa_alt(REs, N, F, NFA);
+build_nfa({seq,REs}, N, F, NFA) ->
+    build_nfa_seq(REs, N, F, NFA);
+build_nfa({kclosure,RE}, N0, F, NFA0) ->
+    {NFA1,N1,E1} = build_nfa(RE, N0+1, N0, NFA0),
+    E = N1,                                     % End state
+    {[#nfa_state{no=F,edges=[{epsilon,N0},{epsilon,E}]},
+      #nfa_state{no=E1,edges=[{epsilon,N0},{epsilon,E}]}|NFA1],
+     N1+1,E};
+build_nfa({pclosure,RE}, N0, F, NFA0) ->
+    {NFA1,N1,E1} = build_nfa(RE, N0+1, N0, NFA0),
+    E = N1,                                     % End state
+    {[#nfa_state{no=F,edges=[{epsilon,N0}]},
+      #nfa_state{no=E1,edges=[{epsilon,N0},{epsilon,E}]}|NFA1],
+     N1+1,E};
+build_nfa({optional,RE}, N0, F, NFA0) ->
+    {NFA1,N1,E1} = build_nfa(RE, N0+1, N0, NFA0),
+    E = N1,                                     % End state
+    {[#nfa_state{no=F,edges=[{epsilon,N0},{epsilon,E}]},
+      #nfa_state{no=E1,edges=[{epsilon,E}]}|NFA1],
+     N1+1,E};
+build_nfa({char_class,Cc}, N, F, NFA) ->
+    {[#nfa_state{no=F,edges=[{pack_cc(Cc),N}]}|NFA],N+1,N};
+build_nfa({comp_class,Cc}, N, F, NFA) ->
+    {[#nfa_state{no=F,edges=[{comp_class(Cc),N}]}|NFA],N+1,N};
+build_nfa({lit,Cs}, N, F, NFA) ->               % Implicit concatenation
+    build_nfa_lit(Cs, N, F, NFA);
+build_nfa(epsilon, N, F, NFA) ->                % Just an epsilon transition
+    {[#nfa_state{no=F,edges=[{epsilon,N}]}|NFA],N+1,N}.
+
+%% build_nfa_lit(Chars, NextState, FirstState, NFA) -> {NFA,NextState,EndState}.
+%% Build an NFA for the sequence of literal characters.
+
+build_nfa_lit(Cs, N0, F0, NFA0) ->
+    foldl(fun (C, {NFA,N,F}) ->
+                  {[#nfa_state{no=F,edges=[{[{C,C}],N}]}|NFA],N+1,N}
+          end, {NFA0,N0,F0}, Cs).
+
+%% build_nfa_lit([C|Cs], N, F, NFA0) when is_integer(C) ->
+%%     NFA1 = [#nfa_state{no=F,edges=[{[{C,C}],N}]}|NFA0],
+%%     build_nfa_lit(Cs, N+1, N, NFA1);
+%% build_nfa_lit([], N, F, NFA) -> {NFA,N,F}.
+
+%% build_nfa_seq(REs, NextState, FirstState, NFA) -> {NFA,NextState,EndState}.
+%% Build an NFA for the regexps in a sequence.
+
+build_nfa_seq(REs, N0, F0, NFA0) ->
+    foldl(fun (RE, {NFA,N,F}) -> build_nfa(RE, N, F, NFA) end,
+          {NFA0,N0,F0}, REs).
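+
+%% As an illustration (a hand-evaluated example, not part of the
+%% original module): build_nfa({kclosure,{lit,"a"}}, 1, 0, []) returns
+%% {NFA,4,3}, where the states implement Thompson's star construction:
+%%   state 0: eps->1, eps->3 (enter the body, or skip it),
+%%   state 1: a->2 (the literal itself),
+%%   state 2: eps->1, eps->3 (loop back, or leave),
+%% and state 3 is the pre-allocated end state (built by build_nfa/3).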
+
+%% build_nfa_seq([RE|REs], N0, F, NFA0) ->
+%%     {NFA1,N1,E1} = build_nfa(RE, N0, F, NFA0),
+%%     build_nfa_seq(REs, N1, E1, NFA1);
+%% build_nfa_seq([], N, F, NFA) -> {NFA,N,F}.
+
+%% build_nfa_alt(REs, NextState, FirstState, NFA) -> {NFA,NextState,EndState}.
+%% Build an NFA for the regexps in an alternative. N.B. we don't
+%% handle empty alts here but the parser should never generate them
+%% anyway.
+
+build_nfa_alt([RE], N, F, NFA) -> build_nfa(RE, N, F, NFA);
+build_nfa_alt([RE|REs], N0, F, NFA0) ->
+    {NFA1,N1,E1} = build_nfa(RE, N0+1, N0, NFA0),
+    {NFA2,N2,E2} = build_nfa_alt(REs, N1+1, N1, NFA1),
+    E = N2,                                     % End state
+    {[#nfa_state{no=F,edges=[{epsilon,N0},{epsilon,N1}]},
+      #nfa_state{no=E1,edges=[{epsilon,E}]},
+      #nfa_state{no=E2,edges=[{epsilon,E}]}|NFA2],
+     N2+1,E}.
+
+%% build_nfa_alt(REs, NextState, FirstState, NFA) -> {NFA,NextState,EndState}.
+%% Build an NFA for the regexps in an alternative. Make one big
+%% epsilon split state, not necessary but fun.
+
+%% build_nfa_alt(REs, N0, F0, NFA0) ->
+%%     E = N0,                          % Must reserve End state first
+%%     {Fs,{NFA1,N1}} = mapfoldl(fun (RE, {NFA,N}) ->
+%%                                       build_nfa_alt1(RE, N, E, NFA)
+%%                               end, {NFA0,N0+1}, REs),
+%%     {[#nfa_state{no=F0,edges=epsilon_trans(Fs)},
+%%       #nfa_state{no=E,edges=[{epsilon,N1}]}|NFA1],N1+1,N1}.
+
+%% build_nfa_alt1(RE, N0, End, NFA0) ->
+%%     {NFA1,N1,E} = build_nfa(RE, N0+1, N0, NFA0),
+%%     {N0,{[#nfa_state{no=E,edges=[{epsilon,End}]}|NFA1],N1}}.
+
+%% pack_cc(CharClass) -> CharClass.
+%% Pack and optimise a character class specification (bracket
+%% expression). First sort it and then compact it.
+
+pack_cc(Cc) ->
+    Crs = foldl(fun ({range,Cf,Cl}, Set) -> add_element({Cf,Cl}, Set);
+                    (C, Set) -> add_element({C,C}, Set)
+                end, ordsets:new(), Cc),
+    pack_crs(Crs).                              % An ordset IS a list!
+
+pack_crs([{C1,C2}=Cr,{C3,C4}|Crs]) when C1 =< C3, C2 >= C4 ->
+    %% C1      C2
+    %%   C3  C4
+    pack_crs([Cr|Crs]);
+pack_crs([{C1,C2},{C3,C4}|Crs]) when C2 >= C3, C2 < C4 ->
+    %% C1    C2
+    %%    C3   C4
+    pack_crs([{C1,C4}|Crs]);
+pack_crs([{C1,C2},{C3,C4}|Crs]) when C2 + 1 == C3 ->
+    %% C1  C2
+    %%       C3  C4
+    pack_crs([{C1,C4}|Crs]);
+pack_crs([Cr|Crs]) -> [Cr|pack_crs(Crs)];
+pack_crs([]) -> [].
+
+comp_class(Cc) ->
+    Crs = pack_cc(Cc),
+    Comp = comp_crs(Crs, 0),
+    %% io:fwrite("comp: ~p\n      ~p\n", [Crs,Comp]),
+    Comp.
+
+comp_crs([{0,C2}|Crs], 0) ->                    % Get first range right
+    comp_crs(Crs, C2+1);
+comp_crs([{C1,C2}|Crs], Last) ->
+    [{Last,C1-1}|comp_crs(Crs, C2+1)];
+comp_crs([], Last) -> [{Last,maxchar}].
+
+%% build_dfa(NFA, NfaFirstState) -> {DFA,DfaFirstState}.
+%% Build a DFA from an NFA using "subset construction". The major
+%% difference from the book is that we keep the marked and unmarked
+%% DFA states in separate lists. New DFA states are added to the
+%% unmarked list and states are marked by moving them to the marked
+%% list. We assume that the NFA accepting state numbers are in
+%% ascending order for the rules and use ordsets to keep this order.
+
+build_dfa(NFA, Nf) ->
+    D = #dfa_state{no=0,nfa=eclosure([Nf], NFA)},
+    {build_dfa([D], 1, [], NFA),0}.
+
+%% build_dfa([UnMarked], NextState, [Marked], NFA) -> DFA.
+%% Traverse the unmarked states. Temporarily add the current unmarked
+%% state to the marked list before calculating its transitions; this
+%% is to avoid adding too many duplicate states. Add it properly to
+%% the marked list afterwards with the correct transitions.
+
+build_dfa([U|Us0], N0, Ms, NFA) ->
+    {Ts,Us1,N1} = build_dfa(U#dfa_state.nfa, Us0, N0, [], [U|Ms], NFA),
+    M = U#dfa_state{trans=Ts,accept=accept(U#dfa_state.nfa, NFA)},
+    build_dfa(Us1, N1, [M|Ms], NFA);
+build_dfa([], _, Ms, _) -> Ms.
+
+%% build_dfa([NfaState], [Unmarked], NextState, [Transition], [Marked], NFA) ->
+%%      {Transitions,UnmarkedStates,NextState}.
+%% For each NFA state set, calculate the legal transitions. N.B. we
+%% must search *BOTH* the unmarked and marked lists to check if a DFA
+%% state already exists. As the range of characters is potentially
+%% VERY large we cannot explicitly test all characters. Instead we
+%% first calculate the set of all disjoint character ranges which are
+%% possible transition candidates for the set of NFA states. The
+%% transitions are an orddict so we get the transition lists in
+%% ascending order.
+
+build_dfa(Set, Us, N, Ts, Ms, NFA) ->
+    %% List of all transition sets.
+    Crs0 = [Cr || S <- Set,
+                  {Crs,_St} <- (element(S, NFA))#nfa_state.edges,
+                  Crs /= epsilon,               % Not an epsilon transition
+                  Cr <- Crs ],
+    Crs1 = lists:usort(Crs0),                   % Must remove duplicates!
+    %% Build list of disjoint test ranges.
+    Test = disjoint_crs(Crs1),
+    %% io:fwrite("bd: ~p\n    ~p\n    ~p\n    ~p\n", [Set,Crs0,Crs1,Test]),
+    build_dfa(Test, Set, Us, N, Ts, Ms, NFA).
+
+%% disjoint_crs([CharRange]) -> [CharRange].
+%% Take a sorted list of char ranges and make a sorted list of
+%% disjoint char ranges. No new char range extends past an existing
+%% char range.
+
+disjoint_crs([{_C1,C2}=Cr1,{C3,_C4}=Cr2|Crs]) when C2 < C3 ->
+    %% C1  C2
+    %%        C3  C4
+    [Cr1|disjoint_crs([Cr2|Crs])];
+disjoint_crs([{C1,C2},{C3,C4}|Crs]) when C1 == C3 ->
+    %% C1     C2
+    %% C3        C4
+    [{C1,C2}|disjoint_crs(add_element({C2+1,C4}, Crs))];
+disjoint_crs([{C1,C2},{C3,C4}|Crs]) when C1 < C3, C2 >= C3, C2 < C4 ->
+    %% C1     C2
+    %%    C3     C4
+    [{C1,C3-1}|disjoint_crs(union([{C3,C2},{C2+1,C4}], Crs))];
+disjoint_crs([{C1,C2},{C3,C4}|Crs]) when C1 < C3, C2 == C4 ->
+    %% C1      C2
+    %%    C3   C4
+    [{C1,C3-1}|disjoint_crs(add_element({C3,C4}, Crs))];
+disjoint_crs([{C1,C2},{C3,C4}|Crs]) when C1 < C3, C2 > C4 ->
+    %% C1        C2
+    %%    C3  C4
+    [{C1,C3-1}|disjoint_crs(union([{C3,C4},{C4+1,C2}], Crs))];
+disjoint_crs([Cr|Crs]) -> [Cr|disjoint_crs(Crs)];
+disjoint_crs([]) -> [].
+
+build_dfa([Cr|Crs], Set, Us, N, Ts, Ms, NFA) ->
+    case eclosure(move(Set, Cr, NFA), NFA) of
+        S when S /= [] ->
+            case dfa_state_exist(S, Us, Ms) of
+                {yes,T} ->
+                    build_dfa(Crs, Set, Us, N, store(Cr, T, Ts), Ms, NFA);
+                no ->
+                    U = #dfa_state{no=N,nfa=S},
+                    build_dfa(Crs, Set, [U|Us], N+1, store(Cr, N, Ts), Ms, NFA)
+            end;
+        [] ->
+            build_dfa(Crs, Set, Us, N, Ts, Ms, NFA)
+    end;
+build_dfa([], _, Us, N, Ts, _, _) ->
+    {Ts,Us,N}.
+
+%% dfa_state_exist(Set, Unmarked, Marked) -> {yes,State} | no.
+
+dfa_state_exist(S, Us, Ms) ->
+    case keysearch(S, #dfa_state.nfa, Us) of
+        {value,#dfa_state{no=T}} -> {yes,T};
+        false ->
+            case keysearch(S, #dfa_state.nfa, Ms) of
+                {value,#dfa_state{no=T}} -> {yes,T};
+                false -> no
+            end
+    end.
+
+%% eclosure([State], NFA) -> [State].
+%% move([State], Char, NFA) -> [State].
+%% These are straight out of the book. As eclosure uses ordsets the
+%% generated state sets are in ascending order.
+
+eclosure(Sts, NFA) -> eclosure(Sts, NFA, []).
+
+eclosure([St|Sts], NFA, Ec) ->
+    #nfa_state{edges=Es} = element(St, NFA),
+    eclosure([ N || {epsilon,N} <- Es,
+                    not is_element(N, Ec) ] ++ Sts,
+             NFA, add_element(St, Ec));
+eclosure([], _, Ec) -> Ec.
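+
+%% Continuing the "a*" illustration above (hand-evaluated, not part of
+%% the original module): once build_nfa/3 has added the accepting end
+%% state 3, eclosure([0], NFA) = [0,1,3] is the start DFA state,
+%% move([0,1,3], {$a,$a}, NFA) = [2], and eclosure([2], NFA) = [1,2,3]
+%% becomes the second DFA state.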
+
+move(Sts, Cr, NFA) ->
+    %% io:fwrite("move1: ~p\n", [{Sts,Cr}]),
+    [ St || N <- Sts,
+            {Crs,St} <- (element(N, NFA))#nfa_state.edges,
+            Crs /= epsilon,                     % Not an epsilon transition
+            in_crs(Cr, Crs) ].
+
+in_crs({C1,C2}, [{C3,C4}|_Crs]) when C1 >= C3, C2 =< C4 -> true;
+in_crs(Cr, [Cr|_Crs]) -> true;                  % Catch bos and eos.
+in_crs(Cr, [_|Crs]) -> in_crs(Cr, Crs);
+in_crs(_Cr, []) -> false.
+
+%% accept([State], NFA) -> {accept,A} | noaccept.
+%% Scan down the state list until we find an accepting state.
+
+accept([St|Sts], NFA) ->
+    case element(St, NFA) of
+        #nfa_state{accept={accept,A}} -> {accept,A};
+        #nfa_state{accept=noaccept} -> accept(Sts, NFA)
+    end;
+accept([], _) -> noaccept.
+
+%% minimise_dfa(DFA, DfaFirst) -> {DFA,DfaFirst}.
+%% Minimise the DFA by removing equivalent states. Two states are
+%% considered equivalent if both their transitions and their accept
+%% actions are the same. First repeatedly run through the DFA state
+%% list, removing equivalent states and updating the remaining
+%% transitions with the remaining equivalent state numbers. When no
+%% more reductions are possible, pack the remaining state numbers to
+%% get consecutive states.
+
+minimise_dfa(DFA0, Df0) ->
+    case min_dfa(DFA0) of
+        {DFA1,[]} ->                            % No reduction!
+            {DFA2,Rs} = pack_dfa(DFA1),
+            {min_update(DFA2, Rs),min_use(Df0, Rs)};
+        {DFA1,Rs} ->
+            minimise_dfa(min_update(DFA1, Rs), min_use(Df0, Rs))
+    end.
+
+min_dfa(DFA) -> min_dfa(DFA, [], []).
+
+min_dfa([D|DFA0], Rs0, MDFA) ->
+    {DFA1,Rs1} = min_delete(DFA0, D#dfa_state.trans, D#dfa_state.accept,
+                            D#dfa_state.no, Rs0, []),
+    min_dfa(DFA1, Rs1, [D|MDFA]);
+min_dfa([], Rs, MDFA) -> {MDFA,Rs}.
+
+%% min_delete(States, Trans, Action, NewN, Rs, MiniDFA) -> {MiniDFA,Rs}.
+%% Delete all states with the same transitions and accept action.
+%% Return the rewrites and the minimised DFA with no duplicate states.
+
+min_delete([#dfa_state{no=N,trans=T,accept=A}|DFA], T, A, NewN, Rs, MDFA) ->
+    min_delete(DFA, T, A, NewN, [{N,NewN}|Rs], MDFA);
+min_delete([D|DFA], T, A, NewN, Rs, MDFA) ->
+    min_delete(DFA, T, A, NewN, Rs, [D|MDFA]);
+min_delete([], _, _, _, Rs, MDFA) -> {MDFA,Rs}.
+
+min_update(DFA, Rs) ->
+    [ D#dfa_state{trans=min_update_trans(D#dfa_state.trans, Rs)} || D <- DFA ].
+
+min_update_trans(Tr, Rs) ->
+    [ {C,min_use(S, Rs)} || {C,S} <- Tr ].
+
+min_use(Old, [{Old,New}|_]) -> New;
+min_use(Old, [_|Reds]) -> min_use(Old, Reds);
+min_use(Old, []) -> Old.
+
+pack_dfa(DFA) -> pack_dfa(DFA, 0, [], []).
+
+pack_dfa([D|DFA], NewN, Rs, PDFA) ->
+    pack_dfa(DFA, NewN+1,
+             [{D#dfa_state.no,NewN}|Rs], [D#dfa_state{no=NewN}|PDFA]);
+pack_dfa([], _, Rs, PDFA) -> {PDFA,Rs}.
+
+%% The main output is the yystate function which is built from the
+%% DFA. It has the spec:
+%%
+%% yystate() -> InitialState.
+%% yystate(State, InChars, Line, CurrTokLen, AcceptAction, AcceptLen) ->
+%%      {Action, AcceptLength, RestChars, Line} |          Accepting end state
+%%      {Action, AcceptLength, RestChars, Line, State} |   Accepting state
+%%      {reject, AcceptLength, CurrTokLen, RestChars, Line, State} |
+%%      {Action, AcceptLength, CurrTokLen, RestChars, Line, State}.
+
+%% The returned CurrTokLen is always the current number of characters
+%% scanned in the current token. The returns have the following
+%% meanings:
+%% {Action, AcceptLength, RestChars, Line} -
+%%  The scanner has reached an accepting end-state, for example after
+%%  a regexp "abc". Action is the action number and AcceptLength is
+%%  the length of the matching token.
+%%
+%% {Action, AcceptLength, RestChars, Line, State} -
+%%  The scanner has reached an accepting transition state, for example
+%%  after c in regexp "abc(xyz)?"; continuation depends on RestChars.
+%%  If RestChars == [] (no more current characters) then we need to
+%%  get more characters to see if it is an end-state; otherwise (eof,
+%%  or more characters but no continuing match) it is an end-state.
+%%
+%% {reject, AcceptLength, CurrTokLen, RestChars, Line, State} -
+%% {Action, AcceptLength, CurrTokLen, RestChars, Line, State} -
+%%  The scanner has reached a non-accepting transition state. If
+%%  RestChars == [] we need to get more characters to continue.
+%%  Otherwise, if 'reject', then no accepting state has been reached
+%%  and it is an error. If we have an Action and AcceptLength then
+%%  these are from the last accepting state; use them and continue
+%%  from there.
+
+%% out_file(LeexState, DFA, DfaStart, [Action], Code) -> ok | error.
+%% Generate an output .erl file from the include file, the DFA and
+%% the code for the actions.
+
+out_file(St0, DFA, DF, Actions, Code) ->
+    verbose_print(St0, "Writing file ~s, ", [St0#leex.efile]),
+    case open_inc_file(St0) of
+        {ok,Ifile} ->
+            try
+                case file:open(St0#leex.efile, [write]) of
+                    {ok,Ofile} ->
+                        try
+                            output_file_directive(Ofile, St0#leex.ifile, 0),
+                            out_file(Ifile, Ofile, St0, DFA, DF, Actions,
+                                     Code, 1),
+                            verbose_print(St0, "ok~n", []),
+                            St0
+                        after file:close(Ofile)
+                        end;
+                    {error,Error} ->
+                        verbose_print(St0, "error~n", []),
+                        add_error({none,leex,{file_error,Error}}, St0)
+                end
+            after file:close(Ifile)
+            end;
+        {{error,Error},Ifile} ->
+            add_error(Ifile, {none,leex,{file_error,Error}}, St0)
+    end.
+
+open_inc_file(State) ->
+    Ifile = State#leex.ifile,
+    case file:open(Ifile, [read]) of
+        {ok,F} -> {ok,F};
+        Error -> {Error,Ifile}
+    end.
+
+inc_file_name([]) ->
+    Incdir = filename:join(code:lib_dir(parsetools), "include"),
+    filename:join(Incdir, ?LEEXINC);
+inc_file_name(Filename) ->
+    Filename.
+
+%% out_file(IncFile, OutFile, State, DFA, DfaStart, Actions, Code, Line) -> ok
+%% Copy the include file line by line, substituting special lines with
+%% generated code. We cheat by only looking at the first 5 characters.
+
+out_file(Ifile, Ofile, St, DFA, DF, Actions, Code, L) ->
+    case io:get_line(Ifile, leex) of
+        eof -> output_file_directive(Ofile, St#leex.ifile, L);
+        Line ->
+            case substr(Line, 1, 5) of
+                "##mod" -> out_module(Ofile, St);
+                "##cod" -> out_erlang_code(Ofile, St, Code, L);
+                "##dfa" -> out_dfa(Ofile, St, DFA, Code, DF, L);
+                "##act" -> out_actions(Ofile, St#leex.xfile, Actions);
+                _ -> io:put_chars(Ofile, Line)
+            end,
+            out_file(Ifile, Ofile, St, DFA, DF, Actions, Code, L+1)
+    end.
+
+out_module(File, St) ->
+    io:fwrite(File, "-module(~w).\n", [St#leex.module]).
+
+out_erlang_code(File, St, Code, L) ->
+    {CodeL,CodePos,_NCodeLines} = Code,
+    output_file_directive(File, St#leex.xfile, CodeL),
+    {ok,Xfile} = file:open(St#leex.xfile, [read]),
+    try
+        {ok,_} = file:position(Xfile, CodePos),
+        {ok,_} = file:copy(Xfile, File)
+    after
+        file:close(Xfile)
+    end,
+    io:nl(File),
+    output_file_directive(File, St#leex.ifile, L).
+
+out_dfa(File, St, DFA, Code, DF, L) ->
+    {_CodeL,_CodePos,NCodeLines} = Code,
+    %% Three file attributes before this one...
+ output_file_directive(File, St#leex.efile, L+(NCodeLines-1)+3), + io:fwrite(File, "yystate() -> ~w.~n~n", [DF]), + foreach(fun (S) -> out_trans(File, S) end, DFA), + io:fwrite(File, "yystate(S, Ics, Line, Tlen, Action, Alen) ->~n", []), + io:fwrite(File, " {Action,Alen,Tlen,Ics,Line,S}.~n", []). + +out_trans(File, #dfa_state{no=N,trans=[],accept={accept,A}}) -> + %% Accepting end state, guaranteed done. + io:fwrite(File, "yystate(~w, Ics, Line, Tlen, _, _) ->~n", [N]), + io:fwrite(File, " {~w,Tlen,Ics,Line};~n", [A]); +out_trans(File, #dfa_state{no=N,trans=Tr,accept={accept,A}}) -> + %% Accepting state, but there maybe more. + foreach(fun (T) -> out_accept_tran(File, N, A, T) end, pack_trans(Tr)), + io:fwrite(File, "yystate(~w, Ics, Line, Tlen, _, _) ->~n", [N]), + io:fwrite(File, " {~w,Tlen,Ics,Line,~w};~n", [A,N]); +out_trans(File, #dfa_state{no=N,trans=Tr,accept=noaccept}) -> + %% Non-accepting transition state. + foreach(fun (T) -> out_noaccept_tran(File, N, T) end, pack_trans(Tr)), + io:fwrite(File, "yystate(~w, Ics, Line, Tlen, Action, Alen) ->~n", [N]), + io:fwrite(File, " {Action,Alen,Tlen,Ics,Line,~w};~n", [N]). + +out_accept_tran(File, N, A, {{Cf,maxchar},S}) -> + out_accept_head_max(File, N, Cf), + out_accept_body(File, S, "Line", A); +out_accept_tran(File, N, A, {{Cf,Cl},S}) -> + out_accept_head_range(File, N, Cf, Cl), + out_accept_body(File, S, "Line", A); +out_accept_tran(File, N, A, {$\n,S}) -> + out_accept_head_1(File, N, $\n), + out_accept_body(File, S, "Line+1", A); +out_accept_tran(File, N, A, {C,S}) -> + out_accept_head_1(File, N, C), + out_accept_body(File, S, "Line", A). + +out_accept_head_1(File, State, Char) -> + out_head_1(File, State, Char, "_", "_"). + +out_accept_head_max(File, State, Min) -> + out_head_max(File, State, Min, "_", "_"). + +out_accept_head_range(File, State, Min, Max) -> + out_head_range(File, State, Min, Max, "_", "_"). + +out_accept_body(File, Next, Line, Action) -> + out_body(File, Next, Line, io_lib:write(Action), "Tlen"). + +out_noaccept_tran(File, N, {{Cf,maxchar},S}) -> + out_noaccept_head_max(File, N, Cf), + out_noaccept_body(File, S, "Line"); +out_noaccept_tran(File, N, {{Cf,Cl},S}) -> + out_noaccept_head_range(File, N, Cf, Cl), + out_noaccept_body(File, S, "Line"); +out_noaccept_tran(File, N, {$\n,S}) -> + out_noaccept_head_1(File, N, $\n), + out_noaccept_body(File, S, "Line+1"); +out_noaccept_tran(File, N, {C,S}) -> + out_noaccept_head_1(File, N, C), + out_noaccept_body(File, S, "Line"). + +out_noaccept_head_1(File, State, Char) -> + out_head_1(File, State, Char, "Action", "Alen"). + +out_noaccept_head_max(File, State, Min) -> + out_head_max(File, State, Min, "Action", "Alen"). + +out_noaccept_head_range(File, State, Min, Max) -> + out_head_range(File, State, Min, Max, "Action", "Alen"). + +out_noaccept_body(File, Next, Line) -> + out_body(File, Next, Line, "Action", "Alen"). + +out_head_1(File, State, Char, Action, Alen) -> + io:fwrite(File, "yystate(~w, [~w|Ics], Line, Tlen, ~s, ~s) ->\n", + [State,Char,Action,Alen]). + +out_head_max(File, State, Min, Action, Alen) -> + io:fwrite(File, "yystate(~w, [C|Ics], Line, Tlen, ~s, ~s) when C >= ~w ->\n", + [State,Action,Alen,Min]). + +out_head_range(File, State, Min, Max, Action, Alen) -> + io:fwrite(File, "yystate(~w, [C|Ics], Line, Tlen, ~s, ~s) when C >= ~w, C =< ~w ->\n", + [State,Action,Alen,Min,Max]). + +out_body(File, Next, Line, Action, Alen) -> + io:fwrite(File, " yystate(~w, Ics, ~s, Tlen+1, ~s, ~s);\n", + [Next,Line,Action,Alen]). 
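+
+%% For example (illustrative; the exact numbers depend on the generated
+%% DFA), an accepting state 1 with action 1 that loops on a-z is
+%% written out by out_trans/2 above as:
+%%
+%% yystate(1, [C|Ics], Line, Tlen, _, _) when C >= 97, C =< 122 ->
+%%     yystate(1, Ics, Line, Tlen+1, 1, Tlen);
+%% yystate(1, Ics, Line, Tlen, _, _) ->
+%%     {1,Tlen,Ics,Line,1};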
+
+%% pack_trans([{Crange,State}]) -> [{Crange,State}] when
+%%      Crange = {Char,Char} | Char.
+%% Pack the transition table into something more suitable for
+%% generating code. We KNOW how the pattern matching compiler works
+%% so solitary characters are stored before ranges. We do this by
+%% prepending singletons to the front of the packed transitions and
+%% appending ranges to the back. This preserves the smallest to
+%% largest order of ranges. Newline characters, $\n, are always
+%% extracted and handled as singletons.
+
+pack_trans(Trs) -> pack_trans(Trs, []).
+
+%% pack_trans(Trs) ->
+%%     Trs1 = pack_trans(Trs, []),
+%%     io:fwrite("tr:~p\n=> ~p\n", [Trs,Trs1]),
+%%     Trs1.
+
+pack_trans([{{C,C},S}|Trs], Pt) ->              % Singletons to the head
+    pack_trans(Trs, [{C,S}|Pt]);
+%% Special detection and handling of $\n.
+pack_trans([{{Cf,$\n},S}|Trs], Pt) ->
+    pack_trans([{{Cf,$\n-1},S}|Trs], [{$\n,S}|Pt]);
+pack_trans([{{$\n,Cl},S}|Trs], Pt) ->
+    pack_trans([{{$\n+1,Cl},S}|Trs], [{$\n,S}|Pt]);
+pack_trans([{{Cf,Cl},S}|Trs], Pt) when Cf < $\n, Cl > $\n ->
+    pack_trans([{{Cf,$\n-1},S},{{$\n+1,Cl},S}|Trs], [{$\n,S}|Pt]);
+%% Small ranges become singletons.
+pack_trans([{{Cf,Cl},S}|Trs], Pt) when Cl == Cf + 1 ->
+    pack_trans(Trs, [{Cf,S},{Cl,S}|Pt]);
+pack_trans([Tr|Trs], Pt) ->                     % The default uninteresting case
+    pack_trans(Trs, Pt ++ [Tr]);
+pack_trans([], Pt) -> Pt.
+
+%% out_actions(File, XrlFile, ActionList) -> ok.
+%% Write out the action table.
+
+out_actions(File, XrlFile, As) ->
+    As1 = prep_out_actions(As),
+    foreach(fun (A) -> out_action(File, A) end, As1),
+    io:fwrite(File, "yyaction(_, _, _, _) -> error.~n", []),
+    foreach(fun (A) -> out_action_code(File, XrlFile, A) end, As1).
+
+prep_out_actions(As) ->
+    map(fun ({A,empty_action}) ->
+                {A,empty_action};
+            ({A,Code,TokenChars,TokenLen,TokenLine}) ->
+                Vs = [{TokenChars,"TokenChars"},
+                      {TokenLen,"TokenLen"},
+                      {TokenLine,"TokenLine"},
+                      {TokenChars,"YYtcs"},
+                      {TokenLen or TokenChars,"TokenLen"}],
+                Vars = [if F -> S; true -> "_" end || {F,S} <- Vs],
+                Name = list_to_atom(lists:concat([yyaction_,A])),
+                [Chars,Len,Line,_,_] = Vars,
+                Args = [V || V <- [Chars,Len,Line], V =/= "_"],
+                ArgsChars = string:join(Args, ", "),
+                {A,Code,Vars,Name,Args,ArgsChars}
+        end, As).
+
+out_action(File, {A,empty_action}) ->
+    io:fwrite(File, "yyaction(~w, _, _, _) -> skip_token;~n", [A]);
+out_action(File, {A,_Code,Vars,Name,_Args,ArgsChars}) ->
+    [_,_,Line,Tcs,Len] = Vars,
+    io:fwrite(File, "yyaction(~w, ~s, ~s, ~s) ->~n", [A,Len,Tcs,Line]),
+    if
+        Tcs =/= "_" ->
+            io:fwrite(File, "    TokenChars = yypre(YYtcs, TokenLen),~n", []);
+        true -> ok
+    end,
+    io:fwrite(File, "    ~s(~s);~n", [Name, ArgsChars]).
+
+out_action_code(_File, _XrlFile, {_A,empty_action}) ->
+    ok;
+out_action_code(File, XrlFile, {_A,Code,_Vars,Name,Args,ArgsChars}) ->
+    %% Should set the file to the .erl file, but instead assumes that
+    %% ?LEEXINC is syntactically correct.
+    io:fwrite(File, "\n-compile({inline,~w/~w}).\n", [Name, length(Args)]),
+    {line, L} = erl_scan:token_info(hd(Code), line),
+    output_file_directive(File, XrlFile, L-2),
+    io:fwrite(File, "~s(~s) ->~n", [Name, ArgsChars]),
+    io:fwrite(File, "    ~s\n", [pp_tokens(Code, L)]).
+
+%% pp_tokens(Tokens, Line) -> [char()].
+%% Prints the tokens keeping the line breaks of the original code.
+
+pp_tokens(Tokens, Line0) -> pp_tokens(Tokens, Line0, none).
+ +pp_tokens([], _Line0, _) -> []; +pp_tokens([T | Ts], Line0, Prev) -> + {line, Line} = erl_scan:token_info(T, line), + [pp_sep(Line, Line0, Prev, T), pp_symbol(T) | pp_tokens(Ts, Line, T)]. + +pp_symbol({var,_,Var}) -> atom_to_list(Var); +pp_symbol({_,_,Symbol}) -> io_lib:fwrite("~p", [Symbol]); +pp_symbol({dot, _}) -> "."; +pp_symbol({Symbol, _}) -> atom_to_list(Symbol). + +pp_sep(Line, Line0, Prev, T) when Line > Line0 -> + ["\n " | pp_sep(Line - 1, Line0, Prev, T)]; +pp_sep(_, _, {'.',_}, _) -> ""; % No space after '.' (not a dot) +pp_sep(_, _, {'#',_}, _) -> ""; % No space after '#' +pp_sep(_, _, {'(',_}, _) -> ""; % No space after '(' +pp_sep(_, _, {'[',_}, _) -> ""; % No space after '[' +pp_sep(_, _, _, {'.',_}) -> ""; % No space before '.' +pp_sep(_, _, _, {'#',_}) -> ""; % No space before '#' +pp_sep(_, _, _, {',',_}) -> ""; % No space before ',' +pp_sep(_, _, _, {')',_}) -> ""; % No space before ')' +pp_sep(_, _, _, _) -> " ". + +%% out_dfa_graph(LeexState, DFA, DfaStart) -> ok | error. +%% Writes the DFA to a .dot file in DOT-format which can be viewed +%% with Graphviz. + +out_dfa_graph(St, DFA, DF) -> + verbose_print(St, "Writing DFA to file ~s, ", [St#leex.gfile]), + case file:open(St#leex.gfile, [write]) of + {ok,Gfile} -> + try + io:fwrite(Gfile, "digraph DFA {~n", []), + out_dfa_states(Gfile, DFA, DF), + out_dfa_edges(Gfile, DFA), + io:fwrite(Gfile, "}~n", []), + verbose_print(St, "ok~n", []), + St + after file:close(Gfile) + end; + {error,Error} -> + verbose_print(St, "error~n", []), + add_error({none,leex,{file_error,Error}}, St) + end. + +out_dfa_states(File, DFA, DF) -> + foreach(fun (S) -> out_dfa_state(File, DF, S) end, DFA), + io:fwrite(File, "~n", []). + +out_dfa_state(File, DF, #dfa_state{no=DF, accept={accept,_}}) -> + io:fwrite(File, " ~b [shape=doublecircle color=green];~n", [DF]); +out_dfa_state(File, DF, #dfa_state{no=DF, accept=noaccept}) -> + io:fwrite(File, " ~b [shape=circle color=green];~n", [DF]); +out_dfa_state(File, _, #dfa_state{no=S, accept={accept,_}}) -> + io:fwrite(File, " ~b [shape=doublecircle];~n", [S]); +out_dfa_state(File, _, #dfa_state{no=S, accept=noaccept}) -> + io:fwrite(File, " ~b [shape=circle];~n", [S]). + +out_dfa_edges(File, DFA) -> + foreach(fun (#dfa_state{no=S,trans=Trans}) -> + Pt = pack_trans(Trans), + Tdict = foldl(fun ({Cr,T}, D) -> + orddict:append(T, Cr, D) + end, orddict:new(), Pt), + foreach(fun (T) -> + Crs = orddict:fetch(T, Tdict), + Edgelab = dfa_edgelabel(Crs), + io:fwrite(File, " ~b -> ~b [label=\"~s\"];~n", + [S,T,Edgelab]) + end, sort(orddict:fetch_keys(Tdict))) + end, DFA). + +dfa_edgelabel([C]) when is_integer(C) -> quote(C); +dfa_edgelabel(Cranges) -> + %% io:fwrite("el: ~p\n", [Cranges]), + "[" ++ map(fun ({A,B}) -> [quote(A), "-", quote(B)]; + (C) -> [quote(C)] + end, Cranges) ++ "]". + +output_file_directive(File, Filename, Line) -> + io:fwrite(File, <<"-file(~s, ~w).\n">>, + [format_filename(Filename), Line]). + +format_filename(Filename) -> + io_lib:write_string(filename:flatten(Filename)). + +quote($^) -> "\\^"; +quote($.) -> "\\."; +quote($$) -> "\\$"; +quote($-) -> "\\-"; +quote($[) -> "\\["; +quote($]) -> "\\]"; +quote($\s) -> "\\\\s"; +quote($\") -> "\\\""; +quote($\b) -> "\\\\b"; +quote($\f) -> "\\\\f"; +quote($\n) -> "\\\\n"; +quote($\r) -> "\\\\r"; +quote($\t) -> "\\\\t"; +quote($\e) -> "\\\\e"; +quote($\v) -> "\\\\v"; +quote($\d) -> "\\\\d"; +quote($\\) -> "\\\\"; +quote(C) when is_integer(C) -> + %% Must remove the $ and get the \'s right. 
+ case io_lib:write_unicode_char(C) of + [$$,$\\|Cs] -> "\\\\" ++ Cs; + [$$|Cs] -> Cs + end; +quote(maxchar) -> + "MAXCHAR". diff --git a/lib/parsetools/src/parsetools.app.src b/lib/parsetools/src/parsetools.app.src new file mode 100644 index 0000000000..af62fc4f6b --- /dev/null +++ b/lib/parsetools/src/parsetools.app.src @@ -0,0 +1,33 @@ +{application, parsetools, + [{description, "XLATETOOLS CXC 138 xx"}, + {vsn, "%VSN%"}, + {modules, [leex, + yecc, + yeccparser, + yeccscan + ] + }, + {registered,[]}, + {applications, [kernel,stdlib]}, + {env, [{file_util_search_methods,[{"", ""}, {"ebin", "esrc"}, {"ebin", "src"}]} + ] + } + ] +}. + + + + + + + + + + + + + + + + + diff --git a/lib/parsetools/src/parsetools.appup.src b/lib/parsetools/src/parsetools.appup.src new file mode 100644 index 0000000000..54a63833e6 --- /dev/null +++ b/lib/parsetools/src/parsetools.appup.src @@ -0,0 +1 @@ +{"%VSN%",[],[]}. diff --git a/lib/parsetools/src/yecc.erl b/lib/parsetools/src/yecc.erl new file mode 100644 index 0000000000..f4d76f471d --- /dev/null +++ b/lib/parsetools/src/yecc.erl @@ -0,0 +1,2531 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% Yacc like LALR-1 parser generator for Erlang. +%% Ref: Aho & Johnson: "LR Parsing", ACM Computing Surveys, vol. 6:2, 1974. +%% Auxiliary files: yeccgramm.yrl, yeccparser.erl, yeccpre.hrl, yeccscan.erl. +%% + +-module(yecc). + +-export([compile/3, file/1, file/2, format_error/1]). + +%% Kept for compatibility with R10B. +-export([yecc/2, yecc/3, yecc/4]). + +-import(lists, [append/1, append/2, concat/1, delete/2, filter/2, + flatmap/2, foldl/3, foldr/3, foreach/2, keydelete/3, + keysearch/3, keysort/2, last/1, map/2, member/2, + reverse/1, sort/1, usort/1]). + +-include("erl_compile.hrl"). +-include("ms_transform.hrl"). + +-record(yecc, { + infile, + outfile, + includefile, + includefile_version, + module, + options = [], + verbose = false, + file_attrs = true, + errors = [], + warnings = [], + conflicts_done = false, + shift_reduce = [], + reduce_reduce = [], + n_states = 0, + inport, + outport, + line, + + parse_actions, + symbol_tab, + inv_symbol_tab, + state_tab, + prec_tab, + goto_tab, + + terminals = [], + nonterminals = [], + all_symbols = [], + prec = [], + rules_list = [], + rules, % a tuple of rules_list + rule_pointer2rule, + rootsymbol = [], + endsymbol = [], + expect_shift_reduce = [], + expect_n_states = [], + header = [], + erlang_code = none + }). + +-record(rule, { + n, % rule n in the grammar file + line, + symbols, % the names of symbols + tokens, + is_guard, % the action is a guard (not used) + is_well_formed % can be parsed (without macro expansion) + }). + +-record(reduce, { + rule_nmbr, + head, + nmbr_of_daughters, + prec, + unused % assure that #reduce{} comes before #shift{} when sorting + }). + +-record(shift, { + state, + pos, + prec, + rule_nmbr + }). 
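+
+%% Schematically (an informal summary, not code from this module): for
+%% a given state and lookahead terminal the parser either performs a
+%% #shift{} (consume the terminal and move to 'state') or a #reduce{}
+%% (pop 'nmbr_of_daughters' values off the stack and replace them with
+%% 'head'); 'prec' holds the precedence used when resolving conflicts
+%% between the two.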
+ +-record(user_code, {state, terminal, funname, action}). + +-record(symbol, {line = none, name}). + +%% ACCEPT is neither an atom nor a non-terminal. +-define(ACCEPT, {}). + +%% During the phase 'compute_states' terminals in lookahead sets are +%% coded as integers; sets of terminals are integer bit masks. This is +%% for efficiency only. '$empty' is always given the mask 1. The +%% behaviour can be turned off by un-defining SYMBOLS_AS_CODES (useful +%% when debugging). + +%% Non-terminals are also given integer codes, starting with -1. The +%% absolut value of the code is used for indexing a tuple of lists of +%% rules. + +-define(SYMBOLS_AS_CODES, true). + +-ifdef(SYMBOLS_AS_CODES). +-define(EMPTY, 0). +-else. +-define(EMPTY, '$empty'). +-endif. + +%%% +%%% Exported functions +%%% + +%%% Interface to erl_compile. + +compile(Input0, Output0, + #options{warning = WarnLevel, verbose=Verbose, includes=Includes}) -> + Input = shorten_filename(Input0), + Output = shorten_filename(Output0), + Includefile = lists:sublist(Includes, 1), + Opts = [{parserfile,Output}, {includefile,Includefile}, {verbose,Verbose}, + {report_errors, true}, {report_warnings, WarnLevel > 0}], + case file(Input, Opts) of + {ok, _OutFile} -> + ok; + error -> + error + end. + +format_error(bad_declaration) -> + io_lib:fwrite("unknown or bad declaration, ignored", []); +format_error({bad_expect, SymName}) -> + io_lib:fwrite("argument ~s of Expect is not an integer", + [format_symbol(SymName)]); +format_error({bad_rootsymbol, SymName}) -> + io_lib:fwrite("rootsymbol ~s is not a nonterminal", + [format_symbol(SymName)]); +format_error({bad_states, SymName}) -> + io_lib:fwrite("argument ~s of States is not an integer", + [format_symbol(SymName)]); +format_error({conflict, Conflict}) -> + format_conflict(Conflict); +format_error({conflicts, SR, RR}) -> + io_lib:fwrite("conflicts: ~w shift/reduce, ~w reduce/reduce", [SR, RR]); +format_error({duplicate_declaration, Tag}) -> + io_lib:fwrite("duplicate declaration of ~s", [atom_to_list(Tag)]); +format_error({duplicate_nonterminal, Nonterminal}) -> + io_lib:fwrite("duplicate non-terminals ~s", + [format_symbol(Nonterminal)]); +format_error({duplicate_precedence, Op}) -> + io_lib:fwrite("duplicate precedence operator ~s", + [format_symbol(Op)]); +format_error({duplicate_terminal, Terminal}) -> + io_lib:fwrite("duplicate terminal ~s", + [format_symbol(Terminal)]); +format_error({endsymbol_is_nonterminal, Symbol}) -> + io_lib:fwrite("endsymbol ~s is a nonterminal", + [format_symbol(Symbol)]); +format_error({endsymbol_is_terminal, Symbol}) -> + io_lib:fwrite("endsymbol ~s is a terminal", + [format_symbol(Symbol)]); +format_error({error, Module, Error}) -> + Module:format_error(Error); +format_error({file_error, Reason}) -> + io_lib:fwrite("~s",[file:format_error(Reason)]); +format_error(illegal_empty) -> + io_lib:fwrite("illegal use of empty symbol", []); +format_error({internal_error, Error}) -> + io_lib:fwrite("internal yecc error: ~w", [Error]); +format_error({missing_syntax_rule, Nonterminal}) -> + io_lib:fwrite("no syntax rule for non-terminal symbol ~s", + [format_symbol(Nonterminal)]); +format_error({n_states, Exp, N}) -> + io_lib:fwrite("expected ~w states, but got ~p states", [Exp, N]); +format_error(no_grammar_rules) -> + io_lib:fwrite("grammar rules are missing", []); +format_error(nonterminals_missing) -> + io_lib:fwrite("Nonterminals is missing", []); +format_error({precedence_op_is_endsymbol, SymName}) -> + io_lib:fwrite("precedence operator ~s is endsymbol", + 
[format_symbol(SymName)]); +format_error({precedence_op_is_unknown, SymName}) -> + io_lib:fwrite("unknown precedence operator ~s", + [format_symbol(SymName)]); +format_error({reserved, N}) -> + io_lib:fwrite("the use of ~w should be avoided", [N]); +format_error({symbol_terminal_and_nonterminal, SymName}) -> + io_lib:fwrite("symbol ~s is both a terminal and nonterminal", + [format_symbol(SymName)]); +format_error(rootsymbol_missing) -> + io_lib:fwrite("Rootsymbol is missing", []); +format_error(terminals_missing) -> + io_lib:fwrite("Terminals is missing", []); +format_error({undefined_nonterminal, Symbol}) -> + io_lib:fwrite("undefined nonterminal: ~s", [format_symbol(Symbol)]); +format_error({undefined_pseudo_variable, Atom}) -> + io_lib:fwrite("undefined pseudo variable ~w", [Atom]); +format_error({undefined_symbol, SymName}) -> + io_lib:fwrite("undefined rhs symbol ~s", [format_symbol(SymName)]); +format_error({unused_nonterminal, Nonterminal}) -> + io_lib:fwrite("non-terminal symbol ~s not used", + [format_symbol(Nonterminal)]); +format_error({unused_terminal, Terminal}) -> + io_lib:fwrite("terminal symbol ~s not used", + [format_symbol(Terminal)]). + +file(File) -> + file(File, [report_errors, report_warnings]). + +file(File, Options) -> + case is_filename(File) of + no -> erlang:error(badarg, [File, Options]); + _ -> ok + end, + case options(Options) of + badarg -> + erlang:error(badarg, [File, Options]); + OptionValues -> + Self = self(), + Flag = process_flag(trap_exit, false), + Pid = spawn_link(fun() -> infile(Self, File, OptionValues) end), + receive + {Pid, Rep} -> + receive after 1 -> ok end, + process_flag(trap_exit, Flag), + Rep + end + end. + +%% Kept for backward compatibility. +yecc(Infile, Outfile) -> + yecc(Infile, Outfile, false, []). + +yecc(Infile, Outfile, Verbose) -> + yecc(Infile, Outfile, Verbose, []). + +yecc(Infilex, Outfilex, Verbose, Includefilex) -> + statistics(runtime), + case file(Infilex, [{parserfile, Outfilex}, + {verbose, Verbose}, + {report, true}, + {includefile, Includefilex}]) of + {ok, _File} -> + statistics(runtime); + error -> + exit(error) + end. + +%%% +%%% Local functions +%%% + +options(Options0) when is_list(Options0) -> + try + Options = flatmap(fun(return) -> short_option(return, true); + (report) -> short_option(report, true); + ({return,T}) -> short_option(return, T); + ({report,T}) -> short_option(report, T); + (T) -> [T] + end, Options0), + options(Options, [file_attributes, includefile, parserfile, + report_errors, report_warnings, return_errors, + return_warnings, time, verbose], []) + catch error: _ -> badarg + end; +options(Option) -> + options([Option]). + +short_option(return, T) -> + [{return_errors,T}, {return_warnings,T}]; +short_option(report, T) -> + [{report_errors,T}, {report_warnings,T}]. 
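+
+%% Hand-evaluated example (not part of the original module):
+%% options([report, {return,true}]) expands the short options and, via
+%% options/3 below, fills in the defaults, yielding
+%%   [file_attributes, {includefile,[]}, {parserfile,[]},
+%%    report_errors, report_warnings, return_errors, return_warnings].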
+ +options(Options0, [Key | Keys], L) when is_list(Options0) -> + Options = case member(Key, Options0) of + true -> + [atom_option(Key) | delete(Key, Options0)]; + false -> + Options0 + end, + V = case keysearch(Key, 1, Options) of + {value, {Key, Filename0}} when Key =:= includefile; + Key =:= parserfile -> + case is_filename(Filename0) of + no -> + badarg; + Filename -> + {ok, [{Key, Filename}]} + end; + {value, {Key, Bool}} when Bool =:= true; Bool =:= false -> + {ok, [{Key, Bool}]}; + {value, {Key, _}} -> + badarg; + false -> + {ok, [{Key, default_option(Key)}]} + end, + case V of + badarg -> + badarg; + {ok, KeyValueL} -> + NewOptions = keydelete(Key, 1, Options), + options(NewOptions, Keys, KeyValueL ++ L) + end; +options([], [], L) -> + foldl(fun({_,false}, A) -> A; + ({Tag,true}, A) -> [Tag | A]; + (F, A) -> [F | A] + end, [], L); +options(_Options, _, _L) -> + badarg. + +default_option(file_attributes) -> true; +default_option(includefile) -> []; +default_option(parserfile) -> []; +default_option(report_errors) -> true; +default_option(report_warnings) -> true; +default_option(return_errors) -> false; +default_option(return_warnings) -> false; +default_option(time) -> false; +default_option(verbose) -> false. + +atom_option(file_attributes) -> {file_attributes, true}; +atom_option(report_errors) -> {report_errors, true}; +atom_option(report_warnings) -> {report_warnings, true}; +atom_option(return_errors) -> {return_errors, true}; +atom_option(return_warnings) -> {return_warnings, true}; +atom_option(time) -> {time, true}; +atom_option(verbose) -> {verbose, true}; +atom_option(Key) -> Key. + +is_filename(T) -> + try filename:flatten(T) of + Filename -> Filename + catch error: _ -> no + end. + +shorten_filename(Name0) -> + {ok,Cwd} = file:get_cwd(), + case lists:prefix(Cwd, Name0) of + false -> Name0; + true -> + case lists:nthtail(length(Cwd), Name0) of + "/"++N -> N; + N -> N + end + end. + +start(Infilex, Options) -> + Infile = assure_extension(Infilex, ".yrl"), + {value, {_, Outfilex0}} = keysearch(parserfile, 1, Options), + {value, {_, Includefilex}} = keysearch(includefile, 1, Options), + Outfilex = case Outfilex0 of + [] -> filename:rootname(Infilex, ".yrl"); + _ -> Outfilex0 + end, + Includefile = case Includefilex of + [] -> []; + _ -> assure_extension(Includefilex,".hrl") + end, + IncludefileVersion = includefile_version(Includefile), + Outfile = assure_extension(Outfilex, ".erl"), + Module = list_to_atom(filename:basename(Outfile, ".erl")), + #yecc{infile = Infile, + outfile = Outfile, + includefile = Includefile, + includefile_version = IncludefileVersion, + module = Module, + options = Options, + verbose = member(verbose, Options), + file_attrs = member(file_attributes, Options)}. + +assure_extension(File, Ext) -> + concat([strip_extension(File, Ext), Ext]). + +%% Assumes File is a filename. +strip_extension(File, Ext) -> + case filename:extension(File) of + Ext -> filename:rootname(File); + _Other -> File + end. + +infile(Parent, Infilex, Options) -> + St0 = start(Infilex, Options), + St = case file:open(St0#yecc.infile, [read, read_ahead]) of + {ok, Inport} -> + try + outfile(St0#yecc{inport = Inport}) + after + ok = file:close(Inport) + end; + {error, Reason} -> + add_error(St0#yecc.infile, none, {file_error, Reason}, St0) + end, + case St#yecc.errors of + [] -> ok; + _ -> _ = file:delete(St#yecc.outfile) + end, + Parent ! {self(), yecc_ret(St)}. 
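+
+%% Typical usage (hypothetical file name):
+%%   {ok,"calc.erl"} = yecc:file("calc.yrl").
+%% The actual work runs in a linked helper process, presumably so the
+%% potentially large intermediate state never lands on the caller's
+%% heap.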
+ +outfile(St0) -> + case file:open(St0#yecc.outfile, [write, delayed_write]) of + {ok, Outport} -> + try + generate(St0#yecc{outport = Outport, line = 1}) + catch + throw: St1 -> + St1; + exit: Reason -> + add_error({internal_error, Reason}, St0) + after + ok = file:close(Outport) + end; + {error, Reason} -> + add_error(St0#yecc.outfile, none, {file_error, Reason}, St0) + end. + +os_process_size() -> + case os:type() of + {unix, sunos} -> + Size = os:cmd("ps -o vsz -p " ++ os:getpid() ++ " | tail -1"), + list_to_integer(lib:nonl(Size)); + _ -> + 0 + end. + +timeit(Name, Fun, St0) -> + Time = runtime, + %% Time = wall_clock, + {Before, _} = statistics(Time), + St = Fun(St0), + {After, _} = statistics(Time), + Mem0 = erts_debug:flat_size(St)*erlang:system_info(wordsize), + Mem = lists:flatten(io_lib:format("~.1f kB", [Mem0/1024])), + Sz = lists:flatten(io_lib:format("~.1f MB", [os_process_size()/1024])), + io:fwrite(" ~-30w: ~10.2f s ~12s ~10s\n", + [Name, (After-Before)/1000, Mem, Sz]), + St. + +-define(PASS(P), {P, fun P/1}). + +generate(St0) -> + Passes = [?PASS(parse_grammar), ?PASS(check_grammar), + ?PASS(states_and_goto_table), ?PASS(parse_actions), + ?PASS(action_conflicts), ?PASS(write_file)], + F = case member(time, St0#yecc.options) of + true -> + io:fwrite(<<"Generating parser from grammar in ~s\n">>, + [format_filename(St0#yecc.infile)]), + fun timeit/3; + false -> + fun(_Name, Fn, St) -> Fn(St) end + end, + Fun = fun({Name, Fun}, St) -> + St2 = F(Name, Fun, St), + if + St2#yecc.errors =:= [] -> St2; + true -> throw(St2) + end + end, + foldl(Fun, St0, Passes). + +parse_grammar(St) -> + parse_grammar(St#yecc.inport, 1, St). + +parse_grammar(Inport, Line, St) -> + {NextLine, Grammar} = read_grammar(Inport, Line), + parse_grammar(Grammar, Inport, NextLine, St). + +parse_grammar(eof, _Inport, _NextLine, St) -> + St; +parse_grammar({#symbol{name = 'Header'}, Ss}, Inport, NextLine, St0) -> + St1 = St0#yecc{header = [S || {string,_,S} <- Ss]}, + parse_grammar(Inport, NextLine, St1); +parse_grammar({#symbol{name = 'Erlang'}, [#symbol{name = code}]}, _Inport, + NextLine, St) -> + St#yecc{erlang_code = NextLine}; +parse_grammar(Grammar, Inport, NextLine, St0) -> + St = parse_grammar(Grammar, St0), + parse_grammar(Inport, NextLine, St). + +parse_grammar({error,ErrorLine,Error}, St) -> + add_error(ErrorLine, Error, St); +parse_grammar({rule, Rule, Tokens}, St0) -> + NmbrOfDaughters = case Rule of + [_, #symbol{name = '$empty'}] -> 0; + _ -> length(Rule) - 1 + end, + {IsGuard, IsWellFormed} = check_action(Tokens), + {Tokens1, St} = subst_pseudo_vars(Tokens, + NmbrOfDaughters, + St0), + RuleDef = #rule{symbols = Rule, + tokens = Tokens1, + is_guard = IsGuard, + is_well_formed = IsWellFormed}, + St#yecc{rules_list = [RuleDef | St#yecc.rules_list]}; +parse_grammar({prec, Prec}, St) -> + St#yecc{prec = Prec ++ St#yecc.prec}; +parse_grammar({#symbol{line = Line, name = Name}, Symbols}, St) -> + CF = fun(I) -> + case element(I, St) of + [] -> + setelement(I, St, Symbols); + _ -> + add_error(Line, {duplicate_declaration, Name}, St) + end + end, + OneSymbol = length(Symbols) =:= 1, + case Name of + 'Nonterminals' -> CF(#yecc.nonterminals); + 'Terminals' -> CF(#yecc.terminals); + 'Rootsymbol' when OneSymbol -> CF(#yecc.rootsymbol); + 'Endsymbol' when OneSymbol -> CF(#yecc.endsymbol); + 'Expect' when OneSymbol -> CF(#yecc.expect_shift_reduce); + 'States' when OneSymbol -> CF(#yecc.expect_n_states); % undocumented + _ -> add_warning(Line, bad_declaration, St) + end. 
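+
+%% For example, the .yrl declaration "Nonterminals expr term." reaches
+%% the last clause above as {#symbol{name = 'Nonterminals'}, Symbols}
+%% with one #symbol{} per name and is stored in the nonterminals
+%% field; a second such declaration gives a duplicate_declaration
+%% error.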
+ +read_grammar(Inport, Line) -> + case yeccscan:scan(Inport, '', Line) of + {eof, NextLine} -> + {NextLine, eof}; + {error, {ErrorLine, Mod, What}, NextLine} -> + {NextLine, {error, ErrorLine, {error, Mod, What}}}; + {ok, Input, NextLine} -> + {NextLine, case yeccparser:parse(Input) of + {error, {ErrorLine, Mod, Message}} -> + {error, ErrorLine, {error, Mod, Message}}; + {ok, {rule, Rule, {erlang_code, Tokens}}} -> + {rule, Rule, Tokens}; + {ok, {#symbol{name=P}, + [#symbol{name=I} | OpL]}=Ss} -> + A = precedence(P), + if + A =/= unknown, + is_integer(I), + OpL =/= [] -> + Ps = [{Op, I , A} || Op <- OpL], + {prec, Ps}; + true -> + Ss + end; + {ok, Ss} -> + Ss + end} + end. + +precedence('Left') -> left; +precedence('Right') -> right; +precedence('Unary') -> unary; +precedence('Nonassoc') -> nonassoc; +precedence(_) -> unknown. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +check_grammar(St0) -> + Empty = #symbol{line = none, name = '$empty'}, + AllSymbols = St0#yecc.nonterminals ++ St0#yecc.terminals ++ [Empty], + St1 = St0#yecc{all_symbols = AllSymbols}, + Cs = [fun check_nonterminals/1, fun check_terminals/1, + fun check_rootsymbol/1, fun check_endsymbol/1, + fun check_expect/1, fun check_states/1, + fun check_precedences/1, fun check_rules/1], + foldl(fun(F, St) -> F(St) end, St1, Cs). + +check_nonterminals(St) -> + case St#yecc.nonterminals of + [] -> + add_error(nonterminals_missing, St); + Nonterminals -> + {Unique, Dups} = duplicates(names(Nonterminals)), + St1 = add_warnings(Dups, duplicate_nonterminal, St), + St2 = check_reserved(Unique, St1), + St2#yecc{nonterminals = [?ACCEPT | Unique]} + end. + +check_terminals(St0) -> + case St0#yecc.terminals of + [] -> + add_error(terminals_missing, St0); + Terminals -> + {Unique, Dups} = duplicates(names(Terminals)), + St1 = add_warnings(Dups, duplicate_terminal, St0), + Common = intersect(St1#yecc.nonterminals, Unique), + St2 = add_errors(Common, symbol_terminal_and_nonterminal, St1), + St3 = check_reserved(Unique, St2), + St3#yecc{terminals = ['$empty' | Unique]} + end. + +check_reserved(Names, St) -> + add_errors(intersect(Names, ['$empty', '$end', '$undefined']), + reserved, St). + +check_rootsymbol(St) -> + case St#yecc.rootsymbol of + [] -> + add_error(rootsymbol_missing, St); + [#symbol{line = Line, name = SymName}] -> + case kind_of_symbol(St, SymName) of + nonterminal -> + St#yecc{rootsymbol = SymName}; + _ -> + add_error(Line, {bad_rootsymbol, SymName}, St) + end + end. + +check_endsymbol(St) -> + case St#yecc.endsymbol of + [] -> + St#yecc{endsymbol = '$end'}; + [#symbol{line = Line, name = SymName}] -> + case kind_of_symbol(St, SymName) of + nonterminal -> + add_error(Line, {endsymbol_is_nonterminal, SymName}, St); + terminal -> + add_error(Line, {endsymbol_is_terminal, SymName}, St); + _ -> + St#yecc{endsymbol = SymName} + end + end. + +check_expect(St0) -> + case St0#yecc.expect_shift_reduce of + [] -> + St0#yecc{expect_shift_reduce = 0}; + [#symbol{name = Expect}] when is_integer(Expect) -> + St0#yecc{expect_shift_reduce = Expect}; + [#symbol{line = Line, name = Name}] -> + St1 = add_error(Line, {bad_expect, Name}, St0), + St1#yecc{expect_shift_reduce = 0} + end. + +check_states(St) -> + case St#yecc.expect_n_states of + [] -> + St; + [#symbol{name = NStates}] when is_integer(NStates) -> + St#yecc{expect_n_states = NStates}; + [#symbol{line = Line, name = Name}] -> + add_error(Line, {bad_states, Name}, St) + end. 
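+
+%% For example, "Expect 2." arrives in check_expect/1 as
+%% [#symbol{name = 2}] and sets expect_shift_reduce to 2, whereas
+%% "Expect two." triggers a bad_expect error and falls back to 0.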
+ +check_precedences(St0) -> + {St1, _} = + foldr(fun({#symbol{line = Line, name = Op},_I,_A}, {St,Ps}) -> + case member(Op, Ps) of + true -> + {add_error(Line, {duplicate_precedence,Op}, St), + Ps}; + false -> + {St, [Op | Ps]} + end + end, {St0,[]}, St0#yecc.prec), + foldl(fun({#symbol{line = Line, name = Op},I,A}, St) -> + case kind_of_symbol(St, Op) of + endsymbol -> + add_error(Line,{precedence_op_is_endsymbol,Op}, St); + unknown -> + add_error(Line, {precedence_op_is_unknown, Op}, St); + _ -> + St#yecc{prec = [{Op,I,A} | St#yecc.prec]} + end + end, St1#yecc{prec = []}, St1#yecc.prec). + +check_rule(Rule0, {St0,Rules}) -> + Symbols = Rule0#rule.symbols, + #symbol{line = HeadLine, name = Head} = hd(Symbols), + case member(Head, St0#yecc.nonterminals) of + false -> + {add_error(HeadLine, {undefined_nonterminal, Head}, St0), Rules}; + true -> + St = check_rhs(tl(Symbols), St0), + Rule = Rule0#rule{line = HeadLine, symbols = names(Symbols)}, + {St, [Rule | Rules]} + end. + +check_rules(St0) -> + {St,Rules0} = foldl(fun check_rule/2, {St0,[]}, St0#yecc.rules_list), + case St#yecc.rules_list of + [] -> + add_error(no_grammar_rules, St); + _ -> + Rule = #rule{line = none, + symbols = [?ACCEPT, St#yecc.rootsymbol], + tokens = []}, + Rules1 = [Rule | Rules0], + Rules = map(fun({R,I}) -> R#rule{n = I} end, count(0, Rules1)), + St#yecc{rules_list = Rules, rules = list_to_tuple(Rules)} + end. + +duplicates(List) -> + Unique = usort(List), + {Unique, List -- Unique}. + +names(Symbols) -> + map(fun(Symbol) -> Symbol#symbol.name end, Symbols). + +symbol_line(Name, St) -> + {value, #symbol{line = Line}} = symbol_search(Name, St#yecc.all_symbols), + Line. + +symbol_member(Symbol, Symbols) -> + symbol_search(Symbol#symbol.name, Symbols) =/= false. + +symbol_search(Name, Symbols) -> + keysearch(Name, #symbol.name, Symbols). + +states_and_goto_table(St0) -> + St1 = create_symbol_table(St0), + St = compute_states(St1), + create_precedence_table(St). + +parse_actions(St) -> + erase(), % the pd is used when decoding lookahead sets + ParseActions = compute_parse_actions(St#yecc.n_states, St, []), + erase(), + St#yecc{parse_actions = ParseActions, state_tab = []}. + +action_conflicts(St0) -> + St = find_action_conflicts(St0), + St#yecc{conflicts_done = true}. + +-record(state_info, {reduce_only, state_repr, comment}). + +write_file(St0) -> + #yecc{parse_actions = ParseActions, goto_tab = GotoTab} = St0, + Sorted = sort_parse_actions(ParseActions), + StateReprs = find_identical_shift_states(Sorted), + StateInfo = collect_some_state_info(Sorted, StateReprs), + StateJumps = find_partial_shift_states(Sorted, StateReprs), + UserCodeActions = find_user_code(Sorted, St0), + #yecc{infile = Infile, outfile = Outfile, + inport = Inport, outport = Outport, + nonterminals = Nonterminals} = St0, + {St10, N_lines, LastErlangCodeLine} = + output_prelude(Outport, Inport, St0), + St20 = St10#yecc{line = St10#yecc.line + N_lines}, + St25 = nl(St20), + St30 = output_file_directive(St25, Outfile, St25#yecc.line), + St40 = nl(St30), + St50 = output_actions(St40, StateJumps, StateInfo), + Go0 = [{Symbol,{From,To}} || {{From,Symbol},To} <- ets:tab2list(GotoTab)], + Go = family_with_domain(Go0, Nonterminals), + St60 = output_goto(St50, Go, StateInfo), + St70 = output_inlined(St60, UserCodeActions, Infile), + St = nl(St70), + case LastErlangCodeLine of + %% Just in case warnings or errors are emitted after the last + %% line of the file. 
+ {last_erlang_code_line, Last_line} -> + output_file_directive(St, Infile, Last_line); + no_erlang_code -> + St + end. + +yecc_ret(St0) -> + St = check_expected(St0), + report_errors(St), + report_warnings(St), + Es = pack_errors(St#yecc.errors), + Ws = pack_warnings(St#yecc.warnings), + if + Es =:= [] -> + case member(return_warnings, St#yecc.options) of + true -> {ok, St#yecc.outfile, Ws}; + false -> {ok, St#yecc.outfile} + end; + true -> + case member(return_errors, St#yecc.options) of + true -> {error, Es, Ws}; + false -> error + end + end. + +check_expected(St0) -> + #yecc{shift_reduce = SR, reduce_reduce = RR, expect_shift_reduce = ExpSR, + n_states = NStates0, expect_n_states = ExpStates, + conflicts_done = Done} = St0, + N_RR = length(usort(RR)), + N_SR = length(usort(SR)), + St1 = if + not Done -> + St0; + N_SR =:= ExpSR, N_RR =:= 0 -> + St0; + true -> + add_warning(none, {conflicts, N_SR, N_RR}, St0) + end, + NStates = NStates0 + 1, + if + (not Done) or (ExpStates =:= []) or (NStates =:= ExpStates) -> + St1; + true -> + add_warning(none, {n_states, ExpStates, NStates}, St1) + end. + +pack_errors([{File,_} | _] = Es) -> + [{File, flatmap(fun({_,E}) -> [E] end, sort(Es))}]; +pack_errors([]) -> + []. + +pack_warnings([{File,_} | _] = Ws) -> + [{File, flatmap(fun({_,W}) -> [W] end, sort(Ws))}]; +pack_warnings([]) -> + []. + +report_errors(St) -> + case member(report_errors, St#yecc.options) of + true -> + foreach(fun({File,{none,Mod,E}}) -> + io:fwrite(<<"~s: ~s\n">>, + [File,Mod:format_error(E)]); + ({File,{Line,Mod,E}}) -> + io:fwrite(<<"~s:~w: ~s\n">>, + [File,Line,Mod:format_error(E)]) + end, sort(St#yecc.errors)); + false -> + ok + end. + +report_warnings(St) -> + case member(report_warnings, St#yecc.options) of + true -> + foreach(fun({File,{none,Mod,W}}) -> + io:fwrite(<<"~s: Warning: ~s\n">>, + [File,Mod:format_error(W)]); + ({File,{Line,Mod,W}}) -> + io:fwrite(<<"~s:~w: Warning: ~s\n">>, + [File,Line,Mod:format_error(W)]) + end, sort(St#yecc.warnings)); + false -> + ok + end. + +add_error(E, St) -> + add_error(none, E, St). + +add_error(Line, E, St) -> + add_error(St#yecc.infile, Line, E, St). + +add_error(File, Line, E, St) -> + St#yecc{errors = [{File,{Line,?MODULE,E}}|St#yecc.errors]}. + +add_errors(SymNames, E0, St0) -> + foldl(fun(SymName, St) -> + add_error(symbol_line(SymName, St), {E0, SymName}, St) + end, St0, SymNames). + +add_warning(Line, W, St) -> + St#yecc{warnings = [{St#yecc.infile,{Line,?MODULE,W}}|St#yecc.warnings]}. + +add_warnings(SymNames, W0, St0) -> + foldl(fun(SymName, St) -> + add_warning(symbol_line(SymName, St), {W0, SymName}, St) + end, St0, SymNames). + +check_rhs([#symbol{name = '$empty'}], St) -> + St; +check_rhs(Rhs, St0) -> + case symbol_search('$empty', Rhs) of + {value, #symbol{line = Line}} -> + add_error(Line, illegal_empty, St0); + false -> + foldl(fun(Sym, St) -> + case symbol_member(Sym, St#yecc.all_symbols) of + true -> + St; + false -> + E = {undefined_symbol,Sym#symbol.name}, + add_error(Sym#symbol.line, E, St) + end + end, St0, Rhs) + end. + +check_action(Tokens) -> + case erl_parse:parse_exprs(add_roberts_dot(Tokens, 0)) of + {error, _Error} -> + {false, false}; + {ok, [Expr | Exprs]} -> + IsGuard = Exprs =:= [] andalso erl_lint:is_guard_test(Expr), + {IsGuard, true} + end. + +add_roberts_dot([], Line) -> + [{'dot', Line}]; +add_roberts_dot([{'dot', Line} | _], _) -> + [{'dot', Line}]; +add_roberts_dot([Token | Tokens], _) -> + [Token | add_roberts_dot(Tokens, element(2, Token))]. 
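+
+%% Hand-evaluated example (not part of the original module):
+%% add_roberts_dot([{atom,1,a}, {'+',1}, {atom,1,b}], 0) returns
+%% [{atom,1,a}, {'+',1}, {atom,1,b}, {'dot',1}], i.e. a terminating
+%% dot is appended on the line of the last token unless one is already
+%% present. Similarly, subst_pseudo_vars/3 below turns the pseudo
+%% variable '$2' of a rule with two daughters into the generated
+%% variable __2, while '$3' is reported as an
+%% undefined_pseudo_variable.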
+ +subst_pseudo_vars([], _, St) -> + {[], St}; +subst_pseudo_vars([H0 | T0], NmbrOfDaughters, St0) -> + {H, St1} = subst_pseudo_vars(H0, NmbrOfDaughters, St0), + {T, St} = subst_pseudo_vars(T0, NmbrOfDaughters, St1), + {[H | T], St}; +subst_pseudo_vars({atom, Line, Atom}, NmbrOfDaughters, St0) -> + case atom_to_list(Atom) of + [$$ | Rest] -> + try list_to_integer(Rest) of + N when N > 0, N =< NmbrOfDaughters -> + {{var, Line, list_to_atom(append("__", Rest))}, St0}; + _ -> + St = add_error(Line, {undefined_pseudo_variable, Atom}, + St0), + {{atom, Line, '$undefined'}, St} + catch + error: _ -> {{atom, Line, Atom}, St0} + end; + _ -> + {{atom, Line, Atom}, St0} + end; +subst_pseudo_vars(Tuple, NmbrOfDaughters, St0) when is_tuple(Tuple) -> + {L, St} = subst_pseudo_vars(tuple_to_list(Tuple), NmbrOfDaughters, St0), + {list_to_tuple(L), St}; +subst_pseudo_vars(Something_else, _, St) -> + {Something_else, St}. + +kind_of_symbol(St, SymName) -> + case member(SymName, St#yecc.nonterminals) of + false -> + case member(SymName, St#yecc.terminals) of + false -> + case St#yecc.endsymbol of + SymName -> + endsymbol; + _ -> + unknown + end; + true -> + terminal + end; + true -> + nonterminal + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Computing parse states and goto table from grammar. +% Start item: {0, [Endsymbol]} <-> +% (['ACCEPT' '.', Rootsymbol], {'$'}) in Aho & Johnson +% where '$end' is the default end of input symbol of the +% scanner if no 'Endsymbol' has been declared in the syntax file. + +-record(tabs, { + symbols, % ETS-set, keypos 1: {SymbolName, SymbolCode} + inv_symbols, % ETS-set, keypos 2: {SymbolName, SymbolCode} + state_id, % ETS-bag, keypos 1: {StateId, StateNum} + % StateId is not unique for a state. + rp_rhs, % rule pointer -> the remaining rhs symbols + rp_info, % rule pointer -> expanding rules and lookahead + goto % ETS-bag, keypos 1: first + % {{FromStateNum, Symbol, ToStateNum}}, then + % {{FromStateNum, Symbol}, ToStateNum} + }). + +-record(item, { % what states are made of + rule_pointer, + look_ahead, + rhs + }). + +compute_states(St0) -> + SymbolTab = St0#yecc.symbol_tab, + CodedRules = map(fun(#rule{symbols = Syms} = R) -> + R#rule{symbols = code_symbols(Syms, SymbolTab)} + end, St0#yecc.rules_list), + CodedNonterminals = code_symbols(St0#yecc.nonterminals, SymbolTab), + %% Only coded in this phase; StC is thrown away. 
+ StC = St0#yecc{rules_list = CodedRules, + rules = list_to_tuple(CodedRules), + nonterminals = CodedNonterminals}, + {RuleIndex, RulePointer2Rule} = + make_rule_index(StC, St0#yecc.rules_list), + StateTab0 = {}, + StateIdTab = ets:new(yecc_state_id, [set]), + GotoTab = ets:new(yecc_goto, [bag]), + RulePointerRhs = make_rhs_index(StC#yecc.rules_list), + RulePointerInfo = make_rule_pointer_info(StC, RulePointerRhs, RuleIndex), + + Tables = #tabs{symbols = SymbolTab, + state_id = StateIdTab, + rp_rhs = RulePointerRhs, + rp_info = RulePointerInfo, + goto = GotoTab}, + + erase(), + EndsymCode = code_terminal(StC#yecc.endsymbol, StC#yecc.symbol_tab), + {StateId, State0} = compute_state([{EndsymCode, 1}], Tables), + + StateNum0 = first_state(), + FirstState = {StateNum0, State0}, + StateTab1 = insert_state(Tables, StateTab0, FirstState, StateId), + {StateTab, N} = + compute_states1([{StateNum0, get_current_symbols(State0)}], + FirstState, StateTab1, Tables), + true = ets:delete(StateIdTab), + St = St0#yecc{state_tab = StateTab, goto_tab = GotoTab, n_states = N, + rule_pointer2rule = RulePointer2Rule}, + decode_goto(GotoTab, St#yecc.inv_symbol_tab), + check_usage(St). + +first_state() -> + 0. + +decode_goto(GotoTab, InvSymTab) -> + G = ets:tab2list(GotoTab), + ets:delete_all_objects(GotoTab), + ets:insert(GotoTab, + map(fun({{From, Sym, Next}}) -> + {{From, decode_symbol(Sym, InvSymTab)}, Next} + end, G)). + +check_usage(St0) -> + SelSyms = ets:fun2ms(fun({{_,Sym},_}) -> Sym end), + UsedSymbols = ets:select(St0#yecc.goto_tab, SelSyms), + Syms = ordsets:from_list([?ACCEPT, '$empty' | UsedSymbols]), + NonTerms = ordsets:from_list(St0#yecc.nonterminals), + UnusedNonTerms = ordsets:to_list(ordsets:subtract(NonTerms, Syms)), + St1 = add_warnings(UnusedNonTerms, unused_nonterminal, St0), + Terms = ordsets:from_list(St0#yecc.terminals), + St2 = add_warnings(ordsets:to_list(ordsets:subtract(Terms, Syms)), + unused_terminal, St1), + DefinedNonTerminals = map(fun(#rule{symbols = [Name | _]}) -> + Name + end, St2#yecc.rules_list), + DefNonTerms = ordsets:from_list(DefinedNonTerminals), + UndefNonTerms = ordsets:subtract(NonTerms, DefNonTerms), + add_errors(ordsets:to_list(ordsets:subtract(UndefNonTerms, + UnusedNonTerms)), + missing_syntax_rule, St2). + +%% States are sometimes big, should not be copied to ETS tables. +%% Here an "extendible" tuple is used. +lookup_state(StateTab, N) -> + element(N+1, StateTab). + +insert_state(#tabs{state_id = StateIdTab}, StateTab0, State, StateId) -> + {N, _Items} = State, + insert_state_id(StateIdTab, N, StateId), + StateTab = if + tuple_size(StateTab0) > N -> + StateTab0; + true -> + list_to_tuple(tuple_to_list(StateTab0) ++ + lists:duplicate(round(1 + N * 1.5), [])) + end, + setelement(N+1, StateTab, State). + +insert_state_id(StateIdTab, N, StateId) -> + true = ets:insert(StateIdTab, {StateId, N}). + +compute_states1([], {N, _}=_CurrState, StateTab0, _Tables) -> + {StateTab0, N}; +compute_states1([{N, Symbols} | Try], CurrState, StateTab, Tables) -> + {_N, S} = lookup_state(StateTab, N), + Seeds = state_seeds(S, Symbols), + compute_states2(Seeds, N, Try, CurrState, StateTab, Tables). 
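+
+%% A worked example of the "extendible" tuple above (a sketch): with
+%% StateTab0 = {} and N = 0, insert_state/4 first appends
+%% round(1 + 0 * 1.5) = 1 empty slot; with a tuple of size 3 and
+%% N = 4 it appends round(1 + 4 * 1.5) = 7 slots. The extension is
+%% proportional to N, so the tuple at least doubles whenever it grows
+%% and the total copying cost stays linear in the number of states.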
+
+compute_states2([], _N, Try, CurrState, StateTab, Tables) ->
+    compute_states1(Try, CurrState, StateTab, Tables);
+compute_states2([{Sym,Seed} | Seeds], N, Try, CurrState, StateTab, Tables) ->
+    {StateId, NewState} = compute_state(Seed, Tables),
+    case check_states(NewState, StateId, StateTab, Tables) of
+        add ->
+            {M, _} = CurrState,
+            %% io:fwrite(<<"Adding state ~w\n">>, [M + 1]),
+            CurrentSymbols = get_current_symbols(NewState),
+            Next = M + 1,
+            NextState = {Next, NewState},
+            NewStateTab = insert_state(Tables, StateTab, NextState, StateId),
+            insert_goto(Tables, N, Sym, Next),
+            compute_states2(Seeds, N, [{Next, CurrentSymbols} | Try],
+                            NextState, NewStateTab, Tables);
+        {old, M} ->
+            %% io:fwrite(<<"Identical to old state ~w\n">>, [M]),
+            insert_goto(Tables, N, Sym, M),
+            compute_states2(Seeds, N, Try, CurrState, StateTab, Tables);
+        {merge, M, NewCurrent} ->
+            %% io:fwrite(<<"Merging with state ~w\n">>, [M]),
+            Try1 = case keysearch(M, 1, Try) of
+                       false ->
+                           [{M, NewCurrent} | Try];
+                       {value, {_, OldCurrent}} ->
+                           case ordsets:is_subset(NewCurrent, OldCurrent) of
+                               true ->
+                                   Try;
+                               false ->
+                                   [{M, ordsets:union(NewCurrent, OldCurrent)}
+                                    | keydelete(M, 1, Try)]
+                           end
+                   end,
+            NewStateTab = merge_states(NewState, StateTab, Tables, M,StateId),
+            insert_goto(Tables, N, Sym, M),
+            compute_states2(Seeds, N, Try1, CurrState, NewStateTab, Tables)
+    end.
+
+insert_goto(Tables, From, Sym, To) ->
+    true = ets:insert(Tables#tabs.goto, {{From, Sym, To}}).
+
+%% Create an ets table for faster lookups.
+create_symbol_table(St) ->
+    #yecc{terminals = Terminals, endsymbol = Endsymbol} = St,
+    SymbolTab = ets:new(yecc_symbols, [{keypos,1}]),
+    %% '$empty' is always assigned 0
+    Ts = ['$empty', Endsymbol | delete('$empty', Terminals)],
+    TsC = count(0, Ts),
+    NTsC = map(fun({NT,I}) -> {NT,-I} end, count(1, St#yecc.nonterminals)),
+    Cs = TsC++NTsC,
+    true = ets:insert(SymbolTab, Cs),
+
+    InvSymTable = ets:new(yecc_inverted_terminals, [{keypos,2}]),
+    true = ets:insert(InvSymTable, Cs),
+
+    St#yecc{symbol_tab = SymbolTab, inv_symbol_tab = InvSymTable}.
+
+get_current_symbols(State) ->
+    usort(get_current_symbols1(State, [])).
+
+get_current_symbols1([], Syms) ->
+    Syms;
+get_current_symbols1([#item{rhs = Rhs} | Items], Syms) ->
+    case Rhs of
+        [] ->
+            get_current_symbols1(Items, Syms);
+        [Symbol | _] ->
+            get_current_symbols1(Items, [Symbol | Syms])
+    end.
+
+state_seeds(Items, Symbols) ->
+    L = [{S,{LA,RP + 1}} || #item{rule_pointer = RP, look_ahead = LA,
+                                  rhs = [S | _]} <- Items],
+    state_seeds1(keysort(1, L), Symbols).
+
+state_seeds1(_L, []) ->
+    [];
+state_seeds1(L, [Symbol | Symbols]) ->
+    state_seeds(L, Symbol, Symbols, []).
+
+state_seeds([{Symbol, Item} | L], Symbol, Symbols, Is) ->
+    state_seeds(L, Symbol, Symbols, [Item | Is]);
+state_seeds([{S, _Item} | L], Symbol, Symbols, Is) when S < Symbol ->
+    state_seeds(L, Symbol, Symbols, Is);
+state_seeds(L, Symbol, Symbols, Is) ->
+    [{Symbol, Is} | state_seeds1(L, Symbols)].
+
+compute_state(Seed, Tables) ->
+    RpInfo = Tables#tabs.rp_info,
+    foreach(fun({LA, RulePointer}) -> put(RulePointer, LA) end, Seed),
+    foreach(fun({LA, RP}) -> compute_closure(LA, RP, RpInfo) end, Seed),
+    Closure = keysort(1, erase()),
+    state_items(Closure, [], [], Tables#tabs.rp_rhs).
+
+%% Collects a unique id for the state (all rule pointers).
+state_items([{RP, LA} | L], Is, Id, RpRhs) ->
+    I = #item{rule_pointer = RP, look_ahead = LA, rhs = element(RP, RpRhs)},
+    state_items(L, [I | Is], [RP | Id], RpRhs);
+state_items(_, Is, Id, _RpRhs) ->
+    {Id, Is}.
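+
+%% A concrete example of the symbol coding (a sketch with a made-up
+%% grammar): given Terminals '+' and integer, Endsymbol '$end' and a
+%% single nonterminal expr, create_symbol_table/1 inserts
+%%   {'$empty',0}, {'$end',1}, {'+',2}, {integer,3}, {expr,-1},
+%% that is, terminals get codes >= 0 and nonterminals get codes < 0,
+%% which is what is_terminal/2 below relies on.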
+
+-compile({inline,[compute_closure/3]}).
+compute_closure(Lookahead, RulePointer, RpInfo) ->
+    case element(RulePointer, RpInfo) of
+        []=Void -> % no followers, or terminal
+            Void;
+        {no_union, ExpandingRules, NewLookahead} ->
+            compute_closure1(ExpandingRules, NewLookahead, RpInfo);
+        {union, ExpandingRules, Lookahead0} ->
+            NewLookahead = set_union(Lookahead0, Lookahead),
+            compute_closure1(ExpandingRules, NewLookahead, RpInfo);
+        ExpandingRules ->
+            compute_closure1(ExpandingRules, Lookahead, RpInfo)
+    end.
+
+compute_closure1([RulePointer | Tail], NewLookahead, RpInfo) ->
+    compute_closure1(Tail, NewLookahead, RpInfo),
+    case get(RulePointer) of
+        undefined -> % New
+            put(RulePointer, NewLookahead),
+            compute_closure(NewLookahead, RulePointer, RpInfo);
+        Lookahead2 ->
+            Lookahead = set_union(Lookahead2, NewLookahead),
+            if
+                Lookahead =:= Lookahead2 -> % Old
+                    Lookahead2; % void()
+                true -> % Merge
+                    put(RulePointer, Lookahead),
+                    compute_closure(NewLookahead, RulePointer, RpInfo)
+            end
+    end;
+compute_closure1(Nil, _, _RpInfo) ->
+    Nil.
+
+%% Check if some old state is a superset of our NewState
+check_states(NewState, StateId, StateTab, #tabs{state_id = StateIdTab}) ->
+    try ets:lookup_element(StateIdTab, StateId, 2) of
+        N ->
+            {_N, OldState} = lookup_state(StateTab, N),
+            check_state1(NewState, OldState, [], N)
+    catch error:_ -> add
+    end.
+
+check_state1([#item{look_ahead = Lookahead1, rhs = Rhs} | Items1],
+             [#item{look_ahead = Lookahead2} | Items2], Symbols, N) ->
+    case set_is_subset(Lookahead1, Lookahead2) of
+        true ->
+            check_state1(Items1, Items2, Symbols, N);
+        false ->
+            case Rhs of
+                [] ->
+                    check_state2(Items1, Items2, Symbols, N);
+                [Symbol | _] ->
+                    check_state2(Items1, Items2, [Symbol | Symbols], N)
+            end
+    end;
+check_state1([], [], _Symbols, N) ->
+    {old, N}.
+
+check_state2([#item{look_ahead = Lookahead1, rhs = Rhs} | Items1],
+             [#item{look_ahead = Lookahead2} | Items2], Symbols, N) ->
+    case set_is_subset(Lookahead1, Lookahead2) of
+        true ->
+            check_state2(Items1, Items2, Symbols, N);
+        false ->
+            case Rhs of
+                [] ->
+                    check_state2(Items1, Items2, Symbols, N);
+                [Symbol | _] ->
+                    check_state2(Items1, Items2, [Symbol | Symbols], N)
+            end
+    end;
+check_state2([], [], Symbols, N) ->
+    {merge, N, usort(Symbols)}.
+
+merge_states(NewState, StateTab, Tables, M, StateId) ->
+    {_M, Old_state} = lookup_state(StateTab, M),
+    MergedState = merge_states1(NewState, Old_state),
+    insert_state(Tables, StateTab, {M, MergedState}, StateId).
+
+merge_states1([Item1 | Items1], [Item2 | Items2]) ->
+    LA1 = Item1#item.look_ahead,
+    LA2 = Item2#item.look_ahead,
+    if
+        LA1 =:= LA2 ->
+            [Item1 | merge_states1(Items1, Items2)];
+        true ->
+            [Item1#item{look_ahead = set_union(LA1, LA2)}
+             | merge_states1(Items1, Items2)]
+    end;
+merge_states1(_, _) ->
+    [].
+
+%% RulePointer -> Rhs. Every position in Rhs has its unique "rule pointer".
+make_rhs_index(RulesList) ->
+    Index = flatmap(fun(#rule{symbols = [_Non | Daughters]}) ->
+                            suffixes0(Daughters)
+                    end, RulesList),
+    list_to_tuple(Index).
+
+suffixes0([?EMPTY]) ->
+    [[], []];
+suffixes0(L) ->
+    suffixes(L).
+
+suffixes([]=L) ->
+    [L];
+suffixes([_ | T]=L) ->
+    [L | suffixes(T)].
+
+%% Set up info about lookahead and expanding rules for each point
+%% ("rule pointer") in the grammar.
+make_rule_pointer_info(StC, RpRhs, RuleIndex) ->
+    SymbolTab = StC#yecc.symbol_tab,
+    LcTab = make_left_corner_table(StC),
+    LA_index = map(fun(Syms) ->
+                           rp_info(Syms, SymbolTab, LcTab, RuleIndex)
+                   end, tuple_to_list(RpRhs)),
+    list_to_tuple(LA_index).
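+
+%% For example (a sketch; real entries hold symbol codes, names are
+%% used here for readability): a rule  expr -> expr '+' term
+%% contributes the suffixes
+%%   [[expr,'+',term], ['+',term], [term], []]
+%% to the index, one "rule pointer" per position in the right-hand
+%% side plus one for its end, while  opt -> '$empty'  contributes
+%% [[], []] via suffixes0/1.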
+ +rp_info([], _SymbolTab, _LcTab, _RuleIndex) -> + []; +rp_info([Category | Followers], SymbolTab, LcTab, RuleIndex) -> + case dict:find(Category, RuleIndex) of + error -> % terminal + []; + {ok, ExpandingRules} when Followers =:= [] -> + ExpandingRules; + {ok, ExpandingRules} -> + case make_lookahead(Followers, SymbolTab, LcTab, set_empty()) of + {empty, LA} -> + {union, ExpandingRules, LA}; + LA -> + {no_union, ExpandingRules, LA} + end + end. + +%% Lookahead computation is complicated by the possible existence +%% of null string rewriting rules, such as A -> '$empty'. +make_lookahead([], _, _, LA) -> + {empty, LA}; +make_lookahead([Symbol | Symbols], SymbolTab, LcTab, LA) -> + case dict:find(Symbol, LcTab) of + {ok, LeftCorner} -> % nonterminal + case empty_member(LeftCorner) of + true -> + make_lookahead(Symbols, SymbolTab, LcTab, + set_union(empty_delete(LeftCorner), LA)); + false -> + set_union(LeftCorner, LA) + end; + error -> % terminal + set_add(Symbol, LA) + end. + +%% -> dict-of({Nonterminal, [Terminal]}). +%% The algorithm FIRST/1 from the Dragon Book. +%% Left corner table, all terminals (including '$empty') that can +%% begin strings generated by Nonterminal. +make_left_corner_table(#yecc{rules_list = RulesList} = St) -> + SymbolTab = left_corner_symbol_table(St), + Rules = map(fun(#rule{symbols = [Lhs | Rhs]}) -> + {Lhs,{Lhs, Rhs}} + end, RulesList), + LeftHandTab = dict:from_list(family(Rules)), + X0 = [{S,H} || {H,{H,Rhs}} <- Rules, + S <- Rhs, + not is_terminal(SymbolTab, S)], + XL = family_with_domain(X0, St#yecc.nonterminals), + X = dict:from_list(XL), + Xref = fun(NT) -> dict:fetch(NT, X) end, + E = set_empty(), + LC0 = dict:from_list([{H, E} || {H,_} <- XL]), + %% Handle H -> a S, where a is a terminal ('$empty' inclusive). + {Q, LC1} = + foldl(fun({H,{H,[S | _]}}, {Q0, LC}) -> + case ets:lookup(SymbolTab, S) of + [{_,Num}=SymbolAndNum] when Num >= 0 -> + F = set_add_terminal(SymbolAndNum, E), + {[Xref(H) | Q0], upd_first(H, F, LC)}; + _ -> + {Q0, LC} + end + end, {[], LC0}, Rules), + left_corners(Q, LC1, LeftHandTab, SymbolTab, Xref). + +left_corners(Q0, LC0, LeftHandTab, SymbolTab, Xref) -> + case usort(append(Q0)) of + [] -> + LC0; + Q1 -> + Rs = flatmap(fun(NT) -> dict:fetch(NT, LeftHandTab) end, Q1), + {LC, Q} = left_corners2(Rs, LC0, [], SymbolTab, Xref), + left_corners(Q, LC, LeftHandTab, SymbolTab, Xref) + end. + +left_corners2([], LC, Q, _SymbolTab, _Xref) -> + {LC, Q}; +left_corners2([{Head,Rhs} | Rs], LC, Q0, SymbolTab, Xref) -> + Ts = left_corner_rhs(Rhs, Head, LC, set_empty(), SymbolTab), + First0 = dict:fetch(Head, LC), + case set_is_subset(Ts, First0) of + true -> + left_corners2(Rs, LC, Q0, SymbolTab, Xref); + false -> + LC1 = upd_first(Head, Ts, LC), + left_corners2(Rs, LC1, [Xref(Head) | Q0], SymbolTab, Xref) + end. + +upd_first(NT, Ts, LC) -> + dict:update(NT, fun(First) -> set_union(First, Ts) end, LC). + +left_corner_rhs([S | Ss], Head, LC, Ts, SymbolTab) -> + case ets:lookup(SymbolTab, S) of + [{_,Num}=SymbolAndNum] when Num >= 0 -> + set_add_terminal(SymbolAndNum, Ts); + [_NonTerminalSymbol] -> + First = dict:fetch(S, LC), + case empty_member(First) of + true -> + NTs = set_union(empty_delete(First), Ts), + left_corner_rhs(Ss, Head, LC, NTs, SymbolTab); + false -> + set_union(First, Ts) + end + end; +left_corner_rhs([], _Head, _LC, Ts, _SymbolTab) -> + set_add(?EMPTY, Ts). + +%% For every non-terminal return a list of "rule pointers" for rules +%% expanding the non-terminal. 
+%% Also assigns a unique number to each point in the grammar, "rule pointer". +make_rule_index(#yecc{nonterminals = Nonterminals, + rules_list = RulesList}, RulesListNoCodes) -> + {RulesL, _N} = + lists:mapfoldl(fun(#rule{symbols = [Nonterminal | Daughters]}, I) -> + I1 = I + length(Daughters)+1, + {{Nonterminal, I}, I1} + end, 1, RulesList), + IndexedTab = family_with_domain(RulesL, Nonterminals), + + Symbol2Rule = [{Foo,R} || #rule{symbols = Symbols}=R <- RulesListNoCodes, + Foo <- Symbols], + Pointer2Rule = [{I, R} || {{_Foo,R},I} <- count(1, Symbol2Rule)], + {dict:from_list(IndexedTab), dict:from_list(Pointer2Rule)}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Computing parse action table from list of states and goto table: + +compute_parse_actions(N, St, StateActions) -> + case N < first_state() of + true -> + StateActions; + false -> + {N, StateN} = lookup_state(St#yecc.state_tab, N), + %% There can be duplicates in Actions. + Actions = compute_parse_actions1(StateN, N, St), + compute_parse_actions(N - 1, St, [{N, Actions} | StateActions]) + end. + +compute_parse_actions1([], _, _) -> + []; +compute_parse_actions1([#item{rule_pointer = RulePointer, + look_ahead = Lookahead0, + rhs = Rhs} | Items], N, St) -> + case Rhs of + [] -> + Lookahead = decode_terminals(Lookahead0, St#yecc.inv_symbol_tab), + case rule(RulePointer, St) of + {[?ACCEPT | _], _RuleLine, _} -> + [{Lookahead, accept} + | compute_parse_actions1(Items, N, St)]; + %% Head is placed after the daughters when finding the + %% precedence. This is how giving precedence to + %% non-terminals takes effect. + {[Head | Daughters0], _RuleLine, _} -> + Daughters = delete('$empty', Daughters0), + [{Lookahead, + #reduce{rule_nmbr = RulePointer, head = Head, + nmbr_of_daughters = length(Daughters), + prec = get_prec(Daughters ++ [Head], St)}} + | compute_parse_actions1(Items, N, St)] + end; + [Symbol | Daughters] -> + case is_terminal(St#yecc.symbol_tab, Symbol) of + true -> + DecSymbol = decode_symbol(Symbol, St#yecc.inv_symbol_tab), + {[Head | _], _RuleLine, _} = rule(RulePointer, St), + %% A bogus shift-shift conflict can be introduced + %% here if some terminal occurs in different rules + %% which have been given precedence "one level up". + Prec1 = case Daughters of + [] -> get_prec([DecSymbol, Head], St); + _ -> get_prec([DecSymbol], St) + end, + Pos = case Daughters of + [] -> z; + _ -> a + end, + [{[DecSymbol], + #shift{state = goto(N, DecSymbol, St), + pos = Pos, + prec = Prec1, + rule_nmbr = RulePointer}} + | compute_parse_actions1(Items, N, St)]; + false -> + compute_parse_actions1(Items, N, St) + end + end. + +get_prec(Symbols, St) -> + get_prec1(Symbols, St#yecc.prec_tab, {0, none}). + +get_prec1([], _, P) -> + P; +get_prec1([Symbol | T], PrecTab, P) -> + case ets:lookup(PrecTab, Symbol) of + [] -> + get_prec1(T, PrecTab, P); + [{_, N, Ass}] -> + get_prec1(T, PrecTab, {N, Ass}) + end. + +create_precedence_table(St) -> + PrecTab = ets:new(yecc_precedences, []), + true = ets:insert(PrecTab, St#yecc.prec), + St#yecc{prec_tab = PrecTab}. + +-record(cxt, {terminal, state_n, yecc, res}). + +%% Detects shift-reduce and reduce-reduce conflicts. +%% Also removes all but one conflicting action. As a consequence the +%% lookahead sets for a state are always disjoint. +%% Reduce/reduce conflicts are considered errors. 
+find_action_conflicts(St0) -> + Cxt0 = #cxt{yecc = St0, res = []}, + {#cxt{yecc = St, res = Res}, NewParseActions0} = + foldl(fun({N, Actions0}, {Cxt1, StateActions}) -> + L = [{Terminal, Act} || {Lookahead, Act} <- Actions0, + Terminal <- Lookahead], + {Cxt, Actions} = + foldl(fun({Terminal, As}, {Cxt2,Acts0}) -> + Cxt3 = Cxt2#cxt{terminal = Terminal, + state_n = N}, + {Action, Cxt} = + find_action_conflicts2(As, Cxt3), + {Cxt,[{Action,Terminal} | Acts0]} + end, {Cxt1,[]}, family(L)), + {Cxt,[{N,inverse(family(Actions))} | StateActions]} + end, {Cxt0, []}, St0#yecc.parse_actions), + if + length(Res) > 0, St#yecc.verbose -> + io:fwrite(<<"\n*** Conflicts resolved by operator " + "precedences:\n\n">>), + foreach(fun({Confl, Name}) -> + report_conflict(Confl, St, Name, prec) + end, reverse(Res)), + io:fwrite(<<"*** End of resolved conflicts\n\n">>); + true -> + ok + end, + NewParseActions = reverse(NewParseActions0), + St#yecc{parse_actions = NewParseActions}. + +find_action_conflicts2([Action], Cxt) -> + {Action, Cxt}; +find_action_conflicts2([#shift{state = St, pos = Pos, prec = Prec}, + #shift{state = St}=S | As], + Cxt) when Pos =:= a; Prec =:= {0,none} -> + %% This is a kludge to remove the bogus shift-shift conflict + %% introduced in compute_parse_actions1(). + find_action_conflicts2([S | As], Cxt); +find_action_conflicts2([#shift{state = NewState, pos = z}=S1, + #shift{state = NewState}=S2 | _], Cxt) -> + %% This is even worse than last clause. Give up. + Confl = conflict(S1, S2, Cxt), + #cxt{yecc = St0} = Cxt, + St = conflict_error(Confl, St0), + {S1, Cxt#cxt{yecc = St}}; % return any action +find_action_conflicts2([#shift{prec = {P1, Ass1}}=S | Rs], Cxt0) -> + {R, Cxt1} = find_reduce_reduce(Rs, Cxt0), + #cxt{res = Res0, yecc = St0} = Cxt1, + #reduce{prec = {P2, Ass2}} = R, + Confl = conflict(R, S, Cxt1), + if + P1 > P2 -> + {S, Cxt1#cxt{res = [{Confl, shift} | Res0]}}; + P2 > P1 -> + {R, Cxt1#cxt{res = [{Confl, reduce} | Res0]}}; + Ass1 =:= left, Ass2 =:= left -> + {R, Cxt1#cxt{res = [{Confl, reduce} | Res0]}}; + Ass1 =:= right, Ass2 =:= right -> + {S, Cxt1#cxt{res = [{Confl, shift} | Res0]}}; + Ass1 =:= nonassoc, Ass2 =:= nonassoc -> + {nonassoc, Cxt1}; + P1 =:= 0, P2 =:= 0 -> + report_conflict(Confl, St0, shift, default), + St = add_conflict(Confl, St0), + {S, Cxt1#cxt{yecc = St}}; + true -> + St = conflict_error(Confl, St0), + {S, Cxt1#cxt{yecc = St}} % return any action + end; +find_action_conflicts2(Rs, Cxt0) -> + find_reduce_reduce(Rs, Cxt0). + +find_reduce_reduce([R], Cxt) -> + {R, Cxt}; +find_reduce_reduce([#reduce{head = Categ1, prec = {P1, _}}=R1, + #reduce{head = Categ2, prec = {P2, _}}=R2 | Rs], Cxt0) -> + #cxt{res = Res0, yecc = St0} = Cxt0, + Confl = conflict(R1, R2, Cxt0), + {R, Res, St} = + if + P1 > P2 -> + {R1, [{Confl, Categ1} | Res0], St0}; + P2 > P1 -> + {R2, [{Confl, Categ2} | Res0], St0}; + true -> + St1 = conflict_error(Confl, St0), + {R1, Res0, St1} + end, + Cxt = Cxt0#cxt{res = Res, yecc = St}, + find_reduce_reduce([R | Rs], Cxt). + +%% Since the lookahead sets are disjoint (assured by +%% find_action_conflicts), the order between actions can be chosen +%% almost arbitrarily. nonassoc has to come last, though (but is later +%% discarded!). And shift has to come before reduce. +sort_parse_actions([]) -> + []; +sort_parse_actions([{N, La_actions} | Tail]) -> + [{N, sort_parse_actions1(La_actions)} | sort_parse_actions(Tail)]. 
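+
+%% A worked example of the resolution in find_action_conflicts2/2 (a
+%% sketch): with the declaration  Left 300 '+'.  and the rule
+%% expr -> expr '+' expr, the state holding the item
+%% "expr -> expr '+' expr ." has both a shift and a reduce on '+',
+%% each with precedence {300, left}. Neither precedence is higher, so
+%% the "Ass1 =:= left, Ass2 =:= left" clause picks the reduce, giving
+%% left associativity;  Right  would pick the shift, and  Nonassoc
+%% yields the 'nonassoc' pseudo action that is discarded later on,
+%% turning  a + b + c  into a syntax error.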
+
+sort_parse_actions1(LaActions) ->
+    As = filter(fun({_LA, A}) -> A =:= accept end, LaActions),
+    Ss = filter(fun({_LA, A}) -> is_record(A, shift) end, LaActions),
+    Rs = filter(fun({_LA, A}) -> is_record(A, reduce) end, LaActions),
+    Ns = filter(fun({_LA, A}) -> A =:= nonassoc end, LaActions),
+    As ++ Ss ++ Rs ++ Ns.
+
+%% -> {State, StateRepr}. StateRepr has the same set of shift actions
+%% as State. No code will be output for State if State =/= StateRepr.
+find_identical_shift_states(StateActions) ->
+    L1 = [{Actions, State} || {State,Actions} <- StateActions],
+    {SO, NotSO} = lists:partition(fun({Actions,_States}) ->
+                                          shift_actions_only(Actions)
+                                  end, family(L1)),
+    R = [{State, hd(States)} || {_Actions, States} <- SO, State <- States]
+        ++
+        [{State, State} || {_Actions, States} <- NotSO, State <- States],
+    lists:keysort(1, R).
+
+-record(part_data, {name, eq_state, actions, n_actions, states}).
+
+%% Replace {SStates,Actions} with {SStates,{Actions,Jump}} where
+%% Jump describes which clauses have been extracted from shift
+%% states so that they can be used from other states. Some space is
+%% saved.
+find_partial_shift_states(StateActionsL, StateReprs) ->
+    L = [{State, Actions} || {{State,Actions}, {State,State}} <-
+                                 lists:zip(StateActionsL, StateReprs),
+                             shift_actions_only(Actions)],
+    StateActions = sofs:family(L, [{state,[action]}]),
+    StateAction = sofs:family_to_relation(StateActions),
+
+    %% Two actions are equal if they occur in the same states:
+    Parts = sofs:partition(sofs:range(StateActions)),
+    PartsL = sofs:to_external(Parts),
+    %% Assign temporary names to the parts of the partition (of actions):
+    PartNameL = lists:zip(seq1(length(PartsL)), PartsL),
+    ActPartL = [{Action,PartName} ||
+                   {PartName,Actions} <- PartNameL,
+                   Action <- Actions],
+    ActionPartName = sofs:relation(ActPartL, [{action,partname}]),
+    StatePartName = sofs:relative_product(StateAction, ActionPartName),
+    PartInStates = sofs:relation_to_family(sofs:converse(StatePartName)),
+
+    %% Parts that equal all actions of a state:
+    PartActions = sofs:family(PartNameL, [{partname,[action]}]),
+    PartState =
+        sofs:relative_product(PartActions, sofs:converse(StateActions)),
+    PartStates = sofs_family_with_domain(PartState, sofs:domain(PartActions)),
+
+    PartDataL = [#part_data{name = Nm, eq_state = EqS, actions = P,
+                            n_actions = length(P),
+                            states = ordsets:from_list(S)} ||
+                    {{Nm,P}, {Nm,S}, {Nm,EqS}} <-
+                        lists:zip3(PartNameL,
+                                   sofs:to_external(PartInStates),
+                                   sofs:to_external(PartStates))],
+    true = length(PartDataL) =:= length(PartNameL),
+    Ps = select_parts(PartDataL),
+
+    J1 = [{State, Actions, {jump_some,hd(States)}} ||
+             {_W, #part_data{actions = Actions, eq_state = [],
+                             states = States}} <- Ps,
+             State <- States],
+    J2 = [{State, Actions, {jump_all,To}} ||
+             {_W, #part_data{actions = Actions, eq_state = EqS,
+                             states = States}} <- Ps,
+             To <- EqS,
+             State <- States,
+             State =/= To],
+    J = lists:keysort(1, J1 ++ J2),
+
+    JumpStates = ordsets:from_list([S || {S,_,_} <- J]),
+    {JS, NJS} =
+        sofs:partition(1, sofs:relation(StateActionsL, [{state, actions}]),
+                       sofs:set(JumpStates, [state])),
+    R =
+        [{S, {Actions,jump_none}} || {S,Actions} <- sofs:to_external(NJS)]
+        ++
+        [{S, {Actions--Part, {Tag,ToS,Part}}} ||
+            {{S,Actions}, {S,Part,{Tag,ToS}}} <-
+                lists:zip(sofs:to_external(JS), J)],
+    true = length(StateActionsL) =:= length(R),
+    lists:keysort(1, R).
+
+%% Very greedy. By no means optimal.
+select_parts([]) -> + []; +select_parts(PartDataL) -> + T1 = [{score(PD), PD} || PD <- PartDataL], + [{W, PD} | Ws] = lists:reverse(lists:keysort(1, T1)), + #part_data{n_actions = NActions, states = S} = PD, + if + W < 8 -> % don't bother + []; + true -> + %% Cannot extract more clauses from the chosen part's states: + NL = [D#part_data{states = NewS} || + {W1, #part_data{states = S0}=D} <- Ws, + W1 > 0, + (NewS = ordsets:subtract(S0, S)) =/= []], + if + length(S) =:= 1; NActions =:= 1 -> + select_parts(NL); + true -> + [{W,PD} | select_parts(NL)] + end + end. + +%% Does it pay off to extract clauses into a new function? +%% Assumptions: +%% - a call costs 8 (C = 8); +%% - a clause (per action) costs 20 plus 8 (select) (Cl = 28); +%% - a new function costs 20 (funinfo) plus 16 (select) (F = 36). +%% A is number of actions, S is number of states. +%% Special case (the part equals all actions of some state): +%% C * (S - 1) < (S - 1) * A * Cl +%% Normal case (introduce new function): +%% F + A * Cl + C * S < S * A * Cl +score(#part_data{n_actions = NActions, eq_state = [], states = S}) -> + (length(S) * NActions * 28) - (36 + NActions * 28 + length(S) * 8); +score(#part_data{n_actions = NActions, states = S}) -> + ((length(S) - 1) * NActions * 28) - (8 * (length(S) - 1)). + +shift_actions_only(Actions) -> + length([foo || {_Ts,{shift,_,_,_,_}} <- Actions]) =:= length(Actions). + +collect_some_state_info(StateActions, StateReprs) -> + RF = fun({_LA, A}) -> is_record(A, reduce) end, + L = [{State, + begin + RO = lists:all(RF, LaActions), + %% C is currently always ""; identical states are all shift. + C = [io_lib:fwrite(<<" %% ~w\n">>, [State]) || + true <- [RO], Repr =/= State], + #state_info{reduce_only = RO, state_repr = Repr, comment = C} + end} || + {{State, LaActions}, {State, Repr}} <- + lists:zip(StateActions, StateReprs)], + list_to_tuple(L). + +conflict_error(Conflict, St0) -> + St1 = add_conflict(Conflict, St0), + add_error({conflict, Conflict}, St1). + +report_conflict(Conflict, St, ActionName, How) -> + if + St#yecc.verbose -> + io:fwrite(<<"~s\n">>, [format_conflict(Conflict)]), + Formated = format_symbol(ActionName), + case How of + prec -> + io:fwrite(<<"Resolved in favor of ~s.\n\n">>, [Formated]); + default -> + io:fwrite(<<"Conflict resolved in favor of ~s.\n\n">>, + [Formated]) + end; + true -> + ok + end. + +add_conflict(Conflict, St) -> + case Conflict of + {Symbol, StateN, _, {reduce, _, _, _}} -> + St#yecc{reduce_reduce = [{StateN,Symbol} |St#yecc.reduce_reduce]}; + {Symbol, StateN, _, {shift, _, _}} -> + St#yecc{shift_reduce = [{StateN,Symbol} | St#yecc.shift_reduce]}; + {_Symbol, _StateN, {one_level_up, _, _}, _Confl} -> + St + end. + +conflict(#shift{prec = Prec1, rule_nmbr = RuleNmbr1}, + #shift{prec = Prec2, rule_nmbr = RuleNmbr2}, Cxt) -> + %% Conflict due to precedences "one level up". Kludge. + #cxt{terminal = Symbol, state_n = N, yecc = St} = Cxt, + {_, L1, RuleN1} = rule(RuleNmbr1, St), + {_, L2, RuleN2} = rule(RuleNmbr2, St), + Confl = {one_level_up, {L1, RuleN1, Prec1}, {L2, RuleN2, Prec2}}, + {Symbol, N, Confl, Confl}; +conflict(#reduce{rule_nmbr = RuleNmbr1}, NewAction, Cxt) -> + #cxt{terminal = Symbol, state_n = N, yecc = St} = Cxt, + {R1, RuleLine1, RuleN1} = rule(RuleNmbr1, St), + Confl = case NewAction of + #reduce{rule_nmbr = RuleNmbr2} -> + {R2, RuleLine2, RuleN2} = rule(RuleNmbr2, St), + {reduce, R2, RuleN2, RuleLine2}; + #shift{state = NewState} -> + {shift, NewState, last(R1)} + end, + {Symbol, N, {R1, RuleN1, RuleLine1}, Confl}. 
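+
+%% Plugging numbers into score/1 above (a sketch): a part with
+%% n_actions = 3 shared by 4 states and with eq_state = [] scores
+%%   4*3*28 - (36 + 3*28 + 4*8) = 336 - 152 = 184,
+%% well above the cut-off of 8 in select_parts/1, so those three
+%% clauses are extracted into a common function. Parts with a single
+%% state or a single action are skipped by select_parts/1 even when
+%% their score passes the cut-off.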
+
+format_conflict({Symbol, N, _, {one_level_up,
+                                {L1, RuleN1, {P1, Ass1}},
+                                {L2, RuleN2, {P2, Ass2}}}}) ->
+    S1 = io_lib:fwrite(<<"Conflicting precedences of symbols when "
+                         "scanning ~s in state ~w:\n">>,
+                       [format_symbol(Symbol), N]),
+    S2 = io_lib:fwrite(<<"   ~s ~w (rule ~w at line ~w)\n"
+                         "      vs.\n">>,
+                       [format_assoc(Ass1), P1, RuleN1, L1]),
+    S3 = io_lib:fwrite(<<"   ~s ~w (rule ~w at line ~w)\n">>,
+                       [format_assoc(Ass2), P2, RuleN2, L2]),
+    [S1, S2, S3];
+format_conflict({Symbol, N, Reduce, Confl}) ->
+    S1 = io_lib:fwrite(<<"Parse action conflict scanning symbol "
+                         "~s in state ~w:\n">>, [format_symbol(Symbol), N]),
+    S2 = case Reduce of
+             {[HR | TR], RuleNmbr, RuleLine} ->
+                 io_lib:fwrite(<<"   Reduce to ~s from ~s (rule ~w at "
+                                 "line ~w)\n      vs.\n">>,
+                               [format_symbol(HR), format_symbols(TR),
+                                RuleNmbr, RuleLine])
+         end,
+    S3 = case Confl of
+             {reduce, [HR2|TR2], RuleNmbr2, RuleLine2} ->
+                 io_lib:fwrite(<<"   reduce to ~s from ~s "
+                                 "(rule ~w at line ~w).">>,
+                               [format_symbol(HR2), format_symbols(TR2),
+                                RuleNmbr2, RuleLine2]);
+             {shift, NewState, Sym} ->
+                 io_lib:fwrite(<<"   shift to state ~w, adding right "
+                                 "sisters to ~s.">>,
+                               [NewState, format_symbol(Sym)])
+         end,
+    [S1, S2, S3].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Code generation:
+
+%% The version up to and including parsetools-1.3 is called "1.0".
+%%
+%% "1.1", parsetools-1.4:
+%% - the prologue file has been updated;
+%% - nonassoc is new;
+%% - different order of clauses;
+%% - never more than one clause matching a given symbol in a given state;
+%% - file attributes relate messages to .yrl file;
+%% - actions put in inlined functions;
+%% - a few other minor fixes.
+%%
+%% "1.2", parsetools-1.4.2:
+%% - the generated code has been changed as follows:
+%%   - yeccpars2() calls the functions yeccpars2_State();
+%%   - several states can share yeccpars2_State(), which reduces code size;
+%%   - yeccgoto() has been split into one function per nonterminal;
+%%   - several minor changes have made the loaded code smaller.
+%% - the include file yeccpre.hrl has been changed incompatibly.
+%%
+%% "1.3", parsetools-1.4.4:
+%% - the generated code has been changed as follows:
+%%   - yeccgoto_T() no longer returns the next state, but calls yeccpars_S();
+%%   - yeccpars2() is not called when it is known which yeccpars2_S() to call;
+%%   - "__Stack" has been substituted for "Stack";
+%%   - several states can share yeccpars2_S_cont(), which reduces code size;
+%%   - instead of calling lists:nthtail(), matching code is emitted.
+
+-define(CODE_VERSION, "1.3").
+-define(YECC_BUG(M, A),
+        iolist_to_binary([" erlang:error({yecc_bug,\"",?CODE_VERSION,"\",",
+                          io_lib:fwrite(M, A), "}).\n\n"])).
+
+%% Returns number of newlines in included files.
+output_prelude(Outport, Inport, St0) when St0#yecc.includefile =:= [] ->
+    St5 = output_header(St0),
+    #yecc{infile = Infile, module = Module} = St5,
+    St10 = fwrite(St5, <<"-module(~w).\n">>, [Module]),
+    St20 =
+        fwrite(St10,
+               <<"-export([parse/1, parse_and_scan/1, format_error/1]).\n">>,
+               []),
+    {St25, N_lines_1, LastErlangCodeLine} =
+        case St20#yecc.erlang_code of
+            none ->
+                {St20, 0, no_erlang_code};
+            Next_line ->
+                St_10 = output_file_directive(St20, Infile, Next_line-1),
+                Nmbr_of_lines = include1([], Inport, Outport),
+                {St_10, Nmbr_of_lines,
+                 {last_erlang_code_line, Next_line+Nmbr_of_lines}}
+        end,
+    St30 = nl(St25),
+    IncludeFile =
+        filename:join([code:lib_dir(parsetools), "include","yeccpre.hrl"]),
+    %% Maybe one could assume there are no warnings in this file.
+ St = output_file_directive(St30, IncludeFile, 0), + N_lines_2 = include(St, IncludeFile, Outport), + {St, N_lines_1 + N_lines_2, LastErlangCodeLine}; +output_prelude(Outport, Inport, St0) -> + St5 = output_header(St0), + #yecc{infile = Infile, module = Module, includefile = Includefile} = St5, + St10 = fwrite(St5, <<"-module(~w).\n">>, [Module]), + St20 = output_file_directive(St10, Includefile, 0), + N_lines_1 = include(St20, Includefile, Outport), + St30 = nl(St20), + case St30#yecc.erlang_code of + none -> + {St30, N_lines_1, no_erlang_code}; + Next_line -> + St = output_file_directive(St30, Infile, Next_line-1), + Nmbr_of_lines = include1([], Inport, Outport), + {St, Nmbr_of_lines + N_lines_1, + {last_erlang_code_line, Next_line+Nmbr_of_lines}} + end. + +output_header(St0) -> + lists:foldl(fun(Str, St) -> fwrite(St, <<"~s\n">>, [Str]) + end, St0, St0#yecc.header). + +output_goto(St, [{_Nonterminal, []} | Go], StateInfo) -> + output_goto(St, Go, StateInfo); +output_goto(St0, [{Nonterminal, List} | Go], StateInfo) -> + F = function_name(yeccgoto, Nonterminal), + St10 = output_goto1(St0, List, F, StateInfo, true), + St = output_goto_fini(F, Nonterminal, St10), + output_goto(St, Go, StateInfo); +output_goto(St, [], _StateInfo) -> + St. + +output_goto1(St0, [{From, To} | Tail], F, StateInfo, IsFirst) -> + St10 = delim(St0, IsFirst), + {To, ToInfo} = lookup_state(StateInfo, To), + #state_info{reduce_only = RO, state_repr = Repr, comment = C} = ToInfo, + if + RO -> + %% Reduce actions do not use the state, so we just pass + %% the old (now bogus) on: + FromS = io_lib:fwrite("~w=_S", [From]), + ToS = "_S"; + true -> + FromS = io_lib:fwrite("~w", [From]), + ToS = io_lib:fwrite("~w", [To]) + end, + St20 = fwrite(St10, <<"~w(~s, Cat, Ss, Stack, T, Ts, Tzr) ->\n">>, + [F,FromS]), + St30 = fwrite(St20, <<"~s">>, [C]), + %% Short-circuit call to yeccpars2: + St = fwrite(St30, <<" yeccpars2_~w(~s, Cat, Ss, Stack, T, Ts, Tzr)">>, + [Repr, ToS]), + output_goto1(St, Tail, F, StateInfo, false); +output_goto1(St, [], _F, _StateInfo, _IsFirst) -> + St. + +output_goto_fini(F, NT, #yecc{includefile_version = {1,1}}=St0) -> + %% Backward compatibility. + St10 = delim(St0, false), + St = fwrite(St10, <<"~w(State, _Cat, _Ss, _Stack, _T, _Ts, _Tzr) ->\n">>, + [F]), + fwrite(St, + ?YECC_BUG(<<"{~w, State, missing_in_goto_table}">>, [NT]), + []); +output_goto_fini(_F, _NT, St) -> + fwrite(St, <<".\n\n">>, []). + +%% Find actions having user code. +find_user_code(ParseActions, St) -> + [#user_code{state = State, + terminal = Terminal, + funname = inlined_function_name(State, Terminal), + action = Action} || + {State, La_actions} <- ParseActions, + {Action, Terminals, RuleNmbr, NmbrOfDaughters} + <- find_user_code2(La_actions), + case tokens(RuleNmbr, St) of + [{var, _, '__1'}] -> NmbrOfDaughters =/= 1; + _ -> true + end, + Terminal <- Terminals]. + +find_user_code2([]) -> + []; +find_user_code2([{_, #reduce{rule_nmbr = RuleNmbr, + nmbr_of_daughters = NmbrOfDaughters} + =Action}]) -> + %% Same optimization as in output_state_actions. + [{Action, ["Cat"], RuleNmbr, NmbrOfDaughters}]; +find_user_code2([{La, #reduce{rule_nmbr = RuleNmbr, + nmbr_of_daughters = NmbrOfDaughters} + =Action} | T]) -> + [{Action,La, RuleNmbr, NmbrOfDaughters} | find_user_code2(T)]; +find_user_code2([_ | T]) -> + find_user_code2(T). + +output_actions(St0, StateJumps, StateInfo) -> + %% Not all the clauses of the dispatcher function yeccpars2() can + %% be reached. 
Only when shifting, that is, calling yeccpars1(), + %% will yeccpars2() be called. + Y2CL = [NewState || {_State,{Actions,_J}} <- StateJumps, + {_LA, #shift{state = NewState}} <- Actions], + Y2CS = ordsets:from_list([0 | Y2CL]), + Y2S = ordsets:from_list([S || {S,_} <- StateJumps]), + NY2CS = ordsets:subtract(Y2S, Y2CS), + Sel = [{S,true} || S <- ordsets:to_list(Y2CS)] ++ + [{S,false} || S <- ordsets:to_list(NY2CS)], + + SelS = [{State,Called} || + {{State,_JActions}, {State,Called}} <- + lists:zip(StateJumps, lists:keysort(1, Sel))], + St10 = foldl(fun({State, Called}, St_0) -> + {State, #state_info{state_repr = IState}} = + lookup_state(StateInfo, State), + output_state_selection(St_0, State, IState, Called) + end, St0, SelS), + St20 = fwrite(St10, <<"yeccpars2(Other, _, _, _, _, _, _) ->\n">>, []), + St = fwrite(St20, + ?YECC_BUG(<<"{missing_state_in_action_table, Other}">>, []), + []), + foldl(fun({State, JActions}, St_0) -> + {State, #state_info{state_repr = IState}} = + lookup_state(StateInfo, State), + output_state_actions(St_0, State, IState, + JActions, StateInfo) + end, St, StateJumps). + +output_state_selection(St0, State, IState, Called) -> + Comment = [<<"%% ">> || false <- [Called]], + St = fwrite(St0, <<"~syeccpars2(~w=S, Cat, Ss, Stack, T, Ts, Tzr) ->\n">>, + [Comment, State]), + fwrite(St, + <<"~s yeccpars2_~w(S, Cat, Ss, Stack, T, Ts, Tzr);\n">>, + [Comment, IState]). + +output_state_actions(St, State, State, {Actions,jump_none}, SI) -> + output_state_actions1(St, State, Actions, true, normal, SI); +output_state_actions(St0, State, State, {Actions, Jump}, SI) -> + {Tag, To, Common} = Jump, + CS = case Tag of + jump_some -> list_to_atom(lists:concat([cont_, To])); + jump_all -> To + end, + St = output_state_actions1(St0, State, Actions, true, {to, CS}, SI), + if + To =:= State -> + output_state_actions1(St, CS, Common, true, normal, SI); + true -> + St + end; +output_state_actions(St, State, JState, _XActions, _SI) -> + fwrite(St, <<"%% yeccpars2_~w: see yeccpars2_~w\n\n">>, [State, JState]). + +output_state_actions1(St, State, [], _IsFirst, normal, _SI) -> + output_state_actions_fini(State, St); +output_state_actions1(St0, State, [], IsFirst, {to, ToS}, _SI) -> + St = delim(St0, IsFirst), + fwrite(St, + <<"yeccpars2_~w(S, Cat, Ss, Stack, T, Ts, Tzr) ->\n" + " yeccpars2_~w(S, Cat, Ss, Stack, T, Ts, Tzr).\n\n">>, + [State, ToS]); +output_state_actions1(St0, State, [{_, #reduce{}=Action}], + IsFirst, _End, SI) -> + St = output_reduce(St0, State, "Cat", Action, IsFirst, SI), + fwrite(St, <<".\n\n">>, []); +output_state_actions1(St0, State, [{Lookahead,Action} | Tail], + IsFirst, End, SI) -> + {_, St} = + foldl(fun(Terminal, {IsFst,St_0}) -> + {false, + output_action(St_0, State, Terminal, Action, IsFst,SI)} + end, {IsFirst,St0}, Lookahead), + output_state_actions1(St, State, Tail, false, End, SI). 
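+
+%% The clauses written by output_state_actions1/6 come out like the
+%% generated parser later in this commit (yeccparser.erl); a shift
+%% action, for instance, becomes
+%%
+%%   yeccpars2_3(S, '->', Ss, Stack, T, Ts, Tzr) ->
+%%       yeccpars1(S, 10, Ss, Stack, T, Ts, Tzr).
+%%
+%% A token matching no clause raises function_clause, which the
+%% prologue's yeccpars0/2 reports as a syntax error.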
+ +output_action(St, State, Terminal, #reduce{}=Action, IsFirst, SI) -> + output_reduce(St, State, Terminal, Action, IsFirst, SI); +output_action(St0, State, Terminal, #shift{state = NewState}, IsFirst, _SI) -> + St10 = delim(St0, IsFirst), + St = fwrite(St10, <<"yeccpars2_~w(S, ~s, Ss, Stack, T, Ts, Tzr) ->\n">>, + [State, quoted_atom(Terminal)]), + output_call_to_includefile(NewState, St); +output_action(St0, State, Terminal, accept, IsFirst, _SI) -> + St10 = delim(St0, IsFirst), + St = fwrite(St10, + <<"yeccpars2_~w(_S, ~s, _Ss, Stack, _T, _Ts, _Tzr) ->\n">>, + [State, quoted_atom(Terminal)]), + fwrite(St, <<" {ok, hd(Stack)}">>, []); +output_action(St, _State, _Terminal, nonassoc, _IsFirst, _SI) -> + St. + +output_call_to_includefile(NewState, #yecc{includefile_version = {1,1}}=St) -> + %% Backward compatibility. + fwrite(St, <<" yeccpars1(Ts, Tzr, ~w, [S | Ss], [T | Stack])">>, + [NewState]); +output_call_to_includefile(NewState, St) -> + fwrite(St, <<" yeccpars1(S, ~w, Ss, Stack, T, Ts, Tzr)">>, + [NewState]). + +output_state_actions_fini(State, #yecc{includefile_version = {1,1}}=St0) -> + %% Backward compatibility. + St10 = delim(St0, false), + St = fwrite(St10, <<"yeccpars2_~w(_, _, _, _, T, _, _) ->\n">>, [State]), + fwrite(St, <<" yeccerror(T).\n\n">>, []); +output_state_actions_fini(_State, St) -> + fwrite(St, <<".\n\n">>, []). + +output_reduce(St0, State, Terminal0, + #reduce{rule_nmbr = RuleNmbr, + head = Head, + nmbr_of_daughters = NmbrOfDaughters}, + IsFirst, StateInfo) -> + St10 = delim(St0, IsFirst), + Terminal = if + is_atom(Terminal0) -> quoted_atom(Terminal0); + true -> Terminal0 + end, + St20 = fwrite(St10, + <<"yeccpars2_~w(_S, ~s, Ss, Stack, T, Ts, Tzr) ->\n">>, + [State, Terminal]), + St30 = + if + NmbrOfDaughters < 2 -> + Ns = "Ss", + St20; + true -> + Ns = "Nss", + Tmp = string:join(lists:duplicate(NmbrOfDaughters - 1, "_"), + ","), + fwrite(St20, <<" [~s|Nss] = Ss,\n">>, [Tmp]) + end, + St40 = case tokens(RuleNmbr, St30) of + [{var, _, '__1'}] when NmbrOfDaughters =:= 1 -> + NewStack = "Stack", + St30; + _ -> + NewStack = "NewStack", + fwrite(St30, <<" NewStack = ~w(Stack),\n">>, + [inlined_function_name(State, Terminal0)]) + end, + if + NmbrOfDaughters =:= 0 -> + NextState = goto(State, Head, St40), + {NextState, I} = lookup_state(StateInfo, NextState), + #state_info{reduce_only = RO, state_repr = Repr, comment = C} = I, + %% Reduce actions do not use the state, so we just pass + %% the old (now bogus) on: + if + RO -> NextS = "_S"; + true -> NextS = io_lib:fwrite("~w", [NextState]) + end, + St = fwrite(St40, <<"~s">>, [C]), + %% Short-circuit call to yeccpars2: + fwrite(St, + <<" yeccpars2_~w(~s, ~s, [~w | Ss], ~s, T, Ts, Tzr)">>, + [Repr, NextS, Terminal, State, NewStack]); + true -> + fwrite(St40, + <<" ~w(hd(~s), ~s, ~s, ~s, T, Ts, Tzr)">>, + [function_name(yeccgoto, Head), Ns, + Terminal, Ns, NewStack]) + end. + +delim(St, true) -> + St; +delim(St, false) -> + fwrite(St, <<";\n">>, []). + +quoted_atom(Atom) -> + io_lib:fwrite(<<"~w">>, [Atom]). + +output_inlined(St, UserCodeActions, Infile) -> + foldl(fun(#user_code{funname = InlinedFunctionName, + action = Action}, St_0) -> + output_inlined(St_0, InlinedFunctionName, + Action, Infile) + end, St, UserCodeActions). + +%% Each action with user code is placed in a separate inlined function. +%% The purpose is to be able to pinpoint errors and warnings correctly. 
+output_inlined(St0, FunctionName, Reduce, Infile) -> + #reduce{rule_nmbr = RuleNmbr, nmbr_of_daughters = N_daughters} = Reduce, + #rule{tokens = Tokens, is_well_formed = WF} = get_rule(RuleNmbr, St0), + Line0 = first_line(Tokens), + NLines = last_line(Tokens) - Line0, + + St5 = if + WF -> + St0; + not WF -> + %% The compiler will generate an error message for + %% the inlined function (unless the reason that yecc + %% failed to parse the action was some macro). The + %% line number of the message will be correct since + %% we are keeping track of the current line of the + %% output file... + #yecc{outfile = Outfile, line = CurLine} = St0, + output_file_directive(St0, Outfile, CurLine) + end, + + CodeStartLine = lists:max([0, Line0 - 4]), + St10 = fwrite(St5, <<"-compile({inline,~w/1}).\n">>, [FunctionName]), + St20 = output_file_directive(St10, Infile, CodeStartLine), + St30 = fwrite(St20, <<"~w(__Stack0) ->\n">>, [FunctionName]), + %% Currently the (old) inliner emits less code if matching the + %% stack inside the body rather than in the head... + St40 = case N_daughters of + 0 -> + Stack = "__Stack0", + St30; + _ -> + Stack = "__Stack", + A = concat(flatmap(fun(I) -> [",__",I] end, + lists:seq(N_daughters, 1, -1))), + fwrite(St30, <<" ~s = __Stack0,\n">>, + [append(["[", tl(A), " | __Stack]"])]) + end, + St = St40#yecc{line = St40#yecc.line + NLines}, + fwrite(St, <<" [begin\n ~s\n end | ~s].\n\n">>, + [pp_tokens(Tokens, Line0), Stack]). + +inlined_function_name(State, "Cat") -> + inlined_function_name(State, ""); +inlined_function_name(State, Terminal) -> + list_to_atom(concat([yeccpars2_, State, '_', Terminal])). + +-compile({nowarn_unused_function,function_name/2}). +function_name(Name, Suf) -> + list_to_atom(concat([Name, '_' | quoted_atom(Suf)])). + +rule(RulePointer, St) -> + #rule{n = N, line = Line, symbols = Symbols} = + dict:fetch(RulePointer, St#yecc.rule_pointer2rule), + {Symbols, Line, N}. + +get_rule(RuleNmbr, St) -> + dict:fetch(RuleNmbr, St#yecc.rule_pointer2rule). + +tokens(RuleNmbr, St) -> + Rule = dict:fetch(RuleNmbr, St#yecc.rule_pointer2rule), + Rule#rule.tokens. + +goto(From, Symbol, St) -> + case ets:lookup(St#yecc.goto_tab, {From, Symbol}) of + [{_, To}] -> + To; + [] -> + erlang:error({error_in_goto_table, From, Symbol}) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Auxiliaries: + +-ifdef(SYMBOLS_AS_CODES). + +%%% Bit mask operations. + +-compile({inline,[set_empty/0]}). +set_empty() -> + 0. + +set_add(I, BM) -> + (1 bsl I) bor BM. + +-compile({inline,[set_member/2]}). +set_member(I, BM) -> + ((1 bsl I) band BM) =/= 0. + +%% Assumes I is a member... +-compile({inline,[set_delete/2]}). +set_delete(I, BM) -> + (1 bsl I) bxor BM. + +-compile({inline,[set_union/2]}). +set_union(BM1, BM2) -> + BM1 bor BM2. + +-compile({inline,[set_is_subset/2]}). +set_is_subset(BM1, BM2) -> + (BM1 band BM2) =:= BM1. + +empty_member(BM) -> + set_member(0, BM). + +empty_delete(BM) -> + set_delete(0, BM). + +code_symbols(Ss, SymbolTable) -> + map(fun(S) -> ets:lookup_element(SymbolTable, S, 2) end, Ss). + +decode_symbol(C, InvSymbolTable) -> + ets:lookup_element(InvSymbolTable, C, 1). + +code_terminal(T, SymbolTab) -> + set_add(ets:lookup_element(SymbolTab, T, 2), 0). + +decode_terminals(BM, InvSymbolTab) -> + case get(BM) of + undefined -> + Symbols = decode_terminals(BM, 0, InvSymbolTab), + put(BM, Symbols), + Symbols; + Symbols -> + Symbols + end. 
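+
+%% A worked example of the bit mask sets above (a sketch): terminals
+%% coded 0 ('$empty'), 2 and 5 give
+%%   set_add(5, set_add(2, set_add(0, set_empty()))) = 2#100101 = 37,
+%% so set_member(2, 37) and empty_member(37) are both true, and
+%% set_is_subset(2#000101, 37) holds because 5 band 37 =:= 5.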
+ +decode_terminals(0, _I, _InvSymbolTab) -> + []; +decode_terminals(BM, I, InvSymbolTab) -> + case set_member(I, BM) of + true -> + [ets:lookup_element(InvSymbolTab, I, 1) + | decode_terminals(set_delete(I, BM), I+1, InvSymbolTab)]; + false -> + decode_terminals(BM, I+1, InvSymbolTab) + end. + +set_add_terminal({_Symbol, TerminalNum}, BM) -> + set_add(TerminalNum, BM). + +-compile({inline,[is_terminal/2]}). +is_terminal(_Tab, SymbolCode) -> + SymbolCode >= 0. + +left_corner_symbol_table(St) -> + St#yecc.inv_symbol_tab. + +-else. + +set_empty() -> + []. + +set_add(Symbol, L) -> + ordsets:union([Symbol], L). + +set_union(Es1, Es2) -> + ordsets:union(Es1, Es2). + +set_is_subset(Es1, Es2) -> + ordsets:is_subset(Es1, Es2). + +code_symbols(Ss, _SymbolTab) -> + Ss. + +decode_symbol(S, _InvSymbolTab) -> + S. + +code_terminal(T, _SymbolTab) -> + [T]. + +decode_terminals(Ts, _InvSymbolTab) -> + Ts. + +empty_member(['$empty' | _]) -> + true; +empty_member(_) -> + false. + +empty_delete(['$empty' | Terminals]) -> + Terminals. + +set_add_terminal({Symbol, _TerminalNum}, L) -> + set_add(Symbol, L). + +is_terminal(Tab, SymbolName) -> + ets:lookup_element(Tab, SymbolName, 2) >= 0. + +left_corner_symbol_table(St) -> + St#yecc.symbol_tab. + +-endif. % SYMBOLS_AS_CODES + +intersect(L1, L2) -> + ordsets:to_list(ordsets:intersection(ordsets:from_list(L1), + ordsets:from_list(L2))). + +format_symbols([Sym | Syms]) -> + concat([format_symbol(Sym) | format_symbols1(Syms)]). + +format_symbols1([]) -> + []; +format_symbols1([H | T]) -> + [" ", format_symbol(H) | format_symbols1(T)]. + +include(St, File, Outport) -> + case file:open(File, [read]) of + {error, Reason} -> + throw(add_error(File, none, {file_error, Reason}, St)); + {ok, Inport} -> + Line = io:get_line(Inport, ''), + N_lines = include1(Line, Inport, Outport), + file:close(Inport), + N_lines + end. + +include1(Line, Inport, Outport) -> + include1(Line, Inport, Outport, 0). + +include1(eof, _, _, Nmbr_of_lines) -> + Nmbr_of_lines; +include1(Line, Inport, Outport, Nmbr_of_lines) -> + Incr = case member($\n, Line) of + true -> 1; + false -> 0 + end, + io:put_chars(Outport, Line), + include1(io:get_line(Inport, ''), Inport, Outport, Nmbr_of_lines + Incr). + +includefile_version([]) -> + {1,2}; +includefile_version(Includefile) -> + case epp:open(Includefile, []) of + {ok, Epp} -> + try + parse_file(Epp) + after + epp:close(Epp) + end; + {error, _Error} -> + {1,1} + end. + +parse_file(Epp) -> + case epp:parse_erl_form(Epp) of + {ok, {function,_Line,yeccpars1,7,_Clauses}} -> + {1,2}; + {eof,_Line} -> + {1,1}; + _Form -> + parse_file(Epp) + end. + +%% Keeps the line breaks of the original code. +pp_tokens(Tokens, Line0) -> + concat(pp_tokens1(Tokens, Line0, [])). + +pp_tokens1([], _Line0, _T0) -> + []; +pp_tokens1([T | Ts], Line0, T0) -> + Line = element(2, T), + [pp_sep(Line, Line0, T0), pp_symbol(T) | pp_tokens1(Ts, Line, T)]. + +pp_symbol({var,_,Var}) -> Var; +pp_symbol({_,_,Symbol}) -> io_lib:fwrite(<<"~p">>, [Symbol]); +pp_symbol({Symbol, _}) -> Symbol. + +pp_sep(Line, Line0, T0) when Line > Line0 -> + ["\n " | pp_sep(Line - 1, Line0, T0)]; +pp_sep(_Line, _Line0, {'.',_}) -> + ""; +pp_sep(_Line, _Line0, _T0) -> + " ". + +output_file_directive(St, Filename, Line) when St#yecc.file_attrs -> + fwrite(St, <<"-file(~s, ~w).\n">>, + [format_filename(Filename), Line]); +output_file_directive(St, _Filename, _Line) -> + St. + +first_line(Tokens) -> + element(2, hd(Tokens)). + +last_line(Tokens) -> + element(2, lists:last(Tokens)). 
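+
+%% Note on pp_tokens/2 above (a sketch of its effect): tokens on the
+%% same line are joined with single spaces, and each line number
+%% increase is re-emitted as a newline, so inlined action code spans
+%% as many lines in the generated file as it did in the .yrl file and
+%% the -file/2 line counts stay accurate.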
+ +%% Keep track of the current line in the generated file. +fwrite(#yecc{outport = Outport, line = Line}=St, Format, Args) -> + NLines = count_nl(Format), + io:fwrite(Outport, Format, Args), + St#yecc{line = Line + NLines}. + +%% Assumes \n is used, and never ~n. +count_nl(<<$\n,Rest/binary>>) -> + 1 + count_nl(Rest); +count_nl(<<_,Rest/binary>>) -> + count_nl(Rest); +count_nl(<<>>) -> + 0. + +nl(#yecc{outport = Outport, line = Line}=St) -> + io:nl(Outport), + St#yecc{line = Line + 1}. + +format_filename(Filename) -> + io_lib:write_string(filename:flatten(Filename)). + +format_assoc(left) -> + "Left"; +format_assoc(right) -> + "Right"; +format_assoc(unary) -> + "Unary"; +format_assoc(nonassoc) -> + "Nonassoc". + +format_symbol(Symbol) -> + String = concat([Symbol]), + case erl_scan:string(String) of + {ok, [{atom, _, _}], _} -> + io_lib:fwrite(<<"~w">>, [Symbol]); + {ok, [{Word, _}], _} when Word =/= ':', Word =/= '->' -> + case erl_scan:reserved_word(Word) of + true -> + String; + false -> + io_lib:fwrite(<<"~w">>, [Symbol]) + end; + {ok, [{var, _, _}], _} -> + String; + _ -> + io_lib:fwrite(<<"~w">>, [Symbol]) + end. + +inverse(L) -> + sort([{A,B} || {B,A} <- L]). + +family(L) -> + sofs:to_external(sofs:relation_to_family(sofs:relation(L))). + +seq1(To) when To < 1 -> + []; +seq1(To) -> + lists:seq(1, To). + +count(From, L) -> + lists:zip(L, lists:seq(From, length(L)-1+From)). + +family_with_domain(L, DL) -> + sofs:to_external(sofs_family_with_domain(sofs:relation(L), sofs:set(DL))). + +sofs_family_with_domain(R0, D) -> + R = sofs:restriction(R0, D), + F = sofs:relation_to_family(R), + FD = sofs:constant_function(D, sofs:from_term([])), + sofs:family_union(F, FD). diff --git a/lib/parsetools/src/yeccgramm.yrl b/lib/parsetools/src/yeccgramm.yrl new file mode 100644 index 0000000000..562a9a7458 --- /dev/null +++ b/lib/parsetools/src/yeccgramm.yrl @@ -0,0 +1,74 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%% This is the syntax (metagrammar) of grammar definitions of the yecc +%% parser generator. + +Nonterminals +grammar declaration rule head symbol symbols strings attached_code +token tokens. + +Terminals +atom float integer reserved_symbol reserved_word string char var +'->' ':' dot. + +Rootsymbol grammar. + +grammar -> declaration : '$1'. +grammar -> rule : '$1'. +declaration -> symbol symbols dot: {'$1', '$2'}. +declaration -> symbol strings dot: {'$1', '$2'}. +rule -> head '->' symbols attached_code dot: {rule, ['$1' | '$3'], '$4'}. +head -> symbol : '$1'. +symbols -> symbol : ['$1']. +symbols -> symbol symbols : ['$1' | '$2']. +strings -> string : ['$1']. +strings -> string strings : ['$1' | '$2']. +attached_code -> ':' tokens : {erlang_code, '$2'}. +attached_code -> '$empty' : {erlang_code, [{atom, 0, '$undefined'}]}. +tokens -> token : ['$1']. +tokens -> token tokens : ['$1' | '$2']. +symbol -> var : symbol('$1'). 
+symbol -> atom : symbol('$1'). +symbol -> integer : symbol('$1'). +symbol -> reserved_word : symbol('$1'). +token -> var : '$1'. +token -> atom : '$1'. +token -> float : '$1'. +token -> integer : '$1'. +token -> string : '$1'. +token -> char : '$1'. +token -> reserved_symbol : {value_of('$1'), line_of('$1')}. +token -> reserved_word : {value_of('$1'), line_of('$1')}. +token -> '->' : {'->', line_of('$1')}. % Have to be treated in this +token -> ':' : {':', line_of('$1')}. % manner, because they are also + % special symbols of the metagrammar + +Erlang code. + +-record(symbol, {line, name}). + +symbol(Symbol) -> + #symbol{line = line_of(Symbol), name = value_of(Symbol)}. + +value_of(Token) -> + element(3, Token). + +line_of(Token) -> + element(2, Token). diff --git a/lib/parsetools/src/yeccparser.erl b/lib/parsetools/src/yeccparser.erl new file mode 100644 index 0000000000..80a6bbce0e --- /dev/null +++ b/lib/parsetools/src/yeccparser.erl @@ -0,0 +1,642 @@ +-module(yeccparser). +-export([parse/1, parse_and_scan/1, format_error/1]). +-file("yeccgramm.yrl", 63). + +-record(symbol, {line, name}). + +symbol(Symbol) -> + #symbol{line = line_of(Symbol), name = value_of(Symbol)}. + +value_of(Token) -> + element(3, Token). + +line_of(Token) -> + element(2, Token). + +-file("/clearcase/otp/erts/lib/parsetools/include/yeccpre.hrl", 0). +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% The parser generator will insert appropriate declarations before this line.% + +-type(yecc_ret() :: {'error', _} | {'ok', _}). + +-spec(parse/1 :: (_) -> yecc_ret()). +parse(Tokens) -> + yeccpars0(Tokens, false). + +-spec(parse_and_scan/1 :: + ({function() | {atom(), atom()}, [_]} | {atom(), atom(), [_]}) -> + yecc_ret()). +parse_and_scan({F, A}) -> % Fun or {M, F} + yeccpars0([], {F, A}); +parse_and_scan({M, F, A}) -> + yeccpars0([], {{M, F}, A}). + +-spec(format_error/1 :: (any()) -> [char() | list()]). +format_error(Message) -> + case io_lib:deep_char_list(Message) of + true -> + Message; + _ -> + io_lib:write(Message) + end. + +% To be used in grammar files to throw an error message to the parser +% toplevel. Doesn't have to be exported! +-compile({nowarn_unused_function,{return_error,2}}). +-spec(return_error/2 :: (integer(), any()) -> no_return()). +return_error(Line, Message) -> + throw({error, {Line, ?MODULE, Message}}). + +-define(CODE_VERSION, "1.3"). 
+ +yeccpars0(Tokens, MFA) -> + try yeccpars1(Tokens, MFA, 0, [], []) + catch + error: Error -> + Stacktrace = erlang:get_stacktrace(), + try yecc_error_type(Error, Stacktrace) of + {syntax_error, Token} -> + yeccerror(Token); + {missing_in_goto_table=Tag, Symbol, State} -> + Desc = {Symbol, State, Tag}, + erlang:raise(error, {yecc_bug, ?CODE_VERSION, Desc}, + Stacktrace) + catch _:_ -> erlang:raise(error, Error, Stacktrace) + end; + throw: {error, {_Line, ?MODULE, _M}} = Error -> + Error % probably from return_error/2 + end. + +yecc_error_type(function_clause, [{?MODULE,F,[State,_,_,_,Token,_,_]} | _]) -> + case atom_to_list(F) of + "yeccpars2" ++ _ -> + {syntax_error, Token}; + "yeccgoto_" ++ SymbolL -> + {ok,[{atom,_,Symbol}],_} = erl_scan:string(SymbolL), + {missing_in_goto_table, Symbol, State} + end. + +yeccpars1([Token | Tokens], Tokenizer, State, States, Vstack) -> + yeccpars2(State, element(1, Token), States, Vstack, Token, Tokens, + Tokenizer); +yeccpars1([], {F, A}, State, States, Vstack) -> + case apply(F, A) of + {ok, Tokens, _Endline} -> + yeccpars1(Tokens, {F, A}, State, States, Vstack); + {eof, _Endline} -> + yeccpars1([], false, State, States, Vstack); + {error, Descriptor, _Endline} -> + {error, Descriptor} + end; +yeccpars1([], false, State, States, Vstack) -> + yeccpars2(State, '$end', States, Vstack, {'$end', 999999}, [], false). + +%% yeccpars1/7 is called from generated code. +%% +%% When using the {includefile, Includefile} option, make sure that +%% yeccpars1/7 can be found by parsing the file without following +%% include directives. yecc will otherwise assume that an old +%% yeccpre.hrl is included (one which defines yeccpars1/5). +yeccpars1(State1, State, States, Vstack, Stack1, [Token | Tokens], + Tokenizer) -> + yeccpars2(State, element(1, Token), [State1 | States], + [Stack1 | Vstack], Token, Tokens, Tokenizer); +yeccpars1(State1, State, States, Vstack, Stack1, [], {F, A}) -> + case apply(F, A) of + {ok, Tokens, _Endline} -> + yeccpars1(State1, State, States, Vstack, Stack1, Tokens, {F, A}); + {eof, _Endline} -> + yeccpars1(State1, State, States, Vstack, Stack1, [], false); + {error, Descriptor, _Endline} -> + {error, Descriptor} + end; +yeccpars1(State1, State, States, Vstack, Stack1, [], false) -> + yeccpars2(State, '$end', [State1 | States], [Stack1 | Vstack], + {'$end', 999999}, [], false). + +% For internal use only. +yeccerror(Token) -> + Text = case catch erl_scan:token_info(Token, text) of + {text, Txt} -> Txt; + _ -> yecctoken2string(Token) + end, + Location = case catch erl_scan:token_info(Token, location) of + {location, Loc} -> Loc; + _ -> element(2, Token) + end, + {error, {Location, ?MODULE, ["syntax error before: ", Text]}}. + +yecctoken2string({atom, _, A}) -> io_lib:write(A); +yecctoken2string({integer,_,N}) -> io_lib:write(N); +yecctoken2string({float,_,F}) -> io_lib:write(F); +yecctoken2string({char,_,C}) -> io_lib:write_char(C); +yecctoken2string({var,_,V}) -> io_lib:format("~s", [V]); +yecctoken2string({string,_,S}) -> io_lib:write_unicode_string(S); +yecctoken2string({reserved_symbol, _, A}) -> io_lib:format("~w", [A]); +yecctoken2string({_Cat, _, Val}) -> io_lib:format("~w", [Val]); +yecctoken2string({dot, _}) -> "'.'"; +yecctoken2string({'$end', _}) -> + []; +yecctoken2string({Other, _}) when is_atom(Other) -> + io_lib:format("~w", [Other]); +yecctoken2string(Other) -> + io_lib:write(Other). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + +-file("yeccparser.erl", 168). 
+ +yeccpars2(0=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_0(S, Cat, Ss, Stack, T, Ts, Tzr); +%% yeccpars2(1=S, Cat, Ss, Stack, T, Ts, Tzr) -> +%% yeccpars2_1(S, Cat, Ss, Stack, T, Ts, Tzr); +%% yeccpars2(2=S, Cat, Ss, Stack, T, Ts, Tzr) -> +%% yeccpars2_2(S, Cat, Ss, Stack, T, Ts, Tzr); +%% yeccpars2(3=S, Cat, Ss, Stack, T, Ts, Tzr) -> +%% yeccpars2_3(S, Cat, Ss, Stack, T, Ts, Tzr); +%% yeccpars2(4=S, Cat, Ss, Stack, T, Ts, Tzr) -> +%% yeccpars2_4(S, Cat, Ss, Stack, T, Ts, Tzr); +%% yeccpars2(5=S, Cat, Ss, Stack, T, Ts, Tzr) -> +%% yeccpars2_5(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(6=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_6(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(7=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_7(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(8=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_8(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(9=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_9(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(10=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_0(S, Cat, Ss, Stack, T, Ts, Tzr); +%% yeccpars2(11=S, Cat, Ss, Stack, T, Ts, Tzr) -> +%% yeccpars2_11(S, Cat, Ss, Stack, T, Ts, Tzr); +%% yeccpars2(12=S, Cat, Ss, Stack, T, Ts, Tzr) -> +%% yeccpars2_12(S, Cat, Ss, Stack, T, Ts, Tzr); +%% yeccpars2(13=S, Cat, Ss, Stack, T, Ts, Tzr) -> +%% yeccpars2_13(S, Cat, Ss, Stack, T, Ts, Tzr); +%% yeccpars2(14=S, Cat, Ss, Stack, T, Ts, Tzr) -> +%% yeccpars2_14(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(15=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_15(S, Cat, Ss, Stack, T, Ts, Tzr); +%% yeccpars2(16=S, Cat, Ss, Stack, T, Ts, Tzr) -> +%% yeccpars2_16(S, Cat, Ss, Stack, T, Ts, Tzr); +%% yeccpars2(17=S, Cat, Ss, Stack, T, Ts, Tzr) -> +%% yeccpars2_17(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(18=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_18(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(19=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_19(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(20=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_20(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(21=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_21(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(22=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_22(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(23=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_23(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(24=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_24(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(25=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_25(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(26=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_26(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(27=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_27(S, Cat, Ss, Stack, T, Ts, Tzr); +%% yeccpars2(28=S, Cat, Ss, Stack, T, Ts, Tzr) -> +%% yeccpars2_28(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(29=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_29(S, Cat, Ss, Stack, T, Ts, Tzr); +%% yeccpars2(30=S, Cat, Ss, Stack, T, Ts, Tzr) -> +%% yeccpars2_30(S, Cat, Ss, Stack, T, Ts, Tzr); +%% yeccpars2(31=S, Cat, Ss, Stack, T, Ts, Tzr) -> +%% yeccpars2_31(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(32=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_32(S, Cat, Ss, Stack, T, Ts, Tzr); +%% yeccpars2(33=S, Cat, Ss, Stack, T, Ts, Tzr) -> +%% yeccpars2_33(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(34=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_34(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(35=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_35(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(Other, _, _, _, _, _, _) -> + 
erlang:error({yecc_bug,"1.3",{missing_state_in_action_table, Other}}). + +yeccpars2_0(S, atom, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 6, Ss, Stack, T, Ts, Tzr); +yeccpars2_0(S, integer, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 7, Ss, Stack, T, Ts, Tzr); +yeccpars2_0(S, reserved_word, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 8, Ss, Stack, T, Ts, Tzr); +yeccpars2_0(S, var, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 9, Ss, Stack, T, Ts, Tzr). + +yeccpars2_1(S, atom, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 6, Ss, Stack, T, Ts, Tzr); +yeccpars2_1(S, integer, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 7, Ss, Stack, T, Ts, Tzr); +yeccpars2_1(S, reserved_word, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 8, Ss, Stack, T, Ts, Tzr); +yeccpars2_1(S, string, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 32, Ss, Stack, T, Ts, Tzr); +yeccpars2_1(S, var, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 9, Ss, Stack, T, Ts, Tzr); +yeccpars2_1(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccgoto_head(hd(Ss), Cat, Ss, Stack, T, Ts, Tzr). + +yeccpars2_2(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccgoto_grammar(hd(Ss), Cat, Ss, Stack, T, Ts, Tzr). + +yeccpars2_3(S, '->', Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 10, Ss, Stack, T, Ts, Tzr). + +yeccpars2_4(_S, '$end', _Ss, Stack, _T, _Ts, _Tzr) -> + {ok, hd(Stack)}. + +yeccpars2_5(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccgoto_grammar(hd(Ss), Cat, Ss, Stack, T, Ts, Tzr). + +yeccpars2_6(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + NewStack = yeccpars2_6_(Stack), + yeccgoto_symbol(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr). + +yeccpars2_7(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + NewStack = yeccpars2_7_(Stack), + yeccgoto_symbol(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr). + +yeccpars2_8(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + NewStack = yeccpars2_8_(Stack), + yeccgoto_symbol(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr). + +yeccpars2_9(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + NewStack = yeccpars2_9_(Stack), + yeccgoto_symbol(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr). + +%% yeccpars2_10: see yeccpars2_0 + +yeccpars2_11(S, ':', Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 15, Ss, Stack, T, Ts, Tzr); +yeccpars2_11(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + NewStack = yeccpars2_11_(Stack), + yeccpars2_14(14, Cat, [11 | Ss], NewStack, T, Ts, Tzr). + +yeccpars2_12(S, atom, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 6, Ss, Stack, T, Ts, Tzr); +yeccpars2_12(S, integer, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 7, Ss, Stack, T, Ts, Tzr); +yeccpars2_12(S, reserved_word, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 8, Ss, Stack, T, Ts, Tzr); +yeccpars2_12(S, var, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 9, Ss, Stack, T, Ts, Tzr); +yeccpars2_12(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + NewStack = yeccpars2_12_(Stack), + yeccgoto_symbols(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr). + +yeccpars2_13(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + [_|Nss] = Ss, + NewStack = yeccpars2_13_(Stack), + yeccgoto_symbols(hd(Nss), Cat, Nss, NewStack, T, Ts, Tzr). + +yeccpars2_14(S, dot, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 29, Ss, Stack, T, Ts, Tzr). 
+ +yeccpars2_15(S, '->', Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 18, Ss, Stack, T, Ts, Tzr); +yeccpars2_15(S, ':', Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 19, Ss, Stack, T, Ts, Tzr); +yeccpars2_15(S, atom, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 20, Ss, Stack, T, Ts, Tzr); +yeccpars2_15(S, char, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 21, Ss, Stack, T, Ts, Tzr); +yeccpars2_15(S, float, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 22, Ss, Stack, T, Ts, Tzr); +yeccpars2_15(S, integer, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 23, Ss, Stack, T, Ts, Tzr); +yeccpars2_15(S, reserved_symbol, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 24, Ss, Stack, T, Ts, Tzr); +yeccpars2_15(S, reserved_word, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 25, Ss, Stack, T, Ts, Tzr); +yeccpars2_15(S, string, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 26, Ss, Stack, T, Ts, Tzr); +yeccpars2_15(S, var, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 27, Ss, Stack, T, Ts, Tzr). + +yeccpars2_16(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + [_|Nss] = Ss, + NewStack = yeccpars2_16_(Stack), + yeccgoto_attached_code(hd(Nss), Cat, Nss, NewStack, T, Ts, Tzr). + +yeccpars2_17(S, '->', Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 18, Ss, Stack, T, Ts, Tzr); +yeccpars2_17(S, ':', Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 19, Ss, Stack, T, Ts, Tzr); +yeccpars2_17(S, atom, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 20, Ss, Stack, T, Ts, Tzr); +yeccpars2_17(S, char, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 21, Ss, Stack, T, Ts, Tzr); +yeccpars2_17(S, float, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 22, Ss, Stack, T, Ts, Tzr); +yeccpars2_17(S, integer, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 23, Ss, Stack, T, Ts, Tzr); +yeccpars2_17(S, reserved_symbol, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 24, Ss, Stack, T, Ts, Tzr); +yeccpars2_17(S, reserved_word, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 25, Ss, Stack, T, Ts, Tzr); +yeccpars2_17(S, string, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 26, Ss, Stack, T, Ts, Tzr); +yeccpars2_17(S, var, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 27, Ss, Stack, T, Ts, Tzr); +yeccpars2_17(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + NewStack = yeccpars2_17_(Stack), + yeccgoto_tokens(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr). + +yeccpars2_18(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + NewStack = yeccpars2_18_(Stack), + yeccgoto_token(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr). + +yeccpars2_19(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + NewStack = yeccpars2_19_(Stack), + yeccgoto_token(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr). + +yeccpars2_20(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccgoto_token(hd(Ss), Cat, Ss, Stack, T, Ts, Tzr). + +yeccpars2_21(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccgoto_token(hd(Ss), Cat, Ss, Stack, T, Ts, Tzr). + +yeccpars2_22(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccgoto_token(hd(Ss), Cat, Ss, Stack, T, Ts, Tzr). + +yeccpars2_23(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccgoto_token(hd(Ss), Cat, Ss, Stack, T, Ts, Tzr). + +yeccpars2_24(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + NewStack = yeccpars2_24_(Stack), + yeccgoto_token(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr). + +yeccpars2_25(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + NewStack = yeccpars2_25_(Stack), + yeccgoto_token(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr). + +yeccpars2_26(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccgoto_token(hd(Ss), Cat, Ss, Stack, T, Ts, Tzr). + +yeccpars2_27(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccgoto_token(hd(Ss), Cat, Ss, Stack, T, Ts, Tzr). 
+ +yeccpars2_28(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + [_|Nss] = Ss, + NewStack = yeccpars2_28_(Stack), + yeccgoto_tokens(hd(Nss), Cat, Nss, NewStack, T, Ts, Tzr). + +yeccpars2_29(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + [_,_,_,_|Nss] = Ss, + NewStack = yeccpars2_29_(Stack), + yeccgoto_rule(hd(Nss), Cat, Nss, NewStack, T, Ts, Tzr). + +yeccpars2_30(S, dot, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 35, Ss, Stack, T, Ts, Tzr). + +yeccpars2_31(S, dot, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 34, Ss, Stack, T, Ts, Tzr). + +yeccpars2_32(S, string, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 32, Ss, Stack, T, Ts, Tzr); +yeccpars2_32(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + NewStack = yeccpars2_32_(Stack), + yeccgoto_strings(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr). + +yeccpars2_33(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + [_|Nss] = Ss, + NewStack = yeccpars2_33_(Stack), + yeccgoto_strings(hd(Nss), Cat, Nss, NewStack, T, Ts, Tzr). + +yeccpars2_34(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + [_,_|Nss] = Ss, + NewStack = yeccpars2_34_(Stack), + yeccgoto_declaration(hd(Nss), Cat, Nss, NewStack, T, Ts, Tzr). + +yeccpars2_35(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + [_,_|Nss] = Ss, + NewStack = yeccpars2_35_(Stack), + yeccgoto_declaration(hd(Nss), Cat, Nss, NewStack, T, Ts, Tzr). + +yeccgoto_attached_code(11, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_14(14, Cat, Ss, Stack, T, Ts, Tzr). + +yeccgoto_declaration(0=_S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_5(_S, Cat, Ss, Stack, T, Ts, Tzr). + +yeccgoto_grammar(0, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_4(4, Cat, Ss, Stack, T, Ts, Tzr). + +yeccgoto_head(0, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_3(3, Cat, Ss, Stack, T, Ts, Tzr). + +yeccgoto_rule(0=_S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_2(_S, Cat, Ss, Stack, T, Ts, Tzr). + +yeccgoto_strings(1, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_31(31, Cat, Ss, Stack, T, Ts, Tzr); +yeccgoto_strings(32=_S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_33(_S, Cat, Ss, Stack, T, Ts, Tzr). + +yeccgoto_symbol(0, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_1(1, Cat, Ss, Stack, T, Ts, Tzr); +yeccgoto_symbol(1, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_12(12, Cat, Ss, Stack, T, Ts, Tzr); +yeccgoto_symbol(10, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_12(12, Cat, Ss, Stack, T, Ts, Tzr); +yeccgoto_symbol(12, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_12(12, Cat, Ss, Stack, T, Ts, Tzr). + +yeccgoto_symbols(1, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_30(30, Cat, Ss, Stack, T, Ts, Tzr); +yeccgoto_symbols(10, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_11(11, Cat, Ss, Stack, T, Ts, Tzr); +yeccgoto_symbols(12=_S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_13(_S, Cat, Ss, Stack, T, Ts, Tzr). + +yeccgoto_token(15, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_17(17, Cat, Ss, Stack, T, Ts, Tzr); +yeccgoto_token(17, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_17(17, Cat, Ss, Stack, T, Ts, Tzr). + +yeccgoto_tokens(15=_S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_16(_S, Cat, Ss, Stack, T, Ts, Tzr); +yeccgoto_tokens(17=_S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_28(_S, Cat, Ss, Stack, T, Ts, Tzr). + +-compile({inline,{yeccpars2_6_,1}}). +-file("yeccgramm.yrl", 44). +yeccpars2_6_(__Stack0) -> + [__1 | __Stack] = __Stack0, + [begin + symbol ( __1 ) + end | __Stack]. + +-compile({inline,{yeccpars2_7_,1}}). +-file("yeccgramm.yrl", 45). +yeccpars2_7_(__Stack0) -> + [__1 | __Stack] = __Stack0, + [begin + symbol ( __1 ) + end | __Stack]. + +-compile({inline,{yeccpars2_8_,1}}). +-file("yeccgramm.yrl", 46). 
+yeccpars2_8_(__Stack0) -> + [__1 | __Stack] = __Stack0, + [begin + symbol ( __1 ) + end | __Stack]. + +-compile({inline,{yeccpars2_9_,1}}). +-file("yeccgramm.yrl", 43). +yeccpars2_9_(__Stack0) -> + [__1 | __Stack] = __Stack0, + [begin + symbol ( __1 ) + end | __Stack]. + +-compile({inline,{yeccpars2_11_,1}}). +-file("yeccgramm.yrl", 40). +yeccpars2_11_(__Stack0) -> + [begin + { erlang_code , [ { atom , 0 , '$undefined' } ] } + end | __Stack0]. + +-compile({inline,{yeccpars2_12_,1}}). +-file("yeccgramm.yrl", 35). +yeccpars2_12_(__Stack0) -> + [__1 | __Stack] = __Stack0, + [begin + [ __1 ] + end | __Stack]. + +-compile({inline,{yeccpars2_13_,1}}). +-file("yeccgramm.yrl", 36). +yeccpars2_13_(__Stack0) -> + [__2,__1 | __Stack] = __Stack0, + [begin + [ __1 | __2 ] + end | __Stack]. + +-compile({inline,{yeccpars2_16_,1}}). +-file("yeccgramm.yrl", 39). +yeccpars2_16_(__Stack0) -> + [__2,__1 | __Stack] = __Stack0, + [begin + { erlang_code , __2 } + end | __Stack]. + +-compile({inline,{yeccpars2_17_,1}}). +-file("yeccgramm.yrl", 41). +yeccpars2_17_(__Stack0) -> + [__1 | __Stack] = __Stack0, + [begin + [ __1 ] + end | __Stack]. + +-compile({inline,{yeccpars2_18_,1}}). +-file("yeccgramm.yrl", 55). +yeccpars2_18_(__Stack0) -> + [__1 | __Stack] = __Stack0, + [begin + { '->' , line_of ( __1 ) } + end | __Stack]. + +-compile({inline,{yeccpars2_19_,1}}). +-file("yeccgramm.yrl", 56). +yeccpars2_19_(__Stack0) -> + [__1 | __Stack] = __Stack0, + [begin + { ':' , line_of ( __1 ) } + end | __Stack]. + +-compile({inline,{yeccpars2_24_,1}}). +-file("yeccgramm.yrl", 53). +yeccpars2_24_(__Stack0) -> + [__1 | __Stack] = __Stack0, + [begin + { value_of ( __1 ) , line_of ( __1 ) } + end | __Stack]. + +-compile({inline,{yeccpars2_25_,1}}). +-file("yeccgramm.yrl", 54). +yeccpars2_25_(__Stack0) -> + [__1 | __Stack] = __Stack0, + [begin + { value_of ( __1 ) , line_of ( __1 ) } + end | __Stack]. + +-compile({inline,{yeccpars2_28_,1}}). +-file("yeccgramm.yrl", 42). +yeccpars2_28_(__Stack0) -> + [__2,__1 | __Stack] = __Stack0, + [begin + [ __1 | __2 ] + end | __Stack]. + +-compile({inline,{yeccpars2_29_,1}}). +-file("yeccgramm.yrl", 33). +yeccpars2_29_(__Stack0) -> + [__5,__4,__3,__2,__1 | __Stack] = __Stack0, + [begin + { rule , [ __1 | __3 ] , __4 } + end | __Stack]. + +-compile({inline,{yeccpars2_32_,1}}). +-file("yeccgramm.yrl", 37). +yeccpars2_32_(__Stack0) -> + [__1 | __Stack] = __Stack0, + [begin + [ __1 ] + end | __Stack]. + +-compile({inline,{yeccpars2_33_,1}}). +-file("yeccgramm.yrl", 38). +yeccpars2_33_(__Stack0) -> + [__2,__1 | __Stack] = __Stack0, + [begin + [ __1 | __2 ] + end | __Stack]. + +-compile({inline,{yeccpars2_34_,1}}). +-file("yeccgramm.yrl", 32). +yeccpars2_34_(__Stack0) -> + [__3,__2,__1 | __Stack] = __Stack0, + [begin + { __1 , __2 } + end | __Stack]. + +-compile({inline,{yeccpars2_35_,1}}). +-file("yeccgramm.yrl", 31). +yeccpars2_35_(__Stack0) -> + [__3,__2,__1 | __Stack] = __Stack0, + [begin + { __1 , __2 } + end | __Stack]. + + +-file("yeccgramm.yrl", 75). diff --git a/lib/parsetools/src/yeccscan.erl b/lib/parsetools/src/yeccscan.erl new file mode 100644 index 0000000000..d7ec3ba8d3 --- /dev/null +++ b/lib/parsetools/src/yeccscan.erl @@ -0,0 +1,62 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. 
You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+-module(yeccscan).
+-export([scan/1, scan/3]).
+
+scan(Inport) ->
+ scan(Inport, '', 1).
+
+scan(Inport, Prompt, Line1) ->
+ case catch io:scan_erl_form(Inport, Prompt, Line1) of
+ {eof, Line2} ->
+ {eof, Line2};
+ {ok, Tokens, Line2} ->
+ case Tokens of
+ [] ->
+ scan(Inport, Prompt, Line2);
+ _ ->
+ {ok, lex(Tokens), Line2}
+ end;
+ {error, Descriptor, Line2} ->
+ {error, Descriptor, Line2};
+ {'EXIT', Why} ->
+ io:format('yeccscan: Error scanning input line ~w~n', [Line1]),
+ exit(Why)
+ end.
+
+lex([]) ->
+ [];
+lex([Token | Tokens]) ->
+ case Token of
+ {'dot', Line} ->
+ [{'dot', Line} | lex(Tokens)];
+ {':', Line} ->
+ [{':', Line} | lex(Tokens)];
+ {'->', Line} ->
+ [{'->', Line} | lex(Tokens)];
+ {Category, Line, Symbol} ->
+ [{Category, Line, Symbol} | lex(Tokens)];
+ {Other, Line} ->
+ Cat = case erl_scan:reserved_word(Other) of
+ true -> reserved_word;
+ false -> reserved_symbol
+ end,
+ [{Cat, Line, Other} | lex(Tokens)]
+ end. diff --git a/lib/parsetools/vsn.mk b/lib/parsetools/vsn.mk new file mode 100644 index 0000000000..e3db40c193 --- /dev/null +++ b/lib/parsetools/vsn.mk @@ -0,0 +1 @@ +PARSETOOLS_VSN = 2.0.1
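
How these files fit together: yeccscan:scan/1 returns exactly the
{ok, Tokens, EndLine}, {eof, EndLine} and {error, Descriptor, EndLine}
shapes that the generated yeccpars1 loop expects from its tokenizer, so the
scanner can drive the grammar parser through parse_and_scan/1. A minimal
sketch (standard_io is only an example input device):

    yeccparser:parse_and_scan({fun yeccscan:scan/1, [standard_io]}).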