diff options
Diffstat (limited to 'lib/parsetools/src')
| -rw-r--r-- | lib/parsetools/src/Makefile | 100 | ||||
| -rw-r--r-- | lib/parsetools/src/esyntax.yrl | 360 | ||||
| -rw-r--r-- | lib/parsetools/src/leex.erl | 1608 | ||||
| -rw-r--r-- | lib/parsetools/src/parsetools.app.src | 33 | ||||
| -rw-r--r-- | lib/parsetools/src/parsetools.appup.src | 1 | ||||
| -rw-r--r-- | lib/parsetools/src/yecc.erl | 2531 | ||||
| -rw-r--r-- | lib/parsetools/src/yeccgramm.yrl | 74 | ||||
| -rw-r--r-- | lib/parsetools/src/yeccparser.erl | 642 | ||||
| -rw-r--r-- | lib/parsetools/src/yeccscan.erl | 62 | 
9 files changed, 5411 insertions, 0 deletions
| diff --git a/lib/parsetools/src/Makefile b/lib/parsetools/src/Makefile new file mode 100644 index 0000000000..89e079e411 --- /dev/null +++ b/lib/parsetools/src/Makefile @@ -0,0 +1,100 @@ +# +# %CopyrightBegin% +#  +# Copyright Ericsson AB 1996-2009. All Rights Reserved. +#  +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +#  +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +#  +# %CopyrightEnd% +# +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include ../vsn.mk +VSN=$(PARSETOOLS_VSN) + +# ---------------------------------------------------- +# Release directory specification +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/lib/parsetools-$(VSN) + +# ---------------------------------------------------- +# Target Specs +# ---------------------------------------------------- +MODULES = \ +	leex \ +	yecc \ +	yeccparser \ +	yeccscan + +HRL_FILES = ../include/yeccpre.hrl ../include/leexinc.hrl + +ERL_FILES = $(MODULES:%=%.erl) + +TARGET_FILES = $(MODULES:%=$(EBIN)/%.$(EMULATOR)) $(APP_TARGET) $(APPUP_TARGET) + +APP_FILE = parsetools.app + +APP_SRC = $(APP_FILE).src +APP_TARGET = $(EBIN)/$(APP_FILE) + +APPUP_FILE= parsetools.appup + +APPUP_SRC= $(APPUP_FILE).src +APPUP_TARGET= $(EBIN)/$(APPUP_FILE) + +# ---------------------------------------------------- +# FLAGS +# ---------------------------------------------------- +ERL_COMPILE_FLAGS += +warn_obsolete_guard -I$(ERL_TOP)/lib/stdlib/include + +# ---------------------------------------------------- +# Targets +# ---------------------------------------------------- + +debug opt: $(TARGET_FILES) + +clean: +	rm -f $(TARGET_FILES) +	rm -f core + +docs: + + +# ---------------------------------------------------- +# Special Build Targets +# ---------------------------------------------------- + +$(APP_TARGET): $(APP_SRC) ../vsn.mk +	sed -e 's;%VSN%;$(VSN);' $< > $@ + +$(APPUP_TARGET): $(APPUP_SRC) ../vsn.mk +	sed -e 's;%VSN%;$(VSN);' $< > $@ + +# ---------------------------------------------------- +# Release Target +# ----------------------------------------------------  +include $(ERL_TOP)/make/otp_release_targets.mk + +release_spec: opt +	$(INSTALL_DIR) $(RELSYSDIR)/src +	$(INSTALL_DATA) $(ERL_FILES) $(RELSYSDIR)/src +	$(INSTALL_DIR) $(RELSYSDIR)/ebin +	$(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin +	$(INSTALL_DIR) $(RELSYSDIR)/include +	$(INSTALL_DATA) $(HRL_FILES) $(RELSYSDIR)/include + +release_docs_spec: + diff --git a/lib/parsetools/src/esyntax.yrl b/lib/parsetools/src/esyntax.yrl new file mode 100644 index 0000000000..1ecb54f0a7 --- /dev/null +++ b/lib/parsetools/src/esyntax.yrl @@ -0,0 +1,360 @@ +%% +%% %CopyrightBegin% +%%  +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%%  +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%%  +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%%  +%% %CopyrightEnd% +%% +Nonterminals +add_op attribute basic_type bif_test +case_expr clause_body +clause_guard clause_head comp_op cr_clause cr_clauses expr expr_tail +exprs farity farity_list form formal_parameter_list function +function_call function_clause guard guard_call guard_expr +guard_expr_list guard_exprs guard_expr_tail guard_expr_tuple +guard_parameter_list +guard_tests guard_test if_clause if_clauses if_expr list match_expr +mult_op parameter_list pattern patterns pattern_list pattern_tail pattern_tuple +prefix_op receive_expr send_expr tuple. + +Terminals +'!' '(' ')' '*' '+' ',' '-' '->' '/' '/=' ':' ';' '<' '=' '=/=' '=:=' +'=<' '==' '>' '>=' '[' ']' 'after' 'band' 'begin' 'bnot' +'bor' 'bsl' 'bsr' 'bxor' 'case' 'catch' 'div' 'end' 'if' 'of' +'receive' 'rem' 'when' '{' '|' '}' atom float integer string var. +% 'receive' 'rem' 'true' 'when' '{' '|' '}' atom float integer string var. + +Rootsymbol form. + +Endsymbol dot. + +Unary 0 'catch'. +Right 200 '='. +Right 200 '!'. +Left 300 add_op. +Left 400 mult_op. +Unary 500 prefix_op. + + +add_op -> '+' : '$1'. +add_op -> '-' : '$1'. +add_op -> 'bor' : '$1'. +add_op -> 'bxor' : '$1'. +add_op -> 'bsl' : '$1'. +add_op -> 'bsr' : '$1'. + +comp_op -> '==' : '$1'. +comp_op -> '/=' : '$1'. +comp_op -> '=<' : '$1'. +comp_op -> '<' : '$1'. +comp_op -> '>=' : '$1'. +comp_op -> '>' : '$1'. +comp_op -> '=:=' : '$1'. +comp_op -> '=/=' : '$1'. + +mult_op -> '*' : '$1'. +mult_op -> '/' : '$1'. +mult_op -> 'div' : '$1'. +mult_op -> 'rem' : '$1'. +mult_op -> 'band' : '$1'. + +prefix_op -> '+' : '$1'. +prefix_op -> '-' : '$1'. +prefix_op -> 'bnot' : '$1'. + + +basic_type -> atom : '$1'. +basic_type -> float : '$1'. +basic_type -> integer : '$1'. +basic_type -> string : '$1'. +basic_type -> var : '$1'. +% basic_type -> 'true' : {atom, element(2, '$1'), 'true'}. + + +pattern -> basic_type : '$1'. +pattern -> pattern_list : '$1'. +pattern -> pattern_tuple : '$1'. + +pattern_list -> '[' ']' : {nil, element(2, '$1')}. +pattern_list -> '[' pattern pattern_tail ']' : +   {cons, element(2, '$1'), '$2', '$3'}. + +pattern_tail -> '|' pattern : '$2'. +pattern_tail -> ',' pattern pattern_tail : +   {cons, element(2, '$2'), '$2', '$3'}. +pattern_tail -> '$empty' : {nil, 0}. + +pattern_tuple -> '{' '}' : {tuple, element(2, '$1'), []}. +pattern_tuple -> '{' patterns '}' : {tuple, element(2, '$1'), '$2'}. + +patterns -> pattern : ['$1']. +patterns -> pattern ',' patterns : ['$1' | '$3']. + + +expr -> basic_type : '$1'. +expr -> list : '$1'. +expr -> tuple : '$1'. +expr -> function_call : '$1'. + +expr -> expr add_op expr : +   {Op, Pos} = '$2', +   {arith, Pos, Op, '$1', '$3'}. +expr -> expr mult_op expr : +   {Op, Pos} = '$2', +   {arith, Pos, Op, '$1', '$3'}. +expr -> prefix_op expr: +   case '$2' of +       {float, Pos, N} -> +	   case '$1' of +	       {'-', _} -> +		   {float, Pos, -N}; +	       {'+', _} -> +		   {float, Pos, N}; +	       {Op, Pos1} -> +		   {arith, Pos1, Op, {float, Pos, N}} +	   end; +       {integer, Pos, N} -> +	   case '$1' of +	       {'-', _} -> +		   {integer, Pos, -N}; +	       {'+', _} -> +		   {integer, Pos, N}; +	       {Op, Pos1} -> +		   {arith, Pos1, Op, {integer, Pos, N}} +	   end; +       _ -> +	   {Op, Pos} = '$1', +	   {arith, Pos, Op, '$2'} +   end. + +expr -> '(' expr ')' : '$2'. +expr -> 'begin' exprs 'end' : {block, element(2, '$1'), '$2'}. +expr -> 'catch' expr : {'catch', element(2, '$1'), '$2'}. + +expr -> case_expr : '$1'. +expr -> if_expr : '$1'. +expr -> receive_expr : '$1'. +expr -> match_expr : '$1'. +expr -> send_expr : '$1'. + + +list -> '[' ']' : {nil, element(2, '$1')}. +list -> '[' expr expr_tail ']' : {cons, element(2, '$1'), '$2', '$3'}. + +expr_tail -> '|' expr : '$2'. +expr_tail -> ',' expr expr_tail : {cons, element(2, '$2'), '$2', '$3'}. +expr_tail -> '$empty' : {nil, 0}. + +tuple -> '{' '}' : {tuple, element(2, '$1'), []}. +tuple -> '{' exprs '}' : {tuple, element(2, '$1'), '$2'}. + + +function_call -> atom '(' parameter_list ')' : +   case erl_parse:erlang_bif(element(3, '$1'), length('$3')) of +       true -> +	   {bif, element(2, '$1'), element(3, '$1'), '$3'}; +       false -> +	   {call, element(2, '$1'), [], element(3, '$1'), '$3'} +   end. +function_call -> atom ':' atom '(' parameter_list ')' : +   {call, element(2, '$1'), element(3, '$1'), element(3, '$3'), '$5'}. + +parameter_list -> exprs : '$1'. +parameter_list -> '$empty' : []. + + +case_expr -> 'case' expr 'of' cr_clauses 'end' : +   {'case', element(2, '$1'), '$2', '$4'}. + +cr_clause -> pattern clause_guard clause_body : +   {clause, element(2, '$1'), ['$1'], '$2', '$3'}. + +cr_clauses -> cr_clause : ['$1']. +cr_clauses -> cr_clause ';' cr_clauses : ['$1' | '$3']. + +if_expr -> 'if' if_clauses 'end' : {'if', element(2, '$1'), '$2'}. + +if_clause -> guard clause_body : {clause, element(2, hd('$2')), '$1', '$2'}. + +if_clauses -> if_clause : ['$1']. +if_clauses -> if_clause ';' if_clauses : ['$1' | '$3']. + +receive_expr -> 'receive' 'after' expr clause_body 'end' : +   {'receive', element(2, '$1'), [], '$3', '$4'}. +receive_expr -> 'receive' cr_clauses 'end' : +   {'receive', element(2, '$1'), '$2'}. +receive_expr -> 'receive' cr_clauses 'after' expr clause_body 'end' : +   {'receive', element(2, '$1'), '$2', '$4', '$5'}. + + +match_expr -> expr '=' expr : +   case erl_parse:is_term('$1') of +       true -> +	   {match, element(2, '$1'), '$1', '$3'}; +       false -> +	   throw({error, {element(2, '$1'), yecc, "illegal lhs in match **"}}) +   end. + +send_expr -> expr '!' expr : +   Pos = element(2, '$1'), +   {send, Pos, '$1', '$3'}. + + +exprs -> expr : ['$1']. +exprs -> expr ',' exprs : ['$1' | '$3']. + + +guard_expr -> basic_type : '$1'. +guard_expr -> guard_expr_list : '$1'. +guard_expr -> guard_expr_tuple : '$1'. +guard_expr -> guard_call : '$1'. +guard_expr -> '(' guard_expr ')' : '$2'. +guard_expr -> guard_expr add_op guard_expr : +   {Op, Pos} = '$2', +   {arith, Pos, Op, '$1', '$3'}. +guard_expr -> guard_expr mult_op guard_expr : +   {Op, Pos} = '$2', +   {arith, Pos, Op, '$1', '$3'}. +guard_expr -> prefix_op guard_expr: +   case '$2' of +       {float, Pos, N} -> +	   case '$1' of +	       {'-', _} -> +		   {float, Pos, -N}; +	       {'+', _} -> +		   {float, Pos, N}; +	       {Op, Pos1} -> +		   {arith, Pos1, Op, {float, Pos, N}} +	   end; +       {integer, Pos, N} -> +	   case '$1' of +	       {'-', _} -> +		   {integer, Pos, -N}; +	       {'+', _} -> +		   {integer, Pos, N}; +	       {Op, Pos1} -> +		   {arith, Pos1, Op, {integer, Pos, N}} +	   end; +       _ -> +	   {Op, Pos} = '$1', +	   {arith, Pos, Op, '$2'} +   end. + +guard_expr_list -> '[' ']' : {nil, element(2, '$1')}. +guard_expr_list -> '[' guard_expr guard_expr_tail ']' : +   {cons, element(2, '$1'), '$2', '$3'}. + +guard_expr_tail -> '|' guard_expr : '$2'. +guard_expr_tail -> ',' guard_expr guard_expr_tail : + {cons, element(2, '$2'), '$2', '$3'}. +guard_expr_tail -> '$empty' : {nil, 0}. + +guard_expr_tuple -> '{' '}' : {tuple, element(2, '$1'), []}. +guard_expr_tuple -> '{' guard_exprs '}' : {tuple, element(2, '$1'), '$2'}. + +guard_exprs -> guard_expr : ['$1']. +guard_exprs -> guard_expr ',' guard_exprs : ['$1' | '$3']. + + +guard_call -> atom '(' guard_parameter_list ')' : +   case erl_parse:erlang_guard_bif(element(3, '$1'), length('$3')) of +       true -> +	   {bif, element(2, '$1'), element(3, '$1'), '$3'}; +       false -> +	   throw({error, {element(2, '$1'), yecc, "illegal test in guard **"}}) +   end. + +guard_parameter_list -> guard_exprs : '$1'. +guard_parameter_list -> '$empty' : []. + + +bif_test -> atom '(' guard_parameter_list ')' : +   case erl_parse:erlang_guard_test(element(3, '$1'), length('$3')) of +       true -> +	   {test, element(2, '$1'), element(3, '$1'), '$3'}; +       false -> +	   throw({error, {element(2, '$1'), yecc, "illegal test in guard **"}}) +   end. + + +guard_test -> bif_test : '$1'. +guard_test -> guard_expr comp_op guard_expr : +   {Op, Pos} = '$2', +   {comp, Pos, Op, '$1', '$3'}. + +guard_tests -> guard_test : ['$1']. +guard_tests -> guard_test ',' guard_tests : ['$1' | '$3']. + +% guard -> 'true' : []. +guard -> atom : +   case '$1' of +       {atom, _, true} -> +           []; +       _ -> +	   throw({error, {element(2, '$1'), yecc, "illegal test in guard **"}}) +   end. +guard -> guard_tests : '$1'. + + +function_clause -> clause_head clause_guard clause_body : +   {Name, Line, Arity, Parameters} = '$1', +   {function, Line, Name, Arity, +    [{clause, element(2, hd('$3')), Parameters, '$2', '$3'}]}. + +clause_head -> atom '(' formal_parameter_list ')' : +   {element(3, '$1'), element(2, '$1'), length('$3'), '$3'}. + +formal_parameter_list -> patterns : '$1'. +formal_parameter_list -> '$empty' : []. + +clause_guard -> 'when' guard : '$2'. +clause_guard -> '$empty' : []. + +clause_body -> '->' exprs: '$2'. + + +function -> function_clause : '$1'. +function -> function_clause ';' function : +   case '$1' of +       {function, Pos1, Name1, Arity1, [Clause]} -> +	   case '$3' of +	       {function, _, Name1, Arity2, Clauses} -> +		   if +		       Arity1 /= Arity2 -> +			   throw({error, {Pos1, yecc, +				  io_lib:format('arity conflict in definition of ~w', +						[Name1])}}); +		       true -> +			   {function, Pos1, Name1, Arity1, [Clause | Clauses]} +		   end; +	       _ -> +		   throw({error, {Pos1, yecc, +			  io_lib:format('missing final dot in def of ~w/~w', +					[Name1, Arity1])}}) +	   end +   end. + + +attribute -> atom : element(3, '$1'). +attribute -> '[' farity_list ']' : '$2'. + +farity_list -> farity : ['$1']. +farity_list -> farity ',' farity_list : ['$1' | '$3']. + +farity -> atom '/' integer : {element(3, '$1'), element(3, '$3')}. + + +form -> '-' atom '(' attribute ')' : +   {attribute, element(2, '$2'), element(3, '$2'), '$4'}. +form -> function : '$1'. diff --git a/lib/parsetools/src/leex.erl b/lib/parsetools/src/leex.erl new file mode 100644 index 0000000000..fd494eaf06 --- /dev/null +++ b/lib/parsetools/src/leex.erl @@ -0,0 +1,1608 @@ +%% Copyright (c) 2008,2009 Robert Virding. All rights reserved. +%% +%% Redistribution and use in source and binary forms, with or without +%% modification, are permitted provided that the following conditions +%% are met: +%% +%% 1. Redistributions of source code must retain the above copyright +%% notice, this list of conditions and the following disclaimer. +%% 2. Redistributions in binary form must reproduce the above copyright +%% notice, this list of conditions and the following disclaimer in the +%% documentation and/or other materials provided with the distribution. +%% +%% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +%% "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +%% LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +%% FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +%% COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +%% INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +%% BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +%% LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +%% CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +%% LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +%% ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +%% POSSIBILITY OF SUCH DAMAGE. + +%%% A Lexical Analyser Generator for Erlang. +%%% +%%% Most of the algorithms used here are taken pretty much as +%%% described in the "Dragon Book" by Aho, Sethi and Ullman. Some +%%% completing details were taken from "Compiler Design in C" by +%%% Hollub. + +-module(leex). + +-export([compile/3,file/1,file/2,format_error/1]). + +-import(lists, [member/2,reverse/1,sort/1,delete/2, +                keysearch/3,keysort/2,keydelete/3,keyfind/3, +                map/2,foldl/3,foreach/2,flatmap/2]). +-import(string, [substr/2,substr/3,span/2]). +-import(ordsets, [is_element/2,add_element/2,union/2]). +-import(orddict, [store/3]). + +-include("erl_compile.hrl"). +%%-include_lib("stdlib/include/erl_compile.hrl"). + +-define(LEEXINC, "leexinc.hrl").        % Include file +-define(LEEXLIB, parsetools).           % Leex is in lib parsetools +%%-define(LEEXLIB, leex).               % Leex is in lib leex + +-define(DEFS_HEAD, "Definitions."). +-define(RULE_HEAD, "Rules."). +-define(CODE_HEAD, "Erlang code."). + +-record(leex, {xfile=[],        % Xrl file +               efile=[],        % Erl file +               ifile=[],        % Include file +               gfile=[],        % Graph file +               module,          % Module name +               opts=[],         % Options +               posix=false,     % POSIX regular expressions +               errors=[], +               warnings=[] +              }). + +-record(nfa_state, {no,edges=[],accept=noaccept}). +-record(dfa_state, {no,nfa=[],trans=[],accept=noaccept}). + +%%% +%%% Exported functions +%%% + +%%% Interface to erl_compile. + +compile(Input0, Output0, +        #options{warning = WarnLevel, verbose=Verbose, includes=Includes}) -> +    Input = assure_extension(shorten_filename(Input0), ".xrl"), +    Output = assure_extension(shorten_filename(Output0), ".erl"), +    Includefile = lists:sublist(Includes, 1), +    Opts = [{scannerfile,Output},{includefile,Includefile},{verbose,Verbose}, +            {report_errors,true},{report_warnings,WarnLevel > 0}], +    case file(Input, Opts) of +        {ok, _} -> +            ok; +        error -> +            error +    end. + +%% file(File) -> ok | error. +%% file(File, Options) -> ok | error. + +file(File) -> file(File, []). + +file(File, Opts0) -> +    case is_filename(File) of +        no -> erlang:error(badarg, [File,Opts0]); +        _ -> ok +    end, +    Opts = case options(Opts0) of +               badarg -> +                   erlang:error(badarg, [File,Opts0]); +               Options -> +                   Options +           end, +    St0 = #leex{}, +    St1 = filenames(File, Opts, St0),   % Get all the filenames +    St = try +             {ok,REAs,Actions,Code,St2} = parse_file(St1), +             {DFA,DF} = make_dfa(REAs, St2), +             St3 = out_file(St2, DFA, DF, Actions, Code), +             case lists:member(dfa_graph, St3#leex.opts) of +                 true -> out_dfa_graph(St3, DFA, DF); +                 false -> St3 +             end +         catch #leex{}=St4 -> +             St4 +         end, +    leex_ret(St).              + +format_error({file_error, Reason}) -> +    io_lib:fwrite("~s",[file:format_error(Reason)]); +format_error(missing_defs) -> "missing Definitions"; +format_error(missing_rules) -> "missing Rules"; +format_error(missing_code) -> "missing Erlang code"; +format_error(empty_rules) -> "no rules"; +format_error(bad_rule) -> "bad rule"; +format_error({regexp,E})-> +    Es = case E of +             {interval_range,_} -> "interval range"; +             {unterminated,Cs} -> +                 "unterminated " ++ Cs; +             {illegal_char,Cs} -> +                 "illegal character " ++ Cs; +             {posix_cc,What} -> +                 ["illegal POSIX character class ",io_lib:write_string(What)]; +             {char_class,What} -> +                 ["illegal character class ",io_lib:write_string(What)] +         end, +    ["bad regexp `",Es,"'"]; +format_error(ignored_characters) -> +    "ignored characters". + +%%% +%%% Local functions +%%% + +assure_extension(File, Ext) -> +    lists:concat([strip_extension(File, Ext), Ext]). + +%% Assumes File is a filename. +strip_extension(File, Ext) -> +    case filename:extension(File) of +        Ext -> filename:rootname(File); +        _Other -> File +    end. + +options(Options0) when is_list(Options0) -> +    try  +        Options = flatmap(fun(return) -> short_option(return, true); +                             (report) -> short_option(report, true); +                             ({return,T}) -> short_option(return, T); +                             ({report,T}) -> short_option(report, T); +                             (T) -> [T] +                          end, Options0), +        options(Options, [scannerfile,includefile,report_errors, +                          report_warnings,return_errors,return_warnings, +                          verbose,dfa_graph], []) +    catch error: _ -> badarg +    end; +options(Option) -> +    options([Option]). + +short_option(return, T) -> +    [{return_errors,T}, {return_warnings,T}]; +short_option(report, T) -> +    [{report_errors,T}, {report_warnings,T}]. + +options(Options0, [Key|Keys], L) when is_list(Options0) -> +    Options = case member(Key, Options0) of +                  true ->  +                      [atom_option(Key)|delete(Key, Options0)]; +                  false -> +                      Options0 +              end, +    V = case keysearch(Key, 1, Options) of +            {value, {Key, Filename0}} when Key =:= includefile;  +                                           Key =:= scannerfile -> +                case is_filename(Filename0) of +                    no ->  +                        badarg; +                    Filename ->  +                        {ok,[{Key,Filename}]} +                end; +            {value,{Key,Bool}} when Bool; not Bool -> +                {ok,[{Key, Bool}]}; +            {value,{Key, _}} -> +                badarg; +            false -> +                {ok,[{Key,default_option(Key)}]} +        end, +    case V of +        badarg -> +            badarg; +        {ok,KeyValueL} -> +            NewOptions = keydelete(Key, 1, Options), +            options(NewOptions, Keys, KeyValueL ++ L) +    end; +options([], [], L) -> +    foldl(fun({_,false}, A) -> A; +             ({Tag,true}, A) -> [Tag|A]; +             (F,A) -> [F|A] +          end, [], L); +options(_Options, _, _L) -> +    badarg. + +default_option(dfa_graph) -> false; +default_option(includefile) -> []; +default_option(report_errors) -> true; +default_option(report_warnings) -> true; +default_option(return_errors) -> false; +default_option(return_warnings) -> false; +default_option(scannerfile) -> []; +default_option(verbose) -> false. + +atom_option(dfa_graph) -> {dfa_graph,true}; +atom_option(report_errors) -> {report_errors,true}; +atom_option(report_warnings) -> {report_warnings,true}; +atom_option(return_errors) -> {return_errors,true}; +atom_option(return_warnings) -> {return_warnings,true}; +atom_option(verbose) -> {verbose,true}; +atom_option(Key) -> Key. + +is_filename(T) -> +    try filename:flatten(T) of +        Filename -> Filename +    catch error: _ -> no +    end.     + +shorten_filename(Name0) -> +    {ok,Cwd} = file:get_cwd(), +    case lists:prefix(Cwd, Name0) of +        false -> Name0; +        true -> +            case lists:nthtail(length(Cwd), Name0) of +                "/"++N -> N; +                N -> N +            end +    end. + +leex_ret(St) -> +    report_errors(St), +    report_warnings(St), +    Es = pack_errors(St#leex.errors), +    Ws = pack_warnings(St#leex.warnings), +    if  +        Es =:= [] ->  +            case member(return_warnings, St#leex.opts) of +                true -> {ok, St#leex.efile, Ws}; +                false -> {ok, St#leex.efile} +            end; +        true ->  +            case member(return_errors, St#leex.opts) of +                true -> {error, Es, Ws}; +                false -> error +            end +    end. + +pack_errors([{File,_} | _] = Es) -> +    [{File, flatmap(fun({_,E}) -> [E] end, sort(Es))}]; +pack_errors([]) -> +    []. +     +pack_warnings([{File,_} | _] = Ws) -> +    [{File, flatmap(fun({_,W}) -> [W] end, sort(Ws))}]; +pack_warnings([]) -> +    []. + +report_errors(St) -> +    when_opt(fun () ->  +                     foreach(fun({File,{none,Mod,E}}) ->  +                                     io:fwrite("~s: ~s\n", +                                               [File,Mod:format_error(E)]); +                                ({File,{Line,Mod,E}}) ->  +                                     io:fwrite("~s:~w: ~s\n", +                                               [File,Line,Mod:format_error(E)]) +                             end, sort(St#leex.errors)) +             end, report_errors, St#leex.opts). + +report_warnings(St) -> +    when_opt(fun () -> +                     foreach(fun({File,{none,Mod,W}}) ->  +                                     io:fwrite("~s: Warning: ~s\n", +                                               [File,Mod:format_error(W)]); +                                ({File,{Line,Mod,W}}) ->  +                                     io:fwrite("~s:~w: Warning: ~s\n", +                                               [File,Line,Mod:format_error(W)]) +                             end, sort(St#leex.warnings)) +             end, report_warnings, St#leex.opts). + +add_error(E, St) -> +    add_error(St#leex.xfile, E, St). + +add_error(File, Error, St) -> +    throw(St#leex{errors = [{File,Error}|St#leex.errors]}). + +add_warning(Line, W, St) -> +    St#leex{warnings = [{St#leex.xfile,{Line,leex,W}}|St#leex.warnings]}. + +%% filenames(File, Options, State) -> State. +%%  The default output dir is the current directory unless an +%%  explicit one has been given in the options. + +filenames(File, Opts, St0) -> +    Dir = filename:dirname(File), +    Base = filename:basename(File, ".xrl"), +    Xfile = filename:join(Dir, Base ++ ".xrl"), +    Efile = Base ++ ".erl", +    Gfile = Base ++ ".dot", +    Module = list_to_atom(Base), +    St1 = St0#leex{xfile=Xfile, +                   opts=Opts, +                   module=Module}, +    {value,{includefile,Ifile0}} = keysearch(includefile, 1, Opts), +    Ifile = inc_file_name(Ifile0), +    %% Test for explicit scanner file. +    {value,{scannerfile,Ofile}} = keysearch(scannerfile, 1, Opts), +    if +        Ofile =:= [] -> +            St1#leex{efile=filename:join(Dir, Efile), +                     ifile=Ifile, +                     gfile=filename:join(Dir, Gfile)}; +        true -> +            D = filename:dirname(Ofile), +            St1#leex{efile=Ofile, +                     ifile=Ifile, +                     gfile=filename:join(D, Gfile)} +    end. + +when_opt(Do, Opt, Opts) -> +    case member(Opt, Opts) of +        true -> Do(); +        false -> ok +    end. + +verbose_print(St, Format, Args) -> +    when_opt(fun () -> io:fwrite(Format, Args) end, verbose, St#leex.opts). + +%% parse_file(State) -> {ok,[REA],[Action],Code,NewState} | throw(NewState) +%%  when +%%      REA = {RegExp,ActionNo}; +%%      Action = {ActionNo,ActionString}; +%%      Code = {StartLine, StartPos, NumOfLines}. Where the Erlang code is. +%% +%%  Read and parse the file Xfile. +%%  After each section of the file has been parsed we directly call the +%%  next section. This is done when we detect a line we don't recognise +%%  in the current section. The file format is very simple and Erlang +%%  token based, we allow empty lines and Erlang style comments. + +parse_file(St0) -> +    case file:open(St0#leex.xfile, [read]) of +        {ok,Xfile} -> +            try +                verbose_print(St0, "Parsing file ~s, ", [St0#leex.xfile]), +                %% We KNOW that errors throw so we can ignore them here. +                {ok,Line1,St1} = parse_head(Xfile, St0), +                {ok,Line2,Macs,St2} = parse_defs(Xfile, Line1, St1), +                {ok,Line3,REAs,Actions,St3} =  +                    parse_rules(Xfile, Line2, Macs, St2), +                {ok,Code,St4} = parse_code(Xfile, Line3, St3), +                verbose_print(St1, "contained ~w rules.~n", [length(REAs)]), +                {ok,REAs,Actions,Code,St4} +            after file:close(Xfile) +            end; +        {error,Error} -> +            add_error({none,leex,{file_error,Error}}, St0) +    end. + +%% parse_head(File, State) -> {ok,NextLine,State}. +%%  Parse the head of the file. Skip all comments and blank lines. + +parse_head(Ifile, St) -> {ok,nextline(Ifile, 0),St}. + +%% parse_defs(File, Line, State) -> {ok,NextLine,Macros,State}. +%%  Parse the macro definition section of a file. This must exist. +%%  The section is ended by a non-blank line which is not a macro def. + +parse_defs(Ifile, {ok,?DEFS_HEAD ++ Rest,L}, St) -> +    St1 = warn_ignored_chars(L, Rest, St), +    parse_defs(Ifile, nextline(Ifile, L), [], St1); +parse_defs(_, {ok,_,L}, St) -> +    add_error({L,leex,missing_defs}, St); +parse_defs(_, {eof,L}, St) -> +    add_error({L,leex,missing_defs}, St). + +parse_defs(Ifile, {ok,Chars,L}=Line, Ms, St) -> +    %% This little beauty matches out a macro definition, RE's are so clear. +    MS = "^[ \t]*([A-Z_][A-Za-z0-9_]*)[ \t]*=[ \t]*([^ \t\r\n]*)[ \t\r\n]*\$", +    case re:run(Chars, MS, [{capture,all_but_first,list}]) of +        {match,[Name,Def]} -> +            %%io:fwrite("~p = ~p\n", [Name,Def]), +            parse_defs(Ifile, nextline(Ifile, L), [{Name,Def}|Ms], St); +        _ -> {ok,Line,Ms,St}                    % Anything else +    end; +parse_defs(_, Line, Ms, St) -> +    {ok,Line,Ms,St}. + +%% parse_rules(File, Line, Macros, State) -> {ok,NextLine,REAs,Actions,State}. +%%  Parse the RE rules section of the file. This must exist. + +parse_rules(Ifile, {ok,?RULE_HEAD ++ Rest,L}, Ms, St) -> +    St1 = warn_ignored_chars(L, Rest, St), +    parse_rules(Ifile, nextline(Ifile, L), Ms, [], [], 0, St1); +parse_rules(_, {ok,_,L}, _, St) -> +    add_error({L,leex,missing_rules}, St); +parse_rules(_, {eof,L}, _, St) -> +    add_error({L,leex,missing_rules}, St). + +%% parse_rules(File, Result, Macros, RegExpActions, Actions, Acount, State) -> +%%      {ok,NextCLine,RegExpActions,Actions,NewState} | throw(NewState) + +parse_rules(Ifile, NextLine, Ms, REAs, As, N, St) -> +    case NextLine of +        {ok,?CODE_HEAD ++ _Rest,_} -> +            parse_rules_end(Ifile, NextLine, REAs, As, St); +        {ok,Chars,L0} -> +            %%io:fwrite("~w: ~p~n", [L0,Chars]), +            case collect_rule(Ifile, Chars, L0) of +                {ok,Re,Atoks,L1} -> +                    {ok,REA,A,St1} = parse_rule(Re, L0, Atoks, Ms, N, St), +                    parse_rules(Ifile, nextline(Ifile, L1), Ms, +                                [REA|REAs], [A|As], N+1, St1); +                {error,E} -> add_error(E, St) +            end; +        {eof,_} -> +            parse_rules_end(Ifile, NextLine, REAs, As, St) +    end. + +parse_rules_end(_, {ok,_,L}, [], [], St) -> +    add_error({L,leex,empty_rules}, St); +parse_rules_end(_, {eof,L}, [], [], St) -> +    add_error({L,leex,empty_rules}, St); +parse_rules_end(_, NextLine, REAs, As, St) -> +    %% Must be *VERY* careful to put rules in correct order! +    {ok,NextLine,reverse(REAs),reverse(As),St}. + +%% collect_rule(File, Line, Lineno) -> +%%      {ok,RegExp,ActionTokens,NewLineno} | {error,E}. +%% Collect a complete rule by reading lines until the the regexp and +%% action has been read. Keep track of line number. + +collect_rule(Ifile, Chars, L0) -> +    %% Erlang strings are 1 based, but re 0 :-( +    {match,[{St0,Len}|_]} = re:run(Chars, "[^ \t\r\n]+"), +    St = St0 + 1, +    %%io:fwrite("RE = ~p~n", [substr(Chars, St, Len)]), +    case collect_action(Ifile, substr(Chars, St+Len), L0, []) of +        {ok,[{':',_}|Toks],L1} -> {ok,substr(Chars, St, Len),Toks,L1}; +        {ok,_,_} -> {error,{L0,leex,bad_rule}}; +        {eof,L1} -> {error,{L1,leex,bad_rule}}; +        {error,E,_} -> {error,E} +    end. + +collect_action(Ifile, Chars, L0, Cont0) -> +    case erl_scan:tokens(Cont0, Chars, L0) of +        {done,{ok,Toks,_},_} -> {ok,Toks,L0}; +        {done,{eof,_},_} -> {eof,L0}; +        {done,{error,E,_},_} -> {error,E,L0}; +        {more,Cont1} -> +            collect_action(Ifile, io:get_line(Ifile, leex), L0+1, Cont1) +    end. + +%% parse_rule(RegExpString, RegExpLine, ActionTokens, Macros, Counter, State) -> +%%      {ok,{RE,Action},ActionData,State}. +%%  Parse one regexp after performing macro substition. + +parse_rule(S, Line, [{dot,_}], Ms, N, St) -> +    case parse_rule_regexp(S, Ms, St) of +        {ok,R} -> +            {ok,{R,N},{N,empty_action},St}; +        {error,E} -> +            add_error({Line,leex,E}, St) +    end; +parse_rule(S, Line, Atoks, Ms, N, St) -> +    case parse_rule_regexp(S, Ms, St) of +        {ok,R} -> +            %%io:fwrite("RE = ~p~n", [R]), +            %% Check for token variables. +            TokenChars = var_used('TokenChars', Atoks), +            TokenLen = var_used('TokenLen', Atoks), +            TokenLine = var_used('TokenLine', Atoks), +            {ok,{R,N},{N,Atoks,TokenChars,TokenLen,TokenLine},St}; +        {error,E} -> +            add_error({Line,leex,E}, St) +    end. + +var_used(Name, Toks) -> +    case keyfind(Name, 3, Toks) of +        {var,_,Name} -> true;                   %It's the var we want +        _ -> false +    end. + +%% parse_rule_regexp(RegExpString, Macros, State) -> +%%     {ok,RegExp} | {error,Error}. +%% Substitute in macros and parse RegExpString. Cannot use re:replace +%% here as it uses info in replace string (&). + +parse_rule_regexp(RE0, [{M,Exp}|Ms], St) -> +    Split= re:split(RE0, "\\{" ++ M ++ "\\}", [{return,list}]), +    RE1 = string:join(Split, Exp), +    parse_rule_regexp(RE1, Ms, St); +parse_rule_regexp(RE, [], St) -> +    %%io:fwrite("RE = ~p~n", [RE]), +    case re_parse(RE, St) of +        {ok,R} -> {ok,R}; +        {error,E} -> {error,{regexp,E}} +    end. + +%% parse_code(File, Line, State) -> {ok,Code,NewState}. +%%  Finds the line and the position where the code section of the file +%%  begins. This must exist. + +parse_code(Ifile, {ok,?CODE_HEAD ++ Rest,CodeL}, St) -> +    St1 = warn_ignored_chars(CodeL, Rest, St), +    {ok, CodePos} = file:position(Ifile, cur), +    %% Just count the lines; copy the code from file to file later. +    NCodeLines = count_lines(Ifile, 0), +    {ok,{CodeL,CodePos,NCodeLines},St1}; +parse_code(_, {ok,_,L}, St) -> +    add_error({L,leex,missing_code}, St); +parse_code(_, {eof,L}, St) -> +    add_error({L,leex,missing_code}, St). + +count_lines(File, N) -> +    case io:get_line(File, leex) of +        eof -> N; +        _Line -> count_lines(File, N+1) +    end. + +%% nextline(InputFile, PrevLineNo) -> {ok,Chars,LineNo} | {eof,LineNo}. +%%  Get the next line skipping comment lines and blank lines. + +nextline(Ifile, L) -> +    case io:get_line(Ifile, leex) of +        eof -> {eof,L}; +        Chars -> +            case substr(Chars, span(Chars, " \t\n")+1) of +                [$%|_Rest] -> nextline(Ifile, L+1); +                [] -> nextline(Ifile, L+1); +                _Other -> {ok,Chars,L+1} +            end +    end. + +warn_ignored_chars(Line, S, St) -> +    case non_white(S) of +        [] -> St; +        _ -> add_warning(Line, ignored_characters, St) +    end. + +non_white(S) -> +    [C || C <- S, C > $\s, C < $\200 orelse C > $\240]. + +%% This is the regular expression grammar used. It is equivalent to the +%% one used in AWK, except that we allow ^ $ to be used anywhere and fail +%% in the matching. +%% +%% reg -> alt : '$1'. +%% alt -> seq "|" seq ... : {alt,['$1','$2'...]}. +%% seq -> repeat repeat ... : {seq,['$1','$2'...]}. +%% repeat -> repeat "*" : {kclosure,'$1'}. +%% repeat -> repeat "+" : {pclosure,'$1'}. +%% repeat -> repeat "?" : {optional,'$1'}. +%% repeat -> repeat "{" [Min],[Max] "}" : {interval,'$1',Min,Max} +%% repeat -> single : '$1'. +%% single -> "(" reg ")" : {sub,'$2',Number}. +%% single -> "^" : bos/bol. +%% single -> "$" : eos/eol. +%% single -> "." : any. +%% single -> "[" class "]" : {char_class,char_class('$2')} +%% single -> "[" "^" class "]" : {comp_class,char_class('$3')}. +%% single -> "\"" chars "\"" : {lit,'$2'}. +%% single -> "\\" char : {lit,['$2']}. +%% single -> char : {lit,['$1']}. +%% single -> empty : epsilon. +%%  The grammar of the current regular expressions. The actual parser +%%  is a recursive descent implementation of the grammar. + +%% re_parse(Chars, State) -> {ok,RegExp} | {error,Error}. + +re_parse(Cs0, St) -> +    case catch re_reg(Cs0, 0, St) of +        {RE,_,[]} -> {ok,RE}; +        {_,_,[C|_]} -> {error,{illegal_char,[C]}}; +        {parse_error,E} -> {error,E} +    end. + +parse_error(E) -> throw({parse_error,E}). + +re_reg(Cs, Sn, St) -> re_alt(Cs, Sn, St). + +re_alt(Cs0, Sn0, St) -> +    {L,Sn1,Cs1} = re_seq(Cs0, Sn0, St), +    case re_alt1(Cs1, Sn1, St) of +        {[],Sn2,Cs2} -> {L,Sn2,Cs2}; +        {Rs,Sn2,Cs2} -> {{alt,[L|Rs]},Sn2,Cs2} +    end. + +re_alt1([$||Cs0], Sn0, St) -> +    {L,Sn1,Cs1} = re_seq(Cs0, Sn0, St), +    {Rs,Sn2,Cs2} = re_alt1(Cs1, Sn1, St), +    {[L|Rs],Sn2,Cs2}; +re_alt1(Cs, Sn, _) -> {[],Sn,Cs}. + +%% Parse a sequence of regexps. Don't allow the empty sequence. +%% re_seq(Cs0, Sn0, St) -> +%%     {L,Sn1,Cs1} = repeat(Cs0, Sn0, St), +%%     case re_seq1(Cs1, Sn1, St) of +%%      {[],Sn2,Cs2} -> {L,Sn2,Cs2}; +%%      {Rs,Sn2,Cs2} -> {{seq,[L|Rs]},Sn2,Cs2} +%%     end. + +%% re_seq(Chars, SubNumber, State) -> {RegExp,SubNumber,Chars}. +%% Parse a sequence of regexps. Allow the empty sequence, returns epsilon. + +re_seq(Cs0, Sn0, St) -> +    case re_seq1(Cs0, Sn0, St) of +        {[],Sn1,Cs1} -> {epsilon,Sn1,Cs1}; +        {[R],Sn1,Cs1} -> {R,Sn1,Cs1}; +        {Rs,Sn1,Cs1} -> {{seq,Rs},Sn1,Cs1} +    end. + +re_seq1([C|_]=Cs0, Sn0, St) when C /= $|, C /= $) -> +    {L,Sn1,Cs1} = re_repeat(Cs0, Sn0, St), +    {Rs,Sn2,Cs2} = re_seq1(Cs1, Sn1, St), +    {[L|Rs],Sn2,Cs2}; +re_seq1(Cs, Sn, _) -> {[],Sn,Cs}. + +%% re_repeat(Chars, SubNumber, State) -> {RegExp,SubNumber,Chars}. + +re_repeat(Cs0, Sn0, St) -> +    {S,Sn1,Cs1} = re_single(Cs0, Sn0, St), +    re_repeat1(Cs1, Sn1, S, St). + +re_repeat1([$*|Cs], Sn, S, St) -> re_repeat1(Cs, Sn, {kclosure,S}, St); +re_repeat1([$+|Cs], Sn, S, St) -> re_repeat1(Cs, Sn, {pclosure,S}, St); +re_repeat1([$?|Cs], Sn, S, St) -> re_repeat1(Cs, Sn, {optional,S}, St); +%% { only starts interval when ere is true, otherwise normal character. +re_repeat1([${|Cs0], Sn, S, #leex{posix=true}=St) ->    % $} +    case re_interval_range(Cs0) of +        {Min,Max,[$}|Cs1]} when is_integer(Min), is_integer(Max), Min =< Max -> +            re_repeat1(Cs1, Sn, {interval,S,Min,Max}, St); +        {Min,Max,[$}|Cs1]} when is_integer(Min), is_atom(Max) -> +            re_repeat1(Cs1, Sn, {interval,S,Min,Max}, St); +        {_,_,Cs1} -> parse_error({interval_range,string_between([${|Cs0], Cs1)}) +    end; +re_repeat1(Cs, Sn, S, _) -> {S,Sn,Cs}. + +%% re_single(Chars, SubNumber, State) -> {RegExp,SubNumber,Chars}. +%% Parse a re_single regexp. + +re_single([$(|Cs0], Sn0, St) ->                % $) +    Sn1 = Sn0 + 1,                             % Keep track of sub count anyway +    case re_reg(Cs0, Sn1, St) of +        {S,Sn2,[$)|Cs1]} -> {S,Sn2,Cs1}; +        %%{S,Sn2,[$)|Cs1]} -> {{sub,S,Sn1},Sn2,Cs1}; +        _ -> parse_error({unterminated,"("}) +    end; +%% These are not legal inside a regexp. +%% re_single([$^|Cs], Sn, St) -> {bos,Sn,Cs}; +%% re_single([$$|Cs], Sn, St) -> {eos,Sn,Cs}; +%% re_single([$.|Cs], Sn, St) -> {any,Sn,Cs}; +re_single([$.|Cs], Sn, _) -> {{comp_class,"\n"},Sn,Cs}; % Do this here? +re_single("[^" ++ Cs0, Sn, St) -> +    case re_char_class(Cs0, St) of +        {Cc,[$]|Cs1]} -> {{comp_class,Cc},Sn,Cs1}; +        _ -> parse_error({unterminated,"["}) +    end; +re_single([$[|Cs0], Sn, St) -> +    case re_char_class(Cs0, St) of +        {Cc,[$]|Cs1]} -> {{char_class,Cc},Sn,Cs1}; +        _ -> parse_error({unterminated,"["}) +    end; +re_single([$\\|Cs0], Sn, _) -> +    {C,Cs1} = re_char($\\, Cs0), +    {{lit,[C]},Sn,Cs1}; +re_single([C|Cs0], Sn, St) -> +    case special_char(C, St) of +        true -> parse_error({illegal_char,[C]}); +        false -> +            {C,Cs1} = re_char(C, Cs0), +            {{lit,[C]},Sn,Cs1} +    end. + +-define(IS_HEX(C), C >= $0 andalso C =< $9 orelse +        C >= $A andalso C =< $F orelse +        C >= $a andalso C =< $f). + +%% re_char(Char, Chars) -> {CharValue,Chars}. +%%  Reads one character value from the input list, it knows about escapes. + +re_char($\\, [O1,O2,O3|S]) when +  O1 >= $0, O1 =< $7, O2 >= $0, O2 =< $7, O3 >= $0, O3 =< $7 -> +    {(O1*8 + O2)*8 + O3 - 73*$0,S}; +re_char($\\, [$x,H1,H2|S]) when ?IS_HEX(H1), ?IS_HEX(H2) -> +    {erlang:list_to_integer([H1,H2], 16),S}; +re_char($\\,[$x,${|S0]) ->  +    re_hex(S0, []); +re_char($\\,[$x|_]) -> +    parse_error({illegal_char,"\\x"}); +re_char($\\, [C|S]) -> {escape_char(C),S}; +re_char($\\, []) -> parse_error({unterminated,"\\"}); +re_char(C, S) -> {C,S}.                         % Just this character + +re_hex([C|Cs], L) when ?IS_HEX(C) -> re_hex(Cs, [C|L]); +re_hex([$}|S], L0) ->  +    L = lists:reverse(L0), +    case erlang:list_to_integer(L, 16) of +        C when C =< 16#10FFFF -> {C,S}; +        _ -> parse_error({illegal_char,[$\\,$x,${|L]++"}"}) +    end; +re_hex(_, _) -> parse_error({unterminated,"\\x{"}). + +%% special_char(Char, State) -> bool(). +%% These are the special characters for an ERE. +%% N.B. ]}) are only special in the context after [{(. + +special_char($^, _) -> true; +special_char($., _) -> true; +special_char($[, _) -> true; +special_char($$, _) -> true; +special_char($(, _) -> true; +special_char($), _) -> true; +special_char($|, _) -> true; +special_char($*, _) -> true; +special_char($+, _) -> true; +special_char($?, _) -> true; +special_char(${, #leex{posix=true}) -> true;    % Only when POSIX set +special_char($\\, _) -> true; +special_char(_, _) -> false. + +%% re_char_class(Chars, State) -> {CharClass,Chars}. +%% Parse a character class. + +re_char_class([$]|Cs], St) ->                   % Must special case this. +    re_char_class(Cs, [$]], St); +re_char_class(Cs, St) -> re_char_class(Cs, [], St). + +re_char_class("[:" ++ Cs0, Cc, #leex{posix=true}=St) -> +    %% POSIX char class only. +    case posix_cc(Cs0) of +        {Pcl,":]" ++ Cs1} -> re_char_class(Cs1, [{posix,Pcl}|Cc], St); +        {_,Cs1} -> parse_error({posix_cc,string_between(Cs0, Cs1)}) +    end; +re_char_class([C1|Cs0], Cc, St) when C1 /= $] -> +    case re_char(C1, Cs0) of +        {Cf,[$-,C2|Cs1]} when C2 /= $] -> +            case re_char(C2, Cs1) of +                {Cl,Cs2} when Cf < Cl -> +                    re_char_class(Cs2, [{range,Cf,Cl}|Cc], St); +                {_,Cs2} -> +                    parse_error({char_class,string_between([C1|Cs0], Cs2)}) +            end; +        {C,Cs1} -> re_char_class(Cs1, [C|Cc], St) +    end; +re_char_class(Cs, Cc, _) -> {reverse(Cc),Cs}.   % Preserve order + +%% posix_cc(String) -> {PosixClass,RestString}. +%%  Handle POSIX character classes. + +posix_cc("alnum" ++ Cs) -> {alnum,Cs}; +posix_cc("alpha" ++ Cs) -> {alpha,Cs}; +posix_cc("blank" ++ Cs) -> {blank,Cs}; +posix_cc("cntrl" ++ Cs) -> {cntrl,Cs}; +posix_cc("digit" ++ Cs) -> {digit,Cs}; +posix_cc("graph" ++ Cs) -> {graph,Cs}; +posix_cc("lower" ++ Cs) -> {lower,Cs}; +posix_cc("print" ++ Cs) -> {print,Cs}; +posix_cc("punct" ++ Cs) -> {punct,Cs}; +posix_cc("space" ++ Cs) -> {space,Cs}; +posix_cc("upper" ++ Cs) -> {upper,Cs}; +posix_cc("xdigit" ++ Cs) -> {xdigit,Cs}; +posix_cc(Cs) -> parse_error({posix_cc,substr(Cs, 1, 5)}). + +escape_char($n) -> $\n;                         % \n = LF +escape_char($r) -> $\r;                         % \r = CR +escape_char($t) -> $\t;                         % \t = TAB +escape_char($v) -> $\v;                         % \v = VT +escape_char($b) -> $\b;                         % \b = BS +escape_char($f) -> $\f;                         % \f = FF +escape_char($e) -> $\e;                         % \e = ESC +escape_char($s) -> $\s;                         % \s = SPACE +escape_char($d) -> $\d;                         % \d = DEL +escape_char(C) -> C.                            % Pass it straight through + +%% re_interval_range(Chars) -> {Min,Max,RestChars}. +%% NoInt     -> none,none +%% Int       -> Int,none +%% Int,      -> Int,any +%% Int1,Int2 -> Int1,Int2 + +re_interval_range(Cs0) -> +    case re_number(Cs0) of +        {none,Cs1} -> {none,none,Cs1}; +        {N,[$,|Cs1]} -> +            case re_number(Cs1) of +                {none,Cs2} -> {N,any,Cs2}; +                {M,Cs2} -> {N,M,Cs2} +            end; +        {N,Cs1} -> {N,none,Cs1} +    end. + +re_number([C|Cs]) when C >= $0, C =< $9 -> +    re_number(Cs, C - $0); +re_number(Cs) -> {none,Cs}. + +re_number([C|Cs], Acc) when C >= $0, C =< $9 -> +    re_number(Cs, 10*Acc + (C - $0)); +re_number(Cs, Acc) -> {Acc,Cs}. + +string_between(Cs1, Cs2) -> +    substr(Cs1, 1, length(Cs1)-length(Cs2)). + +%% We use standard methods, Thompson's construction and subset +%% construction, to create first an NFA and then a DFA from the +%% regexps. A non-standard feature is that we work with sets of +%% character ranges (crs) instead sets of characters. This is most +%% noticeable when constructing DFAs. The major benefit is that we can +%% handle characters from any set, not just limited ASCII or 8859, +%% even 16/32 bit unicode. +%% +%% The whole range of characters is 0-maxchar, where maxchar is a BIG +%% number. We don't make any assumptions about the size of maxchar, it +%% is just bigger than any character. +%% +%% Using character ranges makes describing many regexps very simple, +%% for example the regexp "." just becomes the range +%% [{0-9},{11-maxchar}]. + +%% make_nfa(RegExpActions) -> {ok,{NFA,StartState}} | {error,E}. +%% Build a complete nfa from a list of {RegExp,Action}. The NFA field +%% accept has values {yes,Action}|no. The NFA is a list of states. + +make_dfa(REAs, St) -> +    {NFA,NF} = build_combined_nfa(REAs), +    verbose_print(St, "NFA contains ~w states, ", [tuple_size(NFA)]), +    {DFA0,DF0} = build_dfa(NFA, NF), +    verbose_print(St, "DFA contains ~w states, ", [length(DFA0)]), +    {DFA,DF} = minimise_dfa(DFA0, DF0), +    verbose_print(St, "minimised to ~w states.~n", [length(DFA)]), +    %%io:fwrite("~p\n", [{NF,NFA}]), +    %%io:fwrite("~p\n", [{DF0,DFA0}]), +    %%io:fwrite("~p\n", [{DF,DFA}]), +    {DFA,DF}. + +%% build_combined_nfa(RegExpActionList) -> {NFA,FirstState}. +%%  Build the combined NFA using Thompson's construction straight out +%%  of the book. Build the separate NFAs in the same order as the +%%  rules so that the accepting have ascending states have ascending +%%  state numbers. Start numbering the states from 1 as we put the +%%  states in a tuple with the state number as the index. +%% +%%  The edges from a state are a list of {CharRange,State} | {epsilon,State}. + +build_combined_nfa(REAs) -> +    {NFA0,Firsts,Free} = build_nfa_list(REAs, [], [], 1), +    F = #nfa_state{no=Free,edges=epsilon_trans(Firsts)}, +    {list_to_tuple(keysort(#nfa_state.no, [F|NFA0])),Free}. + +build_nfa_list([{RE,Action}|REAs], NFA0, Firsts, Free0) -> +    {NFA1,Free1,First} = build_nfa(RE, Free0, Action), +    build_nfa_list(REAs, NFA1 ++ NFA0, [First|Firsts], Free1); +build_nfa_list([], NFA, Firsts, Free) -> +    {NFA,reverse(Firsts),Free}. + +epsilon_trans(Firsts) -> [ {epsilon,F} || F <- Firsts ]. + +%% build_nfa(RegExp, NextState, Action) -> {NFA,NextState,FirstState}. +%%  When building the NFA states for a regexp we don't build the end +%%  state, just allocate a State for it and return this state's +%%  number. This allows us to avoid building unnecessary states for +%%  concatenation which would then have to be removed by overwriting +%%  an existing state. + +build_nfa(RE, N0, Action) -> +    {NFA,N1,E} = build_nfa(RE, N0+1, N0, []), +    {[#nfa_state{no=E,accept={accept,Action}}|NFA],N1,N0}. + +%% build_nfa(RegExp, NextState, FirstState, NFA) -> {NFA,NextState,EndState}. +%%  Build an NFA from the RegExp. NFA is a list of #nfa_state{} in no +%%  predefined order. NextState is the number of the next free state +%%  to use, FirstState is the the state which must be the start for +%%  this regexp as a previous regexp refers to it, EndState is the +%%  state to which this NFA will exit to. The number of the returned +%%  EndState is already allocated! + +build_nfa({alt,REs}, N, F, NFA) -> +    build_nfa_alt(REs, N, F, NFA); +build_nfa({seq,REs}, N, F, NFA) -> +    build_nfa_seq(REs, N, F, NFA); +build_nfa({kclosure,RE}, N0, F, NFA0) -> +    {NFA1,N1,E1} = build_nfa(RE, N0+1, N0, NFA0), +    E = N1,                             % End state +    {[#nfa_state{no=F,edges=[{epsilon,N0},{epsilon,E}]}, +      #nfa_state{no=E1,edges=[{epsilon,N0},{epsilon,E}]}|NFA1], +     N1+1,E}; +build_nfa({pclosure,RE}, N0, F, NFA0) -> +    {NFA1,N1,E1} = build_nfa(RE, N0+1, N0, NFA0), +    E = N1,                             % End state +    {[#nfa_state{no=F,edges=[{epsilon,N0}]}, +      #nfa_state{no=E1,edges=[{epsilon,N0},{epsilon,E}]}|NFA1], +     N1+1,E}; +build_nfa({optional,RE}, N0, F, NFA0) -> +    {NFA1,N1,E1} = build_nfa(RE, N0+1, N0, NFA0), +    E = N1,                             % End state +    {[#nfa_state{no=F,edges=[{epsilon,N0},{epsilon,E}]}, +      #nfa_state{no=E1,edges=[{epsilon,E}]}|NFA1], +     N1+1,E}; +build_nfa({char_class,Cc}, N, F, NFA) -> +    {[#nfa_state{no=F,edges=[{pack_cc(Cc),N}]}|NFA],N+1,N}; +build_nfa({comp_class,Cc}, N, F, NFA) -> +    {[#nfa_state{no=F,edges=[{comp_class(Cc),N}]}|NFA],N+1,N}; +build_nfa({lit,Cs}, N, F, NFA) ->               % Implicit concatenation +    build_nfa_lit(Cs, N, F, NFA); +build_nfa(epsilon, N, F, NFA) ->                % Just an epsilon transition +    {[#nfa_state{no=F,edges=[{epsilon,N}]}|NFA],N+1,N}. + +%% build_nfa_lit(Chars, NextState, FirstState, NFA) -> {NFA,NextState,EndState}. +%%  Build an NFA for the sequence of literal characters. + +build_nfa_lit(Cs, N0, F0, NFA0) -> +    foldl(fun (C, {NFA,N,F}) -> +                  {[#nfa_state{no=F,edges=[{[{C,C}],N}]}|NFA],N+1,N} +          end, {NFA0,N0,F0}, Cs). + +%% build_nfa_lit([C|Cs], N, F, NFA0) when is_integer(C) -> +%%     NFA1 = [#nfa_state{no=F,edges=[{[{C,C}],N}]}|NFA0], +%%     build_nfa_lit(Cs, N+1, N, NFA1); +%% build_nfa_lit([], N, F, NFA) -> {NFA,N,F}. + +%% build_nfa_seq(REs, NextState, FirstState, NFA) -> {NFA,NextState,EndState}. +%%  Build an NFA for the regexps in a sequence. + +build_nfa_seq(REs, N0, F0, NFA0) -> +    foldl(fun (RE, {NFA,N,F}) -> build_nfa(RE, N, F, NFA) end, +          {NFA0,N0,F0}, REs). + +%% build_nfa_seq([RE|REs], N0, F, NFA0) -> +%%     {NFA1,N1,E1} = build_nfa(RE, N0, F, NFA0), +%%     build_nfa_seq(REs, N1, E1, NFA1); +%% build_nfa_seq([], N, F, NFA) -> {NFA,N,F}. + +%% build_nfa_alt(REs, NextState, FirstState, NFA) -> {NFA,NextState,EndState}. +%%  Build an NFA for the regexps in an alternative. N.B. we don't +%%  handle empty alts here but the parser should never generate them +%%  anyway. + +build_nfa_alt([RE], N, F, NFA) -> build_nfa(RE, N, F, NFA); +build_nfa_alt([RE|REs], N0, F, NFA0) -> +    {NFA1,N1,E1} = build_nfa(RE, N0+1, N0, NFA0), +    {NFA2,N2,E2} = build_nfa_alt(REs, N1+1, N1, NFA1), +    E = N2,                                     % End state +    {[#nfa_state{no=F,edges=[{epsilon,N0},{epsilon,N1}]}, +      #nfa_state{no=E1,edges=[{epsilon,E}]}, +      #nfa_state{no=E2,edges=[{epsilon,E}]}|NFA2], +     N2+1,E}. + +%% build_nfa_alt(REs, NextState, FirstState, NFA) -> {NFA,NextState,EndState}. +%%  Build an NFA for the regexps in an alternative. Make one big +%%  epsilon split state, not necessary but fun. + +%% build_nfa_alt(REs, N0, F0, NFA0) -> +%%     E = N0,                                  % Must reserve End state first +%%     {Fs,{NFA1,N1}} = mapfoldl(fun (RE, {NFA,N}) -> +%%                                    build_nfa_alt1(RE, N, E, NFA) +%%                            end, {NFA0,N0+1}, REs), +%%     {[#nfa_state{no=F0,edges=epsilon_trans(Fs)}, +%%       #nfa_state{no=E,edges=[{epsilon,N1}]}|NFA1],N1+1,N1}. + +%% build_nfa_alt1(RE, N0, End, NFA0) -> +%%     {NFA1,N1,E} = build_nfa(RE, N0+1, N0, NFA0), +%%     {N0,{[#nfa_state{no=E,edges=[{epsilon,End}]}|NFA1],N1}}. + +%% pack_cc(CharClass) -> CharClass +%%  Pack and optimise a character class specification (bracket +%%  expression). First sort it and then compact it. + +pack_cc(Cc) -> +    Crs = foldl(fun ({range,Cf,Cl}, Set) -> add_element({Cf,Cl}, Set); +                    (C, Set) -> add_element({C,C}, Set) +                end, ordsets:new(), Cc), +    pack_crs(Crs).                              % An ordset IS a list! + +pack_crs([{C1,C2}=Cr,{C3,C4}|Crs]) when C1 =< C3, C2 >= C4 -> +    %% C1      C2 +    %%   C3  C4 +    pack_crs([Cr|Crs]); +pack_crs([{C1,C2},{C3,C4}|Crs]) when C2 >= C3, C2 < C4 -> +    %% C1    C2 +    %%    C3   C4 +    pack_crs([{C1,C4}|Crs]); +pack_crs([{C1,C2},{C3,C4}|Crs]) when C2 + 1 == C3 -> +    %% C1   C2 +    %%        C3  C4 +    pack_crs([{C1,C4}|Crs]); +pack_crs([Cr|Crs]) -> [Cr|pack_crs(Crs)]; +pack_crs([]) -> []. + +comp_class(Cc) -> +    Crs = pack_cc(Cc), +    Comp = comp_crs(Crs, 0), +    %% io:fwrite("comp: ~p\n      ~p\n", [Crs,Comp]), +    Comp. + +comp_crs([{0,C2}|Crs], 0) ->                   % Get first range right +    comp_crs(Crs, C2+1); +comp_crs([{C1,C2}|Crs], Last) -> +    [{Last,C1-1}|comp_crs(Crs, C2+1)]; +comp_crs([], Last) -> [{Last,maxchar}]. + +%% build_dfa(NFA, NfaFirstState) -> {DFA,DfaFirstState}. +%%  Build a DFA from an NFA using "subset construction". The major +%%  difference from the book is that we keep the marked and unmarked +%%  DFA states in seperate lists. New DFA states are added to the +%%  unmarked list and states are marked by moving them to the marked +%%  list. We assume that the NFA accepting state numbers are in +%%  ascending order for the rules and use ordsets to keep this order. + +build_dfa(NFA, Nf) -> +    D = #dfa_state{no=0,nfa=eclosure([Nf], NFA)}, +    {build_dfa([D], 1, [], NFA),0}. + +%% build_dfa([UnMarked], NextState, [Marked], NFA) -> DFA. +%%  Traverse the unmarked states. Temporarily add the current unmarked +%%  state to the marked list before calculating translation, this is +%%  to avoid adding too many duplicate states. Add it properly to the +%%  marked list afterwards with correct translations. + +build_dfa([U|Us0], N0, Ms, NFA) -> +    {Ts,Us1,N1} = build_dfa(U#dfa_state.nfa, Us0, N0, [], [U|Ms], NFA), +    M = U#dfa_state{trans=Ts,accept=accept(U#dfa_state.nfa, NFA)}, +    build_dfa(Us1, N1, [M|Ms], NFA); +build_dfa([], _, Ms, _) -> Ms. + +%% build_dfa([NfaState], [Unmarked], NextState, [Transition], [Marked], NFA) -> +%%      {Transitions,UnmarkedStates,NextState}. +%%  Foreach NFA state set calculate the legal translations. N.B. must +%%  search *BOTH* the unmarked and marked lists to check if DFA state +%%  already exists. As the range of characters is potentially VERY +%%  large we cannot explicitly test all characters. Instead we first +%%  calculate the set of all disjoint character ranges which are +%%  possible candidates to the set of NFA states. The transitions are +%%  an orddict so we get the transition lists in ascending order. + +build_dfa(Set, Us, N, Ts, Ms, NFA) -> +    %% List of all transition sets. +    Crs0 = [Cr || S <- Set, +                  {Crs,_St} <- (element(S, NFA))#nfa_state.edges, +                  Crs /= epsilon,        % Not an epsilon transition +                  Cr <- Crs ], +    Crs1 = lists:usort(Crs0),            % Must remove duplicates! +    %% Build list of disjoint test ranges. +    Test = disjoint_crs(Crs1), +    %% io:fwrite("bd: ~p\n    ~p\n    ~p\n    ~p\n", [Set,Crs0,Crs1,Test]), +    build_dfa(Test, Set, Us, N, Ts, Ms, NFA). + +%% disjoint_crs([CharRange]) -> [CharRange]. +%%  Take a sorted list of char ranges and make a sorted list of +%%  disjoint char ranges. No new char range extends past an existing +%%  char range. + +disjoint_crs([{_C1,C2}=Cr1,{C3,_C4}=Cr2|Crs]) when C2 < C3 -> +    %% C1  C2 +    %%        C3  C4 +    [Cr1|disjoint_crs([Cr2|Crs])]; +disjoint_crs([{C1,C2},{C3,C4}|Crs]) when C1 == C3 -> +    %% C1     C2 +    %% C3       C4 +    [{C1,C2}|disjoint_crs(add_element({C2+1,C4}, Crs))]; +disjoint_crs([{C1,C2},{C3,C4}|Crs]) when C1 < C3, C2 >= C3, C2 < C4 -> +    %% C1     C2 +    %%    C3     C4 +    [{C1,C3-1}|disjoint_crs(union([{C3,C2},{C2+1,C4}], Crs))]; +disjoint_crs([{C1,C2},{C3,C4}|Crs]) when C1 < C3, C2 == C4 -> +    %% C1      C2 +    %%    C3   C4 +    [{C1,C3-1}|disjoint_crs(add_element({C3,C4}, Crs))]; +disjoint_crs([{C1,C2},{C3,C4}|Crs]) when C1 < C3, C2 > C4 -> +    %% C1        C2 +    %%    C3   C4 +    [{C1,C3-1}|disjoint_crs(union([{C3,C4},{C4+1,C2}], Crs))]; +disjoint_crs([Cr|Crs]) -> [Cr|disjoint_crs(Crs)]; +disjoint_crs([]) -> []. + +build_dfa([Cr|Crs], Set, Us, N, Ts, Ms, NFA) -> +    case eclosure(move(Set, Cr, NFA), NFA) of +        S when S /= [] -> +            case dfa_state_exist(S, Us, Ms) of +                {yes,T} -> +                    build_dfa(Crs, Set, Us, N, store(Cr, T, Ts), Ms, NFA); +                no -> +                    U = #dfa_state{no=N,nfa=S}, +                    build_dfa(Crs, Set, [U|Us], N+1, store(Cr, N, Ts), Ms, NFA) +            end; +        [] -> +            build_dfa(Crs, Set, Us, N, Ts, Ms, NFA) +    end; +build_dfa([], _, Us, N, Ts, _, _) -> +    {Ts,Us,N}. + +%% dfa_state_exist(Set, Unmarked, Marked) -> {yes,State} | no. + +dfa_state_exist(S, Us, Ms) -> +    case keysearch(S, #dfa_state.nfa, Us) of +        {value,#dfa_state{no=T}} -> {yes,T}; +        false -> +            case keysearch(S, #dfa_state.nfa, Ms) of +                {value,#dfa_state{no=T}} -> {yes,T}; +                false -> no +            end +    end. + +%% eclosure([State], NFA) -> [State]. +%% move([State], Char, NFA) -> [State]. +%%  These are straight out of the book. As eclosure uses ordsets then +%%  the generated state sets are in ascending order. + +eclosure(Sts, NFA) -> eclosure(Sts, NFA, []). + +eclosure([St|Sts], NFA, Ec) -> +    #nfa_state{edges=Es} = element(St, NFA), +    eclosure([ N || {epsilon,N} <- Es, +                    not is_element(N, Ec) ] ++ Sts, +             NFA, add_element(St, Ec)); +eclosure([], _, Ec) -> Ec. + +move(Sts, Cr, NFA) -> +    %% io:fwrite("move1: ~p\n", [{Sts,Cr}]), +    [ St || N <- Sts, +            {Crs,St} <- (element(N, NFA))#nfa_state.edges, +            Crs /= epsilon,             % Not an epsilon transition +            in_crs(Cr, Crs) ]. + +in_crs({C1,C2}, [{C3,C4}|_Crs]) when C1 >= C3, C2 =< C4 -> true; +in_crs(Cr, [Cr|_Crs]) -> true;          % Catch bos and eos. +in_crs(Cr, [_|Crs]) -> in_crs(Cr, Crs); +in_crs(_Cr, []) -> false. + +%% accept([State], NFA) -> {accept,A} | noaccept. +%%  Scan down the state list until we find an accepting state. + +accept([St|Sts], NFA) -> +    case element(St, NFA) of +        #nfa_state{accept={accept,A}} -> {accept,A}; +        #nfa_state{accept=noaccept} -> accept(Sts, NFA) +    end; +accept([], _) -> noaccept. + +%% minimise_dfa(DFA, DfaFirst) -> {DFA,DfaFirst}. +%%  Minimise the DFA by removing equivalent states. We consider a +%%  state if both the transitions and the their accept state is the +%%  same.  First repeatedly run throught the DFA state list removing +%%  equivalent states and updating remaining transitions with +%%  remaining equivalent state numbers. When no more reductions are +%%  possible then pack the remaining state numbers to get consecutive +%%  states. + +minimise_dfa(DFA0, Df0) -> +    case min_dfa(DFA0) of +        {DFA1,[]} ->                    % No reduction! +            {DFA2,Rs} = pack_dfa(DFA1), +            {min_update(DFA2, Rs),min_use(Df0, Rs)}; +        {DFA1,Rs} -> +            minimise_dfa(min_update(DFA1, Rs), min_use(Df0, Rs)) +    end. + +min_dfa(DFA) -> min_dfa(DFA, [], []). + +min_dfa([D|DFA0], Rs0, MDFA) -> +    {DFA1,Rs1} = min_delete(DFA0, D#dfa_state.trans, D#dfa_state.accept, +                            D#dfa_state.no, Rs0, []), +    min_dfa(DFA1, Rs1, [D|MDFA]); +min_dfa([], Rs, MDFA) -> {MDFA,Rs}. + +%% min_delete(States, Trans, Action, NewN, Rs, MiniDFA) -> {MiniDFA,Rs}. +%%  Delete all states with same transactions and action. Return +%%  rewrites and minimised DFA with no duplicate states. + +min_delete([#dfa_state{no=N,trans=T,accept=A}|DFA], T, A, NewN, Rs, MDFA) -> +    min_delete(DFA, T, A, NewN, [{N,NewN}|Rs], MDFA); +min_delete([D|DFA], T, A, NewN, Rs, MDFA) -> +    min_delete(DFA, T, A, NewN, Rs, [D|MDFA]); +min_delete([], _, _, _, Rs, MDFA) -> {MDFA,Rs}. + +min_update(DFA, Rs) -> +    [ D#dfa_state{trans=min_update_trans(D#dfa_state.trans, Rs)} || D <- DFA ]. + +min_update_trans(Tr, Rs) -> +    [ {C,min_use(S, Rs)} || {C,S} <- Tr ]. + +min_use(Old, [{Old,New}|_]) -> New; +min_use(Old, [_|Reds]) -> min_use(Old, Reds); +min_use(Old, []) -> Old. + +pack_dfa(DFA) -> pack_dfa(DFA, 0, [], []). + +pack_dfa([D|DFA], NewN, Rs, PDFA) -> +    pack_dfa(DFA, NewN+1, +             [{D#dfa_state.no,NewN}|Rs], [D#dfa_state{no=NewN}|PDFA]); +pack_dfa([], _, Rs, PDFA) -> {PDFA,Rs}. + +%% The main output is the yystate function which is built from the +%% DFA. It has the spec: +%% +%% yystate() -> InitialState. +%% yystate(State, InChars, Line, CurrTokLen, AcceptAction, AcceptLen) -> +%%      {Action, AcceptLength, RestChars, Line} |         Accepting end state +%%      {Action, AcceptLength, RestChars, Line, State} |  Accepting state +%%      {reject, AcceptLength, CurrTokLen, RestChars, Line, State} | +%%      {Action, AcceptLength, CurrTokLen, RestChars, Line, State}. + +%% The return CurrTokLen is always the current number of characters +%% scanned in the current token. The returns have the follwoing +%% meanings: +%% {Action, AcceptLength, RestChars, Line} - +%%  The scanner has reached an accepting end-state, for example after +%%  a regexp "abc". Action is the action number and AcceptLength is +%%  the length of the matching token. +%% +%% {Action, AcceptLength, RestChars, Line, State} - +%%  The scanner has reached an accepting transition state, for example +%%  after c in regexp "abc(xyz)?", continuation depends on +%%  RestChars. If RestChars == [] (no more current characters) then we +%%  need to get more characters to see if it is an end-state, +%%  otherwise (eof or chars) then we have not found continuing +%%  characters and it is an end state. +%% +%% {reject, AcceptLength, CurrTokLen, RestChars, Line, State} - +%% {Action, AcceptLength, CurrTokLen, RestChars, Line, State} - +%%  The scanner has reached a non-accepting transistion state. If +%%  RestChars == [] we need to get more characters to continue. +%%  Otherwise if 'reject' then no accepting state has been reached it +%%  is an error. If we have an Action and AcceptLength then these are +%%  the last accept state, use them and continue from there. + +%% out_file(LeexState, DFA, DfaStart, [Action], Code) -> ok | error. +%%  Generate an output .erl file from the include file, the DFA and +%%  the code for the actions. + +out_file(St0, DFA, DF, Actions, Code) -> +    verbose_print(St0, "Writing file ~s, ", [St0#leex.efile]), +    case open_inc_file(St0) of +        {ok,Ifile} -> +            try +                case file:open(St0#leex.efile, [write]) of +                    {ok,Ofile} -> +                        try  +                            output_file_directive(Ofile, St0#leex.ifile, 0), +                            out_file(Ifile, Ofile, St0, DFA, DF, Actions, +                                     Code, 1), +                            verbose_print(St0, "ok~n", []), +                            St0 +                        after file:close(Ofile) +                        end; +                    {error,Error} -> +                        verbose_print(St0, "error~n", []), +                        add_error({none,leex,{file_error,Error}}, St0) +                end +            after file:close(Ifile) +            end; +        {{error,Error},Ifile} -> +            add_error(Ifile, {none,leex,{file_error,Error}}, St0) +    end. + +open_inc_file(State) -> +    Ifile = State#leex.ifile, +    case file:open(Ifile, [read]) of +        {ok,F} -> {ok,F}; +        Error -> {Error,Ifile} +    end. + +inc_file_name([]) -> +    Incdir = filename:join(code:lib_dir(parsetools), "include"), +    filename:join(Incdir, ?LEEXINC); +inc_file_name(Filename) -> +    Filename. +                     +%% out_file(IncFile, OutFile, State, DFA, DfaStart, Actions, Code, Line) -> ok +%%  Copy the include file line by line substituting special lines with +%%  generated code. We cheat by only looking at the first 5 +%%  characters. + +out_file(Ifile, Ofile, St, DFA, DF, Actions, Code, L) -> +    case io:get_line(Ifile, leex) of +        eof -> output_file_directive(Ofile, St#leex.ifile, L); +        Line -> +            case substr(Line, 1, 5) of +                "##mod" -> out_module(Ofile, St); +                "##cod" -> out_erlang_code(Ofile, St, Code, L); +                "##dfa" -> out_dfa(Ofile, St, DFA, Code, DF, L); +                "##act" -> out_actions(Ofile, St#leex.xfile, Actions); +                _ -> io:put_chars(Ofile, Line) +            end, +            out_file(Ifile, Ofile, St, DFA, DF, Actions, Code, L+1) +    end. + +out_module(File, St) -> +    io:fwrite(File, "-module(~w).\n", [St#leex.module]). + +out_erlang_code(File, St, Code, L) -> +    {CodeL,CodePos,_NCodeLines} = Code, +    output_file_directive(File, St#leex.xfile, CodeL), +    {ok,Xfile} = file:open(St#leex.xfile, [read]), +    try +        {ok,_} = file:position(Xfile, CodePos), +        {ok,_} = file:copy(Xfile, File) +    after  +        file:close(Xfile) +    end, +    io:nl(File), +    output_file_directive(File, St#leex.ifile, L). + +out_dfa(File, St, DFA, Code, DF, L) -> +    {_CodeL,_CodePos,NCodeLines} = Code, +    %% Three file attributes before this one... +    output_file_directive(File, St#leex.efile, L+(NCodeLines-1)+3), +    io:fwrite(File, "yystate() -> ~w.~n~n", [DF]), +    foreach(fun (S) -> out_trans(File, S) end, DFA), +    io:fwrite(File, "yystate(S, Ics, Line, Tlen, Action, Alen) ->~n", []), +    io:fwrite(File, "    {Action,Alen,Tlen,Ics,Line,S}.~n", []). + +out_trans(File, #dfa_state{no=N,trans=[],accept={accept,A}}) -> +    %% Accepting end state, guaranteed done. +    io:fwrite(File, "yystate(~w, Ics, Line, Tlen, _, _) ->~n", [N]), +    io:fwrite(File, "    {~w,Tlen,Ics,Line};~n", [A]); +out_trans(File, #dfa_state{no=N,trans=Tr,accept={accept,A}}) -> +    %% Accepting state, but there maybe more. +    foreach(fun (T) -> out_accept_tran(File, N, A, T) end, pack_trans(Tr)), +    io:fwrite(File, "yystate(~w, Ics, Line, Tlen, _, _) ->~n", [N]), +    io:fwrite(File, "    {~w,Tlen,Ics,Line,~w};~n", [A,N]); +out_trans(File, #dfa_state{no=N,trans=Tr,accept=noaccept}) -> +    %% Non-accepting transition state. +    foreach(fun (T) -> out_noaccept_tran(File, N, T) end, pack_trans(Tr)), +    io:fwrite(File, "yystate(~w, Ics, Line, Tlen, Action, Alen) ->~n", [N]), +    io:fwrite(File, "    {Action,Alen,Tlen,Ics,Line,~w};~n", [N]). + +out_accept_tran(File, N, A, {{Cf,maxchar},S}) -> +    out_accept_head_max(File, N, Cf), +    out_accept_body(File, S, "Line", A); +out_accept_tran(File, N, A, {{Cf,Cl},S}) -> +    out_accept_head_range(File, N, Cf, Cl), +    out_accept_body(File, S, "Line", A); +out_accept_tran(File, N, A, {$\n,S}) -> +    out_accept_head_1(File, N, $\n), +    out_accept_body(File, S, "Line+1", A); +out_accept_tran(File, N, A, {C,S}) -> +    out_accept_head_1(File, N, C), +    out_accept_body(File, S, "Line", A). + +out_accept_head_1(File, State, Char) -> +    out_head_1(File, State, Char, "_", "_"). + +out_accept_head_max(File, State, Min) -> +    out_head_max(File, State, Min, "_", "_"). + +out_accept_head_range(File, State, Min, Max) -> +    out_head_range(File, State, Min, Max, "_", "_"). + +out_accept_body(File, Next, Line, Action) -> +    out_body(File, Next, Line, io_lib:write(Action), "Tlen"). + +out_noaccept_tran(File, N, {{Cf,maxchar},S}) -> +    out_noaccept_head_max(File, N, Cf), +    out_noaccept_body(File, S, "Line"); +out_noaccept_tran(File, N, {{Cf,Cl},S}) -> +    out_noaccept_head_range(File, N, Cf, Cl), +    out_noaccept_body(File, S, "Line"); +out_noaccept_tran(File, N, {$\n,S}) -> +    out_noaccept_head_1(File, N, $\n), +    out_noaccept_body(File, S, "Line+1"); +out_noaccept_tran(File, N, {C,S}) -> +    out_noaccept_head_1(File, N, C), +    out_noaccept_body(File, S, "Line"). + +out_noaccept_head_1(File, State, Char) -> +    out_head_1(File, State, Char, "Action", "Alen"). + +out_noaccept_head_max(File, State, Min) -> +    out_head_max(File, State, Min, "Action", "Alen"). + +out_noaccept_head_range(File, State, Min, Max) -> +    out_head_range(File, State, Min, Max, "Action", "Alen"). + +out_noaccept_body(File, Next, Line) -> +    out_body(File, Next, Line, "Action", "Alen"). + +out_head_1(File, State, Char, Action, Alen) -> +    io:fwrite(File, "yystate(~w, [~w|Ics], Line, Tlen, ~s, ~s) ->\n", +              [State,Char,Action,Alen]). + +out_head_max(File, State, Min, Action, Alen) -> +    io:fwrite(File, "yystate(~w, [C|Ics], Line, Tlen, ~s, ~s) when C >= ~w ->\n", +              [State,Action,Alen,Min]). + +out_head_range(File, State, Min, Max, Action, Alen) -> +    io:fwrite(File, "yystate(~w, [C|Ics], Line, Tlen, ~s, ~s) when C >= ~w, C =< ~w ->\n", +              [State,Action,Alen,Min,Max]). + +out_body(File, Next, Line, Action, Alen) -> +    io:fwrite(File, "    yystate(~w, Ics, ~s, Tlen+1, ~s, ~s);\n", +              [Next,Line,Action,Alen]). + +%% pack_trans([{Crange,State}]) -> [{Crange,State}] when +%%      Crange = {Char,Char} | Char. +%%  Pack the translation table into something more suitable for +%%  generating code. We KNOW how the pattern matching compiler works +%%  so solitary characters are stored before ranges. We do this by +%%  prepending singletons to the front of the packed transitions and +%%  appending ranges to the back. This preserves the smallest to +%%  largest order of ranges. Newline characters, $\n, are always +%%  extracted and handled as singeltons. + +pack_trans(Trs) -> pack_trans(Trs, []). + +%% pack_trans(Trs) -> +%%     Trs1 = pack_trans(Trs, []), +%%     io:fwrite("tr:~p\n=> ~p\n", [Trs,Trs1]), +%%     Trs1. + +pack_trans([{{C,C},S}|Trs], Pt) ->         % Singletons to the head +    pack_trans(Trs, [{C,S}|Pt]); +%% Special detection and handling of $\n. +pack_trans([{{Cf,$\n},S}|Trs], Pt) -> +    pack_trans([{{Cf,$\n-1},S}|Trs], [{$\n,S}|Pt]); +pack_trans([{{$\n,Cl},S}|Trs], Pt) -> +    pack_trans([{{$\n+1,Cl},S}|Trs], [{$\n,S}|Pt]); +pack_trans([{{Cf,Cl},S}|Trs], Pt) when Cf < $\n, Cl > $\n -> +    pack_trans([{{Cf,$\n-1},S},{{$\n+1,Cl},S}|Trs], [{$\n,S}|Pt]); +%% Small ranges become singletons. +pack_trans([{{Cf,Cl},S}|Trs], Pt) when Cl == Cf + 1 -> +    pack_trans(Trs, [{Cf,S},{Cl,S}|Pt]); +pack_trans([Tr|Trs], Pt) ->                % The default uninteresting case +    pack_trans(Trs, Pt ++ [Tr]); +pack_trans([], Pt) -> Pt. + +%% out_actions(File, XrlFile, ActionList) -> ok. +%% Write out the action table. + +out_actions(File, XrlFile, As) -> +    As1 = prep_out_actions(As), +    foreach(fun (A) -> out_action(File, A) end, As1), +    io:fwrite(File, "yyaction(_, _, _, _) -> error.~n", []), +    foreach(fun (A) -> out_action_code(File, XrlFile, A) end, As1). + +prep_out_actions(As) -> +    map(fun ({A,empty_action}) -> +                {A,empty_action}; +            ({A,Code,TokenChars,TokenLen,TokenLine}) -> +                Vs = [{TokenChars,"TokenChars"}, +                      {TokenLen,"TokenLen"}, +                      {TokenLine,"TokenLine"}, +                      {TokenChars,"YYtcs"}, +                      {TokenLen or TokenChars,"TokenLen"}], +                Vars = [if F -> S; true -> "_" end || {F,S} <- Vs], +                Name = list_to_atom(lists:concat([yyaction_,A])), +                [Chars,Len,Line,_,_] = Vars, +                Args = [V || V <- [Chars,Len,Line], V =/= "_"], +                ArgsChars = string:join(Args, ", "), +                {A,Code,Vars,Name,Args,ArgsChars} +        end, As). + +out_action(File, {A,empty_action}) -> +    io:fwrite(File, "yyaction(~w, _, _, _) -> skip_token;~n", [A]); +out_action(File, {A,_Code,Vars,Name,_Args,ArgsChars}) -> +    [_,_,Line,Tcs,Len] = Vars, +    io:fwrite(File, "yyaction(~w, ~s, ~s, ~s) ->~n", [A,Len,Tcs,Line]), +    if +        Tcs =/= "_" -> +            io:fwrite(File, "    TokenChars = yypre(YYtcs, TokenLen),~n", []); +        true -> ok +    end, +    io:fwrite(File, "    ~s(~s);~n", [Name, ArgsChars]). + +out_action_code(_File, _XrlFile, {_A,empty_action}) -> +    ok; +out_action_code(File, XrlFile, {_A,Code,_Vars,Name,Args,ArgsChars}) -> +    %% Should set the file to the .erl file, but instead assumes that +    %% ?LEEXINC is syntactically correct. +    io:fwrite(File, "\n-compile({inline,~w/~w}).\n", [Name, length(Args)]), +    {line, L} = erl_scan:token_info(hd(Code), line), +    output_file_directive(File, XrlFile, L-2), +    io:fwrite(File, "~s(~s) ->~n", [Name, ArgsChars]), +    io:fwrite(File, "    ~s\n", [pp_tokens(Code, L)]). + +%% pp_tokens(Tokens, Line) -> [char()]. +%%  Prints the tokens keeping the line breaks of the original code. + +pp_tokens(Tokens, Line0) -> pp_tokens(Tokens, Line0, none). +     +pp_tokens([], _Line0, _) -> []; +pp_tokens([T | Ts], Line0, Prev) -> +    {line, Line} = erl_scan:token_info(T, line), +    [pp_sep(Line, Line0, Prev, T), pp_symbol(T) | pp_tokens(Ts, Line, T)]. + +pp_symbol({var,_,Var}) -> atom_to_list(Var); +pp_symbol({_,_,Symbol}) -> io_lib:fwrite("~p", [Symbol]); +pp_symbol({dot, _}) -> "."; +pp_symbol({Symbol, _}) -> atom_to_list(Symbol). + +pp_sep(Line, Line0, Prev, T) when Line > Line0 ->  +    ["\n    " | pp_sep(Line - 1, Line0, Prev, T)]; +pp_sep(_, _, {'.',_}, _) -> "";        % No space after '.' (not a dot) +pp_sep(_, _, {'#',_}, _) -> "";        % No space after '#' +pp_sep(_, _, {'(',_}, _) -> "";        % No space after '(' +pp_sep(_, _, {'[',_}, _) -> "";        % No space after '[' +pp_sep(_, _, _, {'.',_}) -> "";        % No space before '.' +pp_sep(_, _, _, {'#',_}) -> "";        % No space before '#' +pp_sep(_, _, _, {',',_}) -> "";        % No space before ',' +pp_sep(_, _, _, {')',_}) -> "";        % No space before ')' +pp_sep(_, _, _, _) -> " ". + +%% out_dfa_graph(LeexState, DFA, DfaStart) -> ok | error. +%%  Writes the DFA to a .dot file in DOT-format which can be viewed +%%  with Graphviz. + +out_dfa_graph(St, DFA, DF) -> +    verbose_print(St, "Writing DFA to file ~s, ", [St#leex.gfile]), +    case file:open(St#leex.gfile, [write]) of +        {ok,Gfile} -> +            try +                io:fwrite(Gfile, "digraph DFA {~n", []), +                out_dfa_states(Gfile, DFA, DF), +                out_dfa_edges(Gfile, DFA), +                io:fwrite(Gfile, "}~n", []), +                verbose_print(St, "ok~n", []), +                St +            after file:close(Gfile) +            end; +        {error,Error} -> +            verbose_print(St, "error~n", []), +            add_error({none,leex,{file_error,Error}}, St) +    end. + +out_dfa_states(File, DFA, DF) -> +    foreach(fun (S) -> out_dfa_state(File, DF, S) end, DFA), +    io:fwrite(File, "~n", []). + +out_dfa_state(File, DF, #dfa_state{no=DF, accept={accept,_}}) -> +    io:fwrite(File, "  ~b [shape=doublecircle color=green];~n", [DF]); +out_dfa_state(File, DF, #dfa_state{no=DF, accept=noaccept}) -> +    io:fwrite(File, "  ~b [shape=circle color=green];~n", [DF]); +out_dfa_state(File, _, #dfa_state{no=S, accept={accept,_}}) -> +    io:fwrite(File, "  ~b [shape=doublecircle];~n", [S]);     +out_dfa_state(File, _, #dfa_state{no=S, accept=noaccept}) -> +    io:fwrite(File, "  ~b [shape=circle];~n", [S]). + +out_dfa_edges(File, DFA) -> +    foreach(fun (#dfa_state{no=S,trans=Trans}) -> +                    Pt = pack_trans(Trans), +                    Tdict = foldl(fun ({Cr,T}, D) -> +                                          orddict:append(T, Cr, D) +                                  end, orddict:new(), Pt), +                    foreach(fun (T) -> +                                    Crs = orddict:fetch(T, Tdict), +                                    Edgelab = dfa_edgelabel(Crs), +                                    io:fwrite(File, "  ~b -> ~b [label=\"~s\"];~n", +                                              [S,T,Edgelab]) +                            end, sort(orddict:fetch_keys(Tdict))) +            end, DFA). + +dfa_edgelabel([C]) when is_integer(C) -> quote(C); +dfa_edgelabel(Cranges) -> +    %% io:fwrite("el: ~p\n", [Cranges]), +    "[" ++ map(fun ({A,B}) -> [quote(A), "-", quote(B)]; +                   (C)     -> [quote(C)] +               end, Cranges) ++ "]". + +output_file_directive(File, Filename, Line) -> +    io:fwrite(File, <<"-file(~s, ~w).\n">>,  +              [format_filename(Filename), Line]). + +format_filename(Filename) -> +    io_lib:write_string(filename:flatten(Filename)). + +quote($^)  -> "\\^"; +quote($.)  -> "\\."; +quote($$)  -> "\\$"; +quote($-)  -> "\\-"; +quote($[)  -> "\\["; +quote($])  -> "\\]"; +quote($\s) -> "\\\\s"; +quote($\") -> "\\\""; +quote($\b) -> "\\\\b"; +quote($\f) -> "\\\\f"; +quote($\n) -> "\\\\n"; +quote($\r) -> "\\\\r"; +quote($\t) -> "\\\\t"; +quote($\e) -> "\\\\e"; +quote($\v) -> "\\\\v"; +quote($\d) -> "\\\\d"; +quote($\\) -> "\\\\"; +quote(C) when is_integer(C) -> +    %% Must remove the $ and get the \'s right. +    case io_lib:write_unicode_char(C) of +        [$$,$\\|Cs] -> "\\\\" ++ Cs; +        [$$|Cs] -> Cs +    end; +quote(maxchar) -> +    "MAXCHAR". diff --git a/lib/parsetools/src/parsetools.app.src b/lib/parsetools/src/parsetools.app.src new file mode 100644 index 0000000000..af62fc4f6b --- /dev/null +++ b/lib/parsetools/src/parsetools.app.src @@ -0,0 +1,33 @@ +{application, parsetools, + [{description, "XLATETOOLS  CXC 138 xx"}, +  {vsn, "%VSN%"}, +  {modules, [leex, +             yecc, +	     yeccparser, +	     yeccscan +	    ] +  }, +  {registered,[]}, +  {applications, [kernel,stdlib]}, +  {env, [{file_util_search_methods,[{"", ""}, {"ebin", "esrc"}, {"ebin", "src"}]} +	] +  } + ] +}.  +  + + + + + + + + + + + + + + + + diff --git a/lib/parsetools/src/parsetools.appup.src b/lib/parsetools/src/parsetools.appup.src new file mode 100644 index 0000000000..54a63833e6 --- /dev/null +++ b/lib/parsetools/src/parsetools.appup.src @@ -0,0 +1 @@ +{"%VSN%",[],[]}. diff --git a/lib/parsetools/src/yecc.erl b/lib/parsetools/src/yecc.erl new file mode 100644 index 0000000000..f4d76f471d --- /dev/null +++ b/lib/parsetools/src/yecc.erl @@ -0,0 +1,2531 @@ +%% +%% %CopyrightBegin% +%%  +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%%  +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%%  +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%%  +%% %CopyrightEnd% +%% +%% Yacc like LALR-1 parser generator for Erlang. +%% Ref: Aho & Johnson: "LR Parsing", ACM Computing Surveys, vol. 6:2, 1974. +%% Auxiliary files: yeccgramm.yrl, yeccparser.erl, yeccpre.hrl, yeccscan.erl. +%% + +-module(yecc). + +-export([compile/3, file/1, file/2, format_error/1]). + +%% Kept for compatibility with R10B. +-export([yecc/2, yecc/3, yecc/4]). + +-import(lists, [append/1, append/2, concat/1, delete/2, filter/2, +                flatmap/2, foldl/3, foldr/3, foreach/2, keydelete/3, +                keysearch/3, keysort/2, last/1, map/2, member/2, +                reverse/1, sort/1, usort/1]). + +-include("erl_compile.hrl"). +-include("ms_transform.hrl"). + +-record(yecc, { +          infile, +          outfile, +          includefile, +          includefile_version, +          module, +          options = [], +          verbose = false, +          file_attrs = true, +          errors = [], +          warnings = [], +          conflicts_done = false, +          shift_reduce = [], +          reduce_reduce = [], +          n_states = 0, +          inport, +          outport, +          line, + +          parse_actions, +          symbol_tab, +          inv_symbol_tab, +          state_tab, +          prec_tab, +          goto_tab, + +          terminals = [], +          nonterminals = [], +          all_symbols = [], +          prec = [], +          rules_list = [], +          rules, % a tuple of rules_list +          rule_pointer2rule, +          rootsymbol = [], +          endsymbol = [], +          expect_shift_reduce = [], +          expect_n_states = [], +          header = [], +          erlang_code = none +         }). + +-record(rule, { +          n,             % rule n in the grammar file +          line, +          symbols,       % the names of symbols +          tokens, +          is_guard,      % the action is a guard (not used) +          is_well_formed % can be parsed (without macro expansion) +         }). + +-record(reduce, { +          rule_nmbr, +          head, +          nmbr_of_daughters, +          prec, +          unused % assure that #reduce{} comes before #shift{} when sorting +         }). + +-record(shift, { +          state, +          pos, +          prec, +          rule_nmbr +         }). + +-record(user_code, {state, terminal, funname, action}). + +-record(symbol, {line = none, name}). + +%% ACCEPT is neither an atom nor a non-terminal. +-define(ACCEPT, {}). + +%% During the phase 'compute_states' terminals in lookahead sets are +%% coded as integers; sets of terminals are integer bit masks. This is +%% for efficiency only. '$empty' is always given the mask 1. The +%% behaviour can be turned off by un-defining SYMBOLS_AS_CODES (useful +%% when debugging). + +%% Non-terminals are also given integer codes, starting with -1. The +%% absolut value of the code is used for indexing a tuple of lists of +%% rules. + +-define(SYMBOLS_AS_CODES, true). + +-ifdef(SYMBOLS_AS_CODES). +-define(EMPTY, 0). +-else. +-define(EMPTY, '$empty'). +-endif. + +%%% +%%% Exported functions +%%% + +%%% Interface to erl_compile. + +compile(Input0, Output0,  +        #options{warning = WarnLevel, verbose=Verbose, includes=Includes}) -> +    Input = shorten_filename(Input0), +    Output = shorten_filename(Output0), +    Includefile = lists:sublist(Includes, 1), +    Opts = [{parserfile,Output}, {includefile,Includefile}, {verbose,Verbose}, +            {report_errors, true}, {report_warnings, WarnLevel > 0}], +    case file(Input, Opts) of +        {ok, _OutFile} -> +            ok; +        error -> +            error +    end. + +format_error(bad_declaration) -> +    io_lib:fwrite("unknown or bad declaration, ignored", []); +format_error({bad_expect, SymName}) -> +    io_lib:fwrite("argument ~s of Expect is not an integer",  +                  [format_symbol(SymName)]); +format_error({bad_rootsymbol, SymName}) -> +    io_lib:fwrite("rootsymbol ~s is not a nonterminal",  +                  [format_symbol(SymName)]); +format_error({bad_states, SymName}) -> +    io_lib:fwrite("argument ~s of States is not an integer",  +                  [format_symbol(SymName)]); +format_error({conflict, Conflict}) -> +    format_conflict(Conflict); +format_error({conflicts, SR, RR}) -> +    io_lib:fwrite("conflicts: ~w shift/reduce, ~w reduce/reduce", [SR, RR]); +format_error({duplicate_declaration, Tag}) -> +    io_lib:fwrite("duplicate declaration of ~s", [atom_to_list(Tag)]); +format_error({duplicate_nonterminal, Nonterminal}) -> +    io_lib:fwrite("duplicate non-terminals ~s",  +                  [format_symbol(Nonterminal)]); +format_error({duplicate_precedence, Op}) -> +    io_lib:fwrite("duplicate precedence operator ~s",  +                  [format_symbol(Op)]); +format_error({duplicate_terminal, Terminal}) -> +    io_lib:fwrite("duplicate terminal ~s",  +                  [format_symbol(Terminal)]); +format_error({endsymbol_is_nonterminal, Symbol}) -> +    io_lib:fwrite("endsymbol ~s is a nonterminal",  +                  [format_symbol(Symbol)]); +format_error({endsymbol_is_terminal, Symbol}) -> +    io_lib:fwrite("endsymbol ~s is a terminal",  +                  [format_symbol(Symbol)]); +format_error({error, Module, Error}) -> +    Module:format_error(Error); +format_error({file_error, Reason}) -> +    io_lib:fwrite("~s",[file:format_error(Reason)]); +format_error(illegal_empty) -> +    io_lib:fwrite("illegal use of empty symbol", []); +format_error({internal_error, Error}) -> +    io_lib:fwrite("internal yecc error: ~w", [Error]); +format_error({missing_syntax_rule, Nonterminal}) -> +    io_lib:fwrite("no syntax rule for non-terminal symbol ~s", +                  [format_symbol(Nonterminal)]); +format_error({n_states, Exp, N}) -> +    io_lib:fwrite("expected ~w states, but got ~p states", [Exp, N]); +format_error(no_grammar_rules) -> +    io_lib:fwrite("grammar rules are missing", []); +format_error(nonterminals_missing) -> +    io_lib:fwrite("Nonterminals is missing", []); +format_error({precedence_op_is_endsymbol, SymName}) -> +    io_lib:fwrite("precedence operator ~s is endsymbol", +                  [format_symbol(SymName)]); +format_error({precedence_op_is_unknown, SymName}) -> +    io_lib:fwrite("unknown precedence operator ~s", +                  [format_symbol(SymName)]); +format_error({reserved, N}) -> +    io_lib:fwrite("the use of ~w should be avoided", [N]); +format_error({symbol_terminal_and_nonterminal, SymName}) -> +    io_lib:fwrite("symbol ~s is both a terminal and nonterminal", +                  [format_symbol(SymName)]); +format_error(rootsymbol_missing) -> +    io_lib:fwrite("Rootsymbol is missing", []); +format_error(terminals_missing) -> +    io_lib:fwrite("Terminals is missing", []); +format_error({undefined_nonterminal, Symbol}) -> +    io_lib:fwrite("undefined nonterminal: ~s", [format_symbol(Symbol)]); +format_error({undefined_pseudo_variable, Atom}) -> +    io_lib:fwrite("undefined pseudo variable ~w", [Atom]); +format_error({undefined_symbol, SymName}) -> +    io_lib:fwrite("undefined rhs symbol ~s", [format_symbol(SymName)]); +format_error({unused_nonterminal, Nonterminal}) -> +    io_lib:fwrite("non-terminal symbol ~s not used",  +                  [format_symbol(Nonterminal)]); +format_error({unused_terminal, Terminal}) -> +    io_lib:fwrite("terminal symbol ~s not used",  +                  [format_symbol(Terminal)]). + +file(File) -> +    file(File, [report_errors, report_warnings]). + +file(File, Options) -> +    case is_filename(File) of +        no -> erlang:error(badarg, [File, Options]); +        _ -> ok +    end, +    case options(Options) of +        badarg -> +            erlang:error(badarg, [File, Options]); +        OptionValues -> +            Self = self(), +            Flag = process_flag(trap_exit, false), +            Pid = spawn_link(fun() -> infile(Self, File, OptionValues) end), +            receive +                {Pid, Rep} ->  +                    receive after 1 -> ok end, +                    process_flag(trap_exit, Flag), +                    Rep +            end +    end. + +%% Kept for backward compatibility. +yecc(Infile, Outfile) -> +    yecc(Infile, Outfile, false, []). + +yecc(Infile, Outfile, Verbose) -> +    yecc(Infile, Outfile, Verbose, []). + +yecc(Infilex, Outfilex, Verbose, Includefilex) -> +    statistics(runtime), +    case file(Infilex, [{parserfile, Outfilex},  +                        {verbose, Verbose},  +                        {report, true}, +                        {includefile, Includefilex}]) of +        {ok, _File} -> +            statistics(runtime); +        error -> +            exit(error) +    end. + +%%% +%%% Local functions +%%% + +options(Options0) when is_list(Options0) -> +    try  +        Options = flatmap(fun(return) -> short_option(return, true); +                             (report) -> short_option(report, true); +                             ({return,T}) -> short_option(return, T); +                             ({report,T}) -> short_option(report, T); +                             (T) -> [T] +                          end, Options0), +        options(Options, [file_attributes, includefile, parserfile,  +                          report_errors, report_warnings, return_errors,  +                          return_warnings, time, verbose], []) +    catch error: _ -> badarg +    end; +options(Option) -> +    options([Option]). + +short_option(return, T) -> +    [{return_errors,T}, {return_warnings,T}]; +short_option(report, T) -> +    [{report_errors,T}, {report_warnings,T}]. + +options(Options0, [Key | Keys], L) when is_list(Options0) -> +    Options = case member(Key, Options0) of +                  true ->  +                      [atom_option(Key) | delete(Key, Options0)]; +                  false -> +                      Options0 +              end, +    V = case keysearch(Key, 1, Options) of +            {value, {Key, Filename0}} when Key =:= includefile;  +                                           Key =:= parserfile -> +                case is_filename(Filename0) of +                    no ->  +                        badarg; +                    Filename ->  +                        {ok, [{Key, Filename}]} +                end; +            {value, {Key, Bool}} when Bool =:= true; Bool =:= false -> +                {ok, [{Key, Bool}]}; +            {value, {Key, _}} -> +                badarg; +            false -> +                {ok, [{Key, default_option(Key)}]} +        end, +    case V of +        badarg -> +            badarg; +        {ok, KeyValueL} -> +            NewOptions = keydelete(Key, 1, Options), +            options(NewOptions, Keys, KeyValueL ++ L) +    end; +options([], [], L) -> +    foldl(fun({_,false}, A) -> A; +             ({Tag,true}, A) -> [Tag | A]; +             (F, A) -> [F | A] +          end, [], L); +options(_Options, _, _L) -> +    badarg. + +default_option(file_attributes) -> true; +default_option(includefile) -> []; +default_option(parserfile) -> []; +default_option(report_errors) -> true; +default_option(report_warnings) -> true; +default_option(return_errors) -> false; +default_option(return_warnings) -> false; +default_option(time) -> false; +default_option(verbose) -> false. + +atom_option(file_attributes) -> {file_attributes, true}; +atom_option(report_errors) -> {report_errors, true}; +atom_option(report_warnings) -> {report_warnings, true}; +atom_option(return_errors) -> {return_errors, true}; +atom_option(return_warnings) -> {return_warnings, true}; +atom_option(time) -> {time, true}; +atom_option(verbose) -> {verbose, true}; +atom_option(Key) -> Key. + +is_filename(T) -> +    try filename:flatten(T) of +        Filename -> Filename +    catch error: _ -> no +    end.     + +shorten_filename(Name0) -> +    {ok,Cwd} = file:get_cwd(), +    case lists:prefix(Cwd, Name0) of +        false -> Name0; +        true -> +            case lists:nthtail(length(Cwd), Name0) of +                "/"++N -> N; +                N -> N +            end +    end. + +start(Infilex, Options) -> +    Infile = assure_extension(Infilex, ".yrl"), +    {value, {_, Outfilex0}} = keysearch(parserfile, 1, Options), +    {value, {_, Includefilex}} = keysearch(includefile, 1, Options), +    Outfilex = case Outfilex0 of +                   [] -> filename:rootname(Infilex, ".yrl"); +                   _ -> Outfilex0 +               end, +    Includefile = case Includefilex of +                      [] -> []; +                      _ -> assure_extension(Includefilex,".hrl") +                  end, +    IncludefileVersion = includefile_version(Includefile), +    Outfile = assure_extension(Outfilex, ".erl"), +    Module = list_to_atom(filename:basename(Outfile, ".erl")), +    #yecc{infile = Infile,  +          outfile = Outfile, +          includefile = Includefile, +          includefile_version = IncludefileVersion, +          module = Module, +          options = Options, +          verbose = member(verbose, Options), +          file_attrs = member(file_attributes, Options)}. + +assure_extension(File, Ext) -> +    concat([strip_extension(File, Ext), Ext]). + +%% Assumes File is a filename. +strip_extension(File, Ext) -> +    case filename:extension(File) of +        Ext -> filename:rootname(File); +        _Other -> File +    end. + +infile(Parent, Infilex, Options) -> +    St0 = start(Infilex, Options), +    St = case file:open(St0#yecc.infile, [read, read_ahead]) of +             {ok, Inport} -> +                 try  +                     outfile(St0#yecc{inport = Inport}) +                 after +                     ok = file:close(Inport) +                 end; +             {error, Reason} -> +                 add_error(St0#yecc.infile, none, {file_error, Reason}, St0) +         end, +    case St#yecc.errors of +        [] -> ok; +        _ -> _ = file:delete(St#yecc.outfile) +    end, +    Parent ! {self(), yecc_ret(St)}. + +outfile(St0) -> +    case file:open(St0#yecc.outfile, [write, delayed_write]) of +        {ok, Outport} -> +            try  +                generate(St0#yecc{outport = Outport, line = 1}) +            catch  +                throw: St1  -> +                    St1; +                exit: Reason -> +                    add_error({internal_error, Reason}, St0) +            after +               ok = file:close(Outport) +            end; +        {error, Reason} -> +            add_error(St0#yecc.outfile, none, {file_error, Reason}, St0) +    end. + +os_process_size() -> +    case os:type() of +        {unix, sunos} -> +            Size = os:cmd("ps -o vsz -p " ++ os:getpid() ++ " | tail -1"), +            list_to_integer(lib:nonl(Size)); +        _ -> +            0 +    end.             + +timeit(Name, Fun, St0) -> +    Time = runtime, +    %% Time = wall_clock, +    {Before, _} = statistics(Time), +    St = Fun(St0),  +    {After, _} = statistics(Time), +    Mem0 = erts_debug:flat_size(St)*erlang:system_info(wordsize), +    Mem = lists:flatten(io_lib:format("~.1f kB", [Mem0/1024])), +    Sz = lists:flatten(io_lib:format("~.1f MB", [os_process_size()/1024])), +    io:fwrite(" ~-30w: ~10.2f s ~12s ~10s\n",  +              [Name, (After-Before)/1000, Mem, Sz]), +    St. + +-define(PASS(P), {P, fun P/1}). + +generate(St0) -> +    Passes = [?PASS(parse_grammar), ?PASS(check_grammar), +              ?PASS(states_and_goto_table), ?PASS(parse_actions), +              ?PASS(action_conflicts), ?PASS(write_file)], +    F = case member(time, St0#yecc.options) of +            true ->  +                io:fwrite(<<"Generating parser from grammar in ~s\n">>,  +                          [format_filename(St0#yecc.infile)]), +                fun timeit/3; +            false -> +                fun(_Name, Fn, St) -> Fn(St) end +        end, +    Fun = fun({Name, Fun}, St) -> +                  St2 = F(Name, Fun, St), +                  if  +                      St2#yecc.errors =:= [] -> St2; +                      true -> throw(St2) +                  end +          end, +    foldl(Fun, St0, Passes). + +parse_grammar(St) -> +    parse_grammar(St#yecc.inport, 1, St). + +parse_grammar(Inport, Line, St) -> +    {NextLine, Grammar} = read_grammar(Inport, Line), +    parse_grammar(Grammar, Inport, NextLine, St). + +parse_grammar(eof, _Inport, _NextLine, St) -> +    St; +parse_grammar({#symbol{name = 'Header'}, Ss}, Inport, NextLine, St0) -> +    St1 = St0#yecc{header = [S || {string,_,S} <- Ss]}, +    parse_grammar(Inport, NextLine, St1); +parse_grammar({#symbol{name = 'Erlang'}, [#symbol{name = code}]}, _Inport,  +              NextLine, St) -> +    St#yecc{erlang_code = NextLine}; +parse_grammar(Grammar, Inport, NextLine, St0) -> +    St = parse_grammar(Grammar, St0), +    parse_grammar(Inport, NextLine, St). + +parse_grammar({error,ErrorLine,Error}, St) -> +    add_error(ErrorLine, Error, St); +parse_grammar({rule, Rule, Tokens}, St0) -> +    NmbrOfDaughters = case Rule of +                          [_, #symbol{name = '$empty'}]  -> 0; +                          _ -> length(Rule) - 1 +                      end, +    {IsGuard, IsWellFormed} = check_action(Tokens), +    {Tokens1, St} = subst_pseudo_vars(Tokens, +                                      NmbrOfDaughters, +                                      St0), +    RuleDef = #rule{symbols = Rule,  +                    tokens = Tokens1,  +                    is_guard = IsGuard,  +                    is_well_formed = IsWellFormed}, +    St#yecc{rules_list = [RuleDef | St#yecc.rules_list]}; +parse_grammar({prec, Prec}, St) -> +    St#yecc{prec = Prec ++ St#yecc.prec}; +parse_grammar({#symbol{line = Line, name = Name}, Symbols}, St) -> +    CF = fun(I) -> +                 case element(I, St) of +                     [] ->  +                         setelement(I, St, Symbols); +                     _ ->  +                         add_error(Line, {duplicate_declaration, Name}, St) +                 end +         end, +    OneSymbol = length(Symbols) =:= 1, +    case Name of +        'Nonterminals' -> CF(#yecc.nonterminals); +        'Terminals' -> CF(#yecc.terminals); +        'Rootsymbol' when OneSymbol -> CF(#yecc.rootsymbol); +        'Endsymbol' when OneSymbol ->  CF(#yecc.endsymbol); +        'Expect' when OneSymbol -> CF(#yecc.expect_shift_reduce); +        'States' when OneSymbol -> CF(#yecc.expect_n_states); % undocumented +        _ -> add_warning(Line, bad_declaration, St) +    end. + +read_grammar(Inport, Line) -> +    case yeccscan:scan(Inport, '', Line) of +        {eof, NextLine} -> +            {NextLine, eof}; +        {error, {ErrorLine, Mod, What}, NextLine} -> +            {NextLine, {error, ErrorLine, {error, Mod, What}}}; +        {ok, Input, NextLine} -> +            {NextLine, case yeccparser:parse(Input) of +                           {error, {ErrorLine, Mod, Message}} -> +                               {error, ErrorLine, {error, Mod, Message}}; +                           {ok, {rule, Rule, {erlang_code, Tokens}}} -> +                               {rule, Rule, Tokens}; +                           {ok, {#symbol{name=P},  +                                 [#symbol{name=I} | OpL]}=Ss} -> +                               A = precedence(P), +                               if +                                   A =/= unknown,  +                                   is_integer(I), +                                   OpL =/= [] -> +                                       Ps = [{Op, I , A} || Op <- OpL], +                                       {prec, Ps}; +                                   true ->  +                                       Ss +                               end; +                           {ok, Ss} ->  +                               Ss +                       end} +    end. + +precedence('Left') -> left; +precedence('Right') -> right; +precedence('Unary') -> unary; +precedence('Nonassoc') -> nonassoc; +precedence(_) -> unknown. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +check_grammar(St0) -> +    Empty = #symbol{line = none, name = '$empty'}, +    AllSymbols = St0#yecc.nonterminals ++ St0#yecc.terminals ++ [Empty], +    St1 = St0#yecc{all_symbols = AllSymbols}, +    Cs = [fun check_nonterminals/1, fun check_terminals/1,  +          fun check_rootsymbol/1, fun check_endsymbol/1,  +          fun check_expect/1, fun check_states/1, +          fun check_precedences/1, fun check_rules/1], +    foldl(fun(F, St) -> F(St) end, St1, Cs). + +check_nonterminals(St) -> +    case St#yecc.nonterminals of  +        [] -> +            add_error(nonterminals_missing, St); +        Nonterminals -> +            {Unique, Dups} = duplicates(names(Nonterminals)), +            St1 = add_warnings(Dups, duplicate_nonterminal, St), +            St2 = check_reserved(Unique, St1), +            St2#yecc{nonterminals = [?ACCEPT | Unique]} +    end. + +check_terminals(St0) -> +    case St0#yecc.terminals of +        [] -> +            add_error(terminals_missing, St0); +        Terminals -> +            {Unique, Dups} = duplicates(names(Terminals)), +            St1 = add_warnings(Dups, duplicate_terminal, St0), +            Common = intersect(St1#yecc.nonterminals, Unique), +            St2 = add_errors(Common, symbol_terminal_and_nonterminal, St1), +            St3 = check_reserved(Unique, St2), +            St3#yecc{terminals = ['$empty' | Unique]} +    end. + +check_reserved(Names, St) -> +    add_errors(intersect(Names, ['$empty', '$end', '$undefined']), +               reserved, St). + +check_rootsymbol(St) -> +    case St#yecc.rootsymbol of +        [] -> +            add_error(rootsymbol_missing, St); +        [#symbol{line = Line, name = SymName}] -> +            case kind_of_symbol(St, SymName) of +                nonterminal -> +                    St#yecc{rootsymbol = SymName}; +                _ -> +                    add_error(Line, {bad_rootsymbol, SymName}, St) +            end +    end. + +check_endsymbol(St) -> +    case St#yecc.endsymbol of +        [] -> +            St#yecc{endsymbol = '$end'}; +        [#symbol{line = Line, name = SymName}] -> +            case kind_of_symbol(St, SymName) of +                nonterminal -> +                    add_error(Line, {endsymbol_is_nonterminal, SymName}, St); +                terminal -> +                    add_error(Line, {endsymbol_is_terminal, SymName}, St); +                _ -> +                    St#yecc{endsymbol = SymName} +            end +    end. + +check_expect(St0) -> +    case St0#yecc.expect_shift_reduce of +        [] -> +            St0#yecc{expect_shift_reduce = 0}; +        [#symbol{name = Expect}] when is_integer(Expect) -> +            St0#yecc{expect_shift_reduce = Expect}; +        [#symbol{line = Line, name = Name}] -> +            St1 = add_error(Line, {bad_expect, Name}, St0), +            St1#yecc{expect_shift_reduce = 0} +    end. + +check_states(St) -> +    case St#yecc.expect_n_states of +        [] -> +            St; +        [#symbol{name = NStates}] when is_integer(NStates) -> +            St#yecc{expect_n_states = NStates}; +        [#symbol{line = Line, name = Name}] -> +            add_error(Line, {bad_states, Name}, St) +    end. + +check_precedences(St0) -> +    {St1, _} =  +        foldr(fun({#symbol{line = Line, name = Op},_I,_A}, {St,Ps}) -> +                      case member(Op, Ps) of +                          true -> +                              {add_error(Line, {duplicate_precedence,Op}, St), +                               Ps}; +                          false -> +                              {St, [Op | Ps]} +                      end +              end, {St0,[]}, St0#yecc.prec), +    foldl(fun({#symbol{line = Line, name = Op},I,A}, St) -> +                  case kind_of_symbol(St, Op) of +                      endsymbol -> +                          add_error(Line,{precedence_op_is_endsymbol,Op}, St); +                      unknown -> +                          add_error(Line, {precedence_op_is_unknown, Op}, St); +                      _ ->  +                          St#yecc{prec = [{Op,I,A} | St#yecc.prec]} +                  end +          end, St1#yecc{prec = []}, St1#yecc.prec). + +check_rule(Rule0, {St0,Rules}) -> +    Symbols = Rule0#rule.symbols, +    #symbol{line = HeadLine, name = Head} = hd(Symbols), +    case member(Head, St0#yecc.nonterminals) of +        false ->  +            {add_error(HeadLine, {undefined_nonterminal, Head}, St0), Rules}; +        true -> +            St = check_rhs(tl(Symbols), St0), +            Rule = Rule0#rule{line = HeadLine, symbols = names(Symbols)}, +            {St, [Rule | Rules]} +    end. + +check_rules(St0) -> +    {St,Rules0} = foldl(fun check_rule/2, {St0,[]}, St0#yecc.rules_list), +    case St#yecc.rules_list of +        [] -> +            add_error(no_grammar_rules, St); +        _ -> +            Rule = #rule{line = none,  +                         symbols = [?ACCEPT, St#yecc.rootsymbol], +                         tokens = []}, +            Rules1 = [Rule | Rules0], +            Rules = map(fun({R,I}) -> R#rule{n = I} end,  count(0, Rules1)), +            St#yecc{rules_list = Rules, rules = list_to_tuple(Rules)} +    end. + +duplicates(List) -> +    Unique = usort(List), +    {Unique, List -- Unique}. + +names(Symbols) -> +    map(fun(Symbol) -> Symbol#symbol.name end, Symbols). + +symbol_line(Name, St) -> +    {value, #symbol{line = Line}} = symbol_search(Name, St#yecc.all_symbols), +    Line. + +symbol_member(Symbol, Symbols) -> +    symbol_search(Symbol#symbol.name, Symbols) =/= false. + +symbol_search(Name, Symbols) -> +    keysearch(Name, #symbol.name, Symbols). + +states_and_goto_table(St0) -> +    St1 = create_symbol_table(St0), +    St = compute_states(St1), +    create_precedence_table(St). + +parse_actions(St) -> +    erase(), % the pd is used when decoding lookahead sets +    ParseActions = compute_parse_actions(St#yecc.n_states, St, []), +    erase(), +    St#yecc{parse_actions = ParseActions, state_tab = []}. + +action_conflicts(St0) -> +    St = find_action_conflicts(St0), +    St#yecc{conflicts_done = true}. + +-record(state_info, {reduce_only, state_repr, comment}). + +write_file(St0) -> +    #yecc{parse_actions = ParseActions, goto_tab = GotoTab} = St0, +    Sorted = sort_parse_actions(ParseActions), +    StateReprs = find_identical_shift_states(Sorted), +    StateInfo = collect_some_state_info(Sorted, StateReprs), +    StateJumps = find_partial_shift_states(Sorted, StateReprs), +    UserCodeActions = find_user_code(Sorted, St0), +    #yecc{infile = Infile, outfile = Outfile, +          inport = Inport, outport = Outport, +          nonterminals = Nonterminals} = St0, +    {St10, N_lines, LastErlangCodeLine} =  +        output_prelude(Outport, Inport, St0), +    St20 = St10#yecc{line = St10#yecc.line + N_lines}, +    St25 = nl(St20), +    St30 = output_file_directive(St25, Outfile, St25#yecc.line), +    St40 = nl(St30), +    St50 = output_actions(St40, StateJumps, StateInfo), +    Go0 = [{Symbol,{From,To}} || {{From,Symbol},To} <- ets:tab2list(GotoTab)], +    Go = family_with_domain(Go0, Nonterminals), +    St60 = output_goto(St50, Go, StateInfo), +    St70 = output_inlined(St60, UserCodeActions, Infile), +    St = nl(St70), +    case LastErlangCodeLine of +        %% Just in case warnings or errors are emitted after the last +        %% line of the file. +        {last_erlang_code_line, Last_line} -> +            output_file_directive(St, Infile, Last_line); +        no_erlang_code -> +            St +    end. + +yecc_ret(St0) -> +    St = check_expected(St0), +    report_errors(St), +    report_warnings(St), +    Es = pack_errors(St#yecc.errors), +    Ws = pack_warnings(St#yecc.warnings), +    if  +        Es =:= [] ->  +            case member(return_warnings, St#yecc.options) of +                true -> {ok, St#yecc.outfile, Ws}; +                false -> {ok, St#yecc.outfile} +            end; +        true ->  +            case member(return_errors, St#yecc.options) of +                true -> {error, Es, Ws}; +                false -> error +            end +    end. + +check_expected(St0) -> +    #yecc{shift_reduce = SR, reduce_reduce = RR, expect_shift_reduce = ExpSR, +          n_states = NStates0, expect_n_states = ExpStates, +          conflicts_done = Done} = St0, +    N_RR = length(usort(RR)), +    N_SR = length(usort(SR)), +    St1 = if +              not Done -> +                  St0; +              N_SR =:= ExpSR, N_RR =:= 0 -> +                  St0; +              true -> +                  add_warning(none, {conflicts, N_SR, N_RR}, St0) +          end, +    NStates = NStates0 + 1, +    if +        (not Done) or (ExpStates =:= []) or (NStates =:= ExpStates) -> +            St1; +        true -> +            add_warning(none, {n_states, ExpStates, NStates}, St1) +    end. + +pack_errors([{File,_} | _] = Es) -> +    [{File, flatmap(fun({_,E}) -> [E] end, sort(Es))}]; +pack_errors([]) -> +    []. +     +pack_warnings([{File,_} | _] = Ws) -> +    [{File, flatmap(fun({_,W}) -> [W] end, sort(Ws))}]; +pack_warnings([]) -> +    []. + +report_errors(St) -> +    case member(report_errors, St#yecc.options) of +        true -> +            foreach(fun({File,{none,Mod,E}}) ->  +                            io:fwrite(<<"~s: ~s\n">>,  +                                      [File,Mod:format_error(E)]); +                       ({File,{Line,Mod,E}}) ->  +                            io:fwrite(<<"~s:~w: ~s\n">>,  +                                      [File,Line,Mod:format_error(E)]) +                    end, sort(St#yecc.errors)); +        false ->  +            ok +    end. + +report_warnings(St) -> +    case member(report_warnings, St#yecc.options) of +        true -> +            foreach(fun({File,{none,Mod,W}}) ->  +                            io:fwrite(<<"~s: Warning: ~s\n">>,  +                                      [File,Mod:format_error(W)]); +                       ({File,{Line,Mod,W}}) ->  +                            io:fwrite(<<"~s:~w: Warning: ~s\n">>,  +                                      [File,Line,Mod:format_error(W)]) +                    end, sort(St#yecc.warnings)); +        false ->  +            ok +    end. + +add_error(E, St) -> +    add_error(none, E, St). + +add_error(Line, E, St) -> +    add_error(St#yecc.infile, Line, E, St). + +add_error(File, Line, E, St) -> +    St#yecc{errors = [{File,{Line,?MODULE,E}}|St#yecc.errors]}.     + +add_errors(SymNames, E0, St0) -> +    foldl(fun(SymName, St) -> +                  add_error(symbol_line(SymName, St), {E0, SymName}, St) +          end, St0, SymNames). + +add_warning(Line, W, St) -> +    St#yecc{warnings = [{St#yecc.infile,{Line,?MODULE,W}}|St#yecc.warnings]}. + +add_warnings(SymNames, W0, St0) -> +    foldl(fun(SymName, St) -> +                  add_warning(symbol_line(SymName, St), {W0, SymName}, St) +          end, St0, SymNames). + +check_rhs([#symbol{name = '$empty'}], St) -> +    St; +check_rhs(Rhs, St0) -> +    case symbol_search('$empty', Rhs) of +        {value, #symbol{line = Line}} -> +            add_error(Line, illegal_empty, St0); +        false -> +            foldl(fun(Sym, St) -> +                          case symbol_member(Sym, St#yecc.all_symbols) of +                              true ->  +                                  St; +                              false ->  +                                  E = {undefined_symbol,Sym#symbol.name}, +                                  add_error(Sym#symbol.line, E, St) +                          end +                  end, St0, Rhs) +    end. + +check_action(Tokens) -> +    case erl_parse:parse_exprs(add_roberts_dot(Tokens, 0)) of +        {error, _Error} -> +            {false, false}; +        {ok, [Expr | Exprs]} -> +            IsGuard = Exprs =:= [] andalso erl_lint:is_guard_test(Expr), +            {IsGuard, true} +    end. + +add_roberts_dot([], Line) -> +    [{'dot', Line}]; +add_roberts_dot([{'dot', Line} | _], _) -> +    [{'dot', Line}]; +add_roberts_dot([Token | Tokens], _) -> +    [Token | add_roberts_dot(Tokens, element(2, Token))]. + +subst_pseudo_vars([], _, St) -> +    {[], St}; +subst_pseudo_vars([H0 | T0], NmbrOfDaughters, St0) -> +    {H, St1} = subst_pseudo_vars(H0, NmbrOfDaughters, St0), +    {T, St} = subst_pseudo_vars(T0, NmbrOfDaughters, St1), +    {[H | T], St}; +subst_pseudo_vars({atom, Line, Atom}, NmbrOfDaughters, St0) -> +    case atom_to_list(Atom) of +        [$$ | Rest] -> +            try list_to_integer(Rest) of +                N when N > 0, N =< NmbrOfDaughters -> +                    {{var, Line, list_to_atom(append("__", Rest))}, St0}; +                _ -> +                    St = add_error(Line, {undefined_pseudo_variable, Atom},  +                                   St0), +                    {{atom, Line, '$undefined'}, St} +            catch  +                error: _ -> {{atom, Line, Atom}, St0} +            end; +        _ -> +            {{atom, Line, Atom}, St0} +    end; +subst_pseudo_vars(Tuple, NmbrOfDaughters, St0) when is_tuple(Tuple) -> +    {L, St} = subst_pseudo_vars(tuple_to_list(Tuple), NmbrOfDaughters, St0), +    {list_to_tuple(L), St}; +subst_pseudo_vars(Something_else, _, St) -> +    {Something_else, St}. + +kind_of_symbol(St, SymName) -> +    case member(SymName, St#yecc.nonterminals) of +        false -> +            case member(SymName, St#yecc.terminals) of +                false -> +                    case St#yecc.endsymbol of +                        SymName -> +                            endsymbol; +                        _ -> +                            unknown +                    end; +                true -> +                    terminal +            end; +        true -> +            nonterminal +    end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Computing parse states and goto table from grammar. +% Start item: {0, [Endsymbol]} <-> +% (['ACCEPT' '.', Rootsymbol], {'$'}) in Aho & Johnson +% where '$end' is the default end of input symbol of the +% scanner if no 'Endsymbol' has been declared in the syntax file. + +-record(tabs, { +          symbols,      % ETS-set, keypos 1: {SymbolName, SymbolCode} +          inv_symbols,  % ETS-set, keypos 2: {SymbolName, SymbolCode} +          state_id,     % ETS-bag, keypos 1: {StateId, StateNum} +                        % StateId is not unique for a state. +          rp_rhs,       % rule pointer -> the remaining rhs symbols +          rp_info,      % rule pointer -> expanding rules and lookahead +          goto          % ETS-bag, keypos 1: first  +                        % {{FromStateNum, Symbol, ToStateNum}}, then +                        % {{FromStateNum, Symbol}, ToStateNum} +         }). + +-record(item, { % what states are made of +          rule_pointer, +          look_ahead, +          rhs +         }). + +compute_states(St0) -> +    SymbolTab = St0#yecc.symbol_tab, +    CodedRules = map(fun(#rule{symbols = Syms} = R) -> +                             R#rule{symbols = code_symbols(Syms, SymbolTab)} +                     end, St0#yecc.rules_list), +    CodedNonterminals = code_symbols(St0#yecc.nonterminals, SymbolTab), +    %% Only coded in this phase; StC is thrown away. +    StC = St0#yecc{rules_list = CodedRules,  +                   rules = list_to_tuple(CodedRules), +                   nonterminals = CodedNonterminals}, +    {RuleIndex, RulePointer2Rule} =  +        make_rule_index(StC, St0#yecc.rules_list), +    StateTab0 = {}, +    StateIdTab = ets:new(yecc_state_id, [set]), +    GotoTab = ets:new(yecc_goto, [bag]), +    RulePointerRhs = make_rhs_index(StC#yecc.rules_list), +    RulePointerInfo = make_rule_pointer_info(StC, RulePointerRhs, RuleIndex), + +    Tables = #tabs{symbols = SymbolTab,  +                   state_id = StateIdTab, +                   rp_rhs = RulePointerRhs, +                   rp_info = RulePointerInfo, +                   goto = GotoTab}, + +    erase(), +    EndsymCode = code_terminal(StC#yecc.endsymbol, StC#yecc.symbol_tab), +    {StateId, State0} = compute_state([{EndsymCode, 1}], Tables), + +    StateNum0 = first_state(), +    FirstState = {StateNum0, State0}, +    StateTab1 = insert_state(Tables, StateTab0, FirstState, StateId), +    {StateTab, N} =  +        compute_states1([{StateNum0, get_current_symbols(State0)}],  +                        FirstState, StateTab1, Tables), +    true = ets:delete(StateIdTab), +    St = St0#yecc{state_tab = StateTab, goto_tab = GotoTab, n_states = N, +                  rule_pointer2rule = RulePointer2Rule}, +    decode_goto(GotoTab, St#yecc.inv_symbol_tab), +    check_usage(St). + +first_state() -> +    0. + +decode_goto(GotoTab, InvSymTab) -> +    G = ets:tab2list(GotoTab), +    ets:delete_all_objects(GotoTab), +    ets:insert(GotoTab,  +               map(fun({{From, Sym, Next}}) -> +                           {{From, decode_symbol(Sym, InvSymTab)}, Next} +                   end, G)). + +check_usage(St0) -> +    SelSyms = ets:fun2ms(fun({{_,Sym},_}) -> Sym end), +    UsedSymbols = ets:select(St0#yecc.goto_tab, SelSyms), +    Syms = ordsets:from_list([?ACCEPT, '$empty' | UsedSymbols]), +    NonTerms = ordsets:from_list(St0#yecc.nonterminals), +    UnusedNonTerms = ordsets:to_list(ordsets:subtract(NonTerms, Syms)), +    St1 = add_warnings(UnusedNonTerms, unused_nonterminal, St0), +    Terms = ordsets:from_list(St0#yecc.terminals), +    St2 = add_warnings(ordsets:to_list(ordsets:subtract(Terms, Syms)), +                       unused_terminal, St1), +    DefinedNonTerminals = map(fun(#rule{symbols = [Name | _]}) ->  +                                            Name +                              end, St2#yecc.rules_list), +    DefNonTerms = ordsets:from_list(DefinedNonTerminals), +    UndefNonTerms = ordsets:subtract(NonTerms, DefNonTerms), +    add_errors(ordsets:to_list(ordsets:subtract(UndefNonTerms,  +                                                UnusedNonTerms)), +               missing_syntax_rule, St2). + +%% States are sometimes big, should not be copied to ETS tables. +%% Here an "extendible" tuple is used. +lookup_state(StateTab, N) -> +    element(N+1, StateTab). + +insert_state(#tabs{state_id = StateIdTab}, StateTab0, State, StateId) -> +    {N, _Items} = State, +    insert_state_id(StateIdTab, N, StateId), +    StateTab = if  +                   tuple_size(StateTab0) > N -> +                       StateTab0; +                   true -> +                       list_to_tuple(tuple_to_list(StateTab0) ++ +                                     lists:duplicate(round(1 + N * 1.5), [])) +               end, +    setelement(N+1, StateTab, State). + +insert_state_id(StateIdTab, N, StateId) -> +    true = ets:insert(StateIdTab, {StateId, N}). + +compute_states1([], {N, _}=_CurrState, StateTab0, _Tables) -> +    {StateTab0, N}; +compute_states1([{N, Symbols} | Try], CurrState, StateTab, Tables) -> +    {_N, S} = lookup_state(StateTab, N), +    Seeds = state_seeds(S, Symbols), +    compute_states2(Seeds, N, Try, CurrState, StateTab, Tables). + +compute_states2([], _N, Try, CurrState, StateTab, Tables) -> +    compute_states1(Try, CurrState, StateTab, Tables); +compute_states2([{Sym,Seed} | Seeds], N, Try, CurrState, StateTab, Tables) -> +    {StateId, NewState} = compute_state(Seed, Tables), +    case check_states(NewState, StateId, StateTab, Tables) of +        add -> +            {M, _} = CurrState, +            %% io:fwrite(<<"Adding state ~w\n">>, [M + 1]), +            CurrentSymbols = get_current_symbols(NewState), +            Next = M + 1, +            NextState = {Next, NewState}, +            NewStateTab = insert_state(Tables, StateTab, NextState, StateId), +            insert_goto(Tables, N, Sym, Next), +            compute_states2(Seeds, N, [{Next, CurrentSymbols} | Try], +                            NextState, NewStateTab, Tables); +        {old, M} -> +            %% io:fwrite(<<"Identical to old state ~w\n">>, [M]), +            insert_goto(Tables, N, Sym, M), +            compute_states2(Seeds, N, Try, CurrState, StateTab, Tables); +        {merge, M, NewCurrent} -> +            %% io:fwrite(<<"Merging with state ~w\n">>, [M]), +            Try1 = case keysearch(M, 1, Try) of +                       false -> +                           [{M, NewCurrent} | Try]; +                       {value, {_, OldCurrent}} -> +                           case ordsets:is_subset(NewCurrent, OldCurrent) of +                               true -> +                                   Try; +                               false -> +                                   [{M, ordsets:union(NewCurrent, OldCurrent)} +                                    | keydelete(M, 1, Try)] +                           end +                   end, +            NewStateTab = merge_states(NewState, StateTab, Tables, M,StateId), +            insert_goto(Tables, N, Sym, M), +            compute_states2(Seeds, N, Try1, CurrState, NewStateTab, Tables) +    end. + +insert_goto(Tables, From, Sym, To) -> +    true = ets:insert(Tables#tabs.goto, {{From, Sym, To}}). + +%% Create an ets table for faster lookups. +create_symbol_table(St) -> +    #yecc{terminals = Terminals, endsymbol = Endsymbol} = St, +    SymbolTab = ets:new(yecc_symbols, [{keypos,1}]), +    %% '$empty' is always assigned 0 +    Ts = ['$empty', Endsymbol | delete('$empty', Terminals)], +    TsC = count(0, Ts), +    NTsC = map(fun({NT,I}) -> {NT,-I} end, count(1, St#yecc.nonterminals)), +    Cs = TsC++NTsC, +    true = ets:insert(SymbolTab, Cs), + +    InvSymTable = ets:new(yecc_inverted_terminals, [{keypos,2}]), +    true = ets:insert(InvSymTable, Cs), + +    St#yecc{symbol_tab = SymbolTab, inv_symbol_tab = InvSymTable}. + +get_current_symbols(State) -> +    usort(get_current_symbols1(State, [])). + +get_current_symbols1([], Syms) -> +    Syms; +get_current_symbols1([#item{rhs = Rhs} | Items], Syms) -> +    case Rhs of +        [] -> +            get_current_symbols1(Items, Syms); +        [Symbol | _] -> +            get_current_symbols1(Items, [Symbol | Syms]) +    end. + +state_seeds(Items, Symbols) -> +    L = [{S,{LA,RP + 1}} || #item{rule_pointer = RP, look_ahead = LA,  +                                  rhs = [S | _]} <- Items], +    state_seeds1(keysort(1, L), Symbols). + +state_seeds1(_L, []) -> +    []; +state_seeds1(L, [Symbol | Symbols]) -> +    state_seeds(L, Symbol, Symbols, []). + +state_seeds([{Symbol, Item} | L], Symbol, Symbols, Is) -> +    state_seeds(L, Symbol, Symbols, [Item | Is]); +state_seeds([{S, _Item} | L], Symbol, Symbols, Is) when S < Symbol -> +    state_seeds(L, Symbol, Symbols, Is); +state_seeds(L, Symbol, Symbols, Is) -> +    [{Symbol, Is} | state_seeds1(L, Symbols)]. + +compute_state(Seed, Tables) -> +    RpInfo = Tables#tabs.rp_info, +    foreach(fun({LA, RulePointer}) -> put(RulePointer, LA) end, Seed), +    foreach(fun({LA, RP}) -> compute_closure(LA, RP, RpInfo) end, Seed), +    Closure = keysort(1, erase()), +    state_items(Closure, [], [], Tables#tabs.rp_rhs). + +%% Collects a uniqe id for the state (all rule pointers).  +state_items([{RP, LA} | L], Is, Id, RpRhs) -> +    I = #item{rule_pointer = RP, look_ahead = LA, rhs = element(RP, RpRhs)}, +    state_items(L, [I | Is], [RP | Id], RpRhs); +state_items(_, Is, Id, _RpRhs) -> +    {Id, Is}. + +-compile({inline,[compute_closure/3]}). +compute_closure(Lookahead, RulePointer, RpInfo) -> +    case element(RulePointer, RpInfo) of +        []=Void -> % no followers, or terminal +            Void; +        {no_union, ExpandingRules, NewLookahead} -> +            compute_closure1(ExpandingRules, NewLookahead, RpInfo); +        {union, ExpandingRules, Lookahead0} -> +            NewLookahead = set_union(Lookahead0, Lookahead), +            compute_closure1(ExpandingRules, NewLookahead, RpInfo); +        ExpandingRules -> +            compute_closure1(ExpandingRules, Lookahead, RpInfo) +    end. +     +compute_closure1([RulePointer | Tail], NewLookahead, RpInfo) -> +    compute_closure1(Tail, NewLookahead, RpInfo), +    case get(RulePointer) of +        undefined -> % New +            put(RulePointer, NewLookahead), +            compute_closure(NewLookahead, RulePointer, RpInfo); +        Lookahead2 -> +            Lookahead = set_union(Lookahead2, NewLookahead), +            if  +                Lookahead =:= Lookahead2 -> % Old +                    Lookahead2; % void() +                true -> % Merge +                    put(RulePointer, Lookahead), +                    compute_closure(NewLookahead, RulePointer, RpInfo) +            end +    end; +compute_closure1(Nil, _, _RpInfo) -> +    Nil. + +%% Check if some old state is a superset of our NewState +check_states(NewState, StateId, StateTab, #tabs{state_id = StateIdTab}) -> +    try ets:lookup_element(StateIdTab, StateId, 2) of +        N -> +            {_N, OldState} = lookup_state(StateTab, N), +            check_state1(NewState, OldState, [], N) +    catch error:_ -> add +    end. + +check_state1([#item{look_ahead = Lookahead1, rhs = Rhs} | Items1], +             [#item{look_ahead = Lookahead2} | Items2], Symbols, N) -> +    case set_is_subset(Lookahead1, Lookahead2) of +        true -> +            check_state1(Items1, Items2, Symbols, N); +        false -> +            case Rhs of +                [] -> +                    check_state2(Items1, Items2, Symbols, N); +                [Symbol | _] -> +                    check_state2(Items1, Items2, [Symbol | Symbols], N) +            end +    end; +check_state1([], [], _Symbols, N) -> +    {old, N}. + +check_state2([#item{look_ahead = Lookahead1, rhs = Rhs} | Items1], +             [#item{look_ahead = Lookahead2} | Items2], Symbols, N) -> +    case set_is_subset(Lookahead1, Lookahead2) of +        true -> +            check_state2(Items1, Items2, Symbols, N); +        false -> +            case Rhs of +                [] -> +                    check_state2(Items1, Items2, Symbols, N); +                [Symbol | _] -> +                    check_state2(Items1, Items2, [Symbol | Symbols], N) +            end +    end; +check_state2([], [], Symbols, N) -> +    {merge, N, usort(Symbols)}. + +merge_states(NewState, StateTab, Tables, M, StateId) -> +    {_M, Old_state} = lookup_state(StateTab, M), +    MergedState = merge_states1(NewState, Old_state), +    insert_state(Tables, StateTab, {M, MergedState}, StateId). + +merge_states1([Item1 | Items1], [Item2 | Items2]) -> +    LA1 = Item1#item.look_ahead, +    LA2 = Item2#item.look_ahead, +    if +        LA1 =:= LA2 -> +            [Item1 | merge_states1(Items1, Items2)]; +        true -> +            [Item1#item{look_ahead = set_union(LA1, LA2)} +             | merge_states1(Items1, Items2)] +    end; +merge_states1(_, _) -> +    []. + +%% RulePointer -> Rhs. Every position Rhs in has its unique "rule pointer". +make_rhs_index(RulesList) -> +    Index = flatmap(fun(#rule{symbols = [_Non | Daughters]}) -> +                            suffixes0(Daughters) +                    end, RulesList), +    list_to_tuple(Index). + +suffixes0([?EMPTY]) -> +    [[], []]; +suffixes0(L) -> +    suffixes(L). + +suffixes([]=L) -> +    [L]; +suffixes([_ | T]=L) -> +    [L | suffixes(T)]. + +%% Setup info about lookahead and expanding rules for each point +%% ("rule pointer") in the grammar.  +make_rule_pointer_info(StC, RpRhs, RuleIndex) -> +    SymbolTab = StC#yecc.symbol_tab, +    LcTab = make_left_corner_table(StC), +    LA_index = map(fun(Syms) -> +                           rp_info(Syms, SymbolTab, LcTab, RuleIndex) +                   end, tuple_to_list(RpRhs)), +    list_to_tuple(LA_index). + +rp_info([], _SymbolTab, _LcTab, _RuleIndex) -> +    []; +rp_info([Category | Followers], SymbolTab, LcTab, RuleIndex) -> +    case dict:find(Category, RuleIndex) of +        error -> % terminal +            []; +        {ok, ExpandingRules} when Followers =:= [] -> +            ExpandingRules; +        {ok, ExpandingRules} -> +            case make_lookahead(Followers, SymbolTab, LcTab, set_empty()) of +                {empty, LA} -> +                    {union, ExpandingRules, LA}; +                LA -> +                    {no_union, ExpandingRules, LA} +            end +    end. + +%% Lookahead computation is complicated by the possible existence +%% of null string rewriting rules, such as A -> '$empty'. +make_lookahead([], _, _, LA) -> +    {empty, LA}; +make_lookahead([Symbol | Symbols], SymbolTab, LcTab, LA) -> +    case dict:find(Symbol, LcTab) of +        {ok, LeftCorner} -> % nonterminal +            case empty_member(LeftCorner) of +                true -> +                    make_lookahead(Symbols, SymbolTab, LcTab, +                                   set_union(empty_delete(LeftCorner), LA)); +                false -> +                    set_union(LeftCorner, LA) +            end; +        error -> % terminal +            set_add(Symbol, LA) +    end. + +%% -> dict-of({Nonterminal, [Terminal]}). +%% The algorithm FIRST/1 from the Dragon Book. +%% Left corner table, all terminals (including '$empty') that can +%% begin strings generated by Nonterminal. +make_left_corner_table(#yecc{rules_list = RulesList} = St) -> +    SymbolTab = left_corner_symbol_table(St), +    Rules = map(fun(#rule{symbols = [Lhs | Rhs]}) -> +                        {Lhs,{Lhs, Rhs}} +                end, RulesList), +    LeftHandTab = dict:from_list(family(Rules)), +    X0 = [{S,H} || {H,{H,Rhs}} <- Rules,  +                   S <- Rhs,  +                   not is_terminal(SymbolTab, S)], +    XL = family_with_domain(X0, St#yecc.nonterminals), +    X = dict:from_list(XL), +    Xref = fun(NT) -> dict:fetch(NT, X) end, +    E = set_empty(), +    LC0 = dict:from_list([{H, E} || {H,_} <- XL]), +    %% Handle H -> a S, where a is a terminal ('$empty' inclusive). +    {Q, LC1} = +        foldl(fun({H,{H,[S | _]}}, {Q0, LC}) -> +                      case ets:lookup(SymbolTab, S) of +                          [{_,Num}=SymbolAndNum] when Num >= 0 -> +                              F = set_add_terminal(SymbolAndNum, E), +                              {[Xref(H) | Q0], upd_first(H, F, LC)}; +                          _ -> +                              {Q0, LC} +                      end +              end, {[], LC0}, Rules), +    left_corners(Q, LC1, LeftHandTab, SymbolTab, Xref). + +left_corners(Q0, LC0, LeftHandTab, SymbolTab, Xref) -> +    case usort(append(Q0)) of +        [] ->  +            LC0; +        Q1 ->  +            Rs = flatmap(fun(NT) -> dict:fetch(NT, LeftHandTab) end, Q1), +            {LC, Q} = left_corners2(Rs, LC0, [], SymbolTab, Xref), +            left_corners(Q, LC, LeftHandTab, SymbolTab, Xref) +    end. +     +left_corners2([], LC, Q, _SymbolTab, _Xref) -> +    {LC, Q}; +left_corners2([{Head,Rhs} | Rs], LC, Q0, SymbolTab, Xref) -> +    Ts = left_corner_rhs(Rhs, Head, LC, set_empty(), SymbolTab), +    First0 = dict:fetch(Head, LC), +    case set_is_subset(Ts, First0) of +        true -> +            left_corners2(Rs, LC, Q0, SymbolTab, Xref); +        false -> +            LC1 = upd_first(Head, Ts, LC), +            left_corners2(Rs, LC1, [Xref(Head) | Q0], SymbolTab, Xref) +    end. + +upd_first(NT, Ts, LC) -> +    dict:update(NT, fun(First) -> set_union(First, Ts) end, LC). + +left_corner_rhs([S | Ss], Head, LC, Ts, SymbolTab) -> +    case ets:lookup(SymbolTab, S) of +        [{_,Num}=SymbolAndNum] when Num >= 0 -> +            set_add_terminal(SymbolAndNum, Ts); +        [_NonTerminalSymbol] -> +            First = dict:fetch(S, LC), +            case empty_member(First) of +                true -> +                    NTs = set_union(empty_delete(First), Ts), +                    left_corner_rhs(Ss, Head, LC, NTs, SymbolTab); +                false -> +                    set_union(First, Ts) +            end +    end; +left_corner_rhs([], _Head, _LC, Ts, _SymbolTab) -> +    set_add(?EMPTY, Ts). + +%% For every non-terminal return a list of "rule pointers" for rules +%% expanding the non-terminal. +%% Also assigns a unique number to each point in the grammar, "rule pointer". +make_rule_index(#yecc{nonterminals = Nonterminals,  +                      rules_list = RulesList}, RulesListNoCodes) -> +    {RulesL, _N} =  +        lists:mapfoldl(fun(#rule{symbols = [Nonterminal | Daughters]}, I) -> +                               I1 = I + length(Daughters)+1, +                               {{Nonterminal, I}, I1} +                       end, 1, RulesList), +    IndexedTab = family_with_domain(RulesL, Nonterminals), + +    Symbol2Rule = [{Foo,R} || #rule{symbols = Symbols}=R <- RulesListNoCodes, +                              Foo <- Symbols], +    Pointer2Rule = [{I, R} || {{_Foo,R},I} <- count(1, Symbol2Rule)], +    {dict:from_list(IndexedTab), dict:from_list(Pointer2Rule)}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Computing parse action table from list of states and goto table: + +compute_parse_actions(N, St, StateActions) -> +    case N < first_state() of +        true ->  +            StateActions; +        false -> +            {N, StateN} = lookup_state(St#yecc.state_tab, N), +            %% There can be duplicates in Actions. +            Actions = compute_parse_actions1(StateN, N, St), +            compute_parse_actions(N - 1, St, [{N, Actions} | StateActions]) +    end. + +compute_parse_actions1([], _, _) -> +    []; +compute_parse_actions1([#item{rule_pointer = RulePointer,  +                              look_ahead = Lookahead0,  +                              rhs = Rhs} | Items], N, St) -> +    case Rhs of +        [] -> +            Lookahead = decode_terminals(Lookahead0, St#yecc.inv_symbol_tab), +            case rule(RulePointer, St) of +                {[?ACCEPT | _], _RuleLine, _} -> +                    [{Lookahead, accept} +                     | compute_parse_actions1(Items, N, St)]; +                %% Head is placed after the daughters when finding the +                %% precedence. This is how giving precedence to +                %% non-terminals takes effect. +                {[Head | Daughters0], _RuleLine, _} -> +                    Daughters = delete('$empty', Daughters0), +                    [{Lookahead, +                      #reduce{rule_nmbr = RulePointer, head = Head,  +                              nmbr_of_daughters = length(Daughters),  +                              prec = get_prec(Daughters ++ [Head], St)}} +                     | compute_parse_actions1(Items, N, St)] +            end; +        [Symbol | Daughters] -> +            case is_terminal(St#yecc.symbol_tab, Symbol) of +                true -> +                    DecSymbol = decode_symbol(Symbol, St#yecc.inv_symbol_tab), +                    {[Head | _], _RuleLine, _} = rule(RulePointer, St), +                    %% A bogus shift-shift conflict can be introduced +                    %% here if some terminal occurs in different rules +                    %% which have been given precedence "one level up". +                    Prec1 = case Daughters of +                                [] -> get_prec([DecSymbol, Head], St); +                                _ -> get_prec([DecSymbol], St) +                            end, +                    Pos = case Daughters of +                              [] -> z; +                              _ -> a +                          end, +                    [{[DecSymbol], +                      #shift{state = goto(N, DecSymbol, St),  +                             pos = Pos, +                             prec = Prec1, +                             rule_nmbr = RulePointer}} +                     | compute_parse_actions1(Items, N, St)]; +                false -> +                    compute_parse_actions1(Items, N, St) +            end +    end. + +get_prec(Symbols, St) -> +    get_prec1(Symbols, St#yecc.prec_tab, {0, none}). + +get_prec1([], _, P) -> +    P; +get_prec1([Symbol | T], PrecTab, P) -> +    case ets:lookup(PrecTab, Symbol) of +        [] -> +            get_prec1(T, PrecTab, P); +        [{_, N, Ass}] -> +            get_prec1(T, PrecTab, {N, Ass}) +    end. + +create_precedence_table(St) -> +    PrecTab = ets:new(yecc_precedences, []), +    true = ets:insert(PrecTab, St#yecc.prec), +    St#yecc{prec_tab = PrecTab}. +     +-record(cxt, {terminal, state_n, yecc, res}). + +%% Detects shift-reduce and reduce-reduce conflicts. +%% Also removes all but one conflicting action. As a consequence the +%% lookahead sets for a state are always disjoint. +%% Reduce/reduce conflicts are considered errors. +find_action_conflicts(St0) -> +    Cxt0 = #cxt{yecc = St0, res = []}, +    {#cxt{yecc = St, res = Res}, NewParseActions0} =  +        foldl(fun({N, Actions0}, {Cxt1, StateActions}) -> +                      L = [{Terminal, Act} || {Lookahead, Act} <- Actions0, +                                              Terminal <- Lookahead], +                      {Cxt, Actions} =  +                          foldl(fun({Terminal, As}, {Cxt2,Acts0}) -> +                                        Cxt3 = Cxt2#cxt{terminal = Terminal,  +                                                        state_n = N}, +                                        {Action, Cxt} =  +                                            find_action_conflicts2(As, Cxt3), +                                        {Cxt,[{Action,Terminal} | Acts0]} +                                end, {Cxt1,[]}, family(L)), +                      {Cxt,[{N,inverse(family(Actions))} | StateActions]} +              end, {Cxt0, []}, St0#yecc.parse_actions), +    if  +        length(Res) > 0, St#yecc.verbose ->  +            io:fwrite(<<"\n*** Conflicts resolved by operator " +                        "precedences:\n\n">>), +            foreach(fun({Confl, Name}) -> +                            report_conflict(Confl, St, Name, prec) +                    end, reverse(Res)), +            io:fwrite(<<"*** End of resolved conflicts\n\n">>); +        true ->  +            ok +    end, +    NewParseActions = reverse(NewParseActions0), +    St#yecc{parse_actions = NewParseActions}. + +find_action_conflicts2([Action], Cxt) -> +    {Action, Cxt}; +find_action_conflicts2([#shift{state = St, pos = Pos, prec = Prec}, +                        #shift{state = St}=S | As],  +                       Cxt) when Pos =:= a; Prec =:= {0,none} -> +    %% This is a kludge to remove the bogus shift-shift conflict +    %% introduced in compute_parse_actions1(). +    find_action_conflicts2([S | As], Cxt); +find_action_conflicts2([#shift{state = NewState, pos = z}=S1, +                        #shift{state = NewState}=S2 | _], Cxt) -> +    %% This is even worse than last clause. Give up. +    Confl = conflict(S1, S2, Cxt), +    #cxt{yecc = St0} = Cxt, +    St = conflict_error(Confl, St0), +    {S1, Cxt#cxt{yecc = St}}; % return any action +find_action_conflicts2([#shift{prec = {P1, Ass1}}=S | Rs], Cxt0) -> +    {R, Cxt1} = find_reduce_reduce(Rs, Cxt0), +    #cxt{res = Res0, yecc = St0} = Cxt1, +    #reduce{prec = {P2, Ass2}} = R, +    Confl = conflict(R, S, Cxt1), +    if +        P1 > P2 -> +            {S, Cxt1#cxt{res = [{Confl, shift} | Res0]}}; +        P2 > P1 -> +            {R, Cxt1#cxt{res = [{Confl, reduce} | Res0]}}; +        Ass1 =:= left, Ass2 =:= left -> +            {R, Cxt1#cxt{res = [{Confl, reduce} | Res0]}}; +        Ass1 =:= right, Ass2 =:= right -> +            {S, Cxt1#cxt{res = [{Confl, shift} | Res0]}}; +        Ass1 =:= nonassoc, Ass2 =:= nonassoc -> +            {nonassoc, Cxt1}; +        P1 =:= 0, P2 =:= 0 -> +            report_conflict(Confl, St0, shift, default), +            St = add_conflict(Confl, St0), +            {S, Cxt1#cxt{yecc = St}}; +        true -> +            St = conflict_error(Confl, St0), +            {S, Cxt1#cxt{yecc = St}} % return any action +    end; +find_action_conflicts2(Rs, Cxt0) -> +    find_reduce_reduce(Rs, Cxt0). +          +find_reduce_reduce([R], Cxt) -> +    {R, Cxt}; +find_reduce_reduce([#reduce{head = Categ1, prec = {P1, _}}=R1,  +                    #reduce{head = Categ2, prec = {P2, _}}=R2 | Rs], Cxt0) -> +    #cxt{res = Res0, yecc = St0} = Cxt0, +    Confl = conflict(R1, R2, Cxt0), +    {R, Res, St} =  +        if +            P1 > P2 -> +                {R1, [{Confl, Categ1} | Res0], St0}; +            P2 > P1 -> +                {R2, [{Confl, Categ2} | Res0], St0}; +            true -> +                St1 = conflict_error(Confl, St0),  +                {R1, Res0, St1} +        end, +    Cxt = Cxt0#cxt{res = Res, yecc = St}, +    find_reduce_reduce([R | Rs], Cxt). + +%% Since the lookahead sets are disjoint (assured by +%% find_action_conflicts), the order between actions can be chosen +%% almost arbitrarily. nonassoc has to come last, though (but is later +%% discarded!). And shift has to come before reduce. +sort_parse_actions([]) -> +    []; +sort_parse_actions([{N, La_actions} | Tail]) -> +    [{N, sort_parse_actions1(La_actions)} | sort_parse_actions(Tail)]. + +sort_parse_actions1(LaActions) -> +    As = filter(fun({_LA, A}) -> A =:= accept end, LaActions), +    Ss = filter(fun({_LA, A}) -> is_record(A, shift) end, LaActions), +    Rs = filter(fun({_LA, A}) -> is_record(A, reduce) end, LaActions), +    Ns = filter(fun({_LA, A}) -> A =:= nonassoc end, LaActions), +    As ++ Ss ++ Rs ++ Ns. + +%% -> {State, StateRepr}. StateRepr has the same set of shift actions +%% as State. No code will be output for State if State =/= StateRepr. +find_identical_shift_states(StateActions) -> +    L1 = [{Actions, State} || {State,Actions} <- StateActions], +    {SO, NotSO} = lists:partition(fun({Actions,_States}) -> +                                          shift_actions_only(Actions) +                                  end, family(L1)), +    R = [{State, hd(States)} || {_Actions, States} <- SO, State <- States] +        ++  +        [{State, State} || {_Actions, States} <- NotSO, State <- States], +    lists:keysort(1, R). + +-record(part_data, {name, eq_state, actions, n_actions, states}). + +%% Replace {SStates,Actions} with {SStates,{Actions,Jump}} where +%% Jump describes which clauses that have been extracted from shift +%% states so that they can be used from other states. Some space is +%% saved. +find_partial_shift_states(StateActionsL, StateReprs) -> +    L = [{State, Actions} || {{State,Actions}, {State,State}} <- +                                 lists:zip(StateActionsL, StateReprs), +                             shift_actions_only(Actions)], +    StateActions = sofs:family(L, [{state,[action]}]), +    StateAction = sofs:family_to_relation(StateActions), + +    %% Two actions are equal if they occur in the same states: +    Parts = sofs:partition(sofs:range(StateActions)), +    PartsL = sofs:to_external(Parts), +    %% Assign temporary names to the parts of the partition (of actions): +    PartNameL = lists:zip(seq1(length(PartsL)), PartsL), +    ActPartL = [{Action,PartName} ||  +                   {PartName,Actions} <- PartNameL, +                   Action <- Actions], +    ActionPartName = sofs:relation(ActPartL, [{action,partname}]), +    StatePartName = sofs:relative_product(StateAction, ActionPartName), +    PartInStates = sofs:relation_to_family(sofs:converse(StatePartName)), + +    %% Parts that equal all actions of a state: +    PartActions = sofs:family(PartNameL, [{partname,[action]}]), +    PartState =  +        sofs:relative_product(PartActions, sofs:converse(StateActions)), +    PartStates = sofs_family_with_domain(PartState, sofs:domain(PartActions)), + +    PartDataL = [#part_data{name = Nm, eq_state = EqS, actions = P,  +                            n_actions = length(P),  +                            states = ordsets:from_list(S)} ||  +                    {{Nm,P}, {Nm,S}, {Nm,EqS}} <-  +                        lists:zip3(PartNameL,  +                                   sofs:to_external(PartInStates), +                                   sofs:to_external(PartStates))], +    true = length(PartDataL) =:= length(PartNameL), +    Ps = select_parts(PartDataL), + +    J1 = [{State, Actions, {jump_some,hd(States)}} || +             {_W, #part_data{actions = Actions, eq_state = [],  +                             states = States}} <- Ps, +             State <- States], +    J2 = [{State, Actions, {jump_all,To}} || +             {_W, #part_data{actions = Actions, eq_state = EqS,  +                             states = States}} <- Ps, +             To <- EqS, +             State <- States, +             State =/= To], +    J = lists:keysort(1, J1 ++ J2), + +    JumpStates = ordsets:from_list([S || {S,_,_} <- J]), +    {JS, NJS} =  +        sofs:partition(1, sofs:relation(StateActionsL, [{state, actions}]), +                       sofs:set(JumpStates, [state])), +    R =  +        [{S, {Actions,jump_none}} || {S,Actions} <- sofs:to_external(NJS)] +        ++ +        [{S, {Actions--Part, {Tag,ToS,Part}}} ||  +            {{S,Actions}, {S,Part,{Tag,ToS}}} <-  +                lists:zip(sofs:to_external(JS), J)], +    true = length(StateActionsL) =:= length(R), +    lists:keysort(1, R). + +%% Very greedy. By no means optimal.  +select_parts([]) -> +    []; +select_parts(PartDataL) -> +    T1 = [{score(PD), PD} || PD <- PartDataL], +    [{W, PD} | Ws] = lists:reverse(lists:keysort(1, T1)), +    #part_data{n_actions = NActions, states = S} = PD, +    if +        W < 8 -> % don't bother +            []; +        true -> +            %% Cannot extract more clauses from the chosen part's states: +            NL = [D#part_data{states = NewS} ||  +                     {W1, #part_data{states = S0}=D} <- Ws, +                     W1 > 0, +                     (NewS = ordsets:subtract(S0, S)) =/= []], +            if  +                length(S) =:= 1; NActions =:= 1 -> +                    select_parts(NL); +                true ->  +                    [{W,PD} | select_parts(NL)] +            end +    end. + +%% Does it pay off to extract clauses into a new function? +%% Assumptions: +%% - a call costs 8 (C = 8); +%% - a clause (per action) costs 20 plus 8 (select) (Cl = 28); +%% - a new function costs 20 (funinfo) plus 16 (select) (F = 36). +%% A is number of actions, S is number of states. +%% Special case (the part equals all actions of some state): +%% C * (S - 1) < (S - 1) * A * Cl +%% Normal case (introduce new function): +%% F + A * Cl + C * S < S * A * Cl +score(#part_data{n_actions = NActions, eq_state = [], states = S}) -> +    (length(S) * NActions * 28) - (36 + NActions * 28 + length(S) * 8); +score(#part_data{n_actions = NActions, states = S}) -> +    ((length(S) - 1) * NActions * 28) - (8 * (length(S) - 1)). + +shift_actions_only(Actions) -> +    length([foo || {_Ts,{shift,_,_,_,_}} <- Actions]) =:= length(Actions). + +collect_some_state_info(StateActions, StateReprs) -> +    RF = fun({_LA, A}) -> is_record(A, reduce) end, +    L = [{State,  +          begin +              RO = lists:all(RF, LaActions), +              %% C is currently always ""; identical states are all shift. +              C = [io_lib:fwrite(<<" %% ~w\n">>, [State]) ||  +                      true <- [RO], Repr =/= State], +              #state_info{reduce_only = RO, state_repr = Repr, comment = C} +          end} || +            {{State, LaActions}, {State, Repr}} <- +                lists:zip(StateActions, StateReprs)], +    list_to_tuple(L). + +conflict_error(Conflict, St0) -> +    St1 = add_conflict(Conflict, St0), +    add_error({conflict, Conflict}, St1). + +report_conflict(Conflict, St, ActionName, How) -> +    if +        St#yecc.verbose -> +            io:fwrite(<<"~s\n">>, [format_conflict(Conflict)]), +            Formated = format_symbol(ActionName), +            case How of  +                prec -> +                    io:fwrite(<<"Resolved in favor of ~s.\n\n">>, [Formated]); +                default -> +                    io:fwrite(<<"Conflict resolved in favor of ~s.\n\n">>,  +                              [Formated]) +            end; +        true -> +            ok +    end. + +add_conflict(Conflict, St) -> +    case Conflict of +        {Symbol, StateN, _, {reduce, _, _, _}} -> +            St#yecc{reduce_reduce = [{StateN,Symbol} |St#yecc.reduce_reduce]}; +        {Symbol, StateN, _, {shift, _, _}} -> +            St#yecc{shift_reduce = [{StateN,Symbol} | St#yecc.shift_reduce]}; +        {_Symbol, _StateN, {one_level_up, _, _}, _Confl} -> +            St +    end. + +conflict(#shift{prec = Prec1, rule_nmbr = RuleNmbr1},  +         #shift{prec = Prec2, rule_nmbr = RuleNmbr2}, Cxt) -> +    %% Conflict due to precedences "one level up". Kludge. +    #cxt{terminal = Symbol, state_n = N, yecc = St} = Cxt,     +    {_, L1, RuleN1} = rule(RuleNmbr1, St), +    {_, L2, RuleN2} = rule(RuleNmbr2, St), +    Confl = {one_level_up, {L1, RuleN1, Prec1}, {L2, RuleN2, Prec2}}, +    {Symbol, N, Confl, Confl}; +conflict(#reduce{rule_nmbr = RuleNmbr1}, NewAction, Cxt) -> +    #cxt{terminal = Symbol, state_n = N, yecc = St} = Cxt, +    {R1, RuleLine1, RuleN1} = rule(RuleNmbr1, St), +    Confl = case NewAction of +                #reduce{rule_nmbr = RuleNmbr2} ->  +                    {R2, RuleLine2, RuleN2} = rule(RuleNmbr2, St), +                    {reduce, R2, RuleN2, RuleLine2}; +                #shift{state = NewState} -> +                    {shift, NewState, last(R1)} +            end, +    {Symbol, N, {R1, RuleN1, RuleLine1}, Confl}. + +format_conflict({Symbol, N, _, {one_level_up,  +                                {L1, RuleN1, {P1, Ass1}},  +                                {L2, RuleN2, {P2, Ass2}}}}) -> +    S1 = io_lib:fwrite(<<"Conflicting precedences of symbols when " +                         "scanning ~s in state ~w:\n">>,  +                       [format_symbol(Symbol), N]), +    S2 = io_lib:fwrite(<<"   ~s ~w (rule ~w at line ~w)\n" +                          "      vs.\n">>, +                       [format_assoc(Ass1), P1, RuleN1, L1]), +    S3 = io_lib:fwrite(<<"   ~s ~w (rule ~w at line ~w)\n">>,  +                       [format_assoc(Ass2), P2, RuleN2, L2]), +    [S1, S2, S3]; +format_conflict({Symbol, N, Reduce, Confl}) -> +    S1 = io_lib:fwrite(<<"Parse action conflict scanning symbol " +                         "~s in state ~w:\n">>, [format_symbol(Symbol), N]), +    S2 = case Reduce of +             {[HR | TR], RuleNmbr, RuleLine} -> +                 io_lib:fwrite(<<"   Reduce to ~s from ~s (rule ~w at " +                                 "line ~w)\n      vs.\n">>, +                               [format_symbol(HR), format_symbols(TR),  +                                RuleNmbr, RuleLine]) +         end, +    S3 = case Confl of  +             {reduce, [HR2|TR2], RuleNmbr2, RuleLine2} -> +                 io_lib:fwrite(<<"   reduce to ~s from ~s " +                                 "(rule ~w at line ~w).">>, +                               [format_symbol(HR2), format_symbols(TR2),  +                                RuleNmbr2, RuleLine2]); +             {shift, NewState, Sym} -> +                 io_lib:fwrite(<<"   shift to state ~w, adding right " +                                 "sisters to ~s.">>, +                           [NewState, format_symbol(Sym)]) +         end, +    [S1, S2, S3]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Code generation: + +%% The version up to and including parsetools-1.3 is called "1.0". +%% +%% "1.1", parsetools-1.4: +%% - the prologue file has been updated; +%% - nonassoc is new; +%% - different order of clauses; +%% - never more than one clause matching a given symbol in a given state; +%% - file attributes relate messages to .yrl file; +%% - actions put in inlined functions; +%% - a few other minor fixes. +%% +%% "1.2", parsetools-1.4.2: +%% - the generated code has been changed as follows: +%%   - yeccpars2() calls the functions yeccpars2_State(); +%%   - several states can share yeccpars2_State(), which reduces code size; +%%   - yeccgoto() has been split on one function per nonterminal; +%%   - several minor changes have made the loaded code smaller. +%% - the include file yeccpre.hrl has been changed incompatibly. +%% +%% "1.3", parsetools-1.4.4: +%% - the generated code has been changed as follows: +%%   - yeccgoto_T() no longer returns the next state, but calls yeccpars_S(); +%%   - yeccpars2() is not called when it is known which yeccpars2_S() to call; +%%   - "__Stack" has been substituted for "Stack"; +%%   - several states can share yeccpars2_S_cont(), which reduces code size; +%%   - instead if calling lists:nthtail() matching code is emitted. + +-define(CODE_VERSION, "1.3"). +-define(YECC_BUG(M, A),  +        iolist_to_binary([" erlang:error({yecc_bug,\"",?CODE_VERSION,"\",", +                          io_lib:fwrite(M, A), "}).\n\n"])). + +%% Returns number of newlines in included files. +output_prelude(Outport, Inport, St0) when St0#yecc.includefile =:= [] -> +    St5 = output_header(St0), +    #yecc{infile = Infile, module = Module} = St5, +    St10 = fwrite(St5, <<"-module(~w).\n">>, [Module]), +    St20 =  +        fwrite(St10, +               <<"-export([parse/1, parse_and_scan/1, format_error/1]).\n">>, +               []), +    {St25, N_lines_1, LastErlangCodeLine} =  +        case St20#yecc.erlang_code of  +            none -> +                {St20, 0, no_erlang_code}; +            Next_line -> +                St_10 = output_file_directive(St20, Infile, Next_line-1), +                Nmbr_of_lines = include1([], Inport, Outport), +                {St_10, Nmbr_of_lines,  +                 {last_erlang_code_line, Next_line+Nmbr_of_lines}} +    end, +    St30 = nl(St25), +    IncludeFile =  +        filename:join([code:lib_dir(parsetools), "include","yeccpre.hrl"]), +    %% Maybe one could assume there are no warnings in this file. +    St = output_file_directive(St30, IncludeFile, 0), +    N_lines_2 = include(St, IncludeFile, Outport), +    {St, N_lines_1 + N_lines_2, LastErlangCodeLine}; +output_prelude(Outport, Inport, St0) -> +    St5 = output_header(St0), +    #yecc{infile = Infile, module = Module, includefile = Includefile} = St5, +    St10 = fwrite(St5, <<"-module(~w).\n">>, [Module]), +    St20 = output_file_directive(St10, Includefile, 0), +    N_lines_1 = include(St20, Includefile, Outport), +    St30 = nl(St20), +    case St30#yecc.erlang_code of  +        none -> +            {St30, N_lines_1, no_erlang_code}; +        Next_line -> +            St = output_file_directive(St30, Infile, Next_line-1), +            Nmbr_of_lines = include1([], Inport, Outport), +            {St, Nmbr_of_lines + N_lines_1,  +             {last_erlang_code_line, Next_line+Nmbr_of_lines}} +    end. + +output_header(St0) -> +    lists:foldl(fun(Str, St) -> fwrite(St, <<"~s\n">>, [Str])  +                end, St0, St0#yecc.header). + +output_goto(St, [{_Nonterminal, []} | Go], StateInfo) -> +    output_goto(St, Go, StateInfo); +output_goto(St0, [{Nonterminal, List} | Go], StateInfo) -> +    F = function_name(yeccgoto, Nonterminal), +    St10 = output_goto1(St0, List, F, StateInfo, true), +    St = output_goto_fini(F, Nonterminal, St10), +    output_goto(St, Go, StateInfo); +output_goto(St, [], _StateInfo) -> +    St. + +output_goto1(St0, [{From, To} | Tail], F, StateInfo, IsFirst) -> +    St10 = delim(St0, IsFirst), +    {To, ToInfo} = lookup_state(StateInfo, To), +    #state_info{reduce_only = RO, state_repr = Repr, comment = C} = ToInfo, +    if +        RO ->  +            %% Reduce actions do not use the state, so we just pass +            %% the old (now bogus) on: +            FromS = io_lib:fwrite("~w=_S", [From]), +            ToS = "_S"; +        true -> +            FromS = io_lib:fwrite("~w", [From]), +            ToS = io_lib:fwrite("~w", [To]) +    end, +    St20 = fwrite(St10, <<"~w(~s, Cat, Ss, Stack, T, Ts, Tzr) ->\n">>,  +                  [F,FromS]), +    St30 = fwrite(St20, <<"~s">>, [C]), +    %% Short-circuit call to yeccpars2: +    St = fwrite(St30, <<" yeccpars2_~w(~s, Cat, Ss, Stack, T, Ts, Tzr)">>,  +                [Repr, ToS]), +    output_goto1(St, Tail, F, StateInfo, false); +output_goto1(St, [], _F, _StateInfo, _IsFirst) -> +    St. + +output_goto_fini(F, NT, #yecc{includefile_version = {1,1}}=St0) -> +    %% Backward compatibility. +    St10 = delim(St0, false), +    St = fwrite(St10, <<"~w(State, _Cat, _Ss, _Stack, _T, _Ts, _Tzr) ->\n">>, +                [F]), +    fwrite(St,  +           ?YECC_BUG(<<"{~w, State, missing_in_goto_table}">>, [NT]), +           []); +output_goto_fini(_F, _NT, St) -> +    fwrite(St, <<".\n\n">>, []). + +%% Find actions having user code. +find_user_code(ParseActions, St) -> +    [#user_code{state = State,  +                terminal = Terminal,  +                funname = inlined_function_name(State, Terminal),  +                action = Action} ||  +        {State, La_actions} <- ParseActions, +        {Action, Terminals, RuleNmbr, NmbrOfDaughters}  +            <- find_user_code2(La_actions), +        case tokens(RuleNmbr, St) of +            [{var, _, '__1'}] -> NmbrOfDaughters =/= 1; +            _ -> true +        end, +        Terminal <- Terminals]. + +find_user_code2([]) -> +    []; +find_user_code2([{_, #reduce{rule_nmbr = RuleNmbr, +                             nmbr_of_daughters = NmbrOfDaughters} +                  =Action}]) -> +    %% Same optimization as in output_state_actions. +    [{Action, ["Cat"], RuleNmbr, NmbrOfDaughters}]; +find_user_code2([{La, #reduce{rule_nmbr = RuleNmbr, +                              nmbr_of_daughters = NmbrOfDaughters} +                  =Action} | T]) -> +    [{Action,La, RuleNmbr, NmbrOfDaughters} | find_user_code2(T)]; +find_user_code2([_ | T]) -> +    find_user_code2(T). + +output_actions(St0, StateJumps, StateInfo) -> +    %% Not all the clauses of the dispatcher function yeccpars2() can +    %% be reached. Only when shifting, that is, calling yeccpars1(), +    %% will yeccpars2() be called. +    Y2CL = [NewState || {_State,{Actions,_J}} <- StateJumps, +                        {_LA, #shift{state = NewState}} <- Actions], +    Y2CS = ordsets:from_list([0 | Y2CL]), +    Y2S = ordsets:from_list([S || {S,_} <- StateJumps]), +    NY2CS = ordsets:subtract(Y2S, Y2CS), +    Sel = [{S,true} || S <- ordsets:to_list(Y2CS)] ++ +          [{S,false} || S <- ordsets:to_list(NY2CS)], +                                     +    SelS = [{State,Called} ||  +               {{State,_JActions}, {State,Called}} <-  +                   lists:zip(StateJumps, lists:keysort(1, Sel))], +    St10 = foldl(fun({State, Called}, St_0) -> +                         {State, #state_info{state_repr = IState}} =  +                             lookup_state(StateInfo, State), +                         output_state_selection(St_0, State, IState, Called) +            end, St0, SelS), +    St20 = fwrite(St10, <<"yeccpars2(Other, _, _, _, _, _, _) ->\n">>, []), +    St = fwrite(St20, +                ?YECC_BUG(<<"{missing_state_in_action_table, Other}">>, []), +                []), +    foldl(fun({State, JActions}, St_0) -> +                  {State, #state_info{state_repr = IState}} =  +                      lookup_state(StateInfo, State), +                  output_state_actions(St_0, State, IState,  +                                       JActions, StateInfo) +          end, St, StateJumps). + +output_state_selection(St0, State, IState, Called) -> +    Comment = [<<"%% ">> || false <- [Called]], +    St = fwrite(St0, <<"~syeccpars2(~w=S, Cat, Ss, Stack, T, Ts, Tzr) ->\n">>, +                [Comment, State]), +    fwrite(St,  +           <<"~s yeccpars2_~w(S, Cat, Ss, Stack, T, Ts, Tzr);\n">>,  +           [Comment, IState]). + +output_state_actions(St, State, State, {Actions,jump_none}, SI) -> +    output_state_actions1(St, State, Actions, true, normal, SI); +output_state_actions(St0, State, State, {Actions, Jump}, SI) -> +    {Tag, To, Common} = Jump, +    CS = case Tag of +             jump_some -> list_to_atom(lists:concat([cont_, To])); +             jump_all -> To +         end, +    St = output_state_actions1(St0, State, Actions, true, {to, CS}, SI), +    if  +        To =:= State -> +            output_state_actions1(St, CS, Common, true, normal, SI); +        true -> +            St +    end; +output_state_actions(St, State, JState, _XActions, _SI) -> +    fwrite(St, <<"%% yeccpars2_~w: see yeccpars2_~w\n\n">>, [State, JState]). + +output_state_actions1(St, State, [], _IsFirst, normal, _SI) -> +    output_state_actions_fini(State, St); +output_state_actions1(St0, State, [], IsFirst, {to, ToS}, _SI) -> +    St = delim(St0, IsFirst), +    fwrite(St,  +           <<"yeccpars2_~w(S, Cat, Ss, Stack, T, Ts, Tzr) ->\n" +            " yeccpars2_~w(S, Cat, Ss, Stack, T, Ts, Tzr).\n\n">>, +           [State, ToS]); +output_state_actions1(St0, State, [{_, #reduce{}=Action}],  +                      IsFirst, _End, SI) -> +    St = output_reduce(St0, State, "Cat", Action, IsFirst, SI), +    fwrite(St, <<".\n\n">>, []); +output_state_actions1(St0, State, [{Lookahead,Action} | Tail], +                      IsFirst, End, SI) -> +    {_, St} =  +        foldl(fun(Terminal, {IsFst,St_0}) -> +                      {false, +                       output_action(St_0, State, Terminal, Action, IsFst,SI)} +              end, {IsFirst,St0}, Lookahead), +    output_state_actions1(St, State, Tail, false, End, SI). + +output_action(St, State, Terminal, #reduce{}=Action, IsFirst, SI) -> +    output_reduce(St, State, Terminal, Action, IsFirst, SI); +output_action(St0, State, Terminal, #shift{state = NewState}, IsFirst, _SI) -> +    St10 = delim(St0, IsFirst), +    St = fwrite(St10, <<"yeccpars2_~w(S, ~s, Ss, Stack, T, Ts, Tzr) ->\n">>, +                [State, quoted_atom(Terminal)]), +    output_call_to_includefile(NewState, St); +output_action(St0, State, Terminal, accept, IsFirst, _SI) -> +    St10 = delim(St0, IsFirst), +    St = fwrite(St10,  +                <<"yeccpars2_~w(_S, ~s, _Ss, Stack,  _T, _Ts, _Tzr) ->\n">>, +                [State, quoted_atom(Terminal)]), +    fwrite(St, <<" {ok, hd(Stack)}">>, []); +output_action(St, _State, _Terminal, nonassoc, _IsFirst, _SI) -> +    St. + +output_call_to_includefile(NewState, #yecc{includefile_version = {1,1}}=St) -> +    %% Backward compatibility. +    fwrite(St, <<" yeccpars1(Ts, Tzr, ~w, [S | Ss], [T | Stack])">>,  +           [NewState]); +output_call_to_includefile(NewState, St) -> +    fwrite(St, <<" yeccpars1(S, ~w, Ss, Stack, T, Ts, Tzr)">>,  +           [NewState]). + +output_state_actions_fini(State, #yecc{includefile_version = {1,1}}=St0) -> +    %% Backward compatibility. +    St10 = delim(St0, false), +    St = fwrite(St10, <<"yeccpars2_~w(_, _, _, _, T, _, _) ->\n">>, [State]), +    fwrite(St, <<" yeccerror(T).\n\n">>, []); +output_state_actions_fini(_State, St) -> +    fwrite(St, <<".\n\n">>, []). + +output_reduce(St0, State, Terminal0,  +              #reduce{rule_nmbr = RuleNmbr,  +                      head = Head,  +                      nmbr_of_daughters = NmbrOfDaughters}, +              IsFirst, StateInfo) -> +    St10 = delim(St0, IsFirst), +    Terminal = if  +                   is_atom(Terminal0) -> quoted_atom(Terminal0); +                   true -> Terminal0 +               end, +    St20 = fwrite(St10, +                  <<"yeccpars2_~w(_S, ~s, Ss, Stack, T, Ts, Tzr) ->\n">>, +                  [State, Terminal]), +    St30 =  +        if +            NmbrOfDaughters < 2 -> +                Ns = "Ss", +                St20; +            true -> +                Ns = "Nss", +                Tmp = string:join(lists:duplicate(NmbrOfDaughters - 1, "_"), +                                  ","), +                fwrite(St20, <<" [~s|Nss] = Ss,\n">>, [Tmp]) +        end, +    St40 = case tokens(RuleNmbr, St30) of +               [{var, _, '__1'}] when NmbrOfDaughters =:= 1 -> +                   NewStack = "Stack", +                   St30; +               _ -> +                   NewStack = "NewStack", +                   fwrite(St30, <<" NewStack = ~w(Stack),\n">>,  +                          [inlined_function_name(State, Terminal0)]) +               end, +    if  +        NmbrOfDaughters =:= 0 -> +            NextState = goto(State, Head, St40), +            {NextState, I} = lookup_state(StateInfo, NextState), +            #state_info{reduce_only = RO, state_repr = Repr, comment = C} = I, +            %% Reduce actions do not use the state, so we just pass +            %% the old (now bogus) on: +            if +                RO -> NextS = "_S"; +                true -> NextS = io_lib:fwrite("~w", [NextState]) +            end, +            St = fwrite(St40, <<"~s">>, [C]), +            %% Short-circuit call to yeccpars2: +            fwrite(St, +                   <<" yeccpars2_~w(~s, ~s, [~w | Ss], ~s, T, Ts, Tzr)">>, +                   [Repr, NextS, Terminal, State, NewStack]); +        true -> +            fwrite(St40,  +                   <<" ~w(hd(~s), ~s, ~s, ~s, T, Ts, Tzr)">>, +                   [function_name(yeccgoto, Head), Ns, +                    Terminal, Ns, NewStack]) +    end. + +delim(St, true) -> +    St; +delim(St, false) -> +    fwrite(St, <<";\n">>, []). + +quoted_atom(Atom) -> +    io_lib:fwrite(<<"~w">>, [Atom]). +     +output_inlined(St, UserCodeActions, Infile) -> +    foldl(fun(#user_code{funname = InlinedFunctionName,  +                         action = Action}, St_0) -> +                  output_inlined(St_0, InlinedFunctionName,  +                                 Action, Infile) +          end, St, UserCodeActions). + +%% Each action with user code is placed in a separate inlined function. +%% The purpose is to be able to pinpoint errors and warnings correctly. +output_inlined(St0, FunctionName, Reduce, Infile) -> +    #reduce{rule_nmbr = RuleNmbr, nmbr_of_daughters = N_daughters} = Reduce, +    #rule{tokens = Tokens, is_well_formed = WF} = get_rule(RuleNmbr, St0), +    Line0 = first_line(Tokens), +    NLines = last_line(Tokens) - Line0, + +    St5 = if  +              WF -> +                  St0; +              not WF ->  +                  %% The compiler will generate an error message for +                  %% the inlined function (unless the reason that yecc +                  %% failed to parse the action was some macro). The +                  %% line number of the message will be correct since +                  %% we are keeping track of the current line of the +                  %% output file... +                  #yecc{outfile = Outfile, line = CurLine} = St0, +                  output_file_directive(St0, Outfile, CurLine) +          end, + +    CodeStartLine = lists:max([0, Line0 - 4]), +    St10 = fwrite(St5, <<"-compile({inline,~w/1}).\n">>, [FunctionName]), +    St20 = output_file_directive(St10, Infile, CodeStartLine), +    St30 = fwrite(St20, <<"~w(__Stack0) ->\n">>, [FunctionName]), +    %% Currently the (old) inliner emits less code if matching the +    %% stack inside the body rather than in the head... +    St40 = case N_daughters of +               0 ->  +                   Stack = "__Stack0", +                   St30; +               _ ->  +                   Stack = "__Stack", +                   A = concat(flatmap(fun(I) -> [",__",I] end,  +                                      lists:seq(N_daughters, 1, -1))), +                   fwrite(St30, <<" ~s = __Stack0,\n">>,  +                          [append(["[", tl(A), " | __Stack]"])]) +           end, +    St = St40#yecc{line = St40#yecc.line + NLines}, +    fwrite(St, <<" [begin\n  ~s\n  end | ~s].\n\n">>,  +           [pp_tokens(Tokens, Line0), Stack]). + +inlined_function_name(State, "Cat") -> +    inlined_function_name(State, ""); +inlined_function_name(State, Terminal) -> +    list_to_atom(concat([yeccpars2_, State, '_', Terminal])). + +-compile({nowarn_unused_function,function_name/2}). +function_name(Name, Suf) -> +    list_to_atom(concat([Name, '_' | quoted_atom(Suf)])). + +rule(RulePointer, St) -> +    #rule{n = N, line = Line, symbols = Symbols} =  +        dict:fetch(RulePointer, St#yecc.rule_pointer2rule), +    {Symbols, Line, N}. + +get_rule(RuleNmbr, St) -> +    dict:fetch(RuleNmbr, St#yecc.rule_pointer2rule). + +tokens(RuleNmbr, St) -> +    Rule = dict:fetch(RuleNmbr, St#yecc.rule_pointer2rule), +    Rule#rule.tokens. + +goto(From, Symbol, St) -> +    case ets:lookup(St#yecc.goto_tab, {From, Symbol}) of +        [{_, To}] -> +            To; +        [] -> +            erlang:error({error_in_goto_table, From, Symbol}) +    end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Auxiliaries: + +-ifdef(SYMBOLS_AS_CODES). + +%%% Bit mask operations. + +-compile({inline,[set_empty/0]}). +set_empty() -> +    0. + +set_add(I, BM) -> +    (1 bsl I) bor BM. + +-compile({inline,[set_member/2]}). +set_member(I, BM) -> +    ((1 bsl I) band BM) =/= 0. + +%% Assumes I is a member... +-compile({inline,[set_delete/2]}). +set_delete(I, BM) -> +    (1 bsl I) bxor BM. + +-compile({inline,[set_union/2]}). +set_union(BM1, BM2) -> +    BM1 bor BM2. + +-compile({inline,[set_is_subset/2]}). +set_is_subset(BM1, BM2) -> +    (BM1 band BM2) =:= BM1. + +empty_member(BM) -> +    set_member(0, BM). + +empty_delete(BM) -> +    set_delete(0, BM). + +code_symbols(Ss, SymbolTable) -> +    map(fun(S) -> ets:lookup_element(SymbolTable, S, 2) end, Ss). + +decode_symbol(C, InvSymbolTable) -> +    ets:lookup_element(InvSymbolTable, C, 1). + +code_terminal(T, SymbolTab) -> +    set_add(ets:lookup_element(SymbolTab, T, 2), 0). + +decode_terminals(BM, InvSymbolTab) -> +    case get(BM) of +        undefined ->  +            Symbols = decode_terminals(BM, 0, InvSymbolTab), +            put(BM, Symbols), +            Symbols; +        Symbols ->  +            Symbols +    end. + +decode_terminals(0, _I, _InvSymbolTab) -> +    []; +decode_terminals(BM, I, InvSymbolTab) -> +    case set_member(I, BM) of +        true -> +            [ets:lookup_element(InvSymbolTab, I, 1) +             | decode_terminals(set_delete(I, BM), I+1, InvSymbolTab)]; +        false -> +            decode_terminals(BM, I+1, InvSymbolTab) +    end. + +set_add_terminal({_Symbol, TerminalNum}, BM) -> +    set_add(TerminalNum, BM). + +-compile({inline,[is_terminal/2]}). +is_terminal(_Tab, SymbolCode) -> +    SymbolCode >= 0. + +left_corner_symbol_table(St) -> +    St#yecc.inv_symbol_tab. + +-else. + +set_empty() -> +    []. + +set_add(Symbol, L) -> +    ordsets:union([Symbol], L). + +set_union(Es1, Es2) -> +    ordsets:union(Es1, Es2). + +set_is_subset(Es1, Es2) -> +    ordsets:is_subset(Es1, Es2). + +code_symbols(Ss, _SymbolTab) -> +    Ss. + +decode_symbol(S, _InvSymbolTab) -> +    S. + +code_terminal(T, _SymbolTab) -> +    [T]. + +decode_terminals(Ts, _InvSymbolTab) -> +    Ts. + +empty_member(['$empty' | _]) -> +    true; +empty_member(_) -> +    false. + +empty_delete(['$empty' | Terminals]) -> +    Terminals. + +set_add_terminal({Symbol, _TerminalNum}, L) -> +    set_add(Symbol, L). + +is_terminal(Tab, SymbolName) -> +   ets:lookup_element(Tab, SymbolName, 2) >= 0. + +left_corner_symbol_table(St) -> +    St#yecc.symbol_tab. + +-endif. % SYMBOLS_AS_CODES + +intersect(L1, L2) -> +    ordsets:to_list(ordsets:intersection(ordsets:from_list(L1), +                                         ordsets:from_list(L2))). + +format_symbols([Sym | Syms]) -> +    concat([format_symbol(Sym) | format_symbols1(Syms)]). + +format_symbols1([]) -> +    []; +format_symbols1([H | T]) -> +    [" ", format_symbol(H) | format_symbols1(T)]. + +include(St, File, Outport) -> +    case file:open(File, [read]) of +        {error, Reason} -> +            throw(add_error(File, none, {file_error, Reason}, St)); +        {ok, Inport} -> +            Line = io:get_line(Inport, ''), +            N_lines = include1(Line, Inport, Outport), +            file:close(Inport), +            N_lines +    end. + +include1(Line, Inport, Outport) -> +    include1(Line, Inport, Outport, 0). + +include1(eof, _, _, Nmbr_of_lines) -> +    Nmbr_of_lines; +include1(Line, Inport, Outport, Nmbr_of_lines) -> +    Incr = case member($\n, Line) of +               true -> 1; +               false -> 0 +           end, +    io:put_chars(Outport, Line), +    include1(io:get_line(Inport, ''), Inport, Outport, Nmbr_of_lines + Incr). + +includefile_version([]) -> +    {1,2}; +includefile_version(Includefile) -> +    case epp:open(Includefile, []) of +        {ok, Epp} -> +            try +                parse_file(Epp) +            after +                epp:close(Epp) +            end; +        {error, _Error} -> +            {1,1} +    end. + +parse_file(Epp) -> +    case epp:parse_erl_form(Epp) of +        {ok, {function,_Line,yeccpars1,7,_Clauses}} -> +            {1,2}; +        {eof,_Line} -> +            {1,1}; +        _Form -> +            parse_file(Epp) +    end. + +%% Keeps the line breaks of the original code. +pp_tokens(Tokens, Line0) -> +    concat(pp_tokens1(Tokens, Line0, [])). +     +pp_tokens1([], _Line0, _T0) -> +    []; +pp_tokens1([T | Ts], Line0, T0) -> +    Line = element(2, T), +    [pp_sep(Line, Line0, T0), pp_symbol(T) | pp_tokens1(Ts, Line, T)]. + +pp_symbol({var,_,Var}) -> Var; +pp_symbol({_,_,Symbol}) -> io_lib:fwrite(<<"~p">>, [Symbol]); +pp_symbol({Symbol, _}) -> Symbol. + +pp_sep(Line, Line0, T0) when Line > Line0 ->  +    ["\n   " | pp_sep(Line - 1, Line0, T0)]; +pp_sep(_Line, _Line0, {'.',_}) ->  +    ""; +pp_sep(_Line, _Line0, _T0) ->  +    " ". + +output_file_directive(St, Filename, Line) when St#yecc.file_attrs -> +    fwrite(St, <<"-file(~s, ~w).\n">>,  +           [format_filename(Filename), Line]); +output_file_directive(St, _Filename, _Line) -> +    St. + +first_line(Tokens) -> +    element(2, hd(Tokens)). + +last_line(Tokens) -> +    element(2, lists:last(Tokens)). + +%% Keep track of the current line in the generated file. +fwrite(#yecc{outport = Outport, line = Line}=St, Format, Args) -> +    NLines = count_nl(Format), +    io:fwrite(Outport, Format, Args), +    St#yecc{line = Line + NLines}. + +%% Assumes \n is used, and never ~n. +count_nl(<<$\n,Rest/binary>>) -> +    1 + count_nl(Rest); +count_nl(<<_,Rest/binary>>) -> +    count_nl(Rest); +count_nl(<<>>) -> +    0. + +nl(#yecc{outport = Outport, line = Line}=St) -> +    io:nl(Outport), +    St#yecc{line = Line + 1}. + +format_filename(Filename) -> +    io_lib:write_string(filename:flatten(Filename)). + +format_assoc(left) -> +    "Left"; +format_assoc(right) -> +    "Right"; +format_assoc(unary) -> +    "Unary"; +format_assoc(nonassoc) -> +    "Nonassoc". + +format_symbol(Symbol) -> +    String = concat([Symbol]), +    case erl_scan:string(String) of +        {ok, [{atom, _, _}], _} -> +            io_lib:fwrite(<<"~w">>, [Symbol]); +        {ok, [{Word, _}], _} when Word =/= ':', Word =/= '->' -> +            case erl_scan:reserved_word(Word) of +                true -> +                    String; +                false -> +                    io_lib:fwrite(<<"~w">>, [Symbol]) +            end; +        {ok, [{var, _, _}], _} -> +            String; +        _ ->  +            io_lib:fwrite(<<"~w">>, [Symbol]) +    end. + +inverse(L) -> +    sort([{A,B} || {B,A} <- L]). + +family(L) -> +    sofs:to_external(sofs:relation_to_family(sofs:relation(L))). + +seq1(To) when To < 1 -> +    []; +seq1(To) -> +    lists:seq(1, To). + +count(From, L) -> +    lists:zip(L, lists:seq(From, length(L)-1+From)). + +family_with_domain(L, DL) -> +    sofs:to_external(sofs_family_with_domain(sofs:relation(L), sofs:set(DL))). + +sofs_family_with_domain(R0, D) -> +    R = sofs:restriction(R0, D), +    F = sofs:relation_to_family(R), +    FD = sofs:constant_function(D, sofs:from_term([])), +    sofs:family_union(F, FD). diff --git a/lib/parsetools/src/yeccgramm.yrl b/lib/parsetools/src/yeccgramm.yrl new file mode 100644 index 0000000000..562a9a7458 --- /dev/null +++ b/lib/parsetools/src/yeccgramm.yrl @@ -0,0 +1,74 @@ +%% +%% %CopyrightBegin% +%%  +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%%  +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%%  +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%%  +%% %CopyrightEnd% +%% + +%% This is the syntax (metagrammar) of grammar definitions of the yecc +%% parser generator. + +Nonterminals +grammar declaration rule head symbol symbols strings attached_code +token tokens. + +Terminals +atom float integer reserved_symbol reserved_word string char var +'->' ':' dot. + +Rootsymbol grammar. + +grammar -> declaration : '$1'. +grammar -> rule : '$1'. +declaration -> symbol symbols dot: {'$1', '$2'}. +declaration -> symbol strings dot: {'$1', '$2'}. +rule -> head '->' symbols attached_code dot: {rule, ['$1' | '$3'], '$4'}. +head -> symbol : '$1'. +symbols -> symbol : ['$1']. +symbols -> symbol symbols : ['$1' | '$2']. +strings -> string : ['$1']. +strings -> string strings : ['$1' | '$2']. +attached_code -> ':' tokens : {erlang_code, '$2'}. +attached_code -> '$empty' : {erlang_code, [{atom, 0, '$undefined'}]}. +tokens -> token : ['$1']. +tokens -> token tokens : ['$1' | '$2']. +symbol -> var : symbol('$1'). +symbol -> atom : symbol('$1'). +symbol -> integer : symbol('$1'). +symbol -> reserved_word : symbol('$1'). +token -> var : '$1'. +token -> atom : '$1'. +token -> float : '$1'. +token -> integer : '$1'. +token -> string : '$1'. +token -> char : '$1'. +token -> reserved_symbol : {value_of('$1'), line_of('$1')}. +token -> reserved_word : {value_of('$1'), line_of('$1')}. +token -> '->' : {'->', line_of('$1')}. % Have to be treated in this +token -> ':' : {':', line_of('$1')}.   % manner, because they are also +				       % special symbols of the metagrammar + +Erlang code. + +-record(symbol, {line, name}). + +symbol(Symbol) -> +    #symbol{line = line_of(Symbol), name = value_of(Symbol)}. + +value_of(Token) -> +    element(3, Token). + +line_of(Token) -> +    element(2, Token). diff --git a/lib/parsetools/src/yeccparser.erl b/lib/parsetools/src/yeccparser.erl new file mode 100644 index 0000000000..80a6bbce0e --- /dev/null +++ b/lib/parsetools/src/yeccparser.erl @@ -0,0 +1,642 @@ +-module(yeccparser). +-export([parse/1, parse_and_scan/1, format_error/1]). +-file("yeccgramm.yrl", 63). + +-record(symbol, {line, name}). + +symbol(Symbol) -> +    #symbol{line = line_of(Symbol), name = value_of(Symbol)}. + +value_of(Token) -> +    element(3, Token). + +line_of(Token) -> +    element(2, Token). + +-file("/clearcase/otp/erts/lib/parsetools/include/yeccpre.hrl", 0). +%% +%% %CopyrightBegin% +%%  +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%%  +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%%  +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%%  +%% %CopyrightEnd% +%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% The parser generator will insert appropriate declarations before this line.% + +-type(yecc_ret() :: {'error', _} | {'ok', _}). + +-spec(parse/1 :: (_) -> yecc_ret()). +parse(Tokens) -> +    yeccpars0(Tokens, false). + +-spec(parse_and_scan/1 :: +      ({function() | {atom(), atom()}, [_]} | {atom(), atom(), [_]}) -> +            yecc_ret()). +parse_and_scan({F, A}) -> % Fun or {M, F} +    yeccpars0([], {F, A}); +parse_and_scan({M, F, A}) -> +    yeccpars0([], {{M, F}, A}). + +-spec(format_error/1 :: (any()) -> [char() | list()]). +format_error(Message) -> +    case io_lib:deep_char_list(Message) of +	true -> +	    Message; +	_ -> +	    io_lib:write(Message) +    end. + +% To be used in grammar files to throw an error message to the parser +% toplevel. Doesn't have to be exported! +-compile({nowarn_unused_function,{return_error,2}}). +-spec(return_error/2 :: (integer(), any()) -> no_return()). +return_error(Line, Message) -> +    throw({error, {Line, ?MODULE, Message}}). + +-define(CODE_VERSION, "1.3"). + +yeccpars0(Tokens, MFA) -> +    try yeccpars1(Tokens, MFA, 0, [], []) +    catch  +        error: Error -> +            Stacktrace = erlang:get_stacktrace(), +            try yecc_error_type(Error, Stacktrace) of +                {syntax_error, Token} -> +                    yeccerror(Token); +                {missing_in_goto_table=Tag, Symbol, State} -> +                    Desc = {Symbol, State, Tag}, +                    erlang:raise(error, {yecc_bug, ?CODE_VERSION, Desc}, +                                Stacktrace) +            catch _:_ -> erlang:raise(error, Error, Stacktrace) +            end; +        throw: {error, {_Line, ?MODULE, _M}} = Error ->  +            Error % probably from return_error/2 +    end. + +yecc_error_type(function_clause, [{?MODULE,F,[State,_,_,_,Token,_,_]} | _]) -> +    case atom_to_list(F) of +        "yeccpars2" ++ _ -> +            {syntax_error, Token}; +        "yeccgoto_" ++ SymbolL -> +            {ok,[{atom,_,Symbol}],_} = erl_scan:string(SymbolL), +            {missing_in_goto_table, Symbol, State} +    end. + +yeccpars1([Token | Tokens], Tokenizer, State, States, Vstack) -> +    yeccpars2(State, element(1, Token), States, Vstack, Token, Tokens,  +              Tokenizer); +yeccpars1([], {F, A}, State, States, Vstack) -> +    case apply(F, A) of +        {ok, Tokens, _Endline} -> +	    yeccpars1(Tokens, {F, A}, State, States, Vstack); +        {eof, _Endline} -> +            yeccpars1([], false, State, States, Vstack); +        {error, Descriptor, _Endline} -> +            {error, Descriptor} +    end; +yeccpars1([], false, State, States, Vstack) -> +    yeccpars2(State, '$end', States, Vstack, {'$end', 999999}, [], false). + +%% yeccpars1/7 is called from generated code. +%% +%% When using the {includefile, Includefile} option, make sure that +%% yeccpars1/7 can be found by parsing the file without following +%% include directives. yecc will otherwise assume that an old +%% yeccpre.hrl is included (one which defines yeccpars1/5). +yeccpars1(State1, State, States, Vstack, Stack1, [Token | Tokens],  +          Tokenizer) -> +    yeccpars2(State, element(1, Token), [State1 | States], +              [Stack1 | Vstack], Token, Tokens, Tokenizer); +yeccpars1(State1, State, States, Vstack, Stack1, [], {F, A}) -> +    case apply(F, A) of +        {ok, Tokens, _Endline} -> +	    yeccpars1(State1, State, States, Vstack, Stack1, Tokens, {F, A}); +        {eof, _Endline} -> +            yeccpars1(State1, State, States, Vstack, Stack1, [], false); +        {error, Descriptor, _Endline} -> +            {error, Descriptor} +    end; +yeccpars1(State1, State, States, Vstack, Stack1, [], false) -> +    yeccpars2(State, '$end', [State1 | States], [Stack1 | Vstack], +              {'$end', 999999}, [], false). + +% For internal use only. +yeccerror(Token) -> +    Text = case catch erl_scan:token_info(Token, text) of +               {text, Txt} -> Txt; +               _ -> yecctoken2string(Token) +           end, +    Location = case catch erl_scan:token_info(Token, location) of +                   {location, Loc} -> Loc; +                   _ -> element(2, Token) +               end, +    {error, {Location, ?MODULE, ["syntax error before: ", Text]}}. + +yecctoken2string({atom, _, A}) -> io_lib:write(A); +yecctoken2string({integer,_,N}) -> io_lib:write(N); +yecctoken2string({float,_,F}) -> io_lib:write(F); +yecctoken2string({char,_,C}) -> io_lib:write_char(C); +yecctoken2string({var,_,V}) -> io_lib:format("~s", [V]); +yecctoken2string({string,_,S}) -> io_lib:write_unicode_string(S); +yecctoken2string({reserved_symbol, _, A}) -> io_lib:format("~w", [A]); +yecctoken2string({_Cat, _, Val}) -> io_lib:format("~w", [Val]); +yecctoken2string({dot, _}) -> "'.'"; +yecctoken2string({'$end', _}) -> +    []; +yecctoken2string({Other, _}) when is_atom(Other) -> +    io_lib:format("~w", [Other]); +yecctoken2string(Other) -> +    io_lib:write(Other). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + +-file("yeccparser.erl", 168). + +yeccpars2(0=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_0(S, Cat, Ss, Stack, T, Ts, Tzr); +%% yeccpars2(1=S, Cat, Ss, Stack, T, Ts, Tzr) -> +%%  yeccpars2_1(S, Cat, Ss, Stack, T, Ts, Tzr); +%% yeccpars2(2=S, Cat, Ss, Stack, T, Ts, Tzr) -> +%%  yeccpars2_2(S, Cat, Ss, Stack, T, Ts, Tzr); +%% yeccpars2(3=S, Cat, Ss, Stack, T, Ts, Tzr) -> +%%  yeccpars2_3(S, Cat, Ss, Stack, T, Ts, Tzr); +%% yeccpars2(4=S, Cat, Ss, Stack, T, Ts, Tzr) -> +%%  yeccpars2_4(S, Cat, Ss, Stack, T, Ts, Tzr); +%% yeccpars2(5=S, Cat, Ss, Stack, T, Ts, Tzr) -> +%%  yeccpars2_5(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(6=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_6(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(7=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_7(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(8=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_8(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(9=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_9(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(10=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_0(S, Cat, Ss, Stack, T, Ts, Tzr); +%% yeccpars2(11=S, Cat, Ss, Stack, T, Ts, Tzr) -> +%%  yeccpars2_11(S, Cat, Ss, Stack, T, Ts, Tzr); +%% yeccpars2(12=S, Cat, Ss, Stack, T, Ts, Tzr) -> +%%  yeccpars2_12(S, Cat, Ss, Stack, T, Ts, Tzr); +%% yeccpars2(13=S, Cat, Ss, Stack, T, Ts, Tzr) -> +%%  yeccpars2_13(S, Cat, Ss, Stack, T, Ts, Tzr); +%% yeccpars2(14=S, Cat, Ss, Stack, T, Ts, Tzr) -> +%%  yeccpars2_14(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(15=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_15(S, Cat, Ss, Stack, T, Ts, Tzr); +%% yeccpars2(16=S, Cat, Ss, Stack, T, Ts, Tzr) -> +%%  yeccpars2_16(S, Cat, Ss, Stack, T, Ts, Tzr); +%% yeccpars2(17=S, Cat, Ss, Stack, T, Ts, Tzr) -> +%%  yeccpars2_17(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(18=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_18(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(19=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_19(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(20=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_20(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(21=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_21(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(22=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_22(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(23=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_23(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(24=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_24(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(25=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_25(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(26=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_26(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(27=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_27(S, Cat, Ss, Stack, T, Ts, Tzr); +%% yeccpars2(28=S, Cat, Ss, Stack, T, Ts, Tzr) -> +%%  yeccpars2_28(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(29=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_29(S, Cat, Ss, Stack, T, Ts, Tzr); +%% yeccpars2(30=S, Cat, Ss, Stack, T, Ts, Tzr) -> +%%  yeccpars2_30(S, Cat, Ss, Stack, T, Ts, Tzr); +%% yeccpars2(31=S, Cat, Ss, Stack, T, Ts, Tzr) -> +%%  yeccpars2_31(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(32=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_32(S, Cat, Ss, Stack, T, Ts, Tzr); +%% yeccpars2(33=S, Cat, Ss, Stack, T, Ts, Tzr) -> +%%  yeccpars2_33(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(34=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_34(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(35=S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_35(S, Cat, Ss, Stack, T, Ts, Tzr); +yeccpars2(Other, _, _, _, _, _, _) -> + erlang:error({yecc_bug,"1.3",{missing_state_in_action_table, Other}}). + +yeccpars2_0(S, atom, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 6, Ss, Stack, T, Ts, Tzr); +yeccpars2_0(S, integer, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 7, Ss, Stack, T, Ts, Tzr); +yeccpars2_0(S, reserved_word, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 8, Ss, Stack, T, Ts, Tzr); +yeccpars2_0(S, var, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 9, Ss, Stack, T, Ts, Tzr). + +yeccpars2_1(S, atom, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 6, Ss, Stack, T, Ts, Tzr); +yeccpars2_1(S, integer, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 7, Ss, Stack, T, Ts, Tzr); +yeccpars2_1(S, reserved_word, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 8, Ss, Stack, T, Ts, Tzr); +yeccpars2_1(S, string, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 32, Ss, Stack, T, Ts, Tzr); +yeccpars2_1(S, var, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 9, Ss, Stack, T, Ts, Tzr); +yeccpars2_1(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccgoto_head(hd(Ss), Cat, Ss, Stack, T, Ts, Tzr). + +yeccpars2_2(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccgoto_grammar(hd(Ss), Cat, Ss, Stack, T, Ts, Tzr). + +yeccpars2_3(S, '->', Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 10, Ss, Stack, T, Ts, Tzr). + +yeccpars2_4(_S, '$end', _Ss, Stack,  _T, _Ts, _Tzr) -> + {ok, hd(Stack)}. + +yeccpars2_5(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccgoto_grammar(hd(Ss), Cat, Ss, Stack, T, Ts, Tzr). + +yeccpars2_6(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + NewStack = yeccpars2_6_(Stack), + yeccgoto_symbol(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr). + +yeccpars2_7(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + NewStack = yeccpars2_7_(Stack), + yeccgoto_symbol(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr). + +yeccpars2_8(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + NewStack = yeccpars2_8_(Stack), + yeccgoto_symbol(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr). + +yeccpars2_9(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + NewStack = yeccpars2_9_(Stack), + yeccgoto_symbol(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr). + +%% yeccpars2_10: see yeccpars2_0 + +yeccpars2_11(S, ':', Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 15, Ss, Stack, T, Ts, Tzr); +yeccpars2_11(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + NewStack = yeccpars2_11_(Stack), + yeccpars2_14(14, Cat, [11 | Ss], NewStack, T, Ts, Tzr). + +yeccpars2_12(S, atom, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 6, Ss, Stack, T, Ts, Tzr); +yeccpars2_12(S, integer, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 7, Ss, Stack, T, Ts, Tzr); +yeccpars2_12(S, reserved_word, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 8, Ss, Stack, T, Ts, Tzr); +yeccpars2_12(S, var, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 9, Ss, Stack, T, Ts, Tzr); +yeccpars2_12(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + NewStack = yeccpars2_12_(Stack), + yeccgoto_symbols(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr). + +yeccpars2_13(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + [_|Nss] = Ss, + NewStack = yeccpars2_13_(Stack), + yeccgoto_symbols(hd(Nss), Cat, Nss, NewStack, T, Ts, Tzr). + +yeccpars2_14(S, dot, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 29, Ss, Stack, T, Ts, Tzr). + +yeccpars2_15(S, '->', Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 18, Ss, Stack, T, Ts, Tzr); +yeccpars2_15(S, ':', Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 19, Ss, Stack, T, Ts, Tzr); +yeccpars2_15(S, atom, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 20, Ss, Stack, T, Ts, Tzr); +yeccpars2_15(S, char, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 21, Ss, Stack, T, Ts, Tzr); +yeccpars2_15(S, float, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 22, Ss, Stack, T, Ts, Tzr); +yeccpars2_15(S, integer, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 23, Ss, Stack, T, Ts, Tzr); +yeccpars2_15(S, reserved_symbol, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 24, Ss, Stack, T, Ts, Tzr); +yeccpars2_15(S, reserved_word, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 25, Ss, Stack, T, Ts, Tzr); +yeccpars2_15(S, string, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 26, Ss, Stack, T, Ts, Tzr); +yeccpars2_15(S, var, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 27, Ss, Stack, T, Ts, Tzr). + +yeccpars2_16(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + [_|Nss] = Ss, + NewStack = yeccpars2_16_(Stack), + yeccgoto_attached_code(hd(Nss), Cat, Nss, NewStack, T, Ts, Tzr). + +yeccpars2_17(S, '->', Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 18, Ss, Stack, T, Ts, Tzr); +yeccpars2_17(S, ':', Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 19, Ss, Stack, T, Ts, Tzr); +yeccpars2_17(S, atom, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 20, Ss, Stack, T, Ts, Tzr); +yeccpars2_17(S, char, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 21, Ss, Stack, T, Ts, Tzr); +yeccpars2_17(S, float, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 22, Ss, Stack, T, Ts, Tzr); +yeccpars2_17(S, integer, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 23, Ss, Stack, T, Ts, Tzr); +yeccpars2_17(S, reserved_symbol, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 24, Ss, Stack, T, Ts, Tzr); +yeccpars2_17(S, reserved_word, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 25, Ss, Stack, T, Ts, Tzr); +yeccpars2_17(S, string, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 26, Ss, Stack, T, Ts, Tzr); +yeccpars2_17(S, var, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 27, Ss, Stack, T, Ts, Tzr); +yeccpars2_17(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + NewStack = yeccpars2_17_(Stack), + yeccgoto_tokens(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr). + +yeccpars2_18(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + NewStack = yeccpars2_18_(Stack), + yeccgoto_token(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr). + +yeccpars2_19(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + NewStack = yeccpars2_19_(Stack), + yeccgoto_token(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr). + +yeccpars2_20(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccgoto_token(hd(Ss), Cat, Ss, Stack, T, Ts, Tzr). + +yeccpars2_21(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccgoto_token(hd(Ss), Cat, Ss, Stack, T, Ts, Tzr). + +yeccpars2_22(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccgoto_token(hd(Ss), Cat, Ss, Stack, T, Ts, Tzr). + +yeccpars2_23(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccgoto_token(hd(Ss), Cat, Ss, Stack, T, Ts, Tzr). + +yeccpars2_24(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + NewStack = yeccpars2_24_(Stack), + yeccgoto_token(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr). + +yeccpars2_25(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + NewStack = yeccpars2_25_(Stack), + yeccgoto_token(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr). + +yeccpars2_26(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccgoto_token(hd(Ss), Cat, Ss, Stack, T, Ts, Tzr). + +yeccpars2_27(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccgoto_token(hd(Ss), Cat, Ss, Stack, T, Ts, Tzr). + +yeccpars2_28(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + [_|Nss] = Ss, + NewStack = yeccpars2_28_(Stack), + yeccgoto_tokens(hd(Nss), Cat, Nss, NewStack, T, Ts, Tzr). + +yeccpars2_29(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + [_,_,_,_|Nss] = Ss, + NewStack = yeccpars2_29_(Stack), + yeccgoto_rule(hd(Nss), Cat, Nss, NewStack, T, Ts, Tzr). + +yeccpars2_30(S, dot, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 35, Ss, Stack, T, Ts, Tzr). + +yeccpars2_31(S, dot, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 34, Ss, Stack, T, Ts, Tzr). + +yeccpars2_32(S, string, Ss, Stack, T, Ts, Tzr) -> + yeccpars1(S, 32, Ss, Stack, T, Ts, Tzr); +yeccpars2_32(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + NewStack = yeccpars2_32_(Stack), + yeccgoto_strings(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr). + +yeccpars2_33(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + [_|Nss] = Ss, + NewStack = yeccpars2_33_(Stack), + yeccgoto_strings(hd(Nss), Cat, Nss, NewStack, T, Ts, Tzr). + +yeccpars2_34(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + [_,_|Nss] = Ss, + NewStack = yeccpars2_34_(Stack), + yeccgoto_declaration(hd(Nss), Cat, Nss, NewStack, T, Ts, Tzr). + +yeccpars2_35(_S, Cat, Ss, Stack, T, Ts, Tzr) -> + [_,_|Nss] = Ss, + NewStack = yeccpars2_35_(Stack), + yeccgoto_declaration(hd(Nss), Cat, Nss, NewStack, T, Ts, Tzr). + +yeccgoto_attached_code(11, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_14(14, Cat, Ss, Stack, T, Ts, Tzr). + +yeccgoto_declaration(0=_S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_5(_S, Cat, Ss, Stack, T, Ts, Tzr). + +yeccgoto_grammar(0, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_4(4, Cat, Ss, Stack, T, Ts, Tzr). + +yeccgoto_head(0, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_3(3, Cat, Ss, Stack, T, Ts, Tzr). + +yeccgoto_rule(0=_S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_2(_S, Cat, Ss, Stack, T, Ts, Tzr). + +yeccgoto_strings(1, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_31(31, Cat, Ss, Stack, T, Ts, Tzr); +yeccgoto_strings(32=_S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_33(_S, Cat, Ss, Stack, T, Ts, Tzr). + +yeccgoto_symbol(0, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_1(1, Cat, Ss, Stack, T, Ts, Tzr); +yeccgoto_symbol(1, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_12(12, Cat, Ss, Stack, T, Ts, Tzr); +yeccgoto_symbol(10, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_12(12, Cat, Ss, Stack, T, Ts, Tzr); +yeccgoto_symbol(12, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_12(12, Cat, Ss, Stack, T, Ts, Tzr). + +yeccgoto_symbols(1, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_30(30, Cat, Ss, Stack, T, Ts, Tzr); +yeccgoto_symbols(10, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_11(11, Cat, Ss, Stack, T, Ts, Tzr); +yeccgoto_symbols(12=_S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_13(_S, Cat, Ss, Stack, T, Ts, Tzr). + +yeccgoto_token(15, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_17(17, Cat, Ss, Stack, T, Ts, Tzr); +yeccgoto_token(17, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_17(17, Cat, Ss, Stack, T, Ts, Tzr). + +yeccgoto_tokens(15=_S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_16(_S, Cat, Ss, Stack, T, Ts, Tzr); +yeccgoto_tokens(17=_S, Cat, Ss, Stack, T, Ts, Tzr) -> + yeccpars2_28(_S, Cat, Ss, Stack, T, Ts, Tzr). + +-compile({inline,{yeccpars2_6_,1}}). +-file("yeccgramm.yrl", 44). +yeccpars2_6_(__Stack0) -> + [__1 | __Stack] = __Stack0, + [begin +   symbol ( __1 ) +  end | __Stack]. + +-compile({inline,{yeccpars2_7_,1}}). +-file("yeccgramm.yrl", 45). +yeccpars2_7_(__Stack0) -> + [__1 | __Stack] = __Stack0, + [begin +   symbol ( __1 ) +  end | __Stack]. + +-compile({inline,{yeccpars2_8_,1}}). +-file("yeccgramm.yrl", 46). +yeccpars2_8_(__Stack0) -> + [__1 | __Stack] = __Stack0, + [begin +   symbol ( __1 ) +  end | __Stack]. + +-compile({inline,{yeccpars2_9_,1}}). +-file("yeccgramm.yrl", 43). +yeccpars2_9_(__Stack0) -> + [__1 | __Stack] = __Stack0, + [begin +   symbol ( __1 ) +  end | __Stack]. + +-compile({inline,{yeccpars2_11_,1}}). +-file("yeccgramm.yrl", 40). +yeccpars2_11_(__Stack0) -> + [begin +   { erlang_code , [ { atom , 0 , '$undefined' } ] } +  end | __Stack0]. + +-compile({inline,{yeccpars2_12_,1}}). +-file("yeccgramm.yrl", 35). +yeccpars2_12_(__Stack0) -> + [__1 | __Stack] = __Stack0, + [begin +   [ __1 ] +  end | __Stack]. + +-compile({inline,{yeccpars2_13_,1}}). +-file("yeccgramm.yrl", 36). +yeccpars2_13_(__Stack0) -> + [__2,__1 | __Stack] = __Stack0, + [begin +   [ __1 | __2 ] +  end | __Stack]. + +-compile({inline,{yeccpars2_16_,1}}). +-file("yeccgramm.yrl", 39). +yeccpars2_16_(__Stack0) -> + [__2,__1 | __Stack] = __Stack0, + [begin +   { erlang_code , __2 } +  end | __Stack]. + +-compile({inline,{yeccpars2_17_,1}}). +-file("yeccgramm.yrl", 41). +yeccpars2_17_(__Stack0) -> + [__1 | __Stack] = __Stack0, + [begin +   [ __1 ] +  end | __Stack]. + +-compile({inline,{yeccpars2_18_,1}}). +-file("yeccgramm.yrl", 55). +yeccpars2_18_(__Stack0) -> + [__1 | __Stack] = __Stack0, + [begin +   { '->' , line_of ( __1 ) } +  end | __Stack]. + +-compile({inline,{yeccpars2_19_,1}}). +-file("yeccgramm.yrl", 56). +yeccpars2_19_(__Stack0) -> + [__1 | __Stack] = __Stack0, + [begin +   { ':' , line_of ( __1 ) } +  end | __Stack]. + +-compile({inline,{yeccpars2_24_,1}}). +-file("yeccgramm.yrl", 53). +yeccpars2_24_(__Stack0) -> + [__1 | __Stack] = __Stack0, + [begin +   { value_of ( __1 ) , line_of ( __1 ) } +  end | __Stack]. + +-compile({inline,{yeccpars2_25_,1}}). +-file("yeccgramm.yrl", 54). +yeccpars2_25_(__Stack0) -> + [__1 | __Stack] = __Stack0, + [begin +   { value_of ( __1 ) , line_of ( __1 ) } +  end | __Stack]. + +-compile({inline,{yeccpars2_28_,1}}). +-file("yeccgramm.yrl", 42). +yeccpars2_28_(__Stack0) -> + [__2,__1 | __Stack] = __Stack0, + [begin +   [ __1 | __2 ] +  end | __Stack]. + +-compile({inline,{yeccpars2_29_,1}}). +-file("yeccgramm.yrl", 33). +yeccpars2_29_(__Stack0) -> + [__5,__4,__3,__2,__1 | __Stack] = __Stack0, + [begin +   { rule , [ __1 | __3 ] , __4 } +  end | __Stack]. + +-compile({inline,{yeccpars2_32_,1}}). +-file("yeccgramm.yrl", 37). +yeccpars2_32_(__Stack0) -> + [__1 | __Stack] = __Stack0, + [begin +   [ __1 ] +  end | __Stack]. + +-compile({inline,{yeccpars2_33_,1}}). +-file("yeccgramm.yrl", 38). +yeccpars2_33_(__Stack0) -> + [__2,__1 | __Stack] = __Stack0, + [begin +   [ __1 | __2 ] +  end | __Stack]. + +-compile({inline,{yeccpars2_34_,1}}). +-file("yeccgramm.yrl", 32). +yeccpars2_34_(__Stack0) -> + [__3,__2,__1 | __Stack] = __Stack0, + [begin +   { __1 , __2 } +  end | __Stack]. + +-compile({inline,{yeccpars2_35_,1}}). +-file("yeccgramm.yrl", 31). +yeccpars2_35_(__Stack0) -> + [__3,__2,__1 | __Stack] = __Stack0, + [begin +   { __1 , __2 } +  end | __Stack]. + + +-file("yeccgramm.yrl", 75). diff --git a/lib/parsetools/src/yeccscan.erl b/lib/parsetools/src/yeccscan.erl new file mode 100644 index 0000000000..d7ec3ba8d3 --- /dev/null +++ b/lib/parsetools/src/yeccscan.erl @@ -0,0 +1,62 @@ +%% +%% %CopyrightBegin% +%%  +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%%  +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%%  +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%%  +%% %CopyrightEnd% +%% + +-module(yeccscan). +-export([scan/1, scan/3]). + +scan(Inport) -> +    scan(Inport, '', 1). + +scan(Inport, Prompt, Line1) -> +    case catch io:scan_erl_form(Inport, Prompt, Line1) of +	{eof, Line2} -> +	    {eof, Line2}; +	{ok, Tokens, Line2} -> +	    case Tokens of +		[] -> +		    scan(Inport, Prompt, Line2); +		_ -> +		    {ok, lex(Tokens), Line2} +	    end; +	{error, Descriptor, Line2} -> +	    {error, Descriptor, Line2}; +	{'EXIT', Why} -> +	    io:format('yeccscan: Error scanning input line ~w~n', [Line1]), +	    exit(Why) +    end. + +lex([]) -> +    []; +lex([Token | Tokens]) -> +    case Token of +	{'dot', Line} -> +	    [{'dot', Line} | lex(Tokens)]; +	{':', Line} -> +            [{':', Line} | lex(Tokens)]; +        {'->', Line} -> +            [{'->', Line} | lex(Tokens)]; +	{Category, Line, Symbol} -> +	    [{Category, Line, Symbol} | lex(Tokens)]; +	{Other, Line} -> +            Cat = case erl_scan:reserved_word(Other) of +                      true -> reserved_word; +                      false -> reserved_symbol +                  end, +            [{Cat, Line, Other} | lex(Tokens)] +    end. | 
