aboutsummaryrefslogtreecommitdiffstats
path: root/lib/parsetools/src
diff options
context:
space:
mode:
authorErlang/OTP <[email protected]>2009-11-20 14:54:40 +0000
committerErlang/OTP <[email protected]>2009-11-20 14:54:40 +0000
commit84adefa331c4159d432d22840663c38f155cd4c1 (patch)
treebff9a9c66adda4df2106dfd0e5c053ab182a12bd /lib/parsetools/src
downloadotp-84adefa331c4159d432d22840663c38f155cd4c1.tar.gz
otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.bz2
otp-84adefa331c4159d432d22840663c38f155cd4c1.zip
The R13B03 release.OTP_R13B03
Diffstat (limited to 'lib/parsetools/src')
-rw-r--r--lib/parsetools/src/Makefile100
-rw-r--r--lib/parsetools/src/esyntax.yrl360
-rw-r--r--lib/parsetools/src/leex.erl1608
-rw-r--r--lib/parsetools/src/parsetools.app.src33
-rw-r--r--lib/parsetools/src/parsetools.appup.src1
-rw-r--r--lib/parsetools/src/yecc.erl2531
-rw-r--r--lib/parsetools/src/yeccgramm.yrl74
-rw-r--r--lib/parsetools/src/yeccparser.erl642
-rw-r--r--lib/parsetools/src/yeccscan.erl62
9 files changed, 5411 insertions, 0 deletions
diff --git a/lib/parsetools/src/Makefile b/lib/parsetools/src/Makefile
new file mode 100644
index 0000000000..89e079e411
--- /dev/null
+++ b/lib/parsetools/src/Makefile
@@ -0,0 +1,100 @@
+#
+# %CopyrightBegin%
+#
+# Copyright Ericsson AB 1996-2009. All Rights Reserved.
+#
+# The contents of this file are subject to the Erlang Public License,
+# Version 1.1, (the "License"); you may not use this file except in
+# compliance with the License. You should have received a copy of the
+# Erlang Public License along with this software. If not, it can be
+# retrieved online at http://www.erlang.org/.
+#
+# Software distributed under the License is distributed on an "AS IS"
+# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+# the License for the specific language governing rights and limitations
+# under the License.
+#
+# %CopyrightEnd%
+#
+include $(ERL_TOP)/make/target.mk
+include $(ERL_TOP)/make/$(TARGET)/otp.mk
+
+# ----------------------------------------------------
+# Application version
+# ----------------------------------------------------
+include ../vsn.mk
+VSN=$(PARSETOOLS_VSN)
+
+# ----------------------------------------------------
+# Release directory specification
+# ----------------------------------------------------
+RELSYSDIR = $(RELEASE_PATH)/lib/parsetools-$(VSN)
+
+# ----------------------------------------------------
+# Target Specs
+# ----------------------------------------------------
+MODULES = \
+ leex \
+ yecc \
+ yeccparser \
+ yeccscan
+
+HRL_FILES = ../include/yeccpre.hrl ../include/leexinc.hrl
+
+ERL_FILES = $(MODULES:%=%.erl)
+
+TARGET_FILES = $(MODULES:%=$(EBIN)/%.$(EMULATOR)) $(APP_TARGET) $(APPUP_TARGET)
+
+APP_FILE = parsetools.app
+
+APP_SRC = $(APP_FILE).src
+APP_TARGET = $(EBIN)/$(APP_FILE)
+
+APPUP_FILE= parsetools.appup
+
+APPUP_SRC= $(APPUP_FILE).src
+APPUP_TARGET= $(EBIN)/$(APPUP_FILE)
+
+# ----------------------------------------------------
+# FLAGS
+# ----------------------------------------------------
+ERL_COMPILE_FLAGS += +warn_obsolete_guard -I$(ERL_TOP)/lib/stdlib/include
+
+# ----------------------------------------------------
+# Targets
+# ----------------------------------------------------
+
+debug opt: $(TARGET_FILES)
+
+clean:
+ rm -f $(TARGET_FILES)
+ rm -f core
+
+docs:
+
+
+# ----------------------------------------------------
+# Special Build Targets
+# ----------------------------------------------------
+
+$(APP_TARGET): $(APP_SRC) ../vsn.mk
+ sed -e 's;%VSN%;$(VSN);' $< > $@
+
+$(APPUP_TARGET): $(APPUP_SRC) ../vsn.mk
+ sed -e 's;%VSN%;$(VSN);' $< > $@
+
+# ----------------------------------------------------
+# Release Target
+# ----------------------------------------------------
+include $(ERL_TOP)/make/otp_release_targets.mk
+
+release_spec: opt
+ $(INSTALL_DIR) $(RELSYSDIR)/src
+ $(INSTALL_DATA) $(ERL_FILES) $(RELSYSDIR)/src
+ $(INSTALL_DIR) $(RELSYSDIR)/ebin
+ $(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin
+ $(INSTALL_DIR) $(RELSYSDIR)/include
+ $(INSTALL_DATA) $(HRL_FILES) $(RELSYSDIR)/include
+
+release_docs_spec:
+
diff --git a/lib/parsetools/src/esyntax.yrl b/lib/parsetools/src/esyntax.yrl
new file mode 100644
index 0000000000..1ecb54f0a7
--- /dev/null
+++ b/lib/parsetools/src/esyntax.yrl
@@ -0,0 +1,360 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+Nonterminals
+add_op attribute basic_type bif_test
+case_expr clause_body
+clause_guard clause_head comp_op cr_clause cr_clauses expr expr_tail
+exprs farity farity_list form formal_parameter_list function
+function_call function_clause guard guard_call guard_expr
+guard_expr_list guard_exprs guard_expr_tail guard_expr_tuple
+guard_parameter_list
+guard_tests guard_test if_clause if_clauses if_expr list match_expr
+mult_op parameter_list pattern patterns pattern_list pattern_tail pattern_tuple
+prefix_op receive_expr send_expr tuple.
+
+Terminals
+'!' '(' ')' '*' '+' ',' '-' '->' '/' '/=' ':' ';' '<' '=' '=/=' '=:='
+'=<' '==' '>' '>=' '[' ']' 'after' 'band' 'begin' 'bnot'
+'bor' 'bsl' 'bsr' 'bxor' 'case' 'catch' 'div' 'end' 'if' 'of'
+'receive' 'rem' 'when' '{' '|' '}' atom float integer string var.
+% 'receive' 'rem' 'true' 'when' '{' '|' '}' atom float integer string var.
+
+Rootsymbol form.
+
+Endsymbol dot.
+
+Unary 0 'catch'.
+Right 200 '='.
+Right 200 '!'.
+Left 300 add_op.
+Left 400 mult_op.
+Unary 500 prefix_op.
+
+
+add_op -> '+' : '$1'.
+add_op -> '-' : '$1'.
+add_op -> 'bor' : '$1'.
+add_op -> 'bxor' : '$1'.
+add_op -> 'bsl' : '$1'.
+add_op -> 'bsr' : '$1'.
+
+comp_op -> '==' : '$1'.
+comp_op -> '/=' : '$1'.
+comp_op -> '=<' : '$1'.
+comp_op -> '<' : '$1'.
+comp_op -> '>=' : '$1'.
+comp_op -> '>' : '$1'.
+comp_op -> '=:=' : '$1'.
+comp_op -> '=/=' : '$1'.
+
+mult_op -> '*' : '$1'.
+mult_op -> '/' : '$1'.
+mult_op -> 'div' : '$1'.
+mult_op -> 'rem' : '$1'.
+mult_op -> 'band' : '$1'.
+
+prefix_op -> '+' : '$1'.
+prefix_op -> '-' : '$1'.
+prefix_op -> 'bnot' : '$1'.
+
+
+basic_type -> atom : '$1'.
+basic_type -> float : '$1'.
+basic_type -> integer : '$1'.
+basic_type -> string : '$1'.
+basic_type -> var : '$1'.
+% basic_type -> 'true' : {atom, element(2, '$1'), 'true'}.
+
+
+pattern -> basic_type : '$1'.
+pattern -> pattern_list : '$1'.
+pattern -> pattern_tuple : '$1'.
+
+pattern_list -> '[' ']' : {nil, element(2, '$1')}.
+pattern_list -> '[' pattern pattern_tail ']' :
+ {cons, element(2, '$1'), '$2', '$3'}.
+
+pattern_tail -> '|' pattern : '$2'.
+pattern_tail -> ',' pattern pattern_tail :
+ {cons, element(2, '$2'), '$2', '$3'}.
+pattern_tail -> '$empty' : {nil, 0}.
+
+pattern_tuple -> '{' '}' : {tuple, element(2, '$1'), []}.
+pattern_tuple -> '{' patterns '}' : {tuple, element(2, '$1'), '$2'}.
+
+patterns -> pattern : ['$1'].
+patterns -> pattern ',' patterns : ['$1' | '$3'].
+
+
+expr -> basic_type : '$1'.
+expr -> list : '$1'.
+expr -> tuple : '$1'.
+expr -> function_call : '$1'.
+
+expr -> expr add_op expr :
+ {Op, Pos} = '$2',
+ {arith, Pos, Op, '$1', '$3'}.
+expr -> expr mult_op expr :
+ {Op, Pos} = '$2',
+ {arith, Pos, Op, '$1', '$3'}.
+expr -> prefix_op expr:
+ case '$2' of
+ {float, Pos, N} ->
+ case '$1' of
+ {'-', _} ->
+ {float, Pos, -N};
+ {'+', _} ->
+ {float, Pos, N};
+ {Op, Pos1} ->
+ {arith, Pos1, Op, {float, Pos, N}}
+ end;
+ {integer, Pos, N} ->
+ case '$1' of
+ {'-', _} ->
+ {integer, Pos, -N};
+ {'+', _} ->
+ {integer, Pos, N};
+ {Op, Pos1} ->
+ {arith, Pos1, Op, {integer, Pos, N}}
+ end;
+ _ ->
+ {Op, Pos} = '$1',
+ {arith, Pos, Op, '$2'}
+ end.
+
+expr -> '(' expr ')' : '$2'.
+expr -> 'begin' exprs 'end' : {block, element(2, '$1'), '$2'}.
+expr -> 'catch' expr : {'catch', element(2, '$1'), '$2'}.
+
+expr -> case_expr : '$1'.
+expr -> if_expr : '$1'.
+expr -> receive_expr : '$1'.
+expr -> match_expr : '$1'.
+expr -> send_expr : '$1'.
+
+
+list -> '[' ']' : {nil, element(2, '$1')}.
+list -> '[' expr expr_tail ']' : {cons, element(2, '$1'), '$2', '$3'}.
+
+expr_tail -> '|' expr : '$2'.
+expr_tail -> ',' expr expr_tail : {cons, element(2, '$2'), '$2', '$3'}.
+expr_tail -> '$empty' : {nil, 0}.
+
+tuple -> '{' '}' : {tuple, element(2, '$1'), []}.
+tuple -> '{' exprs '}' : {tuple, element(2, '$1'), '$2'}.
+
+
+function_call -> atom '(' parameter_list ')' :
+ case erl_parse:erlang_bif(element(3, '$1'), length('$3')) of
+ true ->
+ {bif, element(2, '$1'), element(3, '$1'), '$3'};
+ false ->
+ {call, element(2, '$1'), [], element(3, '$1'), '$3'}
+ end.
+function_call -> atom ':' atom '(' parameter_list ')' :
+ {call, element(2, '$1'), element(3, '$1'), element(3, '$3'), '$5'}.
+
+parameter_list -> exprs : '$1'.
+parameter_list -> '$empty' : [].
+
+
+case_expr -> 'case' expr 'of' cr_clauses 'end' :
+ {'case', element(2, '$1'), '$2', '$4'}.
+
+cr_clause -> pattern clause_guard clause_body :
+ {clause, element(2, '$1'), ['$1'], '$2', '$3'}.
+
+cr_clauses -> cr_clause : ['$1'].
+cr_clauses -> cr_clause ';' cr_clauses : ['$1' | '$3'].
+
+if_expr -> 'if' if_clauses 'end' : {'if', element(2, '$1'), '$2'}.
+
+if_clause -> guard clause_body : {clause, element(2, hd('$2')), '$1', '$2'}.
+
+if_clauses -> if_clause : ['$1'].
+if_clauses -> if_clause ';' if_clauses : ['$1' | '$3'].
+
+receive_expr -> 'receive' 'after' expr clause_body 'end' :
+ {'receive', element(2, '$1'), [], '$3', '$4'}.
+receive_expr -> 'receive' cr_clauses 'end' :
+ {'receive', element(2, '$1'), '$2'}.
+receive_expr -> 'receive' cr_clauses 'after' expr clause_body 'end' :
+ {'receive', element(2, '$1'), '$2', '$4', '$5'}.
+
+
+match_expr -> expr '=' expr :
+ case erl_parse:is_term('$1') of
+ true ->
+ {match, element(2, '$1'), '$1', '$3'};
+ false ->
+ throw({error, {element(2, '$1'), yecc, "illegal lhs in match **"}})
+ end.
+
+send_expr -> expr '!' expr :
+ Pos = element(2, '$1'),
+ {send, Pos, '$1', '$3'}.
+
+
+exprs -> expr : ['$1'].
+exprs -> expr ',' exprs : ['$1' | '$3'].
+
+
+guard_expr -> basic_type : '$1'.
+guard_expr -> guard_expr_list : '$1'.
+guard_expr -> guard_expr_tuple : '$1'.
+guard_expr -> guard_call : '$1'.
+guard_expr -> '(' guard_expr ')' : '$2'.
+guard_expr -> guard_expr add_op guard_expr :
+ {Op, Pos} = '$2',
+ {arith, Pos, Op, '$1', '$3'}.
+guard_expr -> guard_expr mult_op guard_expr :
+ {Op, Pos} = '$2',
+ {arith, Pos, Op, '$1', '$3'}.
+guard_expr -> prefix_op guard_expr:
+ case '$2' of
+ {float, Pos, N} ->
+ case '$1' of
+ {'-', _} ->
+ {float, Pos, -N};
+ {'+', _} ->
+ {float, Pos, N};
+ {Op, Pos1} ->
+ {arith, Pos1, Op, {float, Pos, N}}
+ end;
+ {integer, Pos, N} ->
+ case '$1' of
+ {'-', _} ->
+ {integer, Pos, -N};
+ {'+', _} ->
+ {integer, Pos, N};
+ {Op, Pos1} ->
+ {arith, Pos1, Op, {integer, Pos, N}}
+ end;
+ _ ->
+ {Op, Pos} = '$1',
+ {arith, Pos, Op, '$2'}
+ end.
+
+guard_expr_list -> '[' ']' : {nil, element(2, '$1')}.
+guard_expr_list -> '[' guard_expr guard_expr_tail ']' :
+ {cons, element(2, '$1'), '$2', '$3'}.
+
+guard_expr_tail -> '|' guard_expr : '$2'.
+guard_expr_tail -> ',' guard_expr guard_expr_tail :
+ {cons, element(2, '$2'), '$2', '$3'}.
+guard_expr_tail -> '$empty' : {nil, 0}.
+
+guard_expr_tuple -> '{' '}' : {tuple, element(2, '$1'), []}.
+guard_expr_tuple -> '{' guard_exprs '}' : {tuple, element(2, '$1'), '$2'}.
+
+guard_exprs -> guard_expr : ['$1'].
+guard_exprs -> guard_expr ',' guard_exprs : ['$1' | '$3'].
+
+
+guard_call -> atom '(' guard_parameter_list ')' :
+ case erl_parse:erlang_guard_bif(element(3, '$1'), length('$3')) of
+ true ->
+ {bif, element(2, '$1'), element(3, '$1'), '$3'};
+ false ->
+ throw({error, {element(2, '$1'), yecc, "illegal test in guard **"}})
+ end.
+
+guard_parameter_list -> guard_exprs : '$1'.
+guard_parameter_list -> '$empty' : [].
+
+
+bif_test -> atom '(' guard_parameter_list ')' :
+ case erl_parse:erlang_guard_test(element(3, '$1'), length('$3')) of
+ true ->
+ {test, element(2, '$1'), element(3, '$1'), '$3'};
+ false ->
+ throw({error, {element(2, '$1'), yecc, "illegal test in guard **"}})
+ end.
+
+
+guard_test -> bif_test : '$1'.
+guard_test -> guard_expr comp_op guard_expr :
+ {Op, Pos} = '$2',
+ {comp, Pos, Op, '$1', '$3'}.
+
+guard_tests -> guard_test : ['$1'].
+guard_tests -> guard_test ',' guard_tests : ['$1' | '$3'].
+
+% guard -> 'true' : [].
+guard -> atom :
+ case '$1' of
+ {atom, _, true} ->
+ [];
+ _ ->
+ throw({error, {element(2, '$1'), yecc, "illegal test in guard **"}})
+ end.
+guard -> guard_tests : '$1'.
+
+
+function_clause -> clause_head clause_guard clause_body :
+ {Name, Line, Arity, Parameters} = '$1',
+ {function, Line, Name, Arity,
+ [{clause, element(2, hd('$3')), Parameters, '$2', '$3'}]}.
+
+clause_head -> atom '(' formal_parameter_list ')' :
+ {element(3, '$1'), element(2, '$1'), length('$3'), '$3'}.
+
+formal_parameter_list -> patterns : '$1'.
+formal_parameter_list -> '$empty' : [].
+
+clause_guard -> 'when' guard : '$2'.
+clause_guard -> '$empty' : [].
+
+clause_body -> '->' exprs: '$2'.
+
+
+function -> function_clause : '$1'.
+function -> function_clause ';' function :
+ case '$1' of
+ {function, Pos1, Name1, Arity1, [Clause]} ->
+ case '$3' of
+ {function, _, Name1, Arity2, Clauses} ->
+ if
+ Arity1 /= Arity2 ->
+ throw({error, {Pos1, yecc,
+ io_lib:format('arity conflict in definition of ~w',
+ [Name1])}});
+ true ->
+ {function, Pos1, Name1, Arity1, [Clause | Clauses]}
+ end;
+ _ ->
+ throw({error, {Pos1, yecc,
+ io_lib:format('missing final dot in def of ~w/~w',
+ [Name1, Arity1])}})
+ end
+ end.
+
+
+attribute -> atom : element(3, '$1').
+attribute -> '[' farity_list ']' : '$2'.
+
+farity_list -> farity : ['$1'].
+farity_list -> farity ',' farity_list : ['$1' | '$3'].
+
+farity -> atom '/' integer : {element(3, '$1'), element(3, '$3')}.
+
+
+form -> '-' atom '(' attribute ')' :
+ {attribute, element(2, '$2'), element(3, '$2'), '$4'}.
+form -> function : '$1'.
diff --git a/lib/parsetools/src/leex.erl b/lib/parsetools/src/leex.erl
new file mode 100644
index 0000000000..fd494eaf06
--- /dev/null
+++ b/lib/parsetools/src/leex.erl
@@ -0,0 +1,1608 @@
+%% Copyright (c) 2008,2009 Robert Virding. All rights reserved.
+%%
+%% Redistribution and use in source and binary forms, with or without
+%% modification, are permitted provided that the following conditions
+%% are met:
+%%
+%% 1. Redistributions of source code must retain the above copyright
+%% notice, this list of conditions and the following disclaimer.
+%% 2. Redistributions in binary form must reproduce the above copyright
+%% notice, this list of conditions and the following disclaimer in the
+%% documentation and/or other materials provided with the distribution.
+%%
+%% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+%% "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+%% LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+%% FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+%% COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+%% INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+%% BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+%% LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+%% CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+%% LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+%% ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+%% POSSIBILITY OF SUCH DAMAGE.
+
+%%% A Lexical Analyser Generator for Erlang.
+%%%
+%%% Most of the algorithms used here are taken pretty much as
+%%% described in the "Dragon Book" by Aho, Sethi and Ullman. Some
+%%% completing details were taken from "Compiler Design in C" by
+%%% Hollub.
+
+-module(leex).
+
+-export([compile/3,file/1,file/2,format_error/1]).
+
+-import(lists, [member/2,reverse/1,sort/1,delete/2,
+ keysearch/3,keysort/2,keydelete/3,keyfind/3,
+ map/2,foldl/3,foreach/2,flatmap/2]).
+-import(string, [substr/2,substr/3,span/2]).
+-import(ordsets, [is_element/2,add_element/2,union/2]).
+-import(orddict, [store/3]).
+
+-include("erl_compile.hrl").
+%%-include_lib("stdlib/include/erl_compile.hrl").
+
+-define(LEEXINC, "leexinc.hrl"). % Include file
+-define(LEEXLIB, parsetools). % Leex is in lib parsetools
+%%-define(LEEXLIB, leex). % Leex is in lib leex
+
+-define(DEFS_HEAD, "Definitions.").
+-define(RULE_HEAD, "Rules.").
+-define(CODE_HEAD, "Erlang code.").
+
+-record(leex, {xfile=[], % Xrl file
+ efile=[], % Erl file
+ ifile=[], % Include file
+ gfile=[], % Graph file
+ module, % Module name
+ opts=[], % Options
+ posix=false, % POSIX regular expressions
+ errors=[],
+ warnings=[]
+ }).
+
+-record(nfa_state, {no,edges=[],accept=noaccept}).
+-record(dfa_state, {no,nfa=[],trans=[],accept=noaccept}).
+
+%%%
+%%% Exported functions
+%%%
+
+%%% Interface to erl_compile.
+
+compile(Input0, Output0,
+ #options{warning = WarnLevel, verbose=Verbose, includes=Includes}) ->
+ Input = assure_extension(shorten_filename(Input0), ".xrl"),
+ Output = assure_extension(shorten_filename(Output0), ".erl"),
+ Includefile = lists:sublist(Includes, 1),
+ Opts = [{scannerfile,Output},{includefile,Includefile},{verbose,Verbose},
+ {report_errors,true},{report_warnings,WarnLevel > 0}],
+ case file(Input, Opts) of
+ {ok, _} ->
+ ok;
+ error ->
+ error
+ end.
+
+%% file(File) -> ok | error.
+%% file(File, Options) -> ok | error.
+
+file(File) -> file(File, []).
+
+file(File, Opts0) ->
+ case is_filename(File) of
+ no -> erlang:error(badarg, [File,Opts0]);
+ _ -> ok
+ end,
+ Opts = case options(Opts0) of
+ badarg ->
+ erlang:error(badarg, [File,Opts0]);
+ Options ->
+ Options
+ end,
+ St0 = #leex{},
+ St1 = filenames(File, Opts, St0), % Get all the filenames
+ St = try
+ {ok,REAs,Actions,Code,St2} = parse_file(St1),
+ {DFA,DF} = make_dfa(REAs, St2),
+ St3 = out_file(St2, DFA, DF, Actions, Code),
+ case lists:member(dfa_graph, St3#leex.opts) of
+ true -> out_dfa_graph(St3, DFA, DF);
+ false -> St3
+ end
+ catch #leex{}=St4 ->
+ St4
+ end,
+ leex_ret(St).
+
+format_error({file_error, Reason}) ->
+ io_lib:fwrite("~s",[file:format_error(Reason)]);
+format_error(missing_defs) -> "missing Definitions";
+format_error(missing_rules) -> "missing Rules";
+format_error(missing_code) -> "missing Erlang code";
+format_error(empty_rules) -> "no rules";
+format_error(bad_rule) -> "bad rule";
+format_error({regexp,E})->
+ Es = case E of
+ {interval_range,_} -> "interval range";
+ {unterminated,Cs} ->
+ "unterminated " ++ Cs;
+ {illegal_char,Cs} ->
+ "illegal character " ++ Cs;
+ {posix_cc,What} ->
+ ["illegal POSIX character class ",io_lib:write_string(What)];
+ {char_class,What} ->
+ ["illegal character class ",io_lib:write_string(What)]
+ end,
+ ["bad regexp `",Es,"'"];
+format_error(ignored_characters) ->
+ "ignored characters".
+
+%%%
+%%% Local functions
+%%%
+
+assure_extension(File, Ext) ->
+ lists:concat([strip_extension(File, Ext), Ext]).
+
+%% Assumes File is a filename.
+strip_extension(File, Ext) ->
+ case filename:extension(File) of
+ Ext -> filename:rootname(File);
+ _Other -> File
+ end.
+
+options(Options0) when is_list(Options0) ->
+ try
+ Options = flatmap(fun(return) -> short_option(return, true);
+ (report) -> short_option(report, true);
+ ({return,T}) -> short_option(return, T);
+ ({report,T}) -> short_option(report, T);
+ (T) -> [T]
+ end, Options0),
+ options(Options, [scannerfile,includefile,report_errors,
+ report_warnings,return_errors,return_warnings,
+ verbose,dfa_graph], [])
+ catch error: _ -> badarg
+ end;
+options(Option) ->
+ options([Option]).
+
+short_option(return, T) ->
+ [{return_errors,T}, {return_warnings,T}];
+short_option(report, T) ->
+ [{report_errors,T}, {report_warnings,T}].
+
+options(Options0, [Key|Keys], L) when is_list(Options0) ->
+ Options = case member(Key, Options0) of
+ true ->
+ [atom_option(Key)|delete(Key, Options0)];
+ false ->
+ Options0
+ end,
+ V = case keysearch(Key, 1, Options) of
+ {value, {Key, Filename0}} when Key =:= includefile;
+ Key =:= scannerfile ->
+ case is_filename(Filename0) of
+ no ->
+ badarg;
+ Filename ->
+ {ok,[{Key,Filename}]}
+ end;
+ {value,{Key,Bool}} when Bool; not Bool ->
+ {ok,[{Key, Bool}]};
+ {value,{Key, _}} ->
+ badarg;
+ false ->
+ {ok,[{Key,default_option(Key)}]}
+ end,
+ case V of
+ badarg ->
+ badarg;
+ {ok,KeyValueL} ->
+ NewOptions = keydelete(Key, 1, Options),
+ options(NewOptions, Keys, KeyValueL ++ L)
+ end;
+options([], [], L) ->
+ foldl(fun({_,false}, A) -> A;
+ ({Tag,true}, A) -> [Tag|A];
+ (F,A) -> [F|A]
+ end, [], L);
+options(_Options, _, _L) ->
+ badarg.
+
+default_option(dfa_graph) -> false;
+default_option(includefile) -> [];
+default_option(report_errors) -> true;
+default_option(report_warnings) -> true;
+default_option(return_errors) -> false;
+default_option(return_warnings) -> false;
+default_option(scannerfile) -> [];
+default_option(verbose) -> false.
+
+atom_option(dfa_graph) -> {dfa_graph,true};
+atom_option(report_errors) -> {report_errors,true};
+atom_option(report_warnings) -> {report_warnings,true};
+atom_option(return_errors) -> {return_errors,true};
+atom_option(return_warnings) -> {return_warnings,true};
+atom_option(verbose) -> {verbose,true};
+atom_option(Key) -> Key.
+
+is_filename(T) ->
+ try filename:flatten(T) of
+ Filename -> Filename
+ catch error: _ -> no
+ end.
+
+shorten_filename(Name0) ->
+ {ok,Cwd} = file:get_cwd(),
+ case lists:prefix(Cwd, Name0) of
+ false -> Name0;
+ true ->
+ case lists:nthtail(length(Cwd), Name0) of
+ "/"++N -> N;
+ N -> N
+ end
+ end.
+
+leex_ret(St) ->
+ report_errors(St),
+ report_warnings(St),
+ Es = pack_errors(St#leex.errors),
+ Ws = pack_warnings(St#leex.warnings),
+ if
+ Es =:= [] ->
+ case member(return_warnings, St#leex.opts) of
+ true -> {ok, St#leex.efile, Ws};
+ false -> {ok, St#leex.efile}
+ end;
+ true ->
+ case member(return_errors, St#leex.opts) of
+ true -> {error, Es, Ws};
+ false -> error
+ end
+ end.
+
+pack_errors([{File,_} | _] = Es) ->
+ [{File, flatmap(fun({_,E}) -> [E] end, sort(Es))}];
+pack_errors([]) ->
+ [].
+
+pack_warnings([{File,_} | _] = Ws) ->
+ [{File, flatmap(fun({_,W}) -> [W] end, sort(Ws))}];
+pack_warnings([]) ->
+ [].
+
+report_errors(St) ->
+ when_opt(fun () ->
+ foreach(fun({File,{none,Mod,E}}) ->
+ io:fwrite("~s: ~s\n",
+ [File,Mod:format_error(E)]);
+ ({File,{Line,Mod,E}}) ->
+ io:fwrite("~s:~w: ~s\n",
+ [File,Line,Mod:format_error(E)])
+ end, sort(St#leex.errors))
+ end, report_errors, St#leex.opts).
+
+report_warnings(St) ->
+ when_opt(fun () ->
+ foreach(fun({File,{none,Mod,W}}) ->
+ io:fwrite("~s: Warning: ~s\n",
+ [File,Mod:format_error(W)]);
+ ({File,{Line,Mod,W}}) ->
+ io:fwrite("~s:~w: Warning: ~s\n",
+ [File,Line,Mod:format_error(W)])
+ end, sort(St#leex.warnings))
+ end, report_warnings, St#leex.opts).
+
+add_error(E, St) ->
+ add_error(St#leex.xfile, E, St).
+
+add_error(File, Error, St) ->
+ throw(St#leex{errors = [{File,Error}|St#leex.errors]}).
+
+add_warning(Line, W, St) ->
+ St#leex{warnings = [{St#leex.xfile,{Line,leex,W}}|St#leex.warnings]}.
+
+%% filenames(File, Options, State) -> State.
+%% The default output dir is the current directory unless an
+%% explicit one has been given in the options.
+
+filenames(File, Opts, St0) ->
+ Dir = filename:dirname(File),
+ Base = filename:basename(File, ".xrl"),
+ Xfile = filename:join(Dir, Base ++ ".xrl"),
+ Efile = Base ++ ".erl",
+ Gfile = Base ++ ".dot",
+ Module = list_to_atom(Base),
+ St1 = St0#leex{xfile=Xfile,
+ opts=Opts,
+ module=Module},
+ {value,{includefile,Ifile0}} = keysearch(includefile, 1, Opts),
+ Ifile = inc_file_name(Ifile0),
+ %% Test for explicit scanner file.
+ {value,{scannerfile,Ofile}} = keysearch(scannerfile, 1, Opts),
+ if
+ Ofile =:= [] ->
+ St1#leex{efile=filename:join(Dir, Efile),
+ ifile=Ifile,
+ gfile=filename:join(Dir, Gfile)};
+ true ->
+ D = filename:dirname(Ofile),
+ St1#leex{efile=Ofile,
+ ifile=Ifile,
+ gfile=filename:join(D, Gfile)}
+ end.
+
+when_opt(Do, Opt, Opts) ->
+ case member(Opt, Opts) of
+ true -> Do();
+ false -> ok
+ end.
+
+verbose_print(St, Format, Args) ->
+ when_opt(fun () -> io:fwrite(Format, Args) end, verbose, St#leex.opts).
+
+%% parse_file(State) -> {ok,[REA],[Action],Code,NewState} | throw(NewState)
+%% when
+%% REA = {RegExp,ActionNo};
+%% Action = {ActionNo,ActionString};
+%% Code = {StartLine, StartPos, NumOfLines}. Where the Erlang code is.
+%%
+%% Read and parse the file Xfile.
+%% After each section of the file has been parsed we directly call the
+%% next section. This is done when we detect a line we don't recognise
+%% in the current section. The file format is very simple and Erlang
+%% token based, we allow empty lines and Erlang style comments.
+
+parse_file(St0) ->
+ case file:open(St0#leex.xfile, [read]) of
+ {ok,Xfile} ->
+ try
+ verbose_print(St0, "Parsing file ~s, ", [St0#leex.xfile]),
+ %% We KNOW that errors throw so we can ignore them here.
+ {ok,Line1,St1} = parse_head(Xfile, St0),
+ {ok,Line2,Macs,St2} = parse_defs(Xfile, Line1, St1),
+ {ok,Line3,REAs,Actions,St3} =
+ parse_rules(Xfile, Line2, Macs, St2),
+ {ok,Code,St4} = parse_code(Xfile, Line3, St3),
+ verbose_print(St1, "contained ~w rules.~n", [length(REAs)]),
+ {ok,REAs,Actions,Code,St4}
+ after file:close(Xfile)
+ end;
+ {error,Error} ->
+ add_error({none,leex,{file_error,Error}}, St0)
+ end.
+
+%% parse_head(File, State) -> {ok,NextLine,State}.
+%% Parse the head of the file. Skip all comments and blank lines.
+
+parse_head(Ifile, St) -> {ok,nextline(Ifile, 0),St}.
+
+%% parse_defs(File, Line, State) -> {ok,NextLine,Macros,State}.
+%% Parse the macro definition section of a file. This must exist.
+%% The section is ended by a non-blank line which is not a macro def.
+
+parse_defs(Ifile, {ok,?DEFS_HEAD ++ Rest,L}, St) ->
+ St1 = warn_ignored_chars(L, Rest, St),
+ parse_defs(Ifile, nextline(Ifile, L), [], St1);
+parse_defs(_, {ok,_,L}, St) ->
+ add_error({L,leex,missing_defs}, St);
+parse_defs(_, {eof,L}, St) ->
+ add_error({L,leex,missing_defs}, St).
+
+parse_defs(Ifile, {ok,Chars,L}=Line, Ms, St) ->
+ %% This little beauty matches out a macro definition, RE's are so clear.
+ MS = "^[ \t]*([A-Z_][A-Za-z0-9_]*)[ \t]*=[ \t]*([^ \t\r\n]*)[ \t\r\n]*\$",
+ case re:run(Chars, MS, [{capture,all_but_first,list}]) of
+ {match,[Name,Def]} ->
+ %%io:fwrite("~p = ~p\n", [Name,Def]),
+ parse_defs(Ifile, nextline(Ifile, L), [{Name,Def}|Ms], St);
+ _ -> {ok,Line,Ms,St} % Anything else
+ end;
+parse_defs(_, Line, Ms, St) ->
+ {ok,Line,Ms,St}.
+
+%% parse_rules(File, Line, Macros, State) -> {ok,NextLine,REAs,Actions,State}.
+%% Parse the RE rules section of the file. This must exist.
+
+parse_rules(Ifile, {ok,?RULE_HEAD ++ Rest,L}, Ms, St) ->
+ St1 = warn_ignored_chars(L, Rest, St),
+ parse_rules(Ifile, nextline(Ifile, L), Ms, [], [], 0, St1);
+parse_rules(_, {ok,_,L}, _, St) ->
+ add_error({L,leex,missing_rules}, St);
+parse_rules(_, {eof,L}, _, St) ->
+ add_error({L,leex,missing_rules}, St).
+
+%% parse_rules(File, Result, Macros, RegExpActions, Actions, Acount, State) ->
+%% {ok,NextCLine,RegExpActions,Actions,NewState} | throw(NewState)
+
+parse_rules(Ifile, NextLine, Ms, REAs, As, N, St) ->
+ case NextLine of
+ {ok,?CODE_HEAD ++ _Rest,_} ->
+ parse_rules_end(Ifile, NextLine, REAs, As, St);
+ {ok,Chars,L0} ->
+ %%io:fwrite("~w: ~p~n", [L0,Chars]),
+ case collect_rule(Ifile, Chars, L0) of
+ {ok,Re,Atoks,L1} ->
+ {ok,REA,A,St1} = parse_rule(Re, L0, Atoks, Ms, N, St),
+ parse_rules(Ifile, nextline(Ifile, L1), Ms,
+ [REA|REAs], [A|As], N+1, St1);
+ {error,E} -> add_error(E, St)
+ end;
+ {eof,_} ->
+ parse_rules_end(Ifile, NextLine, REAs, As, St)
+ end.
+
+parse_rules_end(_, {ok,_,L}, [], [], St) ->
+ add_error({L,leex,empty_rules}, St);
+parse_rules_end(_, {eof,L}, [], [], St) ->
+ add_error({L,leex,empty_rules}, St);
+parse_rules_end(_, NextLine, REAs, As, St) ->
+ %% Must be *VERY* careful to put rules in correct order!
+ {ok,NextLine,reverse(REAs),reverse(As),St}.
+
+%% collect_rule(File, Line, Lineno) ->
+%% {ok,RegExp,ActionTokens,NewLineno} | {error,E}.
+%% Collect a complete rule by reading lines until the the regexp and
+%% action has been read. Keep track of line number.
+
+collect_rule(Ifile, Chars, L0) ->
+ %% Erlang strings are 1 based, but re 0 :-(
+ {match,[{St0,Len}|_]} = re:run(Chars, "[^ \t\r\n]+"),
+ St = St0 + 1,
+ %%io:fwrite("RE = ~p~n", [substr(Chars, St, Len)]),
+ case collect_action(Ifile, substr(Chars, St+Len), L0, []) of
+ {ok,[{':',_}|Toks],L1} -> {ok,substr(Chars, St, Len),Toks,L1};
+ {ok,_,_} -> {error,{L0,leex,bad_rule}};
+ {eof,L1} -> {error,{L1,leex,bad_rule}};
+ {error,E,_} -> {error,E}
+ end.
+
+collect_action(Ifile, Chars, L0, Cont0) ->
+ case erl_scan:tokens(Cont0, Chars, L0) of
+ {done,{ok,Toks,_},_} -> {ok,Toks,L0};
+ {done,{eof,_},_} -> {eof,L0};
+ {done,{error,E,_},_} -> {error,E,L0};
+ {more,Cont1} ->
+ collect_action(Ifile, io:get_line(Ifile, leex), L0+1, Cont1)
+ end.
+
+%% parse_rule(RegExpString, RegExpLine, ActionTokens, Macros, Counter, State) ->
+%% {ok,{RE,Action},ActionData,State}.
+%% Parse one regexp after performing macro substition.
+
+parse_rule(S, Line, [{dot,_}], Ms, N, St) ->
+ case parse_rule_regexp(S, Ms, St) of
+ {ok,R} ->
+ {ok,{R,N},{N,empty_action},St};
+ {error,E} ->
+ add_error({Line,leex,E}, St)
+ end;
+parse_rule(S, Line, Atoks, Ms, N, St) ->
+ case parse_rule_regexp(S, Ms, St) of
+ {ok,R} ->
+ %%io:fwrite("RE = ~p~n", [R]),
+ %% Check for token variables.
+ TokenChars = var_used('TokenChars', Atoks),
+ TokenLen = var_used('TokenLen', Atoks),
+ TokenLine = var_used('TokenLine', Atoks),
+ {ok,{R,N},{N,Atoks,TokenChars,TokenLen,TokenLine},St};
+ {error,E} ->
+ add_error({Line,leex,E}, St)
+ end.
+
+var_used(Name, Toks) ->
+ case keyfind(Name, 3, Toks) of
+ {var,_,Name} -> true; %It's the var we want
+ _ -> false
+ end.
+
+%% parse_rule_regexp(RegExpString, Macros, State) ->
+%% {ok,RegExp} | {error,Error}.
+%% Substitute in macros and parse RegExpString. Cannot use re:replace
+%% here as it uses info in replace string (&).
+
+parse_rule_regexp(RE0, [{M,Exp}|Ms], St) ->
+ Split= re:split(RE0, "\\{" ++ M ++ "\\}", [{return,list}]),
+ RE1 = string:join(Split, Exp),
+ parse_rule_regexp(RE1, Ms, St);
+parse_rule_regexp(RE, [], St) ->
+ %%io:fwrite("RE = ~p~n", [RE]),
+ case re_parse(RE, St) of
+ {ok,R} -> {ok,R};
+ {error,E} -> {error,{regexp,E}}
+ end.
+
+%% parse_code(File, Line, State) -> {ok,Code,NewState}.
+%% Finds the line and the position where the code section of the file
+%% begins. This must exist.
+
+parse_code(Ifile, {ok,?CODE_HEAD ++ Rest,CodeL}, St) ->
+ St1 = warn_ignored_chars(CodeL, Rest, St),
+ {ok, CodePos} = file:position(Ifile, cur),
+ %% Just count the lines; copy the code from file to file later.
+ NCodeLines = count_lines(Ifile, 0),
+ {ok,{CodeL,CodePos,NCodeLines},St1};
+parse_code(_, {ok,_,L}, St) ->
+ add_error({L,leex,missing_code}, St);
+parse_code(_, {eof,L}, St) ->
+ add_error({L,leex,missing_code}, St).
+
+count_lines(File, N) ->
+ case io:get_line(File, leex) of
+ eof -> N;
+ _Line -> count_lines(File, N+1)
+ end.
+
+%% nextline(InputFile, PrevLineNo) -> {ok,Chars,LineNo} | {eof,LineNo}.
+%% Get the next line skipping comment lines and blank lines.
+
+nextline(Ifile, L) ->
+ case io:get_line(Ifile, leex) of
+ eof -> {eof,L};
+ Chars ->
+ case substr(Chars, span(Chars, " \t\n")+1) of
+ [$%|_Rest] -> nextline(Ifile, L+1);
+ [] -> nextline(Ifile, L+1);
+ _Other -> {ok,Chars,L+1}
+ end
+ end.
+
+warn_ignored_chars(Line, S, St) ->
+ case non_white(S) of
+ [] -> St;
+ _ -> add_warning(Line, ignored_characters, St)
+ end.
+
+non_white(S) ->
+ [C || C <- S, C > $\s, C < $\200 orelse C > $\240].
+
+%% This is the regular expression grammar used. It is equivalent to the
+%% one used in AWK, except that we allow ^ $ to be used anywhere and fail
+%% in the matching.
+%%
+%% reg -> alt : '$1'.
+%% alt -> seq "|" seq ... : {alt,['$1','$2'...]}.
+%% seq -> repeat repeat ... : {seq,['$1','$2'...]}.
+%% repeat -> repeat "*" : {kclosure,'$1'}.
+%% repeat -> repeat "+" : {pclosure,'$1'}.
+%% repeat -> repeat "?" : {optional,'$1'}.
+%% repeat -> repeat "{" [Min],[Max] "}" : {interval,'$1',Min,Max}
+%% repeat -> single : '$1'.
+%% single -> "(" reg ")" : {sub,'$2',Number}.
+%% single -> "^" : bos/bol.
+%% single -> "$" : eos/eol.
+%% single -> "." : any.
+%% single -> "[" class "]" : {char_class,char_class('$2')}
+%% single -> "[" "^" class "]" : {comp_class,char_class('$3')}.
+%% single -> "\"" chars "\"" : {lit,'$2'}.
+%% single -> "\\" char : {lit,['$2']}.
+%% single -> char : {lit,['$1']}.
+%% single -> empty : epsilon.
+%% The grammar of the current regular expressions. The actual parser
+%% is a recursive descent implementation of the grammar.
+
+%% re_parse(Chars, State) -> {ok,RegExp} | {error,Error}.
+
+re_parse(Cs0, St) ->
+ case catch re_reg(Cs0, 0, St) of
+ {RE,_,[]} -> {ok,RE};
+ {_,_,[C|_]} -> {error,{illegal_char,[C]}};
+ {parse_error,E} -> {error,E}
+ end.
+
+parse_error(E) -> throw({parse_error,E}).
+
+re_reg(Cs, Sn, St) -> re_alt(Cs, Sn, St).
+
+re_alt(Cs0, Sn0, St) ->
+ {L,Sn1,Cs1} = re_seq(Cs0, Sn0, St),
+ case re_alt1(Cs1, Sn1, St) of
+ {[],Sn2,Cs2} -> {L,Sn2,Cs2};
+ {Rs,Sn2,Cs2} -> {{alt,[L|Rs]},Sn2,Cs2}
+ end.
+
+re_alt1([$||Cs0], Sn0, St) ->
+ {L,Sn1,Cs1} = re_seq(Cs0, Sn0, St),
+ {Rs,Sn2,Cs2} = re_alt1(Cs1, Sn1, St),
+ {[L|Rs],Sn2,Cs2};
+re_alt1(Cs, Sn, _) -> {[],Sn,Cs}.
+
+%% Parse a sequence of regexps. Don't allow the empty sequence.
+%% re_seq(Cs0, Sn0, St) ->
+%% {L,Sn1,Cs1} = repeat(Cs0, Sn0, St),
+%% case re_seq1(Cs1, Sn1, St) of
+%% {[],Sn2,Cs2} -> {L,Sn2,Cs2};
+%% {Rs,Sn2,Cs2} -> {{seq,[L|Rs]},Sn2,Cs2}
+%% end.
+
+%% re_seq(Chars, SubNumber, State) -> {RegExp,SubNumber,Chars}.
+%% Parse a sequence of regexps. Allow the empty sequence, returns epsilon.
+
+re_seq(Cs0, Sn0, St) ->
+ case re_seq1(Cs0, Sn0, St) of
+ {[],Sn1,Cs1} -> {epsilon,Sn1,Cs1};
+ {[R],Sn1,Cs1} -> {R,Sn1,Cs1};
+ {Rs,Sn1,Cs1} -> {{seq,Rs},Sn1,Cs1}
+ end.
+
+re_seq1([C|_]=Cs0, Sn0, St) when C /= $|, C /= $) ->
+ {L,Sn1,Cs1} = re_repeat(Cs0, Sn0, St),
+ {Rs,Sn2,Cs2} = re_seq1(Cs1, Sn1, St),
+ {[L|Rs],Sn2,Cs2};
+re_seq1(Cs, Sn, _) -> {[],Sn,Cs}.
+
+%% re_repeat(Chars, SubNumber, State) -> {RegExp,SubNumber,Chars}.
+
+re_repeat(Cs0, Sn0, St) ->
+ {S,Sn1,Cs1} = re_single(Cs0, Sn0, St),
+ re_repeat1(Cs1, Sn1, S, St).
+
+re_repeat1([$*|Cs], Sn, S, St) -> re_repeat1(Cs, Sn, {kclosure,S}, St);
+re_repeat1([$+|Cs], Sn, S, St) -> re_repeat1(Cs, Sn, {pclosure,S}, St);
+re_repeat1([$?|Cs], Sn, S, St) -> re_repeat1(Cs, Sn, {optional,S}, St);
+%% { only starts interval when ere is true, otherwise normal character.
+re_repeat1([${|Cs0], Sn, S, #leex{posix=true}=St) -> % $}
+ case re_interval_range(Cs0) of
+ {Min,Max,[$}|Cs1]} when is_integer(Min), is_integer(Max), Min =< Max ->
+ re_repeat1(Cs1, Sn, {interval,S,Min,Max}, St);
+ {Min,Max,[$}|Cs1]} when is_integer(Min), is_atom(Max) ->
+ re_repeat1(Cs1, Sn, {interval,S,Min,Max}, St);
+ {_,_,Cs1} -> parse_error({interval_range,string_between([${|Cs0], Cs1)})
+ end;
+re_repeat1(Cs, Sn, S, _) -> {S,Sn,Cs}.
+
+%% re_single(Chars, SubNumber, State) -> {RegExp,SubNumber,Chars}.
+%% Parse a re_single regexp.
+
+re_single([$(|Cs0], Sn0, St) -> % $)
+ Sn1 = Sn0 + 1, % Keep track of sub count anyway
+ case re_reg(Cs0, Sn1, St) of
+ {S,Sn2,[$)|Cs1]} -> {S,Sn2,Cs1};
+ %%{S,Sn2,[$)|Cs1]} -> {{sub,S,Sn1},Sn2,Cs1};
+ _ -> parse_error({unterminated,"("})
+ end;
+%% These are not legal inside a regexp.
+%% re_single([$^|Cs], Sn, St) -> {bos,Sn,Cs};
+%% re_single([$$|Cs], Sn, St) -> {eos,Sn,Cs};
+%% re_single([$.|Cs], Sn, St) -> {any,Sn,Cs};
+re_single([$.|Cs], Sn, _) -> {{comp_class,"\n"},Sn,Cs}; % Do this here?
+re_single("[^" ++ Cs0, Sn, St) ->
+ case re_char_class(Cs0, St) of
+ {Cc,[$]|Cs1]} -> {{comp_class,Cc},Sn,Cs1};
+ _ -> parse_error({unterminated,"["})
+ end;
+re_single([$[|Cs0], Sn, St) ->
+ case re_char_class(Cs0, St) of
+ {Cc,[$]|Cs1]} -> {{char_class,Cc},Sn,Cs1};
+ _ -> parse_error({unterminated,"["})
+ end;
+re_single([$\\|Cs0], Sn, _) ->
+ {C,Cs1} = re_char($\\, Cs0),
+ {{lit,[C]},Sn,Cs1};
+re_single([C|Cs0], Sn, St) ->
+ case special_char(C, St) of
+ true -> parse_error({illegal_char,[C]});
+ false ->
+ {C,Cs1} = re_char(C, Cs0),
+ {{lit,[C]},Sn,Cs1}
+ end.
+
+-define(IS_HEX(C), C >= $0 andalso C =< $9 orelse
+ C >= $A andalso C =< $F orelse
+ C >= $a andalso C =< $f).
+
+%% re_char(Char, Chars) -> {CharValue,Chars}.
+%% Reads one character value from the input list, it knows about escapes.
+
+re_char($\\, [O1,O2,O3|S]) when
+ O1 >= $0, O1 =< $7, O2 >= $0, O2 =< $7, O3 >= $0, O3 =< $7 ->
+ {(O1*8 + O2)*8 + O3 - 73*$0,S};
+re_char($\\, [$x,H1,H2|S]) when ?IS_HEX(H1), ?IS_HEX(H2) ->
+ {erlang:list_to_integer([H1,H2], 16),S};
+re_char($\\,[$x,${|S0]) ->
+ re_hex(S0, []);
+re_char($\\,[$x|_]) ->
+ parse_error({illegal_char,"\\x"});
+re_char($\\, [C|S]) -> {escape_char(C),S};
+re_char($\\, []) -> parse_error({unterminated,"\\"});
+re_char(C, S) -> {C,S}. % Just this character
+
+re_hex([C|Cs], L) when ?IS_HEX(C) -> re_hex(Cs, [C|L]);
+re_hex([$}|S], L0) ->
+ L = lists:reverse(L0),
+ case erlang:list_to_integer(L, 16) of
+ C when C =< 16#10FFFF -> {C,S};
+ _ -> parse_error({illegal_char,[$\\,$x,${|L]++"}"})
+ end;
+re_hex(_, _) -> parse_error({unterminated,"\\x{"}).
+
+%% special_char(Char, State) -> bool().
+%% These are the special characters for an ERE.
+%% N.B. ]}) are only special in the context after [{(.
+
+special_char($^, _) -> true;
+special_char($., _) -> true;
+special_char($[, _) -> true;
+special_char($$, _) -> true;
+special_char($(, _) -> true;
+special_char($), _) -> true;
+special_char($|, _) -> true;
+special_char($*, _) -> true;
+special_char($+, _) -> true;
+special_char($?, _) -> true;
+special_char(${, #leex{posix=true}) -> true; % Only when POSIX set
+special_char($\\, _) -> true;
+special_char(_, _) -> false.
+
+%% re_char_class(Chars, State) -> {CharClass,Chars}.
+%% Parse a character class.
+
+re_char_class([$]|Cs], St) -> % Must special case this.
+ re_char_class(Cs, [$]], St);
+re_char_class(Cs, St) -> re_char_class(Cs, [], St).
+
+re_char_class("[:" ++ Cs0, Cc, #leex{posix=true}=St) ->
+ %% POSIX char class only.
+ case posix_cc(Cs0) of
+ {Pcl,":]" ++ Cs1} -> re_char_class(Cs1, [{posix,Pcl}|Cc], St);
+ {_,Cs1} -> parse_error({posix_cc,string_between(Cs0, Cs1)})
+ end;
+re_char_class([C1|Cs0], Cc, St) when C1 /= $] ->
+ case re_char(C1, Cs0) of
+ {Cf,[$-,C2|Cs1]} when C2 /= $] ->
+ case re_char(C2, Cs1) of
+ {Cl,Cs2} when Cf < Cl ->
+ re_char_class(Cs2, [{range,Cf,Cl}|Cc], St);
+ {_,Cs2} ->
+ parse_error({char_class,string_between([C1|Cs0], Cs2)})
+ end;
+ {C,Cs1} -> re_char_class(Cs1, [C|Cc], St)
+ end;
+re_char_class(Cs, Cc, _) -> {reverse(Cc),Cs}. % Preserve order
+
+%% posix_cc(String) -> {PosixClass,RestString}.
+%% Handle POSIX character classes.
+
+posix_cc("alnum" ++ Cs) -> {alnum,Cs};
+posix_cc("alpha" ++ Cs) -> {alpha,Cs};
+posix_cc("blank" ++ Cs) -> {blank,Cs};
+posix_cc("cntrl" ++ Cs) -> {cntrl,Cs};
+posix_cc("digit" ++ Cs) -> {digit,Cs};
+posix_cc("graph" ++ Cs) -> {graph,Cs};
+posix_cc("lower" ++ Cs) -> {lower,Cs};
+posix_cc("print" ++ Cs) -> {print,Cs};
+posix_cc("punct" ++ Cs) -> {punct,Cs};
+posix_cc("space" ++ Cs) -> {space,Cs};
+posix_cc("upper" ++ Cs) -> {upper,Cs};
+posix_cc("xdigit" ++ Cs) -> {xdigit,Cs};
+posix_cc(Cs) -> parse_error({posix_cc,substr(Cs, 1, 5)}).
+
+escape_char($n) -> $\n; % \n = LF
+escape_char($r) -> $\r; % \r = CR
+escape_char($t) -> $\t; % \t = TAB
+escape_char($v) -> $\v; % \v = VT
+escape_char($b) -> $\b; % \b = BS
+escape_char($f) -> $\f; % \f = FF
+escape_char($e) -> $\e; % \e = ESC
+escape_char($s) -> $\s; % \s = SPACE
+escape_char($d) -> $\d; % \d = DEL
+escape_char(C) -> C. % Pass it straight through
+
+%% re_interval_range(Chars) -> {Min,Max,RestChars}.
+%% NoInt -> none,none
+%% Int -> Int,none
+%% Int, -> Int,any
+%% Int1,Int2 -> Int1,Int2
+
+re_interval_range(Cs0) ->
+ case re_number(Cs0) of
+ {none,Cs1} -> {none,none,Cs1};
+ {N,[$,|Cs1]} ->
+ case re_number(Cs1) of
+ {none,Cs2} -> {N,any,Cs2};
+ {M,Cs2} -> {N,M,Cs2}
+ end;
+ {N,Cs1} -> {N,none,Cs1}
+ end.
+
+re_number([C|Cs]) when C >= $0, C =< $9 ->
+ re_number(Cs, C - $0);
+re_number(Cs) -> {none,Cs}.
+
+re_number([C|Cs], Acc) when C >= $0, C =< $9 ->
+ re_number(Cs, 10*Acc + (C - $0));
+re_number(Cs, Acc) -> {Acc,Cs}.
+
+string_between(Cs1, Cs2) ->
+ substr(Cs1, 1, length(Cs1)-length(Cs2)).
+
+%% We use standard methods, Thompson's construction and subset
+%% construction, to create first an NFA and then a DFA from the
+%% regexps. A non-standard feature is that we work with sets of
+%% character ranges (crs) instead sets of characters. This is most
+%% noticeable when constructing DFAs. The major benefit is that we can
+%% handle characters from any set, not just limited ASCII or 8859,
+%% even 16/32 bit unicode.
+%%
+%% The whole range of characters is 0-maxchar, where maxchar is a BIG
+%% number. We don't make any assumptions about the size of maxchar, it
+%% is just bigger than any character.
+%%
+%% Using character ranges makes describing many regexps very simple,
+%% for example the regexp "." just becomes the range
+%% [{0-9},{11-maxchar}].
+
+%% make_nfa(RegExpActions) -> {ok,{NFA,StartState}} | {error,E}.
+%% Build a complete nfa from a list of {RegExp,Action}. The NFA field
+%% accept has values {yes,Action}|no. The NFA is a list of states.
+
+make_dfa(REAs, St) ->
+ {NFA,NF} = build_combined_nfa(REAs),
+ verbose_print(St, "NFA contains ~w states, ", [tuple_size(NFA)]),
+ {DFA0,DF0} = build_dfa(NFA, NF),
+ verbose_print(St, "DFA contains ~w states, ", [length(DFA0)]),
+ {DFA,DF} = minimise_dfa(DFA0, DF0),
+ verbose_print(St, "minimised to ~w states.~n", [length(DFA)]),
+ %%io:fwrite("~p\n", [{NF,NFA}]),
+ %%io:fwrite("~p\n", [{DF0,DFA0}]),
+ %%io:fwrite("~p\n", [{DF,DFA}]),
+ {DFA,DF}.
+
+%% build_combined_nfa(RegExpActionList) -> {NFA,FirstState}.
+%% Build the combined NFA using Thompson's construction straight out
+%% of the book. Build the separate NFAs in the same order as the
+%% rules so that the accepting have ascending states have ascending
+%% state numbers. Start numbering the states from 1 as we put the
+%% states in a tuple with the state number as the index.
+%%
+%% The edges from a state are a list of {CharRange,State} | {epsilon,State}.
+
+build_combined_nfa(REAs) ->
+ {NFA0,Firsts,Free} = build_nfa_list(REAs, [], [], 1),
+ F = #nfa_state{no=Free,edges=epsilon_trans(Firsts)},
+ {list_to_tuple(keysort(#nfa_state.no, [F|NFA0])),Free}.
+
+build_nfa_list([{RE,Action}|REAs], NFA0, Firsts, Free0) ->
+ {NFA1,Free1,First} = build_nfa(RE, Free0, Action),
+ build_nfa_list(REAs, NFA1 ++ NFA0, [First|Firsts], Free1);
+build_nfa_list([], NFA, Firsts, Free) ->
+ {NFA,reverse(Firsts),Free}.
+
+epsilon_trans(Firsts) -> [ {epsilon,F} || F <- Firsts ].
+
+%% build_nfa(RegExp, NextState, Action) -> {NFA,NextState,FirstState}.
+%% When building the NFA states for a regexp we don't build the end
+%% state, just allocate a State for it and return this state's
+%% number. This allows us to avoid building unnecessary states for
+%% concatenation which would then have to be removed by overwriting
+%% an existing state.
+
+build_nfa(RE, N0, Action) ->
+ {NFA,N1,E} = build_nfa(RE, N0+1, N0, []),
+ {[#nfa_state{no=E,accept={accept,Action}}|NFA],N1,N0}.
+
+%% build_nfa(RegExp, NextState, FirstState, NFA) -> {NFA,NextState,EndState}.
+%% Build an NFA from the RegExp. NFA is a list of #nfa_state{} in no
+%% predefined order. NextState is the number of the next free state
+%% to use, FirstState is the the state which must be the start for
+%% this regexp as a previous regexp refers to it, EndState is the
+%% state to which this NFA will exit to. The number of the returned
+%% EndState is already allocated!
+
+build_nfa({alt,REs}, N, F, NFA) ->
+ build_nfa_alt(REs, N, F, NFA);
+build_nfa({seq,REs}, N, F, NFA) ->
+ build_nfa_seq(REs, N, F, NFA);
+build_nfa({kclosure,RE}, N0, F, NFA0) ->
+ {NFA1,N1,E1} = build_nfa(RE, N0+1, N0, NFA0),
+ E = N1, % End state
+ {[#nfa_state{no=F,edges=[{epsilon,N0},{epsilon,E}]},
+ #nfa_state{no=E1,edges=[{epsilon,N0},{epsilon,E}]}|NFA1],
+ N1+1,E};
+build_nfa({pclosure,RE}, N0, F, NFA0) ->
+ {NFA1,N1,E1} = build_nfa(RE, N0+1, N0, NFA0),
+ E = N1, % End state
+ {[#nfa_state{no=F,edges=[{epsilon,N0}]},
+ #nfa_state{no=E1,edges=[{epsilon,N0},{epsilon,E}]}|NFA1],
+ N1+1,E};
+build_nfa({optional,RE}, N0, F, NFA0) ->
+ {NFA1,N1,E1} = build_nfa(RE, N0+1, N0, NFA0),
+ E = N1, % End state
+ {[#nfa_state{no=F,edges=[{epsilon,N0},{epsilon,E}]},
+ #nfa_state{no=E1,edges=[{epsilon,E}]}|NFA1],
+ N1+1,E};
+build_nfa({char_class,Cc}, N, F, NFA) ->
+ {[#nfa_state{no=F,edges=[{pack_cc(Cc),N}]}|NFA],N+1,N};
+build_nfa({comp_class,Cc}, N, F, NFA) ->
+ {[#nfa_state{no=F,edges=[{comp_class(Cc),N}]}|NFA],N+1,N};
+build_nfa({lit,Cs}, N, F, NFA) -> % Implicit concatenation
+ build_nfa_lit(Cs, N, F, NFA);
+build_nfa(epsilon, N, F, NFA) -> % Just an epsilon transition
+ {[#nfa_state{no=F,edges=[{epsilon,N}]}|NFA],N+1,N}.
+
+%% build_nfa_lit(Chars, NextState, FirstState, NFA) -> {NFA,NextState,EndState}.
+%% Build an NFA for the sequence of literal characters.
+
+build_nfa_lit(Cs, N0, F0, NFA0) ->
+ foldl(fun (C, {NFA,N,F}) ->
+ {[#nfa_state{no=F,edges=[{[{C,C}],N}]}|NFA],N+1,N}
+ end, {NFA0,N0,F0}, Cs).
+
+%% build_nfa_lit([C|Cs], N, F, NFA0) when is_integer(C) ->
+%% NFA1 = [#nfa_state{no=F,edges=[{[{C,C}],N}]}|NFA0],
+%% build_nfa_lit(Cs, N+1, N, NFA1);
+%% build_nfa_lit([], N, F, NFA) -> {NFA,N,F}.
+
+%% build_nfa_seq(REs, NextState, FirstState, NFA) -> {NFA,NextState,EndState}.
+%% Build an NFA for the regexps in a sequence.
+
+build_nfa_seq(REs, N0, F0, NFA0) ->
+ foldl(fun (RE, {NFA,N,F}) -> build_nfa(RE, N, F, NFA) end,
+ {NFA0,N0,F0}, REs).
+
+%% build_nfa_seq([RE|REs], N0, F, NFA0) ->
+%% {NFA1,N1,E1} = build_nfa(RE, N0, F, NFA0),
+%% build_nfa_seq(REs, N1, E1, NFA1);
+%% build_nfa_seq([], N, F, NFA) -> {NFA,N,F}.
+
+%% build_nfa_alt(REs, NextState, FirstState, NFA) -> {NFA,NextState,EndState}.
+%% Build an NFA for the regexps in an alternative. N.B. we don't
+%% handle empty alts here but the parser should never generate them
+%% anyway.
+
+build_nfa_alt([RE], N, F, NFA) -> build_nfa(RE, N, F, NFA);
+build_nfa_alt([RE|REs], N0, F, NFA0) ->
+ {NFA1,N1,E1} = build_nfa(RE, N0+1, N0, NFA0),
+ {NFA2,N2,E2} = build_nfa_alt(REs, N1+1, N1, NFA1),
+ E = N2, % End state
+ {[#nfa_state{no=F,edges=[{epsilon,N0},{epsilon,N1}]},
+ #nfa_state{no=E1,edges=[{epsilon,E}]},
+ #nfa_state{no=E2,edges=[{epsilon,E}]}|NFA2],
+ N2+1,E}.
+
+%% build_nfa_alt(REs, NextState, FirstState, NFA) -> {NFA,NextState,EndState}.
+%% Build an NFA for the regexps in an alternative. Make one big
+%% epsilon split state, not necessary but fun.
+
+%% build_nfa_alt(REs, N0, F0, NFA0) ->
+%% E = N0, % Must reserve End state first
+%% {Fs,{NFA1,N1}} = mapfoldl(fun (RE, {NFA,N}) ->
+%% build_nfa_alt1(RE, N, E, NFA)
+%% end, {NFA0,N0+1}, REs),
+%% {[#nfa_state{no=F0,edges=epsilon_trans(Fs)},
+%% #nfa_state{no=E,edges=[{epsilon,N1}]}|NFA1],N1+1,N1}.
+
+%% build_nfa_alt1(RE, N0, End, NFA0) ->
+%% {NFA1,N1,E} = build_nfa(RE, N0+1, N0, NFA0),
+%% {N0,{[#nfa_state{no=E,edges=[{epsilon,End}]}|NFA1],N1}}.
+
+%% pack_cc(CharClass) -> CharClass
+%% Pack and optimise a character class specification (bracket
+%% expression). First sort it and then compact it.
+
+pack_cc(Cc) ->
+ Crs = foldl(fun ({range,Cf,Cl}, Set) -> add_element({Cf,Cl}, Set);
+ (C, Set) -> add_element({C,C}, Set)
+ end, ordsets:new(), Cc),
+ pack_crs(Crs). % An ordset IS a list!
+
+pack_crs([{C1,C2}=Cr,{C3,C4}|Crs]) when C1 =< C3, C2 >= C4 ->
+ %% C1 C2
+ %% C3 C4
+ pack_crs([Cr|Crs]);
+pack_crs([{C1,C2},{C3,C4}|Crs]) when C2 >= C3, C2 < C4 ->
+ %% C1 C2
+ %% C3 C4
+ pack_crs([{C1,C4}|Crs]);
+pack_crs([{C1,C2},{C3,C4}|Crs]) when C2 + 1 == C3 ->
+ %% C1 C2
+ %% C3 C4
+ pack_crs([{C1,C4}|Crs]);
+pack_crs([Cr|Crs]) -> [Cr|pack_crs(Crs)];
+pack_crs([]) -> [].
+
+comp_class(Cc) ->
+ Crs = pack_cc(Cc),
+ Comp = comp_crs(Crs, 0),
+ %% io:fwrite("comp: ~p\n ~p\n", [Crs,Comp]),
+ Comp.
+
+comp_crs([{0,C2}|Crs], 0) -> % Get first range right
+ comp_crs(Crs, C2+1);
+comp_crs([{C1,C2}|Crs], Last) ->
+ [{Last,C1-1}|comp_crs(Crs, C2+1)];
+comp_crs([], Last) -> [{Last,maxchar}].
+
+%% build_dfa(NFA, NfaFirstState) -> {DFA,DfaFirstState}.
+%% Build a DFA from an NFA using "subset construction". The major
+%% difference from the book is that we keep the marked and unmarked
+%% DFA states in seperate lists. New DFA states are added to the
+%% unmarked list and states are marked by moving them to the marked
+%% list. We assume that the NFA accepting state numbers are in
+%% ascending order for the rules and use ordsets to keep this order.
+
+build_dfa(NFA, Nf) ->
+ D = #dfa_state{no=0,nfa=eclosure([Nf], NFA)},
+ {build_dfa([D], 1, [], NFA),0}.
+
+%% build_dfa([UnMarked], NextState, [Marked], NFA) -> DFA.
+%% Traverse the unmarked states. Temporarily add the current unmarked
+%% state to the marked list before calculating translation, this is
+%% to avoid adding too many duplicate states. Add it properly to the
+%% marked list afterwards with correct translations.
+
+build_dfa([U|Us0], N0, Ms, NFA) ->
+ {Ts,Us1,N1} = build_dfa(U#dfa_state.nfa, Us0, N0, [], [U|Ms], NFA),
+ M = U#dfa_state{trans=Ts,accept=accept(U#dfa_state.nfa, NFA)},
+ build_dfa(Us1, N1, [M|Ms], NFA);
+build_dfa([], _, Ms, _) -> Ms.
+
+%% build_dfa([NfaState], [Unmarked], NextState, [Transition], [Marked], NFA) ->
+%% {Transitions,UnmarkedStates,NextState}.
+%% Foreach NFA state set calculate the legal translations. N.B. must
+%% search *BOTH* the unmarked and marked lists to check if DFA state
+%% already exists. As the range of characters is potentially VERY
+%% large we cannot explicitly test all characters. Instead we first
+%% calculate the set of all disjoint character ranges which are
+%% possible candidates to the set of NFA states. The transitions are
+%% an orddict so we get the transition lists in ascending order.
+
+build_dfa(Set, Us, N, Ts, Ms, NFA) ->
+ %% List of all transition sets.
+ Crs0 = [Cr || S <- Set,
+ {Crs,_St} <- (element(S, NFA))#nfa_state.edges,
+ Crs /= epsilon, % Not an epsilon transition
+ Cr <- Crs ],
+ Crs1 = lists:usort(Crs0), % Must remove duplicates!
+ %% Build list of disjoint test ranges.
+ Test = disjoint_crs(Crs1),
+ %% io:fwrite("bd: ~p\n ~p\n ~p\n ~p\n", [Set,Crs0,Crs1,Test]),
+ build_dfa(Test, Set, Us, N, Ts, Ms, NFA).
+
+%% disjoint_crs([CharRange]) -> [CharRange].
+%% Take a sorted list of char ranges and make a sorted list of
+%% disjoint char ranges. No new char range extends past an existing
+%% char range.
+
+disjoint_crs([{_C1,C2}=Cr1,{C3,_C4}=Cr2|Crs]) when C2 < C3 ->
+ %% C1 C2
+ %% C3 C4
+ [Cr1|disjoint_crs([Cr2|Crs])];
+disjoint_crs([{C1,C2},{C3,C4}|Crs]) when C1 == C3 ->
+ %% C1 C2
+ %% C3 C4
+ [{C1,C2}|disjoint_crs(add_element({C2+1,C4}, Crs))];
+disjoint_crs([{C1,C2},{C3,C4}|Crs]) when C1 < C3, C2 >= C3, C2 < C4 ->
+ %% C1 C2
+ %% C3 C4
+ [{C1,C3-1}|disjoint_crs(union([{C3,C2},{C2+1,C4}], Crs))];
+disjoint_crs([{C1,C2},{C3,C4}|Crs]) when C1 < C3, C2 == C4 ->
+ %% C1 C2
+ %% C3 C4
+ [{C1,C3-1}|disjoint_crs(add_element({C3,C4}, Crs))];
+disjoint_crs([{C1,C2},{C3,C4}|Crs]) when C1 < C3, C2 > C4 ->
+ %% C1 C2
+ %% C3 C4
+ [{C1,C3-1}|disjoint_crs(union([{C3,C4},{C4+1,C2}], Crs))];
+disjoint_crs([Cr|Crs]) -> [Cr|disjoint_crs(Crs)];
+disjoint_crs([]) -> [].
+
+build_dfa([Cr|Crs], Set, Us, N, Ts, Ms, NFA) ->
+ case eclosure(move(Set, Cr, NFA), NFA) of
+ S when S /= [] ->
+ case dfa_state_exist(S, Us, Ms) of
+ {yes,T} ->
+ build_dfa(Crs, Set, Us, N, store(Cr, T, Ts), Ms, NFA);
+ no ->
+ U = #dfa_state{no=N,nfa=S},
+ build_dfa(Crs, Set, [U|Us], N+1, store(Cr, N, Ts), Ms, NFA)
+ end;
+ [] ->
+ build_dfa(Crs, Set, Us, N, Ts, Ms, NFA)
+ end;
+build_dfa([], _, Us, N, Ts, _, _) ->
+ {Ts,Us,N}.
+
+%% dfa_state_exist(Set, Unmarked, Marked) -> {yes,State} | no.
+
+dfa_state_exist(S, Us, Ms) ->
+ case keysearch(S, #dfa_state.nfa, Us) of
+ {value,#dfa_state{no=T}} -> {yes,T};
+ false ->
+ case keysearch(S, #dfa_state.nfa, Ms) of
+ {value,#dfa_state{no=T}} -> {yes,T};
+ false -> no
+ end
+ end.
+
+%% eclosure([State], NFA) -> [State].
+%% move([State], Char, NFA) -> [State].
+%% These are straight out of the book. As eclosure uses ordsets then
+%% the generated state sets are in ascending order.
+
+eclosure(Sts, NFA) -> eclosure(Sts, NFA, []).
+
+eclosure([St|Sts], NFA, Ec) ->
+ #nfa_state{edges=Es} = element(St, NFA),
+ eclosure([ N || {epsilon,N} <- Es,
+ not is_element(N, Ec) ] ++ Sts,
+ NFA, add_element(St, Ec));
+eclosure([], _, Ec) -> Ec.
+
+move(Sts, Cr, NFA) ->
+ %% io:fwrite("move1: ~p\n", [{Sts,Cr}]),
+ [ St || N <- Sts,
+ {Crs,St} <- (element(N, NFA))#nfa_state.edges,
+ Crs /= epsilon, % Not an epsilon transition
+ in_crs(Cr, Crs) ].
+
+in_crs({C1,C2}, [{C3,C4}|_Crs]) when C1 >= C3, C2 =< C4 -> true;
+in_crs(Cr, [Cr|_Crs]) -> true; % Catch bos and eos.
+in_crs(Cr, [_|Crs]) -> in_crs(Cr, Crs);
+in_crs(_Cr, []) -> false.
+
+%% accept([State], NFA) -> {accept,A} | noaccept.
+%% Scan down the state list until we find an accepting state.
+
+accept([St|Sts], NFA) ->
+ case element(St, NFA) of
+ #nfa_state{accept={accept,A}} -> {accept,A};
+ #nfa_state{accept=noaccept} -> accept(Sts, NFA)
+ end;
+accept([], _) -> noaccept.
+
+%% minimise_dfa(DFA, DfaFirst) -> {DFA,DfaFirst}.
+%% Minimise the DFA by removing equivalent states. We consider a
+%% state if both the transitions and the their accept state is the
+%% same. First repeatedly run throught the DFA state list removing
+%% equivalent states and updating remaining transitions with
+%% remaining equivalent state numbers. When no more reductions are
+%% possible then pack the remaining state numbers to get consecutive
+%% states.
+
+minimise_dfa(DFA0, Df0) ->
+ case min_dfa(DFA0) of
+ {DFA1,[]} -> % No reduction!
+ {DFA2,Rs} = pack_dfa(DFA1),
+ {min_update(DFA2, Rs),min_use(Df0, Rs)};
+ {DFA1,Rs} ->
+ minimise_dfa(min_update(DFA1, Rs), min_use(Df0, Rs))
+ end.
+
+min_dfa(DFA) -> min_dfa(DFA, [], []).
+
+min_dfa([D|DFA0], Rs0, MDFA) ->
+ {DFA1,Rs1} = min_delete(DFA0, D#dfa_state.trans, D#dfa_state.accept,
+ D#dfa_state.no, Rs0, []),
+ min_dfa(DFA1, Rs1, [D|MDFA]);
+min_dfa([], Rs, MDFA) -> {MDFA,Rs}.
+
+%% min_delete(States, Trans, Action, NewN, Rs, MiniDFA) -> {MiniDFA,Rs}.
+%% Delete all states with same transactions and action. Return
+%% rewrites and minimised DFA with no duplicate states.
+
+min_delete([#dfa_state{no=N,trans=T,accept=A}|DFA], T, A, NewN, Rs, MDFA) ->
+ min_delete(DFA, T, A, NewN, [{N,NewN}|Rs], MDFA);
+min_delete([D|DFA], T, A, NewN, Rs, MDFA) ->
+ min_delete(DFA, T, A, NewN, Rs, [D|MDFA]);
+min_delete([], _, _, _, Rs, MDFA) -> {MDFA,Rs}.
+
+min_update(DFA, Rs) ->
+ [ D#dfa_state{trans=min_update_trans(D#dfa_state.trans, Rs)} || D <- DFA ].
+
+min_update_trans(Tr, Rs) ->
+ [ {C,min_use(S, Rs)} || {C,S} <- Tr ].
+
+min_use(Old, [{Old,New}|_]) -> New;
+min_use(Old, [_|Reds]) -> min_use(Old, Reds);
+min_use(Old, []) -> Old.
+
+pack_dfa(DFA) -> pack_dfa(DFA, 0, [], []).
+
+pack_dfa([D|DFA], NewN, Rs, PDFA) ->
+ pack_dfa(DFA, NewN+1,
+ [{D#dfa_state.no,NewN}|Rs], [D#dfa_state{no=NewN}|PDFA]);
+pack_dfa([], _, Rs, PDFA) -> {PDFA,Rs}.
+
+%% The main output is the yystate function which is built from the
+%% DFA. It has the spec:
+%%
+%% yystate() -> InitialState.
+%% yystate(State, InChars, Line, CurrTokLen, AcceptAction, AcceptLen) ->
+%% {Action, AcceptLength, RestChars, Line} | Accepting end state
+%% {Action, AcceptLength, RestChars, Line, State} | Accepting state
+%% {reject, AcceptLength, CurrTokLen, RestChars, Line, State} |
+%% {Action, AcceptLength, CurrTokLen, RestChars, Line, State}.
+
+%% The return CurrTokLen is always the current number of characters
+%% scanned in the current token. The returns have the follwoing
+%% meanings:
+%% {Action, AcceptLength, RestChars, Line} -
+%% The scanner has reached an accepting end-state, for example after
+%% a regexp "abc". Action is the action number and AcceptLength is
+%% the length of the matching token.
+%%
+%% {Action, AcceptLength, RestChars, Line, State} -
+%% The scanner has reached an accepting transition state, for example
+%% after c in regexp "abc(xyz)?", continuation depends on
+%% RestChars. If RestChars == [] (no more current characters) then we
+%% need to get more characters to see if it is an end-state,
+%% otherwise (eof or chars) then we have not found continuing
+%% characters and it is an end state.
+%%
+%% {reject, AcceptLength, CurrTokLen, RestChars, Line, State} -
+%% {Action, AcceptLength, CurrTokLen, RestChars, Line, State} -
+%% The scanner has reached a non-accepting transistion state. If
+%% RestChars == [] we need to get more characters to continue.
+%% Otherwise if 'reject' then no accepting state has been reached it
+%% is an error. If we have an Action and AcceptLength then these are
+%% the last accept state, use them and continue from there.
+
+%% out_file(LeexState, DFA, DfaStart, [Action], Code) -> ok | error.
+%% Generate an output .erl file from the include file, the DFA and
+%% the code for the actions.
+
+out_file(St0, DFA, DF, Actions, Code) ->
+ verbose_print(St0, "Writing file ~s, ", [St0#leex.efile]),
+ case open_inc_file(St0) of
+ {ok,Ifile} ->
+ try
+ case file:open(St0#leex.efile, [write]) of
+ {ok,Ofile} ->
+ try
+ output_file_directive(Ofile, St0#leex.ifile, 0),
+ out_file(Ifile, Ofile, St0, DFA, DF, Actions,
+ Code, 1),
+ verbose_print(St0, "ok~n", []),
+ St0
+ after file:close(Ofile)
+ end;
+ {error,Error} ->
+ verbose_print(St0, "error~n", []),
+ add_error({none,leex,{file_error,Error}}, St0)
+ end
+ after file:close(Ifile)
+ end;
+ {{error,Error},Ifile} ->
+ add_error(Ifile, {none,leex,{file_error,Error}}, St0)
+ end.
+
+open_inc_file(State) ->
+ Ifile = State#leex.ifile,
+ case file:open(Ifile, [read]) of
+ {ok,F} -> {ok,F};
+ Error -> {Error,Ifile}
+ end.
+
+inc_file_name([]) ->
+ Incdir = filename:join(code:lib_dir(parsetools), "include"),
+ filename:join(Incdir, ?LEEXINC);
+inc_file_name(Filename) ->
+ Filename.
+
+%% out_file(IncFile, OutFile, State, DFA, DfaStart, Actions, Code, Line) -> ok
+%% Copy the include file line by line substituting special lines with
+%% generated code. We cheat by only looking at the first 5
+%% characters.
+
+out_file(Ifile, Ofile, St, DFA, DF, Actions, Code, L) ->
+ case io:get_line(Ifile, leex) of
+ eof -> output_file_directive(Ofile, St#leex.ifile, L);
+ Line ->
+ case substr(Line, 1, 5) of
+ "##mod" -> out_module(Ofile, St);
+ "##cod" -> out_erlang_code(Ofile, St, Code, L);
+ "##dfa" -> out_dfa(Ofile, St, DFA, Code, DF, L);
+ "##act" -> out_actions(Ofile, St#leex.xfile, Actions);
+ _ -> io:put_chars(Ofile, Line)
+ end,
+ out_file(Ifile, Ofile, St, DFA, DF, Actions, Code, L+1)
+ end.
+
+out_module(File, St) ->
+ io:fwrite(File, "-module(~w).\n", [St#leex.module]).
+
+out_erlang_code(File, St, Code, L) ->
+ {CodeL,CodePos,_NCodeLines} = Code,
+ output_file_directive(File, St#leex.xfile, CodeL),
+ {ok,Xfile} = file:open(St#leex.xfile, [read]),
+ try
+ {ok,_} = file:position(Xfile, CodePos),
+ {ok,_} = file:copy(Xfile, File)
+ after
+ file:close(Xfile)
+ end,
+ io:nl(File),
+ output_file_directive(File, St#leex.ifile, L).
+
+out_dfa(File, St, DFA, Code, DF, L) ->
+ {_CodeL,_CodePos,NCodeLines} = Code,
+ %% Three file attributes before this one...
+ output_file_directive(File, St#leex.efile, L+(NCodeLines-1)+3),
+ io:fwrite(File, "yystate() -> ~w.~n~n", [DF]),
+ foreach(fun (S) -> out_trans(File, S) end, DFA),
+ io:fwrite(File, "yystate(S, Ics, Line, Tlen, Action, Alen) ->~n", []),
+ io:fwrite(File, " {Action,Alen,Tlen,Ics,Line,S}.~n", []).
+
+out_trans(File, #dfa_state{no=N,trans=[],accept={accept,A}}) ->
+ %% Accepting end state, guaranteed done.
+ io:fwrite(File, "yystate(~w, Ics, Line, Tlen, _, _) ->~n", [N]),
+ io:fwrite(File, " {~w,Tlen,Ics,Line};~n", [A]);
+out_trans(File, #dfa_state{no=N,trans=Tr,accept={accept,A}}) ->
+ %% Accepting state, but there maybe more.
+ foreach(fun (T) -> out_accept_tran(File, N, A, T) end, pack_trans(Tr)),
+ io:fwrite(File, "yystate(~w, Ics, Line, Tlen, _, _) ->~n", [N]),
+ io:fwrite(File, " {~w,Tlen,Ics,Line,~w};~n", [A,N]);
+out_trans(File, #dfa_state{no=N,trans=Tr,accept=noaccept}) ->
+ %% Non-accepting transition state.
+ foreach(fun (T) -> out_noaccept_tran(File, N, T) end, pack_trans(Tr)),
+ io:fwrite(File, "yystate(~w, Ics, Line, Tlen, Action, Alen) ->~n", [N]),
+ io:fwrite(File, " {Action,Alen,Tlen,Ics,Line,~w};~n", [N]).
+
+out_accept_tran(File, N, A, {{Cf,maxchar},S}) ->
+ out_accept_head_max(File, N, Cf),
+ out_accept_body(File, S, "Line", A);
+out_accept_tran(File, N, A, {{Cf,Cl},S}) ->
+ out_accept_head_range(File, N, Cf, Cl),
+ out_accept_body(File, S, "Line", A);
+out_accept_tran(File, N, A, {$\n,S}) ->
+ out_accept_head_1(File, N, $\n),
+ out_accept_body(File, S, "Line+1", A);
+out_accept_tran(File, N, A, {C,S}) ->
+ out_accept_head_1(File, N, C),
+ out_accept_body(File, S, "Line", A).
+
+out_accept_head_1(File, State, Char) ->
+ out_head_1(File, State, Char, "_", "_").
+
+out_accept_head_max(File, State, Min) ->
+ out_head_max(File, State, Min, "_", "_").
+
+out_accept_head_range(File, State, Min, Max) ->
+ out_head_range(File, State, Min, Max, "_", "_").
+
+out_accept_body(File, Next, Line, Action) ->
+ out_body(File, Next, Line, io_lib:write(Action), "Tlen").
+
+out_noaccept_tran(File, N, {{Cf,maxchar},S}) ->
+ out_noaccept_head_max(File, N, Cf),
+ out_noaccept_body(File, S, "Line");
+out_noaccept_tran(File, N, {{Cf,Cl},S}) ->
+ out_noaccept_head_range(File, N, Cf, Cl),
+ out_noaccept_body(File, S, "Line");
+out_noaccept_tran(File, N, {$\n,S}) ->
+ out_noaccept_head_1(File, N, $\n),
+ out_noaccept_body(File, S, "Line+1");
+out_noaccept_tran(File, N, {C,S}) ->
+ out_noaccept_head_1(File, N, C),
+ out_noaccept_body(File, S, "Line").
+
+out_noaccept_head_1(File, State, Char) ->
+ out_head_1(File, State, Char, "Action", "Alen").
+
+out_noaccept_head_max(File, State, Min) ->
+ out_head_max(File, State, Min, "Action", "Alen").
+
+out_noaccept_head_range(File, State, Min, Max) ->
+ out_head_range(File, State, Min, Max, "Action", "Alen").
+
+out_noaccept_body(File, Next, Line) ->
+ out_body(File, Next, Line, "Action", "Alen").
+
+out_head_1(File, State, Char, Action, Alen) ->
+ io:fwrite(File, "yystate(~w, [~w|Ics], Line, Tlen, ~s, ~s) ->\n",
+ [State,Char,Action,Alen]).
+
+out_head_max(File, State, Min, Action, Alen) ->
+ io:fwrite(File, "yystate(~w, [C|Ics], Line, Tlen, ~s, ~s) when C >= ~w ->\n",
+ [State,Action,Alen,Min]).
+
+out_head_range(File, State, Min, Max, Action, Alen) ->
+ io:fwrite(File, "yystate(~w, [C|Ics], Line, Tlen, ~s, ~s) when C >= ~w, C =< ~w ->\n",
+ [State,Action,Alen,Min,Max]).
+
+out_body(File, Next, Line, Action, Alen) ->
+ io:fwrite(File, " yystate(~w, Ics, ~s, Tlen+1, ~s, ~s);\n",
+ [Next,Line,Action,Alen]).
+
+%% pack_trans([{Crange,State}]) -> [{Crange,State}] when
+%% Crange = {Char,Char} | Char.
+%% Pack the translation table into something more suitable for
+%% generating code. We KNOW how the pattern matching compiler works
+%% so solitary characters are stored before ranges. We do this by
+%% prepending singletons to the front of the packed transitions and
+%% appending ranges to the back. This preserves the smallest to
+%% largest order of ranges. Newline characters, $\n, are always
+%% extracted and handled as singeltons.
+
+pack_trans(Trs) -> pack_trans(Trs, []).
+
+%% pack_trans(Trs) ->
+%% Trs1 = pack_trans(Trs, []),
+%% io:fwrite("tr:~p\n=> ~p\n", [Trs,Trs1]),
+%% Trs1.
+
+pack_trans([{{C,C},S}|Trs], Pt) -> % Singletons to the head
+ pack_trans(Trs, [{C,S}|Pt]);
+%% Special detection and handling of $\n.
+pack_trans([{{Cf,$\n},S}|Trs], Pt) ->
+ pack_trans([{{Cf,$\n-1},S}|Trs], [{$\n,S}|Pt]);
+pack_trans([{{$\n,Cl},S}|Trs], Pt) ->
+ pack_trans([{{$\n+1,Cl},S}|Trs], [{$\n,S}|Pt]);
+pack_trans([{{Cf,Cl},S}|Trs], Pt) when Cf < $\n, Cl > $\n ->
+ pack_trans([{{Cf,$\n-1},S},{{$\n+1,Cl},S}|Trs], [{$\n,S}|Pt]);
+%% Small ranges become singletons.
+pack_trans([{{Cf,Cl},S}|Trs], Pt) when Cl == Cf + 1 ->
+ pack_trans(Trs, [{Cf,S},{Cl,S}|Pt]);
+pack_trans([Tr|Trs], Pt) -> % The default uninteresting case
+ pack_trans(Trs, Pt ++ [Tr]);
+pack_trans([], Pt) -> Pt.
+
+%% out_actions(File, XrlFile, ActionList) -> ok.
+%% Write out the action table.
+
+out_actions(File, XrlFile, As) ->
+ As1 = prep_out_actions(As),
+ foreach(fun (A) -> out_action(File, A) end, As1),
+ io:fwrite(File, "yyaction(_, _, _, _) -> error.~n", []),
+ foreach(fun (A) -> out_action_code(File, XrlFile, A) end, As1).
+
+prep_out_actions(As) ->
+ map(fun ({A,empty_action}) ->
+ {A,empty_action};
+ ({A,Code,TokenChars,TokenLen,TokenLine}) ->
+ Vs = [{TokenChars,"TokenChars"},
+ {TokenLen,"TokenLen"},
+ {TokenLine,"TokenLine"},
+ {TokenChars,"YYtcs"},
+ {TokenLen or TokenChars,"TokenLen"}],
+ Vars = [if F -> S; true -> "_" end || {F,S} <- Vs],
+ Name = list_to_atom(lists:concat([yyaction_,A])),
+ [Chars,Len,Line,_,_] = Vars,
+ Args = [V || V <- [Chars,Len,Line], V =/= "_"],
+ ArgsChars = string:join(Args, ", "),
+ {A,Code,Vars,Name,Args,ArgsChars}
+ end, As).
+
+out_action(File, {A,empty_action}) ->
+ io:fwrite(File, "yyaction(~w, _, _, _) -> skip_token;~n", [A]);
+out_action(File, {A,_Code,Vars,Name,_Args,ArgsChars}) ->
+ [_,_,Line,Tcs,Len] = Vars,
+ io:fwrite(File, "yyaction(~w, ~s, ~s, ~s) ->~n", [A,Len,Tcs,Line]),
+ if
+ Tcs =/= "_" ->
+ io:fwrite(File, " TokenChars = yypre(YYtcs, TokenLen),~n", []);
+ true -> ok
+ end,
+ io:fwrite(File, " ~s(~s);~n", [Name, ArgsChars]).
+
+out_action_code(_File, _XrlFile, {_A,empty_action}) ->
+ ok;
+out_action_code(File, XrlFile, {_A,Code,_Vars,Name,Args,ArgsChars}) ->
+ %% Should set the file to the .erl file, but instead assumes that
+ %% ?LEEXINC is syntactically correct.
+ io:fwrite(File, "\n-compile({inline,~w/~w}).\n", [Name, length(Args)]),
+ {line, L} = erl_scan:token_info(hd(Code), line),
+ output_file_directive(File, XrlFile, L-2),
+ io:fwrite(File, "~s(~s) ->~n", [Name, ArgsChars]),
+ io:fwrite(File, " ~s\n", [pp_tokens(Code, L)]).
+
+%% pp_tokens(Tokens, Line) -> [char()].
+%% Prints the tokens keeping the line breaks of the original code.
+
+pp_tokens(Tokens, Line0) -> pp_tokens(Tokens, Line0, none).
+
+pp_tokens([], _Line0, _) -> [];
+pp_tokens([T | Ts], Line0, Prev) ->
+ {line, Line} = erl_scan:token_info(T, line),
+ [pp_sep(Line, Line0, Prev, T), pp_symbol(T) | pp_tokens(Ts, Line, T)].
+
+pp_symbol({var,_,Var}) -> atom_to_list(Var);
+pp_symbol({_,_,Symbol}) -> io_lib:fwrite("~p", [Symbol]);
+pp_symbol({dot, _}) -> ".";
+pp_symbol({Symbol, _}) -> atom_to_list(Symbol).
+
+pp_sep(Line, Line0, Prev, T) when Line > Line0 ->
+ ["\n " | pp_sep(Line - 1, Line0, Prev, T)];
+pp_sep(_, _, {'.',_}, _) -> ""; % No space after '.' (not a dot)
+pp_sep(_, _, {'#',_}, _) -> ""; % No space after '#'
+pp_sep(_, _, {'(',_}, _) -> ""; % No space after '('
+pp_sep(_, _, {'[',_}, _) -> ""; % No space after '['
+pp_sep(_, _, _, {'.',_}) -> ""; % No space before '.'
+pp_sep(_, _, _, {'#',_}) -> ""; % No space before '#'
+pp_sep(_, _, _, {',',_}) -> ""; % No space before ','
+pp_sep(_, _, _, {')',_}) -> ""; % No space before ')'
+pp_sep(_, _, _, _) -> " ".
+
+%% out_dfa_graph(LeexState, DFA, DfaStart) -> ok | error.
+%% Writes the DFA to a .dot file in DOT-format which can be viewed
+%% with Graphviz.
+
+out_dfa_graph(St, DFA, DF) ->
+ verbose_print(St, "Writing DFA to file ~s, ", [St#leex.gfile]),
+ case file:open(St#leex.gfile, [write]) of
+ {ok,Gfile} ->
+ try
+ io:fwrite(Gfile, "digraph DFA {~n", []),
+ out_dfa_states(Gfile, DFA, DF),
+ out_dfa_edges(Gfile, DFA),
+ io:fwrite(Gfile, "}~n", []),
+ verbose_print(St, "ok~n", []),
+ St
+ after file:close(Gfile)
+ end;
+ {error,Error} ->
+ verbose_print(St, "error~n", []),
+ add_error({none,leex,{file_error,Error}}, St)
+ end.
+
+out_dfa_states(File, DFA, DF) ->
+ foreach(fun (S) -> out_dfa_state(File, DF, S) end, DFA),
+ io:fwrite(File, "~n", []).
+
+out_dfa_state(File, DF, #dfa_state{no=DF, accept={accept,_}}) ->
+ io:fwrite(File, " ~b [shape=doublecircle color=green];~n", [DF]);
+out_dfa_state(File, DF, #dfa_state{no=DF, accept=noaccept}) ->
+ io:fwrite(File, " ~b [shape=circle color=green];~n", [DF]);
+out_dfa_state(File, _, #dfa_state{no=S, accept={accept,_}}) ->
+ io:fwrite(File, " ~b [shape=doublecircle];~n", [S]);
+out_dfa_state(File, _, #dfa_state{no=S, accept=noaccept}) ->
+ io:fwrite(File, " ~b [shape=circle];~n", [S]).
+
+out_dfa_edges(File, DFA) ->
+ foreach(fun (#dfa_state{no=S,trans=Trans}) ->
+ Pt = pack_trans(Trans),
+ Tdict = foldl(fun ({Cr,T}, D) ->
+ orddict:append(T, Cr, D)
+ end, orddict:new(), Pt),
+ foreach(fun (T) ->
+ Crs = orddict:fetch(T, Tdict),
+ Edgelab = dfa_edgelabel(Crs),
+ io:fwrite(File, " ~b -> ~b [label=\"~s\"];~n",
+ [S,T,Edgelab])
+ end, sort(orddict:fetch_keys(Tdict)))
+ end, DFA).
+
+dfa_edgelabel([C]) when is_integer(C) -> quote(C);
+dfa_edgelabel(Cranges) ->
+ %% io:fwrite("el: ~p\n", [Cranges]),
+ "[" ++ map(fun ({A,B}) -> [quote(A), "-", quote(B)];
+ (C) -> [quote(C)]
+ end, Cranges) ++ "]".
+
+output_file_directive(File, Filename, Line) ->
+ io:fwrite(File, <<"-file(~s, ~w).\n">>,
+ [format_filename(Filename), Line]).
+
+format_filename(Filename) ->
+ io_lib:write_string(filename:flatten(Filename)).
+
+quote($^) -> "\\^";
+quote($.) -> "\\.";
+quote($$) -> "\\$";
+quote($-) -> "\\-";
+quote($[) -> "\\[";
+quote($]) -> "\\]";
+quote($\s) -> "\\\\s";
+quote($\") -> "\\\"";
+quote($\b) -> "\\\\b";
+quote($\f) -> "\\\\f";
+quote($\n) -> "\\\\n";
+quote($\r) -> "\\\\r";
+quote($\t) -> "\\\\t";
+quote($\e) -> "\\\\e";
+quote($\v) -> "\\\\v";
+quote($\d) -> "\\\\d";
+quote($\\) -> "\\\\";
+quote(C) when is_integer(C) ->
+ %% Must remove the $ and get the \'s right.
+ case io_lib:write_unicode_char(C) of
+ [$$,$\\|Cs] -> "\\\\" ++ Cs;
+ [$$|Cs] -> Cs
+ end;
+quote(maxchar) ->
+ "MAXCHAR".
diff --git a/lib/parsetools/src/parsetools.app.src b/lib/parsetools/src/parsetools.app.src
new file mode 100644
index 0000000000..af62fc4f6b
--- /dev/null
+++ b/lib/parsetools/src/parsetools.app.src
@@ -0,0 +1,33 @@
+{application, parsetools,
+ [{description, "XLATETOOLS CXC 138 xx"},
+ {vsn, "%VSN%"},
+ {modules, [leex,
+ yecc,
+ yeccparser,
+ yeccscan
+ ]
+ },
+ {registered,[]},
+ {applications, [kernel,stdlib]},
+ {env, [{file_util_search_methods,[{"", ""}, {"ebin", "esrc"}, {"ebin", "src"}]}
+ ]
+ }
+ ]
+}.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/lib/parsetools/src/parsetools.appup.src b/lib/parsetools/src/parsetools.appup.src
new file mode 100644
index 0000000000..54a63833e6
--- /dev/null
+++ b/lib/parsetools/src/parsetools.appup.src
@@ -0,0 +1 @@
+{"%VSN%",[],[]}.
diff --git a/lib/parsetools/src/yecc.erl b/lib/parsetools/src/yecc.erl
new file mode 100644
index 0000000000..f4d76f471d
--- /dev/null
+++ b/lib/parsetools/src/yecc.erl
@@ -0,0 +1,2531 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+%% Yacc like LALR-1 parser generator for Erlang.
+%% Ref: Aho & Johnson: "LR Parsing", ACM Computing Surveys, vol. 6:2, 1974.
+%% Auxiliary files: yeccgramm.yrl, yeccparser.erl, yeccpre.hrl, yeccscan.erl.
+%%
+
+-module(yecc).
+
+-export([compile/3, file/1, file/2, format_error/1]).
+
+%% Kept for compatibility with R10B.
+-export([yecc/2, yecc/3, yecc/4]).
+
+-import(lists, [append/1, append/2, concat/1, delete/2, filter/2,
+ flatmap/2, foldl/3, foldr/3, foreach/2, keydelete/3,
+ keysearch/3, keysort/2, last/1, map/2, member/2,
+ reverse/1, sort/1, usort/1]).
+
+-include("erl_compile.hrl").
+-include("ms_transform.hrl").
+
+-record(yecc, {
+ infile,
+ outfile,
+ includefile,
+ includefile_version,
+ module,
+ options = [],
+ verbose = false,
+ file_attrs = true,
+ errors = [],
+ warnings = [],
+ conflicts_done = false,
+ shift_reduce = [],
+ reduce_reduce = [],
+ n_states = 0,
+ inport,
+ outport,
+ line,
+
+ parse_actions,
+ symbol_tab,
+ inv_symbol_tab,
+ state_tab,
+ prec_tab,
+ goto_tab,
+
+ terminals = [],
+ nonterminals = [],
+ all_symbols = [],
+ prec = [],
+ rules_list = [],
+ rules, % a tuple of rules_list
+ rule_pointer2rule,
+ rootsymbol = [],
+ endsymbol = [],
+ expect_shift_reduce = [],
+ expect_n_states = [],
+ header = [],
+ erlang_code = none
+ }).
+
+-record(rule, {
+ n, % rule n in the grammar file
+ line,
+ symbols, % the names of symbols
+ tokens,
+ is_guard, % the action is a guard (not used)
+ is_well_formed % can be parsed (without macro expansion)
+ }).
+
+-record(reduce, {
+ rule_nmbr,
+ head,
+ nmbr_of_daughters,
+ prec,
+ unused % assure that #reduce{} comes before #shift{} when sorting
+ }).
+
+-record(shift, {
+ state,
+ pos,
+ prec,
+ rule_nmbr
+ }).
+
+-record(user_code, {state, terminal, funname, action}).
+
+-record(symbol, {line = none, name}).
+
+%% ACCEPT is neither an atom nor a non-terminal.
+-define(ACCEPT, {}).
+
+%% During the phase 'compute_states' terminals in lookahead sets are
+%% coded as integers; sets of terminals are integer bit masks. This is
+%% for efficiency only. '$empty' is always given the mask 1. The
+%% behaviour can be turned off by un-defining SYMBOLS_AS_CODES (useful
+%% when debugging).
+
+%% Non-terminals are also given integer codes, starting with -1. The
+%% absolut value of the code is used for indexing a tuple of lists of
+%% rules.
+
+-define(SYMBOLS_AS_CODES, true).
+
+-ifdef(SYMBOLS_AS_CODES).
+-define(EMPTY, 0).
+-else.
+-define(EMPTY, '$empty').
+-endif.
+
+%%%
+%%% Exported functions
+%%%
+
+%%% Interface to erl_compile.
+
+compile(Input0, Output0,
+ #options{warning = WarnLevel, verbose=Verbose, includes=Includes}) ->
+ Input = shorten_filename(Input0),
+ Output = shorten_filename(Output0),
+ Includefile = lists:sublist(Includes, 1),
+ Opts = [{parserfile,Output}, {includefile,Includefile}, {verbose,Verbose},
+ {report_errors, true}, {report_warnings, WarnLevel > 0}],
+ case file(Input, Opts) of
+ {ok, _OutFile} ->
+ ok;
+ error ->
+ error
+ end.
+
+format_error(bad_declaration) ->
+ io_lib:fwrite("unknown or bad declaration, ignored", []);
+format_error({bad_expect, SymName}) ->
+ io_lib:fwrite("argument ~s of Expect is not an integer",
+ [format_symbol(SymName)]);
+format_error({bad_rootsymbol, SymName}) ->
+ io_lib:fwrite("rootsymbol ~s is not a nonterminal",
+ [format_symbol(SymName)]);
+format_error({bad_states, SymName}) ->
+ io_lib:fwrite("argument ~s of States is not an integer",
+ [format_symbol(SymName)]);
+format_error({conflict, Conflict}) ->
+ format_conflict(Conflict);
+format_error({conflicts, SR, RR}) ->
+ io_lib:fwrite("conflicts: ~w shift/reduce, ~w reduce/reduce", [SR, RR]);
+format_error({duplicate_declaration, Tag}) ->
+ io_lib:fwrite("duplicate declaration of ~s", [atom_to_list(Tag)]);
+format_error({duplicate_nonterminal, Nonterminal}) ->
+ io_lib:fwrite("duplicate non-terminals ~s",
+ [format_symbol(Nonterminal)]);
+format_error({duplicate_precedence, Op}) ->
+ io_lib:fwrite("duplicate precedence operator ~s",
+ [format_symbol(Op)]);
+format_error({duplicate_terminal, Terminal}) ->
+ io_lib:fwrite("duplicate terminal ~s",
+ [format_symbol(Terminal)]);
+format_error({endsymbol_is_nonterminal, Symbol}) ->
+ io_lib:fwrite("endsymbol ~s is a nonterminal",
+ [format_symbol(Symbol)]);
+format_error({endsymbol_is_terminal, Symbol}) ->
+ io_lib:fwrite("endsymbol ~s is a terminal",
+ [format_symbol(Symbol)]);
+format_error({error, Module, Error}) ->
+ Module:format_error(Error);
+format_error({file_error, Reason}) ->
+ io_lib:fwrite("~s",[file:format_error(Reason)]);
+format_error(illegal_empty) ->
+ io_lib:fwrite("illegal use of empty symbol", []);
+format_error({internal_error, Error}) ->
+ io_lib:fwrite("internal yecc error: ~w", [Error]);
+format_error({missing_syntax_rule, Nonterminal}) ->
+ io_lib:fwrite("no syntax rule for non-terminal symbol ~s",
+ [format_symbol(Nonterminal)]);
+format_error({n_states, Exp, N}) ->
+ io_lib:fwrite("expected ~w states, but got ~p states", [Exp, N]);
+format_error(no_grammar_rules) ->
+ io_lib:fwrite("grammar rules are missing", []);
+format_error(nonterminals_missing) ->
+ io_lib:fwrite("Nonterminals is missing", []);
+format_error({precedence_op_is_endsymbol, SymName}) ->
+ io_lib:fwrite("precedence operator ~s is endsymbol",
+ [format_symbol(SymName)]);
+format_error({precedence_op_is_unknown, SymName}) ->
+ io_lib:fwrite("unknown precedence operator ~s",
+ [format_symbol(SymName)]);
+format_error({reserved, N}) ->
+ io_lib:fwrite("the use of ~w should be avoided", [N]);
+format_error({symbol_terminal_and_nonterminal, SymName}) ->
+ io_lib:fwrite("symbol ~s is both a terminal and nonterminal",
+ [format_symbol(SymName)]);
+format_error(rootsymbol_missing) ->
+ io_lib:fwrite("Rootsymbol is missing", []);
+format_error(terminals_missing) ->
+ io_lib:fwrite("Terminals is missing", []);
+format_error({undefined_nonterminal, Symbol}) ->
+ io_lib:fwrite("undefined nonterminal: ~s", [format_symbol(Symbol)]);
+format_error({undefined_pseudo_variable, Atom}) ->
+ io_lib:fwrite("undefined pseudo variable ~w", [Atom]);
+format_error({undefined_symbol, SymName}) ->
+ io_lib:fwrite("undefined rhs symbol ~s", [format_symbol(SymName)]);
+format_error({unused_nonterminal, Nonterminal}) ->
+ io_lib:fwrite("non-terminal symbol ~s not used",
+ [format_symbol(Nonterminal)]);
+format_error({unused_terminal, Terminal}) ->
+ io_lib:fwrite("terminal symbol ~s not used",
+ [format_symbol(Terminal)]).
+
+file(File) ->
+ file(File, [report_errors, report_warnings]).
+
+file(File, Options) ->
+ case is_filename(File) of
+ no -> erlang:error(badarg, [File, Options]);
+ _ -> ok
+ end,
+ case options(Options) of
+ badarg ->
+ erlang:error(badarg, [File, Options]);
+ OptionValues ->
+ Self = self(),
+ Flag = process_flag(trap_exit, false),
+ Pid = spawn_link(fun() -> infile(Self, File, OptionValues) end),
+ receive
+ {Pid, Rep} ->
+ receive after 1 -> ok end,
+ process_flag(trap_exit, Flag),
+ Rep
+ end
+ end.
+
+%% Kept for backward compatibility.
+yecc(Infile, Outfile) ->
+ yecc(Infile, Outfile, false, []).
+
+yecc(Infile, Outfile, Verbose) ->
+ yecc(Infile, Outfile, Verbose, []).
+
+yecc(Infilex, Outfilex, Verbose, Includefilex) ->
+ statistics(runtime),
+ case file(Infilex, [{parserfile, Outfilex},
+ {verbose, Verbose},
+ {report, true},
+ {includefile, Includefilex}]) of
+ {ok, _File} ->
+ statistics(runtime);
+ error ->
+ exit(error)
+ end.
+
+%%%
+%%% Local functions
+%%%
+
+options(Options0) when is_list(Options0) ->
+ try
+ Options = flatmap(fun(return) -> short_option(return, true);
+ (report) -> short_option(report, true);
+ ({return,T}) -> short_option(return, T);
+ ({report,T}) -> short_option(report, T);
+ (T) -> [T]
+ end, Options0),
+ options(Options, [file_attributes, includefile, parserfile,
+ report_errors, report_warnings, return_errors,
+ return_warnings, time, verbose], [])
+ catch error: _ -> badarg
+ end;
+options(Option) ->
+ options([Option]).
+
+short_option(return, T) ->
+ [{return_errors,T}, {return_warnings,T}];
+short_option(report, T) ->
+ [{report_errors,T}, {report_warnings,T}].
+
+options(Options0, [Key | Keys], L) when is_list(Options0) ->
+ Options = case member(Key, Options0) of
+ true ->
+ [atom_option(Key) | delete(Key, Options0)];
+ false ->
+ Options0
+ end,
+ V = case keysearch(Key, 1, Options) of
+ {value, {Key, Filename0}} when Key =:= includefile;
+ Key =:= parserfile ->
+ case is_filename(Filename0) of
+ no ->
+ badarg;
+ Filename ->
+ {ok, [{Key, Filename}]}
+ end;
+ {value, {Key, Bool}} when Bool =:= true; Bool =:= false ->
+ {ok, [{Key, Bool}]};
+ {value, {Key, _}} ->
+ badarg;
+ false ->
+ {ok, [{Key, default_option(Key)}]}
+ end,
+ case V of
+ badarg ->
+ badarg;
+ {ok, KeyValueL} ->
+ NewOptions = keydelete(Key, 1, Options),
+ options(NewOptions, Keys, KeyValueL ++ L)
+ end;
+options([], [], L) ->
+ foldl(fun({_,false}, A) -> A;
+ ({Tag,true}, A) -> [Tag | A];
+ (F, A) -> [F | A]
+ end, [], L);
+options(_Options, _, _L) ->
+ badarg.
+
+default_option(file_attributes) -> true;
+default_option(includefile) -> [];
+default_option(parserfile) -> [];
+default_option(report_errors) -> true;
+default_option(report_warnings) -> true;
+default_option(return_errors) -> false;
+default_option(return_warnings) -> false;
+default_option(time) -> false;
+default_option(verbose) -> false.
+
+atom_option(file_attributes) -> {file_attributes, true};
+atom_option(report_errors) -> {report_errors, true};
+atom_option(report_warnings) -> {report_warnings, true};
+atom_option(return_errors) -> {return_errors, true};
+atom_option(return_warnings) -> {return_warnings, true};
+atom_option(time) -> {time, true};
+atom_option(verbose) -> {verbose, true};
+atom_option(Key) -> Key.
+
+is_filename(T) ->
+ try filename:flatten(T) of
+ Filename -> Filename
+ catch error: _ -> no
+ end.
+
+shorten_filename(Name0) ->
+ {ok,Cwd} = file:get_cwd(),
+ case lists:prefix(Cwd, Name0) of
+ false -> Name0;
+ true ->
+ case lists:nthtail(length(Cwd), Name0) of
+ "/"++N -> N;
+ N -> N
+ end
+ end.
+
+start(Infilex, Options) ->
+ Infile = assure_extension(Infilex, ".yrl"),
+ {value, {_, Outfilex0}} = keysearch(parserfile, 1, Options),
+ {value, {_, Includefilex}} = keysearch(includefile, 1, Options),
+ Outfilex = case Outfilex0 of
+ [] -> filename:rootname(Infilex, ".yrl");
+ _ -> Outfilex0
+ end,
+ Includefile = case Includefilex of
+ [] -> [];
+ _ -> assure_extension(Includefilex,".hrl")
+ end,
+ IncludefileVersion = includefile_version(Includefile),
+ Outfile = assure_extension(Outfilex, ".erl"),
+ Module = list_to_atom(filename:basename(Outfile, ".erl")),
+ #yecc{infile = Infile,
+ outfile = Outfile,
+ includefile = Includefile,
+ includefile_version = IncludefileVersion,
+ module = Module,
+ options = Options,
+ verbose = member(verbose, Options),
+ file_attrs = member(file_attributes, Options)}.
+
+assure_extension(File, Ext) ->
+ concat([strip_extension(File, Ext), Ext]).
+
+%% Assumes File is a filename.
+strip_extension(File, Ext) ->
+ case filename:extension(File) of
+ Ext -> filename:rootname(File);
+ _Other -> File
+ end.
+
+infile(Parent, Infilex, Options) ->
+ St0 = start(Infilex, Options),
+ St = case file:open(St0#yecc.infile, [read, read_ahead]) of
+ {ok, Inport} ->
+ try
+ outfile(St0#yecc{inport = Inport})
+ after
+ ok = file:close(Inport)
+ end;
+ {error, Reason} ->
+ add_error(St0#yecc.infile, none, {file_error, Reason}, St0)
+ end,
+ case St#yecc.errors of
+ [] -> ok;
+ _ -> _ = file:delete(St#yecc.outfile)
+ end,
+ Parent ! {self(), yecc_ret(St)}.
+
+outfile(St0) ->
+ case file:open(St0#yecc.outfile, [write, delayed_write]) of
+ {ok, Outport} ->
+ try
+ generate(St0#yecc{outport = Outport, line = 1})
+ catch
+ throw: St1 ->
+ St1;
+ exit: Reason ->
+ add_error({internal_error, Reason}, St0)
+ after
+ ok = file:close(Outport)
+ end;
+ {error, Reason} ->
+ add_error(St0#yecc.outfile, none, {file_error, Reason}, St0)
+ end.
+
+os_process_size() ->
+ case os:type() of
+ {unix, sunos} ->
+ Size = os:cmd("ps -o vsz -p " ++ os:getpid() ++ " | tail -1"),
+ list_to_integer(lib:nonl(Size));
+ _ ->
+ 0
+ end.
+
+timeit(Name, Fun, St0) ->
+ Time = runtime,
+ %% Time = wall_clock,
+ {Before, _} = statistics(Time),
+ St = Fun(St0),
+ {After, _} = statistics(Time),
+ Mem0 = erts_debug:flat_size(St)*erlang:system_info(wordsize),
+ Mem = lists:flatten(io_lib:format("~.1f kB", [Mem0/1024])),
+ Sz = lists:flatten(io_lib:format("~.1f MB", [os_process_size()/1024])),
+ io:fwrite(" ~-30w: ~10.2f s ~12s ~10s\n",
+ [Name, (After-Before)/1000, Mem, Sz]),
+ St.
+
+-define(PASS(P), {P, fun P/1}).
+
+generate(St0) ->
+ Passes = [?PASS(parse_grammar), ?PASS(check_grammar),
+ ?PASS(states_and_goto_table), ?PASS(parse_actions),
+ ?PASS(action_conflicts), ?PASS(write_file)],
+ F = case member(time, St0#yecc.options) of
+ true ->
+ io:fwrite(<<"Generating parser from grammar in ~s\n">>,
+ [format_filename(St0#yecc.infile)]),
+ fun timeit/3;
+ false ->
+ fun(_Name, Fn, St) -> Fn(St) end
+ end,
+ Fun = fun({Name, Fun}, St) ->
+ St2 = F(Name, Fun, St),
+ if
+ St2#yecc.errors =:= [] -> St2;
+ true -> throw(St2)
+ end
+ end,
+ foldl(Fun, St0, Passes).
+
+parse_grammar(St) ->
+ parse_grammar(St#yecc.inport, 1, St).
+
+parse_grammar(Inport, Line, St) ->
+ {NextLine, Grammar} = read_grammar(Inport, Line),
+ parse_grammar(Grammar, Inport, NextLine, St).
+
+parse_grammar(eof, _Inport, _NextLine, St) ->
+ St;
+parse_grammar({#symbol{name = 'Header'}, Ss}, Inport, NextLine, St0) ->
+ St1 = St0#yecc{header = [S || {string,_,S} <- Ss]},
+ parse_grammar(Inport, NextLine, St1);
+parse_grammar({#symbol{name = 'Erlang'}, [#symbol{name = code}]}, _Inport,
+ NextLine, St) ->
+ St#yecc{erlang_code = NextLine};
+parse_grammar(Grammar, Inport, NextLine, St0) ->
+ St = parse_grammar(Grammar, St0),
+ parse_grammar(Inport, NextLine, St).
+
+parse_grammar({error,ErrorLine,Error}, St) ->
+ add_error(ErrorLine, Error, St);
+parse_grammar({rule, Rule, Tokens}, St0) ->
+ NmbrOfDaughters = case Rule of
+ [_, #symbol{name = '$empty'}] -> 0;
+ _ -> length(Rule) - 1
+ end,
+ {IsGuard, IsWellFormed} = check_action(Tokens),
+ {Tokens1, St} = subst_pseudo_vars(Tokens,
+ NmbrOfDaughters,
+ St0),
+ RuleDef = #rule{symbols = Rule,
+ tokens = Tokens1,
+ is_guard = IsGuard,
+ is_well_formed = IsWellFormed},
+ St#yecc{rules_list = [RuleDef | St#yecc.rules_list]};
+parse_grammar({prec, Prec}, St) ->
+ St#yecc{prec = Prec ++ St#yecc.prec};
+parse_grammar({#symbol{line = Line, name = Name}, Symbols}, St) ->
+ CF = fun(I) ->
+ case element(I, St) of
+ [] ->
+ setelement(I, St, Symbols);
+ _ ->
+ add_error(Line, {duplicate_declaration, Name}, St)
+ end
+ end,
+ OneSymbol = length(Symbols) =:= 1,
+ case Name of
+ 'Nonterminals' -> CF(#yecc.nonterminals);
+ 'Terminals' -> CF(#yecc.terminals);
+ 'Rootsymbol' when OneSymbol -> CF(#yecc.rootsymbol);
+ 'Endsymbol' when OneSymbol -> CF(#yecc.endsymbol);
+ 'Expect' when OneSymbol -> CF(#yecc.expect_shift_reduce);
+ 'States' when OneSymbol -> CF(#yecc.expect_n_states); % undocumented
+ _ -> add_warning(Line, bad_declaration, St)
+ end.
+
+read_grammar(Inport, Line) ->
+ case yeccscan:scan(Inport, '', Line) of
+ {eof, NextLine} ->
+ {NextLine, eof};
+ {error, {ErrorLine, Mod, What}, NextLine} ->
+ {NextLine, {error, ErrorLine, {error, Mod, What}}};
+ {ok, Input, NextLine} ->
+ {NextLine, case yeccparser:parse(Input) of
+ {error, {ErrorLine, Mod, Message}} ->
+ {error, ErrorLine, {error, Mod, Message}};
+ {ok, {rule, Rule, {erlang_code, Tokens}}} ->
+ {rule, Rule, Tokens};
+ {ok, {#symbol{name=P},
+ [#symbol{name=I} | OpL]}=Ss} ->
+ A = precedence(P),
+ if
+ A =/= unknown,
+ is_integer(I),
+ OpL =/= [] ->
+ Ps = [{Op, I , A} || Op <- OpL],
+ {prec, Ps};
+ true ->
+ Ss
+ end;
+ {ok, Ss} ->
+ Ss
+ end}
+ end.
+
+precedence('Left') -> left;
+precedence('Right') -> right;
+precedence('Unary') -> unary;
+precedence('Nonassoc') -> nonassoc;
+precedence(_) -> unknown.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+check_grammar(St0) ->
+ Empty = #symbol{line = none, name = '$empty'},
+ AllSymbols = St0#yecc.nonterminals ++ St0#yecc.terminals ++ [Empty],
+ St1 = St0#yecc{all_symbols = AllSymbols},
+ Cs = [fun check_nonterminals/1, fun check_terminals/1,
+ fun check_rootsymbol/1, fun check_endsymbol/1,
+ fun check_expect/1, fun check_states/1,
+ fun check_precedences/1, fun check_rules/1],
+ foldl(fun(F, St) -> F(St) end, St1, Cs).
+
+check_nonterminals(St) ->
+ case St#yecc.nonterminals of
+ [] ->
+ add_error(nonterminals_missing, St);
+ Nonterminals ->
+ {Unique, Dups} = duplicates(names(Nonterminals)),
+ St1 = add_warnings(Dups, duplicate_nonterminal, St),
+ St2 = check_reserved(Unique, St1),
+ St2#yecc{nonterminals = [?ACCEPT | Unique]}
+ end.
+
+check_terminals(St0) ->
+ case St0#yecc.terminals of
+ [] ->
+ add_error(terminals_missing, St0);
+ Terminals ->
+ {Unique, Dups} = duplicates(names(Terminals)),
+ St1 = add_warnings(Dups, duplicate_terminal, St0),
+ Common = intersect(St1#yecc.nonterminals, Unique),
+ St2 = add_errors(Common, symbol_terminal_and_nonterminal, St1),
+ St3 = check_reserved(Unique, St2),
+ St3#yecc{terminals = ['$empty' | Unique]}
+ end.
+
+check_reserved(Names, St) ->
+ add_errors(intersect(Names, ['$empty', '$end', '$undefined']),
+ reserved, St).
+
+check_rootsymbol(St) ->
+ case St#yecc.rootsymbol of
+ [] ->
+ add_error(rootsymbol_missing, St);
+ [#symbol{line = Line, name = SymName}] ->
+ case kind_of_symbol(St, SymName) of
+ nonterminal ->
+ St#yecc{rootsymbol = SymName};
+ _ ->
+ add_error(Line, {bad_rootsymbol, SymName}, St)
+ end
+ end.
+
+check_endsymbol(St) ->
+ case St#yecc.endsymbol of
+ [] ->
+ St#yecc{endsymbol = '$end'};
+ [#symbol{line = Line, name = SymName}] ->
+ case kind_of_symbol(St, SymName) of
+ nonterminal ->
+ add_error(Line, {endsymbol_is_nonterminal, SymName}, St);
+ terminal ->
+ add_error(Line, {endsymbol_is_terminal, SymName}, St);
+ _ ->
+ St#yecc{endsymbol = SymName}
+ end
+ end.
+
+check_expect(St0) ->
+ case St0#yecc.expect_shift_reduce of
+ [] ->
+ St0#yecc{expect_shift_reduce = 0};
+ [#symbol{name = Expect}] when is_integer(Expect) ->
+ St0#yecc{expect_shift_reduce = Expect};
+ [#symbol{line = Line, name = Name}] ->
+ St1 = add_error(Line, {bad_expect, Name}, St0),
+ St1#yecc{expect_shift_reduce = 0}
+ end.
+
+check_states(St) ->
+ case St#yecc.expect_n_states of
+ [] ->
+ St;
+ [#symbol{name = NStates}] when is_integer(NStates) ->
+ St#yecc{expect_n_states = NStates};
+ [#symbol{line = Line, name = Name}] ->
+ add_error(Line, {bad_states, Name}, St)
+ end.
+
+check_precedences(St0) ->
+ {St1, _} =
+ foldr(fun({#symbol{line = Line, name = Op},_I,_A}, {St,Ps}) ->
+ case member(Op, Ps) of
+ true ->
+ {add_error(Line, {duplicate_precedence,Op}, St),
+ Ps};
+ false ->
+ {St, [Op | Ps]}
+ end
+ end, {St0,[]}, St0#yecc.prec),
+ foldl(fun({#symbol{line = Line, name = Op},I,A}, St) ->
+ case kind_of_symbol(St, Op) of
+ endsymbol ->
+ add_error(Line,{precedence_op_is_endsymbol,Op}, St);
+ unknown ->
+ add_error(Line, {precedence_op_is_unknown, Op}, St);
+ _ ->
+ St#yecc{prec = [{Op,I,A} | St#yecc.prec]}
+ end
+ end, St1#yecc{prec = []}, St1#yecc.prec).
+
+check_rule(Rule0, {St0,Rules}) ->
+ Symbols = Rule0#rule.symbols,
+ #symbol{line = HeadLine, name = Head} = hd(Symbols),
+ case member(Head, St0#yecc.nonterminals) of
+ false ->
+ {add_error(HeadLine, {undefined_nonterminal, Head}, St0), Rules};
+ true ->
+ St = check_rhs(tl(Symbols), St0),
+ Rule = Rule0#rule{line = HeadLine, symbols = names(Symbols)},
+ {St, [Rule | Rules]}
+ end.
+
+check_rules(St0) ->
+ {St,Rules0} = foldl(fun check_rule/2, {St0,[]}, St0#yecc.rules_list),
+ case St#yecc.rules_list of
+ [] ->
+ add_error(no_grammar_rules, St);
+ _ ->
+ Rule = #rule{line = none,
+ symbols = [?ACCEPT, St#yecc.rootsymbol],
+ tokens = []},
+ Rules1 = [Rule | Rules0],
+ Rules = map(fun({R,I}) -> R#rule{n = I} end, count(0, Rules1)),
+ St#yecc{rules_list = Rules, rules = list_to_tuple(Rules)}
+ end.
+
+duplicates(List) ->
+ Unique = usort(List),
+ {Unique, List -- Unique}.
+
+names(Symbols) ->
+ map(fun(Symbol) -> Symbol#symbol.name end, Symbols).
+
+symbol_line(Name, St) ->
+ {value, #symbol{line = Line}} = symbol_search(Name, St#yecc.all_symbols),
+ Line.
+
+symbol_member(Symbol, Symbols) ->
+ symbol_search(Symbol#symbol.name, Symbols) =/= false.
+
+symbol_search(Name, Symbols) ->
+ keysearch(Name, #symbol.name, Symbols).
+
+states_and_goto_table(St0) ->
+ St1 = create_symbol_table(St0),
+ St = compute_states(St1),
+ create_precedence_table(St).
+
+parse_actions(St) ->
+ erase(), % the pd is used when decoding lookahead sets
+ ParseActions = compute_parse_actions(St#yecc.n_states, St, []),
+ erase(),
+ St#yecc{parse_actions = ParseActions, state_tab = []}.
+
+action_conflicts(St0) ->
+ St = find_action_conflicts(St0),
+ St#yecc{conflicts_done = true}.
+
+-record(state_info, {reduce_only, state_repr, comment}).
+
+write_file(St0) ->
+ #yecc{parse_actions = ParseActions, goto_tab = GotoTab} = St0,
+ Sorted = sort_parse_actions(ParseActions),
+ StateReprs = find_identical_shift_states(Sorted),
+ StateInfo = collect_some_state_info(Sorted, StateReprs),
+ StateJumps = find_partial_shift_states(Sorted, StateReprs),
+ UserCodeActions = find_user_code(Sorted, St0),
+ #yecc{infile = Infile, outfile = Outfile,
+ inport = Inport, outport = Outport,
+ nonterminals = Nonterminals} = St0,
+ {St10, N_lines, LastErlangCodeLine} =
+ output_prelude(Outport, Inport, St0),
+ St20 = St10#yecc{line = St10#yecc.line + N_lines},
+ St25 = nl(St20),
+ St30 = output_file_directive(St25, Outfile, St25#yecc.line),
+ St40 = nl(St30),
+ St50 = output_actions(St40, StateJumps, StateInfo),
+ Go0 = [{Symbol,{From,To}} || {{From,Symbol},To} <- ets:tab2list(GotoTab)],
+ Go = family_with_domain(Go0, Nonterminals),
+ St60 = output_goto(St50, Go, StateInfo),
+ St70 = output_inlined(St60, UserCodeActions, Infile),
+ St = nl(St70),
+ case LastErlangCodeLine of
+ %% Just in case warnings or errors are emitted after the last
+ %% line of the file.
+ {last_erlang_code_line, Last_line} ->
+ output_file_directive(St, Infile, Last_line);
+ no_erlang_code ->
+ St
+ end.
+
+yecc_ret(St0) ->
+ St = check_expected(St0),
+ report_errors(St),
+ report_warnings(St),
+ Es = pack_errors(St#yecc.errors),
+ Ws = pack_warnings(St#yecc.warnings),
+ if
+ Es =:= [] ->
+ case member(return_warnings, St#yecc.options) of
+ true -> {ok, St#yecc.outfile, Ws};
+ false -> {ok, St#yecc.outfile}
+ end;
+ true ->
+ case member(return_errors, St#yecc.options) of
+ true -> {error, Es, Ws};
+ false -> error
+ end
+ end.
+
+check_expected(St0) ->
+ #yecc{shift_reduce = SR, reduce_reduce = RR, expect_shift_reduce = ExpSR,
+ n_states = NStates0, expect_n_states = ExpStates,
+ conflicts_done = Done} = St0,
+ N_RR = length(usort(RR)),
+ N_SR = length(usort(SR)),
+ St1 = if
+ not Done ->
+ St0;
+ N_SR =:= ExpSR, N_RR =:= 0 ->
+ St0;
+ true ->
+ add_warning(none, {conflicts, N_SR, N_RR}, St0)
+ end,
+ NStates = NStates0 + 1,
+ if
+ (not Done) or (ExpStates =:= []) or (NStates =:= ExpStates) ->
+ St1;
+ true ->
+ add_warning(none, {n_states, ExpStates, NStates}, St1)
+ end.
+
+pack_errors([{File,_} | _] = Es) ->
+ [{File, flatmap(fun({_,E}) -> [E] end, sort(Es))}];
+pack_errors([]) ->
+ [].
+
+pack_warnings([{File,_} | _] = Ws) ->
+ [{File, flatmap(fun({_,W}) -> [W] end, sort(Ws))}];
+pack_warnings([]) ->
+ [].
+
+report_errors(St) ->
+ case member(report_errors, St#yecc.options) of
+ true ->
+ foreach(fun({File,{none,Mod,E}}) ->
+ io:fwrite(<<"~s: ~s\n">>,
+ [File,Mod:format_error(E)]);
+ ({File,{Line,Mod,E}}) ->
+ io:fwrite(<<"~s:~w: ~s\n">>,
+ [File,Line,Mod:format_error(E)])
+ end, sort(St#yecc.errors));
+ false ->
+ ok
+ end.
+
+report_warnings(St) ->
+ case member(report_warnings, St#yecc.options) of
+ true ->
+ foreach(fun({File,{none,Mod,W}}) ->
+ io:fwrite(<<"~s: Warning: ~s\n">>,
+ [File,Mod:format_error(W)]);
+ ({File,{Line,Mod,W}}) ->
+ io:fwrite(<<"~s:~w: Warning: ~s\n">>,
+ [File,Line,Mod:format_error(W)])
+ end, sort(St#yecc.warnings));
+ false ->
+ ok
+ end.
+
+add_error(E, St) ->
+ add_error(none, E, St).
+
+add_error(Line, E, St) ->
+ add_error(St#yecc.infile, Line, E, St).
+
+add_error(File, Line, E, St) ->
+ St#yecc{errors = [{File,{Line,?MODULE,E}}|St#yecc.errors]}.
+
+add_errors(SymNames, E0, St0) ->
+ foldl(fun(SymName, St) ->
+ add_error(symbol_line(SymName, St), {E0, SymName}, St)
+ end, St0, SymNames).
+
+add_warning(Line, W, St) ->
+ St#yecc{warnings = [{St#yecc.infile,{Line,?MODULE,W}}|St#yecc.warnings]}.
+
+add_warnings(SymNames, W0, St0) ->
+ foldl(fun(SymName, St) ->
+ add_warning(symbol_line(SymName, St), {W0, SymName}, St)
+ end, St0, SymNames).
+
+check_rhs([#symbol{name = '$empty'}], St) ->
+ St;
+check_rhs(Rhs, St0) ->
+ case symbol_search('$empty', Rhs) of
+ {value, #symbol{line = Line}} ->
+ add_error(Line, illegal_empty, St0);
+ false ->
+ foldl(fun(Sym, St) ->
+ case symbol_member(Sym, St#yecc.all_symbols) of
+ true ->
+ St;
+ false ->
+ E = {undefined_symbol,Sym#symbol.name},
+ add_error(Sym#symbol.line, E, St)
+ end
+ end, St0, Rhs)
+ end.
+
+check_action(Tokens) ->
+ case erl_parse:parse_exprs(add_roberts_dot(Tokens, 0)) of
+ {error, _Error} ->
+ {false, false};
+ {ok, [Expr | Exprs]} ->
+ IsGuard = Exprs =:= [] andalso erl_lint:is_guard_test(Expr),
+ {IsGuard, true}
+ end.
+
+add_roberts_dot([], Line) ->
+ [{'dot', Line}];
+add_roberts_dot([{'dot', Line} | _], _) ->
+ [{'dot', Line}];
+add_roberts_dot([Token | Tokens], _) ->
+ [Token | add_roberts_dot(Tokens, element(2, Token))].
+
+subst_pseudo_vars([], _, St) ->
+ {[], St};
+subst_pseudo_vars([H0 | T0], NmbrOfDaughters, St0) ->
+ {H, St1} = subst_pseudo_vars(H0, NmbrOfDaughters, St0),
+ {T, St} = subst_pseudo_vars(T0, NmbrOfDaughters, St1),
+ {[H | T], St};
+subst_pseudo_vars({atom, Line, Atom}, NmbrOfDaughters, St0) ->
+ case atom_to_list(Atom) of
+ [$$ | Rest] ->
+ try list_to_integer(Rest) of
+ N when N > 0, N =< NmbrOfDaughters ->
+ {{var, Line, list_to_atom(append("__", Rest))}, St0};
+ _ ->
+ St = add_error(Line, {undefined_pseudo_variable, Atom},
+ St0),
+ {{atom, Line, '$undefined'}, St}
+ catch
+ error: _ -> {{atom, Line, Atom}, St0}
+ end;
+ _ ->
+ {{atom, Line, Atom}, St0}
+ end;
+subst_pseudo_vars(Tuple, NmbrOfDaughters, St0) when is_tuple(Tuple) ->
+ {L, St} = subst_pseudo_vars(tuple_to_list(Tuple), NmbrOfDaughters, St0),
+ {list_to_tuple(L), St};
+subst_pseudo_vars(Something_else, _, St) ->
+ {Something_else, St}.
+
+kind_of_symbol(St, SymName) ->
+ case member(SymName, St#yecc.nonterminals) of
+ false ->
+ case member(SymName, St#yecc.terminals) of
+ false ->
+ case St#yecc.endsymbol of
+ SymName ->
+ endsymbol;
+ _ ->
+ unknown
+ end;
+ true ->
+ terminal
+ end;
+ true ->
+ nonterminal
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Computing parse states and goto table from grammar.
+% Start item: {0, [Endsymbol]} <->
+% (['ACCEPT' '.', Rootsymbol], {'$'}) in Aho & Johnson
+% where '$end' is the default end of input symbol of the
+% scanner if no 'Endsymbol' has been declared in the syntax file.
+
+-record(tabs, {
+ symbols, % ETS-set, keypos 1: {SymbolName, SymbolCode}
+ inv_symbols, % ETS-set, keypos 2: {SymbolName, SymbolCode}
+ state_id, % ETS-bag, keypos 1: {StateId, StateNum}
+ % StateId is not unique for a state.
+ rp_rhs, % rule pointer -> the remaining rhs symbols
+ rp_info, % rule pointer -> expanding rules and lookahead
+ goto % ETS-bag, keypos 1: first
+ % {{FromStateNum, Symbol, ToStateNum}}, then
+ % {{FromStateNum, Symbol}, ToStateNum}
+ }).
+
+-record(item, { % what states are made of
+ rule_pointer,
+ look_ahead,
+ rhs
+ }).
+
+compute_states(St0) ->
+ SymbolTab = St0#yecc.symbol_tab,
+ CodedRules = map(fun(#rule{symbols = Syms} = R) ->
+ R#rule{symbols = code_symbols(Syms, SymbolTab)}
+ end, St0#yecc.rules_list),
+ CodedNonterminals = code_symbols(St0#yecc.nonterminals, SymbolTab),
+ %% Only coded in this phase; StC is thrown away.
+ StC = St0#yecc{rules_list = CodedRules,
+ rules = list_to_tuple(CodedRules),
+ nonterminals = CodedNonterminals},
+ {RuleIndex, RulePointer2Rule} =
+ make_rule_index(StC, St0#yecc.rules_list),
+ StateTab0 = {},
+ StateIdTab = ets:new(yecc_state_id, [set]),
+ GotoTab = ets:new(yecc_goto, [bag]),
+ RulePointerRhs = make_rhs_index(StC#yecc.rules_list),
+ RulePointerInfo = make_rule_pointer_info(StC, RulePointerRhs, RuleIndex),
+
+ Tables = #tabs{symbols = SymbolTab,
+ state_id = StateIdTab,
+ rp_rhs = RulePointerRhs,
+ rp_info = RulePointerInfo,
+ goto = GotoTab},
+
+ erase(),
+ EndsymCode = code_terminal(StC#yecc.endsymbol, StC#yecc.symbol_tab),
+ {StateId, State0} = compute_state([{EndsymCode, 1}], Tables),
+
+ StateNum0 = first_state(),
+ FirstState = {StateNum0, State0},
+ StateTab1 = insert_state(Tables, StateTab0, FirstState, StateId),
+ {StateTab, N} =
+ compute_states1([{StateNum0, get_current_symbols(State0)}],
+ FirstState, StateTab1, Tables),
+ true = ets:delete(StateIdTab),
+ St = St0#yecc{state_tab = StateTab, goto_tab = GotoTab, n_states = N,
+ rule_pointer2rule = RulePointer2Rule},
+ decode_goto(GotoTab, St#yecc.inv_symbol_tab),
+ check_usage(St).
+
+first_state() ->
+ 0.
+
+decode_goto(GotoTab, InvSymTab) ->
+ G = ets:tab2list(GotoTab),
+ ets:delete_all_objects(GotoTab),
+ ets:insert(GotoTab,
+ map(fun({{From, Sym, Next}}) ->
+ {{From, decode_symbol(Sym, InvSymTab)}, Next}
+ end, G)).
+
+check_usage(St0) ->
+ SelSyms = ets:fun2ms(fun({{_,Sym},_}) -> Sym end),
+ UsedSymbols = ets:select(St0#yecc.goto_tab, SelSyms),
+ Syms = ordsets:from_list([?ACCEPT, '$empty' | UsedSymbols]),
+ NonTerms = ordsets:from_list(St0#yecc.nonterminals),
+ UnusedNonTerms = ordsets:to_list(ordsets:subtract(NonTerms, Syms)),
+ St1 = add_warnings(UnusedNonTerms, unused_nonterminal, St0),
+ Terms = ordsets:from_list(St0#yecc.terminals),
+ St2 = add_warnings(ordsets:to_list(ordsets:subtract(Terms, Syms)),
+ unused_terminal, St1),
+ DefinedNonTerminals = map(fun(#rule{symbols = [Name | _]}) ->
+ Name
+ end, St2#yecc.rules_list),
+ DefNonTerms = ordsets:from_list(DefinedNonTerminals),
+ UndefNonTerms = ordsets:subtract(NonTerms, DefNonTerms),
+ add_errors(ordsets:to_list(ordsets:subtract(UndefNonTerms,
+ UnusedNonTerms)),
+ missing_syntax_rule, St2).
+
+%% States are sometimes big, should not be copied to ETS tables.
+%% Here an "extendible" tuple is used.
+lookup_state(StateTab, N) ->
+ element(N+1, StateTab).
+
+insert_state(#tabs{state_id = StateIdTab}, StateTab0, State, StateId) ->
+ {N, _Items} = State,
+ insert_state_id(StateIdTab, N, StateId),
+ StateTab = if
+ tuple_size(StateTab0) > N ->
+ StateTab0;
+ true ->
+ list_to_tuple(tuple_to_list(StateTab0) ++
+ lists:duplicate(round(1 + N * 1.5), []))
+ end,
+ setelement(N+1, StateTab, State).
+
+insert_state_id(StateIdTab, N, StateId) ->
+ true = ets:insert(StateIdTab, {StateId, N}).
+
+compute_states1([], {N, _}=_CurrState, StateTab0, _Tables) ->
+ {StateTab0, N};
+compute_states1([{N, Symbols} | Try], CurrState, StateTab, Tables) ->
+ {_N, S} = lookup_state(StateTab, N),
+ Seeds = state_seeds(S, Symbols),
+ compute_states2(Seeds, N, Try, CurrState, StateTab, Tables).
+
+compute_states2([], _N, Try, CurrState, StateTab, Tables) ->
+ compute_states1(Try, CurrState, StateTab, Tables);
+compute_states2([{Sym,Seed} | Seeds], N, Try, CurrState, StateTab, Tables) ->
+ {StateId, NewState} = compute_state(Seed, Tables),
+ case check_states(NewState, StateId, StateTab, Tables) of
+ add ->
+ {M, _} = CurrState,
+ %% io:fwrite(<<"Adding state ~w\n">>, [M + 1]),
+ CurrentSymbols = get_current_symbols(NewState),
+ Next = M + 1,
+ NextState = {Next, NewState},
+ NewStateTab = insert_state(Tables, StateTab, NextState, StateId),
+ insert_goto(Tables, N, Sym, Next),
+ compute_states2(Seeds, N, [{Next, CurrentSymbols} | Try],
+ NextState, NewStateTab, Tables);
+ {old, M} ->
+ %% io:fwrite(<<"Identical to old state ~w\n">>, [M]),
+ insert_goto(Tables, N, Sym, M),
+ compute_states2(Seeds, N, Try, CurrState, StateTab, Tables);
+ {merge, M, NewCurrent} ->
+ %% io:fwrite(<<"Merging with state ~w\n">>, [M]),
+ Try1 = case keysearch(M, 1, Try) of
+ false ->
+ [{M, NewCurrent} | Try];
+ {value, {_, OldCurrent}} ->
+ case ordsets:is_subset(NewCurrent, OldCurrent) of
+ true ->
+ Try;
+ false ->
+ [{M, ordsets:union(NewCurrent, OldCurrent)}
+ | keydelete(M, 1, Try)]
+ end
+ end,
+ NewStateTab = merge_states(NewState, StateTab, Tables, M,StateId),
+ insert_goto(Tables, N, Sym, M),
+ compute_states2(Seeds, N, Try1, CurrState, NewStateTab, Tables)
+ end.
+
+insert_goto(Tables, From, Sym, To) ->
+ true = ets:insert(Tables#tabs.goto, {{From, Sym, To}}).
+
+%% Create an ets table for faster lookups.
+create_symbol_table(St) ->
+ #yecc{terminals = Terminals, endsymbol = Endsymbol} = St,
+ SymbolTab = ets:new(yecc_symbols, [{keypos,1}]),
+ %% '$empty' is always assigned 0
+ Ts = ['$empty', Endsymbol | delete('$empty', Terminals)],
+ TsC = count(0, Ts),
+ NTsC = map(fun({NT,I}) -> {NT,-I} end, count(1, St#yecc.nonterminals)),
+ Cs = TsC++NTsC,
+ true = ets:insert(SymbolTab, Cs),
+
+ InvSymTable = ets:new(yecc_inverted_terminals, [{keypos,2}]),
+ true = ets:insert(InvSymTable, Cs),
+
+ St#yecc{symbol_tab = SymbolTab, inv_symbol_tab = InvSymTable}.
+
+get_current_symbols(State) ->
+ usort(get_current_symbols1(State, [])).
+
+get_current_symbols1([], Syms) ->
+ Syms;
+get_current_symbols1([#item{rhs = Rhs} | Items], Syms) ->
+ case Rhs of
+ [] ->
+ get_current_symbols1(Items, Syms);
+ [Symbol | _] ->
+ get_current_symbols1(Items, [Symbol | Syms])
+ end.
+
+state_seeds(Items, Symbols) ->
+ L = [{S,{LA,RP + 1}} || #item{rule_pointer = RP, look_ahead = LA,
+ rhs = [S | _]} <- Items],
+ state_seeds1(keysort(1, L), Symbols).
+
+state_seeds1(_L, []) ->
+ [];
+state_seeds1(L, [Symbol | Symbols]) ->
+ state_seeds(L, Symbol, Symbols, []).
+
+state_seeds([{Symbol, Item} | L], Symbol, Symbols, Is) ->
+ state_seeds(L, Symbol, Symbols, [Item | Is]);
+state_seeds([{S, _Item} | L], Symbol, Symbols, Is) when S < Symbol ->
+ state_seeds(L, Symbol, Symbols, Is);
+state_seeds(L, Symbol, Symbols, Is) ->
+ [{Symbol, Is} | state_seeds1(L, Symbols)].
+
+compute_state(Seed, Tables) ->
+ RpInfo = Tables#tabs.rp_info,
+ foreach(fun({LA, RulePointer}) -> put(RulePointer, LA) end, Seed),
+ foreach(fun({LA, RP}) -> compute_closure(LA, RP, RpInfo) end, Seed),
+ Closure = keysort(1, erase()),
+ state_items(Closure, [], [], Tables#tabs.rp_rhs).
+
+%% Collects a uniqe id for the state (all rule pointers).
+state_items([{RP, LA} | L], Is, Id, RpRhs) ->
+ I = #item{rule_pointer = RP, look_ahead = LA, rhs = element(RP, RpRhs)},
+ state_items(L, [I | Is], [RP | Id], RpRhs);
+state_items(_, Is, Id, _RpRhs) ->
+ {Id, Is}.
+
+-compile({inline,[compute_closure/3]}).
+compute_closure(Lookahead, RulePointer, RpInfo) ->
+ case element(RulePointer, RpInfo) of
+ []=Void -> % no followers, or terminal
+ Void;
+ {no_union, ExpandingRules, NewLookahead} ->
+ compute_closure1(ExpandingRules, NewLookahead, RpInfo);
+ {union, ExpandingRules, Lookahead0} ->
+ NewLookahead = set_union(Lookahead0, Lookahead),
+ compute_closure1(ExpandingRules, NewLookahead, RpInfo);
+ ExpandingRules ->
+ compute_closure1(ExpandingRules, Lookahead, RpInfo)
+ end.
+
+compute_closure1([RulePointer | Tail], NewLookahead, RpInfo) ->
+ compute_closure1(Tail, NewLookahead, RpInfo),
+ case get(RulePointer) of
+ undefined -> % New
+ put(RulePointer, NewLookahead),
+ compute_closure(NewLookahead, RulePointer, RpInfo);
+ Lookahead2 ->
+ Lookahead = set_union(Lookahead2, NewLookahead),
+ if
+ Lookahead =:= Lookahead2 -> % Old
+ Lookahead2; % void()
+ true -> % Merge
+ put(RulePointer, Lookahead),
+ compute_closure(NewLookahead, RulePointer, RpInfo)
+ end
+ end;
+compute_closure1(Nil, _, _RpInfo) ->
+ Nil.
+
+%% Check if some old state is a superset of our NewState
+check_states(NewState, StateId, StateTab, #tabs{state_id = StateIdTab}) ->
+ try ets:lookup_element(StateIdTab, StateId, 2) of
+ N ->
+ {_N, OldState} = lookup_state(StateTab, N),
+ check_state1(NewState, OldState, [], N)
+ catch error:_ -> add
+ end.
+
+check_state1([#item{look_ahead = Lookahead1, rhs = Rhs} | Items1],
+ [#item{look_ahead = Lookahead2} | Items2], Symbols, N) ->
+ case set_is_subset(Lookahead1, Lookahead2) of
+ true ->
+ check_state1(Items1, Items2, Symbols, N);
+ false ->
+ case Rhs of
+ [] ->
+ check_state2(Items1, Items2, Symbols, N);
+ [Symbol | _] ->
+ check_state2(Items1, Items2, [Symbol | Symbols], N)
+ end
+ end;
+check_state1([], [], _Symbols, N) ->
+ {old, N}.
+
+check_state2([#item{look_ahead = Lookahead1, rhs = Rhs} | Items1],
+ [#item{look_ahead = Lookahead2} | Items2], Symbols, N) ->
+ case set_is_subset(Lookahead1, Lookahead2) of
+ true ->
+ check_state2(Items1, Items2, Symbols, N);
+ false ->
+ case Rhs of
+ [] ->
+ check_state2(Items1, Items2, Symbols, N);
+ [Symbol | _] ->
+ check_state2(Items1, Items2, [Symbol | Symbols], N)
+ end
+ end;
+check_state2([], [], Symbols, N) ->
+ {merge, N, usort(Symbols)}.
+
+merge_states(NewState, StateTab, Tables, M, StateId) ->
+ {_M, Old_state} = lookup_state(StateTab, M),
+ MergedState = merge_states1(NewState, Old_state),
+ insert_state(Tables, StateTab, {M, MergedState}, StateId).
+
+merge_states1([Item1 | Items1], [Item2 | Items2]) ->
+ LA1 = Item1#item.look_ahead,
+ LA2 = Item2#item.look_ahead,
+ if
+ LA1 =:= LA2 ->
+ [Item1 | merge_states1(Items1, Items2)];
+ true ->
+ [Item1#item{look_ahead = set_union(LA1, LA2)}
+ | merge_states1(Items1, Items2)]
+ end;
+merge_states1(_, _) ->
+ [].
+
+%% RulePointer -> Rhs. Every position Rhs in has its unique "rule pointer".
+make_rhs_index(RulesList) ->
+ Index = flatmap(fun(#rule{symbols = [_Non | Daughters]}) ->
+ suffixes0(Daughters)
+ end, RulesList),
+ list_to_tuple(Index).
+
+suffixes0([?EMPTY]) ->
+ [[], []];
+suffixes0(L) ->
+ suffixes(L).
+
+suffixes([]=L) ->
+ [L];
+suffixes([_ | T]=L) ->
+ [L | suffixes(T)].
+
+%% Setup info about lookahead and expanding rules for each point
+%% ("rule pointer") in the grammar.
+make_rule_pointer_info(StC, RpRhs, RuleIndex) ->
+ SymbolTab = StC#yecc.symbol_tab,
+ LcTab = make_left_corner_table(StC),
+ LA_index = map(fun(Syms) ->
+ rp_info(Syms, SymbolTab, LcTab, RuleIndex)
+ end, tuple_to_list(RpRhs)),
+ list_to_tuple(LA_index).
+
+rp_info([], _SymbolTab, _LcTab, _RuleIndex) ->
+ [];
+rp_info([Category | Followers], SymbolTab, LcTab, RuleIndex) ->
+ case dict:find(Category, RuleIndex) of
+ error -> % terminal
+ [];
+ {ok, ExpandingRules} when Followers =:= [] ->
+ ExpandingRules;
+ {ok, ExpandingRules} ->
+ case make_lookahead(Followers, SymbolTab, LcTab, set_empty()) of
+ {empty, LA} ->
+ {union, ExpandingRules, LA};
+ LA ->
+ {no_union, ExpandingRules, LA}
+ end
+ end.
+
+%% Lookahead computation is complicated by the possible existence
+%% of null string rewriting rules, such as A -> '$empty'.
+make_lookahead([], _, _, LA) ->
+ {empty, LA};
+make_lookahead([Symbol | Symbols], SymbolTab, LcTab, LA) ->
+ case dict:find(Symbol, LcTab) of
+ {ok, LeftCorner} -> % nonterminal
+ case empty_member(LeftCorner) of
+ true ->
+ make_lookahead(Symbols, SymbolTab, LcTab,
+ set_union(empty_delete(LeftCorner), LA));
+ false ->
+ set_union(LeftCorner, LA)
+ end;
+ error -> % terminal
+ set_add(Symbol, LA)
+ end.
+
+%% -> dict-of({Nonterminal, [Terminal]}).
+%% The algorithm FIRST/1 from the Dragon Book.
+%% Left corner table, all terminals (including '$empty') that can
+%% begin strings generated by Nonterminal.
+make_left_corner_table(#yecc{rules_list = RulesList} = St) ->
+ SymbolTab = left_corner_symbol_table(St),
+ Rules = map(fun(#rule{symbols = [Lhs | Rhs]}) ->
+ {Lhs,{Lhs, Rhs}}
+ end, RulesList),
+ LeftHandTab = dict:from_list(family(Rules)),
+ X0 = [{S,H} || {H,{H,Rhs}} <- Rules,
+ S <- Rhs,
+ not is_terminal(SymbolTab, S)],
+ XL = family_with_domain(X0, St#yecc.nonterminals),
+ X = dict:from_list(XL),
+ Xref = fun(NT) -> dict:fetch(NT, X) end,
+ E = set_empty(),
+ LC0 = dict:from_list([{H, E} || {H,_} <- XL]),
+ %% Handle H -> a S, where a is a terminal ('$empty' inclusive).
+ {Q, LC1} =
+ foldl(fun({H,{H,[S | _]}}, {Q0, LC}) ->
+ case ets:lookup(SymbolTab, S) of
+ [{_,Num}=SymbolAndNum] when Num >= 0 ->
+ F = set_add_terminal(SymbolAndNum, E),
+ {[Xref(H) | Q0], upd_first(H, F, LC)};
+ _ ->
+ {Q0, LC}
+ end
+ end, {[], LC0}, Rules),
+ left_corners(Q, LC1, LeftHandTab, SymbolTab, Xref).
+
+left_corners(Q0, LC0, LeftHandTab, SymbolTab, Xref) ->
+ case usort(append(Q0)) of
+ [] ->
+ LC0;
+ Q1 ->
+ Rs = flatmap(fun(NT) -> dict:fetch(NT, LeftHandTab) end, Q1),
+ {LC, Q} = left_corners2(Rs, LC0, [], SymbolTab, Xref),
+ left_corners(Q, LC, LeftHandTab, SymbolTab, Xref)
+ end.
+
+left_corners2([], LC, Q, _SymbolTab, _Xref) ->
+ {LC, Q};
+left_corners2([{Head,Rhs} | Rs], LC, Q0, SymbolTab, Xref) ->
+ Ts = left_corner_rhs(Rhs, Head, LC, set_empty(), SymbolTab),
+ First0 = dict:fetch(Head, LC),
+ case set_is_subset(Ts, First0) of
+ true ->
+ left_corners2(Rs, LC, Q0, SymbolTab, Xref);
+ false ->
+ LC1 = upd_first(Head, Ts, LC),
+ left_corners2(Rs, LC1, [Xref(Head) | Q0], SymbolTab, Xref)
+ end.
+
+upd_first(NT, Ts, LC) ->
+ dict:update(NT, fun(First) -> set_union(First, Ts) end, LC).
+
+left_corner_rhs([S | Ss], Head, LC, Ts, SymbolTab) ->
+ case ets:lookup(SymbolTab, S) of
+ [{_,Num}=SymbolAndNum] when Num >= 0 ->
+ set_add_terminal(SymbolAndNum, Ts);
+ [_NonTerminalSymbol] ->
+ First = dict:fetch(S, LC),
+ case empty_member(First) of
+ true ->
+ NTs = set_union(empty_delete(First), Ts),
+ left_corner_rhs(Ss, Head, LC, NTs, SymbolTab);
+ false ->
+ set_union(First, Ts)
+ end
+ end;
+left_corner_rhs([], _Head, _LC, Ts, _SymbolTab) ->
+ set_add(?EMPTY, Ts).
+
+%% For every non-terminal return a list of "rule pointers" for rules
+%% expanding the non-terminal.
+%% Also assigns a unique number to each point in the grammar, "rule pointer".
+make_rule_index(#yecc{nonterminals = Nonterminals,
+ rules_list = RulesList}, RulesListNoCodes) ->
+ {RulesL, _N} =
+ lists:mapfoldl(fun(#rule{symbols = [Nonterminal | Daughters]}, I) ->
+ I1 = I + length(Daughters)+1,
+ {{Nonterminal, I}, I1}
+ end, 1, RulesList),
+ IndexedTab = family_with_domain(RulesL, Nonterminals),
+
+ Symbol2Rule = [{Foo,R} || #rule{symbols = Symbols}=R <- RulesListNoCodes,
+ Foo <- Symbols],
+ Pointer2Rule = [{I, R} || {{_Foo,R},I} <- count(1, Symbol2Rule)],
+ {dict:from_list(IndexedTab), dict:from_list(Pointer2Rule)}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Computing parse action table from list of states and goto table:
+
+compute_parse_actions(N, St, StateActions) ->
+ case N < first_state() of
+ true ->
+ StateActions;
+ false ->
+ {N, StateN} = lookup_state(St#yecc.state_tab, N),
+ %% There can be duplicates in Actions.
+ Actions = compute_parse_actions1(StateN, N, St),
+ compute_parse_actions(N - 1, St, [{N, Actions} | StateActions])
+ end.
+
+compute_parse_actions1([], _, _) ->
+ [];
+compute_parse_actions1([#item{rule_pointer = RulePointer,
+ look_ahead = Lookahead0,
+ rhs = Rhs} | Items], N, St) ->
+ case Rhs of
+ [] ->
+ Lookahead = decode_terminals(Lookahead0, St#yecc.inv_symbol_tab),
+ case rule(RulePointer, St) of
+ {[?ACCEPT | _], _RuleLine, _} ->
+ [{Lookahead, accept}
+ | compute_parse_actions1(Items, N, St)];
+ %% Head is placed after the daughters when finding the
+ %% precedence. This is how giving precedence to
+ %% non-terminals takes effect.
+ {[Head | Daughters0], _RuleLine, _} ->
+ Daughters = delete('$empty', Daughters0),
+ [{Lookahead,
+ #reduce{rule_nmbr = RulePointer, head = Head,
+ nmbr_of_daughters = length(Daughters),
+ prec = get_prec(Daughters ++ [Head], St)}}
+ | compute_parse_actions1(Items, N, St)]
+ end;
+ [Symbol | Daughters] ->
+ case is_terminal(St#yecc.symbol_tab, Symbol) of
+ true ->
+ DecSymbol = decode_symbol(Symbol, St#yecc.inv_symbol_tab),
+ {[Head | _], _RuleLine, _} = rule(RulePointer, St),
+ %% A bogus shift-shift conflict can be introduced
+ %% here if some terminal occurs in different rules
+ %% which have been given precedence "one level up".
+ Prec1 = case Daughters of
+ [] -> get_prec([DecSymbol, Head], St);
+ _ -> get_prec([DecSymbol], St)
+ end,
+ Pos = case Daughters of
+ [] -> z;
+ _ -> a
+ end,
+ [{[DecSymbol],
+ #shift{state = goto(N, DecSymbol, St),
+ pos = Pos,
+ prec = Prec1,
+ rule_nmbr = RulePointer}}
+ | compute_parse_actions1(Items, N, St)];
+ false ->
+ compute_parse_actions1(Items, N, St)
+ end
+ end.
+
+get_prec(Symbols, St) ->
+ get_prec1(Symbols, St#yecc.prec_tab, {0, none}).
+
+get_prec1([], _, P) ->
+ P;
+get_prec1([Symbol | T], PrecTab, P) ->
+ case ets:lookup(PrecTab, Symbol) of
+ [] ->
+ get_prec1(T, PrecTab, P);
+ [{_, N, Ass}] ->
+ get_prec1(T, PrecTab, {N, Ass})
+ end.
+
+create_precedence_table(St) ->
+ PrecTab = ets:new(yecc_precedences, []),
+ true = ets:insert(PrecTab, St#yecc.prec),
+ St#yecc{prec_tab = PrecTab}.
+
+-record(cxt, {terminal, state_n, yecc, res}).
+
+%% Detects shift-reduce and reduce-reduce conflicts.
+%% Also removes all but one conflicting action. As a consequence the
+%% lookahead sets for a state are always disjoint.
+%% Reduce/reduce conflicts are considered errors.
+find_action_conflicts(St0) ->
+ Cxt0 = #cxt{yecc = St0, res = []},
+ {#cxt{yecc = St, res = Res}, NewParseActions0} =
+ foldl(fun({N, Actions0}, {Cxt1, StateActions}) ->
+ L = [{Terminal, Act} || {Lookahead, Act} <- Actions0,
+ Terminal <- Lookahead],
+ {Cxt, Actions} =
+ foldl(fun({Terminal, As}, {Cxt2,Acts0}) ->
+ Cxt3 = Cxt2#cxt{terminal = Terminal,
+ state_n = N},
+ {Action, Cxt} =
+ find_action_conflicts2(As, Cxt3),
+ {Cxt,[{Action,Terminal} | Acts0]}
+ end, {Cxt1,[]}, family(L)),
+ {Cxt,[{N,inverse(family(Actions))} | StateActions]}
+ end, {Cxt0, []}, St0#yecc.parse_actions),
+ if
+ length(Res) > 0, St#yecc.verbose ->
+ io:fwrite(<<"\n*** Conflicts resolved by operator "
+ "precedences:\n\n">>),
+ foreach(fun({Confl, Name}) ->
+ report_conflict(Confl, St, Name, prec)
+ end, reverse(Res)),
+ io:fwrite(<<"*** End of resolved conflicts\n\n">>);
+ true ->
+ ok
+ end,
+ NewParseActions = reverse(NewParseActions0),
+ St#yecc{parse_actions = NewParseActions}.
+
+find_action_conflicts2([Action], Cxt) ->
+ {Action, Cxt};
+find_action_conflicts2([#shift{state = St, pos = Pos, prec = Prec},
+ #shift{state = St}=S | As],
+ Cxt) when Pos =:= a; Prec =:= {0,none} ->
+ %% This is a kludge to remove the bogus shift-shift conflict
+ %% introduced in compute_parse_actions1().
+ find_action_conflicts2([S | As], Cxt);
+find_action_conflicts2([#shift{state = NewState, pos = z}=S1,
+ #shift{state = NewState}=S2 | _], Cxt) ->
+ %% This is even worse than last clause. Give up.
+ Confl = conflict(S1, S2, Cxt),
+ #cxt{yecc = St0} = Cxt,
+ St = conflict_error(Confl, St0),
+ {S1, Cxt#cxt{yecc = St}}; % return any action
+find_action_conflicts2([#shift{prec = {P1, Ass1}}=S | Rs], Cxt0) ->
+ {R, Cxt1} = find_reduce_reduce(Rs, Cxt0),
+ #cxt{res = Res0, yecc = St0} = Cxt1,
+ #reduce{prec = {P2, Ass2}} = R,
+ Confl = conflict(R, S, Cxt1),
+ if
+ P1 > P2 ->
+ {S, Cxt1#cxt{res = [{Confl, shift} | Res0]}};
+ P2 > P1 ->
+ {R, Cxt1#cxt{res = [{Confl, reduce} | Res0]}};
+ Ass1 =:= left, Ass2 =:= left ->
+ {R, Cxt1#cxt{res = [{Confl, reduce} | Res0]}};
+ Ass1 =:= right, Ass2 =:= right ->
+ {S, Cxt1#cxt{res = [{Confl, shift} | Res0]}};
+ Ass1 =:= nonassoc, Ass2 =:= nonassoc ->
+ {nonassoc, Cxt1};
+ P1 =:= 0, P2 =:= 0 ->
+ report_conflict(Confl, St0, shift, default),
+ St = add_conflict(Confl, St0),
+ {S, Cxt1#cxt{yecc = St}};
+ true ->
+ St = conflict_error(Confl, St0),
+ {S, Cxt1#cxt{yecc = St}} % return any action
+ end;
+find_action_conflicts2(Rs, Cxt0) ->
+ find_reduce_reduce(Rs, Cxt0).
+
+find_reduce_reduce([R], Cxt) ->
+ {R, Cxt};
+find_reduce_reduce([#reduce{head = Categ1, prec = {P1, _}}=R1,
+ #reduce{head = Categ2, prec = {P2, _}}=R2 | Rs], Cxt0) ->
+ #cxt{res = Res0, yecc = St0} = Cxt0,
+ Confl = conflict(R1, R2, Cxt0),
+ {R, Res, St} =
+ if
+ P1 > P2 ->
+ {R1, [{Confl, Categ1} | Res0], St0};
+ P2 > P1 ->
+ {R2, [{Confl, Categ2} | Res0], St0};
+ true ->
+ St1 = conflict_error(Confl, St0),
+ {R1, Res0, St1}
+ end,
+ Cxt = Cxt0#cxt{res = Res, yecc = St},
+ find_reduce_reduce([R | Rs], Cxt).
+
+%% Since the lookahead sets are disjoint (assured by
+%% find_action_conflicts), the order between actions can be chosen
+%% almost arbitrarily. nonassoc has to come last, though (but is later
+%% discarded!). And shift has to come before reduce.
+sort_parse_actions([]) ->
+ [];
+sort_parse_actions([{N, La_actions} | Tail]) ->
+ [{N, sort_parse_actions1(La_actions)} | sort_parse_actions(Tail)].
+
+sort_parse_actions1(LaActions) ->
+ As = filter(fun({_LA, A}) -> A =:= accept end, LaActions),
+ Ss = filter(fun({_LA, A}) -> is_record(A, shift) end, LaActions),
+ Rs = filter(fun({_LA, A}) -> is_record(A, reduce) end, LaActions),
+ Ns = filter(fun({_LA, A}) -> A =:= nonassoc end, LaActions),
+ As ++ Ss ++ Rs ++ Ns.
+
+%% -> {State, StateRepr}. StateRepr has the same set of shift actions
+%% as State. No code will be output for State if State =/= StateRepr.
+find_identical_shift_states(StateActions) ->
+ L1 = [{Actions, State} || {State,Actions} <- StateActions],
+ {SO, NotSO} = lists:partition(fun({Actions,_States}) ->
+ shift_actions_only(Actions)
+ end, family(L1)),
+ R = [{State, hd(States)} || {_Actions, States} <- SO, State <- States]
+ ++
+ [{State, State} || {_Actions, States} <- NotSO, State <- States],
+ lists:keysort(1, R).
+
+-record(part_data, {name, eq_state, actions, n_actions, states}).
+
+%% Replace {SStates,Actions} with {SStates,{Actions,Jump}} where
+%% Jump describes which clauses that have been extracted from shift
+%% states so that they can be used from other states. Some space is
+%% saved.
+find_partial_shift_states(StateActionsL, StateReprs) ->
+ L = [{State, Actions} || {{State,Actions}, {State,State}} <-
+ lists:zip(StateActionsL, StateReprs),
+ shift_actions_only(Actions)],
+ StateActions = sofs:family(L, [{state,[action]}]),
+ StateAction = sofs:family_to_relation(StateActions),
+
+ %% Two actions are equal if they occur in the same states:
+ Parts = sofs:partition(sofs:range(StateActions)),
+ PartsL = sofs:to_external(Parts),
+ %% Assign temporary names to the parts of the partition (of actions):
+ PartNameL = lists:zip(seq1(length(PartsL)), PartsL),
+ ActPartL = [{Action,PartName} ||
+ {PartName,Actions} <- PartNameL,
+ Action <- Actions],
+ ActionPartName = sofs:relation(ActPartL, [{action,partname}]),
+ StatePartName = sofs:relative_product(StateAction, ActionPartName),
+ PartInStates = sofs:relation_to_family(sofs:converse(StatePartName)),
+
+ %% Parts that equal all actions of a state:
+ PartActions = sofs:family(PartNameL, [{partname,[action]}]),
+ PartState =
+ sofs:relative_product(PartActions, sofs:converse(StateActions)),
+ PartStates = sofs_family_with_domain(PartState, sofs:domain(PartActions)),
+
+ PartDataL = [#part_data{name = Nm, eq_state = EqS, actions = P,
+ n_actions = length(P),
+ states = ordsets:from_list(S)} ||
+ {{Nm,P}, {Nm,S}, {Nm,EqS}} <-
+ lists:zip3(PartNameL,
+ sofs:to_external(PartInStates),
+ sofs:to_external(PartStates))],
+ true = length(PartDataL) =:= length(PartNameL),
+ Ps = select_parts(PartDataL),
+
+ J1 = [{State, Actions, {jump_some,hd(States)}} ||
+ {_W, #part_data{actions = Actions, eq_state = [],
+ states = States}} <- Ps,
+ State <- States],
+ J2 = [{State, Actions, {jump_all,To}} ||
+ {_W, #part_data{actions = Actions, eq_state = EqS,
+ states = States}} <- Ps,
+ To <- EqS,
+ State <- States,
+ State =/= To],
+ J = lists:keysort(1, J1 ++ J2),
+
+ JumpStates = ordsets:from_list([S || {S,_,_} <- J]),
+ {JS, NJS} =
+ sofs:partition(1, sofs:relation(StateActionsL, [{state, actions}]),
+ sofs:set(JumpStates, [state])),
+ R =
+ [{S, {Actions,jump_none}} || {S,Actions} <- sofs:to_external(NJS)]
+ ++
+ [{S, {Actions--Part, {Tag,ToS,Part}}} ||
+ {{S,Actions}, {S,Part,{Tag,ToS}}} <-
+ lists:zip(sofs:to_external(JS), J)],
+ true = length(StateActionsL) =:= length(R),
+ lists:keysort(1, R).
+
+%% Very greedy. By no means optimal.
+select_parts([]) ->
+ [];
+select_parts(PartDataL) ->
+ T1 = [{score(PD), PD} || PD <- PartDataL],
+ [{W, PD} | Ws] = lists:reverse(lists:keysort(1, T1)),
+ #part_data{n_actions = NActions, states = S} = PD,
+ if
+ W < 8 -> % don't bother
+ [];
+ true ->
+ %% Cannot extract more clauses from the chosen part's states:
+ NL = [D#part_data{states = NewS} ||
+ {W1, #part_data{states = S0}=D} <- Ws,
+ W1 > 0,
+ (NewS = ordsets:subtract(S0, S)) =/= []],
+ if
+ length(S) =:= 1; NActions =:= 1 ->
+ select_parts(NL);
+ true ->
+ [{W,PD} | select_parts(NL)]
+ end
+ end.
+
+%% Does it pay off to extract clauses into a new function?
+%% Assumptions:
+%% - a call costs 8 (C = 8);
+%% - a clause (per action) costs 20 plus 8 (select) (Cl = 28);
+%% - a new function costs 20 (funinfo) plus 16 (select) (F = 36).
+%% A is number of actions, S is number of states.
+%% Special case (the part equals all actions of some state):
+%% C * (S - 1) < (S - 1) * A * Cl
+%% Normal case (introduce new function):
+%% F + A * Cl + C * S < S * A * Cl
+score(#part_data{n_actions = NActions, eq_state = [], states = S}) ->
+ (length(S) * NActions * 28) - (36 + NActions * 28 + length(S) * 8);
+score(#part_data{n_actions = NActions, states = S}) ->
+ ((length(S) - 1) * NActions * 28) - (8 * (length(S) - 1)).
+
+shift_actions_only(Actions) ->
+ length([foo || {_Ts,{shift,_,_,_,_}} <- Actions]) =:= length(Actions).
+
+collect_some_state_info(StateActions, StateReprs) ->
+ RF = fun({_LA, A}) -> is_record(A, reduce) end,
+ L = [{State,
+ begin
+ RO = lists:all(RF, LaActions),
+ %% C is currently always ""; identical states are all shift.
+ C = [io_lib:fwrite(<<" %% ~w\n">>, [State]) ||
+ true <- [RO], Repr =/= State],
+ #state_info{reduce_only = RO, state_repr = Repr, comment = C}
+ end} ||
+ {{State, LaActions}, {State, Repr}} <-
+ lists:zip(StateActions, StateReprs)],
+ list_to_tuple(L).
+
+conflict_error(Conflict, St0) ->
+ St1 = add_conflict(Conflict, St0),
+ add_error({conflict, Conflict}, St1).
+
+report_conflict(Conflict, St, ActionName, How) ->
+ if
+ St#yecc.verbose ->
+ io:fwrite(<<"~s\n">>, [format_conflict(Conflict)]),
+ Formated = format_symbol(ActionName),
+ case How of
+ prec ->
+ io:fwrite(<<"Resolved in favor of ~s.\n\n">>, [Formated]);
+ default ->
+ io:fwrite(<<"Conflict resolved in favor of ~s.\n\n">>,
+ [Formated])
+ end;
+ true ->
+ ok
+ end.
+
+add_conflict(Conflict, St) ->
+ case Conflict of
+ {Symbol, StateN, _, {reduce, _, _, _}} ->
+ St#yecc{reduce_reduce = [{StateN,Symbol} |St#yecc.reduce_reduce]};
+ {Symbol, StateN, _, {shift, _, _}} ->
+ St#yecc{shift_reduce = [{StateN,Symbol} | St#yecc.shift_reduce]};
+ {_Symbol, _StateN, {one_level_up, _, _}, _Confl} ->
+ St
+ end.
+
+conflict(#shift{prec = Prec1, rule_nmbr = RuleNmbr1},
+ #shift{prec = Prec2, rule_nmbr = RuleNmbr2}, Cxt) ->
+ %% Conflict due to precedences "one level up". Kludge.
+ #cxt{terminal = Symbol, state_n = N, yecc = St} = Cxt,
+ {_, L1, RuleN1} = rule(RuleNmbr1, St),
+ {_, L2, RuleN2} = rule(RuleNmbr2, St),
+ Confl = {one_level_up, {L1, RuleN1, Prec1}, {L2, RuleN2, Prec2}},
+ {Symbol, N, Confl, Confl};
+conflict(#reduce{rule_nmbr = RuleNmbr1}, NewAction, Cxt) ->
+ #cxt{terminal = Symbol, state_n = N, yecc = St} = Cxt,
+ {R1, RuleLine1, RuleN1} = rule(RuleNmbr1, St),
+ Confl = case NewAction of
+ #reduce{rule_nmbr = RuleNmbr2} ->
+ {R2, RuleLine2, RuleN2} = rule(RuleNmbr2, St),
+ {reduce, R2, RuleN2, RuleLine2};
+ #shift{state = NewState} ->
+ {shift, NewState, last(R1)}
+ end,
+ {Symbol, N, {R1, RuleN1, RuleLine1}, Confl}.
+
+format_conflict({Symbol, N, _, {one_level_up,
+ {L1, RuleN1, {P1, Ass1}},
+ {L2, RuleN2, {P2, Ass2}}}}) ->
+ S1 = io_lib:fwrite(<<"Conflicting precedences of symbols when "
+ "scanning ~s in state ~w:\n">>,
+ [format_symbol(Symbol), N]),
+ S2 = io_lib:fwrite(<<" ~s ~w (rule ~w at line ~w)\n"
+ " vs.\n">>,
+ [format_assoc(Ass1), P1, RuleN1, L1]),
+ S3 = io_lib:fwrite(<<" ~s ~w (rule ~w at line ~w)\n">>,
+ [format_assoc(Ass2), P2, RuleN2, L2]),
+ [S1, S2, S3];
+format_conflict({Symbol, N, Reduce, Confl}) ->
+ S1 = io_lib:fwrite(<<"Parse action conflict scanning symbol "
+ "~s in state ~w:\n">>, [format_symbol(Symbol), N]),
+ S2 = case Reduce of
+ {[HR | TR], RuleNmbr, RuleLine} ->
+ io_lib:fwrite(<<" Reduce to ~s from ~s (rule ~w at "
+ "line ~w)\n vs.\n">>,
+ [format_symbol(HR), format_symbols(TR),
+ RuleNmbr, RuleLine])
+ end,
+ S3 = case Confl of
+ {reduce, [HR2|TR2], RuleNmbr2, RuleLine2} ->
+ io_lib:fwrite(<<" reduce to ~s from ~s "
+ "(rule ~w at line ~w).">>,
+ [format_symbol(HR2), format_symbols(TR2),
+ RuleNmbr2, RuleLine2]);
+ {shift, NewState, Sym} ->
+ io_lib:fwrite(<<" shift to state ~w, adding right "
+ "sisters to ~s.">>,
+ [NewState, format_symbol(Sym)])
+ end,
+ [S1, S2, S3].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Code generation:
+
+%% The version up to and including parsetools-1.3 is called "1.0".
+%%
+%% "1.1", parsetools-1.4:
+%% - the prologue file has been updated;
+%% - nonassoc is new;
+%% - different order of clauses;
+%% - never more than one clause matching a given symbol in a given state;
+%% - file attributes relate messages to .yrl file;
+%% - actions put in inlined functions;
+%% - a few other minor fixes.
+%%
+%% "1.2", parsetools-1.4.2:
+%% - the generated code has been changed as follows:
+%% - yeccpars2() calls the functions yeccpars2_State();
+%% - several states can share yeccpars2_State(), which reduces code size;
+%% - yeccgoto() has been split on one function per nonterminal;
+%% - several minor changes have made the loaded code smaller.
+%% - the include file yeccpre.hrl has been changed incompatibly.
+%%
+%% "1.3", parsetools-1.4.4:
+%% - the generated code has been changed as follows:
+%% - yeccgoto_T() no longer returns the next state, but calls yeccpars_S();
+%% - yeccpars2() is not called when it is known which yeccpars2_S() to call;
+%% - "__Stack" has been substituted for "Stack";
+%% - several states can share yeccpars2_S_cont(), which reduces code size;
+%% - instead if calling lists:nthtail() matching code is emitted.
+
+-define(CODE_VERSION, "1.3").
+-define(YECC_BUG(M, A),
+ iolist_to_binary([" erlang:error({yecc_bug,\"",?CODE_VERSION,"\",",
+ io_lib:fwrite(M, A), "}).\n\n"])).
+
+%% Returns number of newlines in included files.
+output_prelude(Outport, Inport, St0) when St0#yecc.includefile =:= [] ->
+ St5 = output_header(St0),
+ #yecc{infile = Infile, module = Module} = St5,
+ St10 = fwrite(St5, <<"-module(~w).\n">>, [Module]),
+ St20 =
+ fwrite(St10,
+ <<"-export([parse/1, parse_and_scan/1, format_error/1]).\n">>,
+ []),
+ {St25, N_lines_1, LastErlangCodeLine} =
+ case St20#yecc.erlang_code of
+ none ->
+ {St20, 0, no_erlang_code};
+ Next_line ->
+ St_10 = output_file_directive(St20, Infile, Next_line-1),
+ Nmbr_of_lines = include1([], Inport, Outport),
+ {St_10, Nmbr_of_lines,
+ {last_erlang_code_line, Next_line+Nmbr_of_lines}}
+ end,
+ St30 = nl(St25),
+ IncludeFile =
+ filename:join([code:lib_dir(parsetools), "include","yeccpre.hrl"]),
+ %% Maybe one could assume there are no warnings in this file.
+ St = output_file_directive(St30, IncludeFile, 0),
+ N_lines_2 = include(St, IncludeFile, Outport),
+ {St, N_lines_1 + N_lines_2, LastErlangCodeLine};
+output_prelude(Outport, Inport, St0) ->
+ St5 = output_header(St0),
+ #yecc{infile = Infile, module = Module, includefile = Includefile} = St5,
+ St10 = fwrite(St5, <<"-module(~w).\n">>, [Module]),
+ St20 = output_file_directive(St10, Includefile, 0),
+ N_lines_1 = include(St20, Includefile, Outport),
+ St30 = nl(St20),
+ case St30#yecc.erlang_code of
+ none ->
+ {St30, N_lines_1, no_erlang_code};
+ Next_line ->
+ St = output_file_directive(St30, Infile, Next_line-1),
+ Nmbr_of_lines = include1([], Inport, Outport),
+ {St, Nmbr_of_lines + N_lines_1,
+ {last_erlang_code_line, Next_line+Nmbr_of_lines}}
+ end.
+
+output_header(St0) ->
+ lists:foldl(fun(Str, St) -> fwrite(St, <<"~s\n">>, [Str])
+ end, St0, St0#yecc.header).
+
+output_goto(St, [{_Nonterminal, []} | Go], StateInfo) ->
+ output_goto(St, Go, StateInfo);
+output_goto(St0, [{Nonterminal, List} | Go], StateInfo) ->
+ F = function_name(yeccgoto, Nonterminal),
+ St10 = output_goto1(St0, List, F, StateInfo, true),
+ St = output_goto_fini(F, Nonterminal, St10),
+ output_goto(St, Go, StateInfo);
+output_goto(St, [], _StateInfo) ->
+ St.
+
+output_goto1(St0, [{From, To} | Tail], F, StateInfo, IsFirst) ->
+ St10 = delim(St0, IsFirst),
+ {To, ToInfo} = lookup_state(StateInfo, To),
+ #state_info{reduce_only = RO, state_repr = Repr, comment = C} = ToInfo,
+ if
+ RO ->
+ %% Reduce actions do not use the state, so we just pass
+ %% the old (now bogus) on:
+ FromS = io_lib:fwrite("~w=_S", [From]),
+ ToS = "_S";
+ true ->
+ FromS = io_lib:fwrite("~w", [From]),
+ ToS = io_lib:fwrite("~w", [To])
+ end,
+ St20 = fwrite(St10, <<"~w(~s, Cat, Ss, Stack, T, Ts, Tzr) ->\n">>,
+ [F,FromS]),
+ St30 = fwrite(St20, <<"~s">>, [C]),
+ %% Short-circuit call to yeccpars2:
+ St = fwrite(St30, <<" yeccpars2_~w(~s, Cat, Ss, Stack, T, Ts, Tzr)">>,
+ [Repr, ToS]),
+ output_goto1(St, Tail, F, StateInfo, false);
+output_goto1(St, [], _F, _StateInfo, _IsFirst) ->
+ St.
+
+output_goto_fini(F, NT, #yecc{includefile_version = {1,1}}=St0) ->
+ %% Backward compatibility.
+ St10 = delim(St0, false),
+ St = fwrite(St10, <<"~w(State, _Cat, _Ss, _Stack, _T, _Ts, _Tzr) ->\n">>,
+ [F]),
+ fwrite(St,
+ ?YECC_BUG(<<"{~w, State, missing_in_goto_table}">>, [NT]),
+ []);
+output_goto_fini(_F, _NT, St) ->
+ fwrite(St, <<".\n\n">>, []).
+
+%% Find actions having user code.
+find_user_code(ParseActions, St) ->
+ [#user_code{state = State,
+ terminal = Terminal,
+ funname = inlined_function_name(State, Terminal),
+ action = Action} ||
+ {State, La_actions} <- ParseActions,
+ {Action, Terminals, RuleNmbr, NmbrOfDaughters}
+ <- find_user_code2(La_actions),
+ case tokens(RuleNmbr, St) of
+ [{var, _, '__1'}] -> NmbrOfDaughters =/= 1;
+ _ -> true
+ end,
+ Terminal <- Terminals].
+
+find_user_code2([]) ->
+ [];
+find_user_code2([{_, #reduce{rule_nmbr = RuleNmbr,
+ nmbr_of_daughters = NmbrOfDaughters}
+ =Action}]) ->
+ %% Same optimization as in output_state_actions.
+ [{Action, ["Cat"], RuleNmbr, NmbrOfDaughters}];
+find_user_code2([{La, #reduce{rule_nmbr = RuleNmbr,
+ nmbr_of_daughters = NmbrOfDaughters}
+ =Action} | T]) ->
+ [{Action,La, RuleNmbr, NmbrOfDaughters} | find_user_code2(T)];
+find_user_code2([_ | T]) ->
+ find_user_code2(T).
+
+output_actions(St0, StateJumps, StateInfo) ->
+ %% Not all the clauses of the dispatcher function yeccpars2() can
+ %% be reached. Only when shifting, that is, calling yeccpars1(),
+ %% will yeccpars2() be called.
+ Y2CL = [NewState || {_State,{Actions,_J}} <- StateJumps,
+ {_LA, #shift{state = NewState}} <- Actions],
+ Y2CS = ordsets:from_list([0 | Y2CL]),
+ Y2S = ordsets:from_list([S || {S,_} <- StateJumps]),
+ NY2CS = ordsets:subtract(Y2S, Y2CS),
+ Sel = [{S,true} || S <- ordsets:to_list(Y2CS)] ++
+ [{S,false} || S <- ordsets:to_list(NY2CS)],
+
+ SelS = [{State,Called} ||
+ {{State,_JActions}, {State,Called}} <-
+ lists:zip(StateJumps, lists:keysort(1, Sel))],
+ St10 = foldl(fun({State, Called}, St_0) ->
+ {State, #state_info{state_repr = IState}} =
+ lookup_state(StateInfo, State),
+ output_state_selection(St_0, State, IState, Called)
+ end, St0, SelS),
+ St20 = fwrite(St10, <<"yeccpars2(Other, _, _, _, _, _, _) ->\n">>, []),
+ St = fwrite(St20,
+ ?YECC_BUG(<<"{missing_state_in_action_table, Other}">>, []),
+ []),
+ foldl(fun({State, JActions}, St_0) ->
+ {State, #state_info{state_repr = IState}} =
+ lookup_state(StateInfo, State),
+ output_state_actions(St_0, State, IState,
+ JActions, StateInfo)
+ end, St, StateJumps).
+
+output_state_selection(St0, State, IState, Called) ->
+ Comment = [<<"%% ">> || false <- [Called]],
+ St = fwrite(St0, <<"~syeccpars2(~w=S, Cat, Ss, Stack, T, Ts, Tzr) ->\n">>,
+ [Comment, State]),
+ fwrite(St,
+ <<"~s yeccpars2_~w(S, Cat, Ss, Stack, T, Ts, Tzr);\n">>,
+ [Comment, IState]).
+
+output_state_actions(St, State, State, {Actions,jump_none}, SI) ->
+ output_state_actions1(St, State, Actions, true, normal, SI);
+output_state_actions(St0, State, State, {Actions, Jump}, SI) ->
+ {Tag, To, Common} = Jump,
+ CS = case Tag of
+ jump_some -> list_to_atom(lists:concat([cont_, To]));
+ jump_all -> To
+ end,
+ St = output_state_actions1(St0, State, Actions, true, {to, CS}, SI),
+ if
+ To =:= State ->
+ output_state_actions1(St, CS, Common, true, normal, SI);
+ true ->
+ St
+ end;
+output_state_actions(St, State, JState, _XActions, _SI) ->
+ fwrite(St, <<"%% yeccpars2_~w: see yeccpars2_~w\n\n">>, [State, JState]).
+
+output_state_actions1(St, State, [], _IsFirst, normal, _SI) ->
+ output_state_actions_fini(State, St);
+output_state_actions1(St0, State, [], IsFirst, {to, ToS}, _SI) ->
+ St = delim(St0, IsFirst),
+ fwrite(St,
+ <<"yeccpars2_~w(S, Cat, Ss, Stack, T, Ts, Tzr) ->\n"
+ " yeccpars2_~w(S, Cat, Ss, Stack, T, Ts, Tzr).\n\n">>,
+ [State, ToS]);
+output_state_actions1(St0, State, [{_, #reduce{}=Action}],
+ IsFirst, _End, SI) ->
+ St = output_reduce(St0, State, "Cat", Action, IsFirst, SI),
+ fwrite(St, <<".\n\n">>, []);
+output_state_actions1(St0, State, [{Lookahead,Action} | Tail],
+ IsFirst, End, SI) ->
+ {_, St} =
+ foldl(fun(Terminal, {IsFst,St_0}) ->
+ {false,
+ output_action(St_0, State, Terminal, Action, IsFst,SI)}
+ end, {IsFirst,St0}, Lookahead),
+ output_state_actions1(St, State, Tail, false, End, SI).
+
+output_action(St, State, Terminal, #reduce{}=Action, IsFirst, SI) ->
+ output_reduce(St, State, Terminal, Action, IsFirst, SI);
+output_action(St0, State, Terminal, #shift{state = NewState}, IsFirst, _SI) ->
+ St10 = delim(St0, IsFirst),
+ St = fwrite(St10, <<"yeccpars2_~w(S, ~s, Ss, Stack, T, Ts, Tzr) ->\n">>,
+ [State, quoted_atom(Terminal)]),
+ output_call_to_includefile(NewState, St);
+output_action(St0, State, Terminal, accept, IsFirst, _SI) ->
+ St10 = delim(St0, IsFirst),
+ St = fwrite(St10,
+ <<"yeccpars2_~w(_S, ~s, _Ss, Stack, _T, _Ts, _Tzr) ->\n">>,
+ [State, quoted_atom(Terminal)]),
+ fwrite(St, <<" {ok, hd(Stack)}">>, []);
+output_action(St, _State, _Terminal, nonassoc, _IsFirst, _SI) ->
+ St.
+
+output_call_to_includefile(NewState, #yecc{includefile_version = {1,1}}=St) ->
+ %% Backward compatibility.
+ fwrite(St, <<" yeccpars1(Ts, Tzr, ~w, [S | Ss], [T | Stack])">>,
+ [NewState]);
+output_call_to_includefile(NewState, St) ->
+ fwrite(St, <<" yeccpars1(S, ~w, Ss, Stack, T, Ts, Tzr)">>,
+ [NewState]).
+
+output_state_actions_fini(State, #yecc{includefile_version = {1,1}}=St0) ->
+ %% Backward compatibility.
+ St10 = delim(St0, false),
+ St = fwrite(St10, <<"yeccpars2_~w(_, _, _, _, T, _, _) ->\n">>, [State]),
+ fwrite(St, <<" yeccerror(T).\n\n">>, []);
+output_state_actions_fini(_State, St) ->
+ fwrite(St, <<".\n\n">>, []).
+
+output_reduce(St0, State, Terminal0,
+ #reduce{rule_nmbr = RuleNmbr,
+ head = Head,
+ nmbr_of_daughters = NmbrOfDaughters},
+ IsFirst, StateInfo) ->
+ St10 = delim(St0, IsFirst),
+ Terminal = if
+ is_atom(Terminal0) -> quoted_atom(Terminal0);
+ true -> Terminal0
+ end,
+ St20 = fwrite(St10,
+ <<"yeccpars2_~w(_S, ~s, Ss, Stack, T, Ts, Tzr) ->\n">>,
+ [State, Terminal]),
+ St30 =
+ if
+ NmbrOfDaughters < 2 ->
+ Ns = "Ss",
+ St20;
+ true ->
+ Ns = "Nss",
+ Tmp = string:join(lists:duplicate(NmbrOfDaughters - 1, "_"),
+ ","),
+ fwrite(St20, <<" [~s|Nss] = Ss,\n">>, [Tmp])
+ end,
+ St40 = case tokens(RuleNmbr, St30) of
+ [{var, _, '__1'}] when NmbrOfDaughters =:= 1 ->
+ NewStack = "Stack",
+ St30;
+ _ ->
+ NewStack = "NewStack",
+ fwrite(St30, <<" NewStack = ~w(Stack),\n">>,
+ [inlined_function_name(State, Terminal0)])
+ end,
+ if
+ NmbrOfDaughters =:= 0 ->
+ NextState = goto(State, Head, St40),
+ {NextState, I} = lookup_state(StateInfo, NextState),
+ #state_info{reduce_only = RO, state_repr = Repr, comment = C} = I,
+ %% Reduce actions do not use the state, so we just pass
+ %% the old (now bogus) on:
+ if
+ RO -> NextS = "_S";
+ true -> NextS = io_lib:fwrite("~w", [NextState])
+ end,
+ St = fwrite(St40, <<"~s">>, [C]),
+ %% Short-circuit call to yeccpars2:
+ fwrite(St,
+ <<" yeccpars2_~w(~s, ~s, [~w | Ss], ~s, T, Ts, Tzr)">>,
+ [Repr, NextS, Terminal, State, NewStack]);
+ true ->
+ fwrite(St40,
+ <<" ~w(hd(~s), ~s, ~s, ~s, T, Ts, Tzr)">>,
+ [function_name(yeccgoto, Head), Ns,
+ Terminal, Ns, NewStack])
+ end.
+
+delim(St, true) ->
+ St;
+delim(St, false) ->
+ fwrite(St, <<";\n">>, []).
+
+quoted_atom(Atom) ->
+ io_lib:fwrite(<<"~w">>, [Atom]).
+
+output_inlined(St, UserCodeActions, Infile) ->
+ foldl(fun(#user_code{funname = InlinedFunctionName,
+ action = Action}, St_0) ->
+ output_inlined(St_0, InlinedFunctionName,
+ Action, Infile)
+ end, St, UserCodeActions).
+
+%% Each action with user code is placed in a separate inlined function.
+%% The purpose is to be able to pinpoint errors and warnings correctly.
+output_inlined(St0, FunctionName, Reduce, Infile) ->
+ #reduce{rule_nmbr = RuleNmbr, nmbr_of_daughters = N_daughters} = Reduce,
+ #rule{tokens = Tokens, is_well_formed = WF} = get_rule(RuleNmbr, St0),
+ Line0 = first_line(Tokens),
+ NLines = last_line(Tokens) - Line0,
+
+ St5 = if
+ WF ->
+ St0;
+ not WF ->
+ %% The compiler will generate an error message for
+ %% the inlined function (unless the reason that yecc
+ %% failed to parse the action was some macro). The
+ %% line number of the message will be correct since
+ %% we are keeping track of the current line of the
+ %% output file...
+ #yecc{outfile = Outfile, line = CurLine} = St0,
+ output_file_directive(St0, Outfile, CurLine)
+ end,
+
+ CodeStartLine = lists:max([0, Line0 - 4]),
+ St10 = fwrite(St5, <<"-compile({inline,~w/1}).\n">>, [FunctionName]),
+ St20 = output_file_directive(St10, Infile, CodeStartLine),
+ St30 = fwrite(St20, <<"~w(__Stack0) ->\n">>, [FunctionName]),
+ %% Currently the (old) inliner emits less code if matching the
+ %% stack inside the body rather than in the head...
+ St40 = case N_daughters of
+ 0 ->
+ Stack = "__Stack0",
+ St30;
+ _ ->
+ Stack = "__Stack",
+ A = concat(flatmap(fun(I) -> [",__",I] end,
+ lists:seq(N_daughters, 1, -1))),
+ fwrite(St30, <<" ~s = __Stack0,\n">>,
+ [append(["[", tl(A), " | __Stack]"])])
+ end,
+ St = St40#yecc{line = St40#yecc.line + NLines},
+ fwrite(St, <<" [begin\n ~s\n end | ~s].\n\n">>,
+ [pp_tokens(Tokens, Line0), Stack]).
+
+inlined_function_name(State, "Cat") ->
+ inlined_function_name(State, "");
+inlined_function_name(State, Terminal) ->
+ list_to_atom(concat([yeccpars2_, State, '_', Terminal])).
+
+-compile({nowarn_unused_function,function_name/2}).
+function_name(Name, Suf) ->
+ list_to_atom(concat([Name, '_' | quoted_atom(Suf)])).
+
+rule(RulePointer, St) ->
+ #rule{n = N, line = Line, symbols = Symbols} =
+ dict:fetch(RulePointer, St#yecc.rule_pointer2rule),
+ {Symbols, Line, N}.
+
+get_rule(RuleNmbr, St) ->
+ dict:fetch(RuleNmbr, St#yecc.rule_pointer2rule).
+
+tokens(RuleNmbr, St) ->
+ Rule = dict:fetch(RuleNmbr, St#yecc.rule_pointer2rule),
+ Rule#rule.tokens.
+
+goto(From, Symbol, St) ->
+ case ets:lookup(St#yecc.goto_tab, {From, Symbol}) of
+ [{_, To}] ->
+ To;
+ [] ->
+ erlang:error({error_in_goto_table, From, Symbol})
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Auxiliaries:
+
+-ifdef(SYMBOLS_AS_CODES).
+
+%%% Bit mask operations.
+
+-compile({inline,[set_empty/0]}).
+set_empty() ->
+ 0.
+
+set_add(I, BM) ->
+ (1 bsl I) bor BM.
+
+-compile({inline,[set_member/2]}).
+set_member(I, BM) ->
+ ((1 bsl I) band BM) =/= 0.
+
+%% Assumes I is a member...
+-compile({inline,[set_delete/2]}).
+set_delete(I, BM) ->
+ (1 bsl I) bxor BM.
+
+-compile({inline,[set_union/2]}).
+set_union(BM1, BM2) ->
+ BM1 bor BM2.
+
+-compile({inline,[set_is_subset/2]}).
+set_is_subset(BM1, BM2) ->
+ (BM1 band BM2) =:= BM1.
+
+empty_member(BM) ->
+ set_member(0, BM).
+
+empty_delete(BM) ->
+ set_delete(0, BM).
+
+code_symbols(Ss, SymbolTable) ->
+ map(fun(S) -> ets:lookup_element(SymbolTable, S, 2) end, Ss).
+
+decode_symbol(C, InvSymbolTable) ->
+ ets:lookup_element(InvSymbolTable, C, 1).
+
+code_terminal(T, SymbolTab) ->
+ set_add(ets:lookup_element(SymbolTab, T, 2), 0).
+
+decode_terminals(BM, InvSymbolTab) ->
+ case get(BM) of
+ undefined ->
+ Symbols = decode_terminals(BM, 0, InvSymbolTab),
+ put(BM, Symbols),
+ Symbols;
+ Symbols ->
+ Symbols
+ end.
+
+decode_terminals(0, _I, _InvSymbolTab) ->
+ [];
+decode_terminals(BM, I, InvSymbolTab) ->
+ case set_member(I, BM) of
+ true ->
+ [ets:lookup_element(InvSymbolTab, I, 1)
+ | decode_terminals(set_delete(I, BM), I+1, InvSymbolTab)];
+ false ->
+ decode_terminals(BM, I+1, InvSymbolTab)
+ end.
+
+set_add_terminal({_Symbol, TerminalNum}, BM) ->
+ set_add(TerminalNum, BM).
+
+-compile({inline,[is_terminal/2]}).
+is_terminal(_Tab, SymbolCode) ->
+ SymbolCode >= 0.
+
+left_corner_symbol_table(St) ->
+ St#yecc.inv_symbol_tab.
+
+-else.
+
+set_empty() ->
+ [].
+
+set_add(Symbol, L) ->
+ ordsets:union([Symbol], L).
+
+set_union(Es1, Es2) ->
+ ordsets:union(Es1, Es2).
+
+set_is_subset(Es1, Es2) ->
+ ordsets:is_subset(Es1, Es2).
+
+code_symbols(Ss, _SymbolTab) ->
+ Ss.
+
+decode_symbol(S, _InvSymbolTab) ->
+ S.
+
+code_terminal(T, _SymbolTab) ->
+ [T].
+
+decode_terminals(Ts, _InvSymbolTab) ->
+ Ts.
+
+empty_member(['$empty' | _]) ->
+ true;
+empty_member(_) ->
+ false.
+
+empty_delete(['$empty' | Terminals]) ->
+ Terminals.
+
+set_add_terminal({Symbol, _TerminalNum}, L) ->
+ set_add(Symbol, L).
+
+is_terminal(Tab, SymbolName) ->
+ ets:lookup_element(Tab, SymbolName, 2) >= 0.
+
+left_corner_symbol_table(St) ->
+ St#yecc.symbol_tab.
+
+-endif. % SYMBOLS_AS_CODES
+
+intersect(L1, L2) ->
+ ordsets:to_list(ordsets:intersection(ordsets:from_list(L1),
+ ordsets:from_list(L2))).
+
+format_symbols([Sym | Syms]) ->
+ concat([format_symbol(Sym) | format_symbols1(Syms)]).
+
+format_symbols1([]) ->
+ [];
+format_symbols1([H | T]) ->
+ [" ", format_symbol(H) | format_symbols1(T)].
+
+include(St, File, Outport) ->
+ case file:open(File, [read]) of
+ {error, Reason} ->
+ throw(add_error(File, none, {file_error, Reason}, St));
+ {ok, Inport} ->
+ Line = io:get_line(Inport, ''),
+ N_lines = include1(Line, Inport, Outport),
+ file:close(Inport),
+ N_lines
+ end.
+
+include1(Line, Inport, Outport) ->
+ include1(Line, Inport, Outport, 0).
+
+include1(eof, _, _, Nmbr_of_lines) ->
+ Nmbr_of_lines;
+include1(Line, Inport, Outport, Nmbr_of_lines) ->
+ Incr = case member($\n, Line) of
+ true -> 1;
+ false -> 0
+ end,
+ io:put_chars(Outport, Line),
+ include1(io:get_line(Inport, ''), Inport, Outport, Nmbr_of_lines + Incr).
+
+includefile_version([]) ->
+ {1,2};
+includefile_version(Includefile) ->
+ case epp:open(Includefile, []) of
+ {ok, Epp} ->
+ try
+ parse_file(Epp)
+ after
+ epp:close(Epp)
+ end;
+ {error, _Error} ->
+ {1,1}
+ end.
+
+parse_file(Epp) ->
+ case epp:parse_erl_form(Epp) of
+ {ok, {function,_Line,yeccpars1,7,_Clauses}} ->
+ {1,2};
+ {eof,_Line} ->
+ {1,1};
+ _Form ->
+ parse_file(Epp)
+ end.
+
+%% Keeps the line breaks of the original code.
+pp_tokens(Tokens, Line0) ->
+ concat(pp_tokens1(Tokens, Line0, [])).
+
+pp_tokens1([], _Line0, _T0) ->
+ [];
+pp_tokens1([T | Ts], Line0, T0) ->
+ Line = element(2, T),
+ [pp_sep(Line, Line0, T0), pp_symbol(T) | pp_tokens1(Ts, Line, T)].
+
+pp_symbol({var,_,Var}) -> Var;
+pp_symbol({_,_,Symbol}) -> io_lib:fwrite(<<"~p">>, [Symbol]);
+pp_symbol({Symbol, _}) -> Symbol.
+
+pp_sep(Line, Line0, T0) when Line > Line0 ->
+ ["\n " | pp_sep(Line - 1, Line0, T0)];
+pp_sep(_Line, _Line0, {'.',_}) ->
+ "";
+pp_sep(_Line, _Line0, _T0) ->
+ " ".
+
+output_file_directive(St, Filename, Line) when St#yecc.file_attrs ->
+ fwrite(St, <<"-file(~s, ~w).\n">>,
+ [format_filename(Filename), Line]);
+output_file_directive(St, _Filename, _Line) ->
+ St.
+
+first_line(Tokens) ->
+ element(2, hd(Tokens)).
+
+last_line(Tokens) ->
+ element(2, lists:last(Tokens)).
+
+%% Keep track of the current line in the generated file.
+fwrite(#yecc{outport = Outport, line = Line}=St, Format, Args) ->
+ NLines = count_nl(Format),
+ io:fwrite(Outport, Format, Args),
+ St#yecc{line = Line + NLines}.
+
+%% Assumes \n is used, and never ~n.
+count_nl(<<$\n,Rest/binary>>) ->
+ 1 + count_nl(Rest);
+count_nl(<<_,Rest/binary>>) ->
+ count_nl(Rest);
+count_nl(<<>>) ->
+ 0.
+
+nl(#yecc{outport = Outport, line = Line}=St) ->
+ io:nl(Outport),
+ St#yecc{line = Line + 1}.
+
+format_filename(Filename) ->
+ io_lib:write_string(filename:flatten(Filename)).
+
+format_assoc(left) ->
+ "Left";
+format_assoc(right) ->
+ "Right";
+format_assoc(unary) ->
+ "Unary";
+format_assoc(nonassoc) ->
+ "Nonassoc".
+
+format_symbol(Symbol) ->
+ String = concat([Symbol]),
+ case erl_scan:string(String) of
+ {ok, [{atom, _, _}], _} ->
+ io_lib:fwrite(<<"~w">>, [Symbol]);
+ {ok, [{Word, _}], _} when Word =/= ':', Word =/= '->' ->
+ case erl_scan:reserved_word(Word) of
+ true ->
+ String;
+ false ->
+ io_lib:fwrite(<<"~w">>, [Symbol])
+ end;
+ {ok, [{var, _, _}], _} ->
+ String;
+ _ ->
+ io_lib:fwrite(<<"~w">>, [Symbol])
+ end.
+
+inverse(L) ->
+ sort([{A,B} || {B,A} <- L]).
+
+family(L) ->
+ sofs:to_external(sofs:relation_to_family(sofs:relation(L))).
+
+seq1(To) when To < 1 ->
+ [];
+seq1(To) ->
+ lists:seq(1, To).
+
+count(From, L) ->
+ lists:zip(L, lists:seq(From, length(L)-1+From)).
+
+family_with_domain(L, DL) ->
+ sofs:to_external(sofs_family_with_domain(sofs:relation(L), sofs:set(DL))).
+
+sofs_family_with_domain(R0, D) ->
+ R = sofs:restriction(R0, D),
+ F = sofs:relation_to_family(R),
+ FD = sofs:constant_function(D, sofs:from_term([])),
+ sofs:family_union(F, FD).
diff --git a/lib/parsetools/src/yeccgramm.yrl b/lib/parsetools/src/yeccgramm.yrl
new file mode 100644
index 0000000000..562a9a7458
--- /dev/null
+++ b/lib/parsetools/src/yeccgramm.yrl
@@ -0,0 +1,74 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%% This is the syntax (metagrammar) of grammar definitions of the yecc
+%% parser generator.
+
+Nonterminals
+grammar declaration rule head symbol symbols strings attached_code
+token tokens.
+
+Terminals
+atom float integer reserved_symbol reserved_word string char var
+'->' ':' dot.
+
+Rootsymbol grammar.
+
+grammar -> declaration : '$1'.
+grammar -> rule : '$1'.
+declaration -> symbol symbols dot: {'$1', '$2'}.
+declaration -> symbol strings dot: {'$1', '$2'}.
+rule -> head '->' symbols attached_code dot: {rule, ['$1' | '$3'], '$4'}.
+head -> symbol : '$1'.
+symbols -> symbol : ['$1'].
+symbols -> symbol symbols : ['$1' | '$2'].
+strings -> string : ['$1'].
+strings -> string strings : ['$1' | '$2'].
+attached_code -> ':' tokens : {erlang_code, '$2'}.
+attached_code -> '$empty' : {erlang_code, [{atom, 0, '$undefined'}]}.
+tokens -> token : ['$1'].
+tokens -> token tokens : ['$1' | '$2'].
+symbol -> var : symbol('$1').
+symbol -> atom : symbol('$1').
+symbol -> integer : symbol('$1').
+symbol -> reserved_word : symbol('$1').
+token -> var : '$1'.
+token -> atom : '$1'.
+token -> float : '$1'.
+token -> integer : '$1'.
+token -> string : '$1'.
+token -> char : '$1'.
+token -> reserved_symbol : {value_of('$1'), line_of('$1')}.
+token -> reserved_word : {value_of('$1'), line_of('$1')}.
+token -> '->' : {'->', line_of('$1')}. % Have to be treated in this
+token -> ':' : {':', line_of('$1')}. % manner, because they are also
+ % special symbols of the metagrammar
+
+Erlang code.
+
+-record(symbol, {line, name}).
+
+symbol(Symbol) ->
+ #symbol{line = line_of(Symbol), name = value_of(Symbol)}.
+
+value_of(Token) ->
+ element(3, Token).
+
+line_of(Token) ->
+ element(2, Token).
diff --git a/lib/parsetools/src/yeccparser.erl b/lib/parsetools/src/yeccparser.erl
new file mode 100644
index 0000000000..80a6bbce0e
--- /dev/null
+++ b/lib/parsetools/src/yeccparser.erl
@@ -0,0 +1,642 @@
+-module(yeccparser).
+-export([parse/1, parse_and_scan/1, format_error/1]).
+-file("yeccgramm.yrl", 63).
+
+-record(symbol, {line, name}).
+
+symbol(Symbol) ->
+ #symbol{line = line_of(Symbol), name = value_of(Symbol)}.
+
+value_of(Token) ->
+ element(3, Token).
+
+line_of(Token) ->
+ element(2, Token).
+
+-file("/clearcase/otp/erts/lib/parsetools/include/yeccpre.hrl", 0).
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% The parser generator will insert appropriate declarations before this line.%
+
+-type(yecc_ret() :: {'error', _} | {'ok', _}).
+
+-spec(parse/1 :: (_) -> yecc_ret()).
+parse(Tokens) ->
+ yeccpars0(Tokens, false).
+
+-spec(parse_and_scan/1 ::
+ ({function() | {atom(), atom()}, [_]} | {atom(), atom(), [_]}) ->
+ yecc_ret()).
+parse_and_scan({F, A}) -> % Fun or {M, F}
+ yeccpars0([], {F, A});
+parse_and_scan({M, F, A}) ->
+ yeccpars0([], {{M, F}, A}).
+
+-spec(format_error/1 :: (any()) -> [char() | list()]).
+format_error(Message) ->
+ case io_lib:deep_char_list(Message) of
+ true ->
+ Message;
+ _ ->
+ io_lib:write(Message)
+ end.
+
+% To be used in grammar files to throw an error message to the parser
+% toplevel. Doesn't have to be exported!
+-compile({nowarn_unused_function,{return_error,2}}).
+-spec(return_error/2 :: (integer(), any()) -> no_return()).
+return_error(Line, Message) ->
+ throw({error, {Line, ?MODULE, Message}}).
+
+-define(CODE_VERSION, "1.3").
+
+yeccpars0(Tokens, MFA) ->
+ try yeccpars1(Tokens, MFA, 0, [], [])
+ catch
+ error: Error ->
+ Stacktrace = erlang:get_stacktrace(),
+ try yecc_error_type(Error, Stacktrace) of
+ {syntax_error, Token} ->
+ yeccerror(Token);
+ {missing_in_goto_table=Tag, Symbol, State} ->
+ Desc = {Symbol, State, Tag},
+ erlang:raise(error, {yecc_bug, ?CODE_VERSION, Desc},
+ Stacktrace)
+ catch _:_ -> erlang:raise(error, Error, Stacktrace)
+ end;
+ throw: {error, {_Line, ?MODULE, _M}} = Error ->
+ Error % probably from return_error/2
+ end.
+
+yecc_error_type(function_clause, [{?MODULE,F,[State,_,_,_,Token,_,_]} | _]) ->
+ case atom_to_list(F) of
+ "yeccpars2" ++ _ ->
+ {syntax_error, Token};
+ "yeccgoto_" ++ SymbolL ->
+ {ok,[{atom,_,Symbol}],_} = erl_scan:string(SymbolL),
+ {missing_in_goto_table, Symbol, State}
+ end.
+
+yeccpars1([Token | Tokens], Tokenizer, State, States, Vstack) ->
+ yeccpars2(State, element(1, Token), States, Vstack, Token, Tokens,
+ Tokenizer);
+yeccpars1([], {F, A}, State, States, Vstack) ->
+ case apply(F, A) of
+ {ok, Tokens, _Endline} ->
+ yeccpars1(Tokens, {F, A}, State, States, Vstack);
+ {eof, _Endline} ->
+ yeccpars1([], false, State, States, Vstack);
+ {error, Descriptor, _Endline} ->
+ {error, Descriptor}
+ end;
+yeccpars1([], false, State, States, Vstack) ->
+ yeccpars2(State, '$end', States, Vstack, {'$end', 999999}, [], false).
+
+%% yeccpars1/7 is called from generated code.
+%%
+%% When using the {includefile, Includefile} option, make sure that
+%% yeccpars1/7 can be found by parsing the file without following
+%% include directives. yecc will otherwise assume that an old
+%% yeccpre.hrl is included (one which defines yeccpars1/5).
+yeccpars1(State1, State, States, Vstack, Stack1, [Token | Tokens],
+ Tokenizer) ->
+ yeccpars2(State, element(1, Token), [State1 | States],
+ [Stack1 | Vstack], Token, Tokens, Tokenizer);
+yeccpars1(State1, State, States, Vstack, Stack1, [], {F, A}) ->
+ case apply(F, A) of
+ {ok, Tokens, _Endline} ->
+ yeccpars1(State1, State, States, Vstack, Stack1, Tokens, {F, A});
+ {eof, _Endline} ->
+ yeccpars1(State1, State, States, Vstack, Stack1, [], false);
+ {error, Descriptor, _Endline} ->
+ {error, Descriptor}
+ end;
+yeccpars1(State1, State, States, Vstack, Stack1, [], false) ->
+ yeccpars2(State, '$end', [State1 | States], [Stack1 | Vstack],
+ {'$end', 999999}, [], false).
+
+% For internal use only.
+yeccerror(Token) ->
+ Text = case catch erl_scan:token_info(Token, text) of
+ {text, Txt} -> Txt;
+ _ -> yecctoken2string(Token)
+ end,
+ Location = case catch erl_scan:token_info(Token, location) of
+ {location, Loc} -> Loc;
+ _ -> element(2, Token)
+ end,
+ {error, {Location, ?MODULE, ["syntax error before: ", Text]}}.
+
+yecctoken2string({atom, _, A}) -> io_lib:write(A);
+yecctoken2string({integer,_,N}) -> io_lib:write(N);
+yecctoken2string({float,_,F}) -> io_lib:write(F);
+yecctoken2string({char,_,C}) -> io_lib:write_char(C);
+yecctoken2string({var,_,V}) -> io_lib:format("~s", [V]);
+yecctoken2string({string,_,S}) -> io_lib:write_unicode_string(S);
+yecctoken2string({reserved_symbol, _, A}) -> io_lib:format("~w", [A]);
+yecctoken2string({_Cat, _, Val}) -> io_lib:format("~w", [Val]);
+yecctoken2string({dot, _}) -> "'.'";
+yecctoken2string({'$end', _}) ->
+ [];
+yecctoken2string({Other, _}) when is_atom(Other) ->
+ io_lib:format("~w", [Other]);
+yecctoken2string(Other) ->
+ io_lib:write(Other).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+
+
+-file("yeccparser.erl", 168).
+
+yeccpars2(0=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_0(S, Cat, Ss, Stack, T, Ts, Tzr);
+%% yeccpars2(1=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+%% yeccpars2_1(S, Cat, Ss, Stack, T, Ts, Tzr);
+%% yeccpars2(2=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+%% yeccpars2_2(S, Cat, Ss, Stack, T, Ts, Tzr);
+%% yeccpars2(3=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+%% yeccpars2_3(S, Cat, Ss, Stack, T, Ts, Tzr);
+%% yeccpars2(4=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+%% yeccpars2_4(S, Cat, Ss, Stack, T, Ts, Tzr);
+%% yeccpars2(5=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+%% yeccpars2_5(S, Cat, Ss, Stack, T, Ts, Tzr);
+yeccpars2(6=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_6(S, Cat, Ss, Stack, T, Ts, Tzr);
+yeccpars2(7=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_7(S, Cat, Ss, Stack, T, Ts, Tzr);
+yeccpars2(8=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_8(S, Cat, Ss, Stack, T, Ts, Tzr);
+yeccpars2(9=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_9(S, Cat, Ss, Stack, T, Ts, Tzr);
+yeccpars2(10=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_0(S, Cat, Ss, Stack, T, Ts, Tzr);
+%% yeccpars2(11=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+%% yeccpars2_11(S, Cat, Ss, Stack, T, Ts, Tzr);
+%% yeccpars2(12=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+%% yeccpars2_12(S, Cat, Ss, Stack, T, Ts, Tzr);
+%% yeccpars2(13=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+%% yeccpars2_13(S, Cat, Ss, Stack, T, Ts, Tzr);
+%% yeccpars2(14=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+%% yeccpars2_14(S, Cat, Ss, Stack, T, Ts, Tzr);
+yeccpars2(15=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_15(S, Cat, Ss, Stack, T, Ts, Tzr);
+%% yeccpars2(16=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+%% yeccpars2_16(S, Cat, Ss, Stack, T, Ts, Tzr);
+%% yeccpars2(17=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+%% yeccpars2_17(S, Cat, Ss, Stack, T, Ts, Tzr);
+yeccpars2(18=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_18(S, Cat, Ss, Stack, T, Ts, Tzr);
+yeccpars2(19=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_19(S, Cat, Ss, Stack, T, Ts, Tzr);
+yeccpars2(20=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_20(S, Cat, Ss, Stack, T, Ts, Tzr);
+yeccpars2(21=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_21(S, Cat, Ss, Stack, T, Ts, Tzr);
+yeccpars2(22=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_22(S, Cat, Ss, Stack, T, Ts, Tzr);
+yeccpars2(23=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_23(S, Cat, Ss, Stack, T, Ts, Tzr);
+yeccpars2(24=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_24(S, Cat, Ss, Stack, T, Ts, Tzr);
+yeccpars2(25=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_25(S, Cat, Ss, Stack, T, Ts, Tzr);
+yeccpars2(26=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_26(S, Cat, Ss, Stack, T, Ts, Tzr);
+yeccpars2(27=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_27(S, Cat, Ss, Stack, T, Ts, Tzr);
+%% yeccpars2(28=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+%% yeccpars2_28(S, Cat, Ss, Stack, T, Ts, Tzr);
+yeccpars2(29=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_29(S, Cat, Ss, Stack, T, Ts, Tzr);
+%% yeccpars2(30=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+%% yeccpars2_30(S, Cat, Ss, Stack, T, Ts, Tzr);
+%% yeccpars2(31=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+%% yeccpars2_31(S, Cat, Ss, Stack, T, Ts, Tzr);
+yeccpars2(32=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_32(S, Cat, Ss, Stack, T, Ts, Tzr);
+%% yeccpars2(33=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+%% yeccpars2_33(S, Cat, Ss, Stack, T, Ts, Tzr);
+yeccpars2(34=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_34(S, Cat, Ss, Stack, T, Ts, Tzr);
+yeccpars2(35=S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_35(S, Cat, Ss, Stack, T, Ts, Tzr);
+yeccpars2(Other, _, _, _, _, _, _) ->
+ erlang:error({yecc_bug,"1.3",{missing_state_in_action_table, Other}}).
+
+yeccpars2_0(S, atom, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 6, Ss, Stack, T, Ts, Tzr);
+yeccpars2_0(S, integer, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 7, Ss, Stack, T, Ts, Tzr);
+yeccpars2_0(S, reserved_word, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 8, Ss, Stack, T, Ts, Tzr);
+yeccpars2_0(S, var, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 9, Ss, Stack, T, Ts, Tzr).
+
+yeccpars2_1(S, atom, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 6, Ss, Stack, T, Ts, Tzr);
+yeccpars2_1(S, integer, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 7, Ss, Stack, T, Ts, Tzr);
+yeccpars2_1(S, reserved_word, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 8, Ss, Stack, T, Ts, Tzr);
+yeccpars2_1(S, string, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 32, Ss, Stack, T, Ts, Tzr);
+yeccpars2_1(S, var, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 9, Ss, Stack, T, Ts, Tzr);
+yeccpars2_1(_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccgoto_head(hd(Ss), Cat, Ss, Stack, T, Ts, Tzr).
+
+yeccpars2_2(_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccgoto_grammar(hd(Ss), Cat, Ss, Stack, T, Ts, Tzr).
+
+yeccpars2_3(S, '->', Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 10, Ss, Stack, T, Ts, Tzr).
+
+yeccpars2_4(_S, '$end', _Ss, Stack, _T, _Ts, _Tzr) ->
+ {ok, hd(Stack)}.
+
+yeccpars2_5(_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccgoto_grammar(hd(Ss), Cat, Ss, Stack, T, Ts, Tzr).
+
+yeccpars2_6(_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ NewStack = yeccpars2_6_(Stack),
+ yeccgoto_symbol(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr).
+
+yeccpars2_7(_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ NewStack = yeccpars2_7_(Stack),
+ yeccgoto_symbol(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr).
+
+yeccpars2_8(_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ NewStack = yeccpars2_8_(Stack),
+ yeccgoto_symbol(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr).
+
+yeccpars2_9(_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ NewStack = yeccpars2_9_(Stack),
+ yeccgoto_symbol(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr).
+
+%% yeccpars2_10: see yeccpars2_0
+
+yeccpars2_11(S, ':', Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 15, Ss, Stack, T, Ts, Tzr);
+yeccpars2_11(_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ NewStack = yeccpars2_11_(Stack),
+ yeccpars2_14(14, Cat, [11 | Ss], NewStack, T, Ts, Tzr).
+
+yeccpars2_12(S, atom, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 6, Ss, Stack, T, Ts, Tzr);
+yeccpars2_12(S, integer, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 7, Ss, Stack, T, Ts, Tzr);
+yeccpars2_12(S, reserved_word, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 8, Ss, Stack, T, Ts, Tzr);
+yeccpars2_12(S, var, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 9, Ss, Stack, T, Ts, Tzr);
+yeccpars2_12(_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ NewStack = yeccpars2_12_(Stack),
+ yeccgoto_symbols(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr).
+
+yeccpars2_13(_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ [_|Nss] = Ss,
+ NewStack = yeccpars2_13_(Stack),
+ yeccgoto_symbols(hd(Nss), Cat, Nss, NewStack, T, Ts, Tzr).
+
+yeccpars2_14(S, dot, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 29, Ss, Stack, T, Ts, Tzr).
+
+yeccpars2_15(S, '->', Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 18, Ss, Stack, T, Ts, Tzr);
+yeccpars2_15(S, ':', Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 19, Ss, Stack, T, Ts, Tzr);
+yeccpars2_15(S, atom, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 20, Ss, Stack, T, Ts, Tzr);
+yeccpars2_15(S, char, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 21, Ss, Stack, T, Ts, Tzr);
+yeccpars2_15(S, float, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 22, Ss, Stack, T, Ts, Tzr);
+yeccpars2_15(S, integer, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 23, Ss, Stack, T, Ts, Tzr);
+yeccpars2_15(S, reserved_symbol, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 24, Ss, Stack, T, Ts, Tzr);
+yeccpars2_15(S, reserved_word, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 25, Ss, Stack, T, Ts, Tzr);
+yeccpars2_15(S, string, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 26, Ss, Stack, T, Ts, Tzr);
+yeccpars2_15(S, var, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 27, Ss, Stack, T, Ts, Tzr).
+
+yeccpars2_16(_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ [_|Nss] = Ss,
+ NewStack = yeccpars2_16_(Stack),
+ yeccgoto_attached_code(hd(Nss), Cat, Nss, NewStack, T, Ts, Tzr).
+
+yeccpars2_17(S, '->', Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 18, Ss, Stack, T, Ts, Tzr);
+yeccpars2_17(S, ':', Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 19, Ss, Stack, T, Ts, Tzr);
+yeccpars2_17(S, atom, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 20, Ss, Stack, T, Ts, Tzr);
+yeccpars2_17(S, char, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 21, Ss, Stack, T, Ts, Tzr);
+yeccpars2_17(S, float, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 22, Ss, Stack, T, Ts, Tzr);
+yeccpars2_17(S, integer, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 23, Ss, Stack, T, Ts, Tzr);
+yeccpars2_17(S, reserved_symbol, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 24, Ss, Stack, T, Ts, Tzr);
+yeccpars2_17(S, reserved_word, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 25, Ss, Stack, T, Ts, Tzr);
+yeccpars2_17(S, string, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 26, Ss, Stack, T, Ts, Tzr);
+yeccpars2_17(S, var, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 27, Ss, Stack, T, Ts, Tzr);
+yeccpars2_17(_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ NewStack = yeccpars2_17_(Stack),
+ yeccgoto_tokens(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr).
+
+yeccpars2_18(_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ NewStack = yeccpars2_18_(Stack),
+ yeccgoto_token(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr).
+
+yeccpars2_19(_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ NewStack = yeccpars2_19_(Stack),
+ yeccgoto_token(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr).
+
+yeccpars2_20(_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccgoto_token(hd(Ss), Cat, Ss, Stack, T, Ts, Tzr).
+
+yeccpars2_21(_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccgoto_token(hd(Ss), Cat, Ss, Stack, T, Ts, Tzr).
+
+yeccpars2_22(_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccgoto_token(hd(Ss), Cat, Ss, Stack, T, Ts, Tzr).
+
+yeccpars2_23(_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccgoto_token(hd(Ss), Cat, Ss, Stack, T, Ts, Tzr).
+
+yeccpars2_24(_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ NewStack = yeccpars2_24_(Stack),
+ yeccgoto_token(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr).
+
+yeccpars2_25(_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ NewStack = yeccpars2_25_(Stack),
+ yeccgoto_token(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr).
+
+yeccpars2_26(_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccgoto_token(hd(Ss), Cat, Ss, Stack, T, Ts, Tzr).
+
+yeccpars2_27(_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccgoto_token(hd(Ss), Cat, Ss, Stack, T, Ts, Tzr).
+
+yeccpars2_28(_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ [_|Nss] = Ss,
+ NewStack = yeccpars2_28_(Stack),
+ yeccgoto_tokens(hd(Nss), Cat, Nss, NewStack, T, Ts, Tzr).
+
+yeccpars2_29(_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ [_,_,_,_|Nss] = Ss,
+ NewStack = yeccpars2_29_(Stack),
+ yeccgoto_rule(hd(Nss), Cat, Nss, NewStack, T, Ts, Tzr).
+
+yeccpars2_30(S, dot, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 35, Ss, Stack, T, Ts, Tzr).
+
+yeccpars2_31(S, dot, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 34, Ss, Stack, T, Ts, Tzr).
+
+yeccpars2_32(S, string, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars1(S, 32, Ss, Stack, T, Ts, Tzr);
+yeccpars2_32(_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ NewStack = yeccpars2_32_(Stack),
+ yeccgoto_strings(hd(Ss), Cat, Ss, NewStack, T, Ts, Tzr).
+
+yeccpars2_33(_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ [_|Nss] = Ss,
+ NewStack = yeccpars2_33_(Stack),
+ yeccgoto_strings(hd(Nss), Cat, Nss, NewStack, T, Ts, Tzr).
+
+yeccpars2_34(_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ [_,_|Nss] = Ss,
+ NewStack = yeccpars2_34_(Stack),
+ yeccgoto_declaration(hd(Nss), Cat, Nss, NewStack, T, Ts, Tzr).
+
+yeccpars2_35(_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ [_,_|Nss] = Ss,
+ NewStack = yeccpars2_35_(Stack),
+ yeccgoto_declaration(hd(Nss), Cat, Nss, NewStack, T, Ts, Tzr).
+
+yeccgoto_attached_code(11, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_14(14, Cat, Ss, Stack, T, Ts, Tzr).
+
+yeccgoto_declaration(0=_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_5(_S, Cat, Ss, Stack, T, Ts, Tzr).
+
+yeccgoto_grammar(0, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_4(4, Cat, Ss, Stack, T, Ts, Tzr).
+
+yeccgoto_head(0, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_3(3, Cat, Ss, Stack, T, Ts, Tzr).
+
+yeccgoto_rule(0=_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_2(_S, Cat, Ss, Stack, T, Ts, Tzr).
+
+yeccgoto_strings(1, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_31(31, Cat, Ss, Stack, T, Ts, Tzr);
+yeccgoto_strings(32=_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_33(_S, Cat, Ss, Stack, T, Ts, Tzr).
+
+yeccgoto_symbol(0, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_1(1, Cat, Ss, Stack, T, Ts, Tzr);
+yeccgoto_symbol(1, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_12(12, Cat, Ss, Stack, T, Ts, Tzr);
+yeccgoto_symbol(10, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_12(12, Cat, Ss, Stack, T, Ts, Tzr);
+yeccgoto_symbol(12, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_12(12, Cat, Ss, Stack, T, Ts, Tzr).
+
+yeccgoto_symbols(1, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_30(30, Cat, Ss, Stack, T, Ts, Tzr);
+yeccgoto_symbols(10, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_11(11, Cat, Ss, Stack, T, Ts, Tzr);
+yeccgoto_symbols(12=_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_13(_S, Cat, Ss, Stack, T, Ts, Tzr).
+
+yeccgoto_token(15, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_17(17, Cat, Ss, Stack, T, Ts, Tzr);
+yeccgoto_token(17, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_17(17, Cat, Ss, Stack, T, Ts, Tzr).
+
+yeccgoto_tokens(15=_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_16(_S, Cat, Ss, Stack, T, Ts, Tzr);
+yeccgoto_tokens(17=_S, Cat, Ss, Stack, T, Ts, Tzr) ->
+ yeccpars2_28(_S, Cat, Ss, Stack, T, Ts, Tzr).
+
+-compile({inline,{yeccpars2_6_,1}}).
+-file("yeccgramm.yrl", 44).
+yeccpars2_6_(__Stack0) ->
+ [__1 | __Stack] = __Stack0,
+ [begin
+ symbol ( __1 )
+ end | __Stack].
+
+-compile({inline,{yeccpars2_7_,1}}).
+-file("yeccgramm.yrl", 45).
+yeccpars2_7_(__Stack0) ->
+ [__1 | __Stack] = __Stack0,
+ [begin
+ symbol ( __1 )
+ end | __Stack].
+
+-compile({inline,{yeccpars2_8_,1}}).
+-file("yeccgramm.yrl", 46).
+yeccpars2_8_(__Stack0) ->
+ [__1 | __Stack] = __Stack0,
+ [begin
+ symbol ( __1 )
+ end | __Stack].
+
+-compile({inline,{yeccpars2_9_,1}}).
+-file("yeccgramm.yrl", 43).
+yeccpars2_9_(__Stack0) ->
+ [__1 | __Stack] = __Stack0,
+ [begin
+ symbol ( __1 )
+ end | __Stack].
+
+-compile({inline,{yeccpars2_11_,1}}).
+-file("yeccgramm.yrl", 40).
+yeccpars2_11_(__Stack0) ->
+ [begin
+ { erlang_code , [ { atom , 0 , '$undefined' } ] }
+ end | __Stack0].
+
+-compile({inline,{yeccpars2_12_,1}}).
+-file("yeccgramm.yrl", 35).
+yeccpars2_12_(__Stack0) ->
+ [__1 | __Stack] = __Stack0,
+ [begin
+ [ __1 ]
+ end | __Stack].
+
+-compile({inline,{yeccpars2_13_,1}}).
+-file("yeccgramm.yrl", 36).
+yeccpars2_13_(__Stack0) ->
+ [__2,__1 | __Stack] = __Stack0,
+ [begin
+ [ __1 | __2 ]
+ end | __Stack].
+
+-compile({inline,{yeccpars2_16_,1}}).
+-file("yeccgramm.yrl", 39).
+yeccpars2_16_(__Stack0) ->
+ [__2,__1 | __Stack] = __Stack0,
+ [begin
+ { erlang_code , __2 }
+ end | __Stack].
+
+-compile({inline,{yeccpars2_17_,1}}).
+-file("yeccgramm.yrl", 41).
+yeccpars2_17_(__Stack0) ->
+ [__1 | __Stack] = __Stack0,
+ [begin
+ [ __1 ]
+ end | __Stack].
+
+-compile({inline,{yeccpars2_18_,1}}).
+-file("yeccgramm.yrl", 55).
+yeccpars2_18_(__Stack0) ->
+ [__1 | __Stack] = __Stack0,
+ [begin
+ { '->' , line_of ( __1 ) }
+ end | __Stack].
+
+-compile({inline,{yeccpars2_19_,1}}).
+-file("yeccgramm.yrl", 56).
+yeccpars2_19_(__Stack0) ->
+ [__1 | __Stack] = __Stack0,
+ [begin
+ { ':' , line_of ( __1 ) }
+ end | __Stack].
+
+-compile({inline,{yeccpars2_24_,1}}).
+-file("yeccgramm.yrl", 53).
+yeccpars2_24_(__Stack0) ->
+ [__1 | __Stack] = __Stack0,
+ [begin
+ { value_of ( __1 ) , line_of ( __1 ) }
+ end | __Stack].
+
+-compile({inline,{yeccpars2_25_,1}}).
+-file("yeccgramm.yrl", 54).
+yeccpars2_25_(__Stack0) ->
+ [__1 | __Stack] = __Stack0,
+ [begin
+ { value_of ( __1 ) , line_of ( __1 ) }
+ end | __Stack].
+
+-compile({inline,{yeccpars2_28_,1}}).
+-file("yeccgramm.yrl", 42).
+yeccpars2_28_(__Stack0) ->
+ [__2,__1 | __Stack] = __Stack0,
+ [begin
+ [ __1 | __2 ]
+ end | __Stack].
+
+-compile({inline,{yeccpars2_29_,1}}).
+-file("yeccgramm.yrl", 33).
+yeccpars2_29_(__Stack0) ->
+ [__5,__4,__3,__2,__1 | __Stack] = __Stack0,
+ [begin
+ { rule , [ __1 | __3 ] , __4 }
+ end | __Stack].
+
+-compile({inline,{yeccpars2_32_,1}}).
+-file("yeccgramm.yrl", 37).
+yeccpars2_32_(__Stack0) ->
+ [__1 | __Stack] = __Stack0,
+ [begin
+ [ __1 ]
+ end | __Stack].
+
+-compile({inline,{yeccpars2_33_,1}}).
+-file("yeccgramm.yrl", 38).
+yeccpars2_33_(__Stack0) ->
+ [__2,__1 | __Stack] = __Stack0,
+ [begin
+ [ __1 | __2 ]
+ end | __Stack].
+
+-compile({inline,{yeccpars2_34_,1}}).
+-file("yeccgramm.yrl", 32).
+yeccpars2_34_(__Stack0) ->
+ [__3,__2,__1 | __Stack] = __Stack0,
+ [begin
+ { __1 , __2 }
+ end | __Stack].
+
+-compile({inline,{yeccpars2_35_,1}}).
+-file("yeccgramm.yrl", 31).
+yeccpars2_35_(__Stack0) ->
+ [__3,__2,__1 | __Stack] = __Stack0,
+ [begin
+ { __1 , __2 }
+ end | __Stack].
+
+
+-file("yeccgramm.yrl", 75).
diff --git a/lib/parsetools/src/yeccscan.erl b/lib/parsetools/src/yeccscan.erl
new file mode 100644
index 0000000000..d7ec3ba8d3
--- /dev/null
+++ b/lib/parsetools/src/yeccscan.erl
@@ -0,0 +1,62 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+-module(yeccscan).
+-export([scan/1, scan/3]).
+
+scan(Inport) ->
+ scan(Inport, '', 1).
+
+scan(Inport, Prompt, Line1) ->
+ case catch io:scan_erl_form(Inport, Prompt, Line1) of
+ {eof, Line2} ->
+ {eof, Line2};
+ {ok, Tokens, Line2} ->
+ case Tokens of
+ [] ->
+ scan(Inport, Prompt, Line2);
+ _ ->
+ {ok, lex(Tokens), Line2}
+ end;
+ {error, Descriptor, Line2} ->
+ {error, Descriptor, Line2};
+ {'EXIT', Why} ->
+ io:format('yeccscan: Error scanning input line ~w~n', [Line1]),
+ exit(Why)
+ end.
+
+lex([]) ->
+ [];
+lex([Token | Tokens]) ->
+ case Token of
+ {'dot', Line} ->
+ [{'dot', Line} | lex(Tokens)];
+ {':', Line} ->
+ [{':', Line} | lex(Tokens)];
+ {'->', Line} ->
+ [{'->', Line} | lex(Tokens)];
+ {Category, Line, Symbol} ->
+ [{Category, Line, Symbol} | lex(Tokens)];
+ {Other, Line} ->
+ Cat = case erl_scan:reserved_word(Other) of
+ true -> reserved_word;
+ false -> reserved_symbol
+ end,
+ [{Cat, Line, Other} | lex(Tokens)]
+ end.