diff options
Diffstat (limited to 'lib/parsetools/src/yecc.erl')
-rw-r--r-- | lib/parsetools/src/yecc.erl | 2531 |
1 files changed, 2531 insertions, 0 deletions
diff --git a/lib/parsetools/src/yecc.erl b/lib/parsetools/src/yecc.erl new file mode 100644 index 0000000000..f4d76f471d --- /dev/null +++ b/lib/parsetools/src/yecc.erl @@ -0,0 +1,2531 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% Yacc like LALR-1 parser generator for Erlang. +%% Ref: Aho & Johnson: "LR Parsing", ACM Computing Surveys, vol. 6:2, 1974. +%% Auxiliary files: yeccgramm.yrl, yeccparser.erl, yeccpre.hrl, yeccscan.erl. +%% + +-module(yecc). + +-export([compile/3, file/1, file/2, format_error/1]). + +%% Kept for compatibility with R10B. +-export([yecc/2, yecc/3, yecc/4]). + +-import(lists, [append/1, append/2, concat/1, delete/2, filter/2, + flatmap/2, foldl/3, foldr/3, foreach/2, keydelete/3, + keysearch/3, keysort/2, last/1, map/2, member/2, + reverse/1, sort/1, usort/1]). + +-include("erl_compile.hrl"). +-include("ms_transform.hrl"). + +-record(yecc, { + infile, + outfile, + includefile, + includefile_version, + module, + options = [], + verbose = false, + file_attrs = true, + errors = [], + warnings = [], + conflicts_done = false, + shift_reduce = [], + reduce_reduce = [], + n_states = 0, + inport, + outport, + line, + + parse_actions, + symbol_tab, + inv_symbol_tab, + state_tab, + prec_tab, + goto_tab, + + terminals = [], + nonterminals = [], + all_symbols = [], + prec = [], + rules_list = [], + rules, % a tuple of rules_list + rule_pointer2rule, + rootsymbol = [], + endsymbol = [], + expect_shift_reduce = [], + expect_n_states = [], + header = [], + erlang_code = none + }). + +-record(rule, { + n, % rule n in the grammar file + line, + symbols, % the names of symbols + tokens, + is_guard, % the action is a guard (not used) + is_well_formed % can be parsed (without macro expansion) + }). + +-record(reduce, { + rule_nmbr, + head, + nmbr_of_daughters, + prec, + unused % assure that #reduce{} comes before #shift{} when sorting + }). + +-record(shift, { + state, + pos, + prec, + rule_nmbr + }). + +-record(user_code, {state, terminal, funname, action}). + +-record(symbol, {line = none, name}). + +%% ACCEPT is neither an atom nor a non-terminal. +-define(ACCEPT, {}). + +%% During the phase 'compute_states' terminals in lookahead sets are +%% coded as integers; sets of terminals are integer bit masks. This is +%% for efficiency only. '$empty' is always given the mask 1. The +%% behaviour can be turned off by un-defining SYMBOLS_AS_CODES (useful +%% when debugging). + +%% Non-terminals are also given integer codes, starting with -1. The +%% absolut value of the code is used for indexing a tuple of lists of +%% rules. + +-define(SYMBOLS_AS_CODES, true). + +-ifdef(SYMBOLS_AS_CODES). +-define(EMPTY, 0). +-else. +-define(EMPTY, '$empty'). +-endif. + +%%% +%%% Exported functions +%%% + +%%% Interface to erl_compile. + +compile(Input0, Output0, + #options{warning = WarnLevel, verbose=Verbose, includes=Includes}) -> + Input = shorten_filename(Input0), + Output = shorten_filename(Output0), + Includefile = lists:sublist(Includes, 1), + Opts = [{parserfile,Output}, {includefile,Includefile}, {verbose,Verbose}, + {report_errors, true}, {report_warnings, WarnLevel > 0}], + case file(Input, Opts) of + {ok, _OutFile} -> + ok; + error -> + error + end. + +format_error(bad_declaration) -> + io_lib:fwrite("unknown or bad declaration, ignored", []); +format_error({bad_expect, SymName}) -> + io_lib:fwrite("argument ~s of Expect is not an integer", + [format_symbol(SymName)]); +format_error({bad_rootsymbol, SymName}) -> + io_lib:fwrite("rootsymbol ~s is not a nonterminal", + [format_symbol(SymName)]); +format_error({bad_states, SymName}) -> + io_lib:fwrite("argument ~s of States is not an integer", + [format_symbol(SymName)]); +format_error({conflict, Conflict}) -> + format_conflict(Conflict); +format_error({conflicts, SR, RR}) -> + io_lib:fwrite("conflicts: ~w shift/reduce, ~w reduce/reduce", [SR, RR]); +format_error({duplicate_declaration, Tag}) -> + io_lib:fwrite("duplicate declaration of ~s", [atom_to_list(Tag)]); +format_error({duplicate_nonterminal, Nonterminal}) -> + io_lib:fwrite("duplicate non-terminals ~s", + [format_symbol(Nonterminal)]); +format_error({duplicate_precedence, Op}) -> + io_lib:fwrite("duplicate precedence operator ~s", + [format_symbol(Op)]); +format_error({duplicate_terminal, Terminal}) -> + io_lib:fwrite("duplicate terminal ~s", + [format_symbol(Terminal)]); +format_error({endsymbol_is_nonterminal, Symbol}) -> + io_lib:fwrite("endsymbol ~s is a nonterminal", + [format_symbol(Symbol)]); +format_error({endsymbol_is_terminal, Symbol}) -> + io_lib:fwrite("endsymbol ~s is a terminal", + [format_symbol(Symbol)]); +format_error({error, Module, Error}) -> + Module:format_error(Error); +format_error({file_error, Reason}) -> + io_lib:fwrite("~s",[file:format_error(Reason)]); +format_error(illegal_empty) -> + io_lib:fwrite("illegal use of empty symbol", []); +format_error({internal_error, Error}) -> + io_lib:fwrite("internal yecc error: ~w", [Error]); +format_error({missing_syntax_rule, Nonterminal}) -> + io_lib:fwrite("no syntax rule for non-terminal symbol ~s", + [format_symbol(Nonterminal)]); +format_error({n_states, Exp, N}) -> + io_lib:fwrite("expected ~w states, but got ~p states", [Exp, N]); +format_error(no_grammar_rules) -> + io_lib:fwrite("grammar rules are missing", []); +format_error(nonterminals_missing) -> + io_lib:fwrite("Nonterminals is missing", []); +format_error({precedence_op_is_endsymbol, SymName}) -> + io_lib:fwrite("precedence operator ~s is endsymbol", + [format_symbol(SymName)]); +format_error({precedence_op_is_unknown, SymName}) -> + io_lib:fwrite("unknown precedence operator ~s", + [format_symbol(SymName)]); +format_error({reserved, N}) -> + io_lib:fwrite("the use of ~w should be avoided", [N]); +format_error({symbol_terminal_and_nonterminal, SymName}) -> + io_lib:fwrite("symbol ~s is both a terminal and nonterminal", + [format_symbol(SymName)]); +format_error(rootsymbol_missing) -> + io_lib:fwrite("Rootsymbol is missing", []); +format_error(terminals_missing) -> + io_lib:fwrite("Terminals is missing", []); +format_error({undefined_nonterminal, Symbol}) -> + io_lib:fwrite("undefined nonterminal: ~s", [format_symbol(Symbol)]); +format_error({undefined_pseudo_variable, Atom}) -> + io_lib:fwrite("undefined pseudo variable ~w", [Atom]); +format_error({undefined_symbol, SymName}) -> + io_lib:fwrite("undefined rhs symbol ~s", [format_symbol(SymName)]); +format_error({unused_nonterminal, Nonterminal}) -> + io_lib:fwrite("non-terminal symbol ~s not used", + [format_symbol(Nonterminal)]); +format_error({unused_terminal, Terminal}) -> + io_lib:fwrite("terminal symbol ~s not used", + [format_symbol(Terminal)]). + +file(File) -> + file(File, [report_errors, report_warnings]). + +file(File, Options) -> + case is_filename(File) of + no -> erlang:error(badarg, [File, Options]); + _ -> ok + end, + case options(Options) of + badarg -> + erlang:error(badarg, [File, Options]); + OptionValues -> + Self = self(), + Flag = process_flag(trap_exit, false), + Pid = spawn_link(fun() -> infile(Self, File, OptionValues) end), + receive + {Pid, Rep} -> + receive after 1 -> ok end, + process_flag(trap_exit, Flag), + Rep + end + end. + +%% Kept for backward compatibility. +yecc(Infile, Outfile) -> + yecc(Infile, Outfile, false, []). + +yecc(Infile, Outfile, Verbose) -> + yecc(Infile, Outfile, Verbose, []). + +yecc(Infilex, Outfilex, Verbose, Includefilex) -> + statistics(runtime), + case file(Infilex, [{parserfile, Outfilex}, + {verbose, Verbose}, + {report, true}, + {includefile, Includefilex}]) of + {ok, _File} -> + statistics(runtime); + error -> + exit(error) + end. + +%%% +%%% Local functions +%%% + +options(Options0) when is_list(Options0) -> + try + Options = flatmap(fun(return) -> short_option(return, true); + (report) -> short_option(report, true); + ({return,T}) -> short_option(return, T); + ({report,T}) -> short_option(report, T); + (T) -> [T] + end, Options0), + options(Options, [file_attributes, includefile, parserfile, + report_errors, report_warnings, return_errors, + return_warnings, time, verbose], []) + catch error: _ -> badarg + end; +options(Option) -> + options([Option]). + +short_option(return, T) -> + [{return_errors,T}, {return_warnings,T}]; +short_option(report, T) -> + [{report_errors,T}, {report_warnings,T}]. + +options(Options0, [Key | Keys], L) when is_list(Options0) -> + Options = case member(Key, Options0) of + true -> + [atom_option(Key) | delete(Key, Options0)]; + false -> + Options0 + end, + V = case keysearch(Key, 1, Options) of + {value, {Key, Filename0}} when Key =:= includefile; + Key =:= parserfile -> + case is_filename(Filename0) of + no -> + badarg; + Filename -> + {ok, [{Key, Filename}]} + end; + {value, {Key, Bool}} when Bool =:= true; Bool =:= false -> + {ok, [{Key, Bool}]}; + {value, {Key, _}} -> + badarg; + false -> + {ok, [{Key, default_option(Key)}]} + end, + case V of + badarg -> + badarg; + {ok, KeyValueL} -> + NewOptions = keydelete(Key, 1, Options), + options(NewOptions, Keys, KeyValueL ++ L) + end; +options([], [], L) -> + foldl(fun({_,false}, A) -> A; + ({Tag,true}, A) -> [Tag | A]; + (F, A) -> [F | A] + end, [], L); +options(_Options, _, _L) -> + badarg. + +default_option(file_attributes) -> true; +default_option(includefile) -> []; +default_option(parserfile) -> []; +default_option(report_errors) -> true; +default_option(report_warnings) -> true; +default_option(return_errors) -> false; +default_option(return_warnings) -> false; +default_option(time) -> false; +default_option(verbose) -> false. + +atom_option(file_attributes) -> {file_attributes, true}; +atom_option(report_errors) -> {report_errors, true}; +atom_option(report_warnings) -> {report_warnings, true}; +atom_option(return_errors) -> {return_errors, true}; +atom_option(return_warnings) -> {return_warnings, true}; +atom_option(time) -> {time, true}; +atom_option(verbose) -> {verbose, true}; +atom_option(Key) -> Key. + +is_filename(T) -> + try filename:flatten(T) of + Filename -> Filename + catch error: _ -> no + end. + +shorten_filename(Name0) -> + {ok,Cwd} = file:get_cwd(), + case lists:prefix(Cwd, Name0) of + false -> Name0; + true -> + case lists:nthtail(length(Cwd), Name0) of + "/"++N -> N; + N -> N + end + end. + +start(Infilex, Options) -> + Infile = assure_extension(Infilex, ".yrl"), + {value, {_, Outfilex0}} = keysearch(parserfile, 1, Options), + {value, {_, Includefilex}} = keysearch(includefile, 1, Options), + Outfilex = case Outfilex0 of + [] -> filename:rootname(Infilex, ".yrl"); + _ -> Outfilex0 + end, + Includefile = case Includefilex of + [] -> []; + _ -> assure_extension(Includefilex,".hrl") + end, + IncludefileVersion = includefile_version(Includefile), + Outfile = assure_extension(Outfilex, ".erl"), + Module = list_to_atom(filename:basename(Outfile, ".erl")), + #yecc{infile = Infile, + outfile = Outfile, + includefile = Includefile, + includefile_version = IncludefileVersion, + module = Module, + options = Options, + verbose = member(verbose, Options), + file_attrs = member(file_attributes, Options)}. + +assure_extension(File, Ext) -> + concat([strip_extension(File, Ext), Ext]). + +%% Assumes File is a filename. +strip_extension(File, Ext) -> + case filename:extension(File) of + Ext -> filename:rootname(File); + _Other -> File + end. + +infile(Parent, Infilex, Options) -> + St0 = start(Infilex, Options), + St = case file:open(St0#yecc.infile, [read, read_ahead]) of + {ok, Inport} -> + try + outfile(St0#yecc{inport = Inport}) + after + ok = file:close(Inport) + end; + {error, Reason} -> + add_error(St0#yecc.infile, none, {file_error, Reason}, St0) + end, + case St#yecc.errors of + [] -> ok; + _ -> _ = file:delete(St#yecc.outfile) + end, + Parent ! {self(), yecc_ret(St)}. + +outfile(St0) -> + case file:open(St0#yecc.outfile, [write, delayed_write]) of + {ok, Outport} -> + try + generate(St0#yecc{outport = Outport, line = 1}) + catch + throw: St1 -> + St1; + exit: Reason -> + add_error({internal_error, Reason}, St0) + after + ok = file:close(Outport) + end; + {error, Reason} -> + add_error(St0#yecc.outfile, none, {file_error, Reason}, St0) + end. + +os_process_size() -> + case os:type() of + {unix, sunos} -> + Size = os:cmd("ps -o vsz -p " ++ os:getpid() ++ " | tail -1"), + list_to_integer(lib:nonl(Size)); + _ -> + 0 + end. + +timeit(Name, Fun, St0) -> + Time = runtime, + %% Time = wall_clock, + {Before, _} = statistics(Time), + St = Fun(St0), + {After, _} = statistics(Time), + Mem0 = erts_debug:flat_size(St)*erlang:system_info(wordsize), + Mem = lists:flatten(io_lib:format("~.1f kB", [Mem0/1024])), + Sz = lists:flatten(io_lib:format("~.1f MB", [os_process_size()/1024])), + io:fwrite(" ~-30w: ~10.2f s ~12s ~10s\n", + [Name, (After-Before)/1000, Mem, Sz]), + St. + +-define(PASS(P), {P, fun P/1}). + +generate(St0) -> + Passes = [?PASS(parse_grammar), ?PASS(check_grammar), + ?PASS(states_and_goto_table), ?PASS(parse_actions), + ?PASS(action_conflicts), ?PASS(write_file)], + F = case member(time, St0#yecc.options) of + true -> + io:fwrite(<<"Generating parser from grammar in ~s\n">>, + [format_filename(St0#yecc.infile)]), + fun timeit/3; + false -> + fun(_Name, Fn, St) -> Fn(St) end + end, + Fun = fun({Name, Fun}, St) -> + St2 = F(Name, Fun, St), + if + St2#yecc.errors =:= [] -> St2; + true -> throw(St2) + end + end, + foldl(Fun, St0, Passes). + +parse_grammar(St) -> + parse_grammar(St#yecc.inport, 1, St). + +parse_grammar(Inport, Line, St) -> + {NextLine, Grammar} = read_grammar(Inport, Line), + parse_grammar(Grammar, Inport, NextLine, St). + +parse_grammar(eof, _Inport, _NextLine, St) -> + St; +parse_grammar({#symbol{name = 'Header'}, Ss}, Inport, NextLine, St0) -> + St1 = St0#yecc{header = [S || {string,_,S} <- Ss]}, + parse_grammar(Inport, NextLine, St1); +parse_grammar({#symbol{name = 'Erlang'}, [#symbol{name = code}]}, _Inport, + NextLine, St) -> + St#yecc{erlang_code = NextLine}; +parse_grammar(Grammar, Inport, NextLine, St0) -> + St = parse_grammar(Grammar, St0), + parse_grammar(Inport, NextLine, St). + +parse_grammar({error,ErrorLine,Error}, St) -> + add_error(ErrorLine, Error, St); +parse_grammar({rule, Rule, Tokens}, St0) -> + NmbrOfDaughters = case Rule of + [_, #symbol{name = '$empty'}] -> 0; + _ -> length(Rule) - 1 + end, + {IsGuard, IsWellFormed} = check_action(Tokens), + {Tokens1, St} = subst_pseudo_vars(Tokens, + NmbrOfDaughters, + St0), + RuleDef = #rule{symbols = Rule, + tokens = Tokens1, + is_guard = IsGuard, + is_well_formed = IsWellFormed}, + St#yecc{rules_list = [RuleDef | St#yecc.rules_list]}; +parse_grammar({prec, Prec}, St) -> + St#yecc{prec = Prec ++ St#yecc.prec}; +parse_grammar({#symbol{line = Line, name = Name}, Symbols}, St) -> + CF = fun(I) -> + case element(I, St) of + [] -> + setelement(I, St, Symbols); + _ -> + add_error(Line, {duplicate_declaration, Name}, St) + end + end, + OneSymbol = length(Symbols) =:= 1, + case Name of + 'Nonterminals' -> CF(#yecc.nonterminals); + 'Terminals' -> CF(#yecc.terminals); + 'Rootsymbol' when OneSymbol -> CF(#yecc.rootsymbol); + 'Endsymbol' when OneSymbol -> CF(#yecc.endsymbol); + 'Expect' when OneSymbol -> CF(#yecc.expect_shift_reduce); + 'States' when OneSymbol -> CF(#yecc.expect_n_states); % undocumented + _ -> add_warning(Line, bad_declaration, St) + end. + +read_grammar(Inport, Line) -> + case yeccscan:scan(Inport, '', Line) of + {eof, NextLine} -> + {NextLine, eof}; + {error, {ErrorLine, Mod, What}, NextLine} -> + {NextLine, {error, ErrorLine, {error, Mod, What}}}; + {ok, Input, NextLine} -> + {NextLine, case yeccparser:parse(Input) of + {error, {ErrorLine, Mod, Message}} -> + {error, ErrorLine, {error, Mod, Message}}; + {ok, {rule, Rule, {erlang_code, Tokens}}} -> + {rule, Rule, Tokens}; + {ok, {#symbol{name=P}, + [#symbol{name=I} | OpL]}=Ss} -> + A = precedence(P), + if + A =/= unknown, + is_integer(I), + OpL =/= [] -> + Ps = [{Op, I , A} || Op <- OpL], + {prec, Ps}; + true -> + Ss + end; + {ok, Ss} -> + Ss + end} + end. + +precedence('Left') -> left; +precedence('Right') -> right; +precedence('Unary') -> unary; +precedence('Nonassoc') -> nonassoc; +precedence(_) -> unknown. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +check_grammar(St0) -> + Empty = #symbol{line = none, name = '$empty'}, + AllSymbols = St0#yecc.nonterminals ++ St0#yecc.terminals ++ [Empty], + St1 = St0#yecc{all_symbols = AllSymbols}, + Cs = [fun check_nonterminals/1, fun check_terminals/1, + fun check_rootsymbol/1, fun check_endsymbol/1, + fun check_expect/1, fun check_states/1, + fun check_precedences/1, fun check_rules/1], + foldl(fun(F, St) -> F(St) end, St1, Cs). + +check_nonterminals(St) -> + case St#yecc.nonterminals of + [] -> + add_error(nonterminals_missing, St); + Nonterminals -> + {Unique, Dups} = duplicates(names(Nonterminals)), + St1 = add_warnings(Dups, duplicate_nonterminal, St), + St2 = check_reserved(Unique, St1), + St2#yecc{nonterminals = [?ACCEPT | Unique]} + end. + +check_terminals(St0) -> + case St0#yecc.terminals of + [] -> + add_error(terminals_missing, St0); + Terminals -> + {Unique, Dups} = duplicates(names(Terminals)), + St1 = add_warnings(Dups, duplicate_terminal, St0), + Common = intersect(St1#yecc.nonterminals, Unique), + St2 = add_errors(Common, symbol_terminal_and_nonterminal, St1), + St3 = check_reserved(Unique, St2), + St3#yecc{terminals = ['$empty' | Unique]} + end. + +check_reserved(Names, St) -> + add_errors(intersect(Names, ['$empty', '$end', '$undefined']), + reserved, St). + +check_rootsymbol(St) -> + case St#yecc.rootsymbol of + [] -> + add_error(rootsymbol_missing, St); + [#symbol{line = Line, name = SymName}] -> + case kind_of_symbol(St, SymName) of + nonterminal -> + St#yecc{rootsymbol = SymName}; + _ -> + add_error(Line, {bad_rootsymbol, SymName}, St) + end + end. + +check_endsymbol(St) -> + case St#yecc.endsymbol of + [] -> + St#yecc{endsymbol = '$end'}; + [#symbol{line = Line, name = SymName}] -> + case kind_of_symbol(St, SymName) of + nonterminal -> + add_error(Line, {endsymbol_is_nonterminal, SymName}, St); + terminal -> + add_error(Line, {endsymbol_is_terminal, SymName}, St); + _ -> + St#yecc{endsymbol = SymName} + end + end. + +check_expect(St0) -> + case St0#yecc.expect_shift_reduce of + [] -> + St0#yecc{expect_shift_reduce = 0}; + [#symbol{name = Expect}] when is_integer(Expect) -> + St0#yecc{expect_shift_reduce = Expect}; + [#symbol{line = Line, name = Name}] -> + St1 = add_error(Line, {bad_expect, Name}, St0), + St1#yecc{expect_shift_reduce = 0} + end. + +check_states(St) -> + case St#yecc.expect_n_states of + [] -> + St; + [#symbol{name = NStates}] when is_integer(NStates) -> + St#yecc{expect_n_states = NStates}; + [#symbol{line = Line, name = Name}] -> + add_error(Line, {bad_states, Name}, St) + end. + +check_precedences(St0) -> + {St1, _} = + foldr(fun({#symbol{line = Line, name = Op},_I,_A}, {St,Ps}) -> + case member(Op, Ps) of + true -> + {add_error(Line, {duplicate_precedence,Op}, St), + Ps}; + false -> + {St, [Op | Ps]} + end + end, {St0,[]}, St0#yecc.prec), + foldl(fun({#symbol{line = Line, name = Op},I,A}, St) -> + case kind_of_symbol(St, Op) of + endsymbol -> + add_error(Line,{precedence_op_is_endsymbol,Op}, St); + unknown -> + add_error(Line, {precedence_op_is_unknown, Op}, St); + _ -> + St#yecc{prec = [{Op,I,A} | St#yecc.prec]} + end + end, St1#yecc{prec = []}, St1#yecc.prec). + +check_rule(Rule0, {St0,Rules}) -> + Symbols = Rule0#rule.symbols, + #symbol{line = HeadLine, name = Head} = hd(Symbols), + case member(Head, St0#yecc.nonterminals) of + false -> + {add_error(HeadLine, {undefined_nonterminal, Head}, St0), Rules}; + true -> + St = check_rhs(tl(Symbols), St0), + Rule = Rule0#rule{line = HeadLine, symbols = names(Symbols)}, + {St, [Rule | Rules]} + end. + +check_rules(St0) -> + {St,Rules0} = foldl(fun check_rule/2, {St0,[]}, St0#yecc.rules_list), + case St#yecc.rules_list of + [] -> + add_error(no_grammar_rules, St); + _ -> + Rule = #rule{line = none, + symbols = [?ACCEPT, St#yecc.rootsymbol], + tokens = []}, + Rules1 = [Rule | Rules0], + Rules = map(fun({R,I}) -> R#rule{n = I} end, count(0, Rules1)), + St#yecc{rules_list = Rules, rules = list_to_tuple(Rules)} + end. + +duplicates(List) -> + Unique = usort(List), + {Unique, List -- Unique}. + +names(Symbols) -> + map(fun(Symbol) -> Symbol#symbol.name end, Symbols). + +symbol_line(Name, St) -> + {value, #symbol{line = Line}} = symbol_search(Name, St#yecc.all_symbols), + Line. + +symbol_member(Symbol, Symbols) -> + symbol_search(Symbol#symbol.name, Symbols) =/= false. + +symbol_search(Name, Symbols) -> + keysearch(Name, #symbol.name, Symbols). + +states_and_goto_table(St0) -> + St1 = create_symbol_table(St0), + St = compute_states(St1), + create_precedence_table(St). + +parse_actions(St) -> + erase(), % the pd is used when decoding lookahead sets + ParseActions = compute_parse_actions(St#yecc.n_states, St, []), + erase(), + St#yecc{parse_actions = ParseActions, state_tab = []}. + +action_conflicts(St0) -> + St = find_action_conflicts(St0), + St#yecc{conflicts_done = true}. + +-record(state_info, {reduce_only, state_repr, comment}). + +write_file(St0) -> + #yecc{parse_actions = ParseActions, goto_tab = GotoTab} = St0, + Sorted = sort_parse_actions(ParseActions), + StateReprs = find_identical_shift_states(Sorted), + StateInfo = collect_some_state_info(Sorted, StateReprs), + StateJumps = find_partial_shift_states(Sorted, StateReprs), + UserCodeActions = find_user_code(Sorted, St0), + #yecc{infile = Infile, outfile = Outfile, + inport = Inport, outport = Outport, + nonterminals = Nonterminals} = St0, + {St10, N_lines, LastErlangCodeLine} = + output_prelude(Outport, Inport, St0), + St20 = St10#yecc{line = St10#yecc.line + N_lines}, + St25 = nl(St20), + St30 = output_file_directive(St25, Outfile, St25#yecc.line), + St40 = nl(St30), + St50 = output_actions(St40, StateJumps, StateInfo), + Go0 = [{Symbol,{From,To}} || {{From,Symbol},To} <- ets:tab2list(GotoTab)], + Go = family_with_domain(Go0, Nonterminals), + St60 = output_goto(St50, Go, StateInfo), + St70 = output_inlined(St60, UserCodeActions, Infile), + St = nl(St70), + case LastErlangCodeLine of + %% Just in case warnings or errors are emitted after the last + %% line of the file. + {last_erlang_code_line, Last_line} -> + output_file_directive(St, Infile, Last_line); + no_erlang_code -> + St + end. + +yecc_ret(St0) -> + St = check_expected(St0), + report_errors(St), + report_warnings(St), + Es = pack_errors(St#yecc.errors), + Ws = pack_warnings(St#yecc.warnings), + if + Es =:= [] -> + case member(return_warnings, St#yecc.options) of + true -> {ok, St#yecc.outfile, Ws}; + false -> {ok, St#yecc.outfile} + end; + true -> + case member(return_errors, St#yecc.options) of + true -> {error, Es, Ws}; + false -> error + end + end. + +check_expected(St0) -> + #yecc{shift_reduce = SR, reduce_reduce = RR, expect_shift_reduce = ExpSR, + n_states = NStates0, expect_n_states = ExpStates, + conflicts_done = Done} = St0, + N_RR = length(usort(RR)), + N_SR = length(usort(SR)), + St1 = if + not Done -> + St0; + N_SR =:= ExpSR, N_RR =:= 0 -> + St0; + true -> + add_warning(none, {conflicts, N_SR, N_RR}, St0) + end, + NStates = NStates0 + 1, + if + (not Done) or (ExpStates =:= []) or (NStates =:= ExpStates) -> + St1; + true -> + add_warning(none, {n_states, ExpStates, NStates}, St1) + end. + +pack_errors([{File,_} | _] = Es) -> + [{File, flatmap(fun({_,E}) -> [E] end, sort(Es))}]; +pack_errors([]) -> + []. + +pack_warnings([{File,_} | _] = Ws) -> + [{File, flatmap(fun({_,W}) -> [W] end, sort(Ws))}]; +pack_warnings([]) -> + []. + +report_errors(St) -> + case member(report_errors, St#yecc.options) of + true -> + foreach(fun({File,{none,Mod,E}}) -> + io:fwrite(<<"~s: ~s\n">>, + [File,Mod:format_error(E)]); + ({File,{Line,Mod,E}}) -> + io:fwrite(<<"~s:~w: ~s\n">>, + [File,Line,Mod:format_error(E)]) + end, sort(St#yecc.errors)); + false -> + ok + end. + +report_warnings(St) -> + case member(report_warnings, St#yecc.options) of + true -> + foreach(fun({File,{none,Mod,W}}) -> + io:fwrite(<<"~s: Warning: ~s\n">>, + [File,Mod:format_error(W)]); + ({File,{Line,Mod,W}}) -> + io:fwrite(<<"~s:~w: Warning: ~s\n">>, + [File,Line,Mod:format_error(W)]) + end, sort(St#yecc.warnings)); + false -> + ok + end. + +add_error(E, St) -> + add_error(none, E, St). + +add_error(Line, E, St) -> + add_error(St#yecc.infile, Line, E, St). + +add_error(File, Line, E, St) -> + St#yecc{errors = [{File,{Line,?MODULE,E}}|St#yecc.errors]}. + +add_errors(SymNames, E0, St0) -> + foldl(fun(SymName, St) -> + add_error(symbol_line(SymName, St), {E0, SymName}, St) + end, St0, SymNames). + +add_warning(Line, W, St) -> + St#yecc{warnings = [{St#yecc.infile,{Line,?MODULE,W}}|St#yecc.warnings]}. + +add_warnings(SymNames, W0, St0) -> + foldl(fun(SymName, St) -> + add_warning(symbol_line(SymName, St), {W0, SymName}, St) + end, St0, SymNames). + +check_rhs([#symbol{name = '$empty'}], St) -> + St; +check_rhs(Rhs, St0) -> + case symbol_search('$empty', Rhs) of + {value, #symbol{line = Line}} -> + add_error(Line, illegal_empty, St0); + false -> + foldl(fun(Sym, St) -> + case symbol_member(Sym, St#yecc.all_symbols) of + true -> + St; + false -> + E = {undefined_symbol,Sym#symbol.name}, + add_error(Sym#symbol.line, E, St) + end + end, St0, Rhs) + end. + +check_action(Tokens) -> + case erl_parse:parse_exprs(add_roberts_dot(Tokens, 0)) of + {error, _Error} -> + {false, false}; + {ok, [Expr | Exprs]} -> + IsGuard = Exprs =:= [] andalso erl_lint:is_guard_test(Expr), + {IsGuard, true} + end. + +add_roberts_dot([], Line) -> + [{'dot', Line}]; +add_roberts_dot([{'dot', Line} | _], _) -> + [{'dot', Line}]; +add_roberts_dot([Token | Tokens], _) -> + [Token | add_roberts_dot(Tokens, element(2, Token))]. + +subst_pseudo_vars([], _, St) -> + {[], St}; +subst_pseudo_vars([H0 | T0], NmbrOfDaughters, St0) -> + {H, St1} = subst_pseudo_vars(H0, NmbrOfDaughters, St0), + {T, St} = subst_pseudo_vars(T0, NmbrOfDaughters, St1), + {[H | T], St}; +subst_pseudo_vars({atom, Line, Atom}, NmbrOfDaughters, St0) -> + case atom_to_list(Atom) of + [$$ | Rest] -> + try list_to_integer(Rest) of + N when N > 0, N =< NmbrOfDaughters -> + {{var, Line, list_to_atom(append("__", Rest))}, St0}; + _ -> + St = add_error(Line, {undefined_pseudo_variable, Atom}, + St0), + {{atom, Line, '$undefined'}, St} + catch + error: _ -> {{atom, Line, Atom}, St0} + end; + _ -> + {{atom, Line, Atom}, St0} + end; +subst_pseudo_vars(Tuple, NmbrOfDaughters, St0) when is_tuple(Tuple) -> + {L, St} = subst_pseudo_vars(tuple_to_list(Tuple), NmbrOfDaughters, St0), + {list_to_tuple(L), St}; +subst_pseudo_vars(Something_else, _, St) -> + {Something_else, St}. + +kind_of_symbol(St, SymName) -> + case member(SymName, St#yecc.nonterminals) of + false -> + case member(SymName, St#yecc.terminals) of + false -> + case St#yecc.endsymbol of + SymName -> + endsymbol; + _ -> + unknown + end; + true -> + terminal + end; + true -> + nonterminal + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Computing parse states and goto table from grammar. +% Start item: {0, [Endsymbol]} <-> +% (['ACCEPT' '.', Rootsymbol], {'$'}) in Aho & Johnson +% where '$end' is the default end of input symbol of the +% scanner if no 'Endsymbol' has been declared in the syntax file. + +-record(tabs, { + symbols, % ETS-set, keypos 1: {SymbolName, SymbolCode} + inv_symbols, % ETS-set, keypos 2: {SymbolName, SymbolCode} + state_id, % ETS-bag, keypos 1: {StateId, StateNum} + % StateId is not unique for a state. + rp_rhs, % rule pointer -> the remaining rhs symbols + rp_info, % rule pointer -> expanding rules and lookahead + goto % ETS-bag, keypos 1: first + % {{FromStateNum, Symbol, ToStateNum}}, then + % {{FromStateNum, Symbol}, ToStateNum} + }). + +-record(item, { % what states are made of + rule_pointer, + look_ahead, + rhs + }). + +compute_states(St0) -> + SymbolTab = St0#yecc.symbol_tab, + CodedRules = map(fun(#rule{symbols = Syms} = R) -> + R#rule{symbols = code_symbols(Syms, SymbolTab)} + end, St0#yecc.rules_list), + CodedNonterminals = code_symbols(St0#yecc.nonterminals, SymbolTab), + %% Only coded in this phase; StC is thrown away. + StC = St0#yecc{rules_list = CodedRules, + rules = list_to_tuple(CodedRules), + nonterminals = CodedNonterminals}, + {RuleIndex, RulePointer2Rule} = + make_rule_index(StC, St0#yecc.rules_list), + StateTab0 = {}, + StateIdTab = ets:new(yecc_state_id, [set]), + GotoTab = ets:new(yecc_goto, [bag]), + RulePointerRhs = make_rhs_index(StC#yecc.rules_list), + RulePointerInfo = make_rule_pointer_info(StC, RulePointerRhs, RuleIndex), + + Tables = #tabs{symbols = SymbolTab, + state_id = StateIdTab, + rp_rhs = RulePointerRhs, + rp_info = RulePointerInfo, + goto = GotoTab}, + + erase(), + EndsymCode = code_terminal(StC#yecc.endsymbol, StC#yecc.symbol_tab), + {StateId, State0} = compute_state([{EndsymCode, 1}], Tables), + + StateNum0 = first_state(), + FirstState = {StateNum0, State0}, + StateTab1 = insert_state(Tables, StateTab0, FirstState, StateId), + {StateTab, N} = + compute_states1([{StateNum0, get_current_symbols(State0)}], + FirstState, StateTab1, Tables), + true = ets:delete(StateIdTab), + St = St0#yecc{state_tab = StateTab, goto_tab = GotoTab, n_states = N, + rule_pointer2rule = RulePointer2Rule}, + decode_goto(GotoTab, St#yecc.inv_symbol_tab), + check_usage(St). + +first_state() -> + 0. + +decode_goto(GotoTab, InvSymTab) -> + G = ets:tab2list(GotoTab), + ets:delete_all_objects(GotoTab), + ets:insert(GotoTab, + map(fun({{From, Sym, Next}}) -> + {{From, decode_symbol(Sym, InvSymTab)}, Next} + end, G)). + +check_usage(St0) -> + SelSyms = ets:fun2ms(fun({{_,Sym},_}) -> Sym end), + UsedSymbols = ets:select(St0#yecc.goto_tab, SelSyms), + Syms = ordsets:from_list([?ACCEPT, '$empty' | UsedSymbols]), + NonTerms = ordsets:from_list(St0#yecc.nonterminals), + UnusedNonTerms = ordsets:to_list(ordsets:subtract(NonTerms, Syms)), + St1 = add_warnings(UnusedNonTerms, unused_nonterminal, St0), + Terms = ordsets:from_list(St0#yecc.terminals), + St2 = add_warnings(ordsets:to_list(ordsets:subtract(Terms, Syms)), + unused_terminal, St1), + DefinedNonTerminals = map(fun(#rule{symbols = [Name | _]}) -> + Name + end, St2#yecc.rules_list), + DefNonTerms = ordsets:from_list(DefinedNonTerminals), + UndefNonTerms = ordsets:subtract(NonTerms, DefNonTerms), + add_errors(ordsets:to_list(ordsets:subtract(UndefNonTerms, + UnusedNonTerms)), + missing_syntax_rule, St2). + +%% States are sometimes big, should not be copied to ETS tables. +%% Here an "extendible" tuple is used. +lookup_state(StateTab, N) -> + element(N+1, StateTab). + +insert_state(#tabs{state_id = StateIdTab}, StateTab0, State, StateId) -> + {N, _Items} = State, + insert_state_id(StateIdTab, N, StateId), + StateTab = if + tuple_size(StateTab0) > N -> + StateTab0; + true -> + list_to_tuple(tuple_to_list(StateTab0) ++ + lists:duplicate(round(1 + N * 1.5), [])) + end, + setelement(N+1, StateTab, State). + +insert_state_id(StateIdTab, N, StateId) -> + true = ets:insert(StateIdTab, {StateId, N}). + +compute_states1([], {N, _}=_CurrState, StateTab0, _Tables) -> + {StateTab0, N}; +compute_states1([{N, Symbols} | Try], CurrState, StateTab, Tables) -> + {_N, S} = lookup_state(StateTab, N), + Seeds = state_seeds(S, Symbols), + compute_states2(Seeds, N, Try, CurrState, StateTab, Tables). + +compute_states2([], _N, Try, CurrState, StateTab, Tables) -> + compute_states1(Try, CurrState, StateTab, Tables); +compute_states2([{Sym,Seed} | Seeds], N, Try, CurrState, StateTab, Tables) -> + {StateId, NewState} = compute_state(Seed, Tables), + case check_states(NewState, StateId, StateTab, Tables) of + add -> + {M, _} = CurrState, + %% io:fwrite(<<"Adding state ~w\n">>, [M + 1]), + CurrentSymbols = get_current_symbols(NewState), + Next = M + 1, + NextState = {Next, NewState}, + NewStateTab = insert_state(Tables, StateTab, NextState, StateId), + insert_goto(Tables, N, Sym, Next), + compute_states2(Seeds, N, [{Next, CurrentSymbols} | Try], + NextState, NewStateTab, Tables); + {old, M} -> + %% io:fwrite(<<"Identical to old state ~w\n">>, [M]), + insert_goto(Tables, N, Sym, M), + compute_states2(Seeds, N, Try, CurrState, StateTab, Tables); + {merge, M, NewCurrent} -> + %% io:fwrite(<<"Merging with state ~w\n">>, [M]), + Try1 = case keysearch(M, 1, Try) of + false -> + [{M, NewCurrent} | Try]; + {value, {_, OldCurrent}} -> + case ordsets:is_subset(NewCurrent, OldCurrent) of + true -> + Try; + false -> + [{M, ordsets:union(NewCurrent, OldCurrent)} + | keydelete(M, 1, Try)] + end + end, + NewStateTab = merge_states(NewState, StateTab, Tables, M,StateId), + insert_goto(Tables, N, Sym, M), + compute_states2(Seeds, N, Try1, CurrState, NewStateTab, Tables) + end. + +insert_goto(Tables, From, Sym, To) -> + true = ets:insert(Tables#tabs.goto, {{From, Sym, To}}). + +%% Create an ets table for faster lookups. +create_symbol_table(St) -> + #yecc{terminals = Terminals, endsymbol = Endsymbol} = St, + SymbolTab = ets:new(yecc_symbols, [{keypos,1}]), + %% '$empty' is always assigned 0 + Ts = ['$empty', Endsymbol | delete('$empty', Terminals)], + TsC = count(0, Ts), + NTsC = map(fun({NT,I}) -> {NT,-I} end, count(1, St#yecc.nonterminals)), + Cs = TsC++NTsC, + true = ets:insert(SymbolTab, Cs), + + InvSymTable = ets:new(yecc_inverted_terminals, [{keypos,2}]), + true = ets:insert(InvSymTable, Cs), + + St#yecc{symbol_tab = SymbolTab, inv_symbol_tab = InvSymTable}. + +get_current_symbols(State) -> + usort(get_current_symbols1(State, [])). + +get_current_symbols1([], Syms) -> + Syms; +get_current_symbols1([#item{rhs = Rhs} | Items], Syms) -> + case Rhs of + [] -> + get_current_symbols1(Items, Syms); + [Symbol | _] -> + get_current_symbols1(Items, [Symbol | Syms]) + end. + +state_seeds(Items, Symbols) -> + L = [{S,{LA,RP + 1}} || #item{rule_pointer = RP, look_ahead = LA, + rhs = [S | _]} <- Items], + state_seeds1(keysort(1, L), Symbols). + +state_seeds1(_L, []) -> + []; +state_seeds1(L, [Symbol | Symbols]) -> + state_seeds(L, Symbol, Symbols, []). + +state_seeds([{Symbol, Item} | L], Symbol, Symbols, Is) -> + state_seeds(L, Symbol, Symbols, [Item | Is]); +state_seeds([{S, _Item} | L], Symbol, Symbols, Is) when S < Symbol -> + state_seeds(L, Symbol, Symbols, Is); +state_seeds(L, Symbol, Symbols, Is) -> + [{Symbol, Is} | state_seeds1(L, Symbols)]. + +compute_state(Seed, Tables) -> + RpInfo = Tables#tabs.rp_info, + foreach(fun({LA, RulePointer}) -> put(RulePointer, LA) end, Seed), + foreach(fun({LA, RP}) -> compute_closure(LA, RP, RpInfo) end, Seed), + Closure = keysort(1, erase()), + state_items(Closure, [], [], Tables#tabs.rp_rhs). + +%% Collects a uniqe id for the state (all rule pointers). +state_items([{RP, LA} | L], Is, Id, RpRhs) -> + I = #item{rule_pointer = RP, look_ahead = LA, rhs = element(RP, RpRhs)}, + state_items(L, [I | Is], [RP | Id], RpRhs); +state_items(_, Is, Id, _RpRhs) -> + {Id, Is}. + +-compile({inline,[compute_closure/3]}). +compute_closure(Lookahead, RulePointer, RpInfo) -> + case element(RulePointer, RpInfo) of + []=Void -> % no followers, or terminal + Void; + {no_union, ExpandingRules, NewLookahead} -> + compute_closure1(ExpandingRules, NewLookahead, RpInfo); + {union, ExpandingRules, Lookahead0} -> + NewLookahead = set_union(Lookahead0, Lookahead), + compute_closure1(ExpandingRules, NewLookahead, RpInfo); + ExpandingRules -> + compute_closure1(ExpandingRules, Lookahead, RpInfo) + end. + +compute_closure1([RulePointer | Tail], NewLookahead, RpInfo) -> + compute_closure1(Tail, NewLookahead, RpInfo), + case get(RulePointer) of + undefined -> % New + put(RulePointer, NewLookahead), + compute_closure(NewLookahead, RulePointer, RpInfo); + Lookahead2 -> + Lookahead = set_union(Lookahead2, NewLookahead), + if + Lookahead =:= Lookahead2 -> % Old + Lookahead2; % void() + true -> % Merge + put(RulePointer, Lookahead), + compute_closure(NewLookahead, RulePointer, RpInfo) + end + end; +compute_closure1(Nil, _, _RpInfo) -> + Nil. + +%% Check if some old state is a superset of our NewState +check_states(NewState, StateId, StateTab, #tabs{state_id = StateIdTab}) -> + try ets:lookup_element(StateIdTab, StateId, 2) of + N -> + {_N, OldState} = lookup_state(StateTab, N), + check_state1(NewState, OldState, [], N) + catch error:_ -> add + end. + +check_state1([#item{look_ahead = Lookahead1, rhs = Rhs} | Items1], + [#item{look_ahead = Lookahead2} | Items2], Symbols, N) -> + case set_is_subset(Lookahead1, Lookahead2) of + true -> + check_state1(Items1, Items2, Symbols, N); + false -> + case Rhs of + [] -> + check_state2(Items1, Items2, Symbols, N); + [Symbol | _] -> + check_state2(Items1, Items2, [Symbol | Symbols], N) + end + end; +check_state1([], [], _Symbols, N) -> + {old, N}. + +check_state2([#item{look_ahead = Lookahead1, rhs = Rhs} | Items1], + [#item{look_ahead = Lookahead2} | Items2], Symbols, N) -> + case set_is_subset(Lookahead1, Lookahead2) of + true -> + check_state2(Items1, Items2, Symbols, N); + false -> + case Rhs of + [] -> + check_state2(Items1, Items2, Symbols, N); + [Symbol | _] -> + check_state2(Items1, Items2, [Symbol | Symbols], N) + end + end; +check_state2([], [], Symbols, N) -> + {merge, N, usort(Symbols)}. + +merge_states(NewState, StateTab, Tables, M, StateId) -> + {_M, Old_state} = lookup_state(StateTab, M), + MergedState = merge_states1(NewState, Old_state), + insert_state(Tables, StateTab, {M, MergedState}, StateId). + +merge_states1([Item1 | Items1], [Item2 | Items2]) -> + LA1 = Item1#item.look_ahead, + LA2 = Item2#item.look_ahead, + if + LA1 =:= LA2 -> + [Item1 | merge_states1(Items1, Items2)]; + true -> + [Item1#item{look_ahead = set_union(LA1, LA2)} + | merge_states1(Items1, Items2)] + end; +merge_states1(_, _) -> + []. + +%% RulePointer -> Rhs. Every position Rhs in has its unique "rule pointer". +make_rhs_index(RulesList) -> + Index = flatmap(fun(#rule{symbols = [_Non | Daughters]}) -> + suffixes0(Daughters) + end, RulesList), + list_to_tuple(Index). + +suffixes0([?EMPTY]) -> + [[], []]; +suffixes0(L) -> + suffixes(L). + +suffixes([]=L) -> + [L]; +suffixes([_ | T]=L) -> + [L | suffixes(T)]. + +%% Setup info about lookahead and expanding rules for each point +%% ("rule pointer") in the grammar. +make_rule_pointer_info(StC, RpRhs, RuleIndex) -> + SymbolTab = StC#yecc.symbol_tab, + LcTab = make_left_corner_table(StC), + LA_index = map(fun(Syms) -> + rp_info(Syms, SymbolTab, LcTab, RuleIndex) + end, tuple_to_list(RpRhs)), + list_to_tuple(LA_index). + +rp_info([], _SymbolTab, _LcTab, _RuleIndex) -> + []; +rp_info([Category | Followers], SymbolTab, LcTab, RuleIndex) -> + case dict:find(Category, RuleIndex) of + error -> % terminal + []; + {ok, ExpandingRules} when Followers =:= [] -> + ExpandingRules; + {ok, ExpandingRules} -> + case make_lookahead(Followers, SymbolTab, LcTab, set_empty()) of + {empty, LA} -> + {union, ExpandingRules, LA}; + LA -> + {no_union, ExpandingRules, LA} + end + end. + +%% Lookahead computation is complicated by the possible existence +%% of null string rewriting rules, such as A -> '$empty'. +make_lookahead([], _, _, LA) -> + {empty, LA}; +make_lookahead([Symbol | Symbols], SymbolTab, LcTab, LA) -> + case dict:find(Symbol, LcTab) of + {ok, LeftCorner} -> % nonterminal + case empty_member(LeftCorner) of + true -> + make_lookahead(Symbols, SymbolTab, LcTab, + set_union(empty_delete(LeftCorner), LA)); + false -> + set_union(LeftCorner, LA) + end; + error -> % terminal + set_add(Symbol, LA) + end. + +%% -> dict-of({Nonterminal, [Terminal]}). +%% The algorithm FIRST/1 from the Dragon Book. +%% Left corner table, all terminals (including '$empty') that can +%% begin strings generated by Nonterminal. +make_left_corner_table(#yecc{rules_list = RulesList} = St) -> + SymbolTab = left_corner_symbol_table(St), + Rules = map(fun(#rule{symbols = [Lhs | Rhs]}) -> + {Lhs,{Lhs, Rhs}} + end, RulesList), + LeftHandTab = dict:from_list(family(Rules)), + X0 = [{S,H} || {H,{H,Rhs}} <- Rules, + S <- Rhs, + not is_terminal(SymbolTab, S)], + XL = family_with_domain(X0, St#yecc.nonterminals), + X = dict:from_list(XL), + Xref = fun(NT) -> dict:fetch(NT, X) end, + E = set_empty(), + LC0 = dict:from_list([{H, E} || {H,_} <- XL]), + %% Handle H -> a S, where a is a terminal ('$empty' inclusive). + {Q, LC1} = + foldl(fun({H,{H,[S | _]}}, {Q0, LC}) -> + case ets:lookup(SymbolTab, S) of + [{_,Num}=SymbolAndNum] when Num >= 0 -> + F = set_add_terminal(SymbolAndNum, E), + {[Xref(H) | Q0], upd_first(H, F, LC)}; + _ -> + {Q0, LC} + end + end, {[], LC0}, Rules), + left_corners(Q, LC1, LeftHandTab, SymbolTab, Xref). + +left_corners(Q0, LC0, LeftHandTab, SymbolTab, Xref) -> + case usort(append(Q0)) of + [] -> + LC0; + Q1 -> + Rs = flatmap(fun(NT) -> dict:fetch(NT, LeftHandTab) end, Q1), + {LC, Q} = left_corners2(Rs, LC0, [], SymbolTab, Xref), + left_corners(Q, LC, LeftHandTab, SymbolTab, Xref) + end. + +left_corners2([], LC, Q, _SymbolTab, _Xref) -> + {LC, Q}; +left_corners2([{Head,Rhs} | Rs], LC, Q0, SymbolTab, Xref) -> + Ts = left_corner_rhs(Rhs, Head, LC, set_empty(), SymbolTab), + First0 = dict:fetch(Head, LC), + case set_is_subset(Ts, First0) of + true -> + left_corners2(Rs, LC, Q0, SymbolTab, Xref); + false -> + LC1 = upd_first(Head, Ts, LC), + left_corners2(Rs, LC1, [Xref(Head) | Q0], SymbolTab, Xref) + end. + +upd_first(NT, Ts, LC) -> + dict:update(NT, fun(First) -> set_union(First, Ts) end, LC). + +left_corner_rhs([S | Ss], Head, LC, Ts, SymbolTab) -> + case ets:lookup(SymbolTab, S) of + [{_,Num}=SymbolAndNum] when Num >= 0 -> + set_add_terminal(SymbolAndNum, Ts); + [_NonTerminalSymbol] -> + First = dict:fetch(S, LC), + case empty_member(First) of + true -> + NTs = set_union(empty_delete(First), Ts), + left_corner_rhs(Ss, Head, LC, NTs, SymbolTab); + false -> + set_union(First, Ts) + end + end; +left_corner_rhs([], _Head, _LC, Ts, _SymbolTab) -> + set_add(?EMPTY, Ts). + +%% For every non-terminal return a list of "rule pointers" for rules +%% expanding the non-terminal. +%% Also assigns a unique number to each point in the grammar, "rule pointer". +make_rule_index(#yecc{nonterminals = Nonterminals, + rules_list = RulesList}, RulesListNoCodes) -> + {RulesL, _N} = + lists:mapfoldl(fun(#rule{symbols = [Nonterminal | Daughters]}, I) -> + I1 = I + length(Daughters)+1, + {{Nonterminal, I}, I1} + end, 1, RulesList), + IndexedTab = family_with_domain(RulesL, Nonterminals), + + Symbol2Rule = [{Foo,R} || #rule{symbols = Symbols}=R <- RulesListNoCodes, + Foo <- Symbols], + Pointer2Rule = [{I, R} || {{_Foo,R},I} <- count(1, Symbol2Rule)], + {dict:from_list(IndexedTab), dict:from_list(Pointer2Rule)}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Computing parse action table from list of states and goto table: + +compute_parse_actions(N, St, StateActions) -> + case N < first_state() of + true -> + StateActions; + false -> + {N, StateN} = lookup_state(St#yecc.state_tab, N), + %% There can be duplicates in Actions. + Actions = compute_parse_actions1(StateN, N, St), + compute_parse_actions(N - 1, St, [{N, Actions} | StateActions]) + end. + +compute_parse_actions1([], _, _) -> + []; +compute_parse_actions1([#item{rule_pointer = RulePointer, + look_ahead = Lookahead0, + rhs = Rhs} | Items], N, St) -> + case Rhs of + [] -> + Lookahead = decode_terminals(Lookahead0, St#yecc.inv_symbol_tab), + case rule(RulePointer, St) of + {[?ACCEPT | _], _RuleLine, _} -> + [{Lookahead, accept} + | compute_parse_actions1(Items, N, St)]; + %% Head is placed after the daughters when finding the + %% precedence. This is how giving precedence to + %% non-terminals takes effect. + {[Head | Daughters0], _RuleLine, _} -> + Daughters = delete('$empty', Daughters0), + [{Lookahead, + #reduce{rule_nmbr = RulePointer, head = Head, + nmbr_of_daughters = length(Daughters), + prec = get_prec(Daughters ++ [Head], St)}} + | compute_parse_actions1(Items, N, St)] + end; + [Symbol | Daughters] -> + case is_terminal(St#yecc.symbol_tab, Symbol) of + true -> + DecSymbol = decode_symbol(Symbol, St#yecc.inv_symbol_tab), + {[Head | _], _RuleLine, _} = rule(RulePointer, St), + %% A bogus shift-shift conflict can be introduced + %% here if some terminal occurs in different rules + %% which have been given precedence "one level up". + Prec1 = case Daughters of + [] -> get_prec([DecSymbol, Head], St); + _ -> get_prec([DecSymbol], St) + end, + Pos = case Daughters of + [] -> z; + _ -> a + end, + [{[DecSymbol], + #shift{state = goto(N, DecSymbol, St), + pos = Pos, + prec = Prec1, + rule_nmbr = RulePointer}} + | compute_parse_actions1(Items, N, St)]; + false -> + compute_parse_actions1(Items, N, St) + end + end. + +get_prec(Symbols, St) -> + get_prec1(Symbols, St#yecc.prec_tab, {0, none}). + +get_prec1([], _, P) -> + P; +get_prec1([Symbol | T], PrecTab, P) -> + case ets:lookup(PrecTab, Symbol) of + [] -> + get_prec1(T, PrecTab, P); + [{_, N, Ass}] -> + get_prec1(T, PrecTab, {N, Ass}) + end. + +create_precedence_table(St) -> + PrecTab = ets:new(yecc_precedences, []), + true = ets:insert(PrecTab, St#yecc.prec), + St#yecc{prec_tab = PrecTab}. + +-record(cxt, {terminal, state_n, yecc, res}). + +%% Detects shift-reduce and reduce-reduce conflicts. +%% Also removes all but one conflicting action. As a consequence the +%% lookahead sets for a state are always disjoint. +%% Reduce/reduce conflicts are considered errors. +find_action_conflicts(St0) -> + Cxt0 = #cxt{yecc = St0, res = []}, + {#cxt{yecc = St, res = Res}, NewParseActions0} = + foldl(fun({N, Actions0}, {Cxt1, StateActions}) -> + L = [{Terminal, Act} || {Lookahead, Act} <- Actions0, + Terminal <- Lookahead], + {Cxt, Actions} = + foldl(fun({Terminal, As}, {Cxt2,Acts0}) -> + Cxt3 = Cxt2#cxt{terminal = Terminal, + state_n = N}, + {Action, Cxt} = + find_action_conflicts2(As, Cxt3), + {Cxt,[{Action,Terminal} | Acts0]} + end, {Cxt1,[]}, family(L)), + {Cxt,[{N,inverse(family(Actions))} | StateActions]} + end, {Cxt0, []}, St0#yecc.parse_actions), + if + length(Res) > 0, St#yecc.verbose -> + io:fwrite(<<"\n*** Conflicts resolved by operator " + "precedences:\n\n">>), + foreach(fun({Confl, Name}) -> + report_conflict(Confl, St, Name, prec) + end, reverse(Res)), + io:fwrite(<<"*** End of resolved conflicts\n\n">>); + true -> + ok + end, + NewParseActions = reverse(NewParseActions0), + St#yecc{parse_actions = NewParseActions}. + +find_action_conflicts2([Action], Cxt) -> + {Action, Cxt}; +find_action_conflicts2([#shift{state = St, pos = Pos, prec = Prec}, + #shift{state = St}=S | As], + Cxt) when Pos =:= a; Prec =:= {0,none} -> + %% This is a kludge to remove the bogus shift-shift conflict + %% introduced in compute_parse_actions1(). + find_action_conflicts2([S | As], Cxt); +find_action_conflicts2([#shift{state = NewState, pos = z}=S1, + #shift{state = NewState}=S2 | _], Cxt) -> + %% This is even worse than last clause. Give up. + Confl = conflict(S1, S2, Cxt), + #cxt{yecc = St0} = Cxt, + St = conflict_error(Confl, St0), + {S1, Cxt#cxt{yecc = St}}; % return any action +find_action_conflicts2([#shift{prec = {P1, Ass1}}=S | Rs], Cxt0) -> + {R, Cxt1} = find_reduce_reduce(Rs, Cxt0), + #cxt{res = Res0, yecc = St0} = Cxt1, + #reduce{prec = {P2, Ass2}} = R, + Confl = conflict(R, S, Cxt1), + if + P1 > P2 -> + {S, Cxt1#cxt{res = [{Confl, shift} | Res0]}}; + P2 > P1 -> + {R, Cxt1#cxt{res = [{Confl, reduce} | Res0]}}; + Ass1 =:= left, Ass2 =:= left -> + {R, Cxt1#cxt{res = [{Confl, reduce} | Res0]}}; + Ass1 =:= right, Ass2 =:= right -> + {S, Cxt1#cxt{res = [{Confl, shift} | Res0]}}; + Ass1 =:= nonassoc, Ass2 =:= nonassoc -> + {nonassoc, Cxt1}; + P1 =:= 0, P2 =:= 0 -> + report_conflict(Confl, St0, shift, default), + St = add_conflict(Confl, St0), + {S, Cxt1#cxt{yecc = St}}; + true -> + St = conflict_error(Confl, St0), + {S, Cxt1#cxt{yecc = St}} % return any action + end; +find_action_conflicts2(Rs, Cxt0) -> + find_reduce_reduce(Rs, Cxt0). + +find_reduce_reduce([R], Cxt) -> + {R, Cxt}; +find_reduce_reduce([#reduce{head = Categ1, prec = {P1, _}}=R1, + #reduce{head = Categ2, prec = {P2, _}}=R2 | Rs], Cxt0) -> + #cxt{res = Res0, yecc = St0} = Cxt0, + Confl = conflict(R1, R2, Cxt0), + {R, Res, St} = + if + P1 > P2 -> + {R1, [{Confl, Categ1} | Res0], St0}; + P2 > P1 -> + {R2, [{Confl, Categ2} | Res0], St0}; + true -> + St1 = conflict_error(Confl, St0), + {R1, Res0, St1} + end, + Cxt = Cxt0#cxt{res = Res, yecc = St}, + find_reduce_reduce([R | Rs], Cxt). + +%% Since the lookahead sets are disjoint (assured by +%% find_action_conflicts), the order between actions can be chosen +%% almost arbitrarily. nonassoc has to come last, though (but is later +%% discarded!). And shift has to come before reduce. +sort_parse_actions([]) -> + []; +sort_parse_actions([{N, La_actions} | Tail]) -> + [{N, sort_parse_actions1(La_actions)} | sort_parse_actions(Tail)]. + +sort_parse_actions1(LaActions) -> + As = filter(fun({_LA, A}) -> A =:= accept end, LaActions), + Ss = filter(fun({_LA, A}) -> is_record(A, shift) end, LaActions), + Rs = filter(fun({_LA, A}) -> is_record(A, reduce) end, LaActions), + Ns = filter(fun({_LA, A}) -> A =:= nonassoc end, LaActions), + As ++ Ss ++ Rs ++ Ns. + +%% -> {State, StateRepr}. StateRepr has the same set of shift actions +%% as State. No code will be output for State if State =/= StateRepr. +find_identical_shift_states(StateActions) -> + L1 = [{Actions, State} || {State,Actions} <- StateActions], + {SO, NotSO} = lists:partition(fun({Actions,_States}) -> + shift_actions_only(Actions) + end, family(L1)), + R = [{State, hd(States)} || {_Actions, States} <- SO, State <- States] + ++ + [{State, State} || {_Actions, States} <- NotSO, State <- States], + lists:keysort(1, R). + +-record(part_data, {name, eq_state, actions, n_actions, states}). + +%% Replace {SStates,Actions} with {SStates,{Actions,Jump}} where +%% Jump describes which clauses that have been extracted from shift +%% states so that they can be used from other states. Some space is +%% saved. +find_partial_shift_states(StateActionsL, StateReprs) -> + L = [{State, Actions} || {{State,Actions}, {State,State}} <- + lists:zip(StateActionsL, StateReprs), + shift_actions_only(Actions)], + StateActions = sofs:family(L, [{state,[action]}]), + StateAction = sofs:family_to_relation(StateActions), + + %% Two actions are equal if they occur in the same states: + Parts = sofs:partition(sofs:range(StateActions)), + PartsL = sofs:to_external(Parts), + %% Assign temporary names to the parts of the partition (of actions): + PartNameL = lists:zip(seq1(length(PartsL)), PartsL), + ActPartL = [{Action,PartName} || + {PartName,Actions} <- PartNameL, + Action <- Actions], + ActionPartName = sofs:relation(ActPartL, [{action,partname}]), + StatePartName = sofs:relative_product(StateAction, ActionPartName), + PartInStates = sofs:relation_to_family(sofs:converse(StatePartName)), + + %% Parts that equal all actions of a state: + PartActions = sofs:family(PartNameL, [{partname,[action]}]), + PartState = + sofs:relative_product(PartActions, sofs:converse(StateActions)), + PartStates = sofs_family_with_domain(PartState, sofs:domain(PartActions)), + + PartDataL = [#part_data{name = Nm, eq_state = EqS, actions = P, + n_actions = length(P), + states = ordsets:from_list(S)} || + {{Nm,P}, {Nm,S}, {Nm,EqS}} <- + lists:zip3(PartNameL, + sofs:to_external(PartInStates), + sofs:to_external(PartStates))], + true = length(PartDataL) =:= length(PartNameL), + Ps = select_parts(PartDataL), + + J1 = [{State, Actions, {jump_some,hd(States)}} || + {_W, #part_data{actions = Actions, eq_state = [], + states = States}} <- Ps, + State <- States], + J2 = [{State, Actions, {jump_all,To}} || + {_W, #part_data{actions = Actions, eq_state = EqS, + states = States}} <- Ps, + To <- EqS, + State <- States, + State =/= To], + J = lists:keysort(1, J1 ++ J2), + + JumpStates = ordsets:from_list([S || {S,_,_} <- J]), + {JS, NJS} = + sofs:partition(1, sofs:relation(StateActionsL, [{state, actions}]), + sofs:set(JumpStates, [state])), + R = + [{S, {Actions,jump_none}} || {S,Actions} <- sofs:to_external(NJS)] + ++ + [{S, {Actions--Part, {Tag,ToS,Part}}} || + {{S,Actions}, {S,Part,{Tag,ToS}}} <- + lists:zip(sofs:to_external(JS), J)], + true = length(StateActionsL) =:= length(R), + lists:keysort(1, R). + +%% Very greedy. By no means optimal. +select_parts([]) -> + []; +select_parts(PartDataL) -> + T1 = [{score(PD), PD} || PD <- PartDataL], + [{W, PD} | Ws] = lists:reverse(lists:keysort(1, T1)), + #part_data{n_actions = NActions, states = S} = PD, + if + W < 8 -> % don't bother + []; + true -> + %% Cannot extract more clauses from the chosen part's states: + NL = [D#part_data{states = NewS} || + {W1, #part_data{states = S0}=D} <- Ws, + W1 > 0, + (NewS = ordsets:subtract(S0, S)) =/= []], + if + length(S) =:= 1; NActions =:= 1 -> + select_parts(NL); + true -> + [{W,PD} | select_parts(NL)] + end + end. + +%% Does it pay off to extract clauses into a new function? +%% Assumptions: +%% - a call costs 8 (C = 8); +%% - a clause (per action) costs 20 plus 8 (select) (Cl = 28); +%% - a new function costs 20 (funinfo) plus 16 (select) (F = 36). +%% A is number of actions, S is number of states. +%% Special case (the part equals all actions of some state): +%% C * (S - 1) < (S - 1) * A * Cl +%% Normal case (introduce new function): +%% F + A * Cl + C * S < S * A * Cl +score(#part_data{n_actions = NActions, eq_state = [], states = S}) -> + (length(S) * NActions * 28) - (36 + NActions * 28 + length(S) * 8); +score(#part_data{n_actions = NActions, states = S}) -> + ((length(S) - 1) * NActions * 28) - (8 * (length(S) - 1)). + +shift_actions_only(Actions) -> + length([foo || {_Ts,{shift,_,_,_,_}} <- Actions]) =:= length(Actions). + +collect_some_state_info(StateActions, StateReprs) -> + RF = fun({_LA, A}) -> is_record(A, reduce) end, + L = [{State, + begin + RO = lists:all(RF, LaActions), + %% C is currently always ""; identical states are all shift. + C = [io_lib:fwrite(<<" %% ~w\n">>, [State]) || + true <- [RO], Repr =/= State], + #state_info{reduce_only = RO, state_repr = Repr, comment = C} + end} || + {{State, LaActions}, {State, Repr}} <- + lists:zip(StateActions, StateReprs)], + list_to_tuple(L). + +conflict_error(Conflict, St0) -> + St1 = add_conflict(Conflict, St0), + add_error({conflict, Conflict}, St1). + +report_conflict(Conflict, St, ActionName, How) -> + if + St#yecc.verbose -> + io:fwrite(<<"~s\n">>, [format_conflict(Conflict)]), + Formated = format_symbol(ActionName), + case How of + prec -> + io:fwrite(<<"Resolved in favor of ~s.\n\n">>, [Formated]); + default -> + io:fwrite(<<"Conflict resolved in favor of ~s.\n\n">>, + [Formated]) + end; + true -> + ok + end. + +add_conflict(Conflict, St) -> + case Conflict of + {Symbol, StateN, _, {reduce, _, _, _}} -> + St#yecc{reduce_reduce = [{StateN,Symbol} |St#yecc.reduce_reduce]}; + {Symbol, StateN, _, {shift, _, _}} -> + St#yecc{shift_reduce = [{StateN,Symbol} | St#yecc.shift_reduce]}; + {_Symbol, _StateN, {one_level_up, _, _}, _Confl} -> + St + end. + +conflict(#shift{prec = Prec1, rule_nmbr = RuleNmbr1}, + #shift{prec = Prec2, rule_nmbr = RuleNmbr2}, Cxt) -> + %% Conflict due to precedences "one level up". Kludge. + #cxt{terminal = Symbol, state_n = N, yecc = St} = Cxt, + {_, L1, RuleN1} = rule(RuleNmbr1, St), + {_, L2, RuleN2} = rule(RuleNmbr2, St), + Confl = {one_level_up, {L1, RuleN1, Prec1}, {L2, RuleN2, Prec2}}, + {Symbol, N, Confl, Confl}; +conflict(#reduce{rule_nmbr = RuleNmbr1}, NewAction, Cxt) -> + #cxt{terminal = Symbol, state_n = N, yecc = St} = Cxt, + {R1, RuleLine1, RuleN1} = rule(RuleNmbr1, St), + Confl = case NewAction of + #reduce{rule_nmbr = RuleNmbr2} -> + {R2, RuleLine2, RuleN2} = rule(RuleNmbr2, St), + {reduce, R2, RuleN2, RuleLine2}; + #shift{state = NewState} -> + {shift, NewState, last(R1)} + end, + {Symbol, N, {R1, RuleN1, RuleLine1}, Confl}. + +format_conflict({Symbol, N, _, {one_level_up, + {L1, RuleN1, {P1, Ass1}}, + {L2, RuleN2, {P2, Ass2}}}}) -> + S1 = io_lib:fwrite(<<"Conflicting precedences of symbols when " + "scanning ~s in state ~w:\n">>, + [format_symbol(Symbol), N]), + S2 = io_lib:fwrite(<<" ~s ~w (rule ~w at line ~w)\n" + " vs.\n">>, + [format_assoc(Ass1), P1, RuleN1, L1]), + S3 = io_lib:fwrite(<<" ~s ~w (rule ~w at line ~w)\n">>, + [format_assoc(Ass2), P2, RuleN2, L2]), + [S1, S2, S3]; +format_conflict({Symbol, N, Reduce, Confl}) -> + S1 = io_lib:fwrite(<<"Parse action conflict scanning symbol " + "~s in state ~w:\n">>, [format_symbol(Symbol), N]), + S2 = case Reduce of + {[HR | TR], RuleNmbr, RuleLine} -> + io_lib:fwrite(<<" Reduce to ~s from ~s (rule ~w at " + "line ~w)\n vs.\n">>, + [format_symbol(HR), format_symbols(TR), + RuleNmbr, RuleLine]) + end, + S3 = case Confl of + {reduce, [HR2|TR2], RuleNmbr2, RuleLine2} -> + io_lib:fwrite(<<" reduce to ~s from ~s " + "(rule ~w at line ~w).">>, + [format_symbol(HR2), format_symbols(TR2), + RuleNmbr2, RuleLine2]); + {shift, NewState, Sym} -> + io_lib:fwrite(<<" shift to state ~w, adding right " + "sisters to ~s.">>, + [NewState, format_symbol(Sym)]) + end, + [S1, S2, S3]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Code generation: + +%% The version up to and including parsetools-1.3 is called "1.0". +%% +%% "1.1", parsetools-1.4: +%% - the prologue file has been updated; +%% - nonassoc is new; +%% - different order of clauses; +%% - never more than one clause matching a given symbol in a given state; +%% - file attributes relate messages to .yrl file; +%% - actions put in inlined functions; +%% - a few other minor fixes. +%% +%% "1.2", parsetools-1.4.2: +%% - the generated code has been changed as follows: +%% - yeccpars2() calls the functions yeccpars2_State(); +%% - several states can share yeccpars2_State(), which reduces code size; +%% - yeccgoto() has been split on one function per nonterminal; +%% - several minor changes have made the loaded code smaller. +%% - the include file yeccpre.hrl has been changed incompatibly. +%% +%% "1.3", parsetools-1.4.4: +%% - the generated code has been changed as follows: +%% - yeccgoto_T() no longer returns the next state, but calls yeccpars_S(); +%% - yeccpars2() is not called when it is known which yeccpars2_S() to call; +%% - "__Stack" has been substituted for "Stack"; +%% - several states can share yeccpars2_S_cont(), which reduces code size; +%% - instead if calling lists:nthtail() matching code is emitted. + +-define(CODE_VERSION, "1.3"). +-define(YECC_BUG(M, A), + iolist_to_binary([" erlang:error({yecc_bug,\"",?CODE_VERSION,"\",", + io_lib:fwrite(M, A), "}).\n\n"])). + +%% Returns number of newlines in included files. +output_prelude(Outport, Inport, St0) when St0#yecc.includefile =:= [] -> + St5 = output_header(St0), + #yecc{infile = Infile, module = Module} = St5, + St10 = fwrite(St5, <<"-module(~w).\n">>, [Module]), + St20 = + fwrite(St10, + <<"-export([parse/1, parse_and_scan/1, format_error/1]).\n">>, + []), + {St25, N_lines_1, LastErlangCodeLine} = + case St20#yecc.erlang_code of + none -> + {St20, 0, no_erlang_code}; + Next_line -> + St_10 = output_file_directive(St20, Infile, Next_line-1), + Nmbr_of_lines = include1([], Inport, Outport), + {St_10, Nmbr_of_lines, + {last_erlang_code_line, Next_line+Nmbr_of_lines}} + end, + St30 = nl(St25), + IncludeFile = + filename:join([code:lib_dir(parsetools), "include","yeccpre.hrl"]), + %% Maybe one could assume there are no warnings in this file. + St = output_file_directive(St30, IncludeFile, 0), + N_lines_2 = include(St, IncludeFile, Outport), + {St, N_lines_1 + N_lines_2, LastErlangCodeLine}; +output_prelude(Outport, Inport, St0) -> + St5 = output_header(St0), + #yecc{infile = Infile, module = Module, includefile = Includefile} = St5, + St10 = fwrite(St5, <<"-module(~w).\n">>, [Module]), + St20 = output_file_directive(St10, Includefile, 0), + N_lines_1 = include(St20, Includefile, Outport), + St30 = nl(St20), + case St30#yecc.erlang_code of + none -> + {St30, N_lines_1, no_erlang_code}; + Next_line -> + St = output_file_directive(St30, Infile, Next_line-1), + Nmbr_of_lines = include1([], Inport, Outport), + {St, Nmbr_of_lines + N_lines_1, + {last_erlang_code_line, Next_line+Nmbr_of_lines}} + end. + +output_header(St0) -> + lists:foldl(fun(Str, St) -> fwrite(St, <<"~s\n">>, [Str]) + end, St0, St0#yecc.header). + +output_goto(St, [{_Nonterminal, []} | Go], StateInfo) -> + output_goto(St, Go, StateInfo); +output_goto(St0, [{Nonterminal, List} | Go], StateInfo) -> + F = function_name(yeccgoto, Nonterminal), + St10 = output_goto1(St0, List, F, StateInfo, true), + St = output_goto_fini(F, Nonterminal, St10), + output_goto(St, Go, StateInfo); +output_goto(St, [], _StateInfo) -> + St. + +output_goto1(St0, [{From, To} | Tail], F, StateInfo, IsFirst) -> + St10 = delim(St0, IsFirst), + {To, ToInfo} = lookup_state(StateInfo, To), + #state_info{reduce_only = RO, state_repr = Repr, comment = C} = ToInfo, + if + RO -> + %% Reduce actions do not use the state, so we just pass + %% the old (now bogus) on: + FromS = io_lib:fwrite("~w=_S", [From]), + ToS = "_S"; + true -> + FromS = io_lib:fwrite("~w", [From]), + ToS = io_lib:fwrite("~w", [To]) + end, + St20 = fwrite(St10, <<"~w(~s, Cat, Ss, Stack, T, Ts, Tzr) ->\n">>, + [F,FromS]), + St30 = fwrite(St20, <<"~s">>, [C]), + %% Short-circuit call to yeccpars2: + St = fwrite(St30, <<" yeccpars2_~w(~s, Cat, Ss, Stack, T, Ts, Tzr)">>, + [Repr, ToS]), + output_goto1(St, Tail, F, StateInfo, false); +output_goto1(St, [], _F, _StateInfo, _IsFirst) -> + St. + +output_goto_fini(F, NT, #yecc{includefile_version = {1,1}}=St0) -> + %% Backward compatibility. + St10 = delim(St0, false), + St = fwrite(St10, <<"~w(State, _Cat, _Ss, _Stack, _T, _Ts, _Tzr) ->\n">>, + [F]), + fwrite(St, + ?YECC_BUG(<<"{~w, State, missing_in_goto_table}">>, [NT]), + []); +output_goto_fini(_F, _NT, St) -> + fwrite(St, <<".\n\n">>, []). + +%% Find actions having user code. +find_user_code(ParseActions, St) -> + [#user_code{state = State, + terminal = Terminal, + funname = inlined_function_name(State, Terminal), + action = Action} || + {State, La_actions} <- ParseActions, + {Action, Terminals, RuleNmbr, NmbrOfDaughters} + <- find_user_code2(La_actions), + case tokens(RuleNmbr, St) of + [{var, _, '__1'}] -> NmbrOfDaughters =/= 1; + _ -> true + end, + Terminal <- Terminals]. + +find_user_code2([]) -> + []; +find_user_code2([{_, #reduce{rule_nmbr = RuleNmbr, + nmbr_of_daughters = NmbrOfDaughters} + =Action}]) -> + %% Same optimization as in output_state_actions. + [{Action, ["Cat"], RuleNmbr, NmbrOfDaughters}]; +find_user_code2([{La, #reduce{rule_nmbr = RuleNmbr, + nmbr_of_daughters = NmbrOfDaughters} + =Action} | T]) -> + [{Action,La, RuleNmbr, NmbrOfDaughters} | find_user_code2(T)]; +find_user_code2([_ | T]) -> + find_user_code2(T). + +output_actions(St0, StateJumps, StateInfo) -> + %% Not all the clauses of the dispatcher function yeccpars2() can + %% be reached. Only when shifting, that is, calling yeccpars1(), + %% will yeccpars2() be called. + Y2CL = [NewState || {_State,{Actions,_J}} <- StateJumps, + {_LA, #shift{state = NewState}} <- Actions], + Y2CS = ordsets:from_list([0 | Y2CL]), + Y2S = ordsets:from_list([S || {S,_} <- StateJumps]), + NY2CS = ordsets:subtract(Y2S, Y2CS), + Sel = [{S,true} || S <- ordsets:to_list(Y2CS)] ++ + [{S,false} || S <- ordsets:to_list(NY2CS)], + + SelS = [{State,Called} || + {{State,_JActions}, {State,Called}} <- + lists:zip(StateJumps, lists:keysort(1, Sel))], + St10 = foldl(fun({State, Called}, St_0) -> + {State, #state_info{state_repr = IState}} = + lookup_state(StateInfo, State), + output_state_selection(St_0, State, IState, Called) + end, St0, SelS), + St20 = fwrite(St10, <<"yeccpars2(Other, _, _, _, _, _, _) ->\n">>, []), + St = fwrite(St20, + ?YECC_BUG(<<"{missing_state_in_action_table, Other}">>, []), + []), + foldl(fun({State, JActions}, St_0) -> + {State, #state_info{state_repr = IState}} = + lookup_state(StateInfo, State), + output_state_actions(St_0, State, IState, + JActions, StateInfo) + end, St, StateJumps). + +output_state_selection(St0, State, IState, Called) -> + Comment = [<<"%% ">> || false <- [Called]], + St = fwrite(St0, <<"~syeccpars2(~w=S, Cat, Ss, Stack, T, Ts, Tzr) ->\n">>, + [Comment, State]), + fwrite(St, + <<"~s yeccpars2_~w(S, Cat, Ss, Stack, T, Ts, Tzr);\n">>, + [Comment, IState]). + +output_state_actions(St, State, State, {Actions,jump_none}, SI) -> + output_state_actions1(St, State, Actions, true, normal, SI); +output_state_actions(St0, State, State, {Actions, Jump}, SI) -> + {Tag, To, Common} = Jump, + CS = case Tag of + jump_some -> list_to_atom(lists:concat([cont_, To])); + jump_all -> To + end, + St = output_state_actions1(St0, State, Actions, true, {to, CS}, SI), + if + To =:= State -> + output_state_actions1(St, CS, Common, true, normal, SI); + true -> + St + end; +output_state_actions(St, State, JState, _XActions, _SI) -> + fwrite(St, <<"%% yeccpars2_~w: see yeccpars2_~w\n\n">>, [State, JState]). + +output_state_actions1(St, State, [], _IsFirst, normal, _SI) -> + output_state_actions_fini(State, St); +output_state_actions1(St0, State, [], IsFirst, {to, ToS}, _SI) -> + St = delim(St0, IsFirst), + fwrite(St, + <<"yeccpars2_~w(S, Cat, Ss, Stack, T, Ts, Tzr) ->\n" + " yeccpars2_~w(S, Cat, Ss, Stack, T, Ts, Tzr).\n\n">>, + [State, ToS]); +output_state_actions1(St0, State, [{_, #reduce{}=Action}], + IsFirst, _End, SI) -> + St = output_reduce(St0, State, "Cat", Action, IsFirst, SI), + fwrite(St, <<".\n\n">>, []); +output_state_actions1(St0, State, [{Lookahead,Action} | Tail], + IsFirst, End, SI) -> + {_, St} = + foldl(fun(Terminal, {IsFst,St_0}) -> + {false, + output_action(St_0, State, Terminal, Action, IsFst,SI)} + end, {IsFirst,St0}, Lookahead), + output_state_actions1(St, State, Tail, false, End, SI). + +output_action(St, State, Terminal, #reduce{}=Action, IsFirst, SI) -> + output_reduce(St, State, Terminal, Action, IsFirst, SI); +output_action(St0, State, Terminal, #shift{state = NewState}, IsFirst, _SI) -> + St10 = delim(St0, IsFirst), + St = fwrite(St10, <<"yeccpars2_~w(S, ~s, Ss, Stack, T, Ts, Tzr) ->\n">>, + [State, quoted_atom(Terminal)]), + output_call_to_includefile(NewState, St); +output_action(St0, State, Terminal, accept, IsFirst, _SI) -> + St10 = delim(St0, IsFirst), + St = fwrite(St10, + <<"yeccpars2_~w(_S, ~s, _Ss, Stack, _T, _Ts, _Tzr) ->\n">>, + [State, quoted_atom(Terminal)]), + fwrite(St, <<" {ok, hd(Stack)}">>, []); +output_action(St, _State, _Terminal, nonassoc, _IsFirst, _SI) -> + St. + +output_call_to_includefile(NewState, #yecc{includefile_version = {1,1}}=St) -> + %% Backward compatibility. + fwrite(St, <<" yeccpars1(Ts, Tzr, ~w, [S | Ss], [T | Stack])">>, + [NewState]); +output_call_to_includefile(NewState, St) -> + fwrite(St, <<" yeccpars1(S, ~w, Ss, Stack, T, Ts, Tzr)">>, + [NewState]). + +output_state_actions_fini(State, #yecc{includefile_version = {1,1}}=St0) -> + %% Backward compatibility. + St10 = delim(St0, false), + St = fwrite(St10, <<"yeccpars2_~w(_, _, _, _, T, _, _) ->\n">>, [State]), + fwrite(St, <<" yeccerror(T).\n\n">>, []); +output_state_actions_fini(_State, St) -> + fwrite(St, <<".\n\n">>, []). + +output_reduce(St0, State, Terminal0, + #reduce{rule_nmbr = RuleNmbr, + head = Head, + nmbr_of_daughters = NmbrOfDaughters}, + IsFirst, StateInfo) -> + St10 = delim(St0, IsFirst), + Terminal = if + is_atom(Terminal0) -> quoted_atom(Terminal0); + true -> Terminal0 + end, + St20 = fwrite(St10, + <<"yeccpars2_~w(_S, ~s, Ss, Stack, T, Ts, Tzr) ->\n">>, + [State, Terminal]), + St30 = + if + NmbrOfDaughters < 2 -> + Ns = "Ss", + St20; + true -> + Ns = "Nss", + Tmp = string:join(lists:duplicate(NmbrOfDaughters - 1, "_"), + ","), + fwrite(St20, <<" [~s|Nss] = Ss,\n">>, [Tmp]) + end, + St40 = case tokens(RuleNmbr, St30) of + [{var, _, '__1'}] when NmbrOfDaughters =:= 1 -> + NewStack = "Stack", + St30; + _ -> + NewStack = "NewStack", + fwrite(St30, <<" NewStack = ~w(Stack),\n">>, + [inlined_function_name(State, Terminal0)]) + end, + if + NmbrOfDaughters =:= 0 -> + NextState = goto(State, Head, St40), + {NextState, I} = lookup_state(StateInfo, NextState), + #state_info{reduce_only = RO, state_repr = Repr, comment = C} = I, + %% Reduce actions do not use the state, so we just pass + %% the old (now bogus) on: + if + RO -> NextS = "_S"; + true -> NextS = io_lib:fwrite("~w", [NextState]) + end, + St = fwrite(St40, <<"~s">>, [C]), + %% Short-circuit call to yeccpars2: + fwrite(St, + <<" yeccpars2_~w(~s, ~s, [~w | Ss], ~s, T, Ts, Tzr)">>, + [Repr, NextS, Terminal, State, NewStack]); + true -> + fwrite(St40, + <<" ~w(hd(~s), ~s, ~s, ~s, T, Ts, Tzr)">>, + [function_name(yeccgoto, Head), Ns, + Terminal, Ns, NewStack]) + end. + +delim(St, true) -> + St; +delim(St, false) -> + fwrite(St, <<";\n">>, []). + +quoted_atom(Atom) -> + io_lib:fwrite(<<"~w">>, [Atom]). + +output_inlined(St, UserCodeActions, Infile) -> + foldl(fun(#user_code{funname = InlinedFunctionName, + action = Action}, St_0) -> + output_inlined(St_0, InlinedFunctionName, + Action, Infile) + end, St, UserCodeActions). + +%% Each action with user code is placed in a separate inlined function. +%% The purpose is to be able to pinpoint errors and warnings correctly. +output_inlined(St0, FunctionName, Reduce, Infile) -> + #reduce{rule_nmbr = RuleNmbr, nmbr_of_daughters = N_daughters} = Reduce, + #rule{tokens = Tokens, is_well_formed = WF} = get_rule(RuleNmbr, St0), + Line0 = first_line(Tokens), + NLines = last_line(Tokens) - Line0, + + St5 = if + WF -> + St0; + not WF -> + %% The compiler will generate an error message for + %% the inlined function (unless the reason that yecc + %% failed to parse the action was some macro). The + %% line number of the message will be correct since + %% we are keeping track of the current line of the + %% output file... + #yecc{outfile = Outfile, line = CurLine} = St0, + output_file_directive(St0, Outfile, CurLine) + end, + + CodeStartLine = lists:max([0, Line0 - 4]), + St10 = fwrite(St5, <<"-compile({inline,~w/1}).\n">>, [FunctionName]), + St20 = output_file_directive(St10, Infile, CodeStartLine), + St30 = fwrite(St20, <<"~w(__Stack0) ->\n">>, [FunctionName]), + %% Currently the (old) inliner emits less code if matching the + %% stack inside the body rather than in the head... + St40 = case N_daughters of + 0 -> + Stack = "__Stack0", + St30; + _ -> + Stack = "__Stack", + A = concat(flatmap(fun(I) -> [",__",I] end, + lists:seq(N_daughters, 1, -1))), + fwrite(St30, <<" ~s = __Stack0,\n">>, + [append(["[", tl(A), " | __Stack]"])]) + end, + St = St40#yecc{line = St40#yecc.line + NLines}, + fwrite(St, <<" [begin\n ~s\n end | ~s].\n\n">>, + [pp_tokens(Tokens, Line0), Stack]). + +inlined_function_name(State, "Cat") -> + inlined_function_name(State, ""); +inlined_function_name(State, Terminal) -> + list_to_atom(concat([yeccpars2_, State, '_', Terminal])). + +-compile({nowarn_unused_function,function_name/2}). +function_name(Name, Suf) -> + list_to_atom(concat([Name, '_' | quoted_atom(Suf)])). + +rule(RulePointer, St) -> + #rule{n = N, line = Line, symbols = Symbols} = + dict:fetch(RulePointer, St#yecc.rule_pointer2rule), + {Symbols, Line, N}. + +get_rule(RuleNmbr, St) -> + dict:fetch(RuleNmbr, St#yecc.rule_pointer2rule). + +tokens(RuleNmbr, St) -> + Rule = dict:fetch(RuleNmbr, St#yecc.rule_pointer2rule), + Rule#rule.tokens. + +goto(From, Symbol, St) -> + case ets:lookup(St#yecc.goto_tab, {From, Symbol}) of + [{_, To}] -> + To; + [] -> + erlang:error({error_in_goto_table, From, Symbol}) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Auxiliaries: + +-ifdef(SYMBOLS_AS_CODES). + +%%% Bit mask operations. + +-compile({inline,[set_empty/0]}). +set_empty() -> + 0. + +set_add(I, BM) -> + (1 bsl I) bor BM. + +-compile({inline,[set_member/2]}). +set_member(I, BM) -> + ((1 bsl I) band BM) =/= 0. + +%% Assumes I is a member... +-compile({inline,[set_delete/2]}). +set_delete(I, BM) -> + (1 bsl I) bxor BM. + +-compile({inline,[set_union/2]}). +set_union(BM1, BM2) -> + BM1 bor BM2. + +-compile({inline,[set_is_subset/2]}). +set_is_subset(BM1, BM2) -> + (BM1 band BM2) =:= BM1. + +empty_member(BM) -> + set_member(0, BM). + +empty_delete(BM) -> + set_delete(0, BM). + +code_symbols(Ss, SymbolTable) -> + map(fun(S) -> ets:lookup_element(SymbolTable, S, 2) end, Ss). + +decode_symbol(C, InvSymbolTable) -> + ets:lookup_element(InvSymbolTable, C, 1). + +code_terminal(T, SymbolTab) -> + set_add(ets:lookup_element(SymbolTab, T, 2), 0). + +decode_terminals(BM, InvSymbolTab) -> + case get(BM) of + undefined -> + Symbols = decode_terminals(BM, 0, InvSymbolTab), + put(BM, Symbols), + Symbols; + Symbols -> + Symbols + end. + +decode_terminals(0, _I, _InvSymbolTab) -> + []; +decode_terminals(BM, I, InvSymbolTab) -> + case set_member(I, BM) of + true -> + [ets:lookup_element(InvSymbolTab, I, 1) + | decode_terminals(set_delete(I, BM), I+1, InvSymbolTab)]; + false -> + decode_terminals(BM, I+1, InvSymbolTab) + end. + +set_add_terminal({_Symbol, TerminalNum}, BM) -> + set_add(TerminalNum, BM). + +-compile({inline,[is_terminal/2]}). +is_terminal(_Tab, SymbolCode) -> + SymbolCode >= 0. + +left_corner_symbol_table(St) -> + St#yecc.inv_symbol_tab. + +-else. + +set_empty() -> + []. + +set_add(Symbol, L) -> + ordsets:union([Symbol], L). + +set_union(Es1, Es2) -> + ordsets:union(Es1, Es2). + +set_is_subset(Es1, Es2) -> + ordsets:is_subset(Es1, Es2). + +code_symbols(Ss, _SymbolTab) -> + Ss. + +decode_symbol(S, _InvSymbolTab) -> + S. + +code_terminal(T, _SymbolTab) -> + [T]. + +decode_terminals(Ts, _InvSymbolTab) -> + Ts. + +empty_member(['$empty' | _]) -> + true; +empty_member(_) -> + false. + +empty_delete(['$empty' | Terminals]) -> + Terminals. + +set_add_terminal({Symbol, _TerminalNum}, L) -> + set_add(Symbol, L). + +is_terminal(Tab, SymbolName) -> + ets:lookup_element(Tab, SymbolName, 2) >= 0. + +left_corner_symbol_table(St) -> + St#yecc.symbol_tab. + +-endif. % SYMBOLS_AS_CODES + +intersect(L1, L2) -> + ordsets:to_list(ordsets:intersection(ordsets:from_list(L1), + ordsets:from_list(L2))). + +format_symbols([Sym | Syms]) -> + concat([format_symbol(Sym) | format_symbols1(Syms)]). + +format_symbols1([]) -> + []; +format_symbols1([H | T]) -> + [" ", format_symbol(H) | format_symbols1(T)]. + +include(St, File, Outport) -> + case file:open(File, [read]) of + {error, Reason} -> + throw(add_error(File, none, {file_error, Reason}, St)); + {ok, Inport} -> + Line = io:get_line(Inport, ''), + N_lines = include1(Line, Inport, Outport), + file:close(Inport), + N_lines + end. + +include1(Line, Inport, Outport) -> + include1(Line, Inport, Outport, 0). + +include1(eof, _, _, Nmbr_of_lines) -> + Nmbr_of_lines; +include1(Line, Inport, Outport, Nmbr_of_lines) -> + Incr = case member($\n, Line) of + true -> 1; + false -> 0 + end, + io:put_chars(Outport, Line), + include1(io:get_line(Inport, ''), Inport, Outport, Nmbr_of_lines + Incr). + +includefile_version([]) -> + {1,2}; +includefile_version(Includefile) -> + case epp:open(Includefile, []) of + {ok, Epp} -> + try + parse_file(Epp) + after + epp:close(Epp) + end; + {error, _Error} -> + {1,1} + end. + +parse_file(Epp) -> + case epp:parse_erl_form(Epp) of + {ok, {function,_Line,yeccpars1,7,_Clauses}} -> + {1,2}; + {eof,_Line} -> + {1,1}; + _Form -> + parse_file(Epp) + end. + +%% Keeps the line breaks of the original code. +pp_tokens(Tokens, Line0) -> + concat(pp_tokens1(Tokens, Line0, [])). + +pp_tokens1([], _Line0, _T0) -> + []; +pp_tokens1([T | Ts], Line0, T0) -> + Line = element(2, T), + [pp_sep(Line, Line0, T0), pp_symbol(T) | pp_tokens1(Ts, Line, T)]. + +pp_symbol({var,_,Var}) -> Var; +pp_symbol({_,_,Symbol}) -> io_lib:fwrite(<<"~p">>, [Symbol]); +pp_symbol({Symbol, _}) -> Symbol. + +pp_sep(Line, Line0, T0) when Line > Line0 -> + ["\n " | pp_sep(Line - 1, Line0, T0)]; +pp_sep(_Line, _Line0, {'.',_}) -> + ""; +pp_sep(_Line, _Line0, _T0) -> + " ". + +output_file_directive(St, Filename, Line) when St#yecc.file_attrs -> + fwrite(St, <<"-file(~s, ~w).\n">>, + [format_filename(Filename), Line]); +output_file_directive(St, _Filename, _Line) -> + St. + +first_line(Tokens) -> + element(2, hd(Tokens)). + +last_line(Tokens) -> + element(2, lists:last(Tokens)). + +%% Keep track of the current line in the generated file. +fwrite(#yecc{outport = Outport, line = Line}=St, Format, Args) -> + NLines = count_nl(Format), + io:fwrite(Outport, Format, Args), + St#yecc{line = Line + NLines}. + +%% Assumes \n is used, and never ~n. +count_nl(<<$\n,Rest/binary>>) -> + 1 + count_nl(Rest); +count_nl(<<_,Rest/binary>>) -> + count_nl(Rest); +count_nl(<<>>) -> + 0. + +nl(#yecc{outport = Outport, line = Line}=St) -> + io:nl(Outport), + St#yecc{line = Line + 1}. + +format_filename(Filename) -> + io_lib:write_string(filename:flatten(Filename)). + +format_assoc(left) -> + "Left"; +format_assoc(right) -> + "Right"; +format_assoc(unary) -> + "Unary"; +format_assoc(nonassoc) -> + "Nonassoc". + +format_symbol(Symbol) -> + String = concat([Symbol]), + case erl_scan:string(String) of + {ok, [{atom, _, _}], _} -> + io_lib:fwrite(<<"~w">>, [Symbol]); + {ok, [{Word, _}], _} when Word =/= ':', Word =/= '->' -> + case erl_scan:reserved_word(Word) of + true -> + String; + false -> + io_lib:fwrite(<<"~w">>, [Symbol]) + end; + {ok, [{var, _, _}], _} -> + String; + _ -> + io_lib:fwrite(<<"~w">>, [Symbol]) + end. + +inverse(L) -> + sort([{A,B} || {B,A} <- L]). + +family(L) -> + sofs:to_external(sofs:relation_to_family(sofs:relation(L))). + +seq1(To) when To < 1 -> + []; +seq1(To) -> + lists:seq(1, To). + +count(From, L) -> + lists:zip(L, lists:seq(From, length(L)-1+From)). + +family_with_domain(L, DL) -> + sofs:to_external(sofs_family_with_domain(sofs:relation(L), sofs:set(DL))). + +sofs_family_with_domain(R0, D) -> + R = sofs:restriction(R0, D), + F = sofs:relation_to_family(R), + FD = sofs:constant_function(D, sofs:from_term([])), + sofs:family_union(F, FD). |