aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Hans Bolinder <hasse@erlang.org> 2017-03-23 16:36:17 +0100
committer Hans Bolinder <hasse@erlang.org> 2017-04-07 08:57:27 +0200
commit 9c013d50cc5abf3b0a0dbb5fc2be97c825bc0261 (patch)
tree 678141bc4c972d5278b0477274e03d44ff066833
parent 25271fa55aacf0b367ad74532c952352344ed97d (diff)
download otp-9c013d50cc5abf3b0a0dbb5fc2be97c825bc0261.tar.gz
otp-9c013d50cc5abf3b0a0dbb5fc2be97c825bc0261.tar.bz2
otp-9c013d50cc5abf3b0a0dbb5fc2be97c825bc0261.zip
parsetools: Fix Yecc regarding Unicode atoms
Terminals and non-terminals still need to be quoted, which is a side-effect of using the Erlang scanner.
-rw-r--r-- lib/parsetools/src/yecc.erl 116
-rw-r--r-- lib/parsetools/test/yecc_SUITE.erl 88
2 files changed, 147 insertions, 57 deletions
diff --git a/lib/parsetools/src/yecc.erl b/lib/parsetools/src/yecc.erl
index 05446c1a85..48559ec402 100644
--- a/lib/parsetools/src/yecc.erl
+++ b/lib/parsetools/src/yecc.erl
@@ -154,13 +154,13 @@ compile(Input0, Output0,
format_error(bad_declaration) ->
io_lib:fwrite("unknown or bad declaration, ignored", []);
format_error({bad_expect, SymName}) ->
- io_lib:fwrite("argument ~s of Expect is not an integer",
+ io_lib:fwrite("argument ~ts of Expect is not an integer",
[format_symbol(SymName)]);
format_error({bad_rootsymbol, SymName}) ->
- io_lib:fwrite("rootsymbol ~s is not a nonterminal",
+ io_lib:fwrite("rootsymbol ~ts is not a nonterminal",
[format_symbol(SymName)]);
format_error({bad_states, SymName}) ->
- io_lib:fwrite("argument ~s of States is not an integer",
+ io_lib:fwrite("argument ~ts of States is not an integer",
[format_symbol(SymName)]);
format_error({conflict, Conflict}) ->
format_conflict(Conflict);
@@ -169,19 +169,19 @@ format_error({conflicts, SR, RR}) ->
format_error({duplicate_declaration, Tag}) ->
io_lib:fwrite("duplicate declaration of ~s", [atom_to_list(Tag)]);
format_error({duplicate_nonterminal, Nonterminal}) ->
- io_lib:fwrite("duplicate non-terminals ~s",
+ io_lib:fwrite("duplicate non-terminals ~ts",
[format_symbol(Nonterminal)]);
format_error({duplicate_precedence, Op}) ->
- io_lib:fwrite("duplicate precedence operator ~s",
+ io_lib:fwrite("duplicate precedence operator ~ts",
[format_symbol(Op)]);
format_error({duplicate_terminal, Terminal}) ->
- io_lib:fwrite("duplicate terminal ~s",
+ io_lib:fwrite("duplicate terminal ~ts",
[format_symbol(Terminal)]);
format_error({endsymbol_is_nonterminal, Symbol}) ->
- io_lib:fwrite("endsymbol ~s is a nonterminal",
+ io_lib:fwrite("endsymbol ~ts is a nonterminal",
[format_symbol(Symbol)]);
format_error({endsymbol_is_terminal, Symbol}) ->
- io_lib:fwrite("endsymbol ~s is a terminal",
+ io_lib:fwrite("endsymbol ~ts is a terminal",
[format_symbol(Symbol)]);
format_error({error, Module, Error}) ->
Module:format_error(Error);
@@ -192,7 +192,7 @@ format_error(illegal_empty) ->
format_error({internal_error, Error}) ->
io_lib:fwrite("internal yecc error: ~w", [Error]);
format_error({missing_syntax_rule, Nonterminal}) ->
- io_lib:fwrite("no syntax rule for non-terminal symbol ~s",
+ io_lib:fwrite("no syntax rule for non-terminal symbol ~ts",
[format_symbol(Nonterminal)]);
format_error({n_states, Exp, N}) ->
io_lib:fwrite("expected ~w states, but got ~p states", [Exp, N]);
@@ -201,31 +201,31 @@ format_error(no_grammar_rules) ->
format_error(nonterminals_missing) ->
io_lib:fwrite("Nonterminals is missing", []);
format_error({precedence_op_is_endsymbol, SymName}) ->
- io_lib:fwrite("precedence operator ~s is endsymbol",
+ io_lib:fwrite("precedence operator ~ts is endsymbol",
[format_symbol(SymName)]);
format_error({precedence_op_is_unknown, SymName}) ->
- io_lib:fwrite("unknown precedence operator ~s",
+ io_lib:fwrite("unknown precedence operator ~ts",
[format_symbol(SymName)]);
format_error({reserved, N}) ->
io_lib:fwrite("the use of ~w should be avoided", [N]);
format_error({symbol_terminal_and_nonterminal, SymName}) ->
- io_lib:fwrite("symbol ~s is both a terminal and nonterminal",
+ io_lib:fwrite("symbol ~ts is both a terminal and nonterminal",
[format_symbol(SymName)]);
format_error(rootsymbol_missing) ->
io_lib:fwrite("Rootsymbol is missing", []);
format_error(terminals_missing) ->
io_lib:fwrite("Terminals is missing", []);
format_error({undefined_nonterminal, Symbol}) ->
- io_lib:fwrite("undefined nonterminal: ~s", [format_symbol(Symbol)]);
+ io_lib:fwrite("undefined nonterminal: ~ts", [format_symbol(Symbol)]);
format_error({undefined_pseudo_variable, Atom}) ->
io_lib:fwrite("undefined pseudo variable ~w", [Atom]);
format_error({undefined_symbol, SymName}) ->
- io_lib:fwrite("undefined rhs symbol ~s", [format_symbol(SymName)]);
+ io_lib:fwrite("undefined rhs symbol ~ts", [format_symbol(SymName)]);
format_error({unused_nonterminal, Nonterminal}) ->
- io_lib:fwrite("non-terminal symbol ~s not used",
+ io_lib:fwrite("non-terminal symbol ~ts not used",
[format_symbol(Nonterminal)]);
format_error({unused_terminal, Terminal}) ->
- io_lib:fwrite("terminal symbol ~s not used",
+ io_lib:fwrite("terminal symbol ~ts not used",
[format_symbol(Terminal)]);
format_error({bad_symbol, String}) ->
io_lib:fwrite("bad symbol ~ts", [String]);
@@ -1809,9 +1809,9 @@ report_conflict(Conflict, St, ActionName, How) ->
Formated = format_symbol(ActionName),
case How of
prec ->
- io:fwrite(<<"Resolved in favor of ~s.\n\n">>, [Formated]);
+ io:fwrite(<<"Resolved in favor of ~ts.\n\n">>, [Formated]);
default ->
- io:fwrite(<<"Conflict resolved in favor of ~s.\n\n">>,
+ io:fwrite(<<"Conflict resolved in favor of ~ts.\n\n">>,
[Formated])
end;
true ->
@@ -1856,7 +1856,7 @@ format_conflict({Symbol, N, _, {one_level_up,
{L1, RuleN1, {P1, Ass1}},
{L2, RuleN2, {P2, Ass2}}}}) ->
S1 = io_lib:fwrite(<<"Conflicting precedences of symbols when "
- "scanning ~s in state ~w:\n">>,
+ "scanning ~ts in state ~w:\n">>,
[format_symbol(Symbol), N]),
S2 = io_lib:fwrite(<<" ~s ~w (rule ~w at line ~w)\n"
" vs.\n">>,
@@ -1866,26 +1866,26 @@ format_conflict({Symbol, N, _, {one_level_up,
[S1, S2, S3];
format_conflict({Symbol, N, Reduce, Confl}) ->
S1 = io_lib:fwrite(<<"Parse action conflict scanning symbol "
- "~s in state ~w:\n">>, [format_symbol(Symbol), N]),
+ "~ts in state ~w:\n">>, [format_symbol(Symbol), N]),
S2 = case Reduce of
{[HR | TR], RuleNmbr, RuleLine} ->
- io_lib:fwrite(<<" Reduce to ~s from ~s (rule ~w at "
+ io_lib:fwrite(<<" Reduce to ~ts from ~ts (rule ~w at "
"line ~w)\n vs.\n">>,
[format_symbol(HR), format_symbols(TR),
RuleNmbr, RuleLine])
end,
S3 = case Confl of
{reduce, [HR2|TR2], RuleNmbr2, RuleLine2} ->
- io_lib:fwrite(<<" reduce to ~s from ~s "
+ io_lib:fwrite(<<" reduce to ~ts from ~ts "
"(rule ~w at line ~w).">>,
[format_symbol(HR2), format_symbols(TR2),
RuleNmbr2, RuleLine2]);
{shift, NewState, Sym} ->
io_lib:fwrite(<<" shift to state ~w, adding right "
- "sisters to ~s.">>,
+ "sisters to ~ts.">>,
[NewState, format_symbol(Sym)]);
{accept, Rootsymbol} ->
- io_lib:fwrite(<<" reduce to rootsymbol ~s.">>,
+ io_lib:fwrite(<<" reduce to rootsymbol ~ts.">>,
[format_symbol(Rootsymbol)])
end,
[S1, S2, S3].
@@ -1926,8 +1926,9 @@ format_conflict({Symbol, N, Reduce, Confl}) ->
-define(CODE_VERSION, "1.4").
-define(YECC_BUG(M, A),
- iolist_to_binary([" erlang:error({yecc_bug,\"",?CODE_VERSION,"\",",
- io_lib:fwrite(M, A), "}).\n\n"])).
+ unicode:characters_to_binary(
+ [" erlang:error({yecc_bug,\"",?CODE_VERSION,"\",",
+ io_lib:fwrite(M, A), "}).\n\n"])).
%% Returns number of newlines in included files.
output_prelude(Outport, Inport, St0) when St0#yecc.includefile =:= [] ->
@@ -1980,7 +1981,7 @@ output_header(St0) ->
output_goto(St, [{_Nonterminal, []} | Go], StateInfo) ->
output_goto(St, Go, StateInfo);
output_goto(St0, [{Nonterminal, List} | Go], StateInfo) ->
- F = function_name(yeccgoto, Nonterminal),
+ F = function_name(St0, yeccgoto, Nonterminal),
St05 = fwrite(St0, <<"-dialyzer({nowarn_function, ~w/7}).\n">>, [F]),
St10 = output_goto1(St05, List, F, StateInfo, true),
St = output_goto_fini(F, Nonterminal, St10),
@@ -2018,7 +2019,8 @@ output_goto_fini(F, NT, #yecc{includefile_version = {1,1}}=St0) ->
St = fwrite(St10, <<"~w(State, _Cat, _Ss, _Stack, _T, _Ts, _Tzr) ->\n">>,
[F]),
fwrite(St,
- ?YECC_BUG(<<"{~w, State, missing_in_goto_table}">>, [NT]),
+ ?YECC_BUG(<<"{~ts, State, missing_in_goto_table}">>,
+ [quoted_atom(St0, NT)]),
[]);
output_goto_fini(_F, _NT, St) ->
fwrite(St, <<".\n\n">>, []).
@@ -2027,7 +2029,7 @@ output_goto_fini(_F, _NT, St) ->
find_user_code(ParseActions, St) ->
[#user_code{state = State,
terminal = Terminal,
- funname = inlined_function_name(State, Terminal),
+ funname = inlined_function_name(St, State, Terminal),
action = Action} ||
{State, La_actions} <- ParseActions,
{Action, Terminals, RuleNmbr, NmbrOfDaughters}
@@ -2148,14 +2150,14 @@ output_action(St, State, Terminal, #reduce{}=Action, IsFirst, SI) ->
output_reduce(St, State, Terminal, Action, IsFirst, SI);
output_action(St0, State, Terminal, #shift{state = NewState}, IsFirst, _SI) ->
St10 = delim(St0, IsFirst),
- St = fwrite(St10, <<"yeccpars2_~w(S, ~s, Ss, Stack, T, Ts, Tzr) ->\n">>,
- [State, quoted_atom(Terminal)]),
+ St = fwrite(St10, <<"yeccpars2_~w(S, ~ts, Ss, Stack, T, Ts, Tzr) ->\n">>,
+ [State, quoted_atom(St10, Terminal)]),
output_call_to_includefile(NewState, St);
output_action(St0, State, Terminal, accept, IsFirst, _SI) ->
St10 = delim(St0, IsFirst),
St = fwrite(St10,
- <<"yeccpars2_~w(_S, ~s, _Ss, Stack, _T, _Ts, _Tzr) ->\n">>,
- [State, quoted_atom(Terminal)]),
+ <<"yeccpars2_~w(_S, ~ts, _Ss, Stack, _T, _Ts, _Tzr) ->\n">>,
+ [State, quoted_atom(St10, Terminal)]),
fwrite(St, <<" {ok, hd(Stack)}">>, []);
output_action(St, _State, _Terminal, nonassoc, _IsFirst, _SI) ->
St.
@@ -2174,19 +2176,19 @@ output_state_actions_fini(State, IsFirst, St0) ->
St = fwrite(St10, <<"yeccpars2_~w(_, _, _, _, T, _, _) ->\n">>, [State]),
fwrite(St, <<" yeccerror(T).\n\n">>, []).
-output_reduce(St0, State, Terminal0,
+output_reduce(St0, State, Terminal,
#reduce{rule_nmbr = RuleNmbr,
head = Head,
nmbr_of_daughters = NmbrOfDaughters},
IsFirst, StateInfo) ->
St10 = delim(St0, IsFirst),
- Terminal = if
- is_atom(Terminal0) -> quoted_atom(Terminal0);
- true -> Terminal0
- end,
+ QuotedTerminal = if
+ is_atom(Terminal) -> quoted_atom(St10, Terminal);
+ true -> Terminal
+ end,
St20 = fwrite(St10,
- <<"yeccpars2_~w(_S, ~s, Ss, Stack, T, Ts, Tzr) ->\n">>,
- [State, Terminal]),
+ <<"yeccpars2_~w(_S, ~ts, Ss, Stack, T, Ts, Tzr) ->\n">>,
+ [State, QuotedTerminal]),
St30 =
if
NmbrOfDaughters < 2 ->
@@ -2205,7 +2207,7 @@ output_reduce(St0, State, Terminal0,
_ ->
NewStack = "NewStack",
fwrite(St30, <<" NewStack = ~w(Stack),\n">>,
- [inlined_function_name(State, Terminal0)])
+ [inlined_function_name(St30, State, Terminal)])
end,
if
NmbrOfDaughters =:= 0 ->
@@ -2221,13 +2223,13 @@ output_reduce(St0, State, Terminal0,
St = fwrite(St40, <<"~s">>, [C]),
%% Short-circuit call to yeccpars2:
fwrite(St,
- <<" yeccpars2_~w(~s, ~s, [~w | Ss], ~s, T, Ts, Tzr)">>,
- [Repr, NextS, Terminal, State, NewStack]);
+ <<" yeccpars2_~w(~s, ~ts, [~w | Ss], ~s, T, Ts, Tzr)">>,
+ [Repr, NextS, QuotedTerminal, State, NewStack]);
true ->
fwrite(St40,
- <<" ~w(hd(~s), ~s, ~s, ~s, T, Ts, Tzr)">>,
- [function_name(yeccgoto, Head), Ns,
- Terminal, Ns, NewStack])
+ <<" ~w(hd(~s), ~ts, ~s, ~s, T, Ts, Tzr)">>,
+ [function_name(St40, yeccgoto, Head), Ns,
+ QuotedTerminal, Ns, NewStack])
end.
delim(St, true) ->
@@ -2235,8 +2237,10 @@ delim(St, true) ->
delim(St, false) ->
fwrite(St, <<";\n">>, []).
-quoted_atom(Atom) ->
- io_lib:fwrite(<<"~w">>, [Atom]).
+quoted_atom(#yecc{encoding = latin1}, Atom) when is_atom(Atom) ->
+ io_lib:write_atom_as_latin1(Atom);
+quoted_atom(_St, Atomic) ->
+ io_lib:write(Atomic).
output_inlined(St, UserCodeActions, Infile) ->
foldl(fun(#user_code{funname = InlinedFunctionName,
@@ -2288,14 +2292,16 @@ output_inlined(St0, FunctionName, Reduce, Infile) ->
fwrite(St, <<" [begin\n ~ts\n end | ~s].\n\n">>,
[pp_tokens(Tokens, Line0, St#yecc.encoding), Stack]).
-inlined_function_name(State, "Cat") ->
- inlined_function_name(State, "");
-inlined_function_name(State, Terminal) ->
- list_to_atom(concat([yeccpars2_, State, '_', Terminal])).
+inlined_function_name(St, State, Terminal) ->
+ End = case Terminal of
+ "Cat" -> [];
+ _ -> [quoted_atom(St, Terminal)]
+ end,
+ list_to_atom(concat([yeccpars2_, State, '_'] ++ End)).
--compile({nowarn_unused_function,function_name/2}).
-function_name(Name, Suf) ->
- list_to_atom(concat([Name, '_' | quoted_atom(Suf)])).
+-compile({nowarn_unused_function,function_name/3}).
+function_name(St, Name, Suf) ->
+ list_to_atom(concat([Name, '_'] ++ [quoted_atom(St, Suf)])).
rule(RulePointer, St) ->
#rule{n = N, anno = Anno, symbols = Symbols} =
diff --git a/lib/parsetools/test/yecc_SUITE.erl b/lib/parsetools/test/yecc_SUITE.erl
index 2c37278d4b..a7166b91ed 100644
--- a/lib/parsetools/test/yecc_SUITE.erl
+++ b/lib/parsetools/test/yecc_SUITE.erl
@@ -50,7 +50,7 @@
otp_5369/1, otp_6362/1, otp_7945/1, otp_8483/1, otp_8486/1,
otp_7292/1, otp_7969/1, otp_8919/1, otp_10302/1, otp_11269/1,
- otp_11286/1]).
+ otp_11286/1, otp_14285/1]).
% Default timetrap timeout (set in init_per_testcase).
-define(default_timeout, ?t:minutes(1)).
@@ -78,7 +78,7 @@ groups() ->
{bugs, [],
[otp_5369, otp_6362, otp_7945, otp_8483, otp_8486]},
{improvements, [], [otp_7292, otp_7969, otp_8919, otp_10302,
- otp_11269, otp_11286]}].
+ otp_11269, otp_11286, otp_14285]}].
init_per_suite(Config) ->
Config.
@@ -2048,6 +2048,90 @@ otp_11286(Config) when is_list(Config) ->
true = test_server:stop_node(Node),
ok.
+otp_14285(Config) -> %% OTP-14285: grammars whose symbols are Unicode atoms.
+ Dir = ?privdir,
+ YeccPre = filename:join(Dir, "yeccpre.hrl"),
+ ?line ok = file:write_file(YeccPre,
+ [<<"-export([t/0]).\n">>,my_yeccpre()]),
+ %% T0: Unicode nonterminal as rootsymbol, built with the include file above.
+ T0 = <<"
+ Nonterminals '\\x{400}'.
+ Terminals t.
+ Rootsymbol '\\x{400}'.
+ '\\x{400}' -> t : '$1'.
+ Erlang code.
+ t() ->
+ L = [{t, 1}],
+ {ok, R} = parse(L),
+ {t, 1} = R,
+ ok.">>,
+ Ts0 = [{otp_14285_1,
+ [<<"%% coding: Latin-1\n">>,T0],YeccPre,ok},
+ {otp_14285_2,
+ [<<"%% coding: UTF-8\n">>,T0],YeccPre,ok}],
+ run(Config, Ts0),
+ file:delete(YeccPre),
+ %% T1: Unicode atoms in nonterminals, terminals and a semantic action.
+ T1 = <<"
+ Nonterminals '1\\x{400}' list 'unused\\x{400}'.
+ Terminals '2\\x{400}'.
+ Rootsymbol '1\\x{400}'.
+
+ '1\\x{400}' -> list : '$1'.
+
+ list -> '2\\x{400}' : '$1'.
+ list -> list '2\\x{400}' : {foo,'\\x{400}'}.
+
+ Erlang code.
+
+ -export([t/0]).
+
+ t() ->
+ L = [{'2\\x{400}', 1}, {'2\\x{400}',2}],
+ {ok, R} = parse(L),
+ {foo,A} = R,
+ '\\x{400}' = A,
+ [1024] = atom_to_list(A),
+ ok.">>,
+ %% Both coding comments, passing `default` instead of a custom include file.
+ Ts1 = [{otp_14285_3,
+ [<<"%% coding: Latin-1\n">>,T1],default,ok},
+ {otp_14285_4,
+ [<<"%% coding: UTF-8\n">>,T1],default,ok}],
+ run(Config, Ts1),
+ %% T2: Unicode Endsymbol together with precedence declarations.
+ T2 = <<"
+ Nonterminals E.
+ Terminals '-' '+' '=' id.
+ Rootsymbol E.
+ Endsymbol '\\x{400}'.
+
+ E -> E '=' E : {op, '=', '$1', '$3'}.
+ E -> E '+' E : {op, '+', '$1', '$3'}.
+ E -> '-' E : {op, '-', '$2'}.
+ E -> id : '$1'.
+
+ Nonassoc 100 '='.
+ Right 200 '+' '-'.
+
+ Erlang code.
+
+ -export([t/0]).
+
+ t() ->
+ {ok,{op,'=',{id,1},{op,'-',{op,'+',{id,4},{id,6}}}}} =
+ parse([{id,1},{'=',2},{'-',3},{id,4},{'+',5},{id,6},
+ {'\\x{400}',1}]),
+ ok.">>,
+ %% Both coding comments, passing `default` instead of a custom include file.
+ Ts2 = [{otp_14285_5,
+ [<<"%% coding: Latin-1\n">>,T2],default,ok},
+ {otp_14285_6,
+ [<<"%% coding: UTF-8\n">>,T2],default,ok}],
+ run(Config, Ts2),
+
+ ok.
+
start_node(Name, Args) ->
[_,Host] = string:tokens(atom_to_list(node()), "@"),
ct:log("Trying to start ~w@~s~n", [Name,Host]),