Diffstat (limited to 'lib/parsetools')
 -rw-r--r--  lib/parsetools/include/leexinc.hrl  | 35
 -rw-r--r--  lib/parsetools/src/leex.erl         |  4
 -rw-r--r--  lib/parsetools/test/leex_SUITE.erl  | 52
 -rw-r--r--  lib/parsetools/test/yecc_SUITE.erl  |  2
 4 files changed, 75 insertions(+), 18 deletions(-)
diff --git a/lib/parsetools/include/leexinc.hrl b/lib/parsetools/include/leexinc.hrl
index 2657fdcfaa..b4449607cb 100644
--- a/lib/parsetools/include/leexinc.hrl
+++ b/lib/parsetools/include/leexinc.hrl
@@ -36,8 +36,10 @@ string(Ics0, L0, Tcs, Ts) ->
string_cont(Ics1, L1, yyaction(A, Alen, Tcs, L0), Ts);
{reject,_Alen,Tlen,_Ics1,L1,_S1} -> % After a non-accepting state
{error,{L0,?MODULE,{illegal,yypre(Tcs, Tlen+1)}},L1};
- {A,Alen,_Tlen,_Ics1,_L1,_S1} ->
- string_cont(yysuf(Tcs, Alen), L0, yyaction(A, Alen, Tcs, L0), Ts)
+ {A,Alen,Tlen,_Ics1,L1,_S1} ->
+ Tcs1 = yysuf(Tcs, Alen),
+ L2 = adjust_line(Tlen, Alen, Tcs1, L1),
+ string_cont(Tcs1, L2, yyaction(A, Alen, Tcs, L0), Ts)
end.
%% string_cont(RestChars, Line, Token, Tokens)
@@ -107,8 +109,10 @@ token(S0, Ics0, L0, Tcs, Tlen0, Tline, A0, Alen0) ->
{reject,_Alen1,Tlen1,Ics1,L1,_S1} -> % No token match
Error = {Tline,?MODULE,{illegal,yypre(Tcs, Tlen1+1)}},
{done,{error,Error,L1},Ics1};
- {A1,Alen1,_Tlen1,_Ics1,_L1,_S1} -> % Use last accept match
- token_cont(yysuf(Tcs, Alen1), L0, yyaction(A1, Alen1, Tcs, Tline))
+ {A1,Alen1,Tlen1,_Ics1,L1,_S1} -> % Use last accept match
+ Tcs1 = yysuf(Tcs, Alen1),
+ L2 = adjust_line(Tlen1, Alen1, Tcs1, L1),
+ token_cont(Tcs1, L2, yyaction(A1, Alen1, Tcs, Tline))
end.
%% token_cont(RestChars, Line, Token)
@@ -181,9 +185,11 @@ tokens(S0, Ics0, L0, Tcs, Tlen0, Tline, Ts, A0, Alen0) ->
%% Skip rest of tokens.
Error = {L1,?MODULE,{illegal,yypre(Tcs, Tlen1+1)}},
skip_tokens(yysuf(Tcs, Tlen1+1), L1, Error);
- {A1,Alen1,_Tlen1,_Ics1,_L1,_S1} ->
+ {A1,Alen1,Tlen1,_Ics1,L1,_S1} ->
Token = yyaction(A1, Alen1, Tcs, Tline),
- tokens_cont(yysuf(Tcs, Alen1), L0, Token, Ts)
+ Tcs1 = yysuf(Tcs, Alen1),
+ L2 = adjust_line(Tlen1, Alen1, Tcs1, L1),
+ tokens_cont(Tcs1, L2, Token, Ts)
end.
%% tokens_cont(RestChars, Line, Token, Tokens)
@@ -235,9 +241,11 @@ skip_tokens(S0, Ics0, L0, Tcs, Tlen0, Tline, Error, A0, Alen0) ->
{done,{error,Error,L1},eof};
{reject,_Alen1,Tlen1,_Ics1,L1,_S1} ->
skip_tokens(yysuf(Tcs, Tlen1+1), L1, Error);
- {A1,Alen1,_Tlen1,_Ics1,L1,_S1} ->
+ {A1,Alen1,Tlen1,_Ics1,L1,_S1} ->
Token = yyaction(A1, Alen1, Tcs, Tline),
- skip_cont(yysuf(Tcs, Alen1), L1, Token, Error)
+ Tcs1 = yysuf(Tcs, Alen1),
+ L2 = adjust_line(Tlen1, Alen1, Tcs1, L1),
+ skip_cont(Tcs1, L2, Token, Error)
end.
%% skip_cont(RestChars, Line, Token, Error)
@@ -269,6 +277,17 @@ yyrev(List, Tail) -> lists:reverse(List, Tail).
yypre(List, N) -> lists:sublist(List, N).
yysuf(List, N) -> lists:nthtail(N, List).
+%% adjust_line(TokenLength, AcceptLength, Chars, Line) -> NewLine
+%% Make sure that newlines in Chars are not counted twice.
+%% Line has been updated with respect to newlines in the prefix of
+%% Chars consisting of (TokenLength - AcceptLength) characters.
+
+adjust_line(N, N, _Cs, L) -> L;
+adjust_line(T, A, [$\n|Cs], L) ->
+ adjust_line(T-1, A, Cs, L-1);
+adjust_line(T, A, [_|Cs], L) ->
+ adjust_line(T-1, A, Cs, L).
+
%% yystate() -> InitialState.
%% yystate(State, InChars, Line, CurrTokLen, AcceptAction, AcceptLen) ->
%% {Action, AcceptLen, RestChars, Line} |
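For reference, adjust_line/4 added above is called with the full look-ahead length Tlen, the accepted length Alen, the character suffix after the accepted prefix, and the line counter L1 that already includes every newline seen while scanning the look-ahead; it walks the Tlen - Alen rejected characters and takes one line back off the counter per newline, because those characters are rescanned for the next token and their newlines counted again. A standalone sketch of the same helper with made-up values (the module name and the sample input are only illustrative):

-module(adjust_line_demo).
-export([t/0]).

%% Same shape as adjust_line/4 in leexinc.hrl: nothing to undo when the
%% look-ahead length equals the accepted length; otherwise every newline
%% in the rejected suffix takes one line off the counter.
adjust_line(N, N, _Cs, L) -> L;
adjust_line(T, A, [$\n|Cs], L) -> adjust_line(T-1, A, Cs, L-1);
adjust_line(T, A, [_|Cs], L)   -> adjust_line(T-1, A, Cs, L).

t() ->
    %% The scanner looked ahead over 4 characters but accepted only 1;
    %% the rejected suffix "\n\ny" holds 2 newlines that were already
    %% counted, so the line counter drops from 5 to 3.
    3 = adjust_line(4, 1, "\n\ny", 5),
    ok.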
diff --git a/lib/parsetools/src/leex.erl b/lib/parsetools/src/leex.erl
index 15d42a4d9c..602e47404d 100644
--- a/lib/parsetools/src/leex.erl
+++ b/lib/parsetools/src/leex.erl
@@ -1586,6 +1586,8 @@ out_dfa_graph(St, DFA, DF) ->
case file:open(St#leex.gfile, [write]) of
{ok,Gfile} ->
try
+ %% Set the same encoding as infile:
+ set_encoding(St, Gfile),
io:fwrite(Gfile, "digraph DFA {~n", []),
out_dfa_states(Gfile, DFA, DF),
out_dfa_edges(Gfile, DFA),
@@ -1621,7 +1623,7 @@ out_dfa_edges(File, DFA) ->
foreach(fun (T) ->
Crs = orddict:fetch(T, Tdict),
Edgelab = dfa_edgelabel(Crs),
- io:fwrite(File, " ~b -> ~b [label=\"~s\"];~n",
+ io:fwrite(File, " ~b -> ~b [label=\"~ts\"];~n",
[S,T,Edgelab])
end, sort(orddict:fetch_keys(Tdict)))
end, DFA).
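Both leex.erl changes above concern the DFA graph (.dot) output: the graph file is now opened with the same encoding as the .xrl input, and edge labels are printed with the translated string modifier ~ts, so character classes outside Latin-1 no longer make io:fwrite/3 fail. A minimal, hypothetical demo of the ~s vs ~ts difference (module name and values are invented; it assumes standard output is set up for Unicode, as modern shells usually are):

-module(ts_vs_s_demo).
-export([t/0]).

t() ->
    Arrow = [8592],                          % "←" as a list of one code point
    ok = io:fwrite("~ts~n", [Arrow]),        % ~ts accepts Unicode data
    {'EXIT', {badarg, _}} =
        (catch io:fwrite("~s~n", [Arrow])),  % plain ~s is Latin-1 only
    ok.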
diff --git a/lib/parsetools/test/leex_SUITE.erl b/lib/parsetools/test/leex_SUITE.erl
index 949ef3c36e..54602848ec 100644
--- a/lib/parsetools/test/leex_SUITE.erl
+++ b/lib/parsetools/test/leex_SUITE.erl
@@ -45,7 +45,7 @@
pt/1, man/1, ex/1, ex2/1, not_yet/1,
line_wrap/1,
- otp_10302/1, otp_11286/1, unicode/1]).
+ otp_10302/1, otp_11286/1, unicode/1, otp_13916/1]).
% Default timetrap timeout (set in init_per_testcase).
-define(default_timeout, ?t:minutes(1)).
@@ -62,12 +62,12 @@ end_per_testcase(_Case, Config) ->
suite() -> [{ct_hooks,[ts_install_cth]}].
all() ->
- [{group, checks}, {group, examples}, {group, bugs}].
+ [{group, checks}, {group, examples}, {group, tickets}, {group, bugs}].
groups() ->
[{checks, [], [file, compile, syntax]},
{examples, [], [pt, man, ex, ex2, not_yet, unicode]},
- {tickets, [], [otp_10302, otp_11286]},
+ {tickets, [], [otp_10302, otp_11286, otp_13916]},
{bugs, [], [line_wrap]}].
init_per_suite(Config) ->
@@ -408,12 +408,12 @@ unicode(Config) when is_list(Config) ->
Ts = [{unicode_1,
<<"%% -*- coding: utf-8 -*-\n"
"Definitions.\n"
- "RTLarrow = (←)\n"
+ "RTLarrow = (←)\n"
"Rules.\n"
- "{RTLarrow} : {token,{'<-',TokenLine}}.\n"
+ "{RTLarrow} : {token,{\"←\",TokenLine}}.\n"
"Erlang code.\n"
"-export([t/0]).\n"
- "t() -> {ok, [{'<-', 1}], 1} = string(\"←\"), ok.">>,
+ "t() -> {ok, [{\"←\", 1}], 1} = string(\"←\"), ok.">>,
default,
ok}],
@@ -1052,7 +1052,7 @@ otp_11286(Config) when is_list(Config) ->
Dir = ?privdir,
UName = [1024] ++ "u",
UDir = filename:join(Dir, UName),
- ok = rpc:call(Node, file, make_dir, [UDir]),
+ _ = rpc:call(Node, file, make_dir, [UDir]),
%% Note: Cannot use UName as filename since the filename is used
%% as module name. To be fixed in R18.
@@ -1095,6 +1095,42 @@ otp_11286(Config) when is_list(Config) ->
true = test_server:stop_node(Node),
ok.
+otp_13916(doc) ->
+ "OTP-13916. Leex rules with newlines result in bad line numbers";
+otp_13916(suite) -> [];
+otp_13916(Config) when is_list(Config) ->
+ Ts = [{otp_13916_1,
+ <<"Definitions.\n"
+ "W = [a-zA-Z0-9]\n"
+ "S = [\\s\\t]\n"
+ "B = [\\n\\r]\n"
+ "Rules.\n"
+ "%% mark line break(s) and empty lines by token 'break'\n"
+ "%% in order to use as delimiters\n"
+ "{B}({S}*{B})+ : {token, {break, TokenLine}}.\n"
+ "{B} : {token, {break, TokenLine}}.\n"
+ "{S}+ : {token, {blank, TokenLine, TokenChars}}.\n"
+ "{W}+ : {token, {word, TokenLine, TokenChars}}.\n"
+ "Erlang code.\n"
+ "-export([t/0]).\n"
+ "t() ->\n"
+ " {ok,[{break,1},{blank,4,\" \"},{word,4,\"breaks\"}],4} =\n"
+ " string(\"\\n\\n \\n breaks\"),\n"
+ " {ok,[{break,1},{word,4,\"works\"}],4} =\n"
+ " string(\"\\n\\n \\nworks\"),\n"
+ " {ok,[{break,1},{word,4,\"L4\"},{break,4},\n"
+ " {word,5,\"L5\"},{break,5},{word,7,\"L7\"}], 7} =\n"
+ " string(\"\\n\\n \\nL4\\nL5\\n\\nL7\"),\n"
+ " {ok,[{break,1},{blank,4,\" \"},{word,4,\"L4\"},\n"
+ " {break,4},{blank,5,\" \"},{word,5,\"L5\"},\n"
+ " {break,5},{blank,7,\" \"},{word,7,\"L7\"}], 7} =\n"
+ " string(\"\\n\\n \\n L4\\n L5\\n\\n L7\"),\n"
+ " ok.\n">>,
+ default,
+ ok}],
+ ?line run(Config, Ts),
+ ok.
+
start_node(Name, Args) ->
[_,Host] = string:tokens(atom_to_list(node()), "@"),
ct:log("Trying to start ~w@~s~n", [Name,Host]),
@@ -1137,7 +1173,7 @@ run_test(Config, Def, Pre) ->
XrlFile = filename:join(DataDir, DefFile),
ErlFile = filename:join(DataDir, Filename),
Opts = [return, warn_unused_vars,{outdir,DataDir}],
- ok = file:write_file(XrlFile, Def, [{encoding, unicode}]),
+ ok = file:write_file(XrlFile, Def),
LOpts = [return, {report, false} |
case Pre of
default ->
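The new otp_13916 case exercises the backtracking branch changed in leexinc.hrl: a rule such as {B}({S}*{B})+ accepts text containing newlines while the DFA has already looked further ahead, and the old code continued with the line number from the start of the token (L0), so later tokens were reported on wrong lines; the fix keeps the counted newlines and only backs out those in the rejected look-ahead, since that suffix is rescanned. A rough way to try it by hand, assuming the rule set quoted in otp_13916_1 has been saved as otp_13916_1.xrl in the current directory (file name and shell steps are illustrative only):

%% In an Erlang shell, in the directory holding otp_13916_1.xrl:
{ok, ErlFile} = leex:file("otp_13916_1.xrl"),
{ok, otp_13916_1} = c(ErlFile),     % shell c/1 compiles and loads the scanner
%% The first rule consumes "\n\n \n" as one break token, so the blank and
%% the word after it are reported on line 4 and the end line is 4.
{ok, [{break,1},{blank,4," "},{word,4,"breaks"}], 4} =
    otp_13916_1:string("\n\n \n breaks"),
ok = otp_13916_1:t().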
diff --git a/lib/parsetools/test/yecc_SUITE.erl b/lib/parsetools/test/yecc_SUITE.erl
index e91ddb11d1..5bd71d5d19 100644
--- a/lib/parsetools/test/yecc_SUITE.erl
+++ b/lib/parsetools/test/yecc_SUITE.erl
@@ -2009,7 +2009,7 @@ otp_11286(Config) when is_list(Config) ->
Dir = ?privdir,
UName = [1024] ++ "u",
UDir = filename:join(Dir, UName),
- ok = rpc:call(Node, file, make_dir, [UDir]),
+ _ = rpc:call(Node, file, make_dir, [UDir]),
%% Note: Cannot use UName as filename since the filename is used
%% as module name. To be fixed in R18.