From 298ff42f5f80a603306a007540d1c75a013bcf81 Mon Sep 17 00:00:00 2001 From: Hans Bolinder Date: Mon, 2 Sep 2013 15:07:52 +0200 Subject: Fix a Unicode filename bug affecting Leex and Yecc A bug where Unicode filenames combined with Latin-1 encoding could crash Yecc and Leex has been fixed. --- lib/parsetools/doc/src/leex.xml | 16 +++++---- lib/parsetools/src/leex.erl | 10 ++++-- lib/parsetools/src/yecc.erl | 12 ++++--- lib/parsetools/test/leex_SUITE.erl | 66 ++++++++++++++++++++++++++++++++++++-- lib/parsetools/test/yecc_SUITE.erl | 63 ++++++++++++++++++++++++++++++++++-- 5 files changed, 149 insertions(+), 18 deletions(-) (limited to 'lib/parsetools') diff --git a/lib/parsetools/doc/src/leex.xml b/lib/parsetools/doc/src/leex.xml index d5c24c303d..b4e2af6857 100644 --- a/lib/parsetools/doc/src/leex.xml +++ b/lib/parsetools/doc/src/leex.xml @@ -4,7 +4,7 @@
- 20092011 + 20092013 Ericsson AB. All Rights Reserved. @@ -38,19 +38,21 @@ Token = tuple() - file(FileName) -> ok | error - file(FileName, Options) -> ok | error + file(FileName, [, Options]) -> LeexRet Generate a lexical analyzer FileName = filename() Options = Option | [Option] Option = - see below - - FileReturn = {ok, Scannerfile} - | {ok, Scannerfile, Warnings} - | error - | {error, Warnings, Errors} + LeexRet = {ok, Scannerfile} + | {ok, Scannerfile, Warnings} + | error + | {error, Warnings, Errors} Scannerfile = filename() Warnings = Errors = [{filename(), [ErrorInfo]}] + ErrorInfo = {ErrorLine, module(), Reason} + ErrorLine = integer() + Reason = - formatable by format_error/1 -

Generates a lexical analyzer from the definition in the input diff --git a/lib/parsetools/src/leex.erl b/lib/parsetools/src/leex.erl index e531b78a5b..7039aea1ae 100644 --- a/lib/parsetools/src/leex.erl +++ b/lib/parsetools/src/leex.erl @@ -1645,10 +1645,14 @@ output_encoding_comment(File, #leex{encoding = Encoding}) -> output_file_directive(File, Filename, Line) -> io:fwrite(File, <<"-file(~ts, ~w).\n">>, - [format_filename(Filename), Line]). + [format_filename(Filename, File), Line]). -format_filename(Filename) -> - io_lib:write_string(filename:flatten(Filename)). +format_filename(Filename0, File) -> + Filename = filename:flatten(Filename0), + case lists:keyfind(encoding, 1, io:getopts(File)) of + {encoding, unicode} -> io_lib:write_string(Filename); + _ -> io_lib:write_string_as_latin1(Filename) + end. quote($^) -> "\\^"; quote($.) -> "\\."; diff --git a/lib/parsetools/src/yecc.erl b/lib/parsetools/src/yecc.erl index f9207d926e..b698beb558 100644 --- a/lib/parsetools/src/yecc.erl +++ b/lib/parsetools/src/yecc.erl @@ -482,7 +482,7 @@ generate(St0) -> F = case member(time, St1#yecc.options) of true -> io:fwrite(<<"Generating parser from grammar in ~ts\n">>, - [format_filename(St1#yecc.infile)]), + [format_filename(St1#yecc.infile, St1)]), fun timeit/3; false -> fun(_Name, Fn, St) -> Fn(St) end @@ -2519,7 +2519,7 @@ output_encoding_comment(#yecc{encoding = Encoding}=St) -> output_file_directive(St, Filename, Line) when St#yecc.file_attrs -> fwrite(St, <<"-file(~ts, ~w).\n">>, - [format_filename(Filename), Line]); + [format_filename(Filename, St), Line]); output_file_directive(St, _Filename, _Line) -> St. @@ -2547,8 +2547,12 @@ nl(#yecc{outport = Outport, line = Line}=St) -> io:nl(Outport), St#yecc{line = Line + 1}. -format_filename(Filename) -> - io_lib:write_string(filename:flatten(Filename)). +format_filename(Filename0, St) -> + Filename = filename:flatten(Filename0), + case lists:keyfind(encoding, 1, io:getopts(St#yecc.outport)) of + {encoding, unicode} -> io_lib:write_string(Filename); + _ -> io_lib:write_string_as_latin1(Filename) + end. format_assoc(left) -> "Left"; diff --git a/lib/parsetools/test/leex_SUITE.erl b/lib/parsetools/test/leex_SUITE.erl index afedd79a4e..7cbc72accb 100644 --- a/lib/parsetools/test/leex_SUITE.erl +++ b/lib/parsetools/test/leex_SUITE.erl @@ -45,7 +45,7 @@ pt/1, man/1, ex/1, ex2/1, not_yet/1, - otp_10302/1]). + otp_10302/1, otp_11286/1]). % Default timetrap timeout (set in init_per_testcase). -define(default_timeout, ?t:minutes(1)). @@ -67,7 +67,7 @@ all() -> groups() -> [{checks, [], [file, compile, syntax]}, {examples, [], [pt, man, ex, ex2, not_yet]}, - {tickets, [], [otp_10302]}]. + {tickets, [], [otp_10302, otp_11286]}]. init_per_suite(Config) -> Config. @@ -983,6 +983,68 @@ otp_10302(Config) when is_list(Config) -> ok. +otp_11286(doc) -> + "OTP-11286. A Unicode filename bug; both Leex and Yecc."; +otp_11286(suite) -> []; +otp_11286(Config) when is_list(Config) -> + Node = start_node(otp_11286, "+fnu"), + Dir = ?privdir, + UName = [1024] ++ "u", + UDir = filename:join(Dir, UName), + ok = rpc:call(Node, file, make_dir, [UDir]), + + %% Note: Cannot use UName as filename since the filename is used + %% as module name. To be fixed in R18. + Filename = filename:join(UDir, 'OTP-11286.xrl'), + Scannerfile = filename:join(UDir, 'OTP-11286.erl'), + Options = [return, {scannerfile, Scannerfile}], + + Mini1 = <<"%% coding: utf-8\n" + "Definitions.\n" + "D = [0-9]\n" + "Rules.\n" + "{L}+ : {token,{word,TokenLine,TokenChars}}.\n" + "Erlang code.\n">>, + ok = rpc:call(Node, file, write_file, [Filename, Mini1]), + {ok, _, []} = rpc:call(Node, leex, file, [Filename, Options]), + {ok,_,_} = rpc:call(Node, compile, file, + [Scannerfile,[basic_validation,return]]), + + Mini2 = <<"Definitions.\n" + "D = [0-9]\n" + "Rules.\n" + "{L}+ : {token,{word,TokenLine,TokenChars}}.\n" + "Erlang code.\n">>, + ok = rpc:call(Node, file, write_file, [Filename, Mini2]), + {ok, _, []} = rpc:call(Node, leex, file, [Filename, Options]), + {ok,_,_} = rpc:call(Node, compile, file, + [Scannerfile,[basic_validation,return]]), + + Mini3 = <<"%% coding: latin-1\n" + "Definitions.\n" + "D = [0-9]\n" + "Rules.\n" + "{L}+ : {token,{word,TokenLine,TokenChars}}.\n" + "Erlang code.\n">>, + ok = rpc:call(Node, file, write_file, [Filename, Mini3]), + {ok, _, []} = rpc:call(Node, leex, file, [Filename, Options]), + {ok,_,_} = rpc:call(Node, compile, file, + [Scannerfile,[basic_validation,return]]), + + true = test_server:stop_node(Node), + ok. + +start_node(Name, Args) -> + [_,Host] = string:tokens(atom_to_list(node()), "@"), + ct:log("Trying to start ~w@~s~n", [Name,Host]), + case test_server:start_node(Name, peer, [{args,Args}]) of + {error,Reason} -> + test_server:fail(Reason); + {ok,Node} -> + ct:log("Node ~p started~n", [Node]), + Node + end. + unwritable(Fname) -> {ok, Info} = file:read_file_info(Fname), Mode = Info#file_info.mode - 8#00200, diff --git a/lib/parsetools/test/yecc_SUITE.erl b/lib/parsetools/test/yecc_SUITE.erl index 9c865a1ec6..c7ac9fd232 100644 --- a/lib/parsetools/test/yecc_SUITE.erl +++ b/lib/parsetools/test/yecc_SUITE.erl @@ -49,7 +49,8 @@ otp_5369/1, otp_6362/1, otp_7945/1, otp_8483/1, otp_8486/1, - otp_7292/1, otp_7969/1, otp_8919/1, otp_10302/1, otp_11269/1]). + otp_7292/1, otp_7969/1, otp_8919/1, otp_10302/1, otp_11269/1, + otp_11286/1]). % Default timetrap timeout (set in init_per_testcase). -define(default_timeout, ?t:minutes(1)). @@ -77,7 +78,7 @@ groups() -> {bugs, [], [otp_5369, otp_6362, otp_7945, otp_8483, otp_8486]}, {improvements, [], [otp_7292, otp_7969, otp_8919, otp_10302, - otp_11269]}]. + otp_11269, otp_11286]}]. init_per_suite(Config) -> Config. @@ -1996,6 +1997,64 @@ otp_11269(Config) when is_list(Config) -> {ok,'OTP-11269',_Warnings} = compile:file(ErlFile, Opts), ok. +otp_11286(doc) -> + "OTP-11286. A Unicode filename bug; both Leex and Yecc."; +otp_11286(suite) -> []; +otp_11286(Config) when is_list(Config) -> + Node = start_node(otp_11286, "+fnu"), + Dir = ?privdir, + UName = [1024] ++ "u", + UDir = filename:join(Dir, UName), + ok = rpc:call(Node, file, make_dir, [UDir]), + + %% Note: Cannot use UName as filename since the filename is used + %% as module name. To be fixed in R18. + Filename = filename:join(UDir, 'OTP-11286.yrl'), + Ret = [return, {report, false}, time], + + Mini1 = <<"%% coding: utf-8 + Terminals t. + Nonterminals nt. + Rootsymbol nt. + nt -> t.">>, + ok = rpc:call(Node, file, write_file, [Filename, Mini1]), + {ok,ErlFile,[]} = rpc:call(Node, yecc, file, [Filename, Ret]), + Opts = [return, warn_unused_vars,{outdir,Dir}], + {ok,_,_Warnings} = rpc:call(Node, compile, file, [ErlFile, Opts]), + + Mini2 = <<"Terminals t. + Nonterminals nt. + Rootsymbol nt. + nt -> t.">>, + ok = rpc:call(Node, file, write_file, [Filename, Mini2]), + {ok,ErlFile,[]} = rpc:call(Node, yecc, file, [Filename, Ret]), + Opts = [return, warn_unused_vars,{outdir,Dir}], + {ok,_,_Warnings} = rpc:call(Node, compile, file, [ErlFile, Opts]), + + Mini3 = <<"%% coding: latin-1 + Terminals t. + Nonterminals nt. + Rootsymbol nt. + nt -> t.">>, + ok = rpc:call(Node, file, write_file, [Filename, Mini3]), + {ok,ErlFile,[]} = rpc:call(Node, yecc, file, [Filename, Ret]), + Opts = [return, warn_unused_vars,{outdir,Dir}], + {ok,_,_Warnings} = rpc:call(Node, compile, file, [ErlFile, Opts]), + + true = test_server:stop_node(Node), + ok. + +start_node(Name, Args) -> + [_,Host] = string:tokens(atom_to_list(node()), "@"), + ct:log("Trying to start ~w@~s~n", [Name,Host]), + case test_server:start_node(Name, peer, [{args,Args}]) of + {error,Reason} -> + test_server:fail(Reason); + {ok,Node} -> + ct:log("Node ~p started~n", [Node]), + Node + end. + yeccpre_size() -> yeccpre_size(default_yeccpre()). -- cgit v1.2.3