From 59c87f2df787beb5334c108da4c41e88245ad191 Mon Sep 17 00:00:00 2001 From: Pierre Fenoll Date: Mon, 9 Sep 2013 18:12:50 +0100 Subject: Fix leex module's inability to build unicode-aware lexers. If you have declared your .xrl file as utf-8 encoded and some of your definitions contain unicode characters, either leex wouldn't be able to lex them or compilation of the .xrl file would crash. --- lib/parsetools/src/leex.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/parsetools') diff --git a/lib/parsetools/src/leex.erl b/lib/parsetools/src/leex.erl index 7039aea1ae..4544b34a1e 100644 --- a/lib/parsetools/src/leex.erl +++ b/lib/parsetools/src/leex.erl @@ -436,7 +436,7 @@ parse_defs(_, {eof,L}, St) -> parse_defs(Ifile, {ok,Chars,L}=Line, Ms, St) -> %% This little beauty matches out a macro definition, RE's are so clear. MS = "^[ \t]*([A-Z_][A-Za-z0-9_]*)[ \t]*=[ \t]*([^ \t\r\n]*)[ \t\r\n]*\$", - case re:run(Chars, MS, [{capture,all_but_first,list}]) of + case re:run(Chars, MS, [{capture,all_but_first,list},unicode]) of {match,[Name,Def]} -> %%io:fwrite("~p = ~p\n", [Name,Def]), parse_defs(Ifile, nextline(Ifile, L, St), [{Name,Def}|Ms], St); -- cgit v1.2.3 From ee55ee98e89fc2cf6bb6d7e4b596507c3b2d9ffb Mon Sep 17 00:00:00 2001 From: Pierre Fenoll Date: Mon, 9 Sep 2013 20:35:41 +0100 Subject: Missed a few similar calls --- lib/parsetools/src/leex.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/parsetools') diff --git a/lib/parsetools/src/leex.erl b/lib/parsetools/src/leex.erl index 4544b34a1e..03f864ff03 100644 --- a/lib/parsetools/src/leex.erl +++ b/lib/parsetools/src/leex.erl @@ -491,7 +491,7 @@ parse_rules_end(_, NextLine, REAs, As, St) -> collect_rule(Ifile, Chars, L0) -> %% Erlang strings are 1 based, but re 0 :-( - {match,[{St0,Len}|_]} = re:run(Chars, "[^ \t\r\n]+"), + {match,[{St0,Len}|_]} = re:run(Chars, "[^ \t\r\n]+", [unicode]), St = St0 + 1, %%io:fwrite("RE = ~p~n", [substr(Chars, St, Len)]), case
collect_action(Ifile, substr(Chars, St+Len), L0, []) of @@ -548,7 +548,7 @@ var_used(Name, Toks) -> %% here as it uses info in replace string (&). parse_rule_regexp(RE0, [{M,Exp}|Ms], St) -> - Split= re:split(RE0, "\\{" ++ M ++ "\\}", [{return,list}]), + Split= re:split(RE0, "\\{" ++ M ++ "\\}", [{return,list},unicode]), RE1 = string:join(Split, Exp), parse_rule_regexp(RE1, Ms, St); parse_rule_regexp(RE, [], St) -> -- cgit v1.2.3 From b7e511364e3e7d05febb081dc170c413d45be666 Mon Sep 17 00:00:00 2001 From: Fredrik Gustafsson Date: Tue, 10 Sep 2013 09:37:09 +0200 Subject: parsetools: added testcase for unicode --- lib/parsetools/test/leex_SUITE.erl | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'lib/parsetools') diff --git a/lib/parsetools/test/leex_SUITE.erl b/lib/parsetools/test/leex_SUITE.erl index 7cbc72accb..ff49c853f6 100644 --- a/lib/parsetools/test/leex_SUITE.erl +++ b/lib/parsetools/test/leex_SUITE.erl @@ -45,7 +45,7 @@ pt/1, man/1, ex/1, ex2/1, not_yet/1, - otp_10302/1, otp_11286/1]). + otp_10302/1, otp_11286/1, unicode/1]). % Default timetrap timeout (set in init_per_testcase). -define(default_timeout, ?t:minutes(1)). @@ -66,7 +66,7 @@ all() -> groups() -> [{checks, [], [file, compile, syntax]}, - {examples, [], [pt, man, ex, ex2, not_yet]}, + {examples, [], [pt, man, ex, ex2, not_yet, unicode]}, {tickets, [], [otp_10302, otp_11286]}]. init_per_suite(Config) -> @@ -401,6 +401,24 @@ pt(Config) when is_list(Config) -> ?line run(Config, Ts), ok. +unicode(suite) -> + []; +unicode(Config) when is_list(Config) -> + Ts = [{unicode_1, + <<"%% -*- coding: utf-8 -*-\n" + "Definitions.\n" + "RTLarrow = (←)\n" + "Rules.\n" + "{RTLarrow} : {token,{'<-',TokenLine}}.\n" + "Erlang code.\n" + "-export([t/0]).\n" + "t() -> {ok, [{'<-', 1}], 1} = string(\"←\"), ok.">>, + default, + ok}], + + ?line run(Config, Ts), + ok. 
+ man(doc) -> "Examples from the manpage."; man(suite) -> []; @@ -1076,7 +1094,7 @@ run_test(Config, Def, Pre) -> XrlFile = filename:join(DataDir, DefFile), ErlFile = filename:join(DataDir, Filename), Opts = [return, warn_unused_vars,{outdir,DataDir}], - ok = file:write_file(XrlFile, Def), + ok = file:write_file(XrlFile, Def, [{encoding, unicode}]), LOpts = [return, {report, false} | case Pre of default -> -- cgit v1.2.3