aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFredrik Gustafsson <[email protected]>2013-09-11 09:34:16 +0200
committerFredrik Gustafsson <[email protected]>2013-09-11 09:34:16 +0200
commitde29dde83bc3aa6328e3db6876ddf929a5141e75 (patch)
tree266be25f83cdfd93e08264b014a371b72ed92614
parent8ee301f7d4a6e7b3254737e9e18250e266441ac6 (diff)
parentb7e511364e3e7d05febb081dc170c413d45be666 (diff)
downloadotp-de29dde83bc3aa6328e3db6876ddf929a5141e75.tar.gz
otp-de29dde83bc3aa6328e3db6876ddf929a5141e75.tar.bz2
otp-de29dde83bc3aa6328e3db6876ddf929a5141e75.zip
Merge branch 'fenollp/parsetools/fix_unicode_leex/OTP-11313' into maint
* fenollp/parsetools/fix_unicode_leex/OTP-11313: parsetools: added testcase for unicode Missed a few similar calls Fix leex module`s inability to build unicode-aware lexers.
-rw-r--r--lib/parsetools/src/leex.erl6
-rw-r--r--lib/parsetools/test/leex_SUITE.erl24
2 files changed, 24 insertions, 6 deletions
diff --git a/lib/parsetools/src/leex.erl b/lib/parsetools/src/leex.erl
index 7039aea1ae..03f864ff03 100644
--- a/lib/parsetools/src/leex.erl
+++ b/lib/parsetools/src/leex.erl
@@ -436,7 +436,7 @@ parse_defs(_, {eof,L}, St) ->
parse_defs(Ifile, {ok,Chars,L}=Line, Ms, St) ->
%% This little beauty matches out a macro definition, RE's are so clear.
MS = "^[ \t]*([A-Z_][A-Za-z0-9_]*)[ \t]*=[ \t]*([^ \t\r\n]*)[ \t\r\n]*\$",
- case re:run(Chars, MS, [{capture,all_but_first,list}]) of
+ case re:run(Chars, MS, [{capture,all_but_first,list},unicode]) of
{match,[Name,Def]} ->
%%io:fwrite("~p = ~p\n", [Name,Def]),
parse_defs(Ifile, nextline(Ifile, L, St), [{Name,Def}|Ms], St);
@@ -491,7 +491,7 @@ parse_rules_end(_, NextLine, REAs, As, St) ->
collect_rule(Ifile, Chars, L0) ->
%% Erlang strings are 1 based, but re 0 :-(
- {match,[{St0,Len}|_]} = re:run(Chars, "[^ \t\r\n]+"),
+ {match,[{St0,Len}|_]} = re:run(Chars, "[^ \t\r\n]+", [unicode]),
St = St0 + 1,
%%io:fwrite("RE = ~p~n", [substr(Chars, St, Len)]),
case collect_action(Ifile, substr(Chars, St+Len), L0, []) of
@@ -548,7 +548,7 @@ var_used(Name, Toks) ->
%% here as it uses info in replace string (&).
parse_rule_regexp(RE0, [{M,Exp}|Ms], St) ->
- Split= re:split(RE0, "\\{" ++ M ++ "\\}", [{return,list}]),
+ Split= re:split(RE0, "\\{" ++ M ++ "\\}", [{return,list},unicode]),
RE1 = string:join(Split, Exp),
parse_rule_regexp(RE1, Ms, St);
parse_rule_regexp(RE, [], St) ->
diff --git a/lib/parsetools/test/leex_SUITE.erl b/lib/parsetools/test/leex_SUITE.erl
index 7cbc72accb..ff49c853f6 100644
--- a/lib/parsetools/test/leex_SUITE.erl
+++ b/lib/parsetools/test/leex_SUITE.erl
@@ -45,7 +45,7 @@
pt/1, man/1, ex/1, ex2/1, not_yet/1,
- otp_10302/1, otp_11286/1]).
+ otp_10302/1, otp_11286/1, unicode/1]).
% Default timetrap timeout (set in init_per_testcase).
-define(default_timeout, ?t:minutes(1)).
@@ -66,7 +66,7 @@ all() ->
groups() ->
[{checks, [], [file, compile, syntax]},
- {examples, [], [pt, man, ex, ex2, not_yet]},
+ {examples, [], [pt, man, ex, ex2, not_yet, unicode]},
{tickets, [], [otp_10302, otp_11286]}].
init_per_suite(Config) ->
@@ -401,6 +401,24 @@ pt(Config) when is_list(Config) ->
?line run(Config, Ts),
ok.
+unicode(suite) ->
+ [];
+unicode(Config) when is_list(Config) ->
+ Ts = [{unicode_1,
+ <<"%% -*- coding: utf-8 -*-\n"
+ "Definitions.\n"
+ "RTLarrow = (←)\n"
+ "Rules.\n"
+ "{RTLarrow} : {token,{'<-',TokenLine}}.\n"
+ "Erlang code.\n"
+ "-export([t/0]).\n"
+ "t() -> {ok, [{'<-', 1}], 1} = string(\"←\"), ok.">>,
+ default,
+ ok}],
+
+ ?line run(Config, Ts),
+ ok.
+
man(doc) ->
"Examples from the manpage.";
man(suite) -> [];
@@ -1076,7 +1094,7 @@ run_test(Config, Def, Pre) ->
XrlFile = filename:join(DataDir, DefFile),
ErlFile = filename:join(DataDir, Filename),
Opts = [return, warn_unused_vars,{outdir,DataDir}],
- ok = file:write_file(XrlFile, Def),
+ ok = file:write_file(XrlFile, Def, [{encoding, unicode}]),
LOpts = [return, {report, false} |
case Pre of
default ->