aboutsummaryrefslogtreecommitdiffstats
path: root/lib/parsetools
diff options
context:
space:
mode:
authorHans Bolinder <[email protected]>2017-02-02 10:16:07 +0100
committerHans Bolinder <[email protected]>2017-04-07 08:57:26 +0200
commit25271fa55aacf0b367ad74532c952352344ed97d (patch)
tree434285a14cc79b0801f064e7d150f3bb57d239f7 /lib/parsetools
parent46e08e6ac477d1acccb360ad5d616c96dcdfe850 (diff)
downloadotp-25271fa55aacf0b367ad74532c952352344ed97d.tar.gz
otp-25271fa55aacf0b367ad74532c952352344ed97d.tar.bz2
otp-25271fa55aacf0b367ad74532c952352344ed97d.zip
parsetools: Fix Leex regarding Unicode atoms
Notice that macro names are unquoted atoms.
Diffstat (limited to 'lib/parsetools')
-rw-r--r--lib/parsetools/doc/src/leex.xml5
-rw-r--r--lib/parsetools/src/leex.erl92
-rw-r--r--lib/parsetools/test/leex_SUITE.erl45
3 files changed, 100 insertions, 42 deletions
diff --git a/lib/parsetools/doc/src/leex.xml b/lib/parsetools/doc/src/leex.xml
index 29d546105f..1227625287 100644
--- a/lib/parsetools/doc/src/leex.xml
+++ b/lib/parsetools/doc/src/leex.xml
@@ -4,7 +4,7 @@
<erlref>
<header>
<copyright>
- <year>2009</year><year>2016</year>
+ <year>2009</year><year>2017</year>
<holder>Ericsson AB. All Rights Reserved.</holder>
</copyright>
<legalnotice>
@@ -446,7 +446,8 @@ D = [0-9]
</item>
</taglist>
- <p>The following examples define Erlang data types:</p>
+ <p>The following examples define simplified versions of a few
+ Erlang data types:</p>
<code>
Atoms [a-z][0-9a-zA-Z_]*
diff --git a/lib/parsetools/src/leex.erl b/lib/parsetools/src/leex.erl
index e0f37ae9df..e2e7d7359f 100644
--- a/lib/parsetools/src/leex.erl
+++ b/lib/parsetools/src/leex.erl
@@ -1548,22 +1548,23 @@ out_action_code(File, XrlFile, {_A,Code,_Vars,Name,Args,ArgsChars}) ->
L = erl_scan:line(hd(Code)),
output_file_directive(File, XrlFile, L-2),
io:fwrite(File, "~s(~s) ->~n", [Name, ArgsChars]),
- io:fwrite(File, " ~s\n", [pp_tokens(Code, L)]).
+ io:fwrite(File, " ~ts\n", [pp_tokens(Code, L, File)]).
-%% pp_tokens(Tokens, Line) -> [char()].
+%% pp_tokens(Tokens, Line, File) -> [char()].
%% Prints the tokens keeping the line breaks of the original code.
-pp_tokens(Tokens, Line0) -> pp_tokens(Tokens, Line0, none).
+pp_tokens(Tokens, Line0, File) -> pp_tokens(Tokens, Line0, File, none).
-pp_tokens([], _Line0, _) -> [];
-pp_tokens([T | Ts], Line0, Prev) ->
+pp_tokens([], _Line0, _, _) -> [];
+pp_tokens([T | Ts], Line0, File, Prev) ->
Line = erl_scan:line(T),
- [pp_sep(Line, Line0, Prev, T), pp_symbol(T) | pp_tokens(Ts, Line, T)].
+ [pp_sep(Line, Line0, Prev, T),
+ pp_symbol(T, File) | pp_tokens(Ts, Line, File, T)].
-pp_symbol({var,_,Var}) -> atom_to_list(Var);
-pp_symbol({_,_,Symbol}) -> io_lib:fwrite("~p", [Symbol]);
-pp_symbol({dot, _}) -> ".";
-pp_symbol({Symbol, _}) -> atom_to_list(Symbol).
+pp_symbol({var,_,Var}, _) -> atom_to_list(Var);
+pp_symbol({_,_,Symbol}, File) -> format_symbol(Symbol, File);
+pp_symbol({dot, _}, _) -> ".";
+pp_symbol({Symbol, _}, _) -> atom_to_list(Symbol).
pp_sep(Line, Line0, Prev, T) when Line > Line0 ->
["\n " | pp_sep(Line - 1, Line0, Prev, T)];
@@ -1622,17 +1623,17 @@ out_dfa_edges(File, DFA) ->
end, orddict:new(), Pt),
foreach(fun (T) ->
Crs = orddict:fetch(T, Tdict),
- Edgelab = dfa_edgelabel(Crs),
+ Edgelab = dfa_edgelabel(Crs, File),
io:fwrite(File, " ~b -> ~b [label=\"~ts\"];~n",
[S,T,Edgelab])
end, sort(orddict:fetch_keys(Tdict)))
end, DFA).
-dfa_edgelabel([C]) when is_integer(C) -> quote(C);
-dfa_edgelabel(Cranges) ->
+dfa_edgelabel([C], File) when is_integer(C) -> quote(C, File);
+dfa_edgelabel(Cranges, File) ->
%% io:fwrite("el: ~p\n", [Cranges]),
- "[" ++ map(fun ({A,B}) -> [quote(A), "-", quote(B)];
- (C) -> [quote(C)]
+ "[" ++ map(fun ({A,B}) -> [quote(A, File), "-", quote(B, File)];
+ (C) -> [quote(C, File)]
end, Cranges) ++ "]".
set_encoding(#leex{encoding = none}, File) ->
@@ -1651,33 +1652,50 @@ output_file_directive(File, Filename, Line) ->
format_filename(Filename0, File) ->
Filename = filename:flatten(Filename0),
+ case enc(File) of
+ unicode -> io_lib:write_string(Filename);
+ latin1 -> io_lib:write_string_as_latin1(Filename)
+ end.
+
+format_symbol(Symbol, File) ->
+ Format = case enc(File) of
+ latin1 -> "~p";
+ unicode -> "~tp"
+ end,
+ io_lib:fwrite(Format, [Symbol]).
+
+enc(File) ->
case lists:keyfind(encoding, 1, io:getopts(File)) of
- {encoding, unicode} -> io_lib:write_string(Filename);
- _ -> io_lib:write_string_as_latin1(Filename)
+ false -> latin1; % should never happen
+ {encoding, Enc} -> Enc
end.
-quote($^) -> "\\^";
-quote($.) -> "\\.";
-quote($$) -> "\\$";
-quote($-) -> "\\-";
-quote($[) -> "\\[";
-quote($]) -> "\\]";
-quote($\s) -> "\\\\s";
-quote($\") -> "\\\"";
-quote($\b) -> "\\\\b";
-quote($\f) -> "\\\\f";
-quote($\n) -> "\\\\n";
-quote($\r) -> "\\\\r";
-quote($\t) -> "\\\\t";
-quote($\e) -> "\\\\e";
-quote($\v) -> "\\\\v";
-quote($\d) -> "\\\\d";
-quote($\\) -> "\\\\";
-quote(C) when is_integer(C) ->
+quote($^, _File) -> "\\^";
+quote($., _File) -> "\\.";
+quote($$, _File) -> "\\$";
+quote($-, _File) -> "\\-";
+quote($[, _File) -> "\\[";
+quote($], _File) -> "\\]";
+quote($\s, _File) -> "\\\\s";
+quote($\", _File) -> "\\\"";
+quote($\b, _File) -> "\\\\b";
+quote($\f, _File) -> "\\\\f";
+quote($\n, _File) -> "\\\\n";
+quote($\r, _File) -> "\\\\r";
+quote($\t, _File) -> "\\\\t";
+quote($\e, _File) -> "\\\\e";
+quote($\v, _File) -> "\\\\v";
+quote($\d, _File) -> "\\\\d";
+quote($\\, _File) -> "\\\\";
+quote(C, File) when is_integer(C) ->
%% Must remove the $ and get the \'s right.
- case io_lib:write_char(C) of
+ S = case enc(File) of
+ unicode -> io_lib:write_char(C);
+ latin1 -> io_lib:write_char_as_latin1(C)
+ end,
+ case S of
[$$,$\\|Cs] -> "\\\\" ++ Cs;
[$$|Cs] -> Cs
end;
-quote(maxchar) ->
+quote(maxchar, _File) ->
"MAXCHAR".
diff --git a/lib/parsetools/test/leex_SUITE.erl b/lib/parsetools/test/leex_SUITE.erl
index 54602848ec..3f5d9fee3e 100644
--- a/lib/parsetools/test/leex_SUITE.erl
+++ b/lib/parsetools/test/leex_SUITE.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2010-2016. All Rights Reserved.
+%% Copyright Ericsson AB 2010-2017. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -45,7 +45,7 @@
pt/1, man/1, ex/1, ex2/1, not_yet/1,
line_wrap/1,
- otp_10302/1, otp_11286/1, unicode/1, otp_13916/1]).
+ otp_10302/1, otp_11286/1, unicode/1, otp_13916/1, otp_14285/1]).
% Default timetrap timeout (set in init_per_testcase).
-define(default_timeout, ?t:minutes(1)).
@@ -67,7 +67,7 @@ all() ->
groups() ->
[{checks, [], [file, compile, syntax]},
{examples, [], [pt, man, ex, ex2, not_yet, unicode]},
- {tickets, [], [otp_10302, otp_11286, otp_13916]},
+ {tickets, [], [otp_10302, otp_11286, otp_13916, otp_14285]},
{bugs, [], [line_wrap]}].
init_per_suite(Config) ->
@@ -1131,6 +1131,45 @@ otp_13916(Config) when is_list(Config) ->
?line run(Config, Ts),
ok.
+otp_14285(Config) ->
+ Dir = ?privdir,
+ Filename = filename:join(Dir, "file.xrl"),
+
+ Ts = [{otp_14285_1,
+ <<"%% encoding: latin-1\n"
+ "Definitions.\n"
+ "A = a\n"
+ "Z = z\n"
+ "L = [{A}-{Z}]\n"
+ "U = [\\x{400}]\n"
+ "Rules.\n"
+ "{L}+ : {token,l}.\n"
+ "{U}+ : {token,'\\x{400}'}.\n"
+ "Erlang code.\n"
+ "-export([t/0]).\n"
+ "t() ->\n"
+ " {ok,['\\x{400}'],1} = string(\"\\x{400}\"), ok.\n">>,
+ default,
+ ok},
+ {otp_14285_2,
+ <<"%% encoding: UTF-8\n"
+ "Definitions.\n"
+ "A = a\n"
+ "Z = z\n"
+ "L = [{A}-{Z}]\n"
+ "U = [\x{400}]\n"
+ "Rules.\n"
+ "{L}+ : {token,l}.\n"
+ "{U}+ : {token,'\x{400}'}.\n"
+ "Erlang code.\n"
+ "-export([t/0]).\n"
+ "t() ->\n"
+ " {ok,['\x{400}'],1} = string(\"\x{400}\"), ok.\n">>,
+ default,
+ ok}],
+ run(Config, Ts),
+ ok.
+
start_node(Name, Args) ->
[_,Host] = string:tokens(atom_to_list(node()), "@"),
ct:log("Trying to start ~w@~s~n", [Name,Host]),