From 25271fa55aacf0b367ad74532c952352344ed97d Mon Sep 17 00:00:00 2001 From: Hans Bolinder Date: Thu, 2 Feb 2017 10:16:07 +0100 Subject: parsetools: Fix Leex regarding Unicode atoms Notice that macro names are unquoted atoms. --- lib/parsetools/doc/src/leex.xml | 5 ++- lib/parsetools/src/leex.erl | 92 +++++++++++++++++++++++--------------- lib/parsetools/test/leex_SUITE.erl | 45 +++++++++++++++++-- 3 files changed, 100 insertions(+), 42 deletions(-) (limited to 'lib') diff --git a/lib/parsetools/doc/src/leex.xml b/lib/parsetools/doc/src/leex.xml index 29d546105f..1227625287 100644 --- a/lib/parsetools/doc/src/leex.xml +++ b/lib/parsetools/doc/src/leex.xml @@ -4,7 +4,7 @@
- <year>2009</year><year>2016</year> + <year>2009</year><year>2017</year> <holder>Ericsson AB. All Rights Reserved.</holder> @@ -446,7 +446,8 @@ D = [0-9] -

<p>The following examples define Erlang data types:</p>

+

<p>The following examples define simplified versions of a few + Erlang data types:</p>

Atoms [a-z][0-9a-zA-Z_]* diff --git a/lib/parsetools/src/leex.erl b/lib/parsetools/src/leex.erl index e0f37ae9df..e2e7d7359f 100644 --- a/lib/parsetools/src/leex.erl +++ b/lib/parsetools/src/leex.erl @@ -1548,22 +1548,23 @@ out_action_code(File, XrlFile, {_A,Code,_Vars,Name,Args,ArgsChars}) -> L = erl_scan:line(hd(Code)), output_file_directive(File, XrlFile, L-2), io:fwrite(File, "~s(~s) ->~n", [Name, ArgsChars]), - io:fwrite(File, " ~s\n", [pp_tokens(Code, L)]). + io:fwrite(File, " ~ts\n", [pp_tokens(Code, L, File)]). -%% pp_tokens(Tokens, Line) -> [char()]. +%% pp_tokens(Tokens, Line, File) -> [char()]. %% Prints the tokens keeping the line breaks of the original code. -pp_tokens(Tokens, Line0) -> pp_tokens(Tokens, Line0, none). +pp_tokens(Tokens, Line0, File) -> pp_tokens(Tokens, Line0, File, none). -pp_tokens([], _Line0, _) -> []; -pp_tokens([T | Ts], Line0, Prev) -> +pp_tokens([], _Line0, _, _) -> []; +pp_tokens([T | Ts], Line0, File, Prev) -> Line = erl_scan:line(T), - [pp_sep(Line, Line0, Prev, T), pp_symbol(T) | pp_tokens(Ts, Line, T)]. + [pp_sep(Line, Line0, Prev, T), + pp_symbol(T, File) | pp_tokens(Ts, Line, File, T)]. -pp_symbol({var,_,Var}) -> atom_to_list(Var); -pp_symbol({_,_,Symbol}) -> io_lib:fwrite("~p", [Symbol]); -pp_symbol({dot, _}) -> "."; -pp_symbol({Symbol, _}) -> atom_to_list(Symbol). +pp_symbol({var,_,Var}, _) -> atom_to_list(Var); +pp_symbol({_,_,Symbol}, File) -> format_symbol(Symbol, File); +pp_symbol({dot, _}, _) -> "."; +pp_symbol({Symbol, _}, _) -> atom_to_list(Symbol). pp_sep(Line, Line0, Prev, T) when Line > Line0 -> ["\n " | pp_sep(Line - 1, Line0, Prev, T)]; @@ -1622,17 +1623,17 @@ out_dfa_edges(File, DFA) -> end, orddict:new(), Pt), foreach(fun (T) -> Crs = orddict:fetch(T, Tdict), - Edgelab = dfa_edgelabel(Crs), + Edgelab = dfa_edgelabel(Crs, File), io:fwrite(File, " ~b -> ~b [label=\"~ts\"];~n", [S,T,Edgelab]) end, sort(orddict:fetch_keys(Tdict))) end, DFA). 
-dfa_edgelabel([C]) when is_integer(C) -> quote(C); -dfa_edgelabel(Cranges) -> +dfa_edgelabel([C], File) when is_integer(C) -> quote(C, File); +dfa_edgelabel(Cranges, File) -> %% io:fwrite("el: ~p\n", [Cranges]), - "[" ++ map(fun ({A,B}) -> [quote(A), "-", quote(B)]; - (C) -> [quote(C)] + "[" ++ map(fun ({A,B}) -> [quote(A, File), "-", quote(B, File)]; + (C) -> [quote(C, File)] end, Cranges) ++ "]". set_encoding(#leex{encoding = none}, File) -> @@ -1651,33 +1652,50 @@ output_file_directive(File, Filename, Line) -> format_filename(Filename0, File) -> Filename = filename:flatten(Filename0), + case enc(File) of + unicode -> io_lib:write_string(Filename); + latin1 -> io_lib:write_string_as_latin1(Filename) + end. + +format_symbol(Symbol, File) -> + Format = case enc(File) of + latin1 -> "~p"; + unicode -> "~tp" + end, + io_lib:fwrite(Format, [Symbol]). + +enc(File) -> case lists:keyfind(encoding, 1, io:getopts(File)) of - {encoding, unicode} -> io_lib:write_string(Filename); - _ -> io_lib:write_string_as_latin1(Filename) + false -> latin1; % should never happen + {encoding, Enc} -> Enc end. -quote($^) -> "\\^"; -quote($.) 
-> "\\."; -quote($$) -> "\\$"; -quote($-) -> "\\-"; -quote($[) -> "\\["; -quote($]) -> "\\]"; -quote($\s) -> "\\\\s"; -quote($\") -> "\\\""; -quote($\b) -> "\\\\b"; -quote($\f) -> "\\\\f"; -quote($\n) -> "\\\\n"; -quote($\r) -> "\\\\r"; -quote($\t) -> "\\\\t"; -quote($\e) -> "\\\\e"; -quote($\v) -> "\\\\v"; -quote($\d) -> "\\\\d"; -quote($\\) -> "\\\\"; -quote(C) when is_integer(C) -> +quote($^, _File) -> "\\^"; +quote($., _File) -> "\\."; +quote($$, _File) -> "\\$"; +quote($-, _File) -> "\\-"; +quote($[, _File) -> "\\["; +quote($], _File) -> "\\]"; +quote($\s, _File) -> "\\\\s"; +quote($\", _File) -> "\\\""; +quote($\b, _File) -> "\\\\b"; +quote($\f, _File) -> "\\\\f"; +quote($\n, _File) -> "\\\\n"; +quote($\r, _File) -> "\\\\r"; +quote($\t, _File) -> "\\\\t"; +quote($\e, _File) -> "\\\\e"; +quote($\v, _File) -> "\\\\v"; +quote($\d, _File) -> "\\\\d"; +quote($\\, _File) -> "\\\\"; +quote(C, File) when is_integer(C) -> %% Must remove the $ and get the \'s right. - case io_lib:write_char(C) of + S = case enc(File) of + unicode -> io_lib:write_char(C); + latin1 -> io_lib:write_char_as_latin1(C) + end, + case S of [$$,$\\|Cs] -> "\\\\" ++ Cs; [$$|Cs] -> Cs end; -quote(maxchar) -> +quote(maxchar, _File) -> "MAXCHAR". diff --git a/lib/parsetools/test/leex_SUITE.erl b/lib/parsetools/test/leex_SUITE.erl index 54602848ec..3f5d9fee3e 100644 --- a/lib/parsetools/test/leex_SUITE.erl +++ b/lib/parsetools/test/leex_SUITE.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2010-2016. All Rights Reserved. +%% Copyright Ericsson AB 2010-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -45,7 +45,7 @@ pt/1, man/1, ex/1, ex2/1, not_yet/1, line_wrap/1, - otp_10302/1, otp_11286/1, unicode/1, otp_13916/1]). + otp_10302/1, otp_11286/1, unicode/1, otp_13916/1, otp_14285/1]). % Default timetrap timeout (set in init_per_testcase). 
-define(default_timeout, ?t:minutes(1)). @@ -67,7 +67,7 @@ all() -> groups() -> [{checks, [], [file, compile, syntax]}, {examples, [], [pt, man, ex, ex2, not_yet, unicode]}, - {tickets, [], [otp_10302, otp_11286, otp_13916]}, + {tickets, [], [otp_10302, otp_11286, otp_13916, otp_14285]}, {bugs, [], [line_wrap]}]. init_per_suite(Config) -> @@ -1131,6 +1131,45 @@ otp_13916(Config) when is_list(Config) -> ?line run(Config, Ts), ok. +otp_14285(Config) -> + Dir = ?privdir, + Filename = filename:join(Dir, "file.xrl"), + + Ts = [{otp_14285_1, + <<"%% encoding: latin-1\n" + "Definitions.\n" + "A = a\n" + "Z = z\n" + "L = [{A}-{Z}]\n" + "U = [\\x{400}]\n" + "Rules.\n" + "{L}+ : {token,l}.\n" + "{U}+ : {token,'\\x{400}'}.\n" + "Erlang code.\n" + "-export([t/0]).\n" + "t() ->\n" + " {ok,['\\x{400}'],1} = string(\"\\x{400}\"), ok.\n">>, + default, + ok}, + {otp_14285_2, + <<"%% encoding: UTF-8\n" + "Definitions.\n" + "A = a\n" + "Z = z\n" + "L = [{A}-{Z}]\n" + "U = [\x{400}]\n" + "Rules.\n" + "{L}+ : {token,l}.\n" + "{U}+ : {token,'\x{400}'}.\n" + "Erlang code.\n" + "-export([t/0]).\n" + "t() ->\n" + " {ok,['\x{400}'],1} = string(\"\x{400}\"), ok.\n">>, + default, + ok}], + run(Config, Ts), + ok. + start_node(Name, Args) -> [_,Host] = string:tokens(atom_to_list(node()), "@"), ct:log("Trying to start ~w@~s~n", [Name,Host]), -- cgit v1.2.3