From 300c5466a7c9cfe3ed22bba2a88ba21058406402 Mon Sep 17 00:00:00 2001 From: Hans Bolinder Date: Thu, 4 Oct 2012 15:58:26 +0200 Subject: [stdlib, kernel] Introduce Unicode support for Erlang source files Expect modifications, additions and corrections. There is a kludge in file_io_server and erl_scan:continuation_location() that's not so pleasing. --- lib/stdlib/test/epp_SUITE.erl | 82 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 76 insertions(+), 6 deletions(-) (limited to 'lib/stdlib/test/epp_SUITE.erl') diff --git a/lib/stdlib/test/epp_SUITE.erl b/lib/stdlib/test/epp_SUITE.erl index 77c615d6d9..606bbbcbb2 100644 --- a/lib/stdlib/test/epp_SUITE.erl +++ b/lib/stdlib/test/epp_SUITE.erl @@ -25,7 +25,7 @@ variable_1/1, otp_4870/1, otp_4871/1, otp_5362/1, pmod/1, not_circular/1, skip_header/1, otp_6277/1, otp_7702/1, otp_8130/1, overload_mac/1, otp_8388/1, otp_8470/1, otp_8503/1, - otp_8562/1, otp_8665/1, otp_8911/1]). + otp_8562/1, otp_8665/1, otp_8911/1, otp_10302/1]). -export([epp_parse_erl_form/2]). @@ -67,7 +67,7 @@ all() -> {group, variable}, otp_4870, otp_4871, otp_5362, pmod, not_circular, skip_header, otp_6277, otp_7702, otp_8130, overload_mac, otp_8388, otp_8470, otp_8503, otp_8562, - otp_8665, otp_8911]. + otp_8665, otp_8911, otp_10302]. groups() -> [{upcase_mac, [], [upcase_mac_1, upcase_mac_2]}, @@ -582,12 +582,13 @@ otp_8130(suite) -> otp_8130(Config) when is_list(Config) -> true = os:putenv("epp_inc1", "stdlib"), Ts = [{otp_8130_1, - %% The scanner handles UNICODE in a special way. Hopefully - %% temporarily. <<"-define(M(A), ??A). " "t() -> " - " \"{ 34 , [ $1 , 2730 ] , \\\"34\\\" , X . a , 2730 }\" = " - " ?M({34,\"1\\x{aaa}\",\"34\",X.a,$\\x{aaa}}), ok. ">>, + " L = \"{ 34 , \\\"1\\\\x{AAA}\\\" , \\\"34\\\" , X . a , $\\\\x{AAA} }\", " + " R = ?M({34,\"1\\x{aaa}\",\"34\",X.a,$\\x{aaa}})," + " Lt = erl_scan:string(L, 1, [unicode])," + " Rt = erl_scan:string(R, 1, [unicode])," + " Lt = Rt, ok. ">>, ok}, {otp_8130_2, @@ -1284,6 +1285,75 @@ otp_8665(Config) when is_list(Config) -> ?line [] = compile(Config, Cs), ok. +otp_10302(doc) -> + "OTP-10302. Unicode characters scanner/parser."; +otp_10302(suite) -> + []; +otp_10302(Config) when is_list(Config) -> + %% Two messages (one too many). Keeps otp_4871 happy. + Cs = [{otp_8562, + <<"%% coding: utf-8\n \n \x{E4}">>, + {errors,[{3,epp,cannot_parse}, + {3,file_io_server,invalid_unicode}],[]}} + ], + [] = compile(Config, Cs), + Dir = ?config(priv_dir, Config), + File = filename:join(Dir, "otp_10302.erl"), + utf8 = encoding("coding: utf-8", File), + utf8 = encoding("coding: UTF-8", File), + latin1 = encoding("coding: Latin-1", File), + latin1 = encoding("coding: latin-1", File), + none = encoding_com("coding: utf-8", File), + none = encoding_com("\n\n%% coding: utf-8", File), + none = encoding_nocom("\n\n coding: utf-8", File), + utf8 = encoding_com("\n%% coding: utf-8", File), + utf8 = encoding_nocom("\n coding: utf-8", File), + none = encoding("coding: \nutf-8", File), + latin1 = encoding("Encoding : latin-1", File), + utf8 = encoding("ccoding: UTF-8", File), + utf8 = encoding("coding= utf-8", File), + utf8 = encoding_com(" %% coding= utf-8", File), + utf8 = encoding("coding = utf-8", File), + none = encoding("coding: utf-16 coding: utf-8", File), %first is bad + none = encoding("Coding: utf-8", File), %capital c + utf8 = encoding("-*- coding: utf-8 -*-", File), + utf8 = encoding("-*-coding= utf-8-*-", File), + utf8 = encoding("codingcoding= utf-8", File), + ok = prefix("coding: utf-8", File, utf8), + + "coding: latin-1" = epp:encoding_to_string(latin1), + "coding: utf-8" = epp:encoding_to_string(utf8), + true = lists:member(epp:default_encoding(), [latin1, utf8]), + + ok. + +prefix(S, File, Enc) -> + prefix(0, S, File, Enc). + +prefix(100, _S, _File, _) -> + ok; +prefix(N, S, File, Enc) -> + Enc = encoding(lists:duplicate(N, $\s) ++ S, File), + prefix(N+1, S, File, Enc). + +encoding(Enc, File) -> + E = encoding_com("%% " ++ Enc, File), + none = encoding_com(Enc, File), + E = encoding_nocom(Enc, File). + +encoding_com(Enc, File) -> + ok = file:write_file(File, Enc), + {ok, Fd} = file:open(File, [read]), + E = epp:set_encoding(Fd), + ok = file:close(Fd), + E = epp:read_encoding(File). + +encoding_nocom(Enc, File) -> + ok = file:write_file(File, Enc), + {ok, Fd} = file:open(File, [read]), + ok = file:close(Fd), + epp:read_encoding(File, [{in_comment_only, false}]). + check(Config, Tests) -> eval_tests(Config, fun check_test/2, Tests). -- cgit v1.2.3