From 5cab35f18ec4d05fb2dd50cbb14ad93c7831d7b9 Mon Sep 17 00:00:00 2001 From: Hans Bolinder Date: Fri, 18 Jan 2013 09:29:34 +0100 Subject: [stdlib] Change default of erl_scan's unicode option The value of the undocumented unicode option is very limited. The option will most likely be removed completely soon. --- lib/stdlib/src/erl_scan.erl | 15 +++-- lib/stdlib/test/erl_scan_SUITE.erl | 119 ++++++++++++++++++++----------------- 2 files changed, 74 insertions(+), 60 deletions(-) diff --git a/lib/stdlib/src/erl_scan.erl b/lib/stdlib/src/erl_scan.erl index f4accde8e7..c9c3563d05 100644 --- a/lib/stdlib/src/erl_scan.erl +++ b/lib/stdlib/src/erl_scan.erl @@ -106,7 +106,7 @@ ws = false :: boolean(), comment = false :: boolean(), text = false :: boolean(), - unicode = false :: boolean()}). + unicode = true :: boolean()}). %%---------------------------------------------------------------------------- @@ -349,14 +349,14 @@ string_thing(_) -> "string". %% erl_scan:string("[98,2730,99]."). This is to protect the caller %% from character codes greater than 255. Search for UNI to find code %% implementing this "feature". The 'unicode' option is undocumented -%% and will probably be removed later. +%% and will be removed later. -define(NO_UNICODE, 0). -define(UNI255(C), (C =< 16#ff)). options(Opts0) when is_list(Opts0) -> Opts = lists:foldr(fun expand_opt/2, [], Opts0), - [RW_fun] = - case opts(Opts, [reserved_word_fun], []) of + [RW_fun, Unicode] = + case opts(Opts, [reserved_word_fun, unicode], []) of badarg -> erlang:error(badarg, [Opts0]); R -> @@ -365,7 +365,6 @@ options(Opts0) when is_list(Opts0) -> Comment = proplists:get_bool(return_comments, Opts), WS = proplists:get_bool(return_white_spaces, Opts), Txt = proplists:get_bool(text, Opts), - Unicode = proplists:get_bool(unicode, Opts), #erl_scan{resword_fun = RW_fun, comment = Comment, ws = WS, @@ -378,6 +377,8 @@ opts(Options, [Key|Keys], L) -> V = case lists:keyfind(Key, 1, Options) of {reserved_word_fun,F} when ?RESWORDFUN(F) -> {ok,F}; + {unicode, Bool} when is_boolean(Bool) -> + {ok,Bool}; {Key,_} -> badarg; false -> @@ -393,7 +394,9 @@ opts(_Options, [], L) -> lists:reverse(L). default_option(reserved_word_fun) -> - fun reserved_word/1. + fun reserved_word/1; +default_option(unicode) -> + true. expand_opt(return, Os) -> [return_comments,return_white_spaces|Os]; diff --git a/lib/stdlib/test/erl_scan_SUITE.erl b/lib/stdlib/test/erl_scan_SUITE.erl index 9a6b2f8f34..3f77d40a2e 100644 --- a/lib/stdlib/test/erl_scan_SUITE.erl +++ b/lib/stdlib/test/erl_scan_SUITE.erl @@ -2,7 +2,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1998-2012. All Rights Reserved. +%% Copyright Ericsson AB 1998-2013. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -809,45 +809,52 @@ white_spaces() -> unicode() -> ?line {ok,[{char,1,83},{integer,1,45}],1} = - erl_scan:string("$\\12345"), % not unicode + erl_scan:string("$\\12345", 1, [{unicode,false}]), % not unicode ?line {error,{1,erl_scan,{illegal,character}},1} = - erl_scan:string([1089]), + erl_scan:string([1089], 1, [{unicode,false}]), ?line {error,{{1,1},erl_scan,{illegal,character}},{1,2}} = - erl_scan:string([1089], {1,1}), + erl_scan:string([1089], {1,1}, [{unicode,false}]), ?line {error,{1,erl_scan,{illegal,character}},1} = %% ?line {error,{1,erl_scan,{illegal,atom}},1} = - erl_scan:string("'a"++[1089]++"b'"), + erl_scan:string("'a"++[1089]++"b'", 1, [{unicode,false}]), ?line {error,{{1,3},erl_scan,{illegal,character}},{1,4}} = - erl_scan:string("'a"++[1089]++"b'", {1,1}), + erl_scan:string("'a"++[1089]++"b'", {1,1}, [{unicode,false}]), ?line test("\"a"++[1089]++"b\""), - ?line {ok,[{char,1,1}],1} = erl_scan:string([$$,$\\,$^,1089]), + ?line {ok,[{char,1,1}],1} = + erl_scan:string([$$,$\\,$^,1089], 1, [{unicode,false}]), - ?line {error,{1,erl_scan,Error},1} = erl_scan:string("\"qa\x{aaa}"), + ?line {error,{1,erl_scan,Error},1} = + erl_scan:string("\"qa\x{aaa}", 1, [{unicode,false}]), ?line "unterminated string starting with \"qa"++[2730]++"\"" = erl_scan:format_error(Error), ?line {error,{{1,1},erl_scan,_},{1,11}} = - erl_scan:string("\"qa\\x{aaa}",{1,1}), + erl_scan:string("\"qa\\x{aaa}",{1,1}, [{unicode,false}]), ?line {error,{{1,4},erl_scan,{illegal,character}},{1,11}} = - erl_scan:string("'qa\\x{aaa}'",{1,1}), + erl_scan:string("'qa\\x{aaa}'",{1,1}, [{unicode,false}]), Tags = [category, column, length, line, symbol, text], %% Workaround. No character codes greater than 255! To be changed. %% Note: don't remove these tests, just modify them! - ?line {ok,[{integer,1,1089}],1} = erl_scan:string([$$,1089]), - ?line {ok,[{integer,1,1089}],1} = erl_scan:string([$$,$\\,1089]), + ?line {ok,[{integer,1,1089}],1} = + erl_scan:string([$$,1089], 1, [{unicode,false}]), + ?line {ok,[{integer,1,1089}],1} = + erl_scan:string([$$,$\\,1089], 1, [{unicode,false}]), Qs = "$\\x{aaa}", - ?line {ok,[{integer,1,16#aaa}],1} = erl_scan:string(Qs), - ?line {ok,[Q2],{1,9}} = erl_scan:string("$\\x{aaa}", {1,1}, text), + ?line {ok,[{integer,1,16#aaa}],1} = + erl_scan:string(Qs, 1, [{unicode,false}]), + ?line {ok,[Q2],{1,9}} = + erl_scan:string("$\\x{aaa}", {1,1}, [text,{unicode,false}]), ?line [{category,integer},{column,1},{length,8}, {line,1},{symbol,16#aaa},{text,Qs}] = erl_scan:token_info(Q2), U1 = "\"\\x{aaa}\"", - ?line {ok,[T1,T2,T3],{1,10}} = erl_scan:string(U1, {1,1}, text), + ?line {ok,[T1,T2,T3],{1,10}} = + erl_scan:string(U1, {1,1}, [text,{unicode,false}]), ?line [{category,'['},{column,1},{length,1},{line,1}, {symbol,'['},{text,"\""}] = erl_scan:token_info(T1, Tags), ?line [{category,integer},{column,2},{length,7}, @@ -856,21 +863,23 @@ unicode() -> ?line [{category,']'},{column,9},{length,1},{line,1}, {symbol,']'},{text,"\""}] = erl_scan:token_info(T3, Tags), ?line {ok,[{'[',1},{integer,1,16#aaa},{']',1}],1} = - erl_scan:string(U1, 1), + erl_scan:string(U1, 1, [{unicode,false}]), U2 = "\"\\x41\\x{fff}\\x42\"", ?line {ok,[{'[',1},{char,1,16#41},{',',1},{integer,1,16#fff}, - {',',1},{char,1,16#42},{']',1}],1} = erl_scan:string(U2, 1), + {',',1},{char,1,16#42},{']',1}],1} = + erl_scan:string(U2, 1, [{unicode,false}]), U3 = "\"a\n\\x{fff}\n\"", ?line {ok,[{'[',1},{char,1,$a},{',',1},{char,1,$\n}, {',',2},{integer,2,16#fff},{',',2},{char,2,$\n}, {']',3}],3} = - erl_scan:string(U3, 1), + erl_scan:string(U3, 1, [{unicode,false}]), U4 = "\"\\^\n\\x{aaa}\\^\n\"", ?line {ok,[{'[',1},{char,1,$\n},{',',2},{integer,2,16#aaa}, - {',',2},{char,2,$\n},{']',3}],3} = erl_scan:string(U4, 1), + {',',2},{char,2,$\n},{']',3}],3} = + erl_scan:string(U4, 1, [{unicode,false}]), %% Keep these tests: ?line test(Qs), @@ -882,17 +891,19 @@ unicode() -> Str1 = "\"ab" ++ [1089] ++ "cd\"", ?line {ok,[{'[',1},{char,1,$a},{',',1},{char,1,$b},{',',1}, {integer,1,1089},{',',1},{char,1,$c},{',',1}, - {char,1,$d},{']',1}],1} = erl_scan:string(Str1), + {char,1,$d},{']',1}],1} = + erl_scan:string(Str1, 1, [{unicode,false}]), ?line {ok,[{'[',_},{char,_,$a},{',',_},{char,_,$b},{',',_}, {integer,_,1089},{',',_},{char,_,$c},{',',_}, - {char,_,$d},{']',_}],{1,8}} = erl_scan:string(Str1, {1,1}), + {char,_,$d},{']',_}],{1,8}} = + erl_scan:string(Str1, {1,1}, [{unicode,false}]), ?line test(Str1), Comment = "%% "++[1089], %% Returned a comment In R15B03: {error,{1,erl_scan,{illegal,character}},1} = - erl_scan:string(Comment, 1, return), + erl_scan:string(Comment, 1, [return,{unicode,false}]), {error,{{1,1},erl_scan,{illegal,character}},{1,5}} = - erl_scan:string(Comment, {1,1}, return), + erl_scan:string(Comment, {1,1}, [return,{unicode,false}]), ok. more_chars() -> @@ -967,16 +978,16 @@ otp_10302(suite) -> otp_10302(Config) when is_list(Config) -> %% From unicode(): {error,{1,erl_scan,{illegal,atom}},1} = - erl_scan:string("'a"++[1089]++"b'", 1, unicode), + erl_scan:string("'a"++[1089]++"b'", 1), {error,{{1,1},erl_scan,{illegal,atom}},{1,12}} = - erl_scan:string("'qa\\x{aaa}'",{1,1},unicode), + erl_scan:string("'qa\\x{aaa}'",{1,1}), - {ok,[{char,1,1089}],1} = erl_scan:string([$$,1089], 1, unicode), - {ok,[{char,1,1089}],1} = erl_scan:string([$$,$\\,1089],1,unicode), + {ok,[{char,1,1089}],1} = erl_scan:string([$$,1089], 1), + {ok,[{char,1,1089}],1} = erl_scan:string([$$,$\\,1089],1), Qs = "$\\x{aaa}", - {ok,[{char,1,2730}],1} = erl_scan:string(Qs,1,unicode), - {ok,[Q2],{1,9}} = erl_scan:string(Qs,{1,1},[unicode,text]), + {ok,[{char,1,2730}],1} = erl_scan:string(Qs,1), + {ok,[Q2],{1,9}} = erl_scan:string(Qs,{1,1},[text]), [{category,char},{column,1},{length,8}, {line,1},{symbol,16#aaa},{text,Qs}] = erl_scan:token_info(Q2), @@ -984,24 +995,24 @@ otp_10302(Config) when is_list(Config) -> Tags = [category, column, length, line, symbol, text], U1 = "\"\\x{aaa}\"", - {ok,[T1],{1,10}} = erl_scan:string(U1, {1,1}, [unicode,text]), + {ok,[T1],{1,10}} = erl_scan:string(U1, {1,1}, [text]), [{category,string},{column,1},{length,9},{line,1}, {symbol,[16#aaa]},{text,U1}] = erl_scan:token_info(T1, Tags), U2 = "\"\\x41\\x{fff}\\x42\"", - {ok,[{string,1,[65,4095,66]}],1} = erl_scan:string(U2, 1, unicode), + {ok,[{string,1,[65,4095,66]}],1} = erl_scan:string(U2, 1), U3 = "\"a\n\\x{fff}\n\"", - {ok,[{string,1,[97,10,4095,10]}],3} = erl_scan:string(U3, 1,unicode), + {ok,[{string,1,[97,10,4095,10]}],3} = erl_scan:string(U3, 1), U4 = "\"\\^\n\\x{aaa}\\^\n\"", - {ok,[{string,1,[10,2730,10]}],3} = erl_scan:string(U4, 1,[unicode]), + {ok,[{string,1,[10,2730,10]}],3} = erl_scan:string(U4, 1,[]), Str1 = "\"ab" ++ [1089] ++ "cd\"", {ok,[{string,1,[97,98,1089,99,100]}],1} = - erl_scan:string(Str1,1,unicode), + erl_scan:string(Str1,1), {ok,[{string,{1,1},[97,98,1089,99,100]}],{1,8}} = - erl_scan:string(Str1, {1,1},unicode), + erl_scan:string(Str1, {1,1}), OK1 = 16#D800-1, OK2 = 16#DFFF+1, @@ -1016,55 +1027,55 @@ otp_10302(Config) when is_list(Config) -> IllegalL = [Illegal1,Illegal2,Illegal3,Illegal4], [{ok,[{comment,1,[$%,$%,$\s,OK]}],1} = - erl_scan:string("%% "++[OK], 1, [unicode,return]) || + erl_scan:string("%% "++[OK], 1, [return]) || OK <- OKL], {ok,[{comment,_,[$%,$%,$\s,OK1]}],{1,5}} = - erl_scan:string("%% "++[OK1], {1,1}, [unicode,return]), + erl_scan:string("%% "++[OK1], {1,1}, [return]), [{error,{1,erl_scan,{illegal,character}},1} = - erl_scan:string("%% "++[Illegal], 1, [unicode,return]) || + erl_scan:string("%% "++[Illegal], 1, [return]) || Illegal <- IllegalL], {error,{{1,1},erl_scan,{illegal,character}},{1,5}} = - erl_scan:string("%% "++[Illegal1], {1,1}, [unicode,return]), + erl_scan:string("%% "++[Illegal1], {1,1}, [return]), - [{ok,[],1} = erl_scan:string("%% "++[OK], 1, [unicode]) || + [{ok,[],1} = erl_scan:string("%% "++[OK], 1, []) || OK <- OKL], - {ok,[],{1,5}} = erl_scan:string("%% "++[OK1], {1,1}, [unicode]), + {ok,[],{1,5}} = erl_scan:string("%% "++[OK1], {1,1}, []), [{error,{1,erl_scan,{illegal,character}},1} = - erl_scan:string("%% "++[Illegal], 1, [unicode]) || + erl_scan:string("%% "++[Illegal], 1, []) || Illegal <- IllegalL], {error,{{1,1},erl_scan,{illegal,character}},{1,5}} = - erl_scan:string("%% "++[Illegal1], {1,1}, [unicode]), + erl_scan:string("%% "++[Illegal1], {1,1}, []), [{ok,[{string,{1,1},[OK]}],{1,4}} = - erl_scan:string("\""++[OK]++"\"",{1,1},unicode) || + erl_scan:string("\""++[OK]++"\"",{1,1}) || OK <- OKL], [{error,{{1,2},erl_scan,{illegal,character}},{1,3}} = - erl_scan:string("\""++[OK]++"\"",{1,1},unicode) || + erl_scan:string("\""++[OK]++"\"",{1,1}) || OK <- IllegalL], [{error,{{1,1},erl_scan,{illegal,character}},{1,2}} = - erl_scan:string([Illegal],{1,1},unicode) || + erl_scan:string([Illegal],{1,1}) || Illegal <- IllegalL], {ok,[{char,{1,1},OK1}],{1,3}} = - erl_scan:string([$$,OK1],{1,1},unicode), + erl_scan:string([$$,OK1],{1,1}), {error,{{1,1},erl_scan,{illegal,character}},{1,2}} = - erl_scan:string([$$,Illegal1],{1,1},unicode), + erl_scan:string([$$,Illegal1],{1,1}), {ok,[{char,{1,1},OK1}],{1,4}} = - erl_scan:string([$$,$\\,OK1],{1,1},unicode), + erl_scan:string([$$,$\\,OK1],{1,1}), {error,{{1,1},erl_scan,{illegal,character}},{1,4}} = - erl_scan:string([$$,$\\,Illegal1],{1,1},unicode), + erl_scan:string([$$,$\\,Illegal1],{1,1}), {ok,[{string,{1,1},[55295]}],{1,5}} = - erl_scan:string("\"\\"++[OK1]++"\"",{1,1},unicode), + erl_scan:string("\"\\"++[OK1]++"\"",{1,1}), {error,{{1,2},erl_scan,{illegal,character}},{1,4}} = - erl_scan:string("\"\\"++[Illegal1]++"\"",{1,1},unicode), + erl_scan:string("\"\\"++[Illegal1]++"\"",{1,1}), {ok,[{char,{1,1},OK1}],{1,10}} = - erl_scan:string("$\\x{D7FF}",{1,1},unicode), + erl_scan:string("$\\x{D7FF}",{1,1}), {error,{{1,1},erl_scan,{illegal,character}},{1,10}} = - erl_scan:string("$\\x{D800}",{1,1},unicode), + erl_scan:string("$\\x{D800}",{1,1}), %% Not erl_scan, but erl_parse. {integer,0,1} = erl_parse:abstract(1), -- cgit v1.2.3