about summary refs log tree commit diff stats
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r-- lib/stdlib/src/erl_scan.erl 15
-rw-r--r-- lib/stdlib/test/erl_scan_SUITE.erl 119
2 files changed, 74 insertions, 60 deletions
diff --git a/lib/stdlib/src/erl_scan.erl b/lib/stdlib/src/erl_scan.erl
index f4accde8e7..c9c3563d05 100644
--- a/lib/stdlib/src/erl_scan.erl
+++ b/lib/stdlib/src/erl_scan.erl
@@ -106,7 +106,7 @@
ws = false :: boolean(),
comment = false :: boolean(),
text = false :: boolean(),
- unicode = false :: boolean()}).
+ unicode = true :: boolean()}).
%%----------------------------------------------------------------------------
@@ -349,14 +349,14 @@ string_thing(_) -> "string".
%% erl_scan:string("[98,2730,99]."). This is to protect the caller
%% from character codes greater than 255. Search for UNI to find code
%% implementing this "feature". The 'unicode' option is undocumented
-%% and will probably be removed later.
+%% and will be removed later.
-define(NO_UNICODE, 0).
-define(UNI255(C), (C =< 16#ff)).
options(Opts0) when is_list(Opts0) ->
Opts = lists:foldr(fun expand_opt/2, [], Opts0),
- [RW_fun] =
- case opts(Opts, [reserved_word_fun], []) of
+ [RW_fun, Unicode] =
+ case opts(Opts, [reserved_word_fun, unicode], []) of
badarg ->
erlang:error(badarg, [Opts0]);
R ->
@@ -365,7 +365,6 @@ options(Opts0) when is_list(Opts0) ->
Comment = proplists:get_bool(return_comments, Opts),
WS = proplists:get_bool(return_white_spaces, Opts),
Txt = proplists:get_bool(text, Opts),
- Unicode = proplists:get_bool(unicode, Opts),
#erl_scan{resword_fun = RW_fun,
comment = Comment,
ws = WS,
@@ -378,6 +377,8 @@ opts(Options, [Key|Keys], L) ->
V = case lists:keyfind(Key, 1, Options) of
{reserved_word_fun,F} when ?RESWORDFUN(F) ->
{ok,F};
+ {unicode, Bool} when is_boolean(Bool) ->
+ {ok,Bool};
{Key,_} ->
badarg;
false ->
@@ -393,7 +394,9 @@ opts(_Options, [], L) ->
lists:reverse(L).
default_option(reserved_word_fun) ->
- fun reserved_word/1.
+ fun reserved_word/1;
+default_option(unicode) ->
+ true.
expand_opt(return, Os) ->
[return_comments,return_white_spaces|Os];
diff --git a/lib/stdlib/test/erl_scan_SUITE.erl b/lib/stdlib/test/erl_scan_SUITE.erl
index 9a6b2f8f34..3f77d40a2e 100644
--- a/lib/stdlib/test/erl_scan_SUITE.erl
+++ b/lib/stdlib/test/erl_scan_SUITE.erl
@@ -2,7 +2,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 1998-2012. All Rights Reserved.
+%% Copyright Ericsson AB 1998-2013. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
@@ -809,45 +809,52 @@ white_spaces() ->
unicode() ->
?line {ok,[{char,1,83},{integer,1,45}],1} =
- erl_scan:string("$\\12345"), % not unicode
+ erl_scan:string("$\\12345", 1, [{unicode,false}]), % not unicode
?line {error,{1,erl_scan,{illegal,character}},1} =
- erl_scan:string([1089]),
+ erl_scan:string([1089], 1, [{unicode,false}]),
?line {error,{{1,1},erl_scan,{illegal,character}},{1,2}} =
- erl_scan:string([1089], {1,1}),
+ erl_scan:string([1089], {1,1}, [{unicode,false}]),
?line {error,{1,erl_scan,{illegal,character}},1} =
%% ?line {error,{1,erl_scan,{illegal,atom}},1} =
- erl_scan:string("'a"++[1089]++"b'"),
+ erl_scan:string("'a"++[1089]++"b'", 1, [{unicode,false}]),
?line {error,{{1,3},erl_scan,{illegal,character}},{1,4}} =
- erl_scan:string("'a"++[1089]++"b'", {1,1}),
+ erl_scan:string("'a"++[1089]++"b'", {1,1}, [{unicode,false}]),
?line test("\"a"++[1089]++"b\""),
- ?line {ok,[{char,1,1}],1} = erl_scan:string([$$,$\\,$^,1089]),
+ ?line {ok,[{char,1,1}],1} =
+ erl_scan:string([$$,$\\,$^,1089], 1, [{unicode,false}]),
- ?line {error,{1,erl_scan,Error},1} = erl_scan:string("\"qa\x{aaa}"),
+ ?line {error,{1,erl_scan,Error},1} =
+ erl_scan:string("\"qa\x{aaa}", 1, [{unicode,false}]),
?line "unterminated string starting with \"qa"++[2730]++"\"" =
erl_scan:format_error(Error),
?line {error,{{1,1},erl_scan,_},{1,11}} =
- erl_scan:string("\"qa\\x{aaa}",{1,1}),
+ erl_scan:string("\"qa\\x{aaa}",{1,1}, [{unicode,false}]),
?line {error,{{1,4},erl_scan,{illegal,character}},{1,11}} =
- erl_scan:string("'qa\\x{aaa}'",{1,1}),
+ erl_scan:string("'qa\\x{aaa}'",{1,1}, [{unicode,false}]),
Tags = [category, column, length, line, symbol, text],
%% Workaround. No character codes greater than 255! To be changed.
%% Note: don't remove these tests, just modify them!
- ?line {ok,[{integer,1,1089}],1} = erl_scan:string([$$,1089]),
- ?line {ok,[{integer,1,1089}],1} = erl_scan:string([$$,$\\,1089]),
+ ?line {ok,[{integer,1,1089}],1} =
+ erl_scan:string([$$,1089], 1, [{unicode,false}]),
+ ?line {ok,[{integer,1,1089}],1} =
+ erl_scan:string([$$,$\\,1089], 1, [{unicode,false}]),
Qs = "$\\x{aaa}",
- ?line {ok,[{integer,1,16#aaa}],1} = erl_scan:string(Qs),
- ?line {ok,[Q2],{1,9}} = erl_scan:string("$\\x{aaa}", {1,1}, text),
+ ?line {ok,[{integer,1,16#aaa}],1} =
+ erl_scan:string(Qs, 1, [{unicode,false}]),
+ ?line {ok,[Q2],{1,9}} =
+ erl_scan:string("$\\x{aaa}", {1,1}, [text,{unicode,false}]),
?line [{category,integer},{column,1},{length,8},
{line,1},{symbol,16#aaa},{text,Qs}] =
erl_scan:token_info(Q2),
U1 = "\"\\x{aaa}\"",
- ?line {ok,[T1,T2,T3],{1,10}} = erl_scan:string(U1, {1,1}, text),
+ ?line {ok,[T1,T2,T3],{1,10}} =
+ erl_scan:string(U1, {1,1}, [text,{unicode,false}]),
?line [{category,'['},{column,1},{length,1},{line,1},
{symbol,'['},{text,"\""}] = erl_scan:token_info(T1, Tags),
?line [{category,integer},{column,2},{length,7},
@@ -856,21 +863,23 @@ unicode() ->
?line [{category,']'},{column,9},{length,1},{line,1},
{symbol,']'},{text,"\""}] = erl_scan:token_info(T3, Tags),
?line {ok,[{'[',1},{integer,1,16#aaa},{']',1}],1} =
- erl_scan:string(U1, 1),
+ erl_scan:string(U1, 1, [{unicode,false}]),
U2 = "\"\\x41\\x{fff}\\x42\"",
?line {ok,[{'[',1},{char,1,16#41},{',',1},{integer,1,16#fff},
- {',',1},{char,1,16#42},{']',1}],1} = erl_scan:string(U2, 1),
+ {',',1},{char,1,16#42},{']',1}],1} =
+ erl_scan:string(U2, 1, [{unicode,false}]),
U3 = "\"a\n\\x{fff}\n\"",
?line {ok,[{'[',1},{char,1,$a},{',',1},{char,1,$\n},
{',',2},{integer,2,16#fff},{',',2},{char,2,$\n},
{']',3}],3} =
- erl_scan:string(U3, 1),
+ erl_scan:string(U3, 1, [{unicode,false}]),
U4 = "\"\\^\n\\x{aaa}\\^\n\"",
?line {ok,[{'[',1},{char,1,$\n},{',',2},{integer,2,16#aaa},
- {',',2},{char,2,$\n},{']',3}],3} = erl_scan:string(U4, 1),
+ {',',2},{char,2,$\n},{']',3}],3} =
+ erl_scan:string(U4, 1, [{unicode,false}]),
%% Keep these tests:
?line test(Qs),
@@ -882,17 +891,19 @@ unicode() ->
Str1 = "\"ab" ++ [1089] ++ "cd\"",
?line {ok,[{'[',1},{char,1,$a},{',',1},{char,1,$b},{',',1},
{integer,1,1089},{',',1},{char,1,$c},{',',1},
- {char,1,$d},{']',1}],1} = erl_scan:string(Str1),
+ {char,1,$d},{']',1}],1} =
+ erl_scan:string(Str1, 1, [{unicode,false}]),
?line {ok,[{'[',_},{char,_,$a},{',',_},{char,_,$b},{',',_},
{integer,_,1089},{',',_},{char,_,$c},{',',_},
- {char,_,$d},{']',_}],{1,8}} = erl_scan:string(Str1, {1,1}),
+ {char,_,$d},{']',_}],{1,8}} =
+ erl_scan:string(Str1, {1,1}, [{unicode,false}]),
?line test(Str1),
Comment = "%% "++[1089],
%% Returned a comment In R15B03:
{error,{1,erl_scan,{illegal,character}},1} =
- erl_scan:string(Comment, 1, return),
+ erl_scan:string(Comment, 1, [return,{unicode,false}]),
{error,{{1,1},erl_scan,{illegal,character}},{1,5}} =
- erl_scan:string(Comment, {1,1}, return),
+ erl_scan:string(Comment, {1,1}, [return,{unicode,false}]),
ok.
more_chars() ->
@@ -967,16 +978,16 @@ otp_10302(suite) ->
otp_10302(Config) when is_list(Config) ->
%% From unicode():
{error,{1,erl_scan,{illegal,atom}},1} =
- erl_scan:string("'a"++[1089]++"b'", 1, unicode),
+ erl_scan:string("'a"++[1089]++"b'", 1),
{error,{{1,1},erl_scan,{illegal,atom}},{1,12}} =
- erl_scan:string("'qa\\x{aaa}'",{1,1},unicode),
+ erl_scan:string("'qa\\x{aaa}'",{1,1}),
- {ok,[{char,1,1089}],1} = erl_scan:string([$$,1089], 1, unicode),
- {ok,[{char,1,1089}],1} = erl_scan:string([$$,$\\,1089],1,unicode),
+ {ok,[{char,1,1089}],1} = erl_scan:string([$$,1089], 1),
+ {ok,[{char,1,1089}],1} = erl_scan:string([$$,$\\,1089],1),
Qs = "$\\x{aaa}",
- {ok,[{char,1,2730}],1} = erl_scan:string(Qs,1,unicode),
- {ok,[Q2],{1,9}} = erl_scan:string(Qs,{1,1},[unicode,text]),
+ {ok,[{char,1,2730}],1} = erl_scan:string(Qs,1),
+ {ok,[Q2],{1,9}} = erl_scan:string(Qs,{1,1},[text]),
[{category,char},{column,1},{length,8},
{line,1},{symbol,16#aaa},{text,Qs}] =
erl_scan:token_info(Q2),
@@ -984,24 +995,24 @@ otp_10302(Config) when is_list(Config) ->
Tags = [category, column, length, line, symbol, text],
U1 = "\"\\x{aaa}\"",
- {ok,[T1],{1,10}} = erl_scan:string(U1, {1,1}, [unicode,text]),
+ {ok,[T1],{1,10}} = erl_scan:string(U1, {1,1}, [text]),
[{category,string},{column,1},{length,9},{line,1},
{symbol,[16#aaa]},{text,U1}] = erl_scan:token_info(T1, Tags),
U2 = "\"\\x41\\x{fff}\\x42\"",
- {ok,[{string,1,[65,4095,66]}],1} = erl_scan:string(U2, 1, unicode),
+ {ok,[{string,1,[65,4095,66]}],1} = erl_scan:string(U2, 1),
U3 = "\"a\n\\x{fff}\n\"",
- {ok,[{string,1,[97,10,4095,10]}],3} = erl_scan:string(U3, 1,unicode),
+ {ok,[{string,1,[97,10,4095,10]}],3} = erl_scan:string(U3, 1),
U4 = "\"\\^\n\\x{aaa}\\^\n\"",
- {ok,[{string,1,[10,2730,10]}],3} = erl_scan:string(U4, 1,[unicode]),
+ {ok,[{string,1,[10,2730,10]}],3} = erl_scan:string(U4, 1,[]),
Str1 = "\"ab" ++ [1089] ++ "cd\"",
{ok,[{string,1,[97,98,1089,99,100]}],1} =
- erl_scan:string(Str1,1,unicode),
+ erl_scan:string(Str1,1),
{ok,[{string,{1,1},[97,98,1089,99,100]}],{1,8}} =
- erl_scan:string(Str1, {1,1},unicode),
+ erl_scan:string(Str1, {1,1}),
OK1 = 16#D800-1,
OK2 = 16#DFFF+1,
@@ -1016,55 +1027,55 @@ otp_10302(Config) when is_list(Config) ->
IllegalL = [Illegal1,Illegal2,Illegal3,Illegal4],
[{ok,[{comment,1,[$%,$%,$\s,OK]}],1} =
- erl_scan:string("%% "++[OK], 1, [unicode,return]) ||
+ erl_scan:string("%% "++[OK], 1, [return]) ||
OK <- OKL],
{ok,[{comment,_,[$%,$%,$\s,OK1]}],{1,5}} =
- erl_scan:string("%% "++[OK1], {1,1}, [unicode,return]),
+ erl_scan:string("%% "++[OK1], {1,1}, [return]),
[{error,{1,erl_scan,{illegal,character}},1} =
- erl_scan:string("%% "++[Illegal], 1, [unicode,return]) ||
+ erl_scan:string("%% "++[Illegal], 1, [return]) ||
Illegal <- IllegalL],
{error,{{1,1},erl_scan,{illegal,character}},{1,5}} =
- erl_scan:string("%% "++[Illegal1], {1,1}, [unicode,return]),
+ erl_scan:string("%% "++[Illegal1], {1,1}, [return]),
- [{ok,[],1} = erl_scan:string("%% "++[OK], 1, [unicode]) ||
+ [{ok,[],1} = erl_scan:string("%% "++[OK], 1, []) ||
OK <- OKL],
- {ok,[],{1,5}} = erl_scan:string("%% "++[OK1], {1,1}, [unicode]),
+ {ok,[],{1,5}} = erl_scan:string("%% "++[OK1], {1,1}, []),
[{error,{1,erl_scan,{illegal,character}},1} =
- erl_scan:string("%% "++[Illegal], 1, [unicode]) ||
+ erl_scan:string("%% "++[Illegal], 1, []) ||
Illegal <- IllegalL],
{error,{{1,1},erl_scan,{illegal,character}},{1,5}} =
- erl_scan:string("%% "++[Illegal1], {1,1}, [unicode]),
+ erl_scan:string("%% "++[Illegal1], {1,1}, []),
[{ok,[{string,{1,1},[OK]}],{1,4}} =
- erl_scan:string("\""++[OK]++"\"",{1,1},unicode) ||
+ erl_scan:string("\""++[OK]++"\"",{1,1}) ||
OK <- OKL],
[{error,{{1,2},erl_scan,{illegal,character}},{1,3}} =
- erl_scan:string("\""++[OK]++"\"",{1,1},unicode) ||
+ erl_scan:string("\""++[OK]++"\"",{1,1}) ||
OK <- IllegalL],
[{error,{{1,1},erl_scan,{illegal,character}},{1,2}} =
- erl_scan:string([Illegal],{1,1},unicode) ||
+ erl_scan:string([Illegal],{1,1}) ||
Illegal <- IllegalL],
{ok,[{char,{1,1},OK1}],{1,3}} =
- erl_scan:string([$$,OK1],{1,1},unicode),
+ erl_scan:string([$$,OK1],{1,1}),
{error,{{1,1},erl_scan,{illegal,character}},{1,2}} =
- erl_scan:string([$$,Illegal1],{1,1},unicode),
+ erl_scan:string([$$,Illegal1],{1,1}),
{ok,[{char,{1,1},OK1}],{1,4}} =
- erl_scan:string([$$,$\\,OK1],{1,1},unicode),
+ erl_scan:string([$$,$\\,OK1],{1,1}),
{error,{{1,1},erl_scan,{illegal,character}},{1,4}} =
- erl_scan:string([$$,$\\,Illegal1],{1,1},unicode),
+ erl_scan:string([$$,$\\,Illegal1],{1,1}),
{ok,[{string,{1,1},[55295]}],{1,5}} =
- erl_scan:string("\"\\"++[OK1]++"\"",{1,1},unicode),
+ erl_scan:string("\"\\"++[OK1]++"\"",{1,1}),
{error,{{1,2},erl_scan,{illegal,character}},{1,4}} =
- erl_scan:string("\"\\"++[Illegal1]++"\"",{1,1},unicode),
+ erl_scan:string("\"\\"++[Illegal1]++"\"",{1,1}),
{ok,[{char,{1,1},OK1}],{1,10}} =
- erl_scan:string("$\\x{D7FF}",{1,1},unicode),
+ erl_scan:string("$\\x{D7FF}",{1,1}),
{error,{{1,1},erl_scan,{illegal,character}},{1,10}} =
- erl_scan:string("$\\x{D800}",{1,1},unicode),
+ erl_scan:string("$\\x{D800}",{1,1}),
%% Not erl_scan, but erl_parse.
{integer,0,1} = erl_parse:abstract(1),