aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHans Bolinder <[email protected]>2013-02-11 09:47:09 +0100
committerHans Bolinder <[email protected]>2013-02-11 09:47:09 +0100
commitda88a14f95bfee5bfbcfcb04dd72ac17a912f3b5 (patch)
tree948923932ee3f5a4531f2054c44d5c0b25aff111
parentf1e98d7c70457f85be52cefda73ec4f4d98f5822 (diff)
parent05c4d31b2c5c5f6ade7d91b55c598fe7fa58e509 (diff)
downloadotp-da88a14f95bfee5bfbcfcb04dd72ac17a912f3b5.tar.gz
otp-da88a14f95bfee5bfbcfcb04dd72ac17a912f3b5.tar.bz2
otp-da88a14f95bfee5bfbcfcb04dd72ac17a912f3b5.zip
Merge branch 'hb/stdlib/erl_scan_unicode/OTP-10756'
* hb/stdlib/erl_scan_unicode/OTP-10756: [stdlib] Remove the undocumented 'unicode' option from the scanner
-rw-r--r--lib/stdlib/src/erl_scan.erl301
-rw-r--r--lib/stdlib/test/erl_scan_SUITE.erl108
2 files changed, 138 insertions, 271 deletions
diff --git a/lib/stdlib/src/erl_scan.erl b/lib/stdlib/src/erl_scan.erl
index 26d5747ee7..3651f608bc 100644
--- a/lib/stdlib/src/erl_scan.erl
+++ b/lib/stdlib/src/erl_scan.erl
@@ -84,8 +84,7 @@
-type location() :: line() | {line(),column()}.
-type resword_fun() :: fun((atom()) -> boolean()).
-type option() :: 'return' | 'return_white_spaces' | 'return_comments'
- | 'text' | {'reserved_word_fun', resword_fun()}
- | 'unicode'.
+ | 'text' | {'reserved_word_fun', resword_fun()}.
-type options() :: option() | [option()].
-type symbol() :: atom() | float() | integer() | string().
-type info_line() :: integer() | term().
@@ -106,8 +105,7 @@
{resword_fun = fun reserved_word/1 :: resword_fun(),
ws = false :: boolean(),
comment = false :: boolean(),
- text = false :: boolean(),
- unicode = true :: boolean()}).
+ text = false :: boolean()}).
%%----------------------------------------------------------------------------
@@ -344,20 +342,12 @@ string_thing(_) -> "string".
C > 16#DFFF andalso C < 16#FFFE orelse
C > 16#FFFF andalso C =< 16#10FFFF)).
-%% When the option 'unicode' is false: return Unicode strings as lists
-%% of integers and Unicode characters as integers. For instance,
-%% erl_scan:string("\"b\x{aaa}c\".") is equivalent to
-%% erl_scan:string("[98,2730,99]."). This is to protect the caller
-%% from character codes greater than 255. Search for UNI to find code
-%% implementing this "feature". The 'unicode' option is undocumented
-%% and will be removed later.
--define(NO_UNICODE, 0).
--define(UNI255(C), (C =< 16#ff)).
+-define(UNI255(C), C >= 0, C =< 16#ff).
options(Opts0) when is_list(Opts0) ->
Opts = lists:foldr(fun expand_opt/2, [], Opts0),
- [RW_fun, Unicode] =
- case opts(Opts, [reserved_word_fun, unicode], []) of
+ [RW_fun] =
+ case opts(Opts, [reserved_word_fun], []) of
badarg ->
erlang:error(badarg, [Opts0]);
R ->
@@ -369,8 +359,7 @@ options(Opts0) when is_list(Opts0) ->
#erl_scan{resword_fun = RW_fun,
comment = Comment,
ws = WS,
- text = Txt,
- unicode = Unicode};
+ text = Txt};
options(Opt) ->
options([Opt]).
@@ -378,8 +367,6 @@ opts(Options, [Key|Keys], L) ->
V = case lists:keyfind(Key, 1, Options) of
{reserved_word_fun,F} when ?RESWORDFUN(F) ->
{ok,F};
- {unicode, Bool} when is_boolean(Bool) ->
- {ok,Bool};
{Key,_} ->
badarg;
false ->
@@ -395,9 +382,7 @@ opts(_Options, [], L) ->
lists:reverse(L).
default_option(reserved_word_fun) ->
- fun reserved_word/1;
-default_option(unicode) ->
- true.
+ fun reserved_word/1.
expand_opt(return, Os) ->
[return_comments,return_white_spaces|Os];
@@ -531,10 +516,10 @@ scan1("."=Cs, _St, Line, Col, Toks) ->
scan1([$.=C|Cs], St, Line, Col, Toks) ->
scan_dot(Cs, St, Line, Col, Toks, [C]);
scan1([$"|Cs], St, Line, Col, Toks) -> %" Emacs
- State0 = {[],[],Line,Col,?NO_UNICODE},
+ State0 = {[],[],Line,Col},
scan_string(Cs, St, Line, incr_column(Col, 1), Toks, State0);
scan1([$'|Cs], St, Line, Col, Toks) -> %' Emacs
- State0 = {[],[],Line,Col,?NO_UNICODE},
+ State0 = {[],[],Line,Col},
scan_qatom(Cs, St, Line, incr_column(Col, 1), Toks, State0);
scan1([$$|Cs], St, Line, Col, Toks) ->
scan_char(Cs, St, Line, Col, Toks);
@@ -655,7 +640,7 @@ scan1([$~|Cs], St, Line, Col, Toks) ->
scan1([$&|Cs], St, Line, Col, Toks) ->
tok2(Cs, St, Line, Col, Toks, "&", '&', 1);
%% End of optimization.
-scan1([C|Cs], St, Line, Col, Toks) when ?CHAR(C), ?UNI255(C) ->
+scan1([C|Cs], St, Line, Col, Toks) when ?UNI255(C) ->
Str = [C],
tok2(Cs, St, Line, Col, Toks, Str, list_to_atom(Str), 1);
scan1([C|Cs], _St, Line, Col, _Toks) when ?CHAR(C) ->
@@ -718,14 +703,16 @@ scan_name([], Ncs) ->
scan_name(Cs, Ncs) ->
{lists:reverse(Ncs),Cs}.
+-define(STR(St, S), if St#erl_scan.text -> S; true -> [] end).
+
scan_dot([$%|_]=Cs, St, Line, Col, Toks, Ncs) ->
Attrs = attributes(Line, Col, St, Ncs),
{ok,[{dot,Attrs}|Toks],Cs,Line,incr_column(Col, 1)};
scan_dot([$\n=C|Cs], St, Line, Col, Toks, Ncs) ->
- Attrs = attributes(Line, Col, St, Ncs++[C]),
+ Attrs = attributes(Line, Col, St, ?STR(St, Ncs++[C])),
{ok,[{dot,Attrs}|Toks],Cs,Line+1,new_column(Col, 1)};
scan_dot([C|Cs], St, Line, Col, Toks, Ncs) when ?WHITE_SPACE(C) ->
- Attrs = attributes(Line, Col, St, Ncs++[C]),
+ Attrs = attributes(Line, Col, St, ?STR(St, Ncs++[C])),
{ok,[{dot,Attrs}|Toks],Cs,Line,incr_column(Col, 2)};
scan_dot(eof=Cs, St, Line, Col, Toks, Ncs) ->
Attrs = attributes(Line, Col, St, Ncs),
@@ -858,26 +845,20 @@ scan_char([$\\|Cs]=Cs0, St, Line, Col, Toks) ->
{eof,Ncol} ->
scan_error(char, Line, Col, Line, Ncol, eof);
{nl,Val,Str,Ncs,Ncol} ->
- Attrs = attributes(Line, Col, St, "$\\"++Str), %"
+ Attrs = attributes(Line, Col, St, ?STR(St, "$\\"++Str)), %"
Ntoks = [{char,Attrs,Val}|Toks],
scan1(Ncs, St, Line+1, Ncol, Ntoks);
- {unicode,Val,Str,Ncs,Ncol} ->
- Attrs = attributes(Line, Col, St, "$\\"++Str), %"
- Tag = char_tag(Val, St), % UNI
- Ntoks = [{Tag,Attrs,Val}|Toks],
- scan1(Ncs, St, Line, Ncol, Ntoks);
{Val,Str,Ncs,Ncol} ->
- Attrs = attributes(Line, Col, St, "$\\"++Str), %"
+ Attrs = attributes(Line, Col, St, ?STR(St, "$\\"++Str)), %"
Ntoks = [{char,Attrs,Val}|Toks],
scan1(Ncs, St, Line, Ncol, Ntoks)
end;
scan_char([$\n=C|Cs], St, Line, Col, Toks) ->
- Attrs = attributes(Line, Col, St, [$$,C]),
+ Attrs = attributes(Line, Col, St, ?STR(St, [$$,C])),
scan1(Cs, St, Line+1, new_column(Col, 1), [{char,Attrs,C}|Toks]);
scan_char([C|Cs], St, Line, Col, Toks) when ?UNICODE(C) ->
- Tag = char_tag(C, St), % UNI
- Attrs = attributes(Line, Col, St, [$$,C]),
- scan1(Cs, St, Line, incr_column(Col, 2), [{Tag,Attrs,C}|Toks]);
+ Attrs = attributes(Line, Col, St, ?STR(St, [$$,C])),
+ scan1(Cs, St, Line, incr_column(Col, 2), [{char,Attrs,C}|Toks]);
scan_char([C|_Cs], _St, Line, Col, _Toks) when ?CHAR(C) ->
scan_error({illegal,character}, Line, Col, Line, incr_column(Col, 1), eof);
scan_char([], _St, Line, Col, Toks) ->
@@ -885,90 +866,32 @@ scan_char([], _St, Line, Col, Toks) ->
scan_char(eof, _St, Line, Col, _Toks) ->
scan_error(char, Line, Col, Line, incr_column(Col, 1), eof).
--compile({inline,[char_tag/2]}).
-
-char_tag(C, _St) when ?UNI255(C) ->
- char;
-char_tag(_C, #erl_scan{unicode = true}) ->
- char;
-char_tag(_C, _St) ->
- integer.
-
-scan_string(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0,Uni0}) ->
- case scan_string0(Cs, St, Line, Col, $\", true, Str, Wcs, Uni0) of %"
- {more,Ncs,Nline,Ncol,Nstr,Nwcs,Uni} ->
- State = {Nwcs,Nstr,Line0,Col0,Uni},
+scan_string(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0}) ->
+ case scan_string0(Cs, St, Line, Col, $\", Str, Wcs) of %"
+ {more,Ncs,Nline,Ncol,Nstr,Nwcs} ->
+ State = {Nwcs,Nstr,Line0,Col0},
{more,{Ncs,Ncol,Toks,Nline,State,fun scan_string/6}};
{char_error,Ncs,Error,Nline,Ncol,EndCol} ->
scan_error(Error, Nline, Ncol, Nline, EndCol, Ncs);
{error,Nline,Ncol,Nwcs,Ncs} ->
Estr = string:substr(Nwcs, 1, 16), % Expanded escape chars.
scan_error({string,$\",Estr}, Line0, Col0, Nline, Ncol, Ncs); %"
- {Ncs,Nline,Ncol,Nstr,Nwcs,Uni} when Uni =:= ?NO_UNICODE;
- St#erl_scan.unicode ->
+ {Ncs,Nline,Ncol,Nstr,Nwcs} ->
Attrs = attributes(Line0, Col0, St, Nstr),
- scan1(Ncs, St, Nline, Ncol, [{string,Attrs,Nwcs}|Toks]);
- {Ncs,Nline,Ncol,Nstr,_Nwcs,_Uni} ->
- Ntoks = unicode_string_to_list(Line0, Col0, St, Nstr, Toks),
- scan1(Ncs, St, Nline, Ncol, Ntoks)
+ scan1(Ncs, St, Nline, Ncol, [{string,Attrs,Nwcs}|Toks])
end.
-%% UNI
-unicode_string_to_list(Line, Col, St, [$"=C|Nstr], Toks) -> %" Emacs
- Paren = {'[',attributes(Line, Col, St, [C])},
- u2l(Nstr, Line, incr_column(Col, 1), St, [Paren|Toks]).
-
-u2l([$"]=Cs, Line, Col, St, Toks) -> %" Emacs
- [{']',attributes(Line, Col, St, Cs)}|Toks];
-u2l([$\n=C|Cs], Line, Col, St, Toks) ->
- Ntoks = unicode_nl_tokens(Line, Col, [C], C, St, Toks, Cs),
- u2l(Cs, Line+1, new_column(Col, 1), St, Ntoks);
-u2l([$\\|Cs], Line, Col, St, Toks) ->
- case scan_escape(Cs, Col) of
- {nl,Val,ValStr,Ncs,Ncol} ->
- Nstr = [$\\|ValStr],
- Ntoks = unicode_nl_tokens(Line, Col, Nstr, Val, St, Toks, Ncs),
- u2l(Ncs, Line+1, Ncol, St, Ntoks);
- {unicode,Val,ValStr,Ncs,Ncol} ->
- Nstr = [$\\|ValStr],
- Ntoks = unicode_tokens(Line, Col, Nstr, Val, St, Toks, Ncs),
- u2l(Ncs, Line, incr_column(Ncol, 1), St, Ntoks);
- {Val,ValStr,Ncs,Ncol} ->
- Nstr = [$\\|ValStr],
- Ntoks = unicode_tokens(Line, Col, Nstr, Val, St, Toks, Ncs),
- u2l(Ncs, Line, incr_column(Ncol, 1), St, Ntoks)
- end;
-u2l([C|Cs], Line, Col, St, Toks) ->
- Ntoks = unicode_tokens(Line, Col, [C], C, St, Toks, Cs),
- u2l(Cs, Line, incr_column(Col, 1), St, Ntoks).
-
-unicode_nl_tokens(Line, Col, Str, Val, St, Toks, Cs) ->
- Ccol = new_column(Col, 1),
- unicode_tokens(Line, Col, Str, Val, St, Toks, Cs, Line+1, Ccol).
-
-unicode_tokens(Line, Col, Str, Val, St, Toks, Cs) ->
- Ccol = incr_column(Col, length(Str)),
- unicode_tokens(Line, Col, Str, Val, St, Toks, Cs, Line, Ccol).
-
-unicode_tokens(Line, Col, Str, Val, St, Toks, Cs, Cline, Ccol) ->
- Attrs = attributes(Line, Col, St, Str),
- Tag = if ?UNI255(Val) -> char; true -> integer end,
- Token = {Tag,Attrs,Val},
- [{',',attributes(Cline, Ccol, St, "")} || Cs =/= "\""] ++ [Token|Toks].
-
-scan_qatom(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0,Uni0}) ->
- AllowUni = St#erl_scan.unicode,
- case scan_string0(Cs, St, Line, Col, $\', AllowUni, Str, Wcs, Uni0) of %'
- {more,Ncs,Nline,Ncol,Nstr,Nwcs,Uni} ->
- State = {Nwcs,Nstr,Line0,Col0,Uni},
+scan_qatom(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0}) ->
+ case scan_string0(Cs, St, Line, Col, $\', Str, Wcs) of %'
+ {more,Ncs,Nline,Ncol,Nstr,Nwcs} ->
+ State = {Nwcs,Nstr,Line0,Col0},
{more,{Ncs,Ncol,Toks,Nline,State,fun scan_qatom/6}};
{char_error,Ncs,Error,Nline,Ncol,EndCol} ->
scan_error(Error, Nline, Ncol, Nline, EndCol, Ncs);
{error,Nline,Ncol,Nwcs,Ncs} ->
Estr = string:substr(Nwcs, 1, 16), % Expanded escape chars.
scan_error({string,$\',Estr}, Line0, Col0, Nline, Ncol, Ncs); %'
- {Ncs,Nline,Ncol,Nstr,Nwcs,Uni} ->
- true = Uni =:= ?NO_UNICODE orelse AllowUni,
+ {Ncs,Nline,Ncol,Nstr,Nwcs} ->
case catch list_to_atom(Nwcs) of
A when is_atom(A) ->
Attrs = attributes(Line0, Col0, St, Nstr),
@@ -978,95 +901,75 @@ scan_qatom(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0,Uni0}) ->
end
end.
-scan_string0(Cs, #erl_scan{text=false}, Line, no_col=Col, Q, U, [], Wcs, Uni) ->
- scan_string_no_col(Cs, Line, Col, Q, U, Wcs, Uni);
-scan_string0(Cs, #erl_scan{text=true}, Line, no_col=Col, Q, U, Str, Wcs, Uni) ->
- scan_string1(Cs, Line, Col, Q, U, Str, Wcs, Uni);
-scan_string0(Cs, _St, Line, Col, Q, U, [], Wcs, Uni) ->
- scan_string_col(Cs, Line, Col, Q, U, Wcs, Uni);
-scan_string0(Cs, _St, Line, Col, Q, U, Str, Wcs, Uni) ->
- scan_string1(Cs, Line, Col, Q, U, Str, Wcs, Uni).
+scan_string0(Cs, #erl_scan{text=false}, Line, no_col=Col, Q, [], Wcs) ->
+ scan_string_no_col(Cs, Line, Col, Q, Wcs);
+scan_string0(Cs, #erl_scan{text=true}, Line, no_col=Col, Q, Str, Wcs) ->
+ scan_string1(Cs, Line, Col, Q, Str, Wcs);
+scan_string0(Cs, St, Line, Col, Q, [], Wcs) ->
+ scan_string_col(Cs, St, Line, Col, Q, Wcs);
+scan_string0(Cs, _St, Line, Col, Q, Str, Wcs) ->
+ scan_string1(Cs, Line, Col, Q, Str, Wcs).
%% Optimization. Col =:= no_col.
-scan_string_no_col([Q|Cs], Line, Col, Q, _U, Wcs, Uni) ->
- {Cs,Line,Col,_DontCare=[],lists:reverse(Wcs),Uni};
-scan_string_no_col([$\n=C|Cs], Line, Col, Q, U, Wcs, Uni) ->
- scan_string_no_col(Cs, Line+1, Col, Q, U, [C|Wcs], Uni);
-scan_string_no_col([C|Cs], Line, Col, Q, U, Wcs, Uni) when C =/= $\\,
- ?CHAR(C),
- ?UNI255(C) ->
- scan_string_no_col(Cs, Line, Col, Q, U, [C|Wcs], Uni);
-scan_string_no_col(Cs, Line, Col, Q, U, Wcs, Uni) ->
- scan_string1(Cs, Line, Col, Q, U, Wcs, Wcs, Uni).
+scan_string_no_col([Q|Cs], Line, Col, Q, Wcs) ->
+ {Cs,Line,Col,_DontCare=[],lists:reverse(Wcs)};
+scan_string_no_col([$\n=C|Cs], Line, Col, Q, Wcs) ->
+ scan_string_no_col(Cs, Line+1, Col, Q, [C|Wcs]);
+scan_string_no_col([C|Cs], Line, Col, Q, Wcs) when C =/= $\\, ?UNICODE(C) ->
+ scan_string_no_col(Cs, Line, Col, Q, [C|Wcs]);
+scan_string_no_col(Cs, Line, Col, Q, Wcs) ->
+ scan_string1(Cs, Line, Col, Q, Wcs, Wcs).
%% Optimization. Col =/= no_col.
-scan_string_col([Q|Cs], Line, Col, Q, _U, Wcs0, Uni) ->
+scan_string_col([Q|Cs], St, Line, Col, Q, Wcs0) ->
Wcs = lists:reverse(Wcs0),
- Str = [Q|Wcs++[Q]],
- {Cs,Line,Col+1,Str,Wcs,Uni};
-scan_string_col([$\n=C|Cs], Line, _xCol, Q, U, Wcs, Uni) ->
- scan_string_col(Cs, Line+1, 1, Q, U, [C|Wcs], Uni);
-scan_string_col([C|Cs], Line, Col, Q, U, Wcs, Uni) when C =/= $\\,
- ?CHAR(C),
- ?UNI255(C) ->
- scan_string_col(Cs, Line, Col+1, Q, U, [C|Wcs], Uni);
-scan_string_col(Cs, Line, Col, Q, U, Wcs, Uni) ->
- scan_string1(Cs, Line, Col, Q, U, Wcs, Wcs, Uni).
-
-%% UNI_STR is to be replaced by STR when the Unicode-string-to-list
-%% workaround is eventually removed.
--define(UNI_STR(Col, S), S).
+ Str = ?STR(St, [Q|Wcs++[Q]]),
+ {Cs,Line,Col+1,Str,Wcs};
+scan_string_col([$\n=C|Cs], St, Line, _xCol, Q, Wcs) ->
+ scan_string_col(Cs, St, Line+1, 1, Q, [C|Wcs]);
+scan_string_col([C|Cs], St, Line, Col, Q, Wcs) when C =/= $\\, ?UNICODE(C) ->
+ scan_string_col(Cs, St, Line, Col+1, Q, [C|Wcs]);
+scan_string_col(Cs, _St, Line, Col, Q, Wcs) ->
+ scan_string1(Cs, Line, Col, Q, Wcs, Wcs).
%% Note: in those cases when a 'char_error' tuple is returned below it
%% is tempting to skip over characters up to the first Q character,
%% but then the end location of the error tuple would not correspond
%% to the start location of the returned Rest string. (Maybe the end
%% location could be modified, but that too is ugly.)
-scan_string1([Q|Cs], Line, Col, Q, _U, Str0, Wcs0, Uni) ->
+scan_string1([Q|Cs], Line, Col, Q, Str0, Wcs0) ->
Wcs = lists:reverse(Wcs0),
- Str = ?UNI_STR(Col, [Q|lists:reverse(Str0, [Q])]),
- {Cs,Line,incr_column(Col, 1),Str,Wcs,Uni};
-scan_string1([$\n=C|Cs], Line, Col, Q, U, Str, Wcs, Uni) ->
+ Str = [Q|lists:reverse(Str0, [Q])],
+ {Cs,Line,incr_column(Col, 1),Str,Wcs};
+scan_string1([$\n=C|Cs], Line, Col, Q, Str, Wcs) ->
Ncol = new_column(Col, 1),
- scan_string1(Cs, Line+1, Ncol, Q, U, ?UNI_STR(Col, [C|Str]), [C|Wcs], Uni);
-scan_string1([$\\|Cs]=Cs0, Line, Col, Q, U, Str, Wcs, Uni) ->
+ scan_string1(Cs, Line+1, Ncol, Q, [C|Str], [C|Wcs]);
+scan_string1([$\\|Cs]=Cs0, Line, Col, Q, Str, Wcs) ->
case scan_escape(Cs, Col) of
more ->
- {more,Cs0,Line,Col,Str,Wcs,Uni};
+ {more,Cs0,Line,Col,Str,Wcs};
{error,Ncs,Error,Ncol} ->
{char_error,Ncs,Error,Line,Col,incr_column(Ncol, 1)};
{eof,Ncol} ->
{error,Line,incr_column(Ncol, 1),lists:reverse(Wcs),eof};
{nl,Val,ValStr,Ncs,Ncol} ->
- Nstr = ?UNI_STR(Ncol, lists:reverse(ValStr, [$\\|Str])),
+ Nstr = lists:reverse(ValStr, [$\\|Str]),
Nwcs = [Val|Wcs],
- scan_string1(Ncs, Line+1, Ncol, Q, U, Nstr, Nwcs, Uni);
- {unicode,_Val,_ValStr,Ncs,Ncol} when not U -> %' Emacs
- {char_error,Ncs,{illegal,character},Line,Col,incr_column(Ncol, 1)};
- {unicode,Val,ValStr,Ncs,Ncol} -> % UNI. Uni is set to Val.
- Nstr = ?UNI_STR(Ncol, lists:reverse(ValStr, [$\\|Str])),
- Nwcs = [Val|Wcs], % not used
- scan_string1(Ncs, Line, incr_column(Ncol, 1), Q, U, Nstr, Nwcs, Val);
+ scan_string1(Ncs, Line+1, Ncol, Q, Nstr, Nwcs);
{Val,ValStr,Ncs,Ncol} ->
- Nstr = ?UNI_STR(Ncol, lists:reverse(ValStr, [$\\|Str])),
+ Nstr = lists:reverse(ValStr, [$\\|Str]),
Nwcs = [Val|Wcs],
- scan_string1(Ncs, Line, incr_column(Ncol, 1), Q, U, Nstr, Nwcs, Uni)
+ scan_string1(Ncs, Line, incr_column(Ncol, 1), Q, Nstr, Nwcs)
end;
-scan_string1([C|Cs], Line, no_col=Col, Q, U, Str, Wcs, Uni) when ?CHAR(C),
- ?UNI255(C) ->
- %% scan_string1(Cs, Line, Col, Q, U, Str, [C|Wcs], Uni);
- scan_string1(Cs, Line, Col, Q, U, [C|Str], [C|Wcs], Uni); % UNI
-scan_string1([C|Cs], Line, Col, Q, U, Str, Wcs, Uni) when ?CHAR(C), ?UNI255(C) ->
- scan_string1(Cs, Line, Col+1, Q, U, [C|Str], [C|Wcs], Uni);
-scan_string1([C|Cs], Line, Col, _Q, false, _Str, _Wcs, _Uni) when ?CHAR(C) -> %' UNI
- {char_error,Cs,{illegal,character},Line,Col,incr_column(Col, 1)};
-scan_string1([C|Cs], Line, Col, Q, U, Str, Wcs, _Uni) when ?UNICODE(C) ->
- scan_string1(Cs, Line, incr_column(Col, 1), Q, U, [C|Str], [C|Wcs], C);
-scan_string1([C|Cs], Line, Col, _Q, _U, _Str, _Wcs, _Uni) when ?CHAR(C) -> % UNI
+scan_string1([C|Cs], Line, no_col=Col, Q, Str, Wcs) when ?UNICODE(C) ->
+ scan_string1(Cs, Line, Col, Q, [C|Str], [C|Wcs]);
+scan_string1([C|Cs], Line, Col, Q, Str, Wcs) when ?UNICODE(C) ->
+ scan_string1(Cs, Line, Col+1, Q, [C|Str], [C|Wcs]);
+scan_string1([C|Cs], Line, Col, _Q, _Str, _Wcs) when ?CHAR(C) ->
{char_error,Cs,{illegal,character},Line,Col,incr_column(Col, 1)};
-scan_string1([]=Cs, Line, Col, _Q, _U, Str, Wcs, Uni) ->
- {more,Cs,Line,Col,Str,Wcs,Uni};
-scan_string1(eof, Line, Col, _Q, _U, _Str, Wcs, _Uni) ->
+scan_string1([]=Cs, Line, Col, _Q, Str, Wcs) ->
+ {more,Cs,Line,Col,Str,Wcs};
+scan_string1(eof, Line, Col, _Q, _Str, Wcs) ->
{error,Line,Col,lists:reverse(Wcs),eof}.
-define(OCT(C), C >= $0, C =< $7).
@@ -1077,16 +980,16 @@ scan_string1(eof, Line, Col, _Q, _U, _Str, Wcs, _Uni) ->
%% \<1-3> octal digits
scan_escape([O1,O2,O3|Cs], Col) when ?OCT(O1), ?OCT(O2), ?OCT(O3) ->
Val = (O1*8 + O2)*8 + O3 - 73*$0,
- {Val,?UNI_STR(Col, [O1,O2,O3]),Cs,incr_column(Col, 3)};
+ {Val,[O1,O2,O3],Cs,incr_column(Col, 3)};
scan_escape([O1,O2], _Col) when ?OCT(O1), ?OCT(O2) ->
more;
scan_escape([O1,O2|Cs], Col) when ?OCT(O1), ?OCT(O2) ->
Val = (O1*8 + O2) - 9*$0,
- {Val,?UNI_STR(Col, [O1,O2]),Cs,incr_column(Col, 2)};
+ {Val,[O1,O2],Cs,incr_column(Col, 2)};
scan_escape([O1], _Col) when ?OCT(O1) ->
more;
scan_escape([O1|Cs], Col) when ?OCT(O1) ->
- {O1 - $0,?UNI_STR(Col, [O1]),Cs,incr_column(Col, 1)};
+ {O1 - $0,[O1],Cs,incr_column(Col, 1)};
%% \x{<hex digits>}
scan_escape([$x,${|Cs], Col) ->
scan_hex(Cs, incr_column(Col, 2), []);
@@ -1097,29 +1000,27 @@ scan_escape([$x|eof], Col) ->
%% \x<2> hexadecimal digits
scan_escape([$x,H1,H2|Cs], Col) when ?HEX(H1), ?HEX(H2) ->
Val = erlang:list_to_integer([H1,H2], 16),
- {Val,?UNI_STR(Col, [$x,H1,H2]),Cs,incr_column(Col, 3)};
+ {Val,[$x,H1,H2],Cs,incr_column(Col, 3)};
scan_escape([$x,H1], _Col) when ?HEX(H1) ->
more;
scan_escape([$x|Cs], Col) ->
{error,Cs,{illegal,character},incr_column(Col, 1)};
%% \^X -> CTL-X
scan_escape([$^=C0,$\n=C|Cs], Col) ->
- {nl,C,?UNI_STR(Col, [C0,C]),Cs,new_column(Col, 1)};
+ {nl,C,[C0,C],Cs,new_column(Col, 1)};
scan_escape([$^=C0,C|Cs], Col) when ?CHAR(C) ->
Val = C band 31,
- {Val,?UNI_STR(Col, [C0,C]),Cs,incr_column(Col, 2)};
+ {Val,[C0,C],Cs,incr_column(Col, 2)};
scan_escape([$^], _Col) ->
more;
scan_escape([$^|eof], Col) ->
{eof,incr_column(Col, 1)};
scan_escape([$\n=C|Cs], Col) ->
- {nl,C,?UNI_STR(Col, [C]),Cs,new_column(Col, 1)};
-scan_escape([C0|Cs], Col) when ?CHAR(C0), ?UNI255(C0) ->
+ {nl,C,[C],Cs,new_column(Col, 1)};
+scan_escape([C0|Cs], Col) when ?UNICODE(C0) ->
C = escape_char(C0),
- {C,?UNI_STR(Col, [C0]),Cs,incr_column(Col, 1)};
-scan_escape([C|Cs], Col) when ?UNICODE(C) ->
- {unicode,C,?UNI_STR(Col, [C]),Cs,incr_column(Col, 1)};
-scan_escape([C|Cs], Col) when ?CHAR(C) -> % UNI
+ {C,[C0],Cs,incr_column(Col, 1)};
+scan_escape([C|Cs], Col) when ?CHAR(C) ->
{error,Cs,{illegal,character},incr_column(Col, 1)};
scan_escape([], _Col) ->
more;
@@ -1136,10 +1037,8 @@ scan_hex(Cs, Col, Wcs) ->
scan_esc_end([$}|Cs], Col, Wcs0, B, Str0) ->
Wcs = lists:reverse(Wcs0),
case catch erlang:list_to_integer(Wcs, B) of
- Val when Val =< 16#FF ->
- {Val,?UNI_STR(Col, Str0++Wcs++[$}]),Cs,incr_column(Col, 1)};
Val when ?UNICODE(Val) ->
- {unicode,Val,?UNI_STR(Col, Str0++Wcs++[$}]),Cs,incr_column(Col,1)};
+ {Val,Str0++Wcs++[$}],Cs,incr_column(Col, 1)};
_ ->
{error,Cs,{illegal,character},incr_column(Col, 1)}
end;
@@ -1171,7 +1070,7 @@ scan_number([$#|Cs]=Cs0, St, Line, Col, Toks, Ncs0) ->
Ncs = lists:reverse(Ncs0),
case catch list_to_integer(Ncs) of
B when B >= 2, B =< 1+$Z-$A+10 ->
- Bcs = Ncs++[$#],
+ Bcs = ?STR(St, Ncs++[$#]),
scan_based_int(Cs, St, Line, Col, Toks, {B,[],Bcs});
B ->
Len = length(Ncs),
@@ -1204,7 +1103,7 @@ scan_based_int(Cs, St, Line, Col, Toks, {B,Ncs0,Bcs}) ->
Ncs = lists:reverse(Ncs0),
case catch erlang:list_to_integer(Ncs, B) of
N when is_integer(N) ->
- tok3(Cs, St, Line, Col, Toks, integer, Bcs++Ncs, N);
+ tok3(Cs, St, Line, Col, Toks, integer, ?STR(St, Bcs++Ncs), N);
_ ->
Len = length(Bcs)+length(Ncs),
Ncol = incr_column(Col, Len),
@@ -1244,36 +1143,30 @@ float_end(Cs, St, Line, Col, Toks, Ncs0) ->
scan_error({illegal,float}, Line, Col, Line, Ncol, Cs)
end.
-skip_comment(Cs, St, Line, Col, Toks, N) ->
- skip_comment(Cs, St, Line, Col, Toks, N, St#erl_scan.unicode).
-
-skip_comment([C|Cs], St, Line, Col, Toks, N, U) when C =/= $\n, ?CHAR(C) ->
- case ?UNI255(C) orelse U andalso ?UNICODE(C) of
+skip_comment([C|Cs], St, Line, Col, Toks, N) when C =/= $\n, ?CHAR(C) ->
+ case ?UNICODE(C) of
true ->
- skip_comment(Cs, St, Line, Col, Toks, N+1, U);
+ skip_comment(Cs, St, Line, Col, Toks, N+1);
false ->
Ncol = incr_column(Col, N+1),
scan_error({illegal,character}, Line, Col, Line, Ncol, Cs)
end;
-skip_comment([]=Cs, _St, Line, Col, Toks, N, _U) ->
+skip_comment([]=Cs, _St, Line, Col, Toks, N) ->
{more,{Cs,Col,Toks,Line,N,fun skip_comment/6}};
-skip_comment(Cs, St, Line, Col, Toks, N, _U) ->
+skip_comment(Cs, St, Line, Col, Toks, N) ->
scan1(Cs, St, Line, incr_column(Col, N), Toks).
-scan_comment(Cs, St, Line, Col, Toks, Ncs) ->
- scan_comment(Cs, St, Line, Col, Toks, Ncs, St#erl_scan.unicode).
-
-scan_comment([C|Cs], St, Line, Col, Toks, Ncs, U) when C =/= $\n, ?CHAR(C) ->
- case ?UNI255(C) orelse U andalso ?UNICODE(C) of
+scan_comment([C|Cs], St, Line, Col, Toks, Ncs) when C =/= $\n, ?CHAR(C) ->
+ case ?UNICODE(C) of
true ->
- scan_comment(Cs, St, Line, Col, Toks, [C|Ncs], U);
+ scan_comment(Cs, St, Line, Col, Toks, [C|Ncs]);
false ->
Ncol = incr_column(Col, length(Ncs)+1),
scan_error({illegal,character}, Line, Col, Line, Ncol, Cs)
end;
-scan_comment([]=Cs, _St, Line, Col, Toks, Ncs, _U) ->
+scan_comment([]=Cs, _St, Line, Col, Toks, Ncs) ->
{more,{Cs,Col,Toks,Line,Ncs,fun scan_comment/6}};
-scan_comment(Cs, St, Line, Col, Toks, Ncs0, _U) ->
+scan_comment(Cs, St, Line, Col, Toks, Ncs0) ->
Ncs = lists:reverse(Ncs0),
tok3(Cs, St, Line, Col, Toks, comment, Ncs, Ncs).
diff --git a/lib/stdlib/test/erl_scan_SUITE.erl b/lib/stdlib/test/erl_scan_SUITE.erl
index 3f77d40a2e..ecd181e87c 100644
--- a/lib/stdlib/test/erl_scan_SUITE.erl
+++ b/lib/stdlib/test/erl_scan_SUITE.erl
@@ -118,13 +118,13 @@ check(String) ->
%%% (This should be useful for all format_error functions.)
check_error({error, Info, EndLine}, Module0) ->
- ?line {ErrorLine, Module, Desc} = Info,
- ?line true = (Module == Module0),
- ?line assert_type(EndLine, integer),
- ?line assert_type(ErrorLine, integer),
- ?line true = (ErrorLine =< EndLine),
- ?line String = lists:flatten(Module0:format_error(Desc)),
- ?line true = io_lib:printable_list(String).
+ {ErrorLine, Module, Desc} = Info,
+ true = (Module == Module0),
+ assert_type(EndLine, integer),
+ assert_type(ErrorLine, integer),
+ true = (ErrorLine =< EndLine),
+ String = lists:flatten(Module0:format_error(Desc)),
+ true = io_lib:printable_list(String).
iso88591(doc) -> ["Tests the support for ISO-8859-1 i.e Latin-1"];
iso88591(suite) -> [];
@@ -809,77 +809,57 @@ white_spaces() ->
unicode() ->
?line {ok,[{char,1,83},{integer,1,45}],1} =
- erl_scan:string("$\\12345", 1, [{unicode,false}]), % not unicode
+ erl_scan:string("$\\12345"), % not unicode
?line {error,{1,erl_scan,{illegal,character}},1} =
- erl_scan:string([1089], 1, [{unicode,false}]),
+ erl_scan:string([1089]),
?line {error,{{1,1},erl_scan,{illegal,character}},{1,2}} =
- erl_scan:string([1089], {1,1}, [{unicode,false}]),
- ?line {error,{1,erl_scan,{illegal,character}},1} =
- %% ?line {error,{1,erl_scan,{illegal,atom}},1} =
- erl_scan:string("'a"++[1089]++"b'", 1, [{unicode,false}]),
- ?line {error,{{1,3},erl_scan,{illegal,character}},{1,4}} =
- erl_scan:string("'a"++[1089]++"b'", {1,1}, [{unicode,false}]),
+ erl_scan:string([1089], {1,1}),
+ ?line {error,{1,erl_scan,{illegal,atom}},1} =
+ erl_scan:string("'a"++[1089]++"b'", 1),
+ ?line {error,{{1,1},erl_scan,{illegal,atom}},{1,6}} =
+ erl_scan:string("'a"++[1089]++"b'", {1,1}),
?line test("\"a"++[1089]++"b\""),
?line {ok,[{char,1,1}],1} =
- erl_scan:string([$$,$\\,$^,1089], 1, [{unicode,false}]),
+ erl_scan:string([$$,$\\,$^,1089], 1),
?line {error,{1,erl_scan,Error},1} =
- erl_scan:string("\"qa\x{aaa}", 1, [{unicode,false}]),
+ erl_scan:string("\"qa\x{aaa}", 1),
?line "unterminated string starting with \"qa"++[2730]++"\"" =
erl_scan:format_error(Error),
?line {error,{{1,1},erl_scan,_},{1,11}} =
- erl_scan:string("\"qa\\x{aaa}",{1,1}, [{unicode,false}]),
- ?line {error,{{1,4},erl_scan,{illegal,character}},{1,11}} =
- erl_scan:string("'qa\\x{aaa}'",{1,1}, [{unicode,false}]),
-
- Tags = [category, column, length, line, symbol, text],
-
- %% Workaround. No character codes greater than 255! To be changed.
- %% Note: don't remove these tests, just modify them!
+ erl_scan:string("\"qa\\x{aaa}",{1,1}),
+ ?line {error,{{1,1},erl_scan,{illegal,atom}},{1,12}} =
+ erl_scan:string("'qa\\x{aaa}'",{1,1}),
- ?line {ok,[{integer,1,1089}],1} =
- erl_scan:string([$$,1089], 1, [{unicode,false}]),
- ?line {ok,[{integer,1,1089}],1} =
- erl_scan:string([$$,$\\,1089], 1, [{unicode,false}]),
+ ?line {ok,[{char,1,1089}],1} =
+ erl_scan:string([$$,1089], 1),
+ ?line {ok,[{char,1,1089}],1} =
+ erl_scan:string([$$,$\\,1089], 1),
Qs = "$\\x{aaa}",
- ?line {ok,[{integer,1,16#aaa}],1} =
- erl_scan:string(Qs, 1, [{unicode,false}]),
+ ?line {ok,[{char,1,$\x{aaa}}],1} =
+ erl_scan:string(Qs, 1),
?line {ok,[Q2],{1,9}} =
- erl_scan:string("$\\x{aaa}", {1,1}, [text,{unicode,false}]),
- ?line [{category,integer},{column,1},{length,8},
+ erl_scan:string("$\\x{aaa}", {1,1}, [text]),
+ ?line [{category,char},{column,1},{length,8},
{line,1},{symbol,16#aaa},{text,Qs}] =
erl_scan:token_info(Q2),
U1 = "\"\\x{aaa}\"",
- ?line {ok,[T1,T2,T3],{1,10}} =
- erl_scan:string(U1, {1,1}, [text,{unicode,false}]),
- ?line [{category,'['},{column,1},{length,1},{line,1},
- {symbol,'['},{text,"\""}] = erl_scan:token_info(T1, Tags),
- ?line [{category,integer},{column,2},{length,7},
- {line,1},{symbol,16#aaa},{text,"\\x{aaa}"}] =
- erl_scan:token_info(T2, Tags),
- ?line [{category,']'},{column,9},{length,1},{line,1},
- {symbol,']'},{text,"\""}] = erl_scan:token_info(T3, Tags),
- ?line {ok,[{'[',1},{integer,1,16#aaa},{']',1}],1} =
- erl_scan:string(U1, 1, [{unicode,false}]),
+ {ok,
+ [{string,[{line,1},{column,1},{text,"\"\\x{aaa}\""}],[2730]}],
+ {1,10}} = erl_scan:string(U1, {1,1}, [text]),
+ {ok,[{string,1,[2730]}],1} = erl_scan:string(U1, 1),
U2 = "\"\\x41\\x{fff}\\x42\"",
- ?line {ok,[{'[',1},{char,1,16#41},{',',1},{integer,1,16#fff},
- {',',1},{char,1,16#42},{']',1}],1} =
- erl_scan:string(U2, 1, [{unicode,false}]),
+ {ok,[{string,1,[$\x41,$\x{fff},$\x42]}],1} = erl_scan:string(U2, 1),
U3 = "\"a\n\\x{fff}\n\"",
- ?line {ok,[{'[',1},{char,1,$a},{',',1},{char,1,$\n},
- {',',2},{integer,2,16#fff},{',',2},{char,2,$\n},
- {']',3}],3} =
- erl_scan:string(U3, 1, [{unicode,false}]),
+ {ok,[{string,1,[$a,$\n,$\x{fff},$\n]}],3} = erl_scan:string(U3, 1),
U4 = "\"\\^\n\\x{aaa}\\^\n\"",
- ?line {ok,[{'[',1},{char,1,$\n},{',',2},{integer,2,16#aaa},
- {',',2},{char,2,$\n},{']',3}],3} =
- erl_scan:string(U4, 1, [{unicode,false}]),
+ {ok,[{string,1,[$\n,$\x{aaa},$\n]}],3} = erl_scan:string(U4, 1),
%% Keep these tests:
?line test(Qs),
@@ -889,21 +869,15 @@ unicode() ->
?line test(U4),
Str1 = "\"ab" ++ [1089] ++ "cd\"",
- ?line {ok,[{'[',1},{char,1,$a},{',',1},{char,1,$b},{',',1},
- {integer,1,1089},{',',1},{char,1,$c},{',',1},
- {char,1,$d},{']',1}],1} =
- erl_scan:string(Str1, 1, [{unicode,false}]),
- ?line {ok,[{'[',_},{char,_,$a},{',',_},{char,_,$b},{',',_},
- {integer,_,1089},{',',_},{char,_,$c},{',',_},
- {char,_,$d},{']',_}],{1,8}} =
- erl_scan:string(Str1, {1,1}, [{unicode,false}]),
+ {ok,[{string,1,[$a,$b,1089,$c,$d]}],1} = erl_scan:string(Str1, 1),
+ {ok,[{string,{1,1},[$a,$b,1089,$c,$d]}],{1,8}} =
+ erl_scan:string(Str1, {1,1}),
?line test(Str1),
Comment = "%% "++[1089],
- %% Returned a comment In R15B03:
- {error,{1,erl_scan,{illegal,character}},1} =
- erl_scan:string(Comment, 1, [return,{unicode,false}]),
- {error,{{1,1},erl_scan,{illegal,character}},{1,5}} =
- erl_scan:string(Comment, {1,1}, [return,{unicode,false}]),
+ {ok,[{comment,1,[$%,$%,$\s,1089]}],1} =
+ erl_scan:string(Comment, 1, [return]),
+ {ok,[{comment,{1,1},[$%,$%,$\s,1089]}],{1,5}} =
+ erl_scan:string(Comment, {1,1}, [return]),
ok.
more_chars() ->