1 files changed, 157 insertions, 202 deletions
diff --git a/lib/stdlib/src/erl_scan.erl b/lib/stdlib/src/erl_scan.erl
index 8e59e01f48..3651f608bc 100644
--- a/lib/stdlib/src/erl_scan.erl
+++ b/lib/stdlib/src/erl_scan.erl
@@ -1,7 +1,8 @@
+%% -*- coding: utf-8 -*-
 %%
 %% %CopyrightBegin%
 %%
-%% Copyright Ericsson AB 1996-2012. All Rights Reserved.
+%% Copyright Ericsson AB 1996-2013. All Rights Reserved.
 %%
 %% The contents of this file are subject to the Erlang Public License,
 %% Version 1.1, (the "License"); you may not use this file except in
@@ -32,19 +33,19 @@
 %% 173 - 176    { - ~           punctuation
 %% 177          DEL             control
 %% 200 - 237                    control
-%% 240 - 277    NBSP - �        punctuation
-%% 300 - 326    � - �           uppercase
-%% 327          �               punctuation
-%% 330 - 336    � - �           uppercase
-%% 337 - 366    � - �           lowercase
-%% 367          �               punctuation
-%% 370 - 377    � - �           lowercase
+%% 240 - 277    NBSP - ¿        punctuation
+%% 300 - 326    À - Ö           uppercase
+%% 327          ×               punctuation
+%% 330 - 336    Ø - Þ           uppercase
+%% 337 - 366    ß - ö           lowercase
+%% 367          ÷               punctuation
+%% 370 - 377    ø - ÿ           lowercase
 %%
 %% Many punctuation characters have special meaning:
 %%  $\s, $_, $", $$, $%, $', $.
 %% DEL is a punctuation.
 %%
-%% Must watch using � \327, very close to x \170.
+%% Must watch using × \327, very close to x \170.
 
 -module(erl_scan).
 
@@ -55,7 +56,16 @@
          token_info/1,token_info/2,
          attributes_info/1,attributes_info/2,set_attribute/3]).
 
--export_type([error_info/0, line/0, tokens_result/0]).
+%%% Private
+-export([continuation_location/1]).
+
+-export_type([error_info/0,
+              line/0,
+              location/0,
+              options/0,
+              return_cont/0,
+              token/0,
+              tokens_result/0]).
 
 %%%
 %%% Defines and type definitions
@@ -104,7 +114,7 @@
 format_error({string,Quote,Head}) ->
     lists:flatten(["unterminated " ++ string_thing(Quote) ++
                    " starting with " ++
-                   io_lib:write_unicode_string(Head, Quote)]);
+                   io_lib:write_string(Head, Quote)]);
 format_error({illegal,Type}) ->
     lists:flatten(io_lib:fwrite("illegal ~w", [Type]));
 format_error(char) -> "unterminated character";
@@ -183,6 +193,11 @@ tokens({erl_scan_continuation,Cs,Col,Toks,Line,St,Any,Fun},
        CharSpec, _Loc, _Opts) ->
     tokens1(Cs++CharSpec, St, Line, Col, Toks, Fun, Any).
 
+continuation_location({erl_scan_continuation,_,no_col,_,Line,_,_,_}) ->
+    Line;
+continuation_location({erl_scan_continuation,_,Col,_,Line,_,_,_}) ->
+    {Line,Col}.
+
 -type attribute_item() :: 'column' | 'length' | 'line'
                         | 'location' | 'text'.
 -type info_location() :: location() | term().
@@ -201,15 +216,14 @@ token_info(Token) ->
     Items = [category,column,length,line,symbol,text], % undefined order
     token_info(Token, Items).
 
--spec token_info(Token, TokenItem) -> TokenInfo | 'undefined' when
-      Token :: token(),
-      TokenItem :: token_item(),
-      TokenInfo :: TokenInfoTuple :: token_info();
-                (Token, TokenItems) -> [TokenInfo] when
-      Token :: token(),
-      TokenItems :: [TokenItem],
-      TokenItem :: token_item(),
-      TokenInfo :: [TokenInfoTuple :: token_info()].
+-spec token_info(Token, TokenItem) -> TokenInfoTuple | 'undefined' when
+                     Token :: token(),
+                     TokenItem :: token_item(),
+                     TokenInfoTuple :: token_info();
+                (Token, TokenItems) -> TokenInfo when
+                     Token :: token(),
+                     TokenItems :: [TokenItem :: token_item()],
+                     TokenInfo :: [TokenInfoTuple :: token_info()].
 token_info(_Token, []) ->
     [];
 token_info(Token, [Item|Items]) when is_atom(Item) ->
@@ -239,16 +253,15 @@ attributes_info(Attributes) ->
     Items = [column,length,line,text], % undefined order
     attributes_info(Attributes, Items).
 
--spec attributes_info(Attributes, AttributeItem) ->
-                        AttributeInfo | 'undefined' when
-      Attributes :: attributes(),
-      AttributeItem :: attribute_item(),
-      AttributeInfo :: AttributeInfoTuple :: attribute_info();
-                     (Attributes, AttributeItems) -> [AttributeInfo] when
-      Attributes :: attributes(),
-      AttributeItems :: [AttributeItem],
-      AttributeItem :: attribute_item(),
-      AttributeInfo :: [AttributeInfoTuple :: attribute_info()].
+-spec attributes_info
+        (Attributes, AttributeItem) -> AttributeInfoTuple | 'undefined' when
+             Attributes :: attributes(),
+             AttributeItem :: attribute_item(),
+             AttributeInfoTuple :: attribute_info();
+        (Attributes, AttributeItems) -> AttributeInfo when
+             Attributes :: attributes(),
+             AttributeItems :: [AttributeItem :: attribute_item()],
+             AttributeInfo :: [AttributeInfoTuple :: attribute_info()].
 attributes_info(_Attrs, []) ->
     [];
 attributes_info(Attrs, [A|As]) when is_atom(A) ->
@@ -324,13 +337,12 @@ string_thing(_) -> "string".
          (C >= $\000 andalso C =< $\s orelse C >= $\200 andalso C =< $\240)).
 -define(DIGIT(C), C >= $0, C =< $9).
 -define(CHAR(C), is_integer(C), C >= 0).
+-define(UNICODE(C),
+         (C >= 0 andalso C < 16#D800 orelse
+          C > 16#DFFF andalso C < 16#FFFE orelse
+          C > 16#FFFF andalso C =< 16#10FFFF)).
 
-%% A workaround: Unicode strings are not returned as strings, but as
-%% lists of integers. For instance, "b\x{aaa}c" => [98,2730,99]. This
-%% is to protect the system from character codes greater than 255. To
-%% be removed. Search for UNI to find workaround code.
--define(NO_UNICODE, 0).
--define(UNI255(C), (C) =< 16#ff).
+-define(UNI255(C), C >= 0, C =< 16#ff).
 
 options(Opts0) when is_list(Opts0) ->
     Opts = lists:foldr(fun expand_opt/2, [], Opts0),
@@ -504,18 +516,18 @@ scan1("."=Cs, _St, Line, Col, Toks) ->
 scan1([$.=C|Cs], St, Line, Col, Toks) ->
     scan_dot(Cs, St, Line, Col, Toks, [C]);
 scan1([$"|Cs], St, Line, Col, Toks) -> %" Emacs
-    State0 = {[],[],Line,Col,?NO_UNICODE},
+    State0 = {[],[],Line,Col},
     scan_string(Cs, St, Line, incr_column(Col, 1), Toks, State0);
 scan1([$'|Cs], St, Line, Col, Toks) -> %' Emacs
-    State0 = {[],[],Line,Col,?NO_UNICODE},
+    State0 = {[],[],Line,Col},
     scan_qatom(Cs, St, Line, incr_column(Col, 1), Toks, State0);
 scan1([$$|Cs], St, Line, Col, Toks) ->
     scan_char(Cs, St, Line, Col, Toks);
 scan1([$\r|Cs], St, Line, Col, Toks) when St#erl_scan.ws ->
     white_space_end(Cs, St, Line, Col, Toks, 1, "\r");
-scan1([C|Cs], St, Line, Col, Toks) when C >= $�, C =< $�, C =/= $� ->
+scan1([C|Cs], St, Line, Col, Toks) when C >= $ß, C =< $ÿ, C =/= $÷ ->
     scan_atom(Cs, St, Line, Col, Toks, [C]);
-scan1([C|Cs], St, Line, Col, Toks) when C >= $�, C =< $�, C /= $� ->
+scan1([C|Cs], St, Line, Col, Toks) when C >= $À, C =< $Þ, C /= $× ->
     scan_variable(Cs, St, Line, Col, Toks, [C]);
 scan1([$\t|Cs], St, Line, Col, Toks) when St#erl_scan.ws ->
     scan_tabs(Cs, St, Line, Col, Toks, 1);
@@ -628,15 +640,12 @@ scan1([$~|Cs], St, Line, Col, Toks) ->
 scan1([$&|Cs], St, Line, Col, Toks) ->
     tok2(Cs, St, Line, Col, Toks, "&", '&', 1);
 %% End of optimization.
-scan1([C|Cs], St, Line, Col, Toks) when ?CHAR(C) ->
+scan1([C|Cs], St, Line, Col, Toks) when ?UNI255(C) ->
     Str = [C],
-    case catch list_to_atom(Str) of
-        Sym when is_atom(Sym) ->
-            tok2(Cs, St, Line, Col, Toks, Str, Sym, 1);
-        _ ->
-            Ncol = incr_column(Col, 1),
-            scan_error({illegal,character}, Line, Col, Line, Ncol, Cs)
-    end;
+    tok2(Cs, St, Line, Col, Toks, Str, list_to_atom(Str), 1);
+scan1([C|Cs], _St, Line, Col, _Toks) when ?CHAR(C) ->
+    Ncol = incr_column(Col, 1),
+    scan_error({illegal,character}, Line, Col, Line, Ncol, Cs);
 scan1([]=Cs, _St, Line, Col, Toks) ->
     {more,{Cs,Col,Toks,Line,[],fun scan/6}};
 scan1(eof=Cs, _St, Line, Col, Toks) ->
@@ -685,23 +694,25 @@ scan_name([C|Cs], Ncs) when ?DIGIT(C) ->
     scan_name(Cs, [C|Ncs]);
 scan_name([$@=C|Cs], Ncs) ->
     scan_name(Cs, [C|Ncs]);
-scan_name([C|Cs], Ncs) when C >= $�, C =< $�, C =/= $� ->
+scan_name([C|Cs], Ncs) when C >= $ß, C =< $ÿ, C =/= $÷ ->
     scan_name(Cs, [C|Ncs]);
-scan_name([C|Cs], Ncs) when C >= $�, C =< $�, C =/= $� ->
+scan_name([C|Cs], Ncs) when C >= $À, C =< $Þ, C =/= $× ->
     scan_name(Cs, [C|Ncs]);
 scan_name([], Ncs) ->
     {more,Ncs};
 scan_name(Cs, Ncs) ->
     {lists:reverse(Ncs),Cs}.
 
+-define(STR(St, S), if St#erl_scan.text -> S; true -> [] end).
+
 scan_dot([$%|_]=Cs, St, Line, Col, Toks, Ncs) ->
     Attrs = attributes(Line, Col, St, Ncs),
     {ok,[{dot,Attrs}|Toks],Cs,Line,incr_column(Col, 1)};
 scan_dot([$\n=C|Cs], St, Line, Col, Toks, Ncs) ->
-    Attrs = attributes(Line, Col, St, Ncs++[C]),
+    Attrs = attributes(Line, Col, St, ?STR(St, Ncs++[C])),
     {ok,[{dot,Attrs}|Toks],Cs,Line+1,new_column(Col, 1)};
 scan_dot([C|Cs], St, Line, Col, Toks, Ncs) when ?WHITE_SPACE(C) ->
-    Attrs = attributes(Line, Col, St, Ncs++[C]),
+    Attrs = attributes(Line, Col, St, ?STR(St, Ncs++[C])),
     {ok,[{dot,Attrs}|Toks],Cs,Line,incr_column(Col, 2)};
 scan_dot(eof=Cs, St, Line, Col, Toks, Ncs) ->
     Attrs = attributes(Line, Col, St, Ncs),
@@ -834,102 +845,53 @@ scan_char([$\\|Cs]=Cs0, St, Line, Col, Toks) ->
         {eof,Ncol} ->
             scan_error(char, Line, Col, Line, Ncol, eof);
         {nl,Val,Str,Ncs,Ncol} ->
-            Attrs = attributes(Line, Col, St, "$\\"++Str),
+            Attrs = attributes(Line, Col, St, ?STR(St, "$\\"++Str)), %"
             Ntoks = [{char,Attrs,Val}|Toks],
             scan1(Ncs, St, Line+1, Ncol, Ntoks);
-        {unicode,Val,Str,Ncs,Ncol} ->
-            Attrs = attributes(Line, Col, St, "$\\"++Str),
-            Ntoks = [{integer,Attrs,Val}|Toks], % UNI
-            scan1(Ncs, St, Line, Ncol, Ntoks);
         {Val,Str,Ncs,Ncol} ->
-            Attrs = attributes(Line, Col, St, "$\\"++Str),
+            Attrs = attributes(Line, Col, St, ?STR(St, "$\\"++Str)), %"
             Ntoks = [{char,Attrs,Val}|Toks],
             scan1(Ncs, St, Line, Ncol, Ntoks)
     end;
 scan_char([$\n=C|Cs], St, Line, Col, Toks) ->
-    Attrs = attributes(Line, Col, St, [$$,C]),
+    Attrs = attributes(Line, Col, St, ?STR(St, [$$,C])),
     scan1(Cs, St, Line+1, new_column(Col, 1), [{char,Attrs,C}|Toks]);
-scan_char([C|Cs], St, Line, Col, Toks) when ?CHAR(C) ->
-    Tag = if ?UNI255(C) -> char; true -> integer end, % UNI
-    Attrs = attributes(Line, Col, St, [$$,C]),
-    scan1(Cs, St, Line, incr_column(Col, 2), [{Tag,Attrs,C}|Toks]);
+scan_char([C|Cs], St, Line, Col, Toks) when ?UNICODE(C) ->
+    Attrs = attributes(Line, Col, St, ?STR(St, [$$,C])),
+    scan1(Cs, St, Line, incr_column(Col, 2), [{char,Attrs,C}|Toks]);
+scan_char([C|_Cs], _St, Line, Col, _Toks) when ?CHAR(C) ->
+    scan_error({illegal,character}, Line, Col, Line, incr_column(Col, 1), eof);
 scan_char([], _St, Line, Col, Toks) ->
     {more,{[$$],Col,Toks,Line,[],fun scan/6}};
 scan_char(eof, _St, Line, Col, _Toks) ->
     scan_error(char, Line, Col, Line, incr_column(Col, 1), eof).
 
-scan_string(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0,Uni0}) ->
-    case scan_string0(Cs, St, Line, Col, $\", Str, Wcs, Uni0) of
-        {more,Ncs,Nline,Ncol,Nstr,Nwcs,Uni} ->
-            State = {Nwcs,Nstr,Line0,Col0,Uni},
+scan_string(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0}) ->
+    case scan_string0(Cs, St, Line, Col, $\", Str, Wcs) of %"
+        {more,Ncs,Nline,Ncol,Nstr,Nwcs} ->
+            State = {Nwcs,Nstr,Line0,Col0},
             {more,{Ncs,Ncol,Toks,Nline,State,fun scan_string/6}};
         {char_error,Ncs,Error,Nline,Ncol,EndCol} ->
             scan_error(Error, Nline, Ncol, Nline, EndCol, Ncs);
         {error,Nline,Ncol,Nwcs,Ncs} ->
             Estr = string:substr(Nwcs, 1, 16), % Expanded escape chars.
-            scan_error({string,$\",Estr}, Line0, Col0, Nline, Ncol, Ncs);
-        {Ncs,Nline,Ncol,Nstr,Nwcs,?NO_UNICODE} ->
+            scan_error({string,$\",Estr}, Line0, Col0, Nline, Ncol, Ncs); %"
+        {Ncs,Nline,Ncol,Nstr,Nwcs} ->
             Attrs = attributes(Line0, Col0, St, Nstr),
-            scan1(Ncs, St, Nline, Ncol, [{string,Attrs,Nwcs}|Toks]);
-        {Ncs,Nline,Ncol,Nstr,_Nwcs,_Uni} ->
-            Ntoks = unicode_string_to_list(Line0, Col0, St, Nstr, Toks),
-            scan1(Ncs, St, Nline, Ncol, Ntoks)
+            scan1(Ncs, St, Nline, Ncol, [{string,Attrs,Nwcs}|Toks])
     end.
 
-%% UNI
-unicode_string_to_list(Line, Col, St, [$"=C|Nstr], Toks) -> %" Emacs
-    Paren = {'[',attributes(Line, Col, St, [C])},
-    u2l(Nstr, Line, incr_column(Col, 1), St, [Paren|Toks]).
-
-u2l([$"]=Cs, Line, Col, St, Toks) -> %" Emacs
-    [{']',attributes(Line, Col, St, Cs)}|Toks];
-u2l([$\n=C|Cs], Line, Col, St, Toks) ->
-    Ntoks = unicode_nl_tokens(Line, Col, [C], C, St, Toks, Cs),
-    u2l(Cs, Line+1, new_column(Col, 1), St, Ntoks);
-u2l([$\\|Cs], Line, Col, St, Toks) ->
-    case scan_escape(Cs, Col) of
-        {nl,Val,ValStr,Ncs,Ncol} ->
-            Nstr = [$\\|ValStr],
-            Ntoks = unicode_nl_tokens(Line, Col, Nstr, Val, St, Toks, Ncs),
-            u2l(Ncs, Line+1, Ncol, St, Ntoks);
-        {unicode,Val,ValStr,Ncs,Ncol} ->
-            Nstr = [$\\|ValStr],
-            Ntoks = unicode_tokens(Line, Col, Nstr, Val, St, Toks, Ncs),
-            u2l(Ncs, Line, incr_column(Ncol, 1), St, Ntoks);
-        {Val,ValStr,Ncs,Ncol} ->
-            Nstr = [$\\|ValStr],
-            Ntoks = unicode_tokens(Line, Col, Nstr, Val, St, Toks, Ncs),
-            u2l(Ncs, Line, incr_column(Ncol, 1), St, Ntoks)
-    end;
-u2l([C|Cs], Line, Col, St, Toks) ->
-    Ntoks = unicode_tokens(Line, Col, [C], C, St, Toks, Cs),
-    u2l(Cs, Line, incr_column(Col, 1), St, Ntoks).
-
-unicode_nl_tokens(Line, Col, Str, Val, St, Toks, Cs) ->
-    Ccol = new_column(Col, 1),
-    unicode_tokens(Line, Col, Str, Val, St, Toks, Cs, Line+1, Ccol).
-
-unicode_tokens(Line, Col, Str, Val, St, Toks, Cs) ->
-    Ccol = incr_column(Col, length(Str)),
-    unicode_tokens(Line, Col, Str, Val, St, Toks, Cs, Line, Ccol).
-
-unicode_tokens(Line, Col, Str, Val, St, Toks, Cs, Cline, Ccol) ->
-    Attrs = attributes(Line, Col, St, Str),
-    Tag = if ?UNI255(Val) -> char; true -> integer end,
-    Token = {Tag,Attrs,Val},
-    [{',',attributes(Cline, Ccol, St, "")} || Cs =/= "\""] ++ [Token|Toks].
-
-scan_qatom(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0,Uni0}) ->
-    case scan_string0(Cs, St, Line, Col, $\', Str, Wcs, Uni0) of
-        {more,Ncs,Nline,Ncol,Nstr,Nwcs,Uni} ->
-            State = {Nwcs,Nstr,Line0,Col0,Uni},
+scan_qatom(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0}) ->
+    case scan_string0(Cs, St, Line, Col, $\', Str, Wcs) of %'
+        {more,Ncs,Nline,Ncol,Nstr,Nwcs} ->
+            State = {Nwcs,Nstr,Line0,Col0},
             {more,{Ncs,Ncol,Toks,Nline,State,fun scan_qatom/6}};
         {char_error,Ncs,Error,Nline,Ncol,EndCol} ->
             scan_error(Error, Nline, Ncol, Nline, EndCol, Ncs);
         {error,Nline,Ncol,Nwcs,Ncs} ->
             Estr = string:substr(Nwcs, 1, 16), % Expanded escape chars.
-            scan_error({string,$\',Estr}, Line0, Col0, Nline, Ncol, Ncs);
-        {Ncs,Nline,Ncol,Nstr,Nwcs,?NO_UNICODE} ->
+            scan_error({string,$\',Estr}, Line0, Col0, Nline, Ncol, Ncs); %'
+        {Ncs,Nline,Ncol,Nstr,Nwcs} ->
             case catch list_to_atom(Nwcs) of
                 A when is_atom(A) ->
                     Attrs = attributes(Line0, Col0, St, Nstr),
@@ -939,91 +901,75 @@ scan_qatom(Cs, St, Line, Col, Toks, {Wcs,Str,Line0,Col0,Uni0}) ->
             end
     end.
 
-scan_string0(Cs, #erl_scan{text=false}, Line, no_col=Col, Q, [], Wcs, Uni) ->
-    scan_string_no_col(Cs, Line, Col, Q, Wcs, Uni);
-scan_string0(Cs, #erl_scan{text=true}, Line, no_col=Col, Q, Str, Wcs, Uni) ->
-    scan_string1(Cs, Line, Col, Q, Str, Wcs, Uni);
-scan_string0(Cs, _St, Line, Col, Q, [], Wcs, Uni) ->
-    scan_string_col(Cs, Line, Col, Q, Wcs, Uni);
-scan_string0(Cs, _St, Line, Col, Q, Str, Wcs, Uni) ->
-    scan_string1(Cs, Line, Col, Q, Str, Wcs, Uni).
+scan_string0(Cs, #erl_scan{text=false}, Line, no_col=Col, Q, [], Wcs) ->
+    scan_string_no_col(Cs, Line, Col, Q, Wcs);
+scan_string0(Cs, #erl_scan{text=true}, Line, no_col=Col, Q, Str, Wcs) ->
+    scan_string1(Cs, Line, Col, Q, Str, Wcs);
+scan_string0(Cs, St, Line, Col, Q, [], Wcs) ->
+    scan_string_col(Cs, St, Line, Col, Q, Wcs);
+scan_string0(Cs, _St, Line, Col, Q, Str, Wcs) ->
+    scan_string1(Cs, Line, Col, Q, Str, Wcs).
 
 %% Optimization. Col =:= no_col.
-scan_string_no_col([Q|Cs], Line, Col, Q, Wcs, Uni) ->
-    {Cs,Line,Col,_DontCare=[],lists:reverse(Wcs),Uni};
-scan_string_no_col([$\n=C|Cs], Line, Col, Q, Wcs, Uni) ->
-    scan_string_no_col(Cs, Line+1, Col, Q, [C|Wcs], Uni);
-scan_string_no_col([C|Cs], Line, Col, Q, Wcs, Uni) when C =/= $\\,
-                                                        ?CHAR(C), ?UNI255(C) ->
-    scan_string_no_col(Cs, Line, Col, Q, [C|Wcs], Uni);
-scan_string_no_col(Cs, Line, Col, Q, Wcs, Uni) ->
-    scan_string1(Cs, Line, Col, Q, Wcs, Wcs, Uni).
+scan_string_no_col([Q|Cs], Line, Col, Q, Wcs) ->
+    {Cs,Line,Col,_DontCare=[],lists:reverse(Wcs)};
+scan_string_no_col([$\n=C|Cs], Line, Col, Q, Wcs) ->
+    scan_string_no_col(Cs, Line+1, Col, Q, [C|Wcs]);
+scan_string_no_col([C|Cs], Line, Col, Q, Wcs) when C =/= $\\, ?UNICODE(C) ->
+    scan_string_no_col(Cs, Line, Col, Q, [C|Wcs]);
+scan_string_no_col(Cs, Line, Col, Q, Wcs) ->
+    scan_string1(Cs, Line, Col, Q, Wcs, Wcs).
 
 %% Optimization. Col =/= no_col.
-scan_string_col([Q|Cs], Line, Col, Q, Wcs0, Uni) ->
+scan_string_col([Q|Cs], St, Line, Col, Q, Wcs0) ->
     Wcs = lists:reverse(Wcs0),
-    Str = [Q|Wcs++[Q]],
-    {Cs,Line,Col+1,Str,Wcs,Uni};
-scan_string_col([$\n=C|Cs], Line, _xCol, Q, Wcs, Uni) ->
-    scan_string_col(Cs, Line+1, 1, Q, [C|Wcs], Uni);
-scan_string_col([C|Cs], Line, Col, Q, Wcs, Uni) when C =/= $\\,
-                                                     ?CHAR(C), ?UNI255(C) ->
-    scan_string_col(Cs, Line, Col+1, Q, [C|Wcs], Uni);
-scan_string_col(Cs, Line, Col, Q, Wcs, Uni) ->
-    scan_string1(Cs, Line, Col, Q, Wcs, Wcs, Uni).
-
-%% UNI_STR is to be replaced by STR when the Unicode-string-to-list
-%% workaround is eventually removed.
--define(UNI_STR(Col, S), S).
+    Str = ?STR(St, [Q|Wcs++[Q]]),
+    {Cs,Line,Col+1,Str,Wcs};
+scan_string_col([$\n=C|Cs], St, Line, _xCol, Q, Wcs) ->
+    scan_string_col(Cs, St, Line+1, 1, Q, [C|Wcs]);
+scan_string_col([C|Cs], St, Line, Col, Q, Wcs) when C =/= $\\, ?UNICODE(C) ->
+    scan_string_col(Cs, St, Line, Col+1, Q, [C|Wcs]);
+scan_string_col(Cs, _St, Line, Col, Q, Wcs) ->
+    scan_string1(Cs, Line, Col, Q, Wcs, Wcs).
 
 %% Note: in those cases when a 'char_error' tuple is returned below it
 %% is tempting to skip over characters up to the first Q character,
 %% but then the end location of the error tuple would not correspond
 %% to the start location of the returned Rest string. (Maybe the end
 %% location could be modified, but that too is ugly.)
-scan_string1([Q|Cs], Line, Col, Q, Str0, Wcs0, Uni) ->
+scan_string1([Q|Cs], Line, Col, Q, Str0, Wcs0) ->
     Wcs = lists:reverse(Wcs0),
-    Str = ?UNI_STR(Col, [Q|lists:reverse(Str0, [Q])]),
-    {Cs,Line,incr_column(Col, 1),Str,Wcs,Uni};
-scan_string1([$\n=C|Cs], Line, Col, Q, Str, Wcs, Uni) ->
+    Str = [Q|lists:reverse(Str0, [Q])],
+    {Cs,Line,incr_column(Col, 1),Str,Wcs};
+scan_string1([$\n=C|Cs], Line, Col, Q, Str, Wcs) ->
     Ncol = new_column(Col, 1),
-    scan_string1(Cs, Line+1, Ncol, Q, ?UNI_STR(Col, [C|Str]), [C|Wcs], Uni);
-scan_string1([$\\|Cs]=Cs0, Line, Col, Q, Str, Wcs, Uni) ->
+    scan_string1(Cs, Line+1, Ncol, Q, [C|Str], [C|Wcs]);
+scan_string1([$\\|Cs]=Cs0, Line, Col, Q, Str, Wcs) ->
     case scan_escape(Cs, Col) of
         more ->
-            {more,Cs0,Line,Col,Str,Wcs,Uni};
+            {more,Cs0,Line,Col,Str,Wcs};
         {error,Ncs,Error,Ncol} ->
             {char_error,Ncs,Error,Line,Col,incr_column(Ncol, 1)};
         {eof,Ncol} ->
             {error,Line,incr_column(Ncol, 1),lists:reverse(Wcs),eof};
         {nl,Val,ValStr,Ncs,Ncol} ->
-            Nstr = ?UNI_STR(Ncol, lists:reverse(ValStr, [$\\|Str])),
+            Nstr = lists:reverse(ValStr, [$\\|Str]),
             Nwcs = [Val|Wcs],
-            scan_string1(Ncs, Line+1, Ncol, Q, Nstr, Nwcs, Uni);
-        {unicode,_Val,_ValStr,Ncs,Ncol} when Q =:= $' -> %' Emacs
-            {char_error,Ncs,{illegal,character},Line,Col,incr_column(Ncol, 1)};
-        {unicode,Val,ValStr,Ncs,Ncol} -> % UNI. Uni is set to Val.
-            Nstr = ?UNI_STR(Ncol, lists:reverse(ValStr, [$\\|Str])),
-            Nwcs = [Val|Wcs], % not used
-            scan_string1(Ncs, Line, incr_column(Ncol, 1), Q, Nstr, Nwcs, Val);
+            scan_string1(Ncs, Line+1, Ncol, Q, Nstr, Nwcs);
         {Val,ValStr,Ncs,Ncol} ->
-            Nstr = ?UNI_STR(Ncol, lists:reverse(ValStr, [$\\|Str])),
+            Nstr = lists:reverse(ValStr, [$\\|Str]),
             Nwcs = [Val|Wcs],
-            scan_string1(Ncs, Line, incr_column(Ncol, 1), Q, Nstr, Nwcs, Uni)
+            scan_string1(Ncs, Line, incr_column(Ncol, 1), Q, Nstr, Nwcs)
     end;
-scan_string1([C|Cs], Line, no_col=Col, Q, Str, Wcs, Uni) when ?CHAR(C),
-                                                              ?UNI255(C) ->
-    %% scan_string1(Cs, Line, Col, Q, Str, [C|Wcs], Uni);
-    scan_string1(Cs, Line, Col, Q, [C|Str], [C|Wcs], Uni); % UNI
-scan_string1([C|Cs], Line, Col, Q, Str, Wcs, Uni) when ?CHAR(C), ?UNI255(C) ->
-    scan_string1(Cs, Line, Col+1, Q, [C|Str], [C|Wcs], Uni);
-scan_string1([C|Cs], Line, Col, $', _Str, _Wcs, _Uni) when ?CHAR(C) -> %' UNI
+scan_string1([C|Cs], Line, no_col=Col, Q, Str, Wcs) when ?UNICODE(C) ->
+    scan_string1(Cs, Line, Col, Q, [C|Str], [C|Wcs]);
+scan_string1([C|Cs], Line, Col, Q, Str, Wcs) when ?UNICODE(C) ->
+    scan_string1(Cs, Line, Col+1, Q, [C|Str], [C|Wcs]);
+scan_string1([C|Cs], Line, Col, _Q, _Str, _Wcs) when ?CHAR(C) ->
     {char_error,Cs,{illegal,character},Line,Col,incr_column(Col, 1)};
-scan_string1([C|Cs], Line, Col, Q, Str, Wcs, _Uni) when ?CHAR(C) -> % UNI
-    scan_string1(Cs, Line, incr_column(Col, 1), Q, [C|Str], [C|Wcs], C);
-scan_string1([]=Cs, Line, Col, _Q, Str, Wcs, Uni) ->
-    {more,Cs,Line,Col,Str,Wcs,Uni};
-scan_string1(eof, Line, Col, _Q, _Str, Wcs, _Uni) ->
+scan_string1([]=Cs, Line, Col, _Q, Str, Wcs) ->
+    {more,Cs,Line,Col,Str,Wcs};
+scan_string1(eof, Line, Col, _Q, _Str, Wcs) ->
     {error,Line,Col,lists:reverse(Wcs),eof}.
 
 -define(OCT(C), C >= $0, C =< $7).
@@ -1034,16 +980,16 @@ scan_string1(eof, Line, Col, _Q, _Str, Wcs, _Uni) ->
 %% \<1-3> octal digits
 scan_escape([O1,O2,O3|Cs], Col) when ?OCT(O1), ?OCT(O2), ?OCT(O3) ->
     Val = (O1*8 + O2)*8 + O3 - 73*$0,
-    {Val,?UNI_STR(Col, [O1,O2,O3]),Cs,incr_column(Col, 3)};
+    {Val,[O1,O2,O3],Cs,incr_column(Col, 3)};
 scan_escape([O1,O2], _Col) when ?OCT(O1), ?OCT(O2) ->
     more;
 scan_escape([O1,O2|Cs], Col) when ?OCT(O1), ?OCT(O2) ->
     Val = (O1*8 + O2) - 9*$0,
-    {Val,?UNI_STR(Col, [O1,O2]),Cs,incr_column(Col, 2)};
+    {Val,[O1,O2],Cs,incr_column(Col, 2)};
 scan_escape([O1], _Col) when ?OCT(O1) ->
     more;
 scan_escape([O1|Cs], Col) when ?OCT(O1) ->
-    {O1 - $0,?UNI_STR(Col, [O1]),Cs,incr_column(Col, 1)};
+    {O1 - $0,[O1],Cs,incr_column(Col, 1)};
 %% \x{<hex digits>}
 scan_escape([$x,${|Cs], Col) ->
     scan_hex(Cs, incr_column(Col, 2), []);
@@ -1054,28 +1000,28 @@ scan_escape([$x|eof], Col) ->
 %% \x<2> hexadecimal digits
 scan_escape([$x,H1,H2|Cs], Col) when ?HEX(H1), ?HEX(H2) ->
     Val = erlang:list_to_integer([H1,H2], 16),
-    {Val,?UNI_STR(Col, [$x,H1,H2]),Cs,incr_column(Col, 3)};
+    {Val,[$x,H1,H2],Cs,incr_column(Col, 3)};
 scan_escape([$x,H1], _Col) when ?HEX(H1) ->
     more;
 scan_escape([$x|Cs], Col) ->
     {error,Cs,{illegal,character},incr_column(Col, 1)};
 %% \^X -> CTL-X
 scan_escape([$^=C0,$\n=C|Cs], Col) ->
-    {nl,C,?UNI_STR(Col, [C0,C]),Cs,new_column(Col, 1)};
+    {nl,C,[C0,C],Cs,new_column(Col, 1)};
 scan_escape([$^=C0,C|Cs], Col) when ?CHAR(C) ->
     Val = C band 31,
-    {Val,?UNI_STR(Col, [C0,C]),Cs,incr_column(Col, 2)};
+    {Val,[C0,C],Cs,incr_column(Col, 2)};
 scan_escape([$^], _Col) ->
     more;
 scan_escape([$^|eof], Col) ->
     {eof,incr_column(Col, 1)};
 scan_escape([$\n=C|Cs], Col) ->
-    {nl,C,?UNI_STR(Col, [C]),Cs,new_column(Col, 1)};
-scan_escape([C0|Cs], Col) when ?CHAR(C0), ?UNI255(C0) ->
+    {nl,C,[C],Cs,new_column(Col, 1)};
+scan_escape([C0|Cs], Col) when ?UNICODE(C0) ->
     C = escape_char(C0),
-    {C,?UNI_STR(Col, [C0]),Cs,incr_column(Col, 1)};
-scan_escape([C|Cs], Col) when ?CHAR(C) -> % UNI
-    {unicode,C,?UNI_STR(Col, [C]),Cs,incr_column(Col, 1)};
+    {C,[C0],Cs,incr_column(Col, 1)};
+scan_escape([C|Cs], Col) when ?CHAR(C) ->
+    {error,Cs,{illegal,character},incr_column(Col, 1)};
 scan_escape([], _Col) ->
     more;
 scan_escape(eof, Col) ->
@@ -1091,10 +1037,8 @@ scan_hex(Cs, Col, Wcs) ->
 scan_esc_end([$}|Cs], Col, Wcs0, B, Str0) ->
     Wcs = lists:reverse(Wcs0),
     case catch erlang:list_to_integer(Wcs, B) of
-        Val when Val =< 16#FF ->
-            {Val,?UNI_STR(Col, Str0++Wcs++[$}]),Cs,incr_column(Col, 1)};
-        Val when Val =< 16#10FFFF ->
-            {unicode,Val,?UNI_STR(Col, Str0++Wcs++[$}]),Cs,incr_column(Col,1)};
+        Val when ?UNICODE(Val) ->
+            {Val,Str0++Wcs++[$}],Cs,incr_column(Col, 1)};
         _ ->
             {error,Cs,{illegal,character},incr_column(Col, 1)}
     end;
@@ -1126,7 +1070,7 @@ scan_number([$#|Cs]=Cs0, St, Line, Col, Toks, Ncs0) ->
     Ncs = lists:reverse(Ncs0),
     case catch list_to_integer(Ncs) of
         B when B >= 2, B =< 1+$Z-$A+10 ->
-            Bcs = Ncs++[$#],
+            Bcs = ?STR(St, Ncs++[$#]),
             scan_based_int(Cs, St, Line, Col, Toks, {B,[],Bcs});
         B ->
             Len = length(Ncs),
@@ -1159,7 +1103,7 @@ scan_based_int(Cs, St, Line, Col, Toks, {B,Ncs0,Bcs}) ->
     Ncs = lists:reverse(Ncs0),
     case catch erlang:list_to_integer(Ncs, B) of
         N when is_integer(N) ->
-            tok3(Cs, St, Line, Col, Toks, integer, Bcs++Ncs, N);
+            tok3(Cs, St, Line, Col, Toks, integer, ?STR(St, Bcs++Ncs), N);
         _ ->
             Len = length(Bcs)+length(Ncs),
             Ncol = incr_column(Col, Len),
@@ -1200,14 +1144,26 @@ float_end(Cs, St, Line, Col, Toks, Ncs0) ->
     end.
 
 skip_comment([C|Cs], St, Line, Col, Toks, N) when C =/= $\n, ?CHAR(C) ->
-    skip_comment(Cs, St, Line, Col, Toks, N+1);
+    case ?UNICODE(C) of
+        true ->
+            skip_comment(Cs, St, Line, Col, Toks, N+1);
+        false ->
+            Ncol = incr_column(Col, N+1),
+            scan_error({illegal,character}, Line, Col, Line, Ncol, Cs)
+    end;
 skip_comment([]=Cs, _St, Line, Col, Toks, N) ->
     {more,{Cs,Col,Toks,Line,N,fun skip_comment/6}};
 skip_comment(Cs, St, Line, Col, Toks, N) ->
     scan1(Cs, St, Line, incr_column(Col, N), Toks).
 
 scan_comment([C|Cs], St, Line, Col, Toks, Ncs) when C =/= $\n, ?CHAR(C) ->
-    scan_comment(Cs, St, Line, Col, Toks, [C|Ncs]);
+    case ?UNICODE(C) of
+        true ->
+            scan_comment(Cs, St, Line, Col, Toks, [C|Ncs]);
+        false ->
+            Ncol = incr_column(Col, length(Ncs)+1),
+            scan_error({illegal,character}, Line, Col, Line, Ncol, Cs)
+    end;
 scan_comment([]=Cs, _St, Line, Col, Toks, Ncs) ->
     {more,{Cs,Col,Toks,Line,Ncs,fun scan_comment/6}};
 scan_comment(Cs, St, Line, Col, Toks, Ncs0) ->
@@ -1345,7 +1301,6 @@ reserved_word('fun') -> true;
 reserved_word('if') -> true;
 reserved_word('let') -> true;
 reserved_word('of') -> true;
-reserved_word('query') -> true;
 reserved_word('receive') -> true;
 reserved_word('when') -> true;
 reserved_word('bnot') -> true;