%%----------------------------------------------------------------------
%%
%% %CopyrightBegin%
%% 
%% Copyright Ericsson AB 1999-2009. All Rights Reserved.
%% 
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%% 
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%% 
%% %CopyrightEnd%
%%
%%
%%----------------------------------------------------------------------
%% File    : cosNotification_Scanner.erl
%% Purpose : Scan and pre-process a grammar.
%%----------------------------------------------------------------------

-module('cosNotification_Scanner').

-export([scan/1]).

%% scan(Str) -> {ok, Tokens} | {error, Reason}
%%
%% Tokenizes the constraint string Str (a list of characters), starting
%% at line 1 in scan mode 'any'. The internal scanner accumulates tokens
%% in reverse order, so the list is reversed before it is returned.
%%
%% The internal scanner may also give up with an {error, Reason} tuple
%% (for example {error, "illegal_float"}); such a result is passed on
%% unchanged instead of being handed to lists:reverse/1.
scan(Str) ->
    case scan(Str, 1, [], any) of
        {error, _Reason} = Error ->
            Error;
        RevTokens when is_list(RevTokens) ->
            {ok, lists:reverse(RevTokens)}
    end.


%% Character-class guard macros. Each one expands to two comparisons
%% separated by ',' and can therefore only be used in guard position.
%% They are used by scan/4 as well as by the scan_number, scan_frac2,
%% scan_exp2 and scan_name helpers.
-define(is_number(X), X >= $0, X =< $9).
-define(is_upper(X), X >= $A, X =< $Z).
-define(is_lower(X), X >= $a, X =< $z).

%%----------------------------------------------------------------------
%% scan
%%
%% A-Z
%% scan(Str, Line, Out, Type)
%%
%% Main dispatcher of the scanner. Looks at the first character(s) of
%% Str and either emits a token directly or hands over to one of the
%% specialised helpers (scan_name, scan_number, scan_frac, scan_const).
%%   Line - current line number, incremented on every LF.
%%   Out  - tokens found so far, most recent first.
%%   Type - 'dollar' once a '$' has been scanned and until a later token
%%          resets it, otherwise 'any'.
%% Returns Out when the input is exhausted, or whatever the helper that
%% took over returns.

%% Letters (A-Z, a-z) start a name.
scan([C | Rest], Line, Out, Type) when ?is_upper(C); ?is_lower(C) ->
    scan_name(Rest, [C], Line, Out, Type);
%% Digits (0-9) start a number.
scan([C | Rest], Line, Out, Type) when ?is_number(C) ->
    scan_number(Rest, [C], Line, Out, Type);

%% RELOP:s == != <= >= > <
%% The two-character operators are tried before '>' and '<'.
scan("==" ++ Rest, Line, Out, _Type) ->
    scan(Rest, Line, [{'RELOP', '=='} | Out], any);
scan("!=" ++ Rest, Line, Out, _Type) ->
    scan(Rest, Line, [{'RELOP', '!='} | Out], any);
scan("<=" ++ Rest, Line, Out, _Type) ->
    scan(Rest, Line, [{'RELOP', '<='} | Out], any);
scan(">=" ++ Rest, Line, Out, _Type) ->
    scan(Rest, Line, [{'RELOP', '>='} | Out], any);
scan(">" ++ Rest, Line, Out, _Type) ->
    scan(Rest, Line, [{'RELOP', '>'} | Out], any);
scan("<" ++ Rest, Line, Out, _Type) ->
    scan(Rest, Line, [{'RELOP', '<'} | Out], any);

%% ADDOP:s + - (these keep the current Type).
scan("+" ++ Rest, Line, Out, Type) ->
    scan(Rest, Line, [{'ADDOP', '+'} | Out], Type);
scan("-" ++ Rest, Line, Out, Type) ->
    scan(Rest, Line, [{'ADDOP', '-'} | Out], Type);

%% MULOP:s * /
scan("*" ++ Rest, Line, Out, _Type) ->
    scan(Rest, Line, [{'MULOP', '*'} | Out], any);
scan("/" ++ Rest, Line, Out, _Type) ->
    scan(Rest, Line, [{'MULOP', '/'} | Out], any);

%% TAB, SP and CR are skipped.
scan([C | Rest], Line, Out, Type) when C =:= $\t; C =:= $\s; C =:= $\r ->
    scan(Rest, Line, Out, Type);
%% LF is skipped too, but advances the line counter.
scan([$\n | Rest], Line, Out, Type) ->
    scan(Rest, Line + 1, Out, Type);

%% Backslash sequences: \\ and \' are tried before a single backslash.
scan([$\\, $\\ | Rest], Line, Out, Type) ->
    scan(Rest, Line, [{'dbslsh', Line} | Out], Type);
scan([$\\, $' | Rest], Line, Out, Type) ->
    scan(Rest, Line, [{'bslshd', Line} | Out], Type);
scan([$\\ | Rest], Line, Out, Type) ->
    scan(Rest, Line, [{'bslsh', Line} | Out], Type);

%% A leading '_' starts a name only in 'dollar' mode.
scan([$_ | Rest], Line, Out, dollar) ->
    scan_name(Rest, "_", Line, Out, dollar);

%% '$' switches to 'dollar' mode; a directly following backslash is
%% emitted as its own token after the 'dollar' token.
scan([$$, $\\ | Rest], Line, Out, _Type) ->
    scan(Rest, Line, [{'bslsh', Line}, {'dollar', Line} | Out], dollar);
scan([$$ | Rest], Line, Out, _Type) ->
    scan(Rest, Line, [{'dollar', Line} | Out], dollar);

%% Quoted constants: double quote selects 'char' mode, single quote
%% selects 'string' mode.
scan([$" | Rest], Line, Out, Type) ->
    scan_const(char, Rest, [], Line, Out, Type);
scan([$' | Rest], Line, Out, Type) ->
    scan_const(string, Rest, [], Line, Out, Type);

%% Writing '+.<CompDot>' is not allowed ('+' or '-' are only allowed
%% as unary for <UnionVal> (within a component) which must be an integer),
%% so a '.' directly after an ADDOP always starts a fraction.
scan([$. | Rest], Line, [{'ADDOP', _Op} | _] = Out, _Type) ->
    scan_frac(Rest, ".", Line, Out, any);
%% In 'dollar' mode it must be a <CompDot>.
scan([$. | Rest], Line, Out, dollar) ->
    scan(Rest, Line, [{'.', Line} | Out], dollar);
%% Otherwise it is the start of a number.
scan([$. | Rest], Line, Out, Type) ->
    scan_frac(Rest, ".", Line, Out, Type);

%% Any other character becomes a one-character atom token.
scan([C | Rest], Line, Out, Type) ->
    scan(Rest, Line, [{list_to_atom([C]), Line} | Out], Type);

%% End of input.
scan([], _Line, Out, _Type) ->
    Out.

%%----------------------------------------------------------------------
%% scan_number
%%

%% scan_number(Str, Accum, Line, Out, Type)
%%
%% Scans the integer part of a number. Accum holds the digits read so
%% far in reverse order.
%%   - In 'dollar' mode a '.' ends the integer and is emitted as a
%%     separate {'.', Line} token.
%%   - Otherwise a '.' continues with the fraction part (scan_frac).
%%   - 'e' or 'E' continues with the exponent part (scan_exp).
%%   - Anything else ends the number, which is emitted as {'int', N}.
scan_number([X|Str], Accum, Line, Out, Type) when ?is_number(X) ->
    scan_number(Str, [X|Accum], Line, Out, Type);
scan_number([$.|Str], Accum, Line, Out, dollar) ->
    Int = list_to_integer(lists:reverse(Accum)),
    scan(Str, Line, [{'.', Line}, {'int', Int} | Out], dollar);
scan_number([$.|Str], Accum, Line, Out, Type) ->
    scan_frac(Str, [$.|Accum], Line, Out, Type);
scan_number([X|Str], Accum, Line, Out, Type) when X =:= $e; X =:= $E ->
    %% A float such as "1e2" has no decimal point, but list_to_float/1
    %% requires one. Insert ".0" before the exponent marker so that the
    %% text finally converted is "1.0e2".
    scan_exp(Str, [X, $0, $. | Accum], Line, Out, Type);
scan_number(Str, Accum, Line, Out, Type) ->
    Int = list_to_integer(lists:reverse(Accum)),
    scan(Str, Line, [{'int', Int} | Out], Type).


%% Floating point number scan.
%%
%%	Non trivial scan. A float consists of an integral part, a
%%	decimal point, a fraction part, an e or E and a signed integer
%%	exponent. Either the integer part or the fraction part but not
%%	both may be missing, and either the decimal point or the
%%	exponent part but not both may be missing. The exponent part
%%	must consist of an e or E and a possibly signed exponent.
%%
%%	Analysis shows that "1." ".7" "1e2" ".5e-3" "1.7e2" "1.7e-2"
%%	is allowed and "1" ".e9" is not. The sign is only allowed just
%%	after an e or E. The scanner reads a number as an integer
%%	until it encounters a "." so the integer part only error case
%%	will not be caught in the scanner (but rather in expression
%%	evaluation)

%% scan_frac(Str, Accum, Line, Out, Type)
%%
%% Entry point for the fraction part; the '.' is already on top of
%% Accum. A number consisting of only a '.' followed directly by an
%% exponent marker (e.g. ".e9") is rejected; everything else is passed
%% on to scan_frac2.
scan_frac([C | _Rest], [$.], _Line, _Out, _Type) when C =:= $e; C =:= $E ->
    {error, "illegal_float"};
scan_frac(Rest, Acc, Line, Out, Type) ->
    scan_frac2(Rest, Acc, Line, Out, Type).

%% scan_frac2(Str, Accum, Line, Out, Type)
%%
%% Scans the digits after the decimal point. Accum holds the characters
%% of the number read so far (including the '.') in reverse order.
%%   - 'e' or 'E' continues with the exponent part (scan_exp).
%%   - A '.' with neither integer nor fraction digits is not a number
%%     and yields {error, "illegal_float"}.
%%   - Anything else ends the number, which is emitted as {'num', Float}.
scan_frac2([X|Str], Accum, Line, Out, Type) when ?is_number(X) ->
    scan_frac2(Str, [X|Accum], Line, Out, Type);
scan_frac2(_Str, [$.], _Line, _Out, _Type) ->
    %% Only a '.' was read and no digit follows.
    {error, "illegal_float"};
scan_frac2([X|Str], Accum, Line, Out, Type) when X =:= $e; X =:= $E ->
    scan_exp(Str, [X|pad_fraction(Accum)], Line, Out, Type);
%% Since '.2' is allowed, we add '0' in front to be sure (erlang does not
%% allow list_to_float(".2"), and list_to_float("0.2") equals
%% list_to_float("00.2")).
scan_frac2(Str, Accum, Line, Out, Type) ->
    Float = list_to_float([$0|lists:reverse(pad_fraction(Accum))]),
    scan(Str, Line, [{'num', Float} | Out], Type).

%% pad_fraction(Accum) -> Accum1
%%
%% "1." is an allowed float but list_to_float/1 requires at least one
%% digit after the decimal point, so a trailing '.' gets a '0' appended.
pad_fraction([$.|_] = Accum) ->
    [$0|Accum];
pad_fraction(Accum) ->
    Accum.

%% scan_exp(Str, Accum, Line, Out, Type)
%%
%% Called when the exponent marker ('e' or 'E') has just been pushed on
%% Accum. Consumes an optional sign and continues with the exponent
%% digits in scan_exp2. Both '-' and '+' are accepted, so that "1.0e+5"
%% is scanned as one float instead of failing in list_to_float/1.
scan_exp([Sign|Str], Accum, Line, Out, Type) when Sign =:= $-; Sign =:= $+ ->
    scan_exp2(Str, [Sign|Accum], Line, Out, Type);
scan_exp(Str, Accum, Line, Out, Type) ->
    scan_exp2(Str, Accum, Line, Out, Type).

%% scan_exp2(Str, Accum, Line, Out, Type)
%%
%% Scans the digits of the exponent. Accum holds the characters of the
%% number read so far in reverse order, so its head is the most recently
%% read character.
%%   - If no exponent digit was read (the head of Accum is still the
%%     exponent marker or its sign, as in "1.5e" or "1.5e-") the number
%%     is malformed and {error, "illegal_float"} is returned rather than
%%     letting list_to_float/1 fail.
%%   - Otherwise the number is emitted as {'num', Float}.
scan_exp2([X|Str], Accum, Line, Out, Type) when ?is_number(X) ->
    scan_exp2(Str, [X|Accum], Line, Out, Type);
scan_exp2(_Str, [Last|_], _Line, _Out, _Type)
  when Last =:= $e; Last =:= $E; Last =:= $-; Last =:= $+ ->
    {error, "illegal_float"};
%% Since '.2' is allowed, we add '0' in front to be sure (erlang does not
%% allow list_to_float(".2")).
scan_exp2(Str, Accum, Line, Out, Type) ->
    Float = list_to_float([$0|lists:reverse(Accum)]),
    scan(Str, Line, [{'num', Float} | Out], Type).


%% scan_name(Str, Accum, Line, Out, Type)
%%
%% Scans a name made up of letters and digits (and '_' while in
%% 'dollar' mode). Accum holds the characters read so far in reverse
%% order. When the name ends:
%%   - if the previous token is a backslash, the name is always emitted
%%     as {'ident', Name};
%%   - if check_name/1 recognises it, it is emitted as {Atom, Line} and
%%     the mode is reset to 'any';
%%   - otherwise it is emitted as {'ident', Name} and the mode is kept.
scan_name([C | Rest], Acc, Line, Out, Type)
  when ?is_upper(C); ?is_lower(C); ?is_number(C) ->
    scan_name(Rest, [C | Acc], Line, Out, Type);
scan_name([$_ | Rest], Acc, Line, Out, dollar) ->
    scan_name(Rest, [$_ | Acc], Line, Out, dollar);
scan_name(Rest, Acc, Line, [{bslsh, _} | _] = Out, Type) ->
    %% An escaped identifier.
    scan(Rest, Line, [{'ident', lists:reverse(Acc)} | Out], Type);
scan_name(Rest, Acc, Line, Out, Type) ->
    Name = lists:reverse(Acc),
    case check_name(Name) of
        false ->
            scan(Rest, Line, [{'ident', Name} | Out], Type);
        _ ->
            scan(Rest, Line, [{list_to_atom(Name), Line} | Out], any)
    end.

%% Shall scan a constant
%% scan_const(Mode, Str, Accum, Line, Out, Type)
%%
%% Scans the body of a quoted constant; the opening quote has already
%% been consumed. Accum holds the characters read so far in reverse
%% order.
%%   Mode = char   - terminated by a double quote, emitted as
%%                   {'ident', Atom}.
%%   Mode = string - terminated by a single quote, emitted as
%%                   {'string', List}.
%% A backslash followed by a character is translated with
%% escaped_char/1; for an unknown escape the character itself is kept.
%% If the input ends before the closing quote the unfinished constant
%% is dropped and the tokens scanned so far are returned. This is done
%% for both modes; previously an unterminated 'string' constant had no
%% matching clause.
scan_const(char, [$" | Rest], Accum, Line, Out, Type) ->
    %% NOTE(review): list_to_atom/1 creates one atom per distinct
    %% constant text. If Str can come from an untrusted peer this can
    %% fill the atom table - confirm against the callers.
    scan(Rest, Line,
         [{'ident', list_to_atom(lists:reverse(Accum))} | Out], Type);
scan_const(string, [$' | Rest], Accum, Line, Out, Type) ->
    scan(Rest, Line,
         [{'string', lists:reverse(Accum)} | Out], Type);
scan_const(_Mode, [], _Accum, _Line, Out, _Type) ->
    %% Unterminated constant.
    Out;
scan_const(Mode, [$\\, C | Rest], Accum, Line, Out, Type) ->
    Char = case escaped_char(C) of
               error -> C;   % Bad escape character, keep it as it is.
               EC -> EC
           end,
    scan_const(Mode, Rest, [Char | Accum], Line, Out, Type);
scan_const(Mode, [C | Rest], Accum, Line, Out, Type) ->
    scan_const(Mode, Rest, [C | Accum], Line, Out, Type).

%% Escaped character. Escaped chars are repr as two characters in the
%% input list of letters and this is translated into one char.
%% escaped_char(Char) -> Char1 | error
%%
%% Maps the character that follows a backslash in the input to the
%% single character it denotes. Returns the atom 'error' when the
%% character is not a known escape.
escaped_char($n) -> $\n;
escaped_char($t) -> $\t;
escaped_char($v) -> $\v;
escaped_char($b) -> $\b;
%% \r is carriage return (it used to be mapped to a space).
escaped_char($r) -> $\r;
escaped_char($f) -> $\f;
escaped_char($a) -> $\a;
escaped_char($\\) -> $\\;
escaped_char($?) -> $?;
escaped_char($') -> $';
escaped_char($") -> $";
%% Error
escaped_char(_Other) -> error.


%% check_name(Name) -> true | false
%%
%% Tells whether Name (a string) is one of the reserved words that the
%% scanner emits as its own token instead of as an identifier.
check_name(Name) ->
    lists:member(Name, ["exist", "default",
                        "_length", "_d", "_type_id", "_repos_id",
                        "not", "or", "and",
                        "FALSE", "TRUE",
                        "in"]).