aboutsummaryrefslogblamecommitdiffstats
path: root/lib/asn1/src/asn1ct_tok.erl
blob: 8235b689f822b1fc0bbcfc9d57113594b1928c04 (plain) (tree)
1
2
3
4
5

                   
  
                                                        
  










                                                                           
  






                                                               
                                 
 

                                     
                          
                                                      
                      







                                                                          

        

                                                     
 
                                 
                            










                                                
 





















                                                               
                                 
                                                                    
 


                                                          
 



                                              
 


                                              



                                     



                                            
 
                                                           


                                                                         
                                                                    
                                                                 
 



                                            
 

                                            
 



                                            
 





                                              
 

                                            
 


                                                         
               
                                                   
                


                                                                                
        
 


                                                         
 


                                                                   
 

                                                
 



                                                          

 

                               
 












                                                                

                         
                                       
                
                                                 
        


                                         

                         
                                    
                
                                                 
        

                                          














                                    
                          



                                 
                                  



                           


                                       
 

                                                 
              
                                          
               
                                                              
        
                                                   
            







                                                      

                        
                                                
                
                                                                   
        
                                  

                        
                                                
                
                                                                



                                                                     
                                


























                                                         
















                                       
                                                                







                                     
                                       






                                           

                                    
                                 




                                      




































                                             
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 1997-2016. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%
%% %CopyrightEnd%
%%
%%
-module(asn1ct_tok).

%% Tokenize ASN.1 code (input to parser generated with yecc)   

-export([file/1,format_error/1]).

file(File0) ->
    case file:open(File0, [read])  of
	{error, Reason} ->
	    {error,{File0,file:format_error(Reason)}};
	{ok,Stream} ->
	    try
		process(Stream, 1, [])
	    catch
		throw:{error,Line,Reason} ->
		    File = filename:basename(File0),
		    Error = {structured_error,{File,Line},?MODULE,Reason},
		    {error,[Error]}
	    end
    end.

process(Stream, Lno, R) ->
    process(io:get_line(Stream, ''), Stream, Lno, R).

process(eof, Stream, Lno, Acc) ->
    ok = file:close(Stream),
    lists:reverse([{'$end',Lno}|Acc]);
process(L, Stream, Lno0, Acc) when is_list(L) ->
    try tokenise(Stream, L, Lno0, []) of
	{Lno,[]} ->
	    process(Stream, Lno, Acc);
	{Lno,Ts} ->
	    process(Stream, Lno, Ts++Acc)
    catch
	throw:{error,Reason} ->
	    throw({error,Lno0,Reason})
    end.

format_error(eof_in_comment) ->
    "premature end of file in multi-line comment";
format_error(eol_in_token) ->
    "end of line in token";
format_error({invalid_binary_number,Str}) ->
    io_lib:format("invalid binary number: '~s'", [Str]);
format_error({invalid_hex_number,Str}) ->
    io_lib:format("invalid hex number: '~s'", [Str]);
format_error(Other) ->
    io_lib:format("~p", [Other]).

tokenise(Stream, [$&,H|T], Lno, R) when $A =< H , H =< $Z ->
    {X,T1} = get_name(T, [H]),
    tokenise(Stream, T1, Lno, [{typefieldreference,Lno,X}|R]);
tokenise(Stream, [$&,H|T], Lno, R) when $a =< H , H =< $z ->
    {X,T1} = get_name(T, [H]),
    tokenise(Stream, T1, Lno, [{valuefieldreference,Lno,X}|R]);

tokenise(Stream, "--"++T, Lno, R) ->
    tokenise(Stream, skip_comment(T), Lno, R);

tokenise(Stream, [$-,H|T], Lno, R) when $0 =< H , H =< $9 ->
    {X, T1} = get_number(T, [H]),
    tokenise(Stream, T1, Lno, [{number,Lno,-list_to_integer(X)}|R]);

tokenise(Stream, "/*"++T, Lno0, R) ->
    {Lno,T1} = skip_multiline_comment(Stream, T, Lno0, 0),
    tokenise(Stream, T1, Lno, R);

tokenise(Stream, "::="++T, Lno, R) ->
    tokenise(Stream, T, Lno, [{'::=',Lno}|R]);
tokenise(Stream, ":"++T, Lno, R) ->
    tokenise(Stream, T, Lno, [{':',Lno}|R]);

tokenise(Stream, "'"++T0, Lno, R) ->
    {Thing, T1} = collect_quoted(T0, Lno, []),
    tokenise(Stream, T1, Lno, [Thing|R]);
tokenise(Stream,[$"|T],Lno,R) ->
    {Str,T1} = collect_string(T,Lno),
    tokenise(Stream,T1,Lno,[Str|R]);

tokenise(Stream, "{"++T, Lno, R) ->
    tokenise(Stream, T, Lno, [{'{',Lno}|R]);
tokenise(Stream, "}"++T, Lno, R) ->
    tokenise(Stream, T, Lno, [{'}',Lno}|R]);

%% Even though x.680 specify '[[' and ']]' as lexical items
%% it does not work to have them as such since the single '[' and ']' can
%% be used beside each other in 'WITH SYNTAX' in x.681.
%% The solution chosen here, i.e. to have them as separate lexical items
%% will not detect the cases where there is white space between them
%% which would be an error in the use in ExtensionAdditionGroups.

tokenise(Stream, "]"++T, Lno, R) ->
    tokenise(Stream, T, Lno, [{']',Lno}|R]);
tokenise(Stream, "["++T,Lno,R) ->
    tokenise(Stream, T, Lno, [{'[',Lno}|R]);

tokenise(Stream, ","++T,Lno,R) ->
    tokenise(Stream, T, Lno, [{',',Lno}|R]);

tokenise(Stream, "("++T, Lno, R) ->
    tokenise(Stream, T, Lno, [{'(',Lno}|R]);
tokenise(Stream, ")"++T, Lno, R) ->
    tokenise(Stream, T, Lno, [{')',Lno}|R]);

tokenise(Stream, "..."++T,Lno,R) ->
    tokenise(Stream, T, Lno, [{'...',Lno}|R]);
tokenise(Stream, ".."++T, Lno, R) ->
    tokenise(Stream, T, Lno, [{'..',Lno}|R]);
tokenise(Stream, "."++T, Lno, R) ->
    tokenise(Stream, T, Lno, [{'.',Lno}|R]);

tokenise(Stream, "|"++T, Lno, R) ->
    tokenise(Stream, T, Lno, [{'|',Lno}|R]);

tokenise(Stream, [H|T], Lno, R) when $A =< H , H =< $Z ->
    {X,T1} = get_name(T, [H]),
    case reserved_word(X) of
	true ->
	    tokenise(Stream, T1, Lno, [{X,Lno}|R]);
	false ->
	    tokenise(Stream, T1, Lno, [{typereference,Lno,X}|R]);
	rstrtype ->
	    tokenise(Stream, T1, Lno, [{restrictedcharacterstringtype,Lno,X}|R])
    end;

tokenise(Stream, [H|T], Lno, R) when $a =< H , H =< $z ->
    {X, T1} = get_name(T, [H]),
    tokenise(Stream, T1, Lno, [{identifier,Lno,X}|R]);

tokenise(Stream, [H|T], Lno, R) when $0 =< H , H =< $9 ->
    {X, T1} = get_number(T, [H]),
    tokenise(Stream, T1, Lno, [{number,Lno,list_to_integer(X)}|R]);

tokenise(Stream, [H|T], Lno, R) when H =< $\s ->
    tokenise(Stream, T, Lno, R);

tokenise(Stream, [H|T], Lno, R) ->
    tokenise(Stream, T, Lno, [{list_to_atom([H]),Lno}|R]);
tokenise(_Stream, [], Lno, R) ->
    {Lno+1,R}.


collect_string(L, Lno) ->
    collect_string(L, Lno, []).

collect_string([$"|T], _Lno, Str) ->
    {{cstring,1,lists:reverse(Str)},T};
collect_string([H|T], Lno, Str) ->
    collect_string(T, Lno, [H|Str]);
collect_string([], _, _) ->
    throw({error,missing_quote_at_eof}).
           
%% <name> is letters digits hyphens.
%% Hypen is not the last character. Hypen hyphen is NOT allowed.
%%
%% <identifier> ::= <lowercase> <name>

get_name([$-,Char|T]=T0, Acc) ->
    case isalnum(Char) of
	true ->
	    get_name(T, [Char,$-|Acc]);
	false ->
	    {list_to_atom(lists:reverse(Acc)),T0}
    end;
get_name([$-|_]=T, Acc) ->
    {list_to_atom(lists:reverse(Acc)),T};
get_name([Char|T]=T0, Acc) ->
    case isalnum(Char) of
	true ->
	    get_name(T, [Char|Acc]);
	false ->
	    {list_to_atom(lists:reverse(Acc)),T0}
    end;
get_name([], Acc) ->
    {list_to_atom(lists:reverse(Acc)),[]}.
	    
isalnum(H) when $A =< H , H =< $Z ->
    true;
isalnum(H) when $a =< H , H =< $z ->
    true;
isalnum(H) when $0 =< H , H =< $9 ->
    true;
isalnum(_) ->
    false.

isdigit(H) when $0 =< H , H =< $9 ->
    true;
isdigit(_) ->
    false.

get_number([H|T]=T0, L) ->
    case isdigit(H) of
	true ->
	    get_number(T, [H|L]);
	false ->
	    {lists:reverse(L), T0}
    end;
get_number([], L) ->
    {lists:reverse(L), []}.

skip_comment([]) -> [];
skip_comment("--"++T) -> T;
skip_comment([_|T]) -> skip_comment(T).

skip_multiline_comment(Stream, [], Lno, Level) ->
    case io:get_line(Stream, '') of
	eof ->
	    throw({error,eof_in_comment});
	Line ->
	    skip_multiline_comment(Stream, Line, Lno+1, Level)
    end;
skip_multiline_comment(_Stream, "*/"++T, Lno, 0) ->
    {Lno,T};
skip_multiline_comment(Stream, "*/"++T, Lno, Level) ->
    skip_multiline_comment(Stream, T, Lno, Level - 1);
skip_multiline_comment(Stream, "/*"++T, Lno, Level) ->
    skip_multiline_comment(Stream, T, Lno, Level + 1);
skip_multiline_comment(Stream, [_|T], Lno, Level) ->
    skip_multiline_comment(Stream, T, Lno, Level).

collect_quoted("'B"++T, Lno, L) ->
    case check_bin(L) of
        true ->
            {{bstring,Lno,lists:reverse(L)}, T};
        false ->
            throw({error,{invalid_binary_number,lists:reverse(L)}})
    end;
collect_quoted("'H"++T, Lno, L) ->
    case check_hex(L) of
        true ->
            {{hstring,Lno,lists:reverse(L)}, T};
        false ->
            throw({error,{invalid_hex_number,lists:reverse(L)}})
    end;
collect_quoted([H|T], Lno, L) ->
    collect_quoted(T, Lno,[H|L]);
collect_quoted([], _, _) ->        % This should be allowed FIX later
    throw({error,eol_in_token}).

check_bin([$0|T]) ->
    check_bin(T);
check_bin([$1|T]) ->
    check_bin(T);
check_bin([]) ->
    true;
check_bin(_) ->
    false.

check_hex([H|T]) when $0 =< H , H =< $9 ->
    check_hex(T);
check_hex([H|T])  when $A =< H , H =< $F ->
    check_hex(T);
check_hex([]) ->
    true;
check_hex(_) ->
    false.


%% reserved_word(A) -> true|false|rstrtype
%% A = atom()
%% returns true if A is a reserved ASN.1 word
%% returns false if A is not a reserved word
%% returns rstrtype if A is a reserved word in the group 
%% 	RestrictedCharacterStringType
reserved_word('ABSENT') -> true;
reserved_word('ALL') -> true;
reserved_word('ANY') -> true;
reserved_word('APPLICATION') -> true;
reserved_word('AUTOMATIC') -> true;
reserved_word('BEGIN') -> true;
reserved_word('BIT') -> true;
reserved_word('BMPString') -> rstrtype;
reserved_word('BOOLEAN') -> true;
reserved_word('BY') -> true;
reserved_word('CHARACTER') -> true;
reserved_word('CHOICE') -> true;
reserved_word('CLASS') -> true;
reserved_word('COMPONENT') -> true;
reserved_word('COMPONENTS') -> true;
reserved_word('CONSTRAINED') -> true;
reserved_word('CONTAINING') -> true;
reserved_word('DEFAULT') -> true;
reserved_word('DEFINED') -> true; % not present in X.680 07/2002
reserved_word('DEFINITIONS') -> true;
reserved_word('EMBEDDED') -> true;
reserved_word('ENCODED') -> true;
reserved_word('END') -> true;
reserved_word('ENUMERATED') -> true;
reserved_word('EXCEPT') -> true;
reserved_word('EXPLICIT') -> true;
reserved_word('EXPORTS') -> true;
reserved_word('EXTENSIBILITY') -> true;
reserved_word('EXTERNAL') -> true;
reserved_word('FALSE') -> true;
reserved_word('FROM') -> true;
reserved_word('GeneralizedTime') -> true;
reserved_word('GeneralString') -> rstrtype;
reserved_word('GraphicString') -> rstrtype;
reserved_word('IA5String') -> rstrtype;
reserved_word('IDENTIFIER') -> true;
reserved_word('IMPLICIT') -> true;
reserved_word('IMPLIED') -> true;
reserved_word('IMPORTS') -> true;
reserved_word('INCLUDES') -> true;
reserved_word('INSTANCE') -> true;
reserved_word('INTEGER') -> true;
reserved_word('INTERSECTION') -> true;
reserved_word('MAX') -> true;
reserved_word('MIN') -> true;
reserved_word('MINUS-INFINITY') -> true;
reserved_word('NULL') -> true;
reserved_word('NumericString') -> rstrtype;
reserved_word('OBJECT') -> true;
reserved_word('ObjectDescriptor') -> true;
reserved_word('OCTET') -> true;
reserved_word('OF') -> true;
reserved_word('OPTIONAL') -> true;
reserved_word('PATTERN') -> true;
reserved_word('PDV') -> true;
reserved_word('PLUS-INFINITY') -> true;
reserved_word('PRESENT') -> true;
reserved_word('PrintableString') -> rstrtype;
reserved_word('PRIVATE') -> true;
reserved_word('REAL') -> true;
reserved_word('RELATIVE-OID') -> true;
reserved_word('SEQUENCE') -> true;
reserved_word('SET') -> true;
reserved_word('SIZE') -> true;
reserved_word('STRING') -> true;
reserved_word('SYNTAX') -> true;
reserved_word('T61String') -> rstrtype;
reserved_word('TAGS') -> true;
reserved_word('TeletexString') -> rstrtype;
reserved_word('TRUE') -> true;
reserved_word('UNION') -> true;
reserved_word('UNIQUE') -> true;
reserved_word('UNIVERSAL') -> true;
reserved_word('UniversalString') -> rstrtype;
reserved_word('UTCTime') -> true;
reserved_word('UTF8String') -> rstrtype;
reserved_word('VideotexString') -> rstrtype;
reserved_word('VisibleString') -> rstrtype;
reserved_word('WITH') -> true;
reserved_word(_) -> false.