diff options
Diffstat (limited to 'lib/asn1/src/asn1ct_tok.erl')
-rw-r--r-- | lib/asn1/src/asn1ct_tok.erl | 391 |
1 files changed, 391 insertions, 0 deletions
diff --git a/lib/asn1/src/asn1ct_tok.erl b/lib/asn1/src/asn1ct_tok.erl new file mode 100644 index 0000000000..27116c46c5 --- /dev/null +++ b/lib/asn1/src/asn1ct_tok.erl @@ -0,0 +1,391 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1997-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% +-module(asn1ct_tok). + +%% Tokenize ASN.1 code (input to parser generated with yecc) + +-export([get_name/2,tokenise/2, file/1]). + + +file(File) -> + case file:open(File, [read]) of + {error, Reason} -> + {error,{File,file:format_error(Reason)}}; + {ok,Stream} -> + process0(Stream) + end. + +process0(Stream) -> + process(Stream,0,[]). + +process(Stream,Lno,R) -> + process(io:get_line(Stream, ''), Stream,Lno+1,R). + +process(eof, Stream,Lno,R) -> + file:close(Stream), + lists:flatten(lists:reverse([{'$end',Lno}|R])); + + +process(L, Stream,Lno,R) when is_list(L) -> + %%io:format('read:~s',[L]), + case catch tokenise(L,Lno) of + {'ERR',Reason} -> + io:format("Tokeniser error on line: ~w ~w~n",[Lno,Reason]), + exit(0); + {multiline_comment,NestingLevel} -> + {RestL,Lno2} = process_skip_multiline_comment(Stream,Lno,NestingLevel), + process(RestL,Stream,Lno2,R); + T -> + %%io:format('toks:~w~n',[T]), + process(Stream,Lno,[T|R]) + end. + +process_skip_multiline_comment(Stream,Lno,NestingLevel) -> + process_skip_multiline_comment(io:get_line(Stream, ''), + Stream, Lno + 1, NestingLevel). +process_skip_multiline_comment(eof,_Stream,Lno,_NestingLevel) -> + io:format("Tokeniser error on line: ~w, premature end of multiline comment~n",[Lno]), + exit(0); +process_skip_multiline_comment(Line,Stream,Lno,NestingLevel) -> + case catch skip_multiline_comment(Line,NestingLevel) of + {multiline_comment,NestingLevel2} -> + process_skip_multiline_comment(Stream,Lno,NestingLevel2); + T -> + {T,Lno} + end. + +tokenise([H|T],Lno) when $a =< H , H =< $z -> + {X, T1} = get_name(T, [H]), + [{identifier,Lno, list_to_atom(X)}|tokenise(T1,Lno)]; + +tokenise([$&,H|T],Lno) when $A =< H , H =< $Z -> + {Y, T1} = get_name(T, [H]), + X = list_to_atom(Y), + [{typefieldreference, Lno, X} | tokenise(T1, Lno)]; + +tokenise([$&,H|T],Lno) when $a =< H , H =< $z -> + {Y, T1} = get_name(T, [H]), + X = list_to_atom(Y), + [{valuefieldreference, Lno, X} | tokenise(T1, Lno)]; + +tokenise([H|T],Lno) when $A =< H , H =< $Z -> + {Y, T1} = get_name(T, [H]), + X = list_to_atom(Y), + case reserved_word(X) of + true -> + [{X,Lno}|tokenise(T1,Lno)]; + false -> + [{typereference,Lno,X}|tokenise(T1,Lno)]; + rstrtype -> + [{restrictedcharacterstringtype,Lno,X}|tokenise(T1,Lno)] + end; + +tokenise([$-,H|T],Lno) when $0 =< H , H =< $9 -> + {X, T1} = get_number(T, [H]), + [{number,Lno,-1 * list_to_integer(X)}|tokenise(T1,Lno)]; + +tokenise([H|T],Lno) when $0 =< H , H =< $9 -> + {X, T1} = get_number(T, [H]), + [{number,Lno,list_to_integer(X)}|tokenise(T1,Lno)]; + +tokenise([$-,$-|T],Lno) -> + tokenise(skip_comment(T),Lno); + +tokenise([$/,$*|T],Lno) -> + tokenise(skip_multiline_comment(T,0),Lno); + +tokenise([$:,$:,$=|T],Lno) -> + [{'::=',Lno}|tokenise(T,Lno)]; + +tokenise([$'|T],Lno) -> + case catch collect_quoted(T,Lno,[]) of + {'ERR',_} -> + throw({'ERR','bad_quote'}); + {Thing, T1} -> + [Thing|tokenise(T1,Lno)] + end; + +tokenise([$"|T],Lno) -> + collect_string(T,Lno); + +tokenise([${|T],Lno) -> + [{'{',Lno}|tokenise(T,Lno)]; + +tokenise([$}|T],Lno) -> + [{'}',Lno}|tokenise(T,Lno)]; + +tokenise([$]|T],Lno) -> + [{']',Lno}|tokenise(T,Lno)]; + +tokenise([$[|T],Lno) -> + [{'[',Lno}|tokenise(T,Lno)]; + +tokenise([$,|T],Lno) -> + [{',',Lno}|tokenise(T,Lno)]; + +tokenise([$(|T],Lno) -> + [{'(',Lno}|tokenise(T,Lno)]; +tokenise([$)|T],Lno) -> + [{')',Lno}|tokenise(T,Lno)]; + +tokenise([$.,$.,$.|T],Lno) -> + [{'...',Lno}|tokenise(T,Lno)]; + +tokenise([$.,$.|T],Lno) -> + [{'..',Lno}|tokenise(T,Lno)]; + +tokenise([$.|T],Lno) -> + [{'.',Lno}|tokenise(T,Lno)]; +tokenise([$^|T],Lno) -> + [{'^',Lno}|tokenise(T,Lno)]; +tokenise([$!|T],Lno) -> + [{'!',Lno}|tokenise(T,Lno)]; +tokenise([$||T],Lno) -> + [{'|',Lno}|tokenise(T,Lno)]; + + +tokenise([H|T],Lno) -> + case white_space(H) of + true -> + tokenise(T,Lno); + false -> + [{list_to_atom([H]),Lno}|tokenise(T,Lno)] + end; +tokenise([],_) -> + []. + + +collect_string(L,Lno) -> + collect_string(L,Lno,[]). + +collect_string([],_,_) -> + throw({'ERR','bad_quote found eof'}); + +collect_string([H|T],Lno,Str) -> + case H of + $" -> + [{cstring,1,lists:reverse(Str)}|tokenise(T,Lno)]; + Ch -> + collect_string(T,Lno,[Ch|Str]) + end. + + + +% <name> is letters digits hyphens +% hypen is not the last character. Hypen hyphen is NOT allowed +% +% <identifier> ::= <lowercase> <name> + +get_name([$-,Char|T], L) -> + case isalnum(Char) of + true -> + get_name(T,[Char,$-|L]); + false -> + {lists:reverse(L),[$-,Char|T]} + end; +get_name([$-|T], L) -> + {lists:reverse(L),[$-|T]}; +get_name([Char|T], L) -> + case isalnum(Char) of + true -> + get_name(T,[Char|L]); + false -> + {lists:reverse(L),[Char|T]} + end; +get_name([], L) -> + {lists:reverse(L), []}. + + +isalnum(H) when $A =< H , H =< $Z -> + true; +isalnum(H) when $a =< H , H =< $z -> + true; +isalnum(H) when $0 =< H , H =< $9 -> + true; +isalnum(_) -> + false. + +isdigit(H) when $0 =< H , H =< $9 -> + true; +isdigit(_) -> + false. + +white_space(9) -> true; +white_space(10) -> true; +white_space(13) -> true; +white_space(32) -> true; +white_space(_) -> false. + + +get_number([H|T], L) -> + case isdigit(H) of + true -> + get_number(T, [H|L]); + false -> + {lists:reverse(L), [H|T]} + end; +get_number([], L) -> + {lists:reverse(L), []}. + +skip_comment([]) -> + []; +skip_comment([$-,$-|T]) -> + T; +skip_comment([_|T]) -> + skip_comment(T). + + +skip_multiline_comment([],L) -> + throw({multiline_comment,L}); +skip_multiline_comment([$*,$/|T],0) -> + T; +skip_multiline_comment([$*,$/|T],Level) -> + skip_multiline_comment(T,Level - 1); +skip_multiline_comment([$/,$*|T],Level) -> + skip_multiline_comment(T,Level + 1); +skip_multiline_comment([_|T],Level) -> + skip_multiline_comment(T,Level). + + +collect_quoted([$',$B|T],Lno, L) -> + case check_bin(L) of + true -> + {{bstring,Lno, lists:reverse(L)}, T}; + false -> + throw({'ERR',{invalid_binary_number, lists:reverse(L)}}) + end; +collect_quoted([$',$H|T],Lno, L) -> + case check_hex(L) of + true -> + {{hstring,Lno, lists:reverse(L)}, T}; + false -> + throw({'ERR',{invalid_binary_number, lists:reverse(L)}}) + end; +collect_quoted([H|T], Lno, L) -> + collect_quoted(T, Lno,[H|L]); +collect_quoted([], _, _) -> % This should be allowed FIX later + throw({'ERR',{eol_in_token}}). + +check_bin([$0|T]) -> + check_bin(T); +check_bin([$1|T]) -> + check_bin(T); +check_bin([]) -> + true; +check_bin(_) -> + false. + +check_hex([H|T]) when $0 =< H , H =< $9 -> + check_hex(T); +check_hex([H|T]) when $A =< H , H =< $F -> + check_hex(T); +check_hex([]) -> + true; +check_hex(_) -> + false. + + +%% reserved_word(A) -> true|false|rstrtype +%% A = atom() +%% returns true if A is a reserved ASN.1 word +%% returns false if A is not a reserved word +%% returns rstrtype if A is a reserved word in the group +%% RestrictedCharacterStringType +reserved_word('ABSENT') -> true; +%reserved_word('ABSTRACT-SYNTAX') -> true; % impl as predef item +reserved_word('ALL') -> true; +reserved_word('ANY') -> true; +reserved_word('APPLICATION') -> true; +reserved_word('AUTOMATIC') -> true; +reserved_word('BEGIN') -> true; +reserved_word('BIT') -> true; +reserved_word('BMPString') -> rstrtype; +reserved_word('BOOLEAN') -> true; +reserved_word('BY') -> true; +reserved_word('CHARACTER') -> true; +reserved_word('CHOICE') -> true; +reserved_word('CLASS') -> true; +reserved_word('COMPONENT') -> true; +reserved_word('COMPONENTS') -> true; +reserved_word('CONSTRAINED') -> true; +reserved_word('CONTAINING') -> true; +reserved_word('DEFAULT') -> true; +reserved_word('DEFINED') -> true; +reserved_word('DEFINITIONS') -> true; +reserved_word('EMBEDDED') -> true; +reserved_word('ENCODED') -> true; +reserved_word('END') -> true; +reserved_word('ENUMERATED') -> true; +reserved_word('EXCEPT') -> true; +reserved_word('EXPLICIT') -> true; +reserved_word('EXPORTS') -> true; +reserved_word('EXTERNAL') -> true; +reserved_word('FALSE') -> true; +reserved_word('FROM') -> true; +reserved_word('GeneralizedTime') -> true; +reserved_word('GeneralString') -> rstrtype; +reserved_word('GraphicString') -> rstrtype; +reserved_word('IA5String') -> rstrtype; +% reserved_word('TYPE-IDENTIFIER') -> true; % impl as predef item +reserved_word('IDENTIFIER') -> true; +reserved_word('IMPLICIT') -> true; +reserved_word('IMPORTS') -> true; +reserved_word('INCLUDES') -> true; +reserved_word('INSTANCE') -> true; +reserved_word('INTEGER') -> true; +reserved_word('INTERSECTION') -> true; +reserved_word('ISO646String') -> rstrtype; +reserved_word('MAX') -> true; +reserved_word('MIN') -> true; +reserved_word('MINUS-INFINITY') -> true; +reserved_word('NULL') -> true; +reserved_word('NumericString') -> rstrtype; +reserved_word('OBJECT') -> true; +reserved_word('ObjectDescriptor') -> true; +reserved_word('OCTET') -> true; +reserved_word('OF') -> true; +reserved_word('OPTIONAL') -> true; +reserved_word('PATTERN') -> true; +reserved_word('PDV') -> true; +reserved_word('PLUS-INFINITY') -> true; +reserved_word('PRESENT') -> true; +reserved_word('PrintableString') -> rstrtype; +reserved_word('PRIVATE') -> true; +reserved_word('REAL') -> true; +reserved_word('RELATIVE-OID') -> true; +reserved_word('SEQUENCE') -> true; +reserved_word('SET') -> true; +reserved_word('SIZE') -> true; +reserved_word('STRING') -> true; +reserved_word('SYNTAX') -> true; +reserved_word('T61String') -> rstrtype; +reserved_word('TAGS') -> true; +reserved_word('TeletexString') -> rstrtype; +reserved_word('TRUE') -> true; +reserved_word('UNION') -> true; +reserved_word('UNIQUE') -> true; +reserved_word('UNIVERSAL') -> true; +reserved_word('UniversalString') -> rstrtype; +reserved_word('UTCTime') -> true; +reserved_word('UTF8String') -> rstrtype; +reserved_word('VideotexString') -> rstrtype; +reserved_word('VisibleString') -> rstrtype; +reserved_word('WITH') -> true; +reserved_word(_) -> false. |