diff options
-rw-r--r-- | lib/asn1/src/asn1ct_tok.erl | 332 | ||||
-rw-r--r-- | lib/asn1/test/syntax_SUITE.erl | 38 |
2 files changed, 180 insertions, 190 deletions
diff --git a/lib/asn1/src/asn1ct_tok.erl b/lib/asn1/src/asn1ct_tok.erl index 8687ed955c..d51fea6402 100644 --- a/lib/asn1/src/asn1ct_tok.erl +++ b/lib/asn1/src/asn1ct_tok.erl @@ -21,191 +21,177 @@ %% Tokenize ASN.1 code (input to parser generated with yecc) --export([get_name/2,tokenise/4, file/1]). +-export([file/1,format_error/1]). - -file(File) -> - case file:open(File, [read]) of +file(File0) -> + case file:open(File0, [read]) of {error, Reason} -> - {error,{File,file:format_error(Reason)}}; + {error,{File0,file:format_error(Reason)}}; {ok,Stream} -> - process(Stream,0,[]) + try + process(Stream, 1, []) + catch + throw:{error,Line,Reason} -> + File = filename:basename(File0), + Error = {structured_error,{File,Line},?MODULE,Reason}, + {error,[Error]} + end end. -process(Stream,Lno,R) -> - process(io:get_line(Stream, ''), Stream,Lno+1,R). +process(Stream, Lno, R) -> + process(io:get_line(Stream, ''), Stream, Lno, R). -process(eof, Stream,Lno,R) -> +process(eof, Stream, Lno, Acc) -> ok = file:close(Stream), - lists:flatten(lists:reverse([{'$end',Lno}|R])); - - -process(L, Stream,Lno,R) when is_list(L) -> - %%io:format('read:~s',[L]), - case catch tokenise(Stream,L,Lno,[]) of - {'ERR',Reason} -> - io:format("Tokeniser error on line: ~w ~w~n",[Lno,Reason]), - exit(0); - {NewLno,T} -> - %%io:format('toks:~w~n',[T]), - process(Stream,NewLno,[T|R]) - end. - -tokenise(Stream,[H|T],Lno,R) when $a =< H , H =< $z -> - {X, T1} = get_name(T, [H]), - tokenise(Stream,T1,Lno,[{identifier,Lno, list_to_atom(X)}|R]); - -tokenise(Stream,[$&,H|T],Lno,R) when $A =< H , H =< $Z -> - {Y, T1} = get_name(T, [H]), - X = list_to_atom(Y), - tokenise(Stream,T1,Lno,[{typefieldreference, Lno, X} | R]); - -tokenise(Stream,[$&,H|T],Lno,R) when $a =< H , H =< $z -> - {Y, T1} = get_name(T, [H]), - X = list_to_atom(Y), - tokenise(Stream,T1,Lno,[{valuefieldreference, Lno, X} | R]); - -tokenise(Stream,[H|T],Lno,R) when $A =< H , H =< $Z -> - {Y, T1} = get_name(T, [H]), - X = list_to_atom(Y), - case reserved_word(X) of - true -> - tokenise(Stream,T1,Lno,[{X,Lno}|R]); - false -> - tokenise(Stream,T1,Lno,[{typereference,Lno,X}|R]); - rstrtype -> - tokenise(Stream,T1,Lno,[{restrictedcharacterstringtype,Lno,X}|R]) - end; - -tokenise(Stream,[$-,H|T],Lno,R) when $0 =< H , H =< $9 -> - {X, T1} = get_number(T, [H]), - tokenise(Stream,T1,Lno,[{number,Lno,-1 * list_to_integer(X)}|R]); + lists:reverse([{'$end',Lno}|Acc]); +process(L, Stream, Lno0, Acc) when is_list(L) -> + try tokenise(Stream, L, Lno0, []) of + {Lno,[]} -> + process(Stream, Lno, Acc); + {Lno,Ts} -> + process(Stream, Lno, Ts++Acc) + catch + throw:{error,Reason} -> + throw({error,Lno0,Reason}) + end. -tokenise(Stream,[H|T],Lno,R) when $0 =< H , H =< $9 -> +format_error(eof_in_comment) -> + "premature end of file in multi-line comment"; +format_error(eol_in_token) -> + "end of line in token"; +format_error({invalid_binary_number,Str}) -> + io_lib:format("invalid binary number: '~s'", [Str]); +format_error({invalid_hex_number,Str}) -> + io_lib:format("invalid hex number: '~s'", [Str]); +format_error(Other) -> + io_lib:format("~p", [Other]). + +tokenise(Stream, [$&,H|T], Lno, R) when $A =< H , H =< $Z -> + {X,T1} = get_name(T, [H]), + tokenise(Stream, T1, Lno, [{typefieldreference,Lno,X}|R]); +tokenise(Stream, [$&,H|T], Lno, R) when $a =< H , H =< $z -> + {X,T1} = get_name(T, [H]), + tokenise(Stream, T1, Lno, [{valuefieldreference,Lno,X}|R]); + +tokenise(Stream, "--"++T, Lno, R) -> + tokenise(Stream, skip_comment(T), Lno, R); + +tokenise(Stream, [$-,H|T], Lno, R) when $0 =< H , H =< $9 -> {X, T1} = get_number(T, [H]), - tokenise(Stream,T1,Lno,[{number,Lno,list_to_integer(X)}|R]); - -tokenise(Stream,[$-,$-|T],Lno,R) -> - tokenise(Stream,skip_comment(T),Lno,R); + tokenise(Stream, T1, Lno, [{number,Lno,-list_to_integer(X)}|R]); -tokenise(Stream,[$/,$*|T],Lno,R) -> - {NewLno,T1} = skip_multiline_comment(Stream,T,Lno,0), - tokenise(Stream,T1,NewLno,R); +tokenise(Stream, "/*"++T, Lno0, R) -> + {Lno,T1} = skip_multiline_comment(Stream, T, Lno0, 0), + tokenise(Stream, T1, Lno, R); -tokenise(Stream,[$:,$:,$=|T],Lno,R) -> - tokenise(Stream,T,Lno,[{'::=',Lno}|R]); - -tokenise(Stream,[$'|T],Lno,R) -> - case catch collect_quoted(T,Lno,[]) of - {'ERR',_} -> - throw({'ERR','bad_quote'}); - {Thing, T1} -> - tokenise(Stream,T1,Lno,[Thing|R]) - end; +tokenise(Stream, "::="++T, Lno, R) -> + tokenise(Stream, T, Lno, [{'::=',Lno}|R]); +tokenise(Stream, ":"++T, Lno, R) -> + tokenise(Stream, T, Lno, [{':',Lno}|R]); +tokenise(Stream, "'"++T0, Lno, R) -> + {Thing, T1} = collect_quoted(T0, Lno, []), + tokenise(Stream, T1, Lno, [Thing|R]); tokenise(Stream,[$"|T],Lno,R) -> {Str,T1} = collect_string(T,Lno), tokenise(Stream,T1,Lno,[Str|R]); -tokenise(Stream,[${|T],Lno,R) -> - tokenise(Stream,T,Lno,[{'{',Lno}|R]); - -tokenise(Stream,[$}|T],Lno,R) -> - tokenise(Stream,T,Lno,[{'}',Lno}|R]); - -%% tokenise(Stream,[$],$]|T],Lno,R) -> -%% tokenise(Stream,T,Lno,[{']]',Lno}|R]); +tokenise(Stream, "{"++T, Lno, R) -> + tokenise(Stream, T, Lno, [{'{',Lno}|R]); +tokenise(Stream, "}"++T, Lno, R) -> + tokenise(Stream, T, Lno, [{'}',Lno}|R]); %% Even though x.680 specify '[[' and ']]' as lexical items -%% it does not work to have them as such since the single [ and ] can -%% be used beside each other in the SYNTAX OF in x.681 -%% the solution chosen here , i.e. to have them as separate lexical items +%% it does not work to have them as such since the single '[' and ']' can +%% be used beside each other in 'WITH SYNTAX' in x.681. +%% The solution chosen here, i.e. to have them as separate lexical items %% will not detect the cases where there is white space between them -%% which would be an error in the use in ExtensionAdditionGroups - -%% tokenise(Stream,[$[,$[|T],Lno,R) -> -%% tokenise(Stream,T,Lno,[{'[[',Lno}|R]); - -tokenise(Stream,[$]|T],Lno,R) -> - tokenise(Stream,T,Lno,[{']',Lno}|R]); - -tokenise(Stream,[$[|T],Lno,R) -> - tokenise(Stream,T,Lno,[{'[',Lno}|R]); +%% which would be an error in the use in ExtensionAdditionGroups. -tokenise(Stream,[$,|T],Lno,R) -> - tokenise(Stream,T,Lno,[{',',Lno}|R]); +tokenise(Stream, "]"++T, Lno, R) -> + tokenise(Stream, T, Lno, [{']',Lno}|R]); +tokenise(Stream, "["++T,Lno,R) -> + tokenise(Stream, T, Lno, [{'[',Lno}|R]); -tokenise(Stream,[$(|T],Lno,R) -> - tokenise(Stream,T,Lno,[{'(',Lno}|R]); -tokenise(Stream,[$)|T],Lno,R) -> - tokenise(Stream,T,Lno,[{')',Lno}|R]); +tokenise(Stream, ","++T,Lno,R) -> + tokenise(Stream, T, Lno, [{',',Lno}|R]); -tokenise(Stream,[$.,$.,$.|T],Lno,R) -> - tokenise(Stream,T,Lno,[{'...',Lno}|R]); +tokenise(Stream, "("++T, Lno, R) -> + tokenise(Stream, T, Lno, [{'(',Lno}|R]); +tokenise(Stream, ")"++T, Lno, R) -> + tokenise(Stream, T, Lno, [{')',Lno}|R]); -tokenise(Stream,[$.,$.|T],Lno,R) -> - tokenise(Stream,T,Lno,[{'..',Lno}|R]); +tokenise(Stream, "..."++T,Lno,R) -> + tokenise(Stream, T, Lno, [{'...',Lno}|R]); +tokenise(Stream, ".."++T, Lno, R) -> + tokenise(Stream, T, Lno, [{'..',Lno}|R]); +tokenise(Stream, "."++T, Lno, R) -> + tokenise(Stream, T, Lno, [{'.',Lno}|R]); -tokenise(Stream,[$.|T],Lno,R) -> - tokenise(Stream,T,Lno,[{'.',Lno}|R]); -tokenise(Stream,[$^|T],Lno,R) -> - tokenise(Stream,T,Lno,[{'^',Lno}|R]); -tokenise(Stream,[$!|T],Lno,R) -> - tokenise(Stream,T,Lno,[{'!',Lno}|R]); -tokenise(Stream,[$||T],Lno,R) -> - tokenise(Stream,T,Lno,[{'|',Lno}|R]); +tokenise(Stream, "|"++T, Lno, R) -> + tokenise(Stream, T, Lno, [{'|',Lno}|R]); -tokenise(Stream,[H|T],Lno,R) -> - case white_space(H) of +tokenise(Stream, [H|T], Lno, R) when $A =< H , H =< $Z -> + {X,T1} = get_name(T, [H]), + case reserved_word(X) of true -> - tokenise(Stream,T,Lno,R); + tokenise(Stream, T1, Lno, [{X,Lno}|R]); false -> - tokenise(Stream,T,Lno,[{list_to_atom([H]),Lno}|R]) + tokenise(Stream, T1, Lno, [{typereference,Lno,X}|R]); + rstrtype -> + tokenise(Stream, T1, Lno, [{restrictedcharacterstringtype,Lno,X}|R]) end; -tokenise(_Stream,[],Lno,R) -> - {Lno,lists:reverse(R)}. +tokenise(Stream, [H|T], Lno, R) when $a =< H , H =< $z -> + {X, T1} = get_name(T, [H]), + tokenise(Stream, T1, Lno, [{identifier,Lno,X}|R]); -collect_string(L,Lno) -> - collect_string(L,Lno,[]). +tokenise(Stream, [H|T], Lno, R) when $0 =< H , H =< $9 -> + {X, T1} = get_number(T, [H]), + tokenise(Stream, T1, Lno, [{number,Lno,list_to_integer(X)}|R]); -collect_string([],_,_) -> - throw({'ERR','bad_quote found eof'}); +tokenise(Stream, [H|T], Lno, R) when H =< $\s -> + tokenise(Stream, T, Lno, R); -collect_string([H|T],Lno,Str) -> - case H of - $" -> - {{cstring,1,lists:reverse(Str)},T}; - Ch -> - collect_string(T,Lno,[Ch|Str]) - end. - +tokenise(Stream, [H|T], Lno, R) -> + tokenise(Stream, T, Lno, [{list_to_atom([H]),Lno}|R]); +tokenise(_Stream, [], Lno, R) -> + {Lno+1,R}. -% <name> is letters digits hyphens -% hypen is not the last character. Hypen hyphen is NOT allowed -% -% <identifier> ::= <lowercase> <name> +collect_string(L, Lno) -> + collect_string(L, Lno, []). -get_name([$-,Char|T], L) -> +collect_string([$"|T], _Lno, Str) -> + {{cstring,1,lists:reverse(Str)},T}; +collect_string([H|T], Lno, Str) -> + collect_string(T, Lno, [H|Str]); +collect_string([], _, _) -> + throw({error,missing_quote_at_eof}). + +%% <name> is letters digits hyphens. +%% Hypen is not the last character. Hypen hyphen is NOT allowed. +%% +%% <identifier> ::= <lowercase> <name> + +get_name([$-,Char|T]=T0, Acc) -> case isalnum(Char) of true -> - get_name(T,[Char,$-|L]); + get_name(T, [Char,$-|Acc]); false -> - {lists:reverse(L),[$-,Char|T]} + {list_to_atom(lists:reverse(Acc)),T0} end; -get_name([$-|T], L) -> - {lists:reverse(L),[$-|T]}; -get_name([Char|T], L) -> +get_name([$-|_]=T, Acc) -> + {list_to_atom(lists:reverse(Acc)),T}; +get_name([Char|T]=T0, Acc) -> case isalnum(Char) of true -> - get_name(T,[Char|L]); + get_name(T, [Char|Acc]); false -> - {lists:reverse(L),[Char|T]} + {list_to_atom(lists:reverse(Acc)),T0} end; -get_name([], L) -> - {lists:reverse(L), []}. - +get_name([], Acc) -> + {list_to_atom(lists:reverse(Acc)),[]}. isalnum(H) when $A =< H , H =< $Z -> true; @@ -221,67 +207,54 @@ isdigit(H) when $0 =< H , H =< $9 -> isdigit(_) -> false. -white_space(9) -> true; -white_space(10) -> true; -white_space(13) -> true; -white_space(32) -> true; -white_space(_) -> false. - - -get_number([H|T], L) -> +get_number([H|T]=T0, L) -> case isdigit(H) of true -> get_number(T, [H|L]); false -> - {lists:reverse(L), [H|T]} + {lists:reverse(L), T0} end; get_number([], L) -> {lists:reverse(L), []}. -skip_comment([]) -> - []; -skip_comment([$-,$-|T]) -> - T; -skip_comment([_|T]) -> - skip_comment(T). - +skip_comment([]) -> []; +skip_comment("--"++T) -> T; +skip_comment([_|T]) -> skip_comment(T). -skip_multiline_comment(Stream,[],Lno,Level) -> - case io:get_line(Stream,'') of +skip_multiline_comment(Stream, [], Lno, Level) -> + case io:get_line(Stream, '') of eof -> - io:format("Tokeniser error on line: ~w~n" - "premature end of multiline comment~n",[Lno]), - exit(0); + throw({error,eof_in_comment}); Line -> - skip_multiline_comment(Stream,Line,Lno+1,Level) + skip_multiline_comment(Stream, Line, Lno+1, Level) end; -skip_multiline_comment(_Stream,[$*,$/|T],Lno,0) -> +skip_multiline_comment(_Stream, "*/"++T, Lno, 0) -> {Lno,T}; -skip_multiline_comment(Stream,[$*,$/|T],Lno,Level) -> - skip_multiline_comment(Stream,T,Lno,Level - 1); -skip_multiline_comment(Stream,[$/,$*|T],Lno,Level) -> - skip_multiline_comment(Stream,T,Lno,Level + 1); -skip_multiline_comment(Stream,[_|T],Lno,Level) -> - skip_multiline_comment(Stream,T,Lno,Level). - -collect_quoted([$',$B|T],Lno, L) -> +skip_multiline_comment(Stream, "*/"++T, Lno, Level) -> + skip_multiline_comment(Stream, T, Lno, Level - 1); +skip_multiline_comment(Stream, "/*"++T, Lno, Level) -> + skip_multiline_comment(Stream, T, Lno, Level + 1); +skip_multiline_comment(Stream, [_|T], Lno, Level) -> + skip_multiline_comment(Stream, T, Lno, Level). + +collect_quoted("'B"++T, Lno, L) -> case check_bin(L) of true -> - {{bstring,Lno, lists:reverse(L)}, T}; + {{bstring,Lno,lists:reverse(L)}, T}; false -> - throw({'ERR',{invalid_binary_number, lists:reverse(L)}}) + throw({error,{invalid_binary_number,lists:reverse(L)}}) end; -collect_quoted([$',$H|T],Lno, L) -> +collect_quoted("'H"++T, Lno, L) -> case check_hex(L) of true -> - {{hstring,Lno, lists:reverse(L)}, T}; + {{hstring,Lno,lists:reverse(L)}, T}; false -> - throw({'ERR',{invalid_binary_number, lists:reverse(L)}}) + throw({error,{invalid_hex_number,lists:reverse(L)}}) end; collect_quoted([H|T], Lno, L) -> collect_quoted(T, Lno,[H|L]); collect_quoted([], _, _) -> % This should be allowed FIX later - throw({'ERR',{eol_in_token}}). + throw({error,eol_in_token}). check_bin([$0|T]) -> check_bin(T); @@ -351,7 +324,6 @@ reserved_word('INCLUDES') -> true; reserved_word('INSTANCE') -> true; reserved_word('INTEGER') -> true; reserved_word('INTERSECTION') -> true; -reserved_word('ISO646String') -> rstrtype; reserved_word('MAX') -> true; reserved_word('MIN') -> true; reserved_word('MINUS-INFINITY') -> true; diff --git a/lib/asn1/test/syntax_SUITE.erl b/lib/asn1/test/syntax_SUITE.erl index f8d10157c0..1a2c938fe5 100644 --- a/lib/asn1/test/syntax_SUITE.erl +++ b/lib/asn1/test/syntax_SUITE.erl @@ -28,6 +28,7 @@ objects/1, sequence/1, syntax/1, + tokenizer/1, types/1, values/1]). @@ -49,6 +50,7 @@ groups() -> objects, sequence, syntax, + tokenizer, types, values]}]. @@ -228,6 +230,20 @@ syntax(Config) -> run(L, "Syntax", Config), ok. +tokenizer(Config) -> + Head = "Tokenize DEFINITIONS AUTOMATIC TAGS ::=\n" + "BEGIN\n", + End = "\nEND\n", + L0 = [{"'",3,eol_in_token}, + {"'42'B",3,{invalid_binary_number,"42"}}, + {"'ZZZ'H",3,{invalid_hex_number,"ZZZ"}}, + {"\"abc",3,missing_quote_at_eof}, + {"/*",3,eof_in_comment} + ], + L = [{Head++S++End,Line,E} || {S,Line,E} <- L0], + run(L, "Tokenizer", Config, asn1ct_tok), + ok. + types(Config) -> Head = "Types DEFINITIONS AUTOMATIC TAGS ::=\n" "BEGIN\n" @@ -283,40 +299,42 @@ values(Config) -> run(L, "Values", Config), ok. -run(List, File0, Config) -> +run(List, File, Config) -> + run(List, File, Config, asn1ct_parser2). + +run(List, File0, Config, Module) -> Base = File0 ++ ".asn1", File = filename:join(?config(priv_dir, Config), Base), - case run_1(List, Base, File, 0) of + case run_1(List, Base, File, Module, 0) of 0 -> ok; Errors -> ?t:fail(Errors) end. -run_1([{Source,Line,Error}=Exp|T], Base, File, N) -> - M = asn1ct_parser2, +run_1([{Source,Line,Error}=Exp|T], Base, File, Module, N) -> ok = file:write_file(File, Source), io:format("~s", [Source]), case asn1ct:compile(File) of - {error,[{structured_error,{Base,L},M,E}]} -> + {error,[{structured_error,{Base,L},Module,E}]} -> case {L,E} of {Line,Error} -> - run_1(T, Base, File, N); + run_1(T, Base, File, Module, N); {Line,OtherError} -> io:format("*** Wrong error: ~p, expected ~p ***\n", [OtherError,Error]), - run_1(T, Base, File, N+1); + run_1(T, Base, File, Module, N+1); {OtherLine,Error} -> io:format("*** Wrong line: ~p, expected ~p ***\n", [OtherLine,Line]), - run_1(T, Base, File, N+1); + run_1(T, Base, File, Module, N+1); {_,_} -> io:format("*** Wrong line: ~p, expected ~p ***", [L,Line]), io:format("*** Wrong error: ~p, expected ~p ***\n", [E,Error]), - run_1(T, Base, File, N+1) + run_1(T, Base, File, Module, N+1) end; Other -> io:format("~p\nGOT: ~p", [Exp,Other]) end; -run_1([], _, _, N) -> +run_1([], _, _, _, N) -> N. |