author     Björn Gustavsson <[email protected]>  2014-12-11 14:51:56 +0100
committer  Björn Gustavsson <[email protected]>  2015-01-12 12:22:57 +0100
commit     b320ccf10becb894c2b325da69a65b316189b4a6 (patch)
tree       c19d404215c11b4a227f483a21fc1a13b4d4ecb3
parent     a7521902cbf4ae2e6cf78689dc98c2da944f142f (diff)
asn1ct_tok: Clean up
-rw-r--r--  lib/asn1/src/asn1ct_tok.erl    332
-rw-r--r--  lib/asn1/test/syntax_SUITE.erl  38
2 files changed, 180 insertions(+), 190 deletions(-)
diff --git a/lib/asn1/src/asn1ct_tok.erl b/lib/asn1/src/asn1ct_tok.erl
index 8687ed955c..d51fea6402 100644
--- a/lib/asn1/src/asn1ct_tok.erl
+++ b/lib/asn1/src/asn1ct_tok.erl
@@ -21,191 +21,177 @@
%% Tokenize ASN.1 code (input to parser generated with yecc)
--export([get_name/2,tokenise/4, file/1]).
+-export([file/1,format_error/1]).
-
-file(File) ->
- case file:open(File, [read]) of
+file(File0) ->
+ case file:open(File0, [read]) of
{error, Reason} ->
- {error,{File,file:format_error(Reason)}};
+ {error,{File0,file:format_error(Reason)}};
{ok,Stream} ->
- process(Stream,0,[])
+ try
+ process(Stream, 1, [])
+ catch
+ throw:{error,Line,Reason} ->
+ File = filename:basename(File0),
+ Error = {structured_error,{File,Line},?MODULE,Reason},
+ {error,[Error]}
+ end
end.
-process(Stream,Lno,R) ->
- process(io:get_line(Stream, ''), Stream,Lno+1,R).
+process(Stream, Lno, R) ->
+ process(io:get_line(Stream, ''), Stream, Lno, R).
-process(eof, Stream,Lno,R) ->
+process(eof, Stream, Lno, Acc) ->
ok = file:close(Stream),
- lists:flatten(lists:reverse([{'$end',Lno}|R]));
-
-
-process(L, Stream,Lno,R) when is_list(L) ->
- %%io:format('read:~s',[L]),
- case catch tokenise(Stream,L,Lno,[]) of
- {'ERR',Reason} ->
- io:format("Tokeniser error on line: ~w ~w~n",[Lno,Reason]),
- exit(0);
- {NewLno,T} ->
- %%io:format('toks:~w~n',[T]),
- process(Stream,NewLno,[T|R])
- end.
-
-tokenise(Stream,[H|T],Lno,R) when $a =< H , H =< $z ->
- {X, T1} = get_name(T, [H]),
- tokenise(Stream,T1,Lno,[{identifier,Lno, list_to_atom(X)}|R]);
-
-tokenise(Stream,[$&,H|T],Lno,R) when $A =< H , H =< $Z ->
- {Y, T1} = get_name(T, [H]),
- X = list_to_atom(Y),
- tokenise(Stream,T1,Lno,[{typefieldreference, Lno, X} | R]);
-
-tokenise(Stream,[$&,H|T],Lno,R) when $a =< H , H =< $z ->
- {Y, T1} = get_name(T, [H]),
- X = list_to_atom(Y),
- tokenise(Stream,T1,Lno,[{valuefieldreference, Lno, X} | R]);
-
-tokenise(Stream,[H|T],Lno,R) when $A =< H , H =< $Z ->
- {Y, T1} = get_name(T, [H]),
- X = list_to_atom(Y),
- case reserved_word(X) of
- true ->
- tokenise(Stream,T1,Lno,[{X,Lno}|R]);
- false ->
- tokenise(Stream,T1,Lno,[{typereference,Lno,X}|R]);
- rstrtype ->
- tokenise(Stream,T1,Lno,[{restrictedcharacterstringtype,Lno,X}|R])
- end;
-
-tokenise(Stream,[$-,H|T],Lno,R) when $0 =< H , H =< $9 ->
- {X, T1} = get_number(T, [H]),
- tokenise(Stream,T1,Lno,[{number,Lno,-1 * list_to_integer(X)}|R]);
+ lists:reverse([{'$end',Lno}|Acc]);
+process(L, Stream, Lno0, Acc) when is_list(L) ->
+ try tokenise(Stream, L, Lno0, []) of
+ {Lno,[]} ->
+ process(Stream, Lno, Acc);
+ {Lno,Ts} ->
+ process(Stream, Lno, Ts++Acc)
+ catch
+ throw:{error,Reason} ->
+ throw({error,Lno0,Reason})
+ end.
-tokenise(Stream,[H|T],Lno,R) when $0 =< H , H =< $9 ->
+format_error(eof_in_comment) ->
+ "premature end of file in multi-line comment";
+format_error(eol_in_token) ->
+ "end of line in token";
+format_error({invalid_binary_number,Str}) ->
+ io_lib:format("invalid binary number: '~s'", [Str]);
+format_error({invalid_hex_number,Str}) ->
+ io_lib:format("invalid hex number: '~s'", [Str]);
+format_error(Other) ->
+ io_lib:format("~p", [Other]).
+
+tokenise(Stream, [$&,H|T], Lno, R) when $A =< H , H =< $Z ->
+ {X,T1} = get_name(T, [H]),
+ tokenise(Stream, T1, Lno, [{typefieldreference,Lno,X}|R]);
+tokenise(Stream, [$&,H|T], Lno, R) when $a =< H , H =< $z ->
+ {X,T1} = get_name(T, [H]),
+ tokenise(Stream, T1, Lno, [{valuefieldreference,Lno,X}|R]);
+
+tokenise(Stream, "--"++T, Lno, R) ->
+ tokenise(Stream, skip_comment(T), Lno, R);
+
+tokenise(Stream, [$-,H|T], Lno, R) when $0 =< H , H =< $9 ->
{X, T1} = get_number(T, [H]),
- tokenise(Stream,T1,Lno,[{number,Lno,list_to_integer(X)}|R]);
-
-tokenise(Stream,[$-,$-|T],Lno,R) ->
- tokenise(Stream,skip_comment(T),Lno,R);
+ tokenise(Stream, T1, Lno, [{number,Lno,-list_to_integer(X)}|R]);
-tokenise(Stream,[$/,$*|T],Lno,R) ->
- {NewLno,T1} = skip_multiline_comment(Stream,T,Lno,0),
- tokenise(Stream,T1,NewLno,R);
+tokenise(Stream, "/*"++T, Lno0, R) ->
+ {Lno,T1} = skip_multiline_comment(Stream, T, Lno0, 0),
+ tokenise(Stream, T1, Lno, R);
-tokenise(Stream,[$:,$:,$=|T],Lno,R) ->
- tokenise(Stream,T,Lno,[{'::=',Lno}|R]);
-
-tokenise(Stream,[$'|T],Lno,R) ->
- case catch collect_quoted(T,Lno,[]) of
- {'ERR',_} ->
- throw({'ERR','bad_quote'});
- {Thing, T1} ->
- tokenise(Stream,T1,Lno,[Thing|R])
- end;
+tokenise(Stream, "::="++T, Lno, R) ->
+ tokenise(Stream, T, Lno, [{'::=',Lno}|R]);
+tokenise(Stream, ":"++T, Lno, R) ->
+ tokenise(Stream, T, Lno, [{':',Lno}|R]);
+tokenise(Stream, "'"++T0, Lno, R) ->
+ {Thing, T1} = collect_quoted(T0, Lno, []),
+ tokenise(Stream, T1, Lno, [Thing|R]);
tokenise(Stream,[$"|T],Lno,R) ->
{Str,T1} = collect_string(T,Lno),
tokenise(Stream,T1,Lno,[Str|R]);
-tokenise(Stream,[${|T],Lno,R) ->
- tokenise(Stream,T,Lno,[{'{',Lno}|R]);
-
-tokenise(Stream,[$}|T],Lno,R) ->
- tokenise(Stream,T,Lno,[{'}',Lno}|R]);
-
-%% tokenise(Stream,[$],$]|T],Lno,R) ->
-%% tokenise(Stream,T,Lno,[{']]',Lno}|R]);
+tokenise(Stream, "{"++T, Lno, R) ->
+ tokenise(Stream, T, Lno, [{'{',Lno}|R]);
+tokenise(Stream, "}"++T, Lno, R) ->
+ tokenise(Stream, T, Lno, [{'}',Lno}|R]);
%% Even though x.680 specify '[[' and ']]' as lexical items
-%% it does not work to have them as such since the single [ and ] can
-%% be used beside each other in the SYNTAX OF in x.681
-%% the solution chosen here , i.e. to have them as separate lexical items
+%% it does not work to have them as such since the single '[' and ']' can
+%% be used beside each other in 'WITH SYNTAX' in x.681.
+%% The solution chosen here, i.e. to have them as separate lexical items
%% will not detect the cases where there is white space between them
-%% which would be an error in the use in ExtensionAdditionGroups
-
-%% tokenise(Stream,[$[,$[|T],Lno,R) ->
-%% tokenise(Stream,T,Lno,[{'[[',Lno}|R]);
-
-tokenise(Stream,[$]|T],Lno,R) ->
- tokenise(Stream,T,Lno,[{']',Lno}|R]);
-
-tokenise(Stream,[$[|T],Lno,R) ->
- tokenise(Stream,T,Lno,[{'[',Lno}|R]);
+%% which would be an error in the use in ExtensionAdditionGroups.
-tokenise(Stream,[$,|T],Lno,R) ->
- tokenise(Stream,T,Lno,[{',',Lno}|R]);
+tokenise(Stream, "]"++T, Lno, R) ->
+ tokenise(Stream, T, Lno, [{']',Lno}|R]);
+tokenise(Stream, "["++T,Lno,R) ->
+ tokenise(Stream, T, Lno, [{'[',Lno}|R]);
-tokenise(Stream,[$(|T],Lno,R) ->
- tokenise(Stream,T,Lno,[{'(',Lno}|R]);
-tokenise(Stream,[$)|T],Lno,R) ->
- tokenise(Stream,T,Lno,[{')',Lno}|R]);
+tokenise(Stream, ","++T,Lno,R) ->
+ tokenise(Stream, T, Lno, [{',',Lno}|R]);
-tokenise(Stream,[$.,$.,$.|T],Lno,R) ->
- tokenise(Stream,T,Lno,[{'...',Lno}|R]);
+tokenise(Stream, "("++T, Lno, R) ->
+ tokenise(Stream, T, Lno, [{'(',Lno}|R]);
+tokenise(Stream, ")"++T, Lno, R) ->
+ tokenise(Stream, T, Lno, [{')',Lno}|R]);
-tokenise(Stream,[$.,$.|T],Lno,R) ->
- tokenise(Stream,T,Lno,[{'..',Lno}|R]);
+tokenise(Stream, "..."++T,Lno,R) ->
+ tokenise(Stream, T, Lno, [{'...',Lno}|R]);
+tokenise(Stream, ".."++T, Lno, R) ->
+ tokenise(Stream, T, Lno, [{'..',Lno}|R]);
+tokenise(Stream, "."++T, Lno, R) ->
+ tokenise(Stream, T, Lno, [{'.',Lno}|R]);
-tokenise(Stream,[$.|T],Lno,R) ->
- tokenise(Stream,T,Lno,[{'.',Lno}|R]);
-tokenise(Stream,[$^|T],Lno,R) ->
- tokenise(Stream,T,Lno,[{'^',Lno}|R]);
-tokenise(Stream,[$!|T],Lno,R) ->
- tokenise(Stream,T,Lno,[{'!',Lno}|R]);
-tokenise(Stream,[$||T],Lno,R) ->
- tokenise(Stream,T,Lno,[{'|',Lno}|R]);
+tokenise(Stream, "|"++T, Lno, R) ->
+ tokenise(Stream, T, Lno, [{'|',Lno}|R]);
-tokenise(Stream,[H|T],Lno,R) ->
- case white_space(H) of
+tokenise(Stream, [H|T], Lno, R) when $A =< H , H =< $Z ->
+ {X,T1} = get_name(T, [H]),
+ case reserved_word(X) of
true ->
- tokenise(Stream,T,Lno,R);
+ tokenise(Stream, T1, Lno, [{X,Lno}|R]);
false ->
- tokenise(Stream,T,Lno,[{list_to_atom([H]),Lno}|R])
+ tokenise(Stream, T1, Lno, [{typereference,Lno,X}|R]);
+ rstrtype ->
+ tokenise(Stream, T1, Lno, [{restrictedcharacterstringtype,Lno,X}|R])
end;
-tokenise(_Stream,[],Lno,R) ->
- {Lno,lists:reverse(R)}.
+tokenise(Stream, [H|T], Lno, R) when $a =< H , H =< $z ->
+ {X, T1} = get_name(T, [H]),
+ tokenise(Stream, T1, Lno, [{identifier,Lno,X}|R]);
-collect_string(L,Lno) ->
- collect_string(L,Lno,[]).
+tokenise(Stream, [H|T], Lno, R) when $0 =< H , H =< $9 ->
+ {X, T1} = get_number(T, [H]),
+ tokenise(Stream, T1, Lno, [{number,Lno,list_to_integer(X)}|R]);
-collect_string([],_,_) ->
- throw({'ERR','bad_quote found eof'});
+tokenise(Stream, [H|T], Lno, R) when H =< $\s ->
+ tokenise(Stream, T, Lno, R);
-collect_string([H|T],Lno,Str) ->
- case H of
- $" ->
- {{cstring,1,lists:reverse(Str)},T};
- Ch ->
- collect_string(T,Lno,[Ch|Str])
- end.
-
+tokenise(Stream, [H|T], Lno, R) ->
+ tokenise(Stream, T, Lno, [{list_to_atom([H]),Lno}|R]);
+tokenise(_Stream, [], Lno, R) ->
+ {Lno+1,R}.
-% <name> is letters digits hyphens
-% hypen is not the last character. Hypen hyphen is NOT allowed
-%
-% <identifier> ::= <lowercase> <name>
+collect_string(L, Lno) ->
+ collect_string(L, Lno, []).
-get_name([$-,Char|T], L) ->
+collect_string([$"|T], _Lno, Str) ->
+ {{cstring,1,lists:reverse(Str)},T};
+collect_string([H|T], Lno, Str) ->
+ collect_string(T, Lno, [H|Str]);
+collect_string([], _, _) ->
+ throw({error,missing_quote_at_eof}).
+
+%% <name> is letters digits hyphens.
+%% Hypen is not the last character. Hypen hyphen is NOT allowed.
+%%
+%% <identifier> ::= <lowercase> <name>
+
+get_name([$-,Char|T]=T0, Acc) ->
case isalnum(Char) of
true ->
- get_name(T,[Char,$-|L]);
+ get_name(T, [Char,$-|Acc]);
false ->
- {lists:reverse(L),[$-,Char|T]}
+ {list_to_atom(lists:reverse(Acc)),T0}
end;
-get_name([$-|T], L) ->
- {lists:reverse(L),[$-|T]};
-get_name([Char|T], L) ->
+get_name([$-|_]=T, Acc) ->
+ {list_to_atom(lists:reverse(Acc)),T};
+get_name([Char|T]=T0, Acc) ->
case isalnum(Char) of
true ->
- get_name(T,[Char|L]);
+ get_name(T, [Char|Acc]);
false ->
- {lists:reverse(L),[Char|T]}
+ {list_to_atom(lists:reverse(Acc)),T0}
end;
-get_name([], L) ->
- {lists:reverse(L), []}.
-
+get_name([], Acc) ->
+ {list_to_atom(lists:reverse(Acc)),[]}.
isalnum(H) when $A =< H , H =< $Z ->
true;
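
A minimal sketch (not part of the commit) of how a caller might consume the structured errors that file/1 now returns; report/1 and the error-reporting format are assumptions, but the tuple shapes match the code above:

    %% Hypothetical caller; report/1 is not part of asn1ct_tok.
    report(File) ->
        case asn1ct_tok:file(File) of
            {error,[{structured_error,{Name,Line},Mod,Reason}]} ->
                %% Mod is asn1ct_tok; format_error/1 renders Reason as text.
                io:format("~s:~p: ~s~n", [Name,Line,Mod:format_error(Reason)]);
            {error,{Name,Reason}} ->
                %% file:open/2 failed; Reason is already formatted.
                io:format("~s: ~s~n", [Name,Reason]);
            Tokens when is_list(Tokens) ->
                {ok,Tokens}
        end.
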
@@ -221,67 +207,54 @@ isdigit(H) when $0 =< H , H =< $9 ->
isdigit(_) ->
false.
-white_space(9) -> true;
-white_space(10) -> true;
-white_space(13) -> true;
-white_space(32) -> true;
-white_space(_) -> false.
-
-
-get_number([H|T], L) ->
+get_number([H|T]=T0, L) ->
case isdigit(H) of
true ->
get_number(T, [H|L]);
false ->
- {lists:reverse(L), [H|T]}
+ {lists:reverse(L), T0}
end;
get_number([], L) ->
{lists:reverse(L), []}.
-skip_comment([]) ->
- [];
-skip_comment([$-,$-|T]) ->
- T;
-skip_comment([_|T]) ->
- skip_comment(T).
-
+skip_comment([]) -> [];
+skip_comment("--"++T) -> T;
+skip_comment([_|T]) -> skip_comment(T).
-skip_multiline_comment(Stream,[],Lno,Level) ->
- case io:get_line(Stream,'') of
+skip_multiline_comment(Stream, [], Lno, Level) ->
+ case io:get_line(Stream, '') of
eof ->
- io:format("Tokeniser error on line: ~w~n"
- "premature end of multiline comment~n",[Lno]),
- exit(0);
+ throw({error,eof_in_comment});
Line ->
- skip_multiline_comment(Stream,Line,Lno+1,Level)
+ skip_multiline_comment(Stream, Line, Lno+1, Level)
end;
-skip_multiline_comment(_Stream,[$*,$/|T],Lno,0) ->
+skip_multiline_comment(_Stream, "*/"++T, Lno, 0) ->
{Lno,T};
-skip_multiline_comment(Stream,[$*,$/|T],Lno,Level) ->
- skip_multiline_comment(Stream,T,Lno,Level - 1);
-skip_multiline_comment(Stream,[$/,$*|T],Lno,Level) ->
- skip_multiline_comment(Stream,T,Lno,Level + 1);
-skip_multiline_comment(Stream,[_|T],Lno,Level) ->
- skip_multiline_comment(Stream,T,Lno,Level).
-
-collect_quoted([$',$B|T],Lno, L) ->
+skip_multiline_comment(Stream, "*/"++T, Lno, Level) ->
+ skip_multiline_comment(Stream, T, Lno, Level - 1);
+skip_multiline_comment(Stream, "/*"++T, Lno, Level) ->
+ skip_multiline_comment(Stream, T, Lno, Level + 1);
+skip_multiline_comment(Stream, [_|T], Lno, Level) ->
+ skip_multiline_comment(Stream, T, Lno, Level).
+
+collect_quoted("'B"++T, Lno, L) ->
case check_bin(L) of
true ->
- {{bstring,Lno, lists:reverse(L)}, T};
+ {{bstring,Lno,lists:reverse(L)}, T};
false ->
- throw({'ERR',{invalid_binary_number, lists:reverse(L)}})
+ throw({error,{invalid_binary_number,lists:reverse(L)}})
end;
-collect_quoted([$',$H|T],Lno, L) ->
+collect_quoted("'H"++T, Lno, L) ->
case check_hex(L) of
true ->
- {{hstring,Lno, lists:reverse(L)}, T};
+ {{hstring,Lno,lists:reverse(L)}, T};
false ->
- throw({'ERR',{invalid_binary_number, lists:reverse(L)}})
+ throw({error,{invalid_hex_number,lists:reverse(L)}})
end;
collect_quoted([H|T], Lno, L) ->
collect_quoted(T, Lno,[H|L]);
collect_quoted([], _, _) -> % This should be allowed FIX later
- throw({'ERR',{eol_in_token}}).
+ throw({error,eol_in_token}).
check_bin([$0|T]) ->
check_bin(T);
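
To illustrate the rewritten collect_quoted/3: a well-formed bstring or hstring becomes a three-tuple carrying the line number and the characters between the quotes, while malformed literals throw the tagged errors that format_error/1 renders. A sketch with an assumed one-line input file; the token list is derived by reading the code, not taken from the commit:

    %% Assumed file "Bstring.asn1" containing the single line: A ::= '1010'B
    1> asn1ct_tok:file("Bstring.asn1").
    [{typereference,1,'A'},{'::=',1},{bstring,1,"1010"},{'$end',2}]
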
@@ -351,7 +324,6 @@ reserved_word('INCLUDES') -> true;
reserved_word('INSTANCE') -> true;
reserved_word('INTEGER') -> true;
reserved_word('INTERSECTION') -> true;
-reserved_word('ISO646String') -> rstrtype;
reserved_word('MAX') -> true;
reserved_word('MIN') -> true;
reserved_word('MINUS-INFINITY') -> true;
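
The reserved_word/1 table drives the three-way classification of upper-case names in tokenise/4: true yields a bare keyword token such as {'INTEGER',Lno}, rstrtype yields a restrictedcharacterstringtype token, and false yields a typereference. A sketch with an assumed input file; output derived by reading the code:

    %% Assumed file "T.asn1" containing the single line: MyType ::= IA5String
    1> asn1ct_tok:file("T.asn1").
    [{typereference,1,'MyType'},
     {'::=',1},
     {restrictedcharacterstringtype,1,'IA5String'},
     {'$end',2}]
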
diff --git a/lib/asn1/test/syntax_SUITE.erl b/lib/asn1/test/syntax_SUITE.erl
index f8d10157c0..1a2c938fe5 100644
--- a/lib/asn1/test/syntax_SUITE.erl
+++ b/lib/asn1/test/syntax_SUITE.erl
@@ -28,6 +28,7 @@
objects/1,
sequence/1,
syntax/1,
+ tokenizer/1,
types/1,
values/1]).
@@ -49,6 +50,7 @@ groups() ->
objects,
sequence,
syntax,
+ tokenizer,
types,
values]}].
@@ -228,6 +230,20 @@ syntax(Config) ->
run(L, "Syntax", Config),
ok.
+tokenizer(Config) ->
+ Head = "Tokenize DEFINITIONS AUTOMATIC TAGS ::=\n"
+ "BEGIN\n",
+ End = "\nEND\n",
+ L0 = [{"'",3,eol_in_token},
+ {"'42'B",3,{invalid_binary_number,"42"}},
+ {"'ZZZ'H",3,{invalid_hex_number,"ZZZ"}},
+ {"\"abc",3,missing_quote_at_eof},
+ {"/*",3,eof_in_comment}
+ ],
+ L = [{Head++S++End,Line,E} || {S,Line,E} <- L0],
+ run(L, "Tokenizer", Config, asn1ct_tok),
+ ok.
+
types(Config) ->
Head = "Types DEFINITIONS AUTOMATIC TAGS ::=\n"
"BEGIN\n"
@@ -283,40 +299,42 @@ values(Config) ->
run(L, "Values", Config),
ok.
-run(List, File0, Config) ->
+run(List, File, Config) ->
+ run(List, File, Config, asn1ct_parser2).
+
+run(List, File0, Config, Module) ->
Base = File0 ++ ".asn1",
File = filename:join(?config(priv_dir, Config), Base),
- case run_1(List, Base, File, 0) of
+ case run_1(List, Base, File, Module, 0) of
0 -> ok;
Errors -> ?t:fail(Errors)
end.
-run_1([{Source,Line,Error}=Exp|T], Base, File, N) ->
- M = asn1ct_parser2,
+run_1([{Source,Line,Error}=Exp|T], Base, File, Module, N) ->
ok = file:write_file(File, Source),
io:format("~s", [Source]),
case asn1ct:compile(File) of
- {error,[{structured_error,{Base,L},M,E}]} ->
+ {error,[{structured_error,{Base,L},Module,E}]} ->
case {L,E} of
{Line,Error} ->
- run_1(T, Base, File, N);
+ run_1(T, Base, File, Module, N);
{Line,OtherError} ->
io:format("*** Wrong error: ~p, expected ~p ***\n",
[OtherError,Error]),
- run_1(T, Base, File, N+1);
+ run_1(T, Base, File, Module, N+1);
{OtherLine,Error} ->
io:format("*** Wrong line: ~p, expected ~p ***\n",
[OtherLine,Line]),
- run_1(T, Base, File, N+1);
+ run_1(T, Base, File, Module, N+1);
{_,_} ->
io:format("*** Wrong line: ~p, expected ~p ***",
[L,Line]),
io:format("*** Wrong error: ~p, expected ~p ***\n",
[E,Error]),
- run_1(T, Base, File, N+1)
+ run_1(T, Base, File, Module, N+1)
end;
Other ->
io:format("~p\nGOT: ~p", [Exp,Other])
end;
-run_1([], _, _, N) ->
+run_1([], _, _, _, N) ->
N.
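
A note on the tokenizer cases added above: each bad token is wrapped between the two-line module header and the END line, so the offending text always lands on line 3, which is why every expectation tuple carries a 3. A hypothetical extra entry for L0, assuming check_hex/1 rejects letters beyond $F:

    {"'G'H",3,{invalid_hex_number,"G"}}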