%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2010-2011. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%
-module(diameter_dict_scanner).
%%
%% A scanner for dictionary files, returning tokens of the form expected by yecc.
%%
-export([scan/1,
format_error/1]).
-export([is_name/1]).
%% -----------------------------------------------------------
%% # scan/1
%%
%% Tokenize the text of a dictionary file, given as a string or a
%% binary (the latter is converted with binary_to_list/1).
%%
%% Returns {ok, Tokens}, in which the final token is {'$end', Lineno},
%% or {error, {Reason, Input, Lineno}}, where Reason is a string
%% describing the problem, Input is the input remaining at the point
%% of failure and Lineno is the line on which it was encountered. The
%% error triple can be passed to format_error/1.
%% -----------------------------------------------------------

-spec scan(string() | binary())
   -> {ok, [Token]}
    | {error, {string(), string(), Lineno}}
 when Token  :: {word, Lineno, string()}
              | {number, Lineno, non_neg_integer()}
              | {Symbol, Lineno},
      Lineno :: pos_integer(),
      Symbol :: '{' | '}' | '<' | '>' | '[' | ']'
              | '*' | '::=' | ':' | ',' | '-'
              | avp_types
              | avp_vendor_id
              | codecs
              | custom_types
              | define
              | enum
              | grouped
              | id
              | inherits
              | messages
              | name
              | prefix
              | vendor
              | '$end'
              | code
              | 'answer-message'
              | 'AVP'
              | 'AVP-Header'
              | 'Diameter'
              | 'Diameter-Header'
              | 'Header'
              | 'REQ'
              | 'PXY'
              | 'ERR'.

scan(B)
  when is_binary(B) ->
    scan(binary_to_list(B));
scan(S) ->
    %% Start at line 1 with no tokens accumulated.
    scan(S, {1, []}).
%% scan/2
%%
%% Consume the input one token at a time, carrying along the current
%% line number and the tokens collected so far (most recent first).

scan(Input, {Line, Toks}) ->
    case split(Input) of
        {Token, Remaining} ->
            scan(Remaining, acc(Token, Line, Toks));
        '$end' = End ->
            %% Tokens were accumulated in reverse: restore input order.
            {ok, lists:reverse([{End, Line} | Toks])};
        Why when is_list(Why) ->
            %% A string from split/1 describes a scan failure.
            {error, {Why, Input, Line}}
    end.
%% format_error/1
%%
%% Turn the error triple returned by scan/1 into printable chardata.
%% Only an excerpt of the offending input is shown: at most 20
%% characters of its first line, with leading blanks skipped.

format_error({Reason, Input, Lineno}) ->
    Excerpt = head(Input, [], 20, true),
    io_lib:format("~s at line ~p: ~s", [Reason, Lineno, Excerpt]).
%% is_name/1
%%
%% Whether a non-empty string starts with an alphanumeric character and
%% continues with name characters only. There is no clause for the
%% empty string.

is_name([First | Others]) ->
    case is_alphanum(First) of
        true ->
            lists:all(fun is_name_ch/1, Others);
        false ->
            false
    end.
%% ===========================================================================
%% head/4
%%
%% Extract at most N characters from the first line of a string. The
%% last argument is true for as long as leading whitespace is still
%% being skipped: skipped characters do not count against N.

%% Stop at end of input ...
head([], Acc, _, _) ->
    lists:reverse(Acc);
%% ... when the character budget is spent ...
head(_, Acc, N, _)
  when N == 0 ->
    lists:reverse(Acc);
%% ... or at the end of the line.
head([Ch | _], Acc, _, _)
  when Ch == $\r orelse Ch == $\n ->
    lists:reverse(Acc);
%% Leading whitespace is dropped without being counted.
head([Ch | Tail], Acc, N, true)
  when Ch == $\s orelse Ch == $\t orelse Ch == $\f orelse Ch == $\v ->
    head(Tail, Acc, N, true);
%% Anything else is kept, and ends the skipping of whitespace.
head([Ch | Tail], Acc, N, _) ->
    head(Tail, [Ch | Acc], N - 1, false).
%% acc/3
%%
%% Fold the result of one split into the scanner state: a line ending
%% only bumps the line number, anything else becomes a token tagged
%% with the current line.

acc(endline, Line, Toks) ->
    NextLine = Line + 1,
    {NextLine, Toks};
acc(Raw, Line, Toks) ->
    Token = tok(Raw, Line),
    {Line, [Token | Toks]}.
%% tok/2
%%
%% Insert the line number into a token: a {Category, Value} pair
%% becomes a triple, anything else is paired with the line.

tok(Raw, Line) ->
    case Raw of
        {Category, Value} ->
            {Category, Line, Value};
        Symbol ->
            {Symbol, Line}
    end.
%% # split/1
%%
%% Split the next token off the head of the input.
%%
%% Output: {Token, Rest} - a token (or endline) and the remaining input
%%       | '$end'        - at end of input or an @end section
%%       | string()      - the description of a scan error

%% Finito.
split("") ->
    '$end';

%% Skip comments. This precludes using semicolon for any other purpose.
split([$;|T]) ->
    split(lists:dropwhile(fun(C) -> not is_eol_ch(C) end, T));

%% Beginning of a section.
split([$@|T]) ->
    {Name, Rest} = lists:splitwith(fun is_name_ch/1, T),
    case section(Name) of
        false ->
            "Unknown section";
        'end' ->
            %% Anything following @end is ignored.
            '$end';
        A ->
            {A, Rest}
    end;

%% Must precede the single-character symbols since ':' is one of them.
split("::=" ++ T) ->
    {'::=', T};

split([H|T])
  when H == ${; H == $};
       H == $<; H == $>;
       H == $[; H == $];
       H == $*; H == $:; H == $,; H == $- ->
    {list_to_atom([H]), T};

%% RFC 3588 requires various names to begin with a letter but 3GPP (for
%% one) abuses this. (eg 3GPP-Charging-Id in TS32.299.) A run of name
%% characters starting with a digit is therefore a number if all of it
%% can be read as an integer and a word otherwise.
split([H|_] = L) when $0 =< H, H =< $9 ->
    {P, Rest} = splitwith(fun is_name_ch/1, L),
    Tok = try
              {number, read_int(P)}
          catch
              error:_ ->
                  word(P)
          end,
    {Tok, Rest};

split([H|_] = L) when $a =< H, H =< $z;
                      $A =< H, H =< $Z ->
    {P, Rest} = splitwith(fun is_name_ch/1, L),
    {word(P), Rest};

%% Quoted word. The character following the opening quote has not been
%% examined, so this has to be lists:splitwith/2 and not the local
%% splitwith/2, which accepts the first character unconditionally:
%% with the latter a lone quote at end of input crashes, '' is
%% reported as unterminated and a quote directly followed by a line
%% ending swallows it.
split([$'|T]) ->
    case lists:splitwith(fun(C) -> not lists:member(C, "'\r\n") end, T) of
        {[_|_] = A, [$'|Rest]} ->
            {{word, A}, Rest};
        {[], [$'|_]} ->
            "Empty string";
        _ -> %% not terminated on same line
            "Unterminated string"
    end;

%% Line ending of various forms.
split([$\r,$\n|T]) ->
    {endline, T};
split([C|T])
  when C == $\r;
       C == $\n ->
    {endline, T};

%% Ignore whitespace.
split([C|T])
  when C == $\s;
       C == $\t;
       C == $\f;
       C == $\v ->
    split(T);

split(_) ->
    "Unexpected character".
%% word/1
%%
%% Classify a name: words reserved by the grammar are returned as
%% atoms, all others as {word, Name}.

word(S) ->
    Reserved = ["answer-message",
                "code",
                "AVP",
                "AVP-Header",
                "Diameter",
                "Diameter-Header",
                "Header",
                "REQ",
                "PXY",
                "ERR"],
    case lists:any(fun(R) -> S == R end, Reserved) of
        true ->
            %% Safe: S is one of the finitely many strings above.
            list_to_atom(S);
        false ->
            {word, S}
    end.
%% section/1
%%
%% Map the name following a '@' to the corresponding section atom, or
%% to false if there is no section of that name.

section(N) ->
    Known = ["avp_types",
             "avp_vendor_id",
             "codecs",
             "custom_types",
             "define",
             "end",
             "enum",
             "grouped",
             "id",
             "inherits",
             "messages",
             "name",
             "prefix",
             "vendor"],
    case lists:any(fun(K) -> N == K end, Known) of
        true ->
            %% Safe: N is one of the finitely many strings above.
            list_to_atom(N);
        false ->
            false
    end.
%% read_int/1
%%
%% Convert a string to an integer: hexadecimal if prefixed with 0x or
%% 0X, decimal otherwise. Raises an error if the string isn't an
%% integer in its entirety.

read_int([$0, Marker | Digits])
  when Marker == $X orelse Marker == $x ->
    Parsed = io_lib:fread("~16u", Digits),
    %% Insist that all of the input was consumed.
    {ok, [Value], []} = Parsed,
    Value;
read_int(Digits) ->
    erlang:list_to_integer(Digits).
%% splitwith/2
%%
%% Like lists:splitwith/2 except that the first element is taken
%% without being tested, which also means that the list must be
%% non-empty.

splitwith(Pred, [First | Others]) ->
    {Taken, Remaining} = lists:splitwith(Pred, Others),
    {[First | Taken], Remaining}.
%% is_eol_ch/1
%%
%% Whether a character is a line feed or carriage return.

is_eol_ch(C)
  when C == $\n;
       C == $\r ->
    true;
is_eol_ch(_) ->
    false.
%% is_name_ch/1
%%
%% Whether a character can occur in a name: an alphanumeric, hyphen or
%% underscore.

is_name_ch(C)
  when C == $-;
       C == $_ ->
    true;
is_name_ch(C) ->
    is_alphanum(C).
%% is_alphanum/1
%%
%% Whether a character is an ASCII letter or digit.

is_alphanum(C) ->
    Classes = [fun is_lower/1, fun is_upper/1, fun is_digit/1],
    lists:any(fun(IsMember) -> IsMember(C) end, Classes).
%% is_lower/1
%%
%% Whether a character is in the range a-z.

is_lower(C)
  when $a =< C, C =< $z ->
    true;
is_lower(_) ->
    false.
%% is_upper/1
%%
%% Whether a character is in the range A-Z.

is_upper(C)
  when $A =< C, C =< $Z ->
    true;
is_upper(_) ->
    false.
%% is_digit/1
%%
%% Whether a character is in the range 0-9.

is_digit(C)
  when $0 =< C, C =< $9 ->
    true;
is_digit(_) ->
    false.