aboutsummaryrefslogtreecommitdiffstats
path: root/lib/diameter/src/compiler/diameter_dict_scanner.erl
diff options
context:
space:
mode:
Diffstat (limited to 'lib/diameter/src/compiler/diameter_dict_scanner.erl')
-rw-r--r--lib/diameter/src/compiler/diameter_dict_scanner.erl274
1 files changed, 274 insertions, 0 deletions
diff --git a/lib/diameter/src/compiler/diameter_dict_scanner.erl b/lib/diameter/src/compiler/diameter_dict_scanner.erl
new file mode 100644
index 0000000000..963b58fdfc
--- /dev/null
+++ b/lib/diameter/src/compiler/diameter_dict_scanner.erl
@@ -0,0 +1,274 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2010-2011. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+-module(diameter_dict_scanner).
+
+%%
+%% A scanner for dictionary files of the form expected by yecc.
+%%
+
+-export([scan/1,
+ format_error/1]).
+
+-export([is_name/1]).
+
+%% -----------------------------------------------------------
+%% # scan/1
+%% -----------------------------------------------------------
+
+-spec scan(string()) -> {ok, [Token]} | {error, {atom(), string(), Lineno}}
+ when Token :: {word, Lineno, string()}
+ | {number, Lineno, non_neg_integer()}
+ | {Symbol, Lineno},
+ Lineno :: pos_integer(),
+ Symbol :: '{' | '}' | '<' | '>' | '[' | ']'
+ | '*' | '::=' | ':' | ',' | '-'
+ | avp_types
+ | avp_vendor_id
+ | codecs
+ | custom_types
+ | define
+ | grouped
+ | id
+ | inherits
+ | messages
+ | name
+ | prefix
+ | vendor
+ | '$end'
+ | code
+ | 'answer-message'
+ | 'AVP'
+ | 'AVP-Header'
+ | 'Diameter'
+ | 'Diameter-Header'
+ | 'Header'
+ | 'REQ'
+ | 'PXY'
+ | 'ERR'.
+
+scan(B)
+ when is_binary(B) ->
+ scan(binary_to_list(B));
+scan(S) ->
+ scan(S, {1, []}).
+
+scan(S, {Lineno, Acc}) ->
+ case split(S) of
+ '$end' = E ->
+ {ok, lists:reverse([{E, Lineno} | Acc])};
+ {Tok, Rest} ->
+ scan(Rest, acc(Tok, Lineno, Acc));
+ Reason when is_list(Reason) ->
+ {error, {Reason, S, Lineno}}
+ end.
+
+%% format_error/1
+
+format_error({Reason, Input, Lineno}) ->
+ io_lib:format("~s at line ~p: ~s",
+ [Reason, Lineno, head(Input, [], 20, true)]).
+
+%% is_name/1
+
+is_name([H|T]) ->
+ is_alphanum(H) andalso lists:all(fun is_name_ch/1, T).
+
+%% ===========================================================================
+
+head(Str, Acc, N, _)
+ when [] == Str;
+ 0 == N;
+ $\r == hd(Str);
+ $\n == hd(Str) ->
+ lists:reverse(Acc);
+head([C|Rest], Acc, N, true = T) %% skip leading whitespace
+ when C == $\s;
+ C == $\t;
+ C == $\f;
+ C == $\v ->
+ head(Rest, Acc, N, T);
+head([C|Rest], Acc, N, _) ->
+ head(Rest, [C|Acc], N-1, false).
+
+acc(endline, Lineno, Acc) ->
+ {Lineno + 1, Acc};
+acc(T, Lineno, Acc) ->
+ {Lineno, [tok(T, Lineno) | Acc]}.
+
+tok({Cat, Sym}, Lineno) ->
+ {Cat, Lineno, Sym};
+tok(Sym, Lineno) ->
+ {Sym, Lineno}.
+
+%% # split/1
+%%
+%% Output: {Token, Rest} | atom()
+
+%% Finito.
+split("") ->
+ '$end';
+
+%% Skip comments. This precludes using semicolon for any other purpose.
+split([$;|T]) ->
+ split(lists:dropwhile(fun(C) -> not is_eol_ch(C) end, T));
+
+%% Beginning of a section.
+split([$@|T]) ->
+ {Name, Rest} = lists:splitwith(fun is_name_ch/1, T),
+ case section(Name) of
+ false ->
+ "Unknown section";
+ 'end' ->
+ '$end';
+ A ->
+ {A, Rest}
+ end;
+
+split("::=" ++ T) ->
+ {'::=', T};
+
+split([H|T])
+ when H == ${; H == $};
+ H == $<; H == $>;
+ H == $[; H == $];
+ H == $*; H == $:; H == $,; H == $- ->
+ {list_to_atom([H]), T};
+
+%% RFC 3588 requires various names to begin with a letter but 3GPP (for
+%% one) abuses this. (eg 3GPP-Charging-Id in TS32.299.)
+split([H|_] = L) when $0 =< H, H =< $9 ->
+ {P, Rest} = splitwith(fun is_name_ch/1, L),
+ Tok = try
+ {number, read_int(P)}
+ catch
+ error:_ ->
+ word(P)
+ end,
+ {Tok, Rest};
+
+split([H|_] = L) when $a =< H, H =< $z;
+ $A =< H, H =< $Z ->
+ {P, Rest} = splitwith(fun is_name_ch/1, L),
+ {word(P), Rest};
+
+split([$'|T]) ->
+ case splitwith(fun(C) -> not lists:member(C, "'\r\n") end, T) of
+ {[_|_] = A, [$'|Rest]} ->
+ {{word, A}, Rest};
+ {[], [$'|_]} ->
+ "Empty string";
+ _ -> %% not terminated on same line
+ "Unterminated string"
+ end;
+
+%% Line ending of various forms.
+split([$\r,$\n|T]) ->
+ {endline, T};
+split([C|T])
+ when C == $\r;
+ C == $\n ->
+ {endline, T};
+
+%% Ignore whitespace.
+split([C|T])
+ when C == $\s;
+ C == $\t;
+ C == $\f;
+ C == $\v ->
+ split(T);
+
+split(_) ->
+ "Unexpected character".
+
+%% word/1
+
+%% Reserved words significant in parsing ...
+word(S)
+ when S == "answer-message";
+ S == "code";
+ S == "AVP";
+ S == "AVP-Header";
+ S == "Diameter";
+ S == "Diameter-Header";
+ S == "Header";
+ S == "REQ";
+ S == "PXY";
+ S == "ERR" ->
+ list_to_atom(S);
+
+%% ... or not.
+word(S) ->
+ {word, S}.
+
+%% section/1
+
+section(N)
+ when N == "avp_types";
+ N == "avp_vendor_id";
+ N == "codecs";
+ N == "custom_types";
+ N == "define";
+ N == "end";
+ N == "enum";
+ N == "grouped";
+ N == "id";
+ N == "inherits";
+ N == "messages";
+ N == "name";
+ N == "prefix";
+ N == "vendor" ->
+ list_to_atom(N);
+section(_) ->
+ false.
+
+%% read_int/1
+
+read_int([$0,X|S])
+ when X == $X;
+ X == $x ->
+ {ok, [N], []} = io_lib:fread("~16u", S),
+ N;
+
+read_int(S) ->
+ list_to_integer(S).
+
+%% splitwith/3
+
+splitwith(Fun, [H|T]) ->
+ {SH, ST} = lists:splitwith(Fun, T),
+ {[H|SH], ST}.
+
+is_eol_ch(C) ->
+ C == $\n orelse C == $\r.
+
+is_name_ch(C) ->
+ is_alphanum(C) orelse C == $- orelse C == $_.
+
+is_alphanum(C) ->
+ is_lower(C) orelse is_upper(C) orelse is_digit(C).
+
+is_lower(C) ->
+ $a =< C andalso C =< $z.
+
+is_upper(C) ->
+ $A =< C andalso C =< $Z.
+
+is_digit(C) ->
+ $0 =< C andalso C =< $9.