Replace dictionary file parser

The previous parse was very adhoc and simply crashed on any kind of input error, providing no identification of the objectionable input that caused the parse to fail. The new parser is generated from a yecc grammar, making it easier both to understand what it is that's being parsed and to provide useful diagnostics to the user in case of error.
author: Anders Svensson <[email protected]> 2011-10-16 21:36:37 +0200
committer: Anders Svensson <[email protected]> 2011-12-02 15:23:52 +0100
commit: ca185011269606596814075d4c8f9d13a855866b (patch)
tree: f67a45298ad311b05997c20dd4bf4fd1c3313e38 /lib/diameter/src/compiler/diameter_dict_scanner.erl
parent: 3cdd095e8de8506881a9856e711a90c9ed723f0a (diff)
download: otp-ca185011269606596814075d4c8f9d13a855866b.tar.gz
otp-ca185011269606596814075d4c8f9d13a855866b.tar.bz2
otp-ca185011269606596814075d4c8f9d13a855866b.zip
1 files changed, 265 insertions, 0 deletions
diff --git a/lib/diameter/src/compiler/diameter_dict_scanner.erl b/lib/diameter/src/compiler/diameter_dict_scanner.erl
new file mode 100644
index 0000000000..74bf0cb06a
--- /dev/null
+++ b/lib/diameter/src/compiler/diameter_dict_scanner.erl
@@ -0,0 +1,265 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2010-2011. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+-module(diameter_dict_scanner).
+
+%%
+%% A scanner for dictionary files of the form expected by yecc.
+%%
+
+-export([scan/1,
+         format_error/1]).
+
+%% -----------------------------------------------------------
+%% # scan/1
+%% -----------------------------------------------------------
+
+-spec scan(string()) -> {ok, [Token]} | {error, {atom(), string(), Lineno}}
+ when Token  :: {word, Lineno, string()}
+              | {number, Lineno, non_neg_integer()}
+              | {Symbol, Lineno},
+      Lineno   :: pos_integer(),
+      Symbol   :: '{' | '}'   | '<' | '>' | '[' | ']'
+                | '*' | '::=' | ':' | ',' | '-'
+                | avp_types
+                | avp_vendor_id
+                | codecs
+                | custom_types
+                | define
+                | grouped
+                | id
+                | inherits
+                | messages
+                | name
+                | prefix
+                | vendor
+                | '$end'
+                | code
+                | 'answer-message'
+                | 'AVP'
+                | 'AVP-Header'
+                | 'Diameter'
+                | 'Diameter-Header'
+                | 'Header'
+                | 'REQ'
+                | 'PXY'
+                | 'ERR'.
+
+scan(B)
+  when is_binary(B) ->
+    scan(binary_to_list(B));
+scan(S) ->
+    scan(S, {1, []}).
+
+scan(S, {Lineno, Acc}) ->
+    case split(S) of
+        '$end' = E ->
+            {ok, lists:reverse([{E, Lineno} | Acc])};
+        {Tok, Rest} ->
+            scan(Rest, acc(Tok, Lineno, Acc));
+        Reason when is_list(Reason) ->
+            {error, {Reason, S, Lineno}}
+    end.
+
+format_error({Reason, Input, Lineno}) ->
+    io_lib:format("~s at line ~p: ~s",
+                  [Reason, Lineno, head(Input, [], 20, true)]).
+
+head(Str, Acc, N, _)
+  when [] == Str;
+       0 == N;
+       $\r == hd(Str);
+       $\n == hd(Str) ->
+    lists:reverse(Acc);
+head([C|Rest], Acc, N, true = T)  %% skip leading whitespace
+  when C == $\s;
+       C == $\t;
+       C == $\f;
+       C == $\v ->
+    head(Rest, Acc, N, T);
+head([C|Rest], Acc, N, _) ->
+    head(Rest, [C|Acc], N-1, false).
+
+acc(endline, Lineno, Acc) ->
+    {Lineno + 1, Acc};
+acc(T, Lineno, Acc) ->
+    {Lineno, [tok(T, Lineno) | Acc]}.
+
+tok({Cat, Sym}, Lineno) ->
+    {Cat, Lineno, Sym};
+tok(Sym, Lineno) ->
+    {Sym, Lineno}.
+
+%% # split/1
+%%
+%% Output: {Token, Rest} | atom()
+
+%% Finito.
+split("") ->
+    '$end';
+
+%% Skip comments. This precludes using semicolon for any other purpose.
+split([$;|T]) ->
+    split(lists:dropwhile(fun(C) -> not is_eol_ch(C) end, T));
+
+%% Beginning of a section.
+split([$@|T]) ->
+    {Name, Rest} = lists:splitwith(fun is_name_ch/1, T),
+    case section(Name) of
+        false ->
+            "Unknown section";
+        'end' ->
+            '$end';
+        A ->
+            {A, Rest}
+    end;
+
+split("::=" ++ T) ->
+    {'::=', T};
+
+split([H|T])
+  when H == ${; H == $};
+       H == $<; H == $>;
+       H == $[; H == $];
+       H == $*; H == $:; H == $,; H == $- ->
+    {list_to_atom([H]), T};
+
+%% RFC 3588 requires various names to begin with a letter but 3GPP (for
+%% one) abuses this. (eg 3GPP-Charging-Id in TS32.299.)
+split([H|_] = L) when $0 =< H, H =< $9 ->
+    {P, Rest} = splitwith(fun is_name_ch/1, L),
+    Tok = try
+              {number, read_int(P)}
+          catch
+              error:_ ->
+                  word(P)
+          end,
+    {Tok, Rest};
+
+split([H|_] = L) when $a =< H, H =< $z;
+                      $A =< H, H =< $Z ->
+    {P, Rest} = splitwith(fun is_name_ch/1, L),
+    {word(P), Rest};
+
+split([$'|T]) ->
+    case splitwith(fun(C) -> not lists:member(C, "'\r\n") end, T) of
+        {[_|_] = A, [$'|Rest]} ->
+            {{word, A}, Rest};
+        {[_|_], _} ->  %% not terminated on same line
+            "Unterminated atom";
+        {[], []} ->    %% last character
+            "Unterminated atom";
+        {[], _} ->
+            "Empty atom"
+    end;
+
+%% Line ending of various forms.
+split([$\r,$\n|T]) ->
+    {endline, T};
+split([C|T])
+  when C == $\r;
+       C == $\n ->
+    {endline, T};
+
+%% Ignore whitespace.
+split([C|T])
+  when C == $\s;
+       C == $\t;
+       C == $\f;
+       C == $\v ->
+    split(T);
+
+split(_) ->
+    "Unexpected character".
+
+%% word/1
+
+%% Reserved words significant in parsing ...
+word(S)
+  when S == "answer-message";
+       S == "code";
+       S == "AVP";
+       S == "AVP-Header";
+       S == "Diameter";
+       S == "Diameter-Header";
+       S == "Header";
+       S == "REQ";
+       S == "PXY";
+       S == "ERR" ->
+    list_to_atom(S);
+
+%% ... or not.
+word(S) ->
+    {word, S}.
+
+%% section/1
+
+section(N)
+  when N == "avp_types";
+       N == "avp_vendor_id";
+       N == "codecs";
+       N == "custom_types";
+       N == "define";
+       N == "end";
+       N == "enum";
+       N == "grouped";
+       N == "id";
+       N == "inherits";
+       N == "messages";
+       N == "name";
+       N == "prefix";
+       N == "vendor" ->
+    list_to_atom(N);
+section(_) ->
+    false.
+
+%% read_int/1
+
+read_int([$0,X|S])
+  when X == $X;
+       X == $x ->
+    {ok, [N], []} = io_lib:fread("~16u", S),
+    N;
+
+read_int(S) ->
+    list_to_integer(S).
+
+%% splitwith/3
+
+splitwith(Fun, [H|T]) ->
+    {SH, ST} = lists:splitwith(Fun, T),
+    {[H|SH], ST}.
+
+is_eol_ch(C) ->
+    C == $\n orelse C == $\r.
+
+is_name_ch(C) ->
+    is_alphanum(C) orelse C == $- orelse C == $_.
+
+is_alphanum(C) ->
+    is_lower(C) orelse is_upper(C) orelse is_digit(C).
+
+is_lower(C) ->
+    $a =< C andalso C =< $z.
+
+is_upper(C) ->
+    $A =< C andalso C =< $Z.
+
+is_digit(C) ->
+    $0 =< C andalso C =< $9.
author	Anders Svensson <[email protected]>	2011-10-16 21:36:37 +0200
committer	Anders Svensson <[email protected]>	2011-12-02 15:23:52 +0100
commit	ca185011269606596814075d4c8f9d13a855866b (patch)
tree	f67a45298ad311b05997c20dd4bf4fd1c3313e38 /lib/diameter/src/compiler/diameter_dict_scanner.erl
parent	3cdd095e8de8506881a9856e711a90c9ed723f0a (diff)
download	otp-ca185011269606596814075d4c8f9d13a855866b.tar.gz otp-ca185011269606596814075d4c8f9d13a855866b.tar.bz2 otp-ca185011269606596814075d4c8f9d13a855866b.zip