Replace dictionary file parser

The previous parser was very ad hoc and simply crashed on any kind of input error, providing no identification of the objectionable input that caused the parse to fail. The new parser is generated from a yecc grammar, making it easier both to understand what is being parsed and to provide useful diagnostics to the user in case of error.
author: Anders Svensson <[email protected]> 2011-10-16 21:36:37 +0200
committer: Anders Svensson <[email protected]> 2011-12-02 15:23:52 +0100
commit: ca185011269606596814075d4c8f9d13a855866b (patch)
tree: f67a45298ad311b05997c20dd4bf4fd1c3313e38
parent: 3cdd095e8de8506881a9856e711a90c9ed723f0a (diff)
download: otp-ca185011269606596814075d4c8f9d13a855866b.tar.gz
otp-ca185011269606596814075d4c8f9d13a855866b.tar.bz2
otp-ca185011269606596814075d4c8f9d13a855866b.zip
6 files changed, 619 insertions, 169 deletions
diff --git a/lib/diameter/src/Makefile b/lib/diameter/src/Makefile
index eea2aa894d..2ec016ecbc 100644
--- a/lib/diameter/src/Makefile
+++ b/lib/diameter/src/Makefile
@@ -54,14 +54,16 @@ VPATH = .:base:compiler:transport:gen
 
 include modules.mk
 
+# Modules generated from dictionary specifications.
 DICT_MODULES = $(DICTS:%=gen/diameter_gen_%)
 DICT_ERLS    = $(DICT_MODULES:%=%.erl)
 DICT_HRLS    = $(DICT_MODULES:%=%.hrl)
 
 # Modules to build before compiling dictionaries.
-COMPILER_MODULES = $(filter compiler/%, $(CT_MODULES))
+COMPILER_MODULES = $(notdir $(filter compiler/%, $(CT_MODULES))) \
+                   $(DICT_YRL)
 
-# All handwritten modules.
+# All handwritten modules from which a depend.mk is generated.
 MODULES = \
 	$(RT_MODULES) \
 	$(CT_MODULES)
@@ -74,11 +76,12 @@ APP_MODULES = \
 # Modules for which to build beams.
 TARGET_MODULES = \
 	$(APP_MODULES) \
-	$(CT_MODULES)
+	$(CT_MODULES) \
+	$(DICT_YRL:%=gen/%)
 
 # What to build for the 'opt' target.
 TARGET_FILES = \
-	$(patsubst %,$(EBIN)/%.$(EMULATOR),$(notdir $(TARGET_MODULES))) \
+	$(patsubst %, $(EBIN)/%.$(EMULATOR), $(notdir $(TARGET_MODULES))) \
 	$(APP_TARGET) \
 	$(APPUP_TARGET)
 
@@ -125,6 +128,10 @@ opt: $(TARGET_FILES)
 debug:
 	@$(MAKE) TYPE=debug opt
 
+# The dictionary parser(s): one gen/%.erl per yecc grammar in DICT_YRL.
+$(DICT_YRL:%=gen/%.erl): gen/%.erl: compiler/%.yrl
+	$(ERLC) -Werror -o $(@D) $<
+
 # Generate the app file.
 $(APP_TARGET): $(APP_SRC) ../vsn.mk modules.mk
 	M=`echo $(notdir $(APP_MODULES)) | tr ' ' ,`; \
@@ -146,6 +153,8 @@ info:
 	@echo ========================================
 	@$(call list,DICTS)
 	@echo
+	@$(call list,DICT_YRL)
+	@echo
 	@$(call list,RT_MODULES)
 	@echo
 	@$(call list,CT_MODULES)
@@ -164,7 +173,7 @@ info:
 	@echo ========================================
 
 clean:
-	rm -f $(TARGET_FILES) $(DICT_ERLS) $(DICT_HRLS)
+	rm -f $(TARGET_FILES) gen/*
 	rm -f depend.mk
 
 # ----------------------------------------------------
@@ -192,8 +201,9 @@ release_spec: opt
 	$(MAKE) $(TARGET_DIRS:%/=release_src_%)
 
 $(TARGET_DIRS:%/=release_src_%): release_src_%:
-	$(INSTALL_DATA) $(filter $*/%,$(TARGET_MODULES:%=%.erl) \
-	                              $(INTERNAL_HRLS)) \
+	$(INSTALL_DATA) $(filter $*/%, $(TARGET_MODULES:%=%.erl) \
+	                               $(INTERNAL_HRLS)) \
+	                $(filter $*/%, compiler/$(DICT_YRL).yrl) \
 	                $(RELSYSDIR)/src/$*
 
 release_docs_spec:
@@ -207,7 +217,7 @@ gen/diameter_gen_base_accounting.hrl gen/diameter_gen_relay.hrl: \
 	$(EBIN)/diameter_gen_base_rfc3588.$(EMULATOR)
 
 gen/diameter_gen_base_rfc3588.erl gen/diameter_gen_base_rfc3588.hrl: \
-	$(COMPILER_MODULES:compiler/%=$(EBIN)/%.$(EMULATOR))
+	$(COMPILER_MODULES:%=$(EBIN)/%.$(EMULATOR))
 
 $(DICT_MODULES:gen/%=$(EBIN)/%.$(EMULATOR)): \
 	$(INCDIR)/diameter.hrl \
@@ -224,11 +234,13 @@ depend.mk: depend.sed $(MODULES:%=%.erl) Makefile
 
 -include depend.mk
 
-.PRECIOUS: $(DICT_ERLS) $(DICT_HRLS)
 .PHONY: app clean depend dict info release_subdir
 .PHONY: debug opt release_docs_spec release_spec
 .PHONY: $(TARGET_DIRS:%/=%) $(TARGET_DIRS:%/=release_src_%)
 
+# Keep generated sources that make would otherwise delete as intermediates.
+.SECONDARY: $(DICT_ERLS) $(DICT_HRLS) $(DICT_YRL:%=gen/%.erl)
+
 # ----------------------------------------------------
 # Targets using secondary expansion (make >= 3.81)
 # ----------------------------------------------------
@@ -237,4 +249,6 @@ depend.mk: depend.sed $(MODULES:%=%.erl) Makefile
 
 # Make beams from a subdirectory.
 $(TARGET_DIRS:%/=%): \
-  $$(patsubst $$@/%,$(EBIN)/%.$(EMULATOR),$$(filter $$@/%,$(TARGET_MODULES)))
+  $$(patsubst $$@/%, \
+              $(EBIN)/%.$(EMULATOR), \
+              $$(filter $$@/%, $(TARGET_MODULES) compiler/$(DICT_YRL)))
diff --git a/lib/diameter/src/compiler/diameter_dict_parser.yrl b/lib/diameter/src/compiler/diameter_dict_parser.yrl
new file mode 100644
index 0000000000..6fd4cedd23
--- /dev/null
+++ b/lib/diameter/src/compiler/diameter_dict_parser.yrl
@@ -0,0 +1,324 @@
+%% -*- erlang -*-
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2010-2011. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+%% A grammar for dictionary specification.
+%%
+
+Nonterminals
+  application_id avp avp_code avp_def avp_defs avp_flags avp_header
+  avp_header_tok avp_name avp_names avp_ref avp_spec avp_type
+  avp_vendor avps bit bits command_def command_id diameter_name
+  dictionary enum_def enum_defs group_def group_defs header header_tok
+  ident idents message_defs module qual section sections.
+
+Terminals
+  avp_types avp_vendor_id codecs custom_types define enum grouped
+  id inherits messages name prefix vendor
+  number word
+  '{' '}' '<' '>' '[' ']' '*' '::=' ':' ',' '-'
+   code
+  'answer-message'
+  'AVP' 'AVP-Header'
+  'Diameter' 'Diameter-Header' 'Header'
+  'REQ' 'PXY' 'ERR'.
+
+Rootsymbol dictionary.
+
+Endsymbol '$end'.
+
+%% ===========================================================================
+
+dictionary -> sections : '$1'.
+
+sections -> '$empty'         : [].
+sections -> section sections : ['$1' | '$2'].
+
+section -> name ident   : ['$1', '$2'].
+section -> prefix ident : ['$1', '$2'].
+section -> id number    : ['$1', '$2'].
+section -> vendor number ident       : ['$1', '$2', '$3'].
+section -> inherits module avp_names : ['$1', '$2' | '$3'].
+section -> avp_types avp_defs        : ['$1' | '$2'].
+section -> avp_vendor_id number avp_names : ['$1', '$2' | '$3'].
+section -> custom_types module avp_names  : ['$1', '$2' | '$3'].
+section -> codecs module avp_names      : ['$1', '$2' | '$3'].
+section -> messages message_defs  : ['$1' | '$2'].
+section -> grouped group_defs     : ['$1' | '$2'].
+section -> enum ident enum_defs   : ['$1', '$2' | '$3'].
+section -> define ident enum_defs : ['$1', '$2' | '$3'].
+
+%% =====================================
+
+module -> ident : '$1'.
+
+avp_names -> idents : '$1'.  %% Note: not 'AVP'
+
+avp_defs -> '$empty'         : [].
+avp_defs -> avp_def avp_defs : ['$1' | '$2'].
+
+avp_def -> ident number avp_type avp_flags : ['$1', '$2', '$3', '$4'].
+
+avp_type -> ident : '$1'.
+
+idents -> '$empty'     : [].
+idents -> ident idents : ['$1' | '$2'].
+
+avp_flags -> '-'   :
+    {_, Lineno} = '$1',
+    {word, Lineno, ""}.
+avp_flags -> ident :
+    '$1'.
+%% Could support lowercase here if there's a use for distinguishing
+%% between Must and Should in the future in deciding whether or not
+%% to set a flag.
+
+ident -> word : '$1'.
+
+%% Don't bother mapping reserved words to make these usable in this
+%% context. That an AVP can't be named Diameter-Header is probably no
+%% great loss, and that it can't be named AVP may even save someone
+%% from themselves. (Temporarily at least.)
+
+group_defs -> '$empty'             : [].
+group_defs -> group_def group_defs : ['$1' | '$2'].
+
+message_defs -> '$empty'                 : [].
+message_defs -> command_def message_defs : ['$1' | '$2'].
+
+enum_defs -> '$empty'           : [].
+enum_defs -> enum_def enum_defs : ['$1' | '$2'].
+
+enum_def -> ident number : ['$1', '$2'].
+
+%% =====================================
+%% 3.2.  Command Code ABNF specification
+%%
+%%    Every Command Code defined MUST include a corresponding ABNF
+%%    specification, which is used to define the AVPs that MUST or MAY be
+%%    present when sending the message.  The following format is used in
+%%    the definition:
+
+%%   command-def      = <command-name> "::=" diameter-message
+%%
+%%   command-name     = diameter-name
+%%
+%%   diameter-name    = ALPHA *(ALPHA / DIGIT / "-")
+%%
+%%   diameter-message = header  [ *fixed] [ *required] [ *optional]
+
+%% answer-message is a special case.
+command_def -> 'answer-message' '::=' '<' header_tok ':' code
+                                          ',' 'ERR' '[' 'PXY' ']' '>'
+               avps
+  : ['$1', false | '$13'].
+
+command_def -> diameter_name '::=' header avps
+  : ['$1', '$3' | '$4'].
+%% Ensure the order fixed/required/optional by semantic checks rather
+%% than grammatically since the latter requires more lookahead: don't
+%% know until after a leading qual which of the three it is that's
+%% being parsed.
+
+diameter_name -> ident : '$1'.
+
+%%   header           = "<" "Diameter Header:" command-id
+%%                      [r-bit] [p-bit] [e-bit] [application-id] ">"
+%%
+%%   command-id       = 1*DIGIT
+%%                      ; The Command Code assigned to the command
+%%
+%%   r-bit            = ", REQ"
+%%                      ; If present, the 'R' bit in the Command
+%%                      ; Flags is set, indicating that the message
+%%                      ; is a request, as opposed to an answer.
+%%
+%%   p-bit            = ", PXY"
+%%                      ; If present, the 'P' bit in the Command
+%%                      ; Flags is set, indicating that the message
+%%                      ; is proxiable.
+%%
+%%   e-bit            = ", ERR"
+%%                      ; If present, the 'E' bit in the Command
+%%                      ; Flags is set, indicating that the answer
+%%                      ; message contains a Result-Code AVP in
+%%                      ; the "protocol error" class.
+%%
+%%   application-id   = 1*DIGIT
+
+header -> '<' header_tok ':' command_id bits application_id '>'
+  : ['$4', '$5', '$6'].
+
+command_id -> number : '$1'.
+
+%% Accept both the form of the base definition and the typo (fixed in
+%% 3588bis) of the grammar.
+header_tok -> 'Diameter' 'Header'.
+header_tok -> 'Diameter-Header'.
+
+bits -> '$empty'     : [].
+bits -> ',' bit bits : ['$2' | '$3'].
+
+%% ERR only makes sense for answer-message so don't allow it here
+%% (despite 3588).
+bit -> 'REQ' : '$1'.
+bit -> 'PXY' : '$1'.
+
+application_id -> '$empty' : false.
+application_id -> number   : '$1'.
+
+%%   fixed            = [qual] "<" avp-spec ">"
+%%                      ; Defines the fixed position of an AVP
+%%
+%%   required         = [qual] "{" avp-spec "}"
+%%                      ; The AVP MUST be present and can appear
+%%                      ; anywhere in the message.
+%%
+%%   optional         = [qual] "[" avp-name "]"
+%%                      ; The avp-name in the 'optional' rule cannot
+%%                      ; evaluate to any AVP Name which is included
+%%                      ; in a fixed or required rule.  The AVP can
+%%                      ; appear anywhere in the message.
+%%                      ;
+%%                      ; NOTE:  "[" and "]" have a slightly different
+%%                      ; meaning than in ABNF (RFC 5234). These braces
+%%                      ; cannot be used to express optional fixed rules
+%%                      ; (such as an optional ICV at the end). To do this,
+%%                      ; the convention is '0*1fixed'.
+
+avps -> '$empty' : [].
+avps -> avp avps : ['$1' | '$2'].
+
+avp -> avp_ref      : [false | '$1'].
+avp -> qual avp_ref : ['$1' | '$2'].
+
+avp_ref -> '<' avp_spec '>' : [$<, '$2'].
+avp_ref -> '{' avp_name '}' : [${, '$2'].
+avp_ref -> '[' avp_name ']' : [$[, '$2'].
+%% Note that required can be an avp_name, not just avp_spec. 'AVP'
+%% is specified as required by Failed-AVP for example.
+
+%%   qual             = [min] "*" [max]
+%%                      ; See ABNF conventions, RFC 5234 Section 4.
+%%                      ; The absence of any qualifiers depends on
+%%                      ; whether it precedes a fixed, required, or
+%%                      ; optional rule. If a fixed or required rule has
+%%                      ; no qualifier, then exactly one such AVP MUST
+%%                      ; be present.  If an optional rule has no
+%%                      ; qualifier, then 0 or 1 such AVP may be
+%%                      ; present. If an optional rule has a qualifier,
+%%                      ; then the value of min MUST be 0 if present.
+%%
+%%   min              = 1*DIGIT
+%%                      ; The minimum number of times the element may
+%%                      ; be present. If absent, the default value is zero
+%%                      ; for fixed and optional rules and one for required
+%%                      ; rules. The value MUST be at least one for
+%%                      ; required rules.
+%%
+%%   max              = 1*DIGIT
+%%                      ; The maximum number of times the element may
+%%                      ; be present. If absent, the default value is
+%%                      ; infinity. A value of zero implies the AVP MUST
+%%                      ; NOT be present.
+
+qual -> number '*' number : {'$1', '$3'}.  %% min*max
+qual -> number '*'        : {'$1', true}.  %% min* : no max given
+qual -> '*' number        : {true, '$2'}.  %% *max : no min given
+qual -> '*'               : true.          %% *    : neither given
+
+%%   avp-spec         = diameter-name
+%%                      ; The avp-spec has to be an AVP Name, defined
+%%                      ; in the base or extended Diameter
+%%                      ; specifications.
+
+avp_spec -> diameter_name : '$1'.
+
+%%   avp-name         = avp-spec / "AVP"
+%%                      ; The string "AVP" stands for *any* arbitrary AVP
+%%                      ; Name, not otherwise listed in that command code
+%%                      ; definition. Addition of this AVP is recommended for
+%%                      ; all command ABNFs to allow for extensibility.
+
+avp_name -> 'AVP'    : '$1'.
+avp_name -> avp_spec : '$1'.
+
+%%   The following is a definition of a fictitious command code:
+%%
+%%   Example-Request ::= < Diameter Header: 9999999, REQ, PXY >
+%%                       { User-Name }
+%%                     * { Origin-Host }
+%%                     * [ AVP ]
+
+%% =====================================
+%% 4.4.   Grouped AVP Values
+%%
+%%    The Diameter protocol allows AVP values of type 'Grouped'.  This
+%%    implies that the Data field is actually a sequence of AVPs.  It is
+%%    possible to include an AVP with a Grouped type within a Grouped type,
+%%    that is, to nest them.  AVPs within an AVP of type Grouped have the
+%%    same padding requirements as non-Grouped AVPs, as defined in Section
+%%    4.
+%%
+%%    The AVP Code numbering space of all AVPs included in a Grouped AVP is
+%%    the same as for non-grouped AVPs.  Receivers of a Grouped AVP that
+%%    does not have the 'M' (mandatory) bit set and one or more of the
+%%    encapsulated AVPs within the group has the 'M' (mandatory) bit set
+%%    MAY simply be ignored if the Grouped AVP itself is unrecognized.  The
+%%    rule applies even if the encapsulated AVP with its 'M' (mandatory)
+%%    bit set is further encapsulated within other sub-groups; i.e. other
+%%    Grouped AVPs embedded within the Grouped AVP.
+%%
+%%    Every Grouped AVP defined MUST include a corresponding grammar, using
+%%    ABNF [RFC5234] (with modifications), as defined below.
+
+%%          grouped-avp-def  = <name> "::=" avp
+%%
+%%          name-fmt         = ALPHA *(ALPHA / DIGIT / "-")
+%%
+%%          name             = name-fmt
+%%                             ; The name has to be the name of an AVP,
+%%                             ; defined in the base or extended Diameter
+%%                             ; specifications.
+%%
+%%          avp              = header  [ *fixed] [ *required] [ *optional]
+
+group_def -> ident '::=' avp_header avps : ['$1', '$3' | '$4'].
+
+%%          header           = "<" "AVP-Header:" avpcode [vendor] ">"
+%%
+%%          avpcode          = 1*DIGIT
+%%                             ; The AVP Code assigned to the Grouped AVP
+%%
+%%          vendor           = 1*DIGIT
+%%                             ; The Vendor-ID assigned to the Grouped AVP.
+%%                             ; If absent, the default value of zero is
+%%                             ; used.
+
+avp_header -> '<' avp_header_tok ':' avp_code avp_vendor '>'
+  : ['$4', '$5'].
+
+avp_header_tok -> 'AVP-Header'.
+avp_header_tok -> 'AVP' 'Header'.
+
+avp_code -> number : '$1'.
+
+avp_vendor -> '$empty' : false.
+avp_vendor -> number   : '$1'.
diff --git a/lib/diameter/src/compiler/diameter_dict_scanner.erl b/lib/diameter/src/compiler/diameter_dict_scanner.erl
new file mode 100644
index 0000000000..74bf0cb06a
--- /dev/null
+++ b/lib/diameter/src/compiler/diameter_dict_scanner.erl
@@ -0,0 +1,265 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2010-2011. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+-module(diameter_dict_scanner).
+
+%%
+%% A scanner for dictionary files of the form expected by yecc.
+%%
+
+-export([scan/1,
+         format_error/1]).
+
+%% -----------------------------------------------------------
+%% # scan/1
+%% -----------------------------------------------------------
+
+%% Tokenize a dictionary, given as a string or binary. An error carries
+%% a reason string, the input remaining at the error and its line number.
+-spec scan(string() | binary())
+   -> {ok, [Token]}
+    | {error, {string(), string(), Lineno}}
+ when Token  :: {word, Lineno, string()}
+              | {number, Lineno, non_neg_integer()}
+              | {Symbol, Lineno},
+      Lineno   :: pos_integer(),
+      Symbol   :: '{' | '}'   | '<' | '>' | '[' | ']'
+                | '*' | '::=' | ':' | ',' | '-'
+                | avp_types
+                | avp_vendor_id
+                | codecs
+                | custom_types
+                | define
+                | enum
+                | grouped
+                | id
+                | inherits
+                | messages
+                | name
+                | prefix
+                | vendor
+                | '$end'
+                | code
+                | 'answer-message'
+                | 'AVP' | 'AVP-Header'
+                | 'Diameter' | 'Diameter-Header' | 'Header'
+                | 'REQ' | 'PXY' | 'ERR'.
+
+scan(B)
+  when is_binary(B) ->
+    scan(binary_to_list(B));
+scan(S) ->
+    scan(S, {1, []}).
+
+scan(Input, {Line, Toks}) ->
+    scanned(split(Input), Input, Line, Toks).
+
+scanned('$end' = End, _, Line, Toks) ->
+    {ok, lists:reverse([{End, Line} | Toks])};
+scanned({Tok, Rest}, _, Line, Toks) ->
+    scan(Rest, acc(Tok, Line, Toks));
+scanned(Reason, Input, Line, _) when is_list(Reason) ->
+    {error, {Reason, Input, Line}}.
+
+format_error({Reason, Input, Lineno}) ->
+    Context = head(Input, [], 20, true),
+    io_lib:format("~s at line ~p: ~s", [Reason, Lineno, Context]).
+
+%% Return at most N characters of the first line of Str, less leading
+%% whitespace, for use as context in an error message.
+head([], Acc, _, _) ->
+    lists:reverse(Acc);
+head(_, Acc, 0, _) ->
+    lists:reverse(Acc);
+head([C|_], Acc, _, _)
+  when C == $\r; C == $\n ->
+    lists:reverse(Acc);
+head([C|Rest], Acc, N, true)  %% skip leading whitespace
+  when C == $\s; C == $\t; C == $\f; C == $\v ->
+    head(Rest, Acc, N, true);
+head([C|Rest], Acc, N, _) ->
+    head(Rest, [C|Acc], N-1, false).
+
+acc(endline, Lineno, Acc) ->  %% line ending: count it, no token
+    {Lineno + 1, Acc};
+acc(T, Lineno, Acc) ->        %% anything else: tag with line and push
+    {Lineno, [tok(T, Lineno) | Acc]}.
+
+tok({Cat, Sym}, Lineno) ->    %% word or number, with a value
+    {Cat, Lineno, Sym};
+tok(Sym, Lineno) ->           %% bare symbol or reserved word
+    {Sym, Lineno}.
+
+%% # split/1
+%%
+%% Output: {Token, Rest} | '$end' | string()  (the latter an error reason)
+
+%% Finito.
+split("") ->
+    '$end';
+
+%% Skip comments. This precludes using semicolon for any other purpose.
+split([$;|T]) ->
+    split(lists:dropwhile(fun(C) -> not is_eol_ch(C) end, T));
+
+%% Beginning of a section.
+split([$@|T]) ->
+    {Name, Rest} = lists:splitwith(fun is_name_ch/1, T),
+    case section(Name) of
+        false ->
+            "Unknown section";
+        'end' ->
+            '$end';
+        A ->
+            {A, Rest}
+    end;
+
+split("::=" ++ T) ->
+    {'::=', T};
+
+split([H|T])
+  when H == ${; H == $};
+       H == $<; H == $>;
+       H == $[; H == $];
+       H == $*; H == $:; H == $,; H == $- ->
+    {list_to_atom([H]), T};
+
+%% RFC 3588 requires various names to begin with a letter but 3GPP (for
+%% one) abuses this. (eg 3GPP-Charging-Id in TS32.299.)
+split([H|_] = L) when $0 =< H, H =< $9 ->
+    {P, Rest} = splitwith(fun is_name_ch/1, L),
+    Tok = try
+              {number, read_int(P)}
+          catch
+              error:_ ->
+                  word(P)
+          end,
+    {Tok, Rest};
+
+split([H|_] = L) when $a =< H, H =< $z;
+                      $A =< H, H =< $Z ->
+    {P, Rest} = splitwith(fun is_name_ch/1, L),
+    {word(P), Rest};
+
+%% Quoted word. Uses lists:splitwith/2: the local splitwith/2 always
+%% consumes the first character and would swallow a closing quote.
+split([$'|T]) ->
+    case lists:splitwith(fun(C) -> not lists:member(C, "'\r\n") end, T) of
+        {[_|_] = A, [$'|Rest]} ->
+            {{word, A}, Rest};
+        {[], [$'|_]} ->
+            "Empty atom";
+        _ ->  %% not terminated on same line, or at end of input
+            "Unterminated atom"
+    end;
+
+%% Line ending of various forms.
+split([$\r,$\n|T]) ->
+    {endline, T};
+split([C|T])
+  when C == $\r;
+       C == $\n ->
+    {endline, T};
+
+%% Ignore whitespace.
+split([C|T])
+  when C == $\s;
+       C == $\t;
+       C == $\f;
+       C == $\v ->
+    split(T);
+
+split(_) ->
+    "Unexpected character".
+
+%% word/1
+%%
+%% An atom for a word reserved in the grammar, {word, S} otherwise.
+
+word(S) ->
+    Reserved = ["answer-message",
+                "code",
+                "AVP",
+                "AVP-Header",
+                "Diameter",
+                "Diameter-Header",
+                "Header",
+                "REQ",
+                "PXY",
+                "ERR"],
+    case lists:member(S, Reserved) of
+        true  -> list_to_atom(S);
+        false -> {word, S}
+    end.
+
+%% section/1
+%%
+%% The section atom for the name following @, or false if unknown.
+
+section(N) ->
+    Known = ["avp_types",
+             "avp_vendor_id",
+             "codecs",
+             "custom_types",
+             "define",
+             "end",
+             "enum",
+             "grouped",
+             "id",
+             "inherits",
+             "messages",
+             "name",
+             "prefix",
+             "vendor"],
+    lists:member(N, Known) andalso list_to_atom(N).
+
+%% read_int/1
+%% Convert a scanned name to an integer, raising error if it isn't one.
+read_int([$0,X|S])  %% hexadecimal: 0x or 0X prefix
+  when X == $X;
+       X == $x ->
+    {ok, [N], []} = io_lib:fread("~16u", S),  %% badmatch unless all of S is read
+    N;
+%% No hex prefix: left to list_to_integer/1, which fails on a non-number.
+read_int(S) ->
+    list_to_integer(S).
+
+%% splitwith/2: as lists:splitwith/2, but the first character of the
+%% (non-empty) list is always taken, without applying Fun to it.
+splitwith(Fun, [H|T]) ->
+    {SH, ST} = lists:splitwith(Fun, T),
+    {[H|SH], ST}.
+
+is_eol_ch(C) ->
+    C == $\r orelse C == $\n.
+
+is_name_ch(C) ->
+    C == $_ orelse C == $- orelse is_alphanum(C).
+
+is_alphanum(C) ->
+    is_digit(C) orelse is_upper(C) orelse is_lower(C).
+
+is_lower(C) when $a =< C, C =< $z -> true;
+is_lower(_) -> false.
+
+is_upper(C) when $A =< C, C =< $Z -> true;
+is_upper(_) -> false.
+
+is_digit(C) when $0 =< C, C =< $9 -> true;
+is_digit(_) -> false.
diff --git a/lib/diameter/src/compiler/diameter_spec_scan.erl b/lib/diameter/src/compiler/diameter_spec_scan.erl
deleted file mode 100644
index bc0448882a..0000000000
--- a/lib/diameter/src/compiler/diameter_spec_scan.erl
+++ /dev/null
@@ -1,157 +0,0 @@
-%%
-%% %CopyrightBegin%
-%%
-%% Copyright Ericsson AB 2010-2011. All Rights Reserved.
-%%
-%% The contents of this file are subject to the Erlang Public License,
-%% Version 1.1, (the "License"); you may not use this file except in
-%% compliance with the License. You should have received a copy of the
-%% Erlang Public License along with this software. If not, it can be
-%% retrieved online at http://www.erlang.org/.
-%%
-%% Software distributed under the License is distributed on an "AS IS"
-%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
-%% the License for the specific language governing rights and limitations
-%% under the License.
-%%
-%% %CopyrightEnd%
-%%
-
--module(diameter_spec_scan).
-
-%%
-%% Functions used by the spec file parser in diameter_spec_util.
-%%
-
--export([split/1,
-         split/2,
-         parse/1]).
-
-%%% -----------------------------------------------------------
-%%% # parse/1
-%%%
-%%% Output: list of Token
-%%%
-%%%         Token = '{' | '}' | '<' | '>' | '[' | ']'
-%%%               | '*' | '::=' | ':' | ',' | '-'
-%%%               | {name, string()}
-%%%               | {tag, atom()}
-%%%               | {number, integer() >= 0}
-%%%
-%%% Tokenize a string. Fails if the string does not parse.
-%%% -----------------------------------------------------------
-
-parse(S) ->
-    parse(S, []).
-
-%% parse/2
-
-parse(S, Acc) ->
-    acc(split(S), Acc).
-
-acc({T, Rest}, Acc) ->
-    parse(Rest, [T | Acc]);
-acc("", Acc) ->
-    lists:reverse(Acc).
-
-%%% -----------------------------------------------------------
-%%% # split/2
-%%%
-%%% Output: {list() of Token, Rest}
-%%%
-%%% Extract a specified number of tokens from a string. Returns a list
-%%% of length less than the specified number if there are less than
-%%% this number of tokens to be parsed.
-%%% -----------------------------------------------------------
-
-split(Str, N)
-  when N >= 0 ->
-    split(N, Str, []).
-
-split(0, Str, Acc) ->
-    {lists:reverse(Acc), Str};
-
-split(N, Str, Acc) ->
-    case split(Str) of
-        {T, Rest} ->
-            split(N-1, Rest, [T|Acc]);
-        "" = Rest ->
-            {lists:reverse(Acc), Rest}
-    end.
-
-%%% -----------------------------------------------------------
-%%% # split/1
-%%%
-%%% Output: {Token, Rest} | ""
-%%%
-%%% Extract the next token from a string.
-%%% -----------------------------------------------------------
-
-split("" = Rest) ->
-    Rest;
-
-split("::=" ++ T) ->
-    {'::=', T};
-
-split([H|T])
-  when H == ${; H == $};
-       H == $<; H == $>;
-       H == $[; H == $];
-       H == $*; H == $:; H == $,; H == $- ->
-    {list_to_atom([H]), T};
-
-split([H|T]) when $A =< H, H =< $Z;
-                  $0 =< H, H =< $9 ->
-    {P, Rest} = splitwith(fun is_name_ch/1, [H], T),
-    Tok = try
-              {number, read_int(P)}
-          catch
-              error:_ ->
-                  {name, P}
-          end,
-    {Tok, Rest};
-
-split([H|T]) when $a =< H, H =< $z ->
-    {P, Rest} = splitwith(fun is_name_ch/1, [H], T),
-    {{tag, list_to_atom(P)}, Rest};
-
-split([H|T]) when H == $\t;
-                  H == $\s;
-                  H == $\n ->
-    split(T).
-
-%% read_int/1
-
-read_int([$0,X|S])
-  when X == $X;
-       X == $x ->
-    {ok, [N], []} = io_lib:fread("~16u", S),
-    N;
-
-read_int(S) ->
-    list_to_integer(S).
-
-%% splitwith/3
-
-splitwith(Fun, Acc, S) ->
-    split([] /= S andalso Fun(hd(S)), Fun, Acc, S).
-
-split(true, Fun, Acc, [H|T]) ->
-    splitwith(Fun, [H|Acc], T);
-split(false, _, Acc, S) ->
-    {lists:reverse(Acc), S}.
-
-is_name_ch(C) ->
-    is_alphanum(C) orelse C == $- orelse C == $_.
-
-is_alphanum(C) ->
-    is_lower(C) orelse is_upper(C) orelse is_digit(C).
-
-is_lower(C) ->
-    $a =< C andalso C =< $z.
-
-is_upper(C) ->
-    $A =< C andalso C =< $Z.
-
-is_digit(C) ->
-    $0 =< C andalso C =< $9.
diff --git a/lib/diameter/src/gen/.gitignore b/lib/diameter/src/gen/.gitignore
index d490642eb7..3f32313f56 100644
--- a/lib/diameter/src/gen/.gitignore
+++ b/lib/diameter/src/gen/.gitignore
@@ -1,2 +1,2 @@
-
+/diameter_dict_parser.erl
 /diameter_gen*rl
diff --git a/lib/diameter/src/modules.mk b/lib/diameter/src/modules.mk
index c7cbe598af..6929528a37 100644
--- a/lib/diameter/src/modules.mk
+++ b/lib/diameter/src/modules.mk
@@ -24,6 +24,10 @@ DICTS = \
 	base_accounting \
 	relay
 
+# The yecc grammar for the dictionary parser.
+DICT_YRL = \
+	diameter_dict_parser
+
 # Handwritten (runtime) modules included in the app file.
 RT_MODULES = \
 	base/diameter \
@@ -62,7 +66,7 @@ CT_MODULES = \
 	base/diameter_info \
 	compiler/diameter_codegen \
 	compiler/diameter_exprecs \
-	compiler/diameter_spec_scan \
+	compiler/diameter_dict_scanner \
 	compiler/diameter_spec_util \
 	compiler/diameter_make
author	Anders Svensson <[email protected]>	2011-10-16 21:36:37 +0200
committer	Anders Svensson <[email protected]>	2011-12-02 15:23:52 +0100
commit	ca185011269606596814075d4c8f9d13a855866b (patch)
tree	f67a45298ad311b05997c20dd4bf4fd1c3313e38
parent	3cdd095e8de8506881a9856e711a90c9ed723f0a (diff)
download	otp-ca185011269606596814075d4c8f9d13a855866b.tar.gz otp-ca185011269606596814075d4c8f9d13a855866b.tar.bz2 otp-ca185011269606596814075d4c8f9d13a855866b.zip