From ca185011269606596814075d4c8f9d13a855866b Mon Sep 17 00:00:00 2001 From: Anders Svensson Date: Sun, 16 Oct 2011 21:36:37 +0200 Subject: Replace dictionary file parser The previous parse was very adhoc and simply crashed on any kind of input error, providing no identification of the objectionable input that caused the parse to fail. The new parser is generated from a yecc grammar, making it easier both to understand what it is that's being parsed and to provide useful diagnostics to the user in case of error. --- lib/diameter/src/compiler/diameter_dict_parser.yrl | 324 +++++++++++++++++++++ .../src/compiler/diameter_dict_scanner.erl | 265 +++++++++++++++++ lib/diameter/src/compiler/diameter_spec_scan.erl | 157 ---------- 3 files changed, 589 insertions(+), 157 deletions(-) create mode 100644 lib/diameter/src/compiler/diameter_dict_parser.yrl create mode 100644 lib/diameter/src/compiler/diameter_dict_scanner.erl delete mode 100644 lib/diameter/src/compiler/diameter_spec_scan.erl (limited to 'lib/diameter/src/compiler') diff --git a/lib/diameter/src/compiler/diameter_dict_parser.yrl b/lib/diameter/src/compiler/diameter_dict_parser.yrl new file mode 100644 index 0000000000..6fd4cedd23 --- /dev/null +++ b/lib/diameter/src/compiler/diameter_dict_parser.yrl @@ -0,0 +1,324 @@ +%% -*- erlang -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2010-2011. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%% +%% A grammar for dictionary specification. +%% + +Nonterminals + application_id avp avp_code avp_def avp_defs avp_flags avp_header + avp_header_tok avp_name avp_names avp_ref avp_spec avp_type + avp_vendor avps bit bits command_def command_id diameter_name + dictionary enum_def enum_defs group_def group_defs header header_tok + ident idents message_defs module qual section sections. + +Terminals + avp_types avp_vendor_id codecs custom_types define enum grouped + id inherits messages name prefix vendor + number word + '{' '}' '<' '>' '[' ']' '*' '::=' ':' ',' '-' + code + 'answer-message' + 'AVP' 'AVP-Header' + 'Diameter' 'Diameter-Header' 'Header' + 'REQ' 'PXY' 'ERR'. + +Rootsymbol dictionary. + +Endsymbol '$end'. + +%% =========================================================================== + +dictionary -> sections : '$1'. + +sections -> '$empty' : []. +sections -> section sections : ['$1' | '$2']. + +section -> name ident : ['$1', '$2']. +section -> prefix ident : ['$1', '$2']. +section -> id number : ['$1', '$2']. +section -> vendor number ident : ['$1', '$2', '$3']. +section -> inherits module avp_names : ['$1', '$2' | '$3']. +section -> avp_types avp_defs : ['$1' | '$2']. +section -> avp_vendor_id number avp_names : ['$1', '$2' | '$3']. +section -> custom_types module avp_names : ['$1', '$2' | '$3']. +section -> codecs module avp_names : ['$1', '$2' | '$3']. +section -> messages message_defs : ['$1' | '$2']. +section -> grouped group_defs : ['$1' | '$2']. +section -> enum ident enum_defs : ['$1', '$2' | '$3']. +section -> define ident enum_defs : ['$1', '$2' | '$3']. + +%% ===================================== + +module -> ident : '$1'. + +avp_names -> idents : '$1'. %% Note: not 'AVP' + +avp_defs -> '$empty' : []. +avp_defs -> avp_def avp_defs : ['$1' | '$2']. + +avp_def -> ident number avp_type avp_flags : ['$1', '$2', '$3', '$4']. + +avp_type -> ident : '$1'. + +idents -> '$empty' : []. +idents -> ident idents : ['$1' | '$2']. + +avp_flags -> '-' : + {_, Lineno} = '$1', + {word, Lineno, ""}. +avp_flags -> ident : + '$1'. +%% Could support lowercase here if there's a use for distinguishing +%% between Must and Should in the future in deciding whether or not +%% to set a flag. + +ident -> word : '$1'. + +%% Don't bother mapping reserved words to make these usable in this +%% context. That an AVP can't be named Diameter-Header is probably no +%% great loss, and that it can't be named AVP may even save someone +%% from themselves. (Temporarily at least.) + +group_defs -> '$empty' : []. +group_defs -> group_def group_defs : ['$1' | '$2']. + +message_defs -> '$empty' : []. +message_defs -> command_def message_defs : ['$1' | '$2']. + +enum_defs -> '$empty' : []. +enum_defs -> enum_def enum_defs : ['$1' | '$2']. + +enum_def -> ident number : ['$1', '$2']. + +%% ===================================== +%% 3.2. Command Code ABNF specification +%% +%% Every Command Code defined MUST include a corresponding ABNF +%% specification, which is used to define the AVPs that MUST or MAY be +%% present when sending the message. The following format is used in +%% the definition: + +%% command-def = "::=" diameter-message +%% +%% command-name = diameter-name +%% +%% diameter-name = ALPHA *(ALPHA / DIGIT / "-") +%% +%% diameter-message = header [ *fixed] [ *required] [ *optional] + +%% answer-message is a special case. +command_def -> 'answer-message' '::=' '<' header_tok ':' code + ',' 'ERR' '[' 'PXY' ']' '>' + avps + : ['$1', false | '$13']. + +command_def -> diameter_name '::=' header avps + : ['$1', '$3' | '$4']. +%% Ensure the order fixed/required/optional by semantic checks rather +%% than grammatically since the latter requires more lookahead: don't +%% know until after a leading qual which of the three it is that's +%% being parsed. + +diameter_name -> ident : '$1'. + +%% header = "<" "Diameter Header:" command-id +%% [r-bit] [p-bit] [e-bit] [application-id] ">" +%% +%% command-id = 1*DIGIT +%% ; The Command Code assigned to the command +%% +%% r-bit = ", REQ" +%% ; If present, the 'R' bit in the Command +%% ; Flags is set, indicating that the message +%% ; is a request, as opposed to an answer. +%% +%% p-bit = ", PXY" +%% ; If present, the 'P' bit in the Command +%% ; Flags is set, indicating that the message +%% ; is proxiable. +%% +%% e-bit = ", ERR" +%% ; If present, the 'E' bit in the Command +%% ; Flags is set, indicating that the answer +%% ; message contains a Result-Code AVP in +%% ; the "protocol error" class. +%% +%% application-id = 1*DIGIT + +header -> '<' header_tok ':' command_id bits application_id '>' + : ['$4', '$5', '$6']. + +command_id -> number : '$1'. + +%% Accept both the form of the base definition and the typo (fixed in +%% 3588bis) of the grammar. +header_tok -> 'Diameter' 'Header'. +header_tok -> 'Diameter-Header'. + +bits -> '$empty' : []. +bits -> ',' bit bits : ['$2' | '$3']. + +%% ERR only makes sense for answer-message so don't allow it here +%% (despite 3588). +bit -> 'REQ' : '$1'. +bit -> 'PXY' : '$1'. + +application_id -> '$empty' : false. +application_id -> number : '$1'. + +%% fixed = [qual] "<" avp-spec ">" +%% ; Defines the fixed position of an AVP +%% +%% required = [qual] "{" avp-spec "}" +%% ; The AVP MUST be present and can appear +%% ; anywhere in the message. +%% +%% optional = [qual] "[" avp-name "]" +%% ; The avp-name in the 'optional' rule cannot +%% ; evaluate to any AVP Name which is included +%% ; in a fixed or required rule. The AVP can +%% ; appear anywhere in the message. +%% ; +%% ; NOTE: "[" and "]" have a slightly different +%% ; meaning than in ABNF (RFC 5234]). These braces +%% ; cannot be used to express optional fixed rules +%% ; (such as an optional ICV at the end). To do this, +%% ; the convention is '0*1fixed'. + +avps -> '$empty' : []. +avps -> avp avps : ['$1' | '$2']. + +avp -> avp_ref : [false | '$1']. +avp -> qual avp_ref : ['$1' | '$2']. + +avp_ref -> '<' avp_spec '>' : [$<, '$2']. +avp_ref -> '{' avp_name '}' : [${, '$2']. +avp_ref -> '[' avp_name ']' : [$[, '$2']. +%% Note that required can be an avp_name, not just avp_spec. 'AVP' +%% is specified as required by Failed-AVP for example. + +%% qual = [min] "*" [max] +%% ; See ABNF conventions, RFC 5234 Section 4. +%% ; The absence of any qualifiers depends on +%% ; whether it precedes a fixed, required, or +%% ; optional rule. If a fixed or required rule has +%% ; no qualifier, then exactly one such AVP MUST +%% ; be present. If an optional rule has no +%% ; qualifier, then 0 or 1 such AVP may be +%% ; present. If an optional rule has a qualifier, +%% ; then the value of min MUST be 0 if present. +%% +%% min = 1*DIGIT +%% ; The minimum number of times the element may +%% ; be present. If absent, the default value is zero +%% ; for fixed and optional rules and one for required +%% ; rules. The value MUST be at least one for for +%% ; required rules. +%% +%% max = 1*DIGIT +%% ; The maximum number of times the element may +%% ; be present. If absent, the default value is +%% ; infinity. A value of zero implies the AVP MUST +%% ; NOT be present. + +qual -> number '*' number : {'$1', '$3'}. +qual -> number '*' : {'$1', true}. +qual -> '*' number : {true, '$2'}. +qual -> '*' : true. + +%% avp-spec = diameter-name +%% ; The avp-spec has to be an AVP Name, defined +%% ; in the base or extended Diameter +%% ; specifications. + +avp_spec -> diameter_name : '$1'. + +%% avp-name = avp-spec / "AVP" +%% ; The string "AVP" stands for *any* arbitrary AVP +%% ; Name, not otherwise listed in that command code +%% ; definition. Addition this AVP is recommended for +%% ; all command ABNFs to allow for extensibility. + +avp_name -> 'AVP' : '$1'. +avp_name -> avp_spec : '$1'. + +%% The following is a definition of a fictitious command code: +%% +%% Example-Request ::= < Diameter Header: 9999999, REQ, PXY > +%% { User-Name } +%% * { Origin-Host } +%% * [ AVP ] + +%% ===================================== +%% 4.4. Grouped AVP Values +%% +%% The Diameter protocol allows AVP values of type 'Grouped'. This +%% implies that the Data field is actually a sequence of AVPs. It is +%% possible to include an AVP with a Grouped type within a Grouped type, +%% that is, to nest them. AVPs within an AVP of type Grouped have the +%% same padding requirements as non-Grouped AVPs, as defined in Section +%% 4. +%% +%% The AVP Code numbering space of all AVPs included in a Grouped AVP is +%% the same as for non-grouped AVPs. Receivers of a Grouped AVP that +%% does not have the 'M' (mandatory) bit set and one or more of the +%% encapsulated AVPs within the group has the 'M' (mandatory) bit set +%% MAY simply be ignored if the Grouped AVP itself is unrecognized. The +%% rule applies even if the encapsulated AVP with its 'M' (mandatory) +%% bit set is further encapsulated within other sub-groups; i.e. other +%% Grouped AVPs embedded within the Grouped AVP. +%% +%% Every Grouped AVP defined MUST include a corresponding grammar, using +%% ABNF [RFC5234] (with modifications), as defined below. + +%% grouped-avp-def = "::=" avp +%% +%% name-fmt = ALPHA *(ALPHA / DIGIT / "-") +%% +%% name = name-fmt +%% ; The name has to be the name of an AVP, +%% ; defined in the base or extended Diameter +%% ; specifications. +%% +%% avp = header [ *fixed] [ *required] [ *optional] + +group_def -> ident '::=' avp_header avps : ['$1', '$3' | '$4']. + +%% header = "<" "AVP-Header:" avpcode [vendor] ">" +%% +%% avpcode = 1*DIGIT +%% ; The AVP Code assigned to the Grouped AVP +%% +%% vendor = 1*DIGIT +%% ; The Vendor-ID assigned to the Grouped AVP. +%% ; If absent, the default value of zero is +%% ; used. + +avp_header -> '<' avp_header_tok ':' avp_code avp_vendor '>' + : ['$4', '$5']. + +avp_header_tok -> 'AVP-Header'. +avp_header_tok -> 'AVP' 'Header'. + +avp_code -> number : '$1'. + +avp_vendor -> '$empty' : false. +avp_vendor -> number : '$1'. diff --git a/lib/diameter/src/compiler/diameter_dict_scanner.erl b/lib/diameter/src/compiler/diameter_dict_scanner.erl new file mode 100644 index 0000000000..74bf0cb06a --- /dev/null +++ b/lib/diameter/src/compiler/diameter_dict_scanner.erl @@ -0,0 +1,265 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2010-2011. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(diameter_dict_scanner). + +%% +%% A scanner for dictionary files of the form expected by yecc. +%% + +-export([scan/1, + format_error/1]). + +%% ----------------------------------------------------------- +%% # scan/1 +%% ----------------------------------------------------------- + +-spec scan(string()) -> {ok, [Token]} | {error, {atom(), string(), Lineno}} + when Token :: {word, Lineno, string()} + | {number, Lineno, non_neg_integer()} + | {Symbol, Lineno}, + Lineno :: pos_integer(), + Symbol :: '{' | '}' | '<' | '>' | '[' | ']' + | '*' | '::=' | ':' | ',' | '-' + | avp_types + | avp_vendor_id + | codecs + | custom_types + | define + | grouped + | id + | inherits + | messages + | name + | prefix + | vendor + | '$end' + | code + | 'answer-message' + | 'AVP' + | 'AVP-Header' + | 'Diameter' + | 'Diameter-Header' + | 'Header' + | 'REQ' + | 'PXY' + | 'ERR'. + +scan(B) + when is_binary(B) -> + scan(binary_to_list(B)); +scan(S) -> + scan(S, {1, []}). + +scan(S, {Lineno, Acc}) -> + case split(S) of + '$end' = E -> + {ok, lists:reverse([{E, Lineno} | Acc])}; + {Tok, Rest} -> + scan(Rest, acc(Tok, Lineno, Acc)); + Reason when is_list(Reason) -> + {error, {Reason, S, Lineno}} + end. + +format_error({Reason, Input, Lineno}) -> + io_lib:format("~s at line ~p: ~s", + [Reason, Lineno, head(Input, [], 20, true)]). + +head(Str, Acc, N, _) + when [] == Str; + 0 == N; + $\r == hd(Str); + $\n == hd(Str) -> + lists:reverse(Acc); +head([C|Rest], Acc, N, true = T) %% skip leading whitespace + when C == $\s; + C == $\t; + C == $\f; + C == $\v -> + head(Rest, Acc, N, T); +head([C|Rest], Acc, N, _) -> + head(Rest, [C|Acc], N-1, false). + +acc(endline, Lineno, Acc) -> + {Lineno + 1, Acc}; +acc(T, Lineno, Acc) -> + {Lineno, [tok(T, Lineno) | Acc]}. + +tok({Cat, Sym}, Lineno) -> + {Cat, Lineno, Sym}; +tok(Sym, Lineno) -> + {Sym, Lineno}. + +%% # split/1 +%% +%% Output: {Token, Rest} | atom() + +%% Finito. +split("") -> + '$end'; + +%% Skip comments. This precludes using semicolon for any other purpose. +split([$;|T]) -> + split(lists:dropwhile(fun(C) -> not is_eol_ch(C) end, T)); + +%% Beginning of a section. +split([$@|T]) -> + {Name, Rest} = lists:splitwith(fun is_name_ch/1, T), + case section(Name) of + false -> + "Unknown section"; + 'end' -> + '$end'; + A -> + {A, Rest} + end; + +split("::=" ++ T) -> + {'::=', T}; + +split([H|T]) + when H == ${; H == $}; + H == $<; H == $>; + H == $[; H == $]; + H == $*; H == $:; H == $,; H == $- -> + {list_to_atom([H]), T}; + +%% RFC 3588 requires various names to begin with a letter but 3GPP (for +%% one) abuses this. (eg 3GPP-Charging-Id in TS32.299.) +split([H|_] = L) when $0 =< H, H =< $9 -> + {P, Rest} = splitwith(fun is_name_ch/1, L), + Tok = try + {number, read_int(P)} + catch + error:_ -> + word(P) + end, + {Tok, Rest}; + +split([H|_] = L) when $a =< H, H =< $z; + $A =< H, H =< $Z -> + {P, Rest} = splitwith(fun is_name_ch/1, L), + {word(P), Rest}; + +split([$'|T]) -> + case splitwith(fun(C) -> not lists:member(C, "'\r\n") end, T) of + {[_|_] = A, [$'|Rest]} -> + {{word, A}, Rest}; + {[_|_], _} -> %% not terminated on same line + "Unterminated atom"; + {[], []} -> %% last character + "Unterminated atom"; + {[], _} -> + "Empty atom" + end; + +%% Line ending of various forms. +split([$\r,$\n|T]) -> + {endline, T}; +split([C|T]) + when C == $\r; + C == $\n -> + {endline, T}; + +%% Ignore whitespace. +split([C|T]) + when C == $\s; + C == $\t; + C == $\f; + C == $\v -> + split(T); + +split(_) -> + "Unexpected character". + +%% word/1 + +%% Reserved words significant in parsing ... +word(S) + when S == "answer-message"; + S == "code"; + S == "AVP"; + S == "AVP-Header"; + S == "Diameter"; + S == "Diameter-Header"; + S == "Header"; + S == "REQ"; + S == "PXY"; + S == "ERR" -> + list_to_atom(S); + +%% ... or not. +word(S) -> + {word, S}. + +%% section/1 + +section(N) + when N == "avp_types"; + N == "avp_vendor_id"; + N == "codecs"; + N == "custom_types"; + N == "define"; + N == "end"; + N == "enum"; + N == "grouped"; + N == "id"; + N == "inherits"; + N == "messages"; + N == "name"; + N == "prefix"; + N == "vendor" -> + list_to_atom(N); +section(_) -> + false. + +%% read_int/1 + +read_int([$0,X|S]) + when X == $X; + X == $x -> + {ok, [N], []} = io_lib:fread("~16u", S), + N; + +read_int(S) -> + list_to_integer(S). + +%% splitwith/3 + +splitwith(Fun, [H|T]) -> + {SH, ST} = lists:splitwith(Fun, T), + {[H|SH], ST}. + +is_eol_ch(C) -> + C == $\n orelse C == $\r. + +is_name_ch(C) -> + is_alphanum(C) orelse C == $- orelse C == $_. + +is_alphanum(C) -> + is_lower(C) orelse is_upper(C) orelse is_digit(C). + +is_lower(C) -> + $a =< C andalso C =< $z. + +is_upper(C) -> + $A =< C andalso C =< $Z. + +is_digit(C) -> + $0 =< C andalso C =< $9. diff --git a/lib/diameter/src/compiler/diameter_spec_scan.erl b/lib/diameter/src/compiler/diameter_spec_scan.erl deleted file mode 100644 index bc0448882a..0000000000 --- a/lib/diameter/src/compiler/diameter_spec_scan.erl +++ /dev/null @@ -1,157 +0,0 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2010-2011. All Rights Reserved. -%% -%% The contents of this file are subject to the Erlang Public License, -%% Version 1.1, (the "License"); you may not use this file except in -%% compliance with the License. You should have received a copy of the -%% Erlang Public License along with this software. If not, it can be -%% retrieved online at http://www.erlang.org/. -%% -%% Software distributed under the License is distributed on an "AS IS" -%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See -%% the License for the specific language governing rights and limitations -%% under the License. -%% -%% %CopyrightEnd% -%% - --module(diameter_spec_scan). - -%% -%% Functions used by the spec file parser in diameter_spec_util. -%% - --export([split/1, - split/2, - parse/1]). - -%%% ----------------------------------------------------------- -%%% # parse/1 -%%% -%%% Output: list of Token -%%% -%%% Token = '{' | '}' | '<' | '>' | '[' | ']' -%%% | '*' | '::=' | ':' | ',' | '-' -%%% | {name, string()} -%%% | {tag, atom()} -%%% | {number, integer() >= 0} -%%% -%%% Tokenize a string. Fails if the string does not parse. -%%% ----------------------------------------------------------- - -parse(S) -> - parse(S, []). - -%% parse/2 - -parse(S, Acc) -> - acc(split(S), Acc). - -acc({T, Rest}, Acc) -> - parse(Rest, [T | Acc]); -acc("", Acc) -> - lists:reverse(Acc). - -%%% ----------------------------------------------------------- -%%% # split/2 -%%% -%%% Output: {list() of Token, Rest} -%%% -%%% Extract a specified number of tokens from a string. Returns a list -%%% of length less than the specified number if there are less than -%%% this number of tokens to be parsed. -%%% ----------------------------------------------------------- - -split(Str, N) - when N >= 0 -> - split(N, Str, []). - -split(0, Str, Acc) -> - {lists:reverse(Acc), Str}; - -split(N, Str, Acc) -> - case split(Str) of - {T, Rest} -> - split(N-1, Rest, [T|Acc]); - "" = Rest -> - {lists:reverse(Acc), Rest} - end. - -%%% ----------------------------------------------------------- -%%% # split/1 -%%% -%%% Output: {Token, Rest} | "" -%%% -%%% Extract the next token from a string. -%%% ----------------------------------------------------------- - -split("" = Rest) -> - Rest; - -split("::=" ++ T) -> - {'::=', T}; - -split([H|T]) - when H == ${; H == $}; - H == $<; H == $>; - H == $[; H == $]; - H == $*; H == $:; H == $,; H == $- -> - {list_to_atom([H]), T}; - -split([H|T]) when $A =< H, H =< $Z; - $0 =< H, H =< $9 -> - {P, Rest} = splitwith(fun is_name_ch/1, [H], T), - Tok = try - {number, read_int(P)} - catch - error:_ -> - {name, P} - end, - {Tok, Rest}; - -split([H|T]) when $a =< H, H =< $z -> - {P, Rest} = splitwith(fun is_name_ch/1, [H], T), - {{tag, list_to_atom(P)}, Rest}; - -split([H|T]) when H == $\t; - H == $\s; - H == $\n -> - split(T). - -%% read_int/1 - -read_int([$0,X|S]) - when X == $X; - X == $x -> - {ok, [N], []} = io_lib:fread("~16u", S), - N; - -read_int(S) -> - list_to_integer(S). - -%% splitwith/3 - -splitwith(Fun, Acc, S) -> - split([] /= S andalso Fun(hd(S)), Fun, Acc, S). - -split(true, Fun, Acc, [H|T]) -> - splitwith(Fun, [H|Acc], T); -split(false, _, Acc, S) -> - {lists:reverse(Acc), S}. - -is_name_ch(C) -> - is_alphanum(C) orelse C == $- orelse C == $_. - -is_alphanum(C) -> - is_lower(C) orelse is_upper(C) orelse is_digit(C). - -is_lower(C) -> - $a =< C andalso C =< $z. - -is_upper(C) -> - $A =< C andalso C =< $Z. - -is_digit(C) -> - $0 =< C andalso C =< $9. -- cgit v1.2.3