diff options
author | Anders Svensson <[email protected]> | 2011-10-16 21:36:37 +0200 |
---|---|---|
committer | Anders Svensson <[email protected]> | 2011-12-02 15:23:52 +0100 |
commit | ca185011269606596814075d4c8f9d13a855866b (patch) | |
tree | f67a45298ad311b05997c20dd4bf4fd1c3313e38 /lib/diameter/src/compiler/diameter_dict_scanner.erl | |
parent | 3cdd095e8de8506881a9856e711a90c9ed723f0a (diff) | |
download | otp-ca185011269606596814075d4c8f9d13a855866b.tar.gz otp-ca185011269606596814075d4c8f9d13a855866b.tar.bz2 otp-ca185011269606596814075d4c8f9d13a855866b.zip |
Replace dictionary file parser
The previous parser was very ad hoc and simply crashed on any kind
of input error, providing no identification of the objectionable input
that caused the parse to fail. The new parser is generated from a yecc
grammar, making it easier both to understand what it is that's being
parsed and to provide useful diagnostics to the user in case of error.
Diffstat (limited to 'lib/diameter/src/compiler/diameter_dict_scanner.erl')
-rw-r--r-- | lib/diameter/src/compiler/diameter_dict_scanner.erl | 265 |
1 files changed, 265 insertions, 0 deletions
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2010-2011. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%

-module(diameter_dict_scanner).

%%
%% A scanner for dictionary files of the form expected by yecc.
%%

-export([scan/1,
         format_error/1]).

%% -----------------------------------------------------------
%% # scan/1
%%
%% Tokenize the contents of a dictionary file, given as a string
%% or a binary. Returns the list of tokens, always terminated by
%% {'$end', Lineno}, or an error triple of a reason string, the
%% input remaining at the point of failure and the line number at
%% which scanning stopped. The triple can be passed to
%% format_error/1.
%% -----------------------------------------------------------

-spec scan(string() | binary())
   -> {ok, [Token]}
    | {error, {string(), string(), Lineno}}
 when Token  :: {word, Lineno, string()}
              | {number, Lineno, non_neg_integer()}
              | {Symbol, Lineno},
      Lineno :: pos_integer(),
      Symbol :: '{' | '}' | '<' | '>' | '[' | ']'
              | '*' | '::=' | ':' | ',' | '-'
              | avp_types
              | avp_vendor_id
              | codecs
              | custom_types
              | define
              | enum
              | grouped
              | id
              | inherits
              | messages
              | name
              | prefix
              | vendor
              | '$end'
              | code
              | 'answer-message'
              | 'AVP'
              | 'AVP-Header'
              | 'Diameter'
              | 'Diameter-Header'
              | 'Header'
              | 'REQ'
              | 'PXY'
              | 'ERR'.

scan(B)
  when is_binary(B) ->
    scan(binary_to_list(B));
scan(S) ->
    scan(S, {1, []}).

%% scan/2
%%
%% Repeatedly split a token off the head of the input, accumulating
%% tokens in reverse order together with the current line number.

scan(S, {Lineno, Acc}) ->
    case split(S) of
        '$end' = E ->
            {ok, lists:reverse([{E, Lineno} | Acc])};
        {Tok, Rest} ->
            scan(Rest, acc(Tok, Lineno, Acc));
        Reason when is_list(Reason) ->
            {error, {Reason, S, Lineno}}
    end.

%% -----------------------------------------------------------
%% # format_error/1
%%
%% Turn the error triple returned by scan/1 into printable text:
%% the reason, the line number and the start of the offending
%% input.
%% -----------------------------------------------------------

-spec format_error({string(), string(), pos_integer()})
   -> iolist().

format_error({Reason, Input, Lineno}) ->
    io_lib:format("~s at line ~p: ~s",
                  [Reason, Lineno, head(Input, [], 20, true)]).

%% head/4
%%
%% Extract at most N characters from the head of a string, stopping
%% at end of line or end of input. The last argument is true as long
%% as only whitespace has been seen, in which case blanks are skipped
%% without counting against N.

head(Str, Acc, N, _)
  when [] == Str;
       0 == N;
       $\r == hd(Str);
       $\n == hd(Str) ->
    lists:reverse(Acc);
head([C|Rest], Acc, N, true = T)  %% skip leading whitespace
  when C == $\s;
       C == $\t;
       C == $\f;
       C == $\v ->
    head(Rest, Acc, N, T);
head([C|Rest], Acc, N, _) ->
    head(Rest, [C|Acc], N-1, false).

%% acc/3
%%
%% Line endings only bump the line number; everything else becomes
%% a token tagged with the current line.

acc(endline, Lineno, Acc) ->
    {Lineno + 1, Acc};
acc(T, Lineno, Acc) ->
    {Lineno, [tok(T, Lineno) | Acc]}.

%% tok/2
%%
%% {Category, Value} pairs become 3-tuples, bare symbols 2-tuples.

tok({Cat, Sym}, Lineno) ->
    {Cat, Lineno, Sym};
tok(Sym, Lineno) ->
    {Sym, Lineno}.

%% # split/1
%%
%% Output: {Token, Rest} | '$end' | string()
%%
%% A string return value is an error reason.

%% Finito.
split("") ->
    '$end';

%% Skip comments. This precludes using semicolon for any other purpose.
split([$;|T]) ->
    split(lists:dropwhile(fun(C) -> not is_eol_ch(C) end, T));

%% Beginning of a section. An @end section terminates the scan,
%% ignoring whatever follows it.
split([$@|T]) ->
    {Name, Rest} = lists:splitwith(fun is_name_ch/1, T),
    case section(Name) of
        false ->
            "Unknown section";
        'end' ->
            '$end';
        A ->
            {A, Rest}
    end;

%% Must precede the single-character symbols since $: is one of them.
split("::=" ++ T) ->
    {'::=', T};

split([H|T])
  when H == ${; H == $};
       H == $<; H == $>;
       H == $[; H == $];
       H == $*; H == $:; H == $,; H == $- ->
    {list_to_atom([H]), T};

%% RFC 3588 requires various names to begin with a letter but 3GPP (for
%% one) abuses this. (eg 3GPP-Charging-Id in TS32.299.) Anything
%% starting with a digit that doesn't read as an integer is therefore
%% treated as a word.
split([H|_] = L) when $0 =< H, H =< $9 ->
    {P, Rest} = splitwith(fun is_name_ch/1, L),
    Tok = try
              {number, read_int(P)}
          catch
              error:_ ->
                  word(P)
          end,
    {Tok, Rest};

split([H|_] = L) when $a =< H, H =< $z;
                      $A =< H, H =< $Z ->
    {P, Rest} = splitwith(fun is_name_ch/1, L),
    {word(P), Rest};

%% Quoted word: everything up to a closing quote on the same line.
%% Note that lists:splitwith/2 is used here, not the local splitwith/2:
%% the first character after the opening quote has not been examined
%% yet and may be the closing quote, a line ending, or missing
%% altogether.
split([$'|T]) ->
    case lists:splitwith(fun(C) -> not lists:member(C, "'\r\n") end, T) of
        {[_|_] = A, [$'|Rest]} ->
            {{word, A}, Rest};
        {[], [$'|_]} ->
            "Empty atom";
        {_, _} ->  %% end of line or input before a closing quote
            "Unterminated atom"
    end;

%% Line ending of various forms.
split([$\r,$\n|T]) ->
    {endline, T};
split([C|T])
  when C == $\r;
       C == $\n ->
    {endline, T};

%% Ignore whitespace.
split([C|T])
  when C == $\s;
       C == $\t;
       C == $\f;
       C == $\v ->
    split(T);

split(_) ->
    "Unexpected character".

%% word/1

%% Reserved words significant in parsing ...
word(S)
  when S == "answer-message";
       S == "code";
       S == "AVP";
       S == "AVP-Header";
       S == "Diameter";
       S == "Diameter-Header";
       S == "Header";
       S == "REQ";
       S == "PXY";
       S == "ERR" ->
    list_to_atom(S);

%% ... or not.
word(S) ->
    {word, S}.

%% section/1
%%
%% Map a section name (the text following @) to an atom, or false if
%% the name is unknown. Only names from this fixed set are converted
%% to atoms.

section(N)
  when N == "avp_types";
       N == "avp_vendor_id";
       N == "codecs";
       N == "custom_types";
       N == "define";
       N == "end";
       N == "enum";
       N == "grouped";
       N == "id";
       N == "inherits";
       N == "messages";
       N == "name";
       N == "prefix";
       N == "vendor" ->
    list_to_atom(N);
section(_) ->
    false.

%% read_int/1
%%
%% Read a decimal integer, or a hexadecimal one given a 0x/0X prefix.
%% Raises an error if the string isn't entirely an integer, which
%% the caller catches in order to fall back to a word.

read_int([$0,X|S])
  when X == $X;
       X == $x ->
    {ok, [N], []} = io_lib:fread("~16u", S),
    N;

read_int(S) ->
    list_to_integer(S).

%% splitwith/2
%%
%% Like lists:splitwith/2 but the first character is taken
%% unconditionally. Only to be called on a non-empty string whose
%% first character the caller has already accepted.

splitwith(Fun, [H|T]) ->
    {SH, ST} = lists:splitwith(Fun, T),
    {[H|SH], ST}.

%% Character classes.

is_eol_ch(C) ->
    C == $\n orelse C == $\r.

is_name_ch(C) ->
    is_alphanum(C) orelse C == $- orelse C == $_.

is_alphanum(C) ->
    is_lower(C) orelse is_upper(C) orelse is_digit(C).

is_lower(C) ->
    $a =< C andalso C =< $z.

is_upper(C) ->
    $A =< C andalso C =< $Z.

is_digit(C) ->
    $0 =< C andalso C =< $9.