diff options
Diffstat (limited to 'lib/diameter/src/compiler/diameter_dict_scanner.erl')
-rw-r--r-- | lib/diameter/src/compiler/diameter_dict_scanner.erl | 274 |
1 files changed, 274 insertions, 0 deletions
diff --git a/lib/diameter/src/compiler/diameter_dict_scanner.erl b/lib/diameter/src/compiler/diameter_dict_scanner.erl new file mode 100644 index 0000000000..963b58fdfc --- /dev/null +++ b/lib/diameter/src/compiler/diameter_dict_scanner.erl @@ -0,0 +1,274 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2010-2011. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(diameter_dict_scanner). + +%% +%% A scanner for dictionary files of the form expected by yecc. +%% + +-export([scan/1, + format_error/1]). + +-export([is_name/1]). + +%% ----------------------------------------------------------- +%% # scan/1 +%% ----------------------------------------------------------- + +-spec scan(string()) -> {ok, [Token]} | {error, {atom(), string(), Lineno}} + when Token :: {word, Lineno, string()} + | {number, Lineno, non_neg_integer()} + | {Symbol, Lineno}, + Lineno :: pos_integer(), + Symbol :: '{' | '}' | '<' | '>' | '[' | ']' + | '*' | '::=' | ':' | ',' | '-' + | avp_types + | avp_vendor_id + | codecs + | custom_types + | define + | grouped + | id + | inherits + | messages + | name + | prefix + | vendor + | '$end' + | code + | 'answer-message' + | 'AVP' + | 'AVP-Header' + | 'Diameter' + | 'Diameter-Header' + | 'Header' + | 'REQ' + | 'PXY' + | 'ERR'. + +scan(B) + when is_binary(B) -> + scan(binary_to_list(B)); +scan(S) -> + scan(S, {1, []}). + +scan(S, {Lineno, Acc}) -> + case split(S) of + '$end' = E -> + {ok, lists:reverse([{E, Lineno} | Acc])}; + {Tok, Rest} -> + scan(Rest, acc(Tok, Lineno, Acc)); + Reason when is_list(Reason) -> + {error, {Reason, S, Lineno}} + end. + +%% format_error/1 + +format_error({Reason, Input, Lineno}) -> + io_lib:format("~s at line ~p: ~s", + [Reason, Lineno, head(Input, [], 20, true)]). + +%% is_name/1 + +is_name([H|T]) -> + is_alphanum(H) andalso lists:all(fun is_name_ch/1, T). + +%% =========================================================================== + +head(Str, Acc, N, _) + when [] == Str; + 0 == N; + $\r == hd(Str); + $\n == hd(Str) -> + lists:reverse(Acc); +head([C|Rest], Acc, N, true = T) %% skip leading whitespace + when C == $\s; + C == $\t; + C == $\f; + C == $\v -> + head(Rest, Acc, N, T); +head([C|Rest], Acc, N, _) -> + head(Rest, [C|Acc], N-1, false). + +acc(endline, Lineno, Acc) -> + {Lineno + 1, Acc}; +acc(T, Lineno, Acc) -> + {Lineno, [tok(T, Lineno) | Acc]}. + +tok({Cat, Sym}, Lineno) -> + {Cat, Lineno, Sym}; +tok(Sym, Lineno) -> + {Sym, Lineno}. + +%% # split/1 +%% +%% Output: {Token, Rest} | atom() + +%% Finito. +split("") -> + '$end'; + +%% Skip comments. This precludes using semicolon for any other purpose. +split([$;|T]) -> + split(lists:dropwhile(fun(C) -> not is_eol_ch(C) end, T)); + +%% Beginning of a section. +split([$@|T]) -> + {Name, Rest} = lists:splitwith(fun is_name_ch/1, T), + case section(Name) of + false -> + "Unknown section"; + 'end' -> + '$end'; + A -> + {A, Rest} + end; + +split("::=" ++ T) -> + {'::=', T}; + +split([H|T]) + when H == ${; H == $}; + H == $<; H == $>; + H == $[; H == $]; + H == $*; H == $:; H == $,; H == $- -> + {list_to_atom([H]), T}; + +%% RFC 3588 requires various names to begin with a letter but 3GPP (for +%% one) abuses this. (eg 3GPP-Charging-Id in TS32.299.) +split([H|_] = L) when $0 =< H, H =< $9 -> + {P, Rest} = splitwith(fun is_name_ch/1, L), + Tok = try + {number, read_int(P)} + catch + error:_ -> + word(P) + end, + {Tok, Rest}; + +split([H|_] = L) when $a =< H, H =< $z; + $A =< H, H =< $Z -> + {P, Rest} = splitwith(fun is_name_ch/1, L), + {word(P), Rest}; + +split([$'|T]) -> + case splitwith(fun(C) -> not lists:member(C, "'\r\n") end, T) of + {[_|_] = A, [$'|Rest]} -> + {{word, A}, Rest}; + {[], [$'|_]} -> + "Empty string"; + _ -> %% not terminated on same line + "Unterminated string" + end; + +%% Line ending of various forms. +split([$\r,$\n|T]) -> + {endline, T}; +split([C|T]) + when C == $\r; + C == $\n -> + {endline, T}; + +%% Ignore whitespace. +split([C|T]) + when C == $\s; + C == $\t; + C == $\f; + C == $\v -> + split(T); + +split(_) -> + "Unexpected character". + +%% word/1 + +%% Reserved words significant in parsing ... +word(S) + when S == "answer-message"; + S == "code"; + S == "AVP"; + S == "AVP-Header"; + S == "Diameter"; + S == "Diameter-Header"; + S == "Header"; + S == "REQ"; + S == "PXY"; + S == "ERR" -> + list_to_atom(S); + +%% ... or not. +word(S) -> + {word, S}. + +%% section/1 + +section(N) + when N == "avp_types"; + N == "avp_vendor_id"; + N == "codecs"; + N == "custom_types"; + N == "define"; + N == "end"; + N == "enum"; + N == "grouped"; + N == "id"; + N == "inherits"; + N == "messages"; + N == "name"; + N == "prefix"; + N == "vendor" -> + list_to_atom(N); +section(_) -> + false. + +%% read_int/1 + +read_int([$0,X|S]) + when X == $X; + X == $x -> + {ok, [N], []} = io_lib:fread("~16u", S), + N; + +read_int(S) -> + list_to_integer(S). + +%% splitwith/3 + +splitwith(Fun, [H|T]) -> + {SH, ST} = lists:splitwith(Fun, T), + {[H|SH], ST}. + +is_eol_ch(C) -> + C == $\n orelse C == $\r. + +is_name_ch(C) -> + is_alphanum(C) orelse C == $- orelse C == $_. + +is_alphanum(C) -> + is_lower(C) orelse is_upper(C) orelse is_digit(C). + +is_lower(C) -> + $a =< C andalso C =< $z. + +is_upper(C) -> + $A =< C andalso C =< $Z. + +is_digit(C) -> + $0 =< C andalso C =< $9. |