diff options
author | Ingela Anderton Andin <[email protected]> | 2016-05-02 12:19:37 +0200 |
---|---|---|
committer | Ingela Anderton Andin <[email protected]> | 2016-05-03 10:05:09 +0200 |
commit | 7fbcbf4d1f2c81d44a5c6a4889c98f32cd4505ae (patch) | |
tree | 154617ffb545f68bfb4644d8582429e983f82f1c /lib/inets/src/inets_app | |
parent | fea24ae8d37b33e97ef1897d0d3b6cdb2338c051 (diff) | |
download | otp-7fbcbf4d1f2c81d44a5c6a4889c98f32cd4505ae.tar.gz otp-7fbcbf4d1f2c81d44a5c6a4889c98f32cd4505ae.tar.bz2 otp-7fbcbf4d1f2c81d44a5c6a4889c98f32cd4505ae.zip |
inets: Put back inets_regexp module in OTP 18
Put back unused module inets_regexp and remove it in OTP 19 instead as
it is an incompatibility, although it is an undocumented module and
should not affect other applications (the world is not perfect).
Diffstat (limited to 'lib/inets/src/inets_app')
-rw-r--r-- | lib/inets/src/inets_app/Makefile | 3 | ||||
-rw-r--r-- | lib/inets/src/inets_app/inets.app.src | 1 | ||||
-rw-r--r-- | lib/inets/src/inets_app/inets_regexp.erl | 414 |
3 files changed, 417 insertions, 1 deletions
diff --git a/lib/inets/src/inets_app/Makefile b/lib/inets/src/inets_app/Makefile index 0a4b625b6a..7f51676dc5 100644 --- a/lib/inets/src/inets_app/Makefile +++ b/lib/inets/src/inets_app/Makefile @@ -49,7 +49,8 @@ MODULES = \ inets_sup \ inets_trace \ inets_lib \ - inets_time_compat + inets_time_compat \ + inets_regexp INTERNAL_HRL_FILES = inets_internal.hrl EXTERNAL_HRL_FILES = ../../include/httpd.hrl \ diff --git a/lib/inets/src/inets_app/inets.app.src b/lib/inets/src/inets_app/inets.app.src index 2f213794a3..c09139872f 100644 --- a/lib/inets/src/inets_app/inets.app.src +++ b/lib/inets/src/inets_app/inets.app.src @@ -29,6 +29,7 @@ inets_trace, inets_lib, inets_time_compat, + inets_regexp, %% FTP ftp, diff --git a/lib/inets/src/inets_app/inets_regexp.erl b/lib/inets/src/inets_app/inets_regexp.erl new file mode 100644 index 0000000000..fc1608bc5a --- /dev/null +++ b/lib/inets/src/inets_app/inets_regexp.erl @@ -0,0 +1,414 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2009. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% + +-module(inets_regexp). + +-export([parse/1, match/2, first_match/2, split/2, sub/3, gsub/3]). + + +%%%========================================================================= +%%% API +%%%========================================================================= + +%% parse(RegExp) -> {ok, RE} | {error, E}. +%% Parse the regexp described in the string RegExp. + +parse(S) -> + case (catch reg(S)) of + {R, []} -> + {ok, R}; + {_R, [C|_]} -> + {error, {illegal, [C]}}; + {error, E} -> + {error, E} + end. + + +%% Find the longest match of RegExp in String. + +match(S, RegExp) when is_list(RegExp) -> + case parse(RegExp) of + {ok,RE} -> match(S, RE); + {error,E} -> {error,E} + end; +match(S, RE) -> + case match(RE, S, 1, 0, -1) of + {Start,Len} when Len >= 0 -> + {match, Start, Len}; + {_Start,_Len} -> + nomatch + end. + +%% Find the first match of RegExp in String. + +first_match(S, RegExp) when is_list(RegExp) -> + case parse(RegExp) of + {ok, RE} -> + first_match(S, RE); + {error, E} -> + {error, E} + end; +first_match(S, RE) -> + case first_match(RE, S, 1) of + {Start,Len} when Len >= 0 -> + {match, Start,Len}; + nomatch -> + nomatch + end. + +first_match(RE, S, St) when S =/= [] -> + case re_apply(S, St, RE) of + {match, P, _Rest} -> + {St, P-St}; + nomatch -> + first_match(RE, tl(S), St+1) + end; +first_match(_RE, [], _St) -> + nomatch. + + +match(RE, S, St, Pos, L) -> + case first_match(RE, S, St) of + {St1, L1} -> + Nst = St1 + 1, + if L1 > L -> + match(RE, lists:nthtail(Nst-St, S), Nst, St1, L1); + true -> + match(RE, lists:nthtail(Nst-St, S), Nst, Pos, L) + end; + nomatch -> + {Pos, L} + end. + + +%% Split a string into substrings where the RegExp describes the +%% field seperator. The RegExp " " is specially treated. + +split(String, " ") -> %This is really special + {ok, RE} = parse("[ \t]+"), + case split_apply(String, RE, true) of + [[]|Ss] -> + {ok,Ss}; + Ss -> + {ok,Ss} + end; +split(String, RegExp) when is_list(RegExp) -> + case parse(RegExp) of + {ok, RE} -> + {ok, split_apply(String, RE, false)}; + {error, E} -> + {error,E} + end; +split(String, RE) -> + {ok, split_apply(String, RE, false)}. + + +%% Substitute the first match of the regular expression RegExp +%% with the string Replace in String. Accept pre-parsed regular +%% expressions. + +sub(String, RegExp, Rep) when is_list(RegExp) -> + case parse(RegExp) of + {ok, RE} -> + sub(String, RE, Rep); + {error, E} -> + {error, E} + end; +sub(String, RE, Rep) -> + Ss = sub_match(String, RE, 1), + {ok, sub_repl(Ss, Rep, String, 1), length(Ss)}. + + +%% Substitute every match of the regular expression RegExp with +%% the string New in String. Accept pre-parsed regular expressions. + +gsub(String, RegExp, Rep) when is_list(RegExp) -> + case parse(RegExp) of + {ok, RE} -> + gsub(String, RE, Rep); + {error, E} -> + {error, E} + end; +gsub(String, RE, Rep) -> + Ss = matches(String, RE, 1), + {ok, sub_repl(Ss, Rep, String, 1), length(Ss)}. + + +%%%======================================================================== +%%% Internal functions +%%%======================================================================== + +%% This is the regular expression grammar used. It is equivalent to the +%% one used in AWK, except that we allow ^ $ to be used anywhere and fail +%% in the matching. +%% +%% reg -> reg1 : '$1'. +%% reg1 -> reg1 "|" reg2 : {'or','$1','$2'}. +%% reg1 -> reg2 : '$1'. +%% reg2 -> reg2 reg3 : {concat,'$1','$2'}. +%% reg2 -> reg3 : '$1'. +%% reg3 -> reg3 "*" : {kclosure,'$1'}. +%% reg3 -> reg3 "+" : {pclosure,'$1'}. +%% reg3 -> reg3 "?" : {optional,'$1'}. +%% reg3 -> reg4 : '$1'. +%% reg4 -> "(" reg ")" : '$2'. +%% reg4 -> "\\" char : '$2'. +%% reg4 -> "^" : bos. +%% reg4 -> "$" : eos. +%% reg4 -> "." : char. +%% reg4 -> "[" class "]" : {char_class,char_class('$2')} +%% reg4 -> "[" "^" class "]" : {comp_class,char_class('$3')} +%% reg4 -> "\"" chars "\"" : char_string('$2') +%% reg4 -> char : '$1'. +%% reg4 -> empty : epsilon. +%% The grammar of the current regular expressions. The actual parser +%% is a recursive descent implementation of the grammar. + +reg(S) -> reg1(S). + +%% reg1 -> reg2 reg1' +%% reg1' -> "|" reg2 +%% reg1' -> empty + +reg1(S0) -> + {L,S1} = reg2(S0), + reg1p(S1, L). + +reg1p([$||S0], L) -> + {R,S1} = reg2(S0), + reg1p(S1, {'or',L,R}); +reg1p(S, L) -> {L,S}. + +%% reg2 -> reg3 reg2' +%% reg2' -> reg3 +%% reg2' -> empty + +reg2(S0) -> + {L,S1} = reg3(S0), + reg2p(S1, L). + +reg2p([C|S0], L) when (C =/= $|) andalso (C =/= $)) -> + {R,S1} = reg3([C|S0]), + reg2p(S1, {concat,L,R}); +reg2p(S, L) -> {L,S}. + +%% reg3 -> reg4 reg3' +%% reg3' -> "*" reg3' +%% reg3' -> "+" reg3' +%% reg3' -> "?" reg3' +%% reg3' -> empty + +reg3(S0) -> + {L,S1} = reg4(S0), + reg3p(S1, L). + +reg3p([$*|S], L) -> reg3p(S, {kclosure,L}); +reg3p([$+|S], L) -> reg3p(S, {pclosure,L}); +reg3p([$?|S], L) -> reg3p(S, {optional,L}); +reg3p(S, L) -> {L,S}. + +reg4([$(|S0]) -> + case reg(S0) of + {R,[$)|S1]} -> {R,S1}; + {_R,_S} -> throw({error,{unterminated,"("}}) + end; +reg4([$\\,O1,O2,O3|S]) + when ((O1 >= $0) andalso + (O1 =< $7) andalso + (O2 >= $0) andalso + (O2 =< $7) andalso + (O3 >= $0) andalso + (O3 =< $7)) -> + {(O1*8 + O2)*8 + O3 - 73*$0,S}; +reg4([$\\,C|S]) -> + {escape_char(C),S}; +reg4([$\\]) -> + throw({error, {unterminated,"\\"}}); +reg4([$^|S]) -> + {bos,S}; +reg4([$$|S]) -> + {eos,S}; +reg4([$.|S]) -> + {{comp_class,"\n"},S}; +reg4("[^" ++ S0) -> + case char_class(S0) of + {Cc,[$]|S1]} -> {{comp_class,Cc},S1}; + {_Cc,_S} -> throw({error,{unterminated,"["}}) + end; +reg4([$[|S0]) -> + case char_class(S0) of + {Cc,[$]|S1]} -> {{char_class,Cc},S1}; + {_Cc,_S1} -> throw({error,{unterminated,"["}}) + end; +reg4([C|S]) + when (C =/= $*) andalso (C =/= $+) andalso (C =/= $?) andalso (C =/= $]) -> + {C, S}; +reg4([C|_S]) -> + throw({error,{illegal,[C]}}); +reg4([]) -> + {epsilon,[]}. + +escape_char($n) -> $\n; %\n = LF +escape_char($r) -> $\r; %\r = CR +escape_char($t) -> $\t; %\t = TAB +escape_char($v) -> $\v; %\v = VT +escape_char($b) -> $\b; %\b = BS +escape_char($f) -> $\f; %\f = FF +escape_char($e) -> $\e; %\e = ESC +escape_char($s) -> $\s; %\s = SPACE +escape_char($d) -> $\d; %\d = DEL +escape_char(C) -> C. + +char_class([$]|S]) -> char_class(S, [$]]); +char_class(S) -> char_class(S, []). + +char($\\, [O1,O2,O3|S]) when + O1 >= $0, O1 =< $7, O2 >= $0, O2 =< $7, O3 >= $0, O3 =< $7 -> + {(O1*8 + O2)*8 + O3 - 73*$0,S}; +char($\\, [C|S]) -> {escape_char(C),S}; +char(C, S) -> {C,S}. + +char_class([C1|S0], Cc) when C1 =/= $] -> + case char(C1, S0) of + {Cf,[$-,C2|S1]} when C2 =/= $] -> + case char(C2, S1) of + {Cl,S2} when Cf < Cl -> char_class(S2, [{Cf,Cl}|Cc]); + {Cl,_S2} -> throw({error,{char_class,[Cf,$-,Cl]}}) + end; + {C,S1} -> char_class(S1, [C|Cc]) + end; +char_class(S, Cc) -> {Cc,S}. + + +%% re_apply(String, StartPos, RegExp) -> re_app_res(). +%% +%% Apply the (parse of the) regular expression RegExp to String. If +%% there is a match return the position of the remaining string and +%% the string if else return 'nomatch'. BestMatch specifies if we want +%% the longest match, or just a match. +%% +%% StartPos should be the real start position as it is used to decide +%% if we ae at the beginning of the string. +%% +%% Pass two functions to re_apply_or so it can decide, on the basis +%% of BestMatch, whether to just any take any match or try both to +%% find the longest. This is slower but saves duplicatng code. + +re_apply(S, St, RE) -> re_apply(RE, [], S, St). + +re_apply(epsilon, More, S, P) -> %This always matches + re_apply_more(More, S, P); +re_apply({'or',RE1,RE2}, More, S, P) -> + re_apply_or(re_apply(RE1, More, S, P), + re_apply(RE2, More, S, P)); +re_apply({concat,RE1,RE2}, More, S0, P) -> + re_apply(RE1, [RE2|More], S0, P); +re_apply({kclosure,CE}, More, S, P) -> + %% Be careful with the recursion, explicitly do one call before + %% looping. + re_apply_or(re_apply_more(More, S, P), + re_apply(CE, [{kclosure,CE}|More], S, P)); +re_apply({pclosure,CE}, More, S, P) -> + re_apply(CE, [{kclosure,CE}|More], S, P); +re_apply({optional,CE}, More, S, P) -> + re_apply_or(re_apply_more(More, S, P), + re_apply(CE, More, S, P)); +re_apply(bos, More, S, 1) -> re_apply_more(More, S, 1); +re_apply(eos, More, [$\n|S], P) -> re_apply_more(More, S, P); +re_apply(eos, More, [], P) -> re_apply_more(More, [], P); +re_apply({char_class,Cc}, More, [C|S], P) -> + case in_char_class(C, Cc) of + true -> re_apply_more(More, S, P+1); + false -> nomatch + end; +re_apply({comp_class,Cc}, More, [C|S], P) -> + case in_char_class(C, Cc) of + true -> nomatch; + false -> re_apply_more(More, S, P+1) + end; +re_apply(C, More, [C|S], P) when is_integer(C) -> + re_apply_more(More, S, P+1); +re_apply(_RE, _More, _S, _P) -> nomatch. + +%% re_apply_more([RegExp], String, Length) -> re_app_res(). + +re_apply_more([RE|More], S, P) -> re_apply(RE, More, S, P); +re_apply_more([], S, P) -> {match,P,S}. + +%% in_char_class(Char, Class) -> bool(). + +in_char_class(C, [{C1,C2}|_Cc]) when C >= C1, C =< C2 -> true; +in_char_class(C, [C|_Cc]) -> true; +in_char_class(C, [_|Cc]) -> in_char_class(C, Cc); +in_char_class(_C, []) -> false. + +%% re_apply_or(Match1, Match2) -> re_app_res(). +%% If we want the best match then choose the longest match, else just +%% choose one by trying sequentially. + +re_apply_or({match,P1,S1}, {match,P2,_S2}) when P1 >= P2 -> {match,P1,S1}; +re_apply_or({match,_P1,_S1}, {match,P2,S2}) -> {match,P2,S2}; +re_apply_or(nomatch, R2) -> R2; +re_apply_or(R1, nomatch) -> R1. + + +matches(S, RE, St) -> + case first_match(RE, S, St) of + {St1,0} -> + [{St1,0}|matches(string:substr(S, St1+2-St), RE, St1+1)]; + {St1,L1} -> + [{St1,L1}|matches(string:substr(S, St1+L1+1-St), RE, St1+L1)]; + nomatch -> + [] + end. + +sub_match(S, RE, St) -> + case first_match(RE, S, St) of + {St1,L1} -> [{St1,L1}]; + nomatch -> [] + end. + +sub_repl([{St,L}|Ss], Rep, S, Pos) -> + Rs = sub_repl(Ss, Rep, S, St+L), + string:substr(S, Pos, St-Pos) ++ + sub_repl(Rep, string:substr(S, St, L), Rs); +sub_repl([], _Rep, S, Pos) -> + string:substr(S, Pos). + +sub_repl([$&|Rep], M, Rest) -> M ++ sub_repl(Rep, M, Rest); +sub_repl("\\&" ++ Rep, M, Rest) -> [$&|sub_repl(Rep, M, Rest)]; +sub_repl([C|Rep], M, Rest) -> [C|sub_repl(Rep, M, Rest)]; +sub_repl([], _M, Rest) -> Rest. + +split_apply(S, RE, Trim) -> split_apply(S, 1, RE, Trim, []). + +split_apply([], _P, _RE, true, []) -> + []; +split_apply([], _P, _RE, _T, Sub) -> + [lists:reverse(Sub)]; +split_apply(S, P, RE, T, Sub) -> + case re_apply(S, P, RE) of + {match,P,_Rest} -> + split_apply(tl(S), P+1, RE, T, [hd(S)|Sub]); + {match,P1,Rest} -> + [lists:reverse(Sub)|split_apply(Rest, P1, RE, T, [])]; + nomatch -> + split_apply(tl(S), P+1, RE, T, [hd(S)|Sub]) + end. |