aboutsummaryrefslogtreecommitdiffstats
path: root/lib/inets/src/inets_app/inets_regexp.erl
diff options
context:
space:
mode:
authorSverker Eriksson <[email protected]>2016-02-24 17:55:04 +0100
committerSverker Eriksson <[email protected]>2016-02-24 17:55:04 +0100
commit03743cd4193a2ca97f9b9a52a25e63f616e8fc07 (patch)
tree9c1f4094a2105ec4bf19dd0d16e76b598d0e608d /lib/inets/src/inets_app/inets_regexp.erl
parent1b094d72ffc56069c72f17c7edd673dbbfe47e39 (diff)
parent35739bd06776f90526006486b3f4ab7e54f7f951 (diff)
downloadotp-03743cd4193a2ca97f9b9a52a25e63f616e8fc07.tar.gz
otp-03743cd4193a2ca97f9b9a52a25e63f616e8fc07.tar.bz2
otp-03743cd4193a2ca97f9b9a52a25e63f616e8fc07.zip
Merge branch 'master' into sverk/master/halt-INT_MIN
Diffstat (limited to 'lib/inets/src/inets_app/inets_regexp.erl')
-rw-r--r--lib/inets/src/inets_app/inets_regexp.erl414
1 files changed, 0 insertions, 414 deletions
diff --git a/lib/inets/src/inets_app/inets_regexp.erl b/lib/inets/src/inets_app/inets_regexp.erl
deleted file mode 100644
index fc1608bc5a..0000000000
--- a/lib/inets/src/inets_app/inets_regexp.erl
+++ /dev/null
@@ -1,414 +0,0 @@
-%%
-%% %CopyrightBegin%
-%%
-%% Copyright Ericsson AB 2009. All Rights Reserved.
-%%
-%% Licensed under the Apache License, Version 2.0 (the "License");
-%% you may not use this file except in compliance with the License.
-%% You may obtain a copy of the License at
-%%
-%% http://www.apache.org/licenses/LICENSE-2.0
-%%
-%% Unless required by applicable law or agreed to in writing, software
-%% distributed under the License is distributed on an "AS IS" BASIS,
-%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-%% See the License for the specific language governing permissions and
-%% limitations under the License.
-%%
-%% %CopyrightEnd%
-%%
-
--module(inets_regexp).
-
--export([parse/1, match/2, first_match/2, split/2, sub/3, gsub/3]).
-
-
-%%%=========================================================================
-%%% API
-%%%=========================================================================
-
-%% parse(RegExp) -> {ok, RE} | {error, E}.
-%% Parse the regexp described in the string RegExp.
-
-parse(S) ->
- case (catch reg(S)) of
- {R, []} ->
- {ok, R};
- {_R, [C|_]} ->
- {error, {illegal, [C]}};
- {error, E} ->
- {error, E}
- end.
-
-
-%% Find the longest match of RegExp in String.
-
-match(S, RegExp) when is_list(RegExp) ->
- case parse(RegExp) of
- {ok,RE} -> match(S, RE);
- {error,E} -> {error,E}
- end;
-match(S, RE) ->
- case match(RE, S, 1, 0, -1) of
- {Start,Len} when Len >= 0 ->
- {match, Start, Len};
- {_Start,_Len} ->
- nomatch
- end.
-
-%% Find the first match of RegExp in String.
-
-first_match(S, RegExp) when is_list(RegExp) ->
- case parse(RegExp) of
- {ok, RE} ->
- first_match(S, RE);
- {error, E} ->
- {error, E}
- end;
-first_match(S, RE) ->
- case first_match(RE, S, 1) of
- {Start,Len} when Len >= 0 ->
- {match, Start,Len};
- nomatch ->
- nomatch
- end.
-
-first_match(RE, S, St) when S =/= [] ->
- case re_apply(S, St, RE) of
- {match, P, _Rest} ->
- {St, P-St};
- nomatch ->
- first_match(RE, tl(S), St+1)
- end;
-first_match(_RE, [], _St) ->
- nomatch.
-
-
-match(RE, S, St, Pos, L) ->
- case first_match(RE, S, St) of
- {St1, L1} ->
- Nst = St1 + 1,
- if L1 > L ->
- match(RE, lists:nthtail(Nst-St, S), Nst, St1, L1);
- true ->
- match(RE, lists:nthtail(Nst-St, S), Nst, Pos, L)
- end;
- nomatch ->
- {Pos, L}
- end.
-
-
-%% Split a string into substrings where the RegExp describes the
-%% field seperator. The RegExp " " is specially treated.
-
-split(String, " ") -> %This is really special
- {ok, RE} = parse("[ \t]+"),
- case split_apply(String, RE, true) of
- [[]|Ss] ->
- {ok,Ss};
- Ss ->
- {ok,Ss}
- end;
-split(String, RegExp) when is_list(RegExp) ->
- case parse(RegExp) of
- {ok, RE} ->
- {ok, split_apply(String, RE, false)};
- {error, E} ->
- {error,E}
- end;
-split(String, RE) ->
- {ok, split_apply(String, RE, false)}.
-
-
-%% Substitute the first match of the regular expression RegExp
-%% with the string Replace in String. Accept pre-parsed regular
-%% expressions.
-
-sub(String, RegExp, Rep) when is_list(RegExp) ->
- case parse(RegExp) of
- {ok, RE} ->
- sub(String, RE, Rep);
- {error, E} ->
- {error, E}
- end;
-sub(String, RE, Rep) ->
- Ss = sub_match(String, RE, 1),
- {ok, sub_repl(Ss, Rep, String, 1), length(Ss)}.
-
-
-%% Substitute every match of the regular expression RegExp with
-%% the string New in String. Accept pre-parsed regular expressions.
-
-gsub(String, RegExp, Rep) when is_list(RegExp) ->
- case parse(RegExp) of
- {ok, RE} ->
- gsub(String, RE, Rep);
- {error, E} ->
- {error, E}
- end;
-gsub(String, RE, Rep) ->
- Ss = matches(String, RE, 1),
- {ok, sub_repl(Ss, Rep, String, 1), length(Ss)}.
-
-
-%%%========================================================================
-%%% Internal functions
-%%%========================================================================
-
-%% This is the regular expression grammar used. It is equivalent to the
-%% one used in AWK, except that we allow ^ $ to be used anywhere and fail
-%% in the matching.
-%%
-%% reg -> reg1 : '$1'.
-%% reg1 -> reg1 "|" reg2 : {'or','$1','$2'}.
-%% reg1 -> reg2 : '$1'.
-%% reg2 -> reg2 reg3 : {concat,'$1','$2'}.
-%% reg2 -> reg3 : '$1'.
-%% reg3 -> reg3 "*" : {kclosure,'$1'}.
-%% reg3 -> reg3 "+" : {pclosure,'$1'}.
-%% reg3 -> reg3 "?" : {optional,'$1'}.
-%% reg3 -> reg4 : '$1'.
-%% reg4 -> "(" reg ")" : '$2'.
-%% reg4 -> "\\" char : '$2'.
-%% reg4 -> "^" : bos.
-%% reg4 -> "$" : eos.
-%% reg4 -> "." : char.
-%% reg4 -> "[" class "]" : {char_class,char_class('$2')}
-%% reg4 -> "[" "^" class "]" : {comp_class,char_class('$3')}
-%% reg4 -> "\"" chars "\"" : char_string('$2')
-%% reg4 -> char : '$1'.
-%% reg4 -> empty : epsilon.
-%% The grammar of the current regular expressions. The actual parser
-%% is a recursive descent implementation of the grammar.
-
-reg(S) -> reg1(S).
-
-%% reg1 -> reg2 reg1'
-%% reg1' -> "|" reg2
-%% reg1' -> empty
-
-reg1(S0) ->
- {L,S1} = reg2(S0),
- reg1p(S1, L).
-
-reg1p([$||S0], L) ->
- {R,S1} = reg2(S0),
- reg1p(S1, {'or',L,R});
-reg1p(S, L) -> {L,S}.
-
-%% reg2 -> reg3 reg2'
-%% reg2' -> reg3
-%% reg2' -> empty
-
-reg2(S0) ->
- {L,S1} = reg3(S0),
- reg2p(S1, L).
-
-reg2p([C|S0], L) when (C =/= $|) andalso (C =/= $)) ->
- {R,S1} = reg3([C|S0]),
- reg2p(S1, {concat,L,R});
-reg2p(S, L) -> {L,S}.
-
-%% reg3 -> reg4 reg3'
-%% reg3' -> "*" reg3'
-%% reg3' -> "+" reg3'
-%% reg3' -> "?" reg3'
-%% reg3' -> empty
-
-reg3(S0) ->
- {L,S1} = reg4(S0),
- reg3p(S1, L).
-
-reg3p([$*|S], L) -> reg3p(S, {kclosure,L});
-reg3p([$+|S], L) -> reg3p(S, {pclosure,L});
-reg3p([$?|S], L) -> reg3p(S, {optional,L});
-reg3p(S, L) -> {L,S}.
-
-reg4([$(|S0]) ->
- case reg(S0) of
- {R,[$)|S1]} -> {R,S1};
- {_R,_S} -> throw({error,{unterminated,"("}})
- end;
-reg4([$\\,O1,O2,O3|S])
- when ((O1 >= $0) andalso
- (O1 =< $7) andalso
- (O2 >= $0) andalso
- (O2 =< $7) andalso
- (O3 >= $0) andalso
- (O3 =< $7)) ->
- {(O1*8 + O2)*8 + O3 - 73*$0,S};
-reg4([$\\,C|S]) ->
- {escape_char(C),S};
-reg4([$\\]) ->
- throw({error, {unterminated,"\\"}});
-reg4([$^|S]) ->
- {bos,S};
-reg4([$$|S]) ->
- {eos,S};
-reg4([$.|S]) ->
- {{comp_class,"\n"},S};
-reg4("[^" ++ S0) ->
- case char_class(S0) of
- {Cc,[$]|S1]} -> {{comp_class,Cc},S1};
- {_Cc,_S} -> throw({error,{unterminated,"["}})
- end;
-reg4([$[|S0]) ->
- case char_class(S0) of
- {Cc,[$]|S1]} -> {{char_class,Cc},S1};
- {_Cc,_S1} -> throw({error,{unterminated,"["}})
- end;
-reg4([C|S])
- when (C =/= $*) andalso (C =/= $+) andalso (C =/= $?) andalso (C =/= $]) ->
- {C, S};
-reg4([C|_S]) ->
- throw({error,{illegal,[C]}});
-reg4([]) ->
- {epsilon,[]}.
-
-escape_char($n) -> $\n; %\n = LF
-escape_char($r) -> $\r; %\r = CR
-escape_char($t) -> $\t; %\t = TAB
-escape_char($v) -> $\v; %\v = VT
-escape_char($b) -> $\b; %\b = BS
-escape_char($f) -> $\f; %\f = FF
-escape_char($e) -> $\e; %\e = ESC
-escape_char($s) -> $\s; %\s = SPACE
-escape_char($d) -> $\d; %\d = DEL
-escape_char(C) -> C.
-
-char_class([$]|S]) -> char_class(S, [$]]);
-char_class(S) -> char_class(S, []).
-
-char($\\, [O1,O2,O3|S]) when
- O1 >= $0, O1 =< $7, O2 >= $0, O2 =< $7, O3 >= $0, O3 =< $7 ->
- {(O1*8 + O2)*8 + O3 - 73*$0,S};
-char($\\, [C|S]) -> {escape_char(C),S};
-char(C, S) -> {C,S}.
-
-char_class([C1|S0], Cc) when C1 =/= $] ->
- case char(C1, S0) of
- {Cf,[$-,C2|S1]} when C2 =/= $] ->
- case char(C2, S1) of
- {Cl,S2} when Cf < Cl -> char_class(S2, [{Cf,Cl}|Cc]);
- {Cl,_S2} -> throw({error,{char_class,[Cf,$-,Cl]}})
- end;
- {C,S1} -> char_class(S1, [C|Cc])
- end;
-char_class(S, Cc) -> {Cc,S}.
-
-
-%% re_apply(String, StartPos, RegExp) -> re_app_res().
-%%
-%% Apply the (parse of the) regular expression RegExp to String. If
-%% there is a match return the position of the remaining string and
-%% the string if else return 'nomatch'. BestMatch specifies if we want
-%% the longest match, or just a match.
-%%
-%% StartPos should be the real start position as it is used to decide
-%% if we ae at the beginning of the string.
-%%
-%% Pass two functions to re_apply_or so it can decide, on the basis
-%% of BestMatch, whether to just any take any match or try both to
-%% find the longest. This is slower but saves duplicatng code.
-
-re_apply(S, St, RE) -> re_apply(RE, [], S, St).
-
-re_apply(epsilon, More, S, P) -> %This always matches
- re_apply_more(More, S, P);
-re_apply({'or',RE1,RE2}, More, S, P) ->
- re_apply_or(re_apply(RE1, More, S, P),
- re_apply(RE2, More, S, P));
-re_apply({concat,RE1,RE2}, More, S0, P) ->
- re_apply(RE1, [RE2|More], S0, P);
-re_apply({kclosure,CE}, More, S, P) ->
- %% Be careful with the recursion, explicitly do one call before
- %% looping.
- re_apply_or(re_apply_more(More, S, P),
- re_apply(CE, [{kclosure,CE}|More], S, P));
-re_apply({pclosure,CE}, More, S, P) ->
- re_apply(CE, [{kclosure,CE}|More], S, P);
-re_apply({optional,CE}, More, S, P) ->
- re_apply_or(re_apply_more(More, S, P),
- re_apply(CE, More, S, P));
-re_apply(bos, More, S, 1) -> re_apply_more(More, S, 1);
-re_apply(eos, More, [$\n|S], P) -> re_apply_more(More, S, P);
-re_apply(eos, More, [], P) -> re_apply_more(More, [], P);
-re_apply({char_class,Cc}, More, [C|S], P) ->
- case in_char_class(C, Cc) of
- true -> re_apply_more(More, S, P+1);
- false -> nomatch
- end;
-re_apply({comp_class,Cc}, More, [C|S], P) ->
- case in_char_class(C, Cc) of
- true -> nomatch;
- false -> re_apply_more(More, S, P+1)
- end;
-re_apply(C, More, [C|S], P) when is_integer(C) ->
- re_apply_more(More, S, P+1);
-re_apply(_RE, _More, _S, _P) -> nomatch.
-
-%% re_apply_more([RegExp], String, Length) -> re_app_res().
-
-re_apply_more([RE|More], S, P) -> re_apply(RE, More, S, P);
-re_apply_more([], S, P) -> {match,P,S}.
-
-%% in_char_class(Char, Class) -> bool().
-
-in_char_class(C, [{C1,C2}|_Cc]) when C >= C1, C =< C2 -> true;
-in_char_class(C, [C|_Cc]) -> true;
-in_char_class(C, [_|Cc]) -> in_char_class(C, Cc);
-in_char_class(_C, []) -> false.
-
-%% re_apply_or(Match1, Match2) -> re_app_res().
-%% If we want the best match then choose the longest match, else just
-%% choose one by trying sequentially.
-
-re_apply_or({match,P1,S1}, {match,P2,_S2}) when P1 >= P2 -> {match,P1,S1};
-re_apply_or({match,_P1,_S1}, {match,P2,S2}) -> {match,P2,S2};
-re_apply_or(nomatch, R2) -> R2;
-re_apply_or(R1, nomatch) -> R1.
-
-
-matches(S, RE, St) ->
- case first_match(RE, S, St) of
- {St1,0} ->
- [{St1,0}|matches(string:substr(S, St1+2-St), RE, St1+1)];
- {St1,L1} ->
- [{St1,L1}|matches(string:substr(S, St1+L1+1-St), RE, St1+L1)];
- nomatch ->
- []
- end.
-
-sub_match(S, RE, St) ->
- case first_match(RE, S, St) of
- {St1,L1} -> [{St1,L1}];
- nomatch -> []
- end.
-
-sub_repl([{St,L}|Ss], Rep, S, Pos) ->
- Rs = sub_repl(Ss, Rep, S, St+L),
- string:substr(S, Pos, St-Pos) ++
- sub_repl(Rep, string:substr(S, St, L), Rs);
-sub_repl([], _Rep, S, Pos) ->
- string:substr(S, Pos).
-
-sub_repl([$&|Rep], M, Rest) -> M ++ sub_repl(Rep, M, Rest);
-sub_repl("\\&" ++ Rep, M, Rest) -> [$&|sub_repl(Rep, M, Rest)];
-sub_repl([C|Rep], M, Rest) -> [C|sub_repl(Rep, M, Rest)];
-sub_repl([], _M, Rest) -> Rest.
-
-split_apply(S, RE, Trim) -> split_apply(S, 1, RE, Trim, []).
-
-split_apply([], _P, _RE, true, []) ->
- [];
-split_apply([], _P, _RE, _T, Sub) ->
- [lists:reverse(Sub)];
-split_apply(S, P, RE, T, Sub) ->
- case re_apply(S, P, RE) of
- {match,P,_Rest} ->
- split_apply(tl(S), P+1, RE, T, [hd(S)|Sub]);
- {match,P1,Rest} ->
- [lists:reverse(Sub)|split_apply(Rest, P1, RE, T, [])];
- nomatch ->
- split_apply(tl(S), P+1, RE, T, [hd(S)|Sub])
- end.