diff options
Diffstat (limited to 'lib/inets/src')
| -rw-r--r-- | lib/inets/src/inets_app/Makefile | 3 | ||||
| -rw-r--r-- | lib/inets/src/inets_app/inets.app.src | 1 | ||||
| -rw-r--r-- | lib/inets/src/inets_app/inets.appup.src | 6 | ||||
| -rw-r--r-- | lib/inets/src/inets_app/inets_regexp.erl | 414 | 
4 files changed, 417 insertions, 7 deletions
| diff --git a/lib/inets/src/inets_app/Makefile b/lib/inets/src/inets_app/Makefile index 1d870c14e8..a294381f67 100644 --- a/lib/inets/src/inets_app/Makefile +++ b/lib/inets/src/inets_app/Makefile @@ -49,7 +49,8 @@ MODULES = \  	inets_sup \  	inets_trace \  	inets_lib \ -	inets_time_compat +	inets_time_compat \ +	inets_regexp  INTERNAL_HRL_FILES = inets_internal.hrl  EXTERNAL_HRL_FILES = ../../include/httpd.hrl \ diff --git a/lib/inets/src/inets_app/inets.app.src b/lib/inets/src/inets_app/inets.app.src index 5706a335d7..abf8c0cdea 100644 --- a/lib/inets/src/inets_app/inets.app.src +++ b/lib/inets/src/inets_app/inets.app.src @@ -29,6 +29,7 @@  	    inets_trace,              inets_lib,              inets_time_compat, +	    inets_regexp,              %% FTP              ftp, diff --git a/lib/inets/src/inets_app/inets.appup.src b/lib/inets/src/inets_app/inets.appup.src index a603357ebd..3a31daeb20 100644 --- a/lib/inets/src/inets_app/inets.appup.src +++ b/lib/inets/src/inets_app/inets.appup.src @@ -18,16 +18,10 @@  %% %CopyrightEnd%  {"%VSN%",   [ -  {<<"6.2.1">>, [{load_module, httpd_script_env, soft_purge, soft_purge, []}]}, -  {<<"6.2">>,   [{load_module, httpd_script_env, soft_purge, soft_purge, []}, -		 {load_module, httpc, soft_purge, soft_purge, []}]},    {<<"6\\..*">>,[{restart_application, inets}]},    {<<"5\\..*">>,[{restart_application, inets}]}   ],   [ -  {<<"6.2.1">>, [{load_module, httpd_script_env, soft_purge, soft_purge, []}]}, -  {<<"6.2">>,   [{load_module, httpd_script_env, soft_purge, soft_purge, []}, -		 {load_module, httpc, soft_purge, soft_purge, []}]},    {<<"6\\..*">>,[{restart_application, inets}]},    {<<"5\\..*">>,[{restart_application, inets}]}   ] diff --git a/lib/inets/src/inets_app/inets_regexp.erl b/lib/inets/src/inets_app/inets_regexp.erl new file mode 100644 index 0000000000..fc1608bc5a --- /dev/null +++ b/lib/inets/src/inets_app/inets_regexp.erl @@ -0,0 +1,414 @@ +%% +%% %CopyrightBegin% +%%  +%% Copyright Ericsson AB 2009. All Rights Reserved. +%%  +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%%     http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%  +%% %CopyrightEnd% +%% + +-module(inets_regexp). + +-export([parse/1, match/2, first_match/2, split/2, sub/3, gsub/3]). + + +%%%========================================================================= +%%%  API +%%%========================================================================= + +%% parse(RegExp) -> {ok, RE} | {error, E}. +%% Parse the regexp described in the string RegExp. + +parse(S) -> +    case (catch reg(S)) of +	{R, []} ->  +	    {ok, R}; +	{_R, [C|_]} ->  +	    {error, {illegal, [C]}}; +	{error, E} ->  +	    {error, E} +    end. + + +%% Find the longest match of RegExp in String. + +match(S, RegExp) when is_list(RegExp) -> +    case parse(RegExp) of +	{ok,RE} -> match(S, RE); +	{error,E} -> {error,E} +    end; +match(S, RE) -> +    case match(RE, S, 1, 0, -1) of +	{Start,Len} when Len >= 0 -> +	    {match, Start, Len}; +	{_Start,_Len} ->  +	    nomatch +    end. + +%% Find the first match of RegExp in String. + +first_match(S, RegExp) when is_list(RegExp) -> +    case parse(RegExp) of +	{ok, RE} ->  +	    first_match(S, RE); +	{error, E} ->  +	    {error, E} +    end; +first_match(S, RE) -> +    case first_match(RE, S, 1) of +	{Start,Len} when Len >= 0 -> +	    {match, Start,Len}; +	nomatch ->  +	    nomatch +    end. + +first_match(RE, S, St) when S =/= [] -> +    case re_apply(S, St, RE) of +	{match, P, _Rest} ->  +	    {St, P-St}; +	nomatch ->  +	    first_match(RE, tl(S), St+1) +    end; +first_match(_RE, [], _St) ->  +    nomatch. + + +match(RE, S, St, Pos, L) -> +    case first_match(RE, S, St) of +	{St1, L1} -> +	    Nst = St1 + 1, +	    if L1 > L ->  +		    match(RE, lists:nthtail(Nst-St, S), Nst, St1, L1); +	       true ->  +		    match(RE, lists:nthtail(Nst-St, S), Nst, Pos, L) +	    end; +	nomatch ->  +	    {Pos, L} +    end. + + +%% Split a string into substrings where the RegExp describes the +%% field seperator. The RegExp " " is specially treated. + +split(String, " ") ->				%This is really special +    {ok, RE} = parse("[ \t]+"), +    case split_apply(String, RE, true) of +	[[]|Ss] ->  +	    {ok,Ss}; +	Ss ->  +	    {ok,Ss} +    end; +split(String, RegExp) when is_list(RegExp) -> +    case parse(RegExp) of +	{ok, RE} ->  +	    {ok, split_apply(String, RE, false)}; +	{error, E} ->  +	    {error,E} +    end; +split(String, RE) ->  +    {ok, split_apply(String, RE, false)}. + + +%% Substitute the first match of the regular expression RegExp  +%% with the string Replace in String. Accept pre-parsed regular +%% expressions. + +sub(String, RegExp, Rep) when is_list(RegExp) -> +    case parse(RegExp) of +	{ok, RE} ->  +	    sub(String, RE, Rep); +	{error, E} ->  +	    {error, E} +    end; +sub(String, RE, Rep) -> +    Ss = sub_match(String, RE, 1), +    {ok, sub_repl(Ss, Rep, String, 1), length(Ss)}. + + +%% Substitute every match of the regular expression RegExp with  +%% the string New in String. Accept pre-parsed regular expressions. + +gsub(String, RegExp, Rep) when is_list(RegExp) -> +    case parse(RegExp) of +	{ok, RE} ->  +	    gsub(String, RE, Rep); +	{error, E} ->  +	    {error, E} +    end; +gsub(String, RE, Rep) -> +    Ss = matches(String, RE, 1), +    {ok, sub_repl(Ss, Rep, String, 1), length(Ss)}. + + +%%%======================================================================== +%%% Internal functions +%%%======================================================================== + +%% This is the regular expression grammar used. It is equivalent to the +%% one used in AWK, except that we allow ^ $ to be used anywhere and fail +%% in the matching. +%% +%% reg -> reg1 : '$1'. +%% reg1 -> reg1 "|" reg2 : {'or','$1','$2'}. +%% reg1 -> reg2 : '$1'. +%% reg2 -> reg2 reg3 : {concat,'$1','$2'}. +%% reg2 -> reg3 : '$1'. +%% reg3 -> reg3 "*" : {kclosure,'$1'}. +%% reg3 -> reg3 "+" : {pclosure,'$1'}. +%% reg3 -> reg3 "?" : {optional,'$1'}. +%% reg3 -> reg4 : '$1'. +%% reg4 -> "(" reg ")" : '$2'. +%% reg4 -> "\\" char : '$2'. +%% reg4 -> "^" : bos. +%% reg4 -> "$" : eos. +%% reg4 -> "." : char. +%% reg4 -> "[" class "]" : {char_class,char_class('$2')} +%% reg4 -> "[" "^" class "]" : {comp_class,char_class('$3')} +%% reg4 -> "\"" chars "\"" : char_string('$2') +%% reg4 -> char : '$1'. +%% reg4 -> empty : epsilon. +%%  The grammar of the current regular expressions. The actual parser +%%  is a recursive descent implementation of the grammar. + +reg(S) -> reg1(S). + +%% reg1 -> reg2 reg1' +%% reg1' -> "|" reg2 +%% reg1' -> empty + +reg1(S0) -> +    {L,S1} = reg2(S0), +    reg1p(S1, L). + +reg1p([$||S0], L) -> +    {R,S1} = reg2(S0), +    reg1p(S1, {'or',L,R}); +reg1p(S, L) -> {L,S}. + +%% reg2 -> reg3 reg2' +%% reg2' -> reg3 +%% reg2' -> empty + +reg2(S0) -> +    {L,S1} = reg3(S0), +    reg2p(S1, L). + +reg2p([C|S0], L) when (C =/= $|) andalso (C =/= $)) -> +    {R,S1} = reg3([C|S0]), +    reg2p(S1, {concat,L,R}); +reg2p(S, L) -> {L,S}. + +%% reg3 -> reg4 reg3' +%% reg3' -> "*" reg3' +%% reg3' -> "+" reg3' +%% reg3' -> "?" reg3' +%% reg3' -> empty + +reg3(S0) -> +    {L,S1} = reg4(S0), +    reg3p(S1, L). + +reg3p([$*|S], L) -> reg3p(S, {kclosure,L}); +reg3p([$+|S], L) -> reg3p(S, {pclosure,L}); +reg3p([$?|S], L) -> reg3p(S, {optional,L}); +reg3p(S, L) -> {L,S}. + +reg4([$(|S0]) -> +    case reg(S0) of +	{R,[$)|S1]} -> {R,S1}; +	{_R,_S} -> throw({error,{unterminated,"("}}) +    end; +reg4([$\\,O1,O2,O3|S])  +  when ((O1 >= $0) andalso  +	(O1 =< $7) andalso  +	(O2 >= $0) andalso  +	(O2 =< $7) andalso  +	(O3 >= $0) andalso  +	(O3 =< $7)) -> +    {(O1*8 + O2)*8 + O3 - 73*$0,S}; +reg4([$\\,C|S]) ->  +    {escape_char(C),S}; +reg4([$\\]) ->  +    throw({error, {unterminated,"\\"}}); +reg4([$^|S]) ->  +    {bos,S}; +reg4([$$|S]) ->  +    {eos,S}; +reg4([$.|S]) ->  +    {{comp_class,"\n"},S}; +reg4("[^" ++ S0) -> +    case char_class(S0) of +	{Cc,[$]|S1]} -> {{comp_class,Cc},S1}; +	{_Cc,_S} -> throw({error,{unterminated,"["}}) +    end; +reg4([$[|S0]) -> +    case char_class(S0) of +	{Cc,[$]|S1]} -> {{char_class,Cc},S1}; +	{_Cc,_S1} -> throw({error,{unterminated,"["}}) +    end; +reg4([C|S])  +  when (C =/= $*) andalso (C =/= $+) andalso (C =/= $?) andalso (C =/= $]) ->  +    {C, S}; +reg4([C|_S]) ->  +    throw({error,{illegal,[C]}}); +reg4([]) ->  +    {epsilon,[]}. + +escape_char($n) -> $\n;				%\n = LF +escape_char($r) -> $\r;				%\r = CR +escape_char($t) -> $\t;				%\t = TAB +escape_char($v) -> $\v;				%\v = VT +escape_char($b) -> $\b;				%\b = BS +escape_char($f) -> $\f;				%\f = FF +escape_char($e) -> $\e;				%\e = ESC +escape_char($s) -> $\s;				%\s = SPACE +escape_char($d) -> $\d;				%\d = DEL +escape_char(C) -> C. + +char_class([$]|S]) -> char_class(S, [$]]); +char_class(S) -> char_class(S, []). + +char($\\, [O1,O2,O3|S]) when +  O1 >= $0, O1 =< $7, O2 >= $0, O2 =< $7, O3 >= $0, O3 =< $7 -> +    {(O1*8 + O2)*8 + O3 - 73*$0,S}; +char($\\, [C|S]) -> {escape_char(C),S}; +char(C, S) -> {C,S}. + +char_class([C1|S0], Cc) when C1 =/= $] -> +    case char(C1, S0) of +	{Cf,[$-,C2|S1]} when C2 =/= $] -> +	    case char(C2, S1) of +		{Cl,S2} when Cf < Cl -> char_class(S2, [{Cf,Cl}|Cc]);  +		{Cl,_S2} -> throw({error,{char_class,[Cf,$-,Cl]}}) +	    end; +	{C,S1} -> char_class(S1, [C|Cc]) +    end; +char_class(S, Cc) -> {Cc,S}. + + +%% re_apply(String, StartPos, RegExp) -> re_app_res(). +%% +%%  Apply the (parse of the) regular expression RegExp to String.  If +%%  there is a match return the position of the remaining string and +%%  the string if else return 'nomatch'. BestMatch specifies if we want +%%  the longest match, or just a match. +%% +%%  StartPos should be the real start position as it is used to decide +%%  if we ae at the beginning of the string. +%% +%%  Pass two functions to re_apply_or so it can decide, on the basis +%%  of BestMatch, whether to just any take any match or try both to +%%  find the longest. This is slower but saves duplicatng code. + +re_apply(S, St, RE) -> re_apply(RE, [], S, St). + +re_apply(epsilon, More, S, P) ->		%This always matches +    re_apply_more(More, S, P); +re_apply({'or',RE1,RE2}, More, S, P) -> +    re_apply_or(re_apply(RE1, More, S, P), +		re_apply(RE2, More, S, P)); +re_apply({concat,RE1,RE2}, More, S0, P) -> +    re_apply(RE1, [RE2|More], S0, P); +re_apply({kclosure,CE}, More, S, P) -> +    %% Be careful with the recursion, explicitly do one call before +    %% looping. +    re_apply_or(re_apply_more(More, S, P), +		re_apply(CE, [{kclosure,CE}|More], S, P)); +re_apply({pclosure,CE}, More, S, P) -> +    re_apply(CE, [{kclosure,CE}|More], S, P); +re_apply({optional,CE}, More, S, P) -> +    re_apply_or(re_apply_more(More, S, P), +		re_apply(CE, More, S, P)); +re_apply(bos, More, S, 1) -> re_apply_more(More, S, 1); +re_apply(eos, More, [$\n|S], P) -> re_apply_more(More, S, P); +re_apply(eos, More, [], P) -> re_apply_more(More, [], P); +re_apply({char_class,Cc}, More, [C|S], P) -> +    case in_char_class(C, Cc) of +	true -> re_apply_more(More, S, P+1); +	false -> nomatch +    end; +re_apply({comp_class,Cc}, More, [C|S], P) -> +    case in_char_class(C, Cc) of +	true -> nomatch; +	false -> re_apply_more(More, S, P+1) +    end; +re_apply(C, More, [C|S], P) when is_integer(C) -> +    re_apply_more(More, S, P+1); +re_apply(_RE, _More, _S, _P) -> nomatch. + +%% re_apply_more([RegExp], String, Length) -> re_app_res(). + +re_apply_more([RE|More], S, P) -> re_apply(RE, More, S, P); +re_apply_more([], S, P) -> {match,P,S}. + +%% in_char_class(Char, Class) -> bool(). + +in_char_class(C, [{C1,C2}|_Cc]) when C >= C1, C =< C2 -> true; +in_char_class(C, [C|_Cc]) -> true; +in_char_class(C, [_|Cc]) -> in_char_class(C, Cc); +in_char_class(_C, []) -> false. + +%% re_apply_or(Match1, Match2) -> re_app_res(). +%%  If we want the best match then choose the longest match, else just +%%  choose one by trying sequentially. + +re_apply_or({match,P1,S1},   {match,P2,_S2}) when P1 >= P2 -> {match,P1,S1}; +re_apply_or({match,_P1,_S1}, {match,P2,S2}) -> {match,P2,S2}; +re_apply_or(nomatch, R2) -> R2; +re_apply_or(R1, nomatch) -> R1. + + +matches(S, RE, St) -> +    case first_match(RE, S, St) of +	{St1,0} ->  +	    [{St1,0}|matches(string:substr(S, St1+2-St), RE, St1+1)]; +	{St1,L1} ->  +	    [{St1,L1}|matches(string:substr(S, St1+L1+1-St), RE, St1+L1)]; +	nomatch ->  +	    [] +    end. + +sub_match(S, RE, St) -> +    case first_match(RE, S, St) of +	{St1,L1} -> [{St1,L1}]; +	nomatch -> [] +    end. + +sub_repl([{St,L}|Ss], Rep, S, Pos) -> +    Rs = sub_repl(Ss, Rep, S, St+L), +    string:substr(S, Pos, St-Pos) ++  +	sub_repl(Rep, string:substr(S, St, L), Rs); +sub_repl([], _Rep, S, Pos) ->  +    string:substr(S, Pos). + +sub_repl([$&|Rep], M, Rest) -> M ++ sub_repl(Rep, M, Rest); +sub_repl("\\&" ++ Rep, M, Rest) -> [$&|sub_repl(Rep, M, Rest)]; +sub_repl([C|Rep], M, Rest) -> [C|sub_repl(Rep, M, Rest)]; +sub_repl([], _M, Rest) -> Rest. + +split_apply(S, RE, Trim) -> split_apply(S, 1, RE, Trim, []). + +split_apply([], _P, _RE, true, []) ->  +    []; +split_apply([], _P, _RE, _T, Sub) ->  +    [lists:reverse(Sub)]; +split_apply(S, P, RE, T, Sub) -> +    case re_apply(S, P, RE) of +	{match,P,_Rest} -> +	    split_apply(tl(S), P+1, RE, T, [hd(S)|Sub]); +	{match,P1,Rest} -> +	    [lists:reverse(Sub)|split_apply(Rest, P1, RE, T, [])]; +	nomatch -> +	    split_apply(tl(S), P+1, RE, T, [hd(S)|Sub]) +    end. | 
