aboutsummaryrefslogtreecommitdiffstats
path: root/lib/stdlib/src
diff options
context:
space:
mode:
authorPatrik Nyblom <[email protected]>2013-08-16 11:37:54 +0200
committerPatrik Nyblom <[email protected]>2013-08-16 11:37:54 +0200
commit5d9a587a8fcc164e02f043959338edec2ff69381 (patch)
tree554f05a944777622b30031724010f6f31707565b /lib/stdlib/src
parent23610dbfc1c409f83349e9e293dd3cfc1f74d497 (diff)
parent52cb62b7930d9c7b9e04a210ff6b02946f27ae79 (diff)
downloadotp-5d9a587a8fcc164e02f043959338edec2ff69381.tar.gz
otp-5d9a587a8fcc164e02f043959338edec2ff69381.tar.bz2
otp-5d9a587a8fcc164e02f043959338edec2ff69381.zip
Merge branch 'pan/update_pcre_8.33'
* pan/update_pcre_8.33: Workaround TR gnu/181328, GCC 4.2.1 20070831 on FreeBSD 9.1 Clarify relation between erts_iolist_{size|to_buf} Fix backslash in titles of manpages Correct UTF-8 in stdlib's notes.xml Add more tests for corner error cases in erl_bif_re.c Add documentation of report_errors and match_limit(_recursion) Add match_limit and match_limit_recursion options Add return_errors option to re:run/3 Add README for updating PCRE Add documentation of extensions to re module Add new options to Erlang re interface and mend dupnames Update PCRE doc part of re.xml to PCRE 8.33 state Integrate new PCRE test suites Integrate patch for PCRE bug id 1370 Handle CRLF correctly in global regexp Add erts_prefix to pcre_library and update erl_bif_re Update to PCRE 8.33, w/o the erts_ prefix added OTP-11204 OTP-11205 OTP-10285
Diffstat (limited to 'lib/stdlib/src')
-rw-r--r--lib/stdlib/src/re.erl176
1 files changed, 126 insertions, 50 deletions
diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl
index c5109ec455..afc63496d0 100644
--- a/lib/stdlib/src/re.erl
+++ b/lib/stdlib/src/re.erl
@@ -19,20 +19,21 @@
-module(re).
-export([grun/3,urun/3,ucompile/2,replace/3,replace/4,split/2,split/3]).
-%-opaque mp() :: {re_pattern, _, _, _}.
--type mp() :: {re_pattern, _, _, _}.
+%-opaque mp() :: {re_pattern, _, _, _, _}.
+-type mp() :: {re_pattern, _, _, _, _}.
-type nl_spec() :: cr | crlf | lf | anycrlf | any.
-type compile_option() :: unicode | anchored | caseless | dollar_endonly
| dotall | extended | firstline | multiline
| no_auto_capture | dupnames | ungreedy
- | {newline, nl_spec()}| bsr_anycrlf
- | bsr_unicode.
+ | {newline, nl_spec()}
+ | bsr_anycrlf | bsr_unicode
+ | no_start_optimize | ucp | never_utf.
%%% BIFs
--export([compile/1, compile/2, run/2, run/3]).
+-export([compile/1, compile/2, run/2, run/3, inspect/2]).
-spec compile(Regexp) -> {ok, MP} | {error, ErrSpec} when
Regexp :: iodata(),
@@ -63,17 +64,21 @@ run(_, _) ->
-spec run(Subject, RE, Options) -> {match, Captured} |
match |
- nomatch when
+ nomatch |
+ {error, ErrType} when
Subject :: iodata() | unicode:charlist(),
RE :: mp() | iodata() | unicode:charlist(),
Options :: [Option],
- Option :: anchored | global | notbol | noteol | notempty
+ Option :: anchored | global | notbol | noteol | notempty
+ | notempty_atstart | report_errors
| {offset, non_neg_integer()} |
+ {match_limit, non_neg_integer()} |
+ {match_limit_recursion, non_neg_integer()} |
{newline, NLSpec :: nl_spec()} |
bsr_anycrlf | bsr_unicode | {capture, ValueSpec} |
{capture, ValueSpec, Type} | CompileOpt,
Type :: index | list | binary,
- ValueSpec :: all | all_but_first | first | none | ValueList,
+ ValueSpec :: all | all_but_first | all_names | first | none | ValueList,
ValueList :: [ValueID],
ValueID :: integer() | string() | atom(),
CompileOpt :: compile_option(),
@@ -83,11 +88,21 @@ run(_, _) ->
| binary(),
ListConversionData :: string()
| {error, string(), binary()}
- | {incomplete, string(), binary()}.
+ | {incomplete, string(), binary()},
+ ErrType :: match_limit | match_limit_recursion | {compile, CompileErr},
+ CompileErr :: {ErrString :: string(), Position :: non_neg_integer()}.
run(_, _, _) ->
erlang:nif_error(undef).
+-spec inspect(MP,Item) -> {namelist, [ binary() ]} when
+ MP :: mp(),
+ Item :: namelist.
+
+inspect(_,_) ->
+ erlang:nif_error(undef).
+
+
%%% End of BIFs
-spec split(Subject, RE) -> SplitList when
@@ -102,8 +117,10 @@ split(Subject,RE) ->
Subject :: iodata() | unicode:charlist(),
RE :: mp() | iodata() | unicode:charlist(),
Options :: [ Option ],
- Option :: anchored | notbol | noteol | notempty
+ Option :: anchored | notbol | noteol | notempty | notempty_atstart
| {offset, non_neg_integer()} | {newline, nl_spec()}
+ | {match_limit, non_neg_integer()}
+ | {match_limit_recursion, non_neg_integer()}
| bsr_anycrlf | bsr_unicode | {return, ReturnType}
| {parts, NumParts} | group | trim | CompileOpt,
NumParts :: non_neg_integer() | infinity,
@@ -266,7 +283,7 @@ extend_subpatterns([],N) ->
extend_subpatterns([H|T],N) ->
[H | extend_subpatterns(T,N-1)].
-compile_split({re_pattern,N,_,_} = Comp, Options) ->
+compile_split({re_pattern,N,_,_,_} = Comp, Options) ->
{Comp,N,Options};
compile_split(Pat,Options0) when not is_tuple(Pat) ->
Options = lists:filter(fun(O) ->
@@ -275,7 +292,7 @@ compile_split(Pat,Options0) when not is_tuple(Pat) ->
case re:compile(Pat,Options) of
{error,Err} ->
{error,Err};
- {ok, {re_pattern,N,_,_} = Comp} ->
+ {ok, {re_pattern,N,_,_,_} = Comp} ->
NewOpt = lists:filter(fun(OO) -> (not copt(OO)) end, Options0),
{Comp,N,NewOpt}
end;
@@ -295,8 +312,11 @@ replace(Subject,RE,Replacement) ->
RE :: mp() | iodata() | unicode:charlist(),
Replacement :: iodata() | unicode:charlist(),
Options :: [Option],
- Option :: anchored | global | notbol | noteol | notempty
+ Option :: anchored | global | notbol | noteol | notempty
+ | notempty_atstart
| {offset, non_neg_integer()} | {newline, NLSpec} | bsr_anycrlf
+ | {match_limit, non_neg_integer()}
+ | {match_limit_recursion, non_neg_integer()}
| bsr_unicode | {return, ReturnType} | CompileOpt,
ReturnType :: iodata | list | binary,
CompileOpt :: compile_option(),
@@ -352,6 +372,8 @@ process_repl_params([],Convert,Unicode) ->
process_repl_params([unicode|T],C,_U) ->
{NT,NC,NU} = process_repl_params(T,C,true),
{[unicode|NT],NC,NU};
+process_repl_params([report_errors|_],_,_) ->
+ throw(badopt);
process_repl_params([{capture,_,_}|_],_,_) ->
throw(badopt);
process_repl_params([{capture,_}|_],_,_) ->
@@ -387,6 +409,8 @@ process_split_params([group|T],C,U,L,S,_G) ->
process_split_params(T,C,U,L,S,true);
process_split_params([global|_],_,_,_,_,_) ->
throw(badopt);
+process_split_params([report_errors|_],_,_,_,_,_) ->
+ throw(badopt);
process_split_params([{capture,_,_}|_],_,_,_,_,_) ->
throw(badopt);
process_split_params([{capture,_}|_],_,_,_,_,_) ->
@@ -487,17 +511,31 @@ do_replace(Subject,Repl,SubExprs0) ->
end || Part <- Repl ].
-check_for_unicode({re_pattern,_,1,_},_) ->
+check_for_unicode({re_pattern,_,1,_,_},_) ->
true;
-check_for_unicode({re_pattern,_,0,_},_) ->
+check_for_unicode({re_pattern,_,0,_,_},_) ->
false;
check_for_unicode(_,L) ->
lists:member(unicode,L).
+
+check_for_crlf({re_pattern,_,_,1,_},_) ->
+ true;
+check_for_crlf({re_pattern,_,_,0,_},_) ->
+ false;
+check_for_crlf(_,L) ->
+ case lists:keysearch(newline,1,L) of
+ {value,{newline,any}} -> true;
+ {value,{newline,crlf}} -> true;
+ {value,{newline,anycrlf}} -> true;
+ _ -> false
+ end.
% SelectReturn = false | all | stirpfirst | none
% ConvertReturn = index | list | binary
% {capture, all} -> all (untouchded)
-% {capture, first} -> kept in argumentt list and Select all
+% {capture, all_names} -> if names are present: treated as a name {capture, [...]}
+% else: same as {capture, []}
+% {capture, first} -> kept in argument list and Select all
% {capture, all_but_first} -> removed from argument list and selects stripfirst
% {capture, none} -> removed from argument list and selects none
% {capture, []} -> removed from argument list and selects none
@@ -506,23 +544,30 @@ check_for_unicode(_,L) ->
% Call as process_parameters([],0,false,index,NeedClean)
-process_parameters([],InitialOffset, SelectReturn, ConvertReturn,_) ->
+process_parameters([],InitialOffset, SelectReturn, ConvertReturn,_,_) ->
{[], InitialOffset, SelectReturn, ConvertReturn};
-process_parameters([{offset, N} | T],_Init0,Select0,Return0,CC) ->
- process_parameters(T,N,Select0,Return0,CC);
-process_parameters([global | T],Init0,Select0,Return0,CC) ->
- process_parameters(T,Init0,Select0,Return0,CC);
-process_parameters([{capture,Values,Type}|T],Init0,Select0,_Return0,CC) ->
- process_parameters([{capture,Values}|T],Init0,Select0,Type,CC);
-process_parameters([{capture,Values}|T],Init0,Select0,Return0,CC) ->
+process_parameters([{offset, N} | T],_Init0,Select0,Return0,CC,RE) ->
+ process_parameters(T,N,Select0,Return0,CC,RE);
+process_parameters([global | T],Init0,Select0,Return0,CC,RE) ->
+ process_parameters(T,Init0,Select0,Return0,CC,RE);
+process_parameters([{capture,Values,Type}|T],Init0,Select0,_Return0,CC,RE) ->
+ process_parameters([{capture,Values}|T],Init0,Select0,Type,CC,RE);
+process_parameters([{capture,Values}|T],Init0,Select0,Return0,CC,RE) ->
% First process the rest to see if capture was already present
{NewTail, Init1, Select1, Return1} =
- process_parameters(T,Init0,Select0,Return0,CC),
+ process_parameters(T,Init0,Select0,Return0,CC,RE),
case Select1 of
false ->
case Values of
all ->
{[{capture,all} | NewTail], Init1, all, Return0};
+ all_names ->
+ case re:inspect(RE,namelist) of
+ {namelist, []} ->
+ {[{capture,first} | NewTail], Init1, none, Return0};
+ {namelist, List} ->
+ {[{capture,[0|List]} | NewTail], Init1, stripfirst, Return0}
+ end;
first ->
{[{capture,first} | NewTail], Init1, all, Return0};
all_but_first ->
@@ -541,20 +586,20 @@ process_parameters([{capture,Values}|T],Init0,Select0,Return0,CC) ->
% Found overriding further down list, ignore this one
{NewTail, Init1, Select1, Return1}
end;
-process_parameters([H|T],Init0,Select0,Return0,true) ->
+process_parameters([H|T],Init0,Select0,Return0,true,RE) ->
case copt(H) of
true ->
- process_parameters(T,Init0,Select0,Return0,true);
+ process_parameters(T,Init0,Select0,Return0,true,RE);
false ->
{NewT,Init,Select,Return} =
- process_parameters(T,Init0,Select0,Return0,true),
+ process_parameters(T,Init0,Select0,Return0,true,RE),
{[H|NewT],Init,Select,Return}
end;
-process_parameters([H|T],Init0,Select0,Return0,false) ->
+process_parameters([H|T],Init0,Select0,Return0,false,RE) ->
{NewT,Init,Select,Return} =
- process_parameters(T,Init0,Select0,Return0,false),
+ process_parameters(T,Init0,Select0,Return0,false,RE),
{[H|NewT],Init,Select,Return};
-process_parameters(_,_,_,_,_) ->
+process_parameters(_,_,_,_,_,_) ->
throw(badlist).
postprocess({match,[]},_,_,_,_) ->
@@ -662,7 +707,7 @@ urun2(Subject0,RE0,Options0) ->
RE = case RE0 of
BinRE when is_binary(BinRE) ->
BinRE;
- {re_pattern,_,_,_} = ReCompiled ->
+ {re_pattern,_,_,_,_} = ReCompiled ->
ReCompiled;
ListRE ->
unicode:characters_to_binary(ListRE,unicode)
@@ -703,38 +748,46 @@ grun(Subject,RE,{Options,NeedClean,OrigRE}) ->
grun2(Subject,RE,{Options,NeedClean}) ->
Unicode = check_for_unicode(RE,Options),
+ CRLF = check_for_crlf(RE,Options),
FlatSubject = to_binary(Subject, Unicode),
- do_grun(FlatSubject,Subject,Unicode,RE,{Options,NeedClean}).
+ do_grun(FlatSubject,Subject,Unicode,CRLF,RE,{Options,NeedClean}).
-do_grun(FlatSubject,Subject,Unicode,RE,{Options0,NeedClean}) ->
+do_grun(FlatSubject,Subject,Unicode,CRLF,RE,{Options0,NeedClean}) ->
{StrippedOptions, InitialOffset,
SelectReturn, ConvertReturn} =
case (catch
- process_parameters(Options0, 0, false, index, NeedClean)) of
+ process_parameters(Options0, 0, false, index, NeedClean,RE)) of
badlist ->
erlang:error(badarg,[Subject,RE,Options0]);
CorrectReturn ->
CorrectReturn
end,
- postprocess(loopexec(FlatSubject,RE,InitialOffset,
- byte_size(FlatSubject),
- Unicode,StrippedOptions),
- SelectReturn,ConvertReturn,FlatSubject,Unicode).
+ try
+ postprocess(loopexec(FlatSubject,RE,InitialOffset,
+ byte_size(FlatSubject),
+ Unicode,CRLF,StrippedOptions),
+ SelectReturn,ConvertReturn,FlatSubject,Unicode)
+ catch
+ throw:ErrTuple ->
+ ErrTuple
+ end.
-loopexec(_,_,X,Y,_,_) when X > Y ->
+loopexec(_,_,X,Y,_,_,_) when X > Y ->
{match,[]};
-loopexec(Subject,RE,X,Y,Unicode,Options) ->
+loopexec(Subject,RE,X,Y,Unicode,CRLF,Options) ->
case re:run(Subject,RE,[{offset,X}]++Options) of
+ {error, Err} ->
+ throw({error,Err});
nomatch ->
{match,[]};
{match,[{A,B}|More]} ->
{match,Rest} =
case B>0 of
true ->
- loopexec(Subject,RE,A+B,Y,Unicode,Options);
+ loopexec(Subject,RE,A+B,Y,Unicode,CRLF,Options);
false ->
{match,M} =
- case re:run(Subject,RE,[{offset,X},notempty,
+ case re:run(Subject,RE,[{offset,X},notempty_atstart,
anchored]++Options) of
nomatch ->
{match,[]};
@@ -745,10 +798,10 @@ loopexec(Subject,RE,X,Y,Unicode,Options) ->
[{_,NStep}|_] when NStep > 0 ->
A+NStep;
_ ->
- forward(Subject,A,1,Unicode)
+ forward(Subject,A,1,Unicode,CRLF)
end,
{match,MM} = loopexec(Subject,RE,NewA,Y,
- Unicode,Options),
+ Unicode,CRLF,Options),
case M of
[] ->
{match,MM};
@@ -759,11 +812,22 @@ loopexec(Subject,RE,X,Y,Unicode,Options) ->
{match,[[{A,B}|More] | Rest]}
end.
-forward(_Chal,A,0,_) ->
+forward(_Chal,A,0,_,_) ->
A;
-forward(_Chal,A,N,false) ->
- A+N;
-forward(Chal,A,N,true) ->
+forward(Chal,A,N,U,true) ->
+ <<_:A/binary,Tl/binary>> = Chal,
+ case Tl of
+ <<$\r,$\n,_/binary>> ->
+ forward(Chal,A+2,N-1,U,true);
+ _ ->
+ forward2(Chal,A,N,U,true)
+ end;
+forward(Chal,A,N,U,false) ->
+ forward2(Chal,A,N,U,false).
+
+forward2(Chal,A,N,false,CRLF) ->
+ forward(Chal,A+1,N-1,false,CRLF);
+forward2(Chal,A,N,true,CRLF) ->
<<_:A/binary,Tl/binary>> = Chal,
Forw = case Tl of
<<1:1,1:1,0:1,_:5,_/binary>> ->
@@ -775,10 +839,16 @@ forward(Chal,A,N,true) ->
_ ->
1
end,
- forward(Chal,A+Forw,N-1,true).
+ forward(Chal,A+Forw,N-1,true,CRLF).
copt(caseless) ->
true;
+copt(no_start_optimize) ->
+ true;
+copt(never_utf) ->
+ true;
+copt(ucp) ->
+ true;
copt(dollar_endonly) ->
true;
copt(dotall) ->
@@ -809,6 +879,8 @@ copt(_) ->
runopt(notempty) ->
true;
+runopt(notempty_atstart) ->
+ true;
runopt(notbol) ->
true;
runopt(noteol) ->
@@ -821,6 +893,10 @@ runopt({capture,_}) ->
true;
runopt(global) ->
true;
+runopt({match_limit,_}) ->
+ true;
+runopt({match_limit_recursion,_}) ->
+ true;
runopt(_) ->
false.