aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPatrik Nyblom <[email protected]>2013-07-30 17:41:14 +0200
committerPatrik Nyblom <[email protected]>2013-08-09 12:10:43 +0200
commit1f4c016785a924b2e42fbb7858640be3d46e9625 (patch)
tree76805d61bdb6d35d5acbfa6e0a453a132765b340
parent00999b464b9dbbbe4d538490a8892557c1d9d6df (diff)
downloadotp-1f4c016785a924b2e42fbb7858640be3d46e9625.tar.gz
otp-1f4c016785a924b2e42fbb7858640be3d46e9625.tar.bz2
otp-1f4c016785a924b2e42fbb7858640be3d46e9625.zip
Add report_errors option to re:run/3
-rw-r--r--erts/emulator/beam/atom.names3
-rw-r--r--erts/emulator/beam/erl_bif_re.c56
-rw-r--r--lib/stdlib/src/re.erl32
-rw-r--r--lib/stdlib/test/re_SUITE.erl179
4 files changed, 170 insertions, 100 deletions
diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names
index cf8f511b85..8e5a079181 100644
--- a/erts/emulator/beam/atom.names
+++ b/erts/emulator/beam/atom.names
@@ -320,6 +320,8 @@ atom low
atom Lt='<'
atom machine
atom match
+atom match_limit
+atom match_limit_recursion
atom match_spec
atom max
atom maximum
@@ -477,6 +479,7 @@ atom register
atom registered_name
atom reload
atom rem
+atom report_errors
atom reset
atom restart
atom return_from
diff --git a/erts/emulator/beam/erl_bif_re.c b/erts/emulator/beam/erl_bif_re.c
index c74125ae41..46b5b848cf 100644
--- a/erts/emulator/beam/erl_bif_re.c
+++ b/erts/emulator/beam/erl_bif_re.c
@@ -180,6 +180,7 @@ static Eterm make_signed_integer(int x, Process *p)
#define PARSE_FLAG_STARTOFFSET 8
#define PARSE_FLAG_CAPTURE_OPT 16
#define PARSE_FLAG_GLOBAL 32
+#define PARSE_FLAG_REPORT_ERRORS 64
#define CAPSPEC_VALUES 0
#define CAPSPEC_TYPE 1
@@ -276,7 +277,7 @@ parse_options(Eterm listp, /* in */
default:
return -1;
}
- }else if (is_not_atom(item)) {
+ } else if (is_not_atom(item)) {
return -1;
} else {
switch(item) {
@@ -348,6 +349,10 @@ parse_options(Eterm listp, /* in */
copt |= PCRE_NEVER_UTF;
fl |= PARSE_FLAG_UNIQUE_COMPILE_OPT;
break;
+ case am_report_errors:
+ fl |= (PARSE_FLAG_UNIQUE_EXEC_OPT |
+ PARSE_FLAG_REPORT_ERRORS);
+ break;
case am_unicode:
copt |= PCRE_UTF8;
fl |= (PARSE_FLAG_UNIQUE_COMPILE_OPT | PARSE_FLAG_UNICODE);
@@ -389,7 +394,7 @@ parse_options(Eterm listp, /* in */
*/
static Eterm
-build_compile_result(Process *p, Eterm error_tag, pcre *result, int errcode, const char *errstr, int errofset, int unicode, int with_ok)
+build_compile_result(Process *p, Eterm error_tag, pcre *result, int errcode, const char *errstr, int errofset, int unicode, int with_ok, Eterm extra_err_tag)
{
Eterm *hp;
Eterm ret;
@@ -402,11 +407,18 @@ build_compile_result(Process *p, Eterm error_tag, pcre *result, int errcode, con
int elen = sys_strlen(errstr);
int need = 3 /* tuple of 2 */ +
3 /* tuple of 2 */ +
- (2 * elen) /* The error string list */;
+ (2 * elen) /* The error string list */ +
+ ((extra_err_tag != NIL) ? 3 : 0);
hp = HAlloc(p, need);
ret = buf_to_intlist(&hp, (char *) errstr, elen, NIL);
ret = TUPLE2(hp, ret, make_small(errofset));
hp += 3;
+ if (extra_err_tag != NIL) {
+ /* Return {error_tag, {extra_tag,
+ {String, Offset}}} instead */
+ ret = TUPLE2(hp, extra_err_tag, ret);
+ hp += 3;
+ }
ret = TUPLE2(hp, error_tag, ret);
} else {
erts_pcre_fullinfo(result, NULL, PCRE_INFO_SIZE, &pattern_size);
@@ -478,7 +490,7 @@ re_compile(Process* p, Eterm arg1, Eterm arg2)
&errstr, &errofset, default_table);
ret = build_compile_result(p, am_error, result, errcode,
- errstr, errofset, unicode, 1);
+ errstr, errofset, unicode, 1, NIL);
erts_free(ERTS_ALC_T_RE_TMP_BUF, expr);
BIF_RET(ret);
}
@@ -526,6 +538,7 @@ typedef struct _restart_context {
} RestartContext;
#define RESTART_FLAG_SUBJECT_IN_BINARY 0x1
+#define RESTART_FLAG_REPORT_MATCH_LIMIT 0x2
static void cleanup_restart_context(RestartContext *rc)
{
@@ -566,7 +579,19 @@ static Eterm build_exec_return(Process *p, int rc, RestartContext *restartp, Ete
Eterm res;
Eterm *hp;
if (rc <= 0) {
- res = am_nomatch;
+ if (restartp->flags & RESTART_FLAG_REPORT_MATCH_LIMIT) {
+ if (rc == PCRE_ERROR_MATCHLIMIT) {
+ hp = HAlloc(p,3);
+ res = TUPLE2(hp,am_error,am_match_limit);
+ } else if (rc == PCRE_ERROR_RECURSIONLIMIT) {
+ hp = HAlloc(p,3);
+ res = TUPLE2(hp,am_error,am_match_limit_recursion);
+ } else {
+ res = am_nomatch;
+ }
+ } else {
+ res = am_nomatch;
+ }
} else {
ReturnInfo *ri = restartp->ret_info;
ReturnInfo defri = {RetIndex,0,{0}};
@@ -1043,10 +1068,20 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3)
result = erts_pcre_compile2(expr, comp_options, &errcode,
&errstr, &errofset, default_table);
if (!result) {
- erts_free(ERTS_ALC_T_RE_TMP_BUF, expr);
/* Compilation error gives badarg except in the compile
- function */
- BIF_ERROR(p,BADARG);
+ function or if we have PARSE_FLAG_REPORT_ERRORS */
+ if (pflags & PARSE_FLAG_REPORT_ERRORS) {
+ res = build_compile_result(p, am_error, result, errcode,
+ errstr, errofset,
+ (pflags &
+ PARSE_FLAG_UNICODE) ? 1 : 0,
+ 1, am_compile);
+ erts_free(ERTS_ALC_T_RE_TMP_BUF, expr);
+ BIF_RET(res);
+ } else {
+ erts_free(ERTS_ALC_T_RE_TMP_BUF, expr);
+ BIF_ERROR(p,BADARG);
+ }
}
if (pflags & PARSE_FLAG_GLOBAL) {
Eterm precompiled =
@@ -1055,7 +1090,7 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3)
errstr, errofset,
(pflags &
PARSE_FLAG_UNICODE) ? 1 : 0,
- 0);
+ 0, NIL);
Eterm *hp,r;
erts_free(ERTS_ALC_T_RE_TMP_BUF, expr);
hp = HAlloc(p,4);
@@ -1190,6 +1225,9 @@ handle_iolist:
}
}
+ if (pflags & PARSE_FLAG_REPORT_ERRORS) {
+ restart.flags |= RESTART_FLAG_REPORT_MATCH_LIMIT;
+ }
#ifdef DEBUG
loop_count = 0xFFFFFFFF;
diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl
index 79176ff317..649e87ef2c 100644
--- a/lib/stdlib/src/re.erl
+++ b/lib/stdlib/src/re.erl
@@ -64,11 +64,13 @@ run(_, _) ->
-spec run(Subject, RE, Options) -> {match, Captured} |
match |
- nomatch when
+ nomatch |
+ {error, ErrType} when
Subject :: iodata() | unicode:charlist(),
RE :: mp() | iodata() | unicode:charlist(),
Options :: [Option],
- Option :: anchored | global | notbol | noteol | notempty | notempty_atstart
+ Option :: anchored | global | notbol | noteol | notempty
+ | notempty_atstart | report_errors
| {offset, non_neg_integer()} |
{newline, NLSpec :: nl_spec()} |
bsr_anycrlf | bsr_unicode | {capture, ValueSpec} |
@@ -84,7 +86,9 @@ run(_, _) ->
| binary(),
ListConversionData :: string()
| {error, string(), binary()}
- | {incomplete, string(), binary()}.
+ | {incomplete, string(), binary()},
+ ErrType :: match_limit | match_limit_recursion | {compile, CompileErr},
+ CompileErr :: {ErrString :: string(), Position :: non_neg_integer()}.
run(_, _, _) ->
erlang:nif_error(undef).
@@ -304,7 +308,8 @@ replace(Subject,RE,Replacement) ->
RE :: mp() | iodata() | unicode:charlist(),
Replacement :: iodata() | unicode:charlist(),
Options :: [Option],
- Option :: anchored | global | notbol | noteol | notempty | notempty_atstart
+ Option :: anchored | global | notbol | noteol | notempty
+ | notempty_atstart
| {offset, non_neg_integer()} | {newline, NLSpec} | bsr_anycrlf
| bsr_unicode | {return, ReturnType} | CompileOpt,
ReturnType :: iodata | list | binary,
@@ -361,6 +366,8 @@ process_repl_params([],Convert,Unicode) ->
process_repl_params([unicode|T],C,_U) ->
{NT,NC,NU} = process_repl_params(T,C,true),
{[unicode|NT],NC,NU};
+process_repl_params([report_errors|_],_,_) ->
+ throw(badopt);
process_repl_params([{capture,_,_}|_],_,_) ->
throw(badopt);
process_repl_params([{capture,_}|_],_,_) ->
@@ -396,6 +403,8 @@ process_split_params([group|T],C,U,L,S,_G) ->
process_split_params(T,C,U,L,S,true);
process_split_params([global|_],_,_,_,_,_) ->
throw(badopt);
+process_split_params([report_errors|_],_,_,_,_,_) ->
+ throw(badopt);
process_split_params([{capture,_,_}|_],_,_,_,_,_) ->
throw(badopt);
process_split_params([{capture,_}|_],_,_,_,_,_) ->
@@ -747,15 +756,22 @@ do_grun(FlatSubject,Subject,Unicode,CRLF,RE,{Options0,NeedClean}) ->
CorrectReturn ->
CorrectReturn
end,
- postprocess(loopexec(FlatSubject,RE,InitialOffset,
- byte_size(FlatSubject),
- Unicode,CRLF,StrippedOptions),
- SelectReturn,ConvertReturn,FlatSubject,Unicode).
+ try
+ postprocess(loopexec(FlatSubject,RE,InitialOffset,
+ byte_size(FlatSubject),
+ Unicode,CRLF,StrippedOptions),
+ SelectReturn,ConvertReturn,FlatSubject,Unicode)
+ catch
+ throw:ErrTuple ->
+ ErrTuple
+ end.
loopexec(_,_,X,Y,_,_,_) when X > Y ->
{match,[]};
loopexec(Subject,RE,X,Y,Unicode,CRLF,Options) ->
case re:run(Subject,RE,[{offset,X}]++Options) of
+ {error, Err} ->
+ throw({error,Err});
nomatch ->
{match,[]};
{match,[{A,B}|More]} ->
diff --git a/lib/stdlib/test/re_SUITE.erl b/lib/stdlib/test/re_SUITE.erl
index 129f2b3e4c..1a2f1e0ac5 100644
--- a/lib/stdlib/test/re_SUITE.erl
+++ b/lib/stdlib/test/re_SUITE.erl
@@ -472,115 +472,125 @@ error_handling() ->
% The malformed precompiled RE is detected after
% the trap to re:grun from grun, in the grun function clause
% that handles precompiled expressions
- ?line {'EXIT',{badarg,[{re,run,["apa",{1,2,3,4},[global]],_},
- {?MODULE,error_handling,0,_} | _]}} =
+ {'EXIT',{badarg,[{re,run,["apa",{1,2,3,4},[global]],_},
+ {?MODULE,error_handling,0,_} | _]}} =
(catch re:run("apa",{1,2,3,4},[global])),
% An invalid capture list will also cause a badarg late,
% but with a non pre compiled RE, the exception should be thrown by the
% grun function clause that handles RE's compiled implicitly by
% the run/3 BIF before trapping.
- ?line {'EXIT',{badarg,[{re,run,["apa","p",[{capture,[1,{a}]},global]],_},
- {?MODULE,error_handling,0,_} | _]}} =
+ {'EXIT',{badarg,[{re,run,["apa","p",[{capture,[1,{a}]},global]],_},
+ {?MODULE,error_handling,0,_} | _]}} =
(catch re:run("apa","p",[{capture,[1,{a}]},global])),
% And so the case of a precompiled expression together with
% a compile-option (binary and list subject):
- ?line {ok,RE} = re:compile("(p)"),
- ?line {match,[[{1,1},{1,1}]]} = re:run(<<"apa">>,RE,[global]),
- ?line {match,[[{1,1},{1,1}]]} = re:run("apa",RE,[global]),
- ?line {'EXIT',{badarg,[{re,run,
- [<<"apa">>,
- {re_pattern,1,0,_,_},
- [global,unicode]],_},
- {?MODULE,error_handling,0,_} | _]}} =
+ {ok,RE} = re:compile("(p)"),
+ {match,[[{1,1},{1,1}]]} = re:run(<<"apa">>,RE,[global]),
+ {match,[[{1,1},{1,1}]]} = re:run("apa",RE,[global]),
+ {'EXIT',{badarg,[{re,run,
+ [<<"apa">>,
+ {re_pattern,1,0,_,_},
+ [global,unicode]],_},
+ {?MODULE,error_handling,0,_} | _]}} =
(catch re:run(<<"apa">>,RE,[global,unicode])),
- ?line {'EXIT',{badarg,[{re,run,
- ["apa",
- {re_pattern,1,0,_,_},
- [global,unicode]],_},
- {?MODULE,error_handling,0,_} | _]}} =
+ {'EXIT',{badarg,[{re,run,
+ ["apa",
+ {re_pattern,1,0,_,_},
+ [global,unicode]],_},
+ {?MODULE,error_handling,0,_} | _]}} =
(catch re:run("apa",RE,[global,unicode])),
- ?line {'EXIT',{badarg,_}} = (catch re:run("apa","(p",[])),
- ?line {'EXIT',{badarg,_}} = (catch re:run("apa","(p",[global])),
+ {'EXIT',{badarg,_}} = (catch re:run("apa","(p",[])),
+ {error, {compile, {_,_}}} = re:run("apa","(p",[report_errors]),
+ {'EXIT',{badarg,_}} = (catch re:run("apa","(p",[global])),
+ {error, {compile, {_,_}}} = re:run("apa","(p",[report_errors,global]),
% The replace errors:
- ?line {'EXIT',{badarg,[{re,replace,["apa",{1,2,3,4},"X",[]],_},
- {?MODULE,error_handling,0,_} | _]}} =
+ {'EXIT',{badarg,[{re,replace,["apa",{1,2,3,4},"X",[]],_},
+ {?MODULE,error_handling,0,_} | _]}} =
(catch re:replace("apa",{1,2,3,4},"X",[])),
- ?line {'EXIT',{badarg,[{re,replace,["apa",{1,2,3,4},"X",[global]],_},
- {?MODULE,error_handling,0,_} | _]}} =
+ {'EXIT',{badarg,[{re,replace,["apa",{1,2,3,4},"X",[global]],_},
+ {?MODULE,error_handling,0,_} | _]}} =
(catch re:replace("apa",{1,2,3,4},"X",[global])),
- ?line {'EXIT',{badarg,[{re,replace,
- ["apa",
- {re_pattern,1,0,_,_},
- "X",
- [unicode]],_},
- {?MODULE,error_handling,0,_} | _]}} =
+ {'EXIT',{badarg,[{re,replace,
+ ["apa",
+ {re_pattern,1,0,_,_},
+ "X",
+ [unicode]],_},
+ {?MODULE,error_handling,0,_} | _]}} =
(catch re:replace("apa",RE,"X",[unicode])),
- ?line <<"aXa">> = iolist_to_binary(re:replace("apa","p","X",[])),
- ?line {'EXIT',{badarg,[{re,replace,
- ["apa","p","X",[{capture,all,binary}]],_},
- {?MODULE,error_handling,0,_} | _]}} =
+ <<"aXa">> = iolist_to_binary(re:replace("apa","p","X",[])),
+ {'EXIT',{badarg,[{re,replace,
+ ["apa","p","X",[report_errors]],_},
+ {?MODULE,error_handling,0,_} | _]}} =
(catch iolist_to_binary(re:replace("apa","p","X",
- [{capture,all,binary}]))),
- ?line {'EXIT',{badarg,[{re,replace,
- ["apa","p","X",[{capture,all}]],_},
- {?MODULE,error_handling,0,_} | _]}} =
+ [report_errors]))),
+ {'EXIT',{badarg,[{re,replace,
+ ["apa","p","X",[{capture,all,binary}]],_},
+ {?MODULE,error_handling,0,_} | _]}} =
(catch iolist_to_binary(re:replace("apa","p","X",
- [{capture,all}]))),
- ?line {'EXIT',{badarg,[{re,replace,
- ["apa","p","X",[{return,banana}]],_},
- {?MODULE,error_handling,0,_} | _]}} =
+ [{capture,all,binary}]))),
+ {'EXIT',{badarg,[{re,replace,
+ ["apa","p","X",[{capture,all}]],_},
+ {?MODULE,error_handling,0,_} | _]}} =
(catch iolist_to_binary(re:replace("apa","p","X",
- [{return,banana}]))),
- ?line {'EXIT',{badarg,_}} = (catch re:replace("apa","(p","X",[])),
+ [{capture,all}]))),
+ {'EXIT',{badarg,[{re,replace,
+ ["apa","p","X",[{return,banana}]],_},
+ {?MODULE,error_handling,0,_} | _]}} =
+ (catch iolist_to_binary(re:replace("apa","p","X",
+ [{return,banana}]))),
+ {'EXIT',{badarg,_}} = (catch re:replace("apa","(p","X",[])),
% Badarg, not compile error.
- ?line {'EXIT',{badarg,[{re,replace,
- ["apa","(p","X",[{return,banana}]],_},
- {?MODULE,error_handling,0,_} | _]}} =
+ {'EXIT',{badarg,[{re,replace,
+ ["apa","(p","X",[{return,banana}]],_},
+ {?MODULE,error_handling,0,_} | _]}} =
(catch iolist_to_binary(re:replace("apa","(p","X",
- [{return,banana}]))),
+ [{return,banana}]))),
% And the split errors:
- ?line [<<"a">>,<<"a">>] = (catch re:split("apa","p",[])),
- ?line [<<"a">>,<<"p">>,<<"a">>] = (catch re:split("apa",RE,[])),
- ?line {'EXIT',{badarg,[{re,split,["apa","p",[global]],_},
- {?MODULE,error_handling,0,_} | _]}} =
+ [<<"a">>,<<"a">>] = (catch re:split("apa","p",[])),
+ [<<"a">>,<<"p">>,<<"a">>] = (catch re:split("apa",RE,[])),
+ {'EXIT',{badarg,[{re,split,["apa","p",[report_errors]],_},
+ {?MODULE,error_handling,0,_} | _]}} =
+ (catch re:split("apa","p",[report_errors])),
+ {'EXIT',{badarg,[{re,split,["apa","p",[global]],_},
+ {?MODULE,error_handling,0,_} | _]}} =
(catch re:split("apa","p",[global])),
- ?line {'EXIT',{badarg,[{re,split,["apa","p",[{capture,all}]],_},
- {?MODULE,error_handling,0,_} | _]}} =
+ {'EXIT',{badarg,[{re,split,["apa","p",[{capture,all}]],_},
+ {?MODULE,error_handling,0,_} | _]}} =
(catch re:split("apa","p",[{capture,all}])),
- ?line {'EXIT',{badarg,[{re,split,["apa","p",[{capture,all,binary}]],_},
- {?MODULE, error_handling,0,_} | _]}} =
+ {'EXIT',{badarg,[{re,split,["apa","p",[{capture,all,binary}]],_},
+ {?MODULE, error_handling,0,_} | _]}} =
(catch re:split("apa","p",[{capture,all,binary}])),
- ?line {'EXIT',{badarg,[{re,split,["apa",{1,2,3,4},[]],_},
- {?MODULE,error_handling,0,_} | _]}} =
+ {'EXIT',{badarg,[{re,split,["apa",{1,2,3,4},[]],_},
+ {?MODULE,error_handling,0,_} | _]}} =
(catch re:split("apa",{1,2,3,4})),
- ?line {'EXIT',{badarg,[{re,split,["apa",{1,2,3,4},[]],_},
- {?MODULE,error_handling,0,_} | _]}} =
+ {'EXIT',{badarg,[{re,split,["apa",{1,2,3,4},[]],_},
+ {?MODULE,error_handling,0,_} | _]}} =
(catch re:split("apa",{1,2,3,4},[])),
- ?line {'EXIT',{badarg,[{re,split,
- ["apa",
- RE,
- [unicode]],_},
- {?MODULE,error_handling,0,_} | _]}} =
+ {'EXIT',{badarg,[{re,split,
+ ["apa",
+ RE,
+ [unicode]],_},
+ {?MODULE,error_handling,0,_} | _]}} =
(catch re:split("apa",RE,[unicode])),
- ?line {'EXIT',{badarg,[{re,split,
- ["apa",
- RE,
- [{return,banana}]],_},
- {?MODULE,error_handling,0,_} | _]}} =
+ {'EXIT',{badarg,[{re,split,
+ ["apa",
+ RE,
+ [{return,banana}]],_},
+ {?MODULE,error_handling,0,_} | _]}} =
(catch re:split("apa",RE,[{return,banana}])),
- ?line {'EXIT',{badarg,[{re,split,
- ["apa",
- RE,
- [banana]],_},
- {?MODULE,error_handling,0,_} | _]}} =
+ {'EXIT',{badarg,[{re,split,
+ ["apa",
+ RE,
+ [banana]],_},
+ {?MODULE,error_handling,0,_} | _]}} =
(catch re:split("apa",RE,[banana])),
- ?line {'EXIT',{badarg,_}} = (catch re:split("apa","(p")),
+ {'EXIT',{badarg,_}} = (catch re:split("apa","(p")),
%Exception on bad argument, not compilation error
- ?line {'EXIT',{badarg,[{re,split,
- ["apa",
- "(p",
- [banana]],_},
- {?MODULE,error_handling,0,_} | _]}} =
+ {'EXIT',{badarg,[{re,split,
+ ["apa",
+ "(p",
+ [banana]],_},
+ {?MODULE,error_handling,0,_} | _]}} =
(catch re:split("apa","(p",[banana])),
?t:timetrap_cancel(Dog),
ok.
@@ -603,14 +613,17 @@ re_infinite_loop(doc) ->
"Make sure matches that really loop infinitely actually fail";
re_infinite_loop(Config) when is_list(Config) ->
Dog = ?t:timetrap(?t:minutes(1)),
- ?line Str =
+ Str =
"http:/www.flickr.com/slideShow/index.gne?group_id=&user_id=69845378@N0",
- ?line EMail_regex = "[a-z0-9!#$%&'*+/=?^_`{|}~-]+"
+ EMail_regex = "[a-z0-9!#$%&'*+/=?^_`{|}~-]+"
++ "(\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*"
++ "@.*([a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+"
++ "([a-zA-Z]{2}|com|org|net|gov|mil"
++ "|biz|info|mobi|name|aero|jobs|museum)",
- ?line nomatch = re:run(Str, EMail_regex),
+ nomatch = re:run(Str, EMail_regex),
+ nomatch = re:run(Str, EMail_regex, [global]),
+ {error,match_limit} = re:run(Str, EMail_regex,[report_errors]),
+ {error,match_limit} = re:run(Str, EMail_regex,[report_errors,global]),
?t:timetrap_cancel(Dog),
ok.
re_backwards_accented(doc) ->