diff options
-rw-r--r-- | erts/emulator/beam/atom.names | 3 | ||||
-rw-r--r-- | erts/emulator/beam/erl_bif_re.c | 56 | ||||
-rw-r--r-- | lib/stdlib/src/re.erl | 32 | ||||
-rw-r--r-- | lib/stdlib/test/re_SUITE.erl | 179 |
4 files changed, 170 insertions, 100 deletions
diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names index cf8f511b85..8e5a079181 100644 --- a/erts/emulator/beam/atom.names +++ b/erts/emulator/beam/atom.names @@ -320,6 +320,8 @@ atom low atom Lt='<' atom machine atom match +atom match_limit +atom match_limit_recursion atom match_spec atom max atom maximum @@ -477,6 +479,7 @@ atom register atom registered_name atom reload atom rem +atom report_errors atom reset atom restart atom return_from diff --git a/erts/emulator/beam/erl_bif_re.c b/erts/emulator/beam/erl_bif_re.c index c74125ae41..46b5b848cf 100644 --- a/erts/emulator/beam/erl_bif_re.c +++ b/erts/emulator/beam/erl_bif_re.c @@ -180,6 +180,7 @@ static Eterm make_signed_integer(int x, Process *p) #define PARSE_FLAG_STARTOFFSET 8 #define PARSE_FLAG_CAPTURE_OPT 16 #define PARSE_FLAG_GLOBAL 32 +#define PARSE_FLAG_REPORT_ERRORS 64 #define CAPSPEC_VALUES 0 #define CAPSPEC_TYPE 1 @@ -276,7 +277,7 @@ parse_options(Eterm listp, /* in */ default: return -1; } - }else if (is_not_atom(item)) { + } else if (is_not_atom(item)) { return -1; } else { switch(item) { @@ -348,6 +349,10 @@ parse_options(Eterm listp, /* in */ copt |= PCRE_NEVER_UTF; fl |= PARSE_FLAG_UNIQUE_COMPILE_OPT; break; + case am_report_errors: + fl |= (PARSE_FLAG_UNIQUE_EXEC_OPT | + PARSE_FLAG_REPORT_ERRORS); + break; case am_unicode: copt |= PCRE_UTF8; fl |= (PARSE_FLAG_UNIQUE_COMPILE_OPT | PARSE_FLAG_UNICODE); @@ -389,7 +394,7 @@ parse_options(Eterm listp, /* in */ */ static Eterm -build_compile_result(Process *p, Eterm error_tag, pcre *result, int errcode, const char *errstr, int errofset, int unicode, int with_ok) +build_compile_result(Process *p, Eterm error_tag, pcre *result, int errcode, const char *errstr, int errofset, int unicode, int with_ok, Eterm extra_err_tag) { Eterm *hp; Eterm ret; @@ -402,11 +407,18 @@ build_compile_result(Process *p, Eterm error_tag, pcre *result, int errcode, con int elen = sys_strlen(errstr); int need = 3 /* tuple of 2 */ + 3 /* tuple of 2 */ + - (2 * elen) /* The error string list */; + (2 * elen) /* The error string list */ + + ((extra_err_tag != NIL) ? 3 : 0); hp = HAlloc(p, need); ret = buf_to_intlist(&hp, (char *) errstr, elen, NIL); ret = TUPLE2(hp, ret, make_small(errofset)); hp += 3; + if (extra_err_tag != NIL) { + /* Return {error_tag, {extra_tag, + {Code, String, Offset}}} instead */ + ret = TUPLE2(hp, extra_err_tag, ret); + hp += 3; + } ret = TUPLE2(hp, error_tag, ret); } else { erts_pcre_fullinfo(result, NULL, PCRE_INFO_SIZE, &pattern_size); @@ -478,7 +490,7 @@ re_compile(Process* p, Eterm arg1, Eterm arg2) &errstr, &errofset, default_table); ret = build_compile_result(p, am_error, result, errcode, - errstr, errofset, unicode, 1); + errstr, errofset, unicode, 1, NIL); erts_free(ERTS_ALC_T_RE_TMP_BUF, expr); BIF_RET(ret); } @@ -526,6 +538,7 @@ typedef struct _restart_context { } RestartContext; #define RESTART_FLAG_SUBJECT_IN_BINARY 0x1 +#define RESTART_FLAG_REPORT_MATCH_LIMIT 0x2 static void cleanup_restart_context(RestartContext *rc) { @@ -566,7 +579,19 @@ static Eterm build_exec_return(Process *p, int rc, RestartContext *restartp, Ete Eterm res; Eterm *hp; if (rc <= 0) { - res = am_nomatch; + if (restartp->flags & RESTART_FLAG_REPORT_MATCH_LIMIT) { + if (rc == PCRE_ERROR_MATCHLIMIT) { + hp = HAlloc(p,3); + res = TUPLE2(hp,am_error,am_match_limit); + } else if (rc == PCRE_ERROR_RECURSIONLIMIT) { + hp = HAlloc(p,3); + res = TUPLE2(hp,am_error,am_match_limit_recursion); + } else { + res = am_nomatch; + } + } else { + res = am_nomatch; + } } else { ReturnInfo *ri = restartp->ret_info; ReturnInfo defri = {RetIndex,0,{0}}; @@ -1043,10 +1068,20 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3) result = erts_pcre_compile2(expr, comp_options, &errcode, &errstr, &errofset, default_table); if (!result) { - erts_free(ERTS_ALC_T_RE_TMP_BUF, expr); /* Compilation error gives badarg except in the compile - function */ - BIF_ERROR(p,BADARG); + function or if we have PARSE_FLAG_REPORT_ERRORS */ + if (pflags & PARSE_FLAG_REPORT_ERRORS) { + res = build_compile_result(p, am_error, result, errcode, + errstr, errofset, + (pflags & + PARSE_FLAG_UNICODE) ? 1 : 0, + 1, am_compile); + erts_free(ERTS_ALC_T_RE_TMP_BUF, expr); + BIF_RET(res); + } else { + erts_free(ERTS_ALC_T_RE_TMP_BUF, expr); + BIF_ERROR(p,BADARG); + } } if (pflags & PARSE_FLAG_GLOBAL) { Eterm precompiled = @@ -1055,7 +1090,7 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3) errstr, errofset, (pflags & PARSE_FLAG_UNICODE) ? 1 : 0, - 0); + 0, NIL); Eterm *hp,r; erts_free(ERTS_ALC_T_RE_TMP_BUF, expr); hp = HAlloc(p,4); @@ -1190,6 +1225,9 @@ handle_iolist: } } + if (pflags & PARSE_FLAG_REPORT_ERRORS) { + restart.flags |= RESTART_FLAG_REPORT_MATCH_LIMIT; + } #ifdef DEBUG loop_count = 0xFFFFFFFF; diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl index 79176ff317..649e87ef2c 100644 --- a/lib/stdlib/src/re.erl +++ b/lib/stdlib/src/re.erl @@ -64,11 +64,13 @@ run(_, _) -> -spec run(Subject, RE, Options) -> {match, Captured} | match | - nomatch when + nomatch | + {error, ErrType} when Subject :: iodata() | unicode:charlist(), RE :: mp() | iodata() | unicode:charlist(), Options :: [Option], - Option :: anchored | global | notbol | noteol | notempty | notempty_atstart + Option :: anchored | global | notbol | noteol | notempty + | notempty_atstart | report_errors | {offset, non_neg_integer()} | {newline, NLSpec :: nl_spec()} | bsr_anycrlf | bsr_unicode | {capture, ValueSpec} | @@ -84,7 +86,9 @@ run(_, _) -> | binary(), ListConversionData :: string() | {error, string(), binary()} - | {incomplete, string(), binary()}. + | {incomplete, string(), binary()}, + ErrType :: match_limit | match_limit_recursion | {compile, CompileErr}, + CompileErr :: {ErrString :: string(), Position :: non_neg_integer()}. run(_, _, _) -> erlang:nif_error(undef). @@ -304,7 +308,8 @@ replace(Subject,RE,Replacement) -> RE :: mp() | iodata() | unicode:charlist(), Replacement :: iodata() | unicode:charlist(), Options :: [Option], - Option :: anchored | global | notbol | noteol | notempty | notempty_atstart + Option :: anchored | global | notbol | noteol | notempty + | notempty_atstart | {offset, non_neg_integer()} | {newline, NLSpec} | bsr_anycrlf | bsr_unicode | {return, ReturnType} | CompileOpt, ReturnType :: iodata | list | binary, @@ -361,6 +366,8 @@ process_repl_params([],Convert,Unicode) -> process_repl_params([unicode|T],C,_U) -> {NT,NC,NU} = process_repl_params(T,C,true), {[unicode|NT],NC,NU}; +process_repl_params([report_errors|_],_,_) -> + throw(badopt); process_repl_params([{capture,_,_}|_],_,_) -> throw(badopt); process_repl_params([{capture,_}|_],_,_) -> @@ -396,6 +403,8 @@ process_split_params([group|T],C,U,L,S,_G) -> process_split_params(T,C,U,L,S,true); process_split_params([global|_],_,_,_,_,_) -> throw(badopt); +process_split_params([report_errors|_],_,_,_,_,_) -> + throw(badopt); process_split_params([{capture,_,_}|_],_,_,_,_,_) -> throw(badopt); process_split_params([{capture,_}|_],_,_,_,_,_) -> @@ -747,15 +756,22 @@ do_grun(FlatSubject,Subject,Unicode,CRLF,RE,{Options0,NeedClean}) -> CorrectReturn -> CorrectReturn end, - postprocess(loopexec(FlatSubject,RE,InitialOffset, - byte_size(FlatSubject), - Unicode,CRLF,StrippedOptions), - SelectReturn,ConvertReturn,FlatSubject,Unicode). + try + postprocess(loopexec(FlatSubject,RE,InitialOffset, + byte_size(FlatSubject), + Unicode,CRLF,StrippedOptions), + SelectReturn,ConvertReturn,FlatSubject,Unicode) + catch + throw:ErrTuple -> + ErrTuple + end. loopexec(_,_,X,Y,_,_,_) when X > Y -> {match,[]}; loopexec(Subject,RE,X,Y,Unicode,CRLF,Options) -> case re:run(Subject,RE,[{offset,X}]++Options) of + {error, Err} -> + throw({error,Err}); nomatch -> {match,[]}; {match,[{A,B}|More]} -> diff --git a/lib/stdlib/test/re_SUITE.erl b/lib/stdlib/test/re_SUITE.erl index 129f2b3e4c..1a2f1e0ac5 100644 --- a/lib/stdlib/test/re_SUITE.erl +++ b/lib/stdlib/test/re_SUITE.erl @@ -472,115 +472,125 @@ error_handling() -> % The malformed precomiled RE is detected after % the trap to re:grun from grun, in the grun function clause % that handles precompiled expressions - ?line {'EXIT',{badarg,[{re,run,["apa",{1,2,3,4},[global]],_}, - {?MODULE,error_handling,0,_} | _]}} = + {'EXIT',{badarg,[{re,run,["apa",{1,2,3,4},[global]],_}, + {?MODULE,error_handling,0,_} | _]}} = (catch re:run("apa",{1,2,3,4},[global])), % An invalid capture list will also cause a badarg late, % but with a non pre compiled RE, the exception should be thrown by the % grun function clause that handles RE's compiled implicitly by % the run/3 BIF before trapping. - ?line {'EXIT',{badarg,[{re,run,["apa","p",[{capture,[1,{a}]},global]],_}, - {?MODULE,error_handling,0,_} | _]}} = + {'EXIT',{badarg,[{re,run,["apa","p",[{capture,[1,{a}]},global]],_}, + {?MODULE,error_handling,0,_} | _]}} = (catch re:run("apa","p",[{capture,[1,{a}]},global])), % And so the case of a precompiled expression together with % a compile-option (binary and list subject): - ?line {ok,RE} = re:compile("(p)"), - ?line {match,[[{1,1},{1,1}]]} = re:run(<<"apa">>,RE,[global]), - ?line {match,[[{1,1},{1,1}]]} = re:run("apa",RE,[global]), - ?line {'EXIT',{badarg,[{re,run, - [<<"apa">>, - {re_pattern,1,0,_,_}, - [global,unicode]],_}, - {?MODULE,error_handling,0,_} | _]}} = + {ok,RE} = re:compile("(p)"), + {match,[[{1,1},{1,1}]]} = re:run(<<"apa">>,RE,[global]), + {match,[[{1,1},{1,1}]]} = re:run("apa",RE,[global]), + {'EXIT',{badarg,[{re,run, + [<<"apa">>, + {re_pattern,1,0,_,_}, + [global,unicode]],_}, + {?MODULE,error_handling,0,_} | _]}} = (catch re:run(<<"apa">>,RE,[global,unicode])), - ?line {'EXIT',{badarg,[{re,run, - ["apa", - {re_pattern,1,0,_,_}, - [global,unicode]],_}, - {?MODULE,error_handling,0,_} | _]}} = + {'EXIT',{badarg,[{re,run, + ["apa", + {re_pattern,1,0,_,_}, + [global,unicode]],_}, + {?MODULE,error_handling,0,_} | _]}} = (catch re:run("apa",RE,[global,unicode])), - ?line {'EXIT',{badarg,_}} = (catch re:run("apa","(p",[])), - ?line {'EXIT',{badarg,_}} = (catch re:run("apa","(p",[global])), + {'EXIT',{badarg,_}} = (catch re:run("apa","(p",[])), + {error, {compile, {_,_}}} = re:run("apa","(p",[report_errors]), + {'EXIT',{badarg,_}} = (catch re:run("apa","(p",[global])), + {error, {compile, {_,_}}} = re:run("apa","(p",[report_errors,global]), % The replace errors: - ?line {'EXIT',{badarg,[{re,replace,["apa",{1,2,3,4},"X",[]],_}, - {?MODULE,error_handling,0,_} | _]}} = + {'EXIT',{badarg,[{re,replace,["apa",{1,2,3,4},"X",[]],_}, + {?MODULE,error_handling,0,_} | _]}} = (catch re:replace("apa",{1,2,3,4},"X",[])), - ?line {'EXIT',{badarg,[{re,replace,["apa",{1,2,3,4},"X",[global]],_}, - {?MODULE,error_handling,0,_} | _]}} = + {'EXIT',{badarg,[{re,replace,["apa",{1,2,3,4},"X",[global]],_}, + {?MODULE,error_handling,0,_} | _]}} = (catch re:replace("apa",{1,2,3,4},"X",[global])), - ?line {'EXIT',{badarg,[{re,replace, - ["apa", - {re_pattern,1,0,_,_}, - "X", - [unicode]],_}, - {?MODULE,error_handling,0,_} | _]}} = + {'EXIT',{badarg,[{re,replace, + ["apa", + {re_pattern,1,0,_,_}, + "X", + [unicode]],_}, + {?MODULE,error_handling,0,_} | _]}} = (catch re:replace("apa",RE,"X",[unicode])), - ?line <<"aXa">> = iolist_to_binary(re:replace("apa","p","X",[])), - ?line {'EXIT',{badarg,[{re,replace, - ["apa","p","X",[{capture,all,binary}]],_}, - {?MODULE,error_handling,0,_} | _]}} = + <<"aXa">> = iolist_to_binary(re:replace("apa","p","X",[])), + {'EXIT',{badarg,[{re,replace, + ["apa","p","X",[report_errors]],_}, + {?MODULE,error_handling,0,_} | _]}} = (catch iolist_to_binary(re:replace("apa","p","X", - [{capture,all,binary}]))), - ?line {'EXIT',{badarg,[{re,replace, - ["apa","p","X",[{capture,all}]],_}, - {?MODULE,error_handling,0,_} | _]}} = + [report_errors]))), + {'EXIT',{badarg,[{re,replace, + ["apa","p","X",[{capture,all,binary}]],_}, + {?MODULE,error_handling,0,_} | _]}} = (catch iolist_to_binary(re:replace("apa","p","X", - [{capture,all}]))), - ?line {'EXIT',{badarg,[{re,replace, - ["apa","p","X",[{return,banana}]],_}, - {?MODULE,error_handling,0,_} | _]}} = + [{capture,all,binary}]))), + {'EXIT',{badarg,[{re,replace, + ["apa","p","X",[{capture,all}]],_}, + {?MODULE,error_handling,0,_} | _]}} = (catch iolist_to_binary(re:replace("apa","p","X", - [{return,banana}]))), - ?line {'EXIT',{badarg,_}} = (catch re:replace("apa","(p","X",[])), + [{capture,all}]))), + {'EXIT',{badarg,[{re,replace, + ["apa","p","X",[{return,banana}]],_}, + {?MODULE,error_handling,0,_} | _]}} = + (catch iolist_to_binary(re:replace("apa","p","X", + [{return,banana}]))), + {'EXIT',{badarg,_}} = (catch re:replace("apa","(p","X",[])), % Badarg, not compile error. - ?line {'EXIT',{badarg,[{re,replace, - ["apa","(p","X",[{return,banana}]],_}, - {?MODULE,error_handling,0,_} | _]}} = + {'EXIT',{badarg,[{re,replace, + ["apa","(p","X",[{return,banana}]],_}, + {?MODULE,error_handling,0,_} | _]}} = (catch iolist_to_binary(re:replace("apa","(p","X", - [{return,banana}]))), + [{return,banana}]))), % And the split errors: - ?line [<<"a">>,<<"a">>] = (catch re:split("apa","p",[])), - ?line [<<"a">>,<<"p">>,<<"a">>] = (catch re:split("apa",RE,[])), - ?line {'EXIT',{badarg,[{re,split,["apa","p",[global]],_}, - {?MODULE,error_handling,0,_} | _]}} = + [<<"a">>,<<"a">>] = (catch re:split("apa","p",[])), + [<<"a">>,<<"p">>,<<"a">>] = (catch re:split("apa",RE,[])), + {'EXIT',{badarg,[{re,split,["apa","p",[report_errors]],_}, + {?MODULE,error_handling,0,_} | _]}} = + (catch re:split("apa","p",[report_errors])), + {'EXIT',{badarg,[{re,split,["apa","p",[global]],_}, + {?MODULE,error_handling,0,_} | _]}} = (catch re:split("apa","p",[global])), - ?line {'EXIT',{badarg,[{re,split,["apa","p",[{capture,all}]],_}, - {?MODULE,error_handling,0,_} | _]}} = + {'EXIT',{badarg,[{re,split,["apa","p",[{capture,all}]],_}, + {?MODULE,error_handling,0,_} | _]}} = (catch re:split("apa","p",[{capture,all}])), - ?line {'EXIT',{badarg,[{re,split,["apa","p",[{capture,all,binary}]],_}, - {?MODULE, error_handling,0,_} | _]}} = + {'EXIT',{badarg,[{re,split,["apa","p",[{capture,all,binary}]],_}, + {?MODULE, error_handling,0,_} | _]}} = (catch re:split("apa","p",[{capture,all,binary}])), - ?line {'EXIT',{badarg,[{re,split,["apa",{1,2,3,4},[]],_}, - {?MODULE,error_handling,0,_} | _]}} = + {'EXIT',{badarg,[{re,split,["apa",{1,2,3,4},[]],_}, + {?MODULE,error_handling,0,_} | _]}} = (catch re:split("apa",{1,2,3,4})), - ?line {'EXIT',{badarg,[{re,split,["apa",{1,2,3,4},[]],_}, - {?MODULE,error_handling,0,_} | _]}} = + {'EXIT',{badarg,[{re,split,["apa",{1,2,3,4},[]],_}, + {?MODULE,error_handling,0,_} | _]}} = (catch re:split("apa",{1,2,3,4},[])), - ?line {'EXIT',{badarg,[{re,split, - ["apa", - RE, - [unicode]],_}, - {?MODULE,error_handling,0,_} | _]}} = + {'EXIT',{badarg,[{re,split, + ["apa", + RE, + [unicode]],_}, + {?MODULE,error_handling,0,_} | _]}} = (catch re:split("apa",RE,[unicode])), - ?line {'EXIT',{badarg,[{re,split, - ["apa", - RE, - [{return,banana}]],_}, - {?MODULE,error_handling,0,_} | _]}} = + {'EXIT',{badarg,[{re,split, + ["apa", + RE, + [{return,banana}]],_}, + {?MODULE,error_handling,0,_} | _]}} = (catch re:split("apa",RE,[{return,banana}])), - ?line {'EXIT',{badarg,[{re,split, - ["apa", - RE, - [banana]],_}, - {?MODULE,error_handling,0,_} | _]}} = + {'EXIT',{badarg,[{re,split, + ["apa", + RE, + [banana]],_}, + {?MODULE,error_handling,0,_} | _]}} = (catch re:split("apa",RE,[banana])), - ?line {'EXIT',{badarg,_}} = (catch re:split("apa","(p")), + {'EXIT',{badarg,_}} = (catch re:split("apa","(p")), %Exception on bad argument, not compilation error - ?line {'EXIT',{badarg,[{re,split, - ["apa", - "(p", - [banana]],_}, - {?MODULE,error_handling,0,_} | _]}} = + {'EXIT',{badarg,[{re,split, + ["apa", + "(p", + [banana]],_}, + {?MODULE,error_handling,0,_} | _]}} = (catch re:split("apa","(p",[banana])), ?t:timetrap_cancel(Dog), ok. @@ -603,14 +613,17 @@ re_infinite_loop(doc) -> "Make sure matches that really loop infinitely actually fail"; re_infinite_loop(Config) when is_list(Config) -> Dog = ?t:timetrap(?t:minutes(1)), - ?line Str = + Str = "http:/www.flickr.com/slideShow/index.gne?group_id=&user_id=69845378@N0", - ?line EMail_regex = "[a-z0-9!#$%&'*+/=?^_`{|}~-]+" + EMail_regex = "[a-z0-9!#$%&'*+/=?^_`{|}~-]+" ++ "(\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*" ++ "@.*([a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+" ++ "([a-zA-Z]{2}|com|org|net|gov|mil" ++ "|biz|info|mobi|name|aero|jobs|museum)", - ?line nomatch = re:run(Str, EMail_regex), + nomatch = re:run(Str, EMail_regex), + nomatch = re:run(Str, EMail_regex, [global]), + {error,match_limit} = re:run(Str, EMail_regex,[report_errors]), + {error,match_limit} = re:run(Str, EMail_regex,[report_errors,global]), ?t:timetrap_cancel(Dog), ok. re_backwards_accented(doc) -> |