diff options
author | Patrik Nyblom <[email protected]> | 2013-07-31 15:17:27 +0200 |
---|---|---|
committer | Patrik Nyblom <[email protected]> | 2013-08-09 12:10:44 +0200 |
commit | 8cbc9296944b5d1397d15e5615890b61549d5064 (patch) | |
tree | 86a9a61f4e3f6b60dd745fadc154489a14db64bf | |
parent | 1f4c016785a924b2e42fbb7858640be3d46e9625 (diff) | |
download | otp-8cbc9296944b5d1397d15e5615890b61549d5064.tar.gz otp-8cbc9296944b5d1397d15e5615890b61549d5064.tar.bz2 otp-8cbc9296944b5d1397d15e5615890b61549d5064.zip |
Add match_limit and match_limit_recursion options
Added to re:run and sets the corresponding fields in 'extra' struct
for the PCRE match engine. The result can be viewed by also
setting 'report_errors' when matching.
Some housekeeping was also done...
The offset option also did not properly check for offset's >= 0.
Change nomatch to BADARG when pre-compiled mp() is faked:
By constructing a 5-tuple with faked content but the right data types,
you could do a re:run which returned nomatch when in fact the mp() was
bad. The cheapest solution is to check the return from pcre_exec
better.
Remove unreachable code in erts_bif_re.c:
Replaced tests for things that logically simply
cannot happen with ASSERT.
-rw-r--r-- | erts/emulator/beam/erl_bif_re.c | 156 | ||||
-rw-r--r-- | lib/stdlib/src/re.erl | 10 | ||||
-rw-r--r-- | lib/stdlib/test/re_SUITE.erl | 42 |
3 files changed, 170 insertions, 38 deletions
diff --git a/erts/emulator/beam/erl_bif_re.c b/erts/emulator/beam/erl_bif_re.c index 46b5b848cf..796fd301ad 100644 --- a/erts/emulator/beam/erl_bif_re.c +++ b/erts/emulator/beam/erl_bif_re.c @@ -181,6 +181,8 @@ static Eterm make_signed_integer(int x, Process *p) #define PARSE_FLAG_CAPTURE_OPT 16 #define PARSE_FLAG_GLOBAL 32 #define PARSE_FLAG_REPORT_ERRORS 64 +#define PARSE_FLAG_MATCH_LIMIT 128 +#define PARSE_FLAG_MATCH_LIMIT_RECURSION 256 #define CAPSPEC_VALUES 0 #define CAPSPEC_TYPE 1 @@ -193,7 +195,9 @@ parse_options(Eterm listp, /* in */ int *exec_options, /* out */ int *flags,/* out */ int *startoffset, /* out */ - Eterm *capture_spec) /* capture_spec[CAPSPEC_SIZE] */ /* out */ + Eterm *capture_spec, /* capture_spec[CAPSPEC_SIZE] */ /* out */ + int *match_limit, /* out */ + int *match_limit_recursion) /* out */ { int copt,eopt,fl; Eterm item; @@ -235,7 +239,7 @@ parse_options(Eterm listp, /* in */ case am_offset: { int tmp; - if (!term_to_int(tp[2],&tmp)) { + if (!term_to_int(tp[2],&tmp) || tmp < 0) { return -1; } if (startoffset != NULL) { @@ -244,6 +248,31 @@ parse_options(Eterm listp, /* in */ } fl |= (PARSE_FLAG_UNIQUE_EXEC_OPT|PARSE_FLAG_STARTOFFSET); break; + case am_match_limit: + { + int tmp; + if (!term_to_int(tp[2],&tmp) || tmp < 0) { + return -1; + } + if (match_limit != NULL) { + *match_limit = tmp; + } + } + fl |= (PARSE_FLAG_UNIQUE_EXEC_OPT|PARSE_FLAG_MATCH_LIMIT); + break; + case am_match_limit_recursion: + { + int tmp; + if (!term_to_int(tp[2],&tmp) || tmp < 0) { + return -1; + } + if (match_limit_recursion != NULL) { + *match_limit_recursion = tmp; + } + } + fl |= (PARSE_FLAG_UNIQUE_EXEC_OPT| + PARSE_FLAG_MATCH_LIMIT_RECURSION); + break; case am_newline: if (!is_atom(tp[2])) { return -1; @@ -460,9 +489,12 @@ re_compile(Process* p, Eterm arg1, Eterm arg2) int options = 0; int pflags = 0; int unicode = 0; +#ifdef DEBUG + int buffres; +#endif - if (parse_options(arg2,&options,NULL,&pflags,NULL,NULL) + if (parse_options(arg2,&options,NULL,&pflags,NULL,NULL,NULL,NULL) < 0) { BIF_ERROR(p,BADARG); } @@ -481,10 +513,13 @@ re_compile(Process* p, Eterm arg1, Eterm arg2) BIF_ERROR(p,BADARG); } expr = erts_alloc(ERTS_ALC_T_RE_TMP_BUF, slen + 1); - if (erts_iolist_to_buf(arg1, expr, slen) != 0) { - erts_free(ERTS_ALC_T_RE_TMP_BUF, expr); - BIF_ERROR(p,BADARG); - } +#ifdef DEBUG + buffres = +#endif + erts_iolist_to_buf(arg1, expr, slen); + + ASSERT(buffres >= 0); + expr[slen]='\0'; result = erts_pcre_compile2(expr, options, &errcode, &errstr, &errofset, default_table); @@ -910,8 +945,7 @@ build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code) for(i=0;i<top;++i) { if (last == NULL || !has_dupnames || strcmp(last+2,nametable+2)) { - if (ri->num_spec < 0) - ri->num_spec = 0; + ASSERT(ri->num_spec >= 0); ++(ri->num_spec); if(ri->num_spec > sallocated) { sallocated += 10; @@ -936,8 +970,7 @@ build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code) for(l=capture_spec[CAPSPEC_VALUES];is_list(l);l = CDR(list_val(l))) { int x; Eterm val = CAR(list_val(l)); - if (ri->num_spec < 0) - ri->num_spec = 0; + ASSERT(ri->num_spec >= 0); ++(ri->num_spec); if(ri->num_spec > sallocated) { sallocated += 10; @@ -965,6 +998,10 @@ build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code) tmpb[ap->len] = '\0'; } else { ErlDrvSizeT slen; +#ifdef DEBUG + int buffres; +#endif + if (erts_iolist_size(val, &slen)) { goto error; } @@ -976,9 +1013,12 @@ build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code) (tmpbsiz = slen + 1)); } } - if (erts_iolist_to_buf(val, tmpb, slen) != 0) { - goto error; - } + +#ifdef DEBUG + buffres = +#endif + erts_iolist_to_buf(val, tmpb, slen); + ASSERT(buffres >= 0); tmpb[slen] = '\0'; } build_one_capture(code,&ri,&sallocated,has_dupnames,tmpb); @@ -1030,8 +1070,11 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3) unsigned long loop_count; Eterm capture[CAPSPEC_SIZE] = CAPSPEC_INIT; int is_list_cap; + int match_limit = 0; + int match_limit_recursion = 0; - if (parse_options(arg3,&comp_options,&options,&pflags,&startoffset,capture) + if (parse_options(arg3,&comp_options,&options,&pflags,&startoffset,capture, + &match_limit,&match_limit_recursion) < 0) { BIF_ERROR(p,BADARG); } @@ -1048,6 +1091,9 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3) const char *errstr = ""; int errofset = 0; int capture_count; +#ifdef DEBUG + int buffres; +#endif if (pflags & PARSE_FLAG_UNICODE && (!is_binary(arg2) || !is_binary(arg1) || @@ -1060,10 +1106,14 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3) } expr = erts_alloc(ERTS_ALC_T_RE_TMP_BUF, slen + 1); - if (erts_iolist_to_buf(arg2, expr, slen) != 0) { - erts_free(ERTS_ALC_T_RE_TMP_BUF, expr); - BIF_ERROR(p,BADARG); - } + +#ifdef DEBUG + buffres = +#endif + erts_iolist_to_buf(arg2, expr, slen); + + ASSERT(buffres >= 0); + expr[slen]='\0'; result = erts_pcre_compile2(expr, comp_options, &errcode, &errstr, &errofset, default_table); @@ -1168,6 +1218,16 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3) restart.extra.restart_flags = 0; restart.extra.loop_counter_return = &loop_count; restart.ret_info = NULL; + + if (pflags & PARSE_FLAG_MATCH_LIMIT) { + restart.extra.flags |= PCRE_EXTRA_MATCH_LIMIT; + restart.extra.match_limit = match_limit; + } + + if (pflags & PARSE_FLAG_MATCH_LIMIT_RECURSION) { + restart.extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; + restart.extra.match_limit_recursion = match_limit_recursion; + } if (pflags & PARSE_FLAG_CAPTURE_OPT) { if ((restart.ret_info = build_capture(capture,restart.code)) == NULL) { @@ -1203,6 +1263,9 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3) restart.subject = (char *) (pb->bytes+offset); restart.flags |= RESTART_FLAG_SUBJECT_IN_BINARY; } else { +#ifdef DEBUG + int buffres; +#endif handle_iolist: if (erts_iolist_size(arg1, &slength)) { erts_free(ERTS_ALC_T_RE_SUBJECT, restart.ovector); @@ -1214,15 +1277,11 @@ handle_iolist: } restart.subject = erts_alloc(ERTS_ALC_T_RE_SUBJECT, slength); - if (erts_iolist_to_buf(arg1, restart.subject, slength) != 0) { - erts_free(ERTS_ALC_T_RE_SUBJECT, restart.ovector); - erts_free(ERTS_ALC_T_RE_SUBJECT, restart.code); - erts_free(ERTS_ALC_T_RE_SUBJECT, restart.subject); - if (restart.ret_info != NULL) { - erts_free(ERTS_ALC_T_RE_SUBJECT, restart.ret_info); - } - BIF_ERROR(p,BADARG); - } +#ifdef DEBUG + buffres = +#endif + erts_iolist_to_buf(arg1, restart.subject, slength); + ASSERT(buffres >= 0); } if (pflags & PARSE_FLAG_REPORT_ERRORS) { @@ -1236,6 +1295,12 @@ handle_iolist: rc = erts_pcre_exec(restart.code, &(restart.extra), restart.subject, slength, startoffset, options, restart.ovector, ovsize); + + if (rc == PCRE_ERROR_BADENDIANNESS || rc == PCRE_ERROR_BADMAGIC) { + cleanup_restart_context(&restart); + BIF_ERROR(p,BADARG); + } + ASSERT(loop_count != 0xFFFFFFFF); BUMP_REDS(p, loop_count / LOOP_FACTOR); if (rc == PCRE_ERROR_LOOP_LIMIT) { @@ -1255,7 +1320,7 @@ handle_iolist: arg2 /* To avoid GC of precompiled code, XXX: not utilized yet */, magic_bin); } - + res = build_exec_return(p, rc, &restart, arg1); cleanup_restart_context(&restart); @@ -1331,7 +1396,7 @@ BIF_RETTYPE re_inspect_2(BIF_ALIST_2) { Eterm *tp,*tmp_vec,*hp; - int rc,i,top,j; + int i,top,j; int entrysize; char *nametable, *last,*name; int has_dupnames; @@ -1340,6 +1405,10 @@ re_inspect_2(BIF_ALIST_2) Eterm res; const pcre *code; byte *temp_alloc = NULL; +#ifdef DEBUG + int infores; +#endif + if (is_not_tuple(BIF_ARG_1) || (arityval(*tuple_val(BIF_ARG_1)) != 5)) { goto error; @@ -1362,18 +1431,33 @@ re_inspect_2(BIF_ALIST_2) if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_OPTIONS, &options) != 0) goto error; - if ((rc = erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) - goto error; + +#ifdef DEBUG + infores = +#endif + erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top); + + ASSERT(infores == 0); + if (top <= 0) { hp = HAlloc(BIF_P, 3); res = TUPLE2(hp,am_namelist,NIL); erts_free_aligned_binary_bytes(temp_alloc); BIF_RET(res); } - if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize) != 0) - goto error; - if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, (unsigned char **) &nametable) != 0) - goto error; +#ifdef DEBUG + infores = +#endif + erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize); + + ASSERT(infores == 0); + +#ifdef DEBUG + infores = +#endif + erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, (unsigned char **) &nametable); + + ASSERT(infores == 0); has_dupnames = ((options & PCRE_DUPNAMES) != 0); /* First, count the names */ diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl index 649e87ef2c..afc63496d0 100644 --- a/lib/stdlib/src/re.erl +++ b/lib/stdlib/src/re.erl @@ -72,6 +72,8 @@ run(_, _) -> Option :: anchored | global | notbol | noteol | notempty | notempty_atstart | report_errors | {offset, non_neg_integer()} | + {match_limit, non_neg_integer()} | + {match_limit_recursion, non_neg_integer()} | {newline, NLSpec :: nl_spec()} | bsr_anycrlf | bsr_unicode | {capture, ValueSpec} | {capture, ValueSpec, Type} | CompileOpt, @@ -117,6 +119,8 @@ split(Subject,RE) -> Options :: [ Option ], Option :: anchored | notbol | noteol | notempty | notempty_atstart | {offset, non_neg_integer()} | {newline, nl_spec()} + | {match_limit, non_neg_integer()} + | {match_limit_recursion, non_neg_integer()} | bsr_anycrlf | bsr_unicode | {return, ReturnType} | {parts, NumParts} | group | trim | CompileOpt, NumParts :: non_neg_integer() | infinity, @@ -311,6 +315,8 @@ replace(Subject,RE,Replacement) -> Option :: anchored | global | notbol | noteol | notempty | notempty_atstart | {offset, non_neg_integer()} | {newline, NLSpec} | bsr_anycrlf + | {match_limit, non_neg_integer()} + | {match_limit_recursion, non_neg_integer()} | bsr_unicode | {return, ReturnType} | CompileOpt, ReturnType :: iodata | list | binary, CompileOpt :: compile_option(), @@ -887,6 +893,10 @@ runopt({capture,_}) -> true; runopt(global) -> true; +runopt({match_limit,_}) -> + true; +runopt({match_limit_recursion,_}) -> + true; runopt(_) -> false. diff --git a/lib/stdlib/test/re_SUITE.erl b/lib/stdlib/test/re_SUITE.erl index 1a2f1e0ac5..911b8ef2f1 100644 --- a/lib/stdlib/test/re_SUITE.erl +++ b/lib/stdlib/test/re_SUITE.erl @@ -26,7 +26,8 @@ error_handling/1,pcre_cve_2008_2371/1, pcre_compile_workspace_overflow/1,re_infinite_loop/1, re_backwards_accented/1,opt_dupnames/1,opt_all_names/1,inspect/1, - opt_no_start_optimize/1,opt_never_utf/1,opt_ucp/1]). + opt_no_start_optimize/1,opt_never_utf/1,opt_ucp/1, + match_limit/1]). -include_lib("test_server/include/test_server.hrl"). -include_lib("kernel/include/file.hrl"). @@ -40,7 +41,8 @@ all() -> split_specials, error_handling, pcre_cve_2008_2371, pcre_compile_workspace_overflow, re_infinite_loop, re_backwards_accented, opt_dupnames, opt_all_names, - inspect, opt_no_start_optimize,opt_never_utf,opt_ucp]. + inspect, opt_no_start_optimize,opt_never_utf,opt_ucp, + match_limit]. groups() -> []. @@ -788,3 +790,39 @@ opt_ucp(Config) when is_list(Config) -> nomatch = re:run([1024],"\\w",[unicode]), % Latin1 word characters only, 1024 is not latin1 {match,[{0,2}]} = re:run([1024],"\\w",[unicode,ucp]), % Any Unicode word character works with 'ucp' ok. +match_limit(doc) -> + "Check that the match_limit and match_limit_recursion options work"; +match_limit(Config) when is_list(Config) -> + nomatch = re:run("aaaaaaaaaaaaaz","(a+)*zz",[]), + nomatch = re:run("aaaaaaaaaaaaaz","(a+)*zz",[{match_limit,3000}]), + nomatch = re:run("aaaaaaaaaaaaaz","(a+)*zz",[{match_limit_recursion,10}]), + nomatch = re:run("aaaaaaaaaaaaaz","(a+)*zz",[report_errors]), + {error,match_limit} = re:run("aaaaaaaaaaaaaz","(a+)*zz",[{match_limit,3000}, + report_errors]), + {error,match_limit_recursion} = + re:run("aaaaaaaaaaaaaz","(a+)*zz",[{match_limit_recursion,10}, + report_errors]), + {error,match_limit} = re:run("aaaaaaaaaaaaaz","(a+)*zz",[{match_limit,3000}, + report_errors,global]), + {error,match_limit_recursion} = + re:run("aaaaaaaaaaaaaz","(a+)*zz",[{match_limit_recursion,10}, + report_errors,global]), + ["aaaaaaaaaaaaaz"] = re:split("aaaaaaaaaaaaaz","(a+)*zz", + [{match_limit_recursion,10},{return,list}]), + ["aaaaaaaaaaaaaz"] = re:split("aaaaaaaaaaaaaz","(a+)*zz", + [{match_limit,3000},{return,list}]), + "aaaaaaaaaaaaaz" = re:replace("aaaaaaaaaaaaaz","(a+)*zz","!", + [{match_limit_recursion,10},{return,list}]), + "aaaaaaaaaaaaaz" = re:replace("aaaaaaaaaaaaaz","(a+)*zz","!", + [{match_limit,3000},{return,list}]), + {'EXIT', {badarg,_}} = (catch re:replace("aaaaaaaaaaaaaz","(a+)*zz","!", + [{match_limit_recursion,-1},{return,list}])), + {'EXIT', {badarg,_}} = (catch re:replace("aaaaaaaaaaaaaz","(a+)*zz","!", + [{match_limit,-1},{return,list}])), + {'EXIT', {badarg,_}} = (catch re:run("aaaaaaaaaaaaaz","(a+)*zz", + [{match_limit_recursion,-1}, + report_errors,global])), + {'EXIT', {badarg,_}} = (catch re:run("aaaaaaaaaaaaaz","(a+)*zz", + [{match_limit,-1}, + report_errors,global])), + ok. |