aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPatrik Nyblom <[email protected]>2013-07-31 15:17:27 +0200
committerPatrik Nyblom <[email protected]>2013-08-09 12:10:44 +0200
commit8cbc9296944b5d1397d15e5615890b61549d5064 (patch)
tree86a9a61f4e3f6b60dd745fadc154489a14db64bf
parent1f4c016785a924b2e42fbb7858640be3d46e9625 (diff)
downloadotp-8cbc9296944b5d1397d15e5615890b61549d5064.tar.gz
otp-8cbc9296944b5d1397d15e5615890b61549d5064.tar.bz2
otp-8cbc9296944b5d1397d15e5615890b61549d5064.zip
Add match_limit and match_limit_recursion options
The options are added to re:run and set the corresponding fields in the 'extra' struct for the PCRE match engine. The result can be viewed by also setting 'report_errors' when matching. Some housekeeping was also done... The offset option also did not properly check that the offset is >= 0. Change nomatch to BADARG when pre-compiled mp() is faked: By constructing a 5-tuple with faked content but the right data types, you could do a re:run which returned nomatch when in fact the mp() was bad. The cheapest solution is to check the return from pcre_exec better. Remove unreachable code in erl_bif_re.c: Replaced tests for things that logically simply cannot happen with ASSERT.
-rw-r--r--erts/emulator/beam/erl_bif_re.c156
-rw-r--r--lib/stdlib/src/re.erl10
-rw-r--r--lib/stdlib/test/re_SUITE.erl42
3 files changed, 170 insertions, 38 deletions
diff --git a/erts/emulator/beam/erl_bif_re.c b/erts/emulator/beam/erl_bif_re.c
index 46b5b848cf..796fd301ad 100644
--- a/erts/emulator/beam/erl_bif_re.c
+++ b/erts/emulator/beam/erl_bif_re.c
@@ -181,6 +181,8 @@ static Eterm make_signed_integer(int x, Process *p)
#define PARSE_FLAG_CAPTURE_OPT 16
#define PARSE_FLAG_GLOBAL 32
#define PARSE_FLAG_REPORT_ERRORS 64
+#define PARSE_FLAG_MATCH_LIMIT 128
+#define PARSE_FLAG_MATCH_LIMIT_RECURSION 256
#define CAPSPEC_VALUES 0
#define CAPSPEC_TYPE 1
@@ -193,7 +195,9 @@ parse_options(Eterm listp, /* in */
int *exec_options, /* out */
int *flags,/* out */
int *startoffset, /* out */
- Eterm *capture_spec) /* capture_spec[CAPSPEC_SIZE] */ /* out */
+ Eterm *capture_spec, /* capture_spec[CAPSPEC_SIZE] */ /* out */
+ int *match_limit, /* out */
+ int *match_limit_recursion) /* out */
{
int copt,eopt,fl;
Eterm item;
@@ -235,7 +239,7 @@ parse_options(Eterm listp, /* in */
case am_offset:
{
int tmp;
- if (!term_to_int(tp[2],&tmp)) {
+ if (!term_to_int(tp[2],&tmp) || tmp < 0) {
return -1;
}
if (startoffset != NULL) {
@@ -244,6 +248,31 @@ parse_options(Eterm listp, /* in */
}
fl |= (PARSE_FLAG_UNIQUE_EXEC_OPT|PARSE_FLAG_STARTOFFSET);
break;
+ case am_match_limit:
+ {
+ int tmp;
+ if (!term_to_int(tp[2],&tmp) || tmp < 0) {
+ return -1;
+ }
+ if (match_limit != NULL) {
+ *match_limit = tmp;
+ }
+ }
+ fl |= (PARSE_FLAG_UNIQUE_EXEC_OPT|PARSE_FLAG_MATCH_LIMIT);
+ break;
+ case am_match_limit_recursion:
+ {
+ int tmp;
+ if (!term_to_int(tp[2],&tmp) || tmp < 0) {
+ return -1;
+ }
+ if (match_limit_recursion != NULL) {
+ *match_limit_recursion = tmp;
+ }
+ }
+ fl |= (PARSE_FLAG_UNIQUE_EXEC_OPT|
+ PARSE_FLAG_MATCH_LIMIT_RECURSION);
+ break;
case am_newline:
if (!is_atom(tp[2])) {
return -1;
@@ -460,9 +489,12 @@ re_compile(Process* p, Eterm arg1, Eterm arg2)
int options = 0;
int pflags = 0;
int unicode = 0;
+#ifdef DEBUG
+ int buffres;
+#endif
- if (parse_options(arg2,&options,NULL,&pflags,NULL,NULL)
+ if (parse_options(arg2,&options,NULL,&pflags,NULL,NULL,NULL,NULL)
< 0) {
BIF_ERROR(p,BADARG);
}
@@ -481,10 +513,13 @@ re_compile(Process* p, Eterm arg1, Eterm arg2)
BIF_ERROR(p,BADARG);
}
expr = erts_alloc(ERTS_ALC_T_RE_TMP_BUF, slen + 1);
- if (erts_iolist_to_buf(arg1, expr, slen) != 0) {
- erts_free(ERTS_ALC_T_RE_TMP_BUF, expr);
- BIF_ERROR(p,BADARG);
- }
+#ifdef DEBUG
+ buffres =
+#endif
+ erts_iolist_to_buf(arg1, expr, slen);
+
+ ASSERT(buffres >= 0);
+
expr[slen]='\0';
result = erts_pcre_compile2(expr, options, &errcode,
&errstr, &errofset, default_table);
@@ -910,8 +945,7 @@ build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code)
for(i=0;i<top;++i) {
if (last == NULL || !has_dupnames || strcmp(last+2,nametable+2)) {
- if (ri->num_spec < 0)
- ri->num_spec = 0;
+ ASSERT(ri->num_spec >= 0);
++(ri->num_spec);
if(ri->num_spec > sallocated) {
sallocated += 10;
@@ -936,8 +970,7 @@ build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code)
for(l=capture_spec[CAPSPEC_VALUES];is_list(l);l = CDR(list_val(l))) {
int x;
Eterm val = CAR(list_val(l));
- if (ri->num_spec < 0)
- ri->num_spec = 0;
+ ASSERT(ri->num_spec >= 0);
++(ri->num_spec);
if(ri->num_spec > sallocated) {
sallocated += 10;
@@ -965,6 +998,10 @@ build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code)
tmpb[ap->len] = '\0';
} else {
ErlDrvSizeT slen;
+#ifdef DEBUG
+ int buffres;
+#endif
+
if (erts_iolist_size(val, &slen)) {
goto error;
}
@@ -976,9 +1013,12 @@ build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code)
(tmpbsiz = slen + 1));
}
}
- if (erts_iolist_to_buf(val, tmpb, slen) != 0) {
- goto error;
- }
+
+#ifdef DEBUG
+ buffres =
+#endif
+ erts_iolist_to_buf(val, tmpb, slen);
+ ASSERT(buffres >= 0);
tmpb[slen] = '\0';
}
build_one_capture(code,&ri,&sallocated,has_dupnames,tmpb);
@@ -1030,8 +1070,11 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3)
unsigned long loop_count;
Eterm capture[CAPSPEC_SIZE] = CAPSPEC_INIT;
int is_list_cap;
+ int match_limit = 0;
+ int match_limit_recursion = 0;
- if (parse_options(arg3,&comp_options,&options,&pflags,&startoffset,capture)
+ if (parse_options(arg3,&comp_options,&options,&pflags,&startoffset,capture,
+ &match_limit,&match_limit_recursion)
< 0) {
BIF_ERROR(p,BADARG);
}
@@ -1048,6 +1091,9 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3)
const char *errstr = "";
int errofset = 0;
int capture_count;
+#ifdef DEBUG
+ int buffres;
+#endif
if (pflags & PARSE_FLAG_UNICODE &&
(!is_binary(arg2) || !is_binary(arg1) ||
@@ -1060,10 +1106,14 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3)
}
expr = erts_alloc(ERTS_ALC_T_RE_TMP_BUF, slen + 1);
- if (erts_iolist_to_buf(arg2, expr, slen) != 0) {
- erts_free(ERTS_ALC_T_RE_TMP_BUF, expr);
- BIF_ERROR(p,BADARG);
- }
+
+#ifdef DEBUG
+ buffres =
+#endif
+ erts_iolist_to_buf(arg2, expr, slen);
+
+ ASSERT(buffres >= 0);
+
expr[slen]='\0';
result = erts_pcre_compile2(expr, comp_options, &errcode,
&errstr, &errofset, default_table);
@@ -1168,6 +1218,16 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3)
restart.extra.restart_flags = 0;
restart.extra.loop_counter_return = &loop_count;
restart.ret_info = NULL;
+
+ if (pflags & PARSE_FLAG_MATCH_LIMIT) {
+ restart.extra.flags |= PCRE_EXTRA_MATCH_LIMIT;
+ restart.extra.match_limit = match_limit;
+ }
+
+ if (pflags & PARSE_FLAG_MATCH_LIMIT_RECURSION) {
+ restart.extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
+ restart.extra.match_limit_recursion = match_limit_recursion;
+ }
if (pflags & PARSE_FLAG_CAPTURE_OPT) {
if ((restart.ret_info = build_capture(capture,restart.code)) == NULL) {
@@ -1203,6 +1263,9 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3)
restart.subject = (char *) (pb->bytes+offset);
restart.flags |= RESTART_FLAG_SUBJECT_IN_BINARY;
} else {
+#ifdef DEBUG
+ int buffres;
+#endif
handle_iolist:
if (erts_iolist_size(arg1, &slength)) {
erts_free(ERTS_ALC_T_RE_SUBJECT, restart.ovector);
@@ -1214,15 +1277,11 @@ handle_iolist:
}
restart.subject = erts_alloc(ERTS_ALC_T_RE_SUBJECT, slength);
- if (erts_iolist_to_buf(arg1, restart.subject, slength) != 0) {
- erts_free(ERTS_ALC_T_RE_SUBJECT, restart.ovector);
- erts_free(ERTS_ALC_T_RE_SUBJECT, restart.code);
- erts_free(ERTS_ALC_T_RE_SUBJECT, restart.subject);
- if (restart.ret_info != NULL) {
- erts_free(ERTS_ALC_T_RE_SUBJECT, restart.ret_info);
- }
- BIF_ERROR(p,BADARG);
- }
+#ifdef DEBUG
+ buffres =
+#endif
+ erts_iolist_to_buf(arg1, restart.subject, slength);
+ ASSERT(buffres >= 0);
}
if (pflags & PARSE_FLAG_REPORT_ERRORS) {
@@ -1236,6 +1295,12 @@ handle_iolist:
rc = erts_pcre_exec(restart.code, &(restart.extra), restart.subject,
slength, startoffset,
options, restart.ovector, ovsize);
+
+ if (rc == PCRE_ERROR_BADENDIANNESS || rc == PCRE_ERROR_BADMAGIC) {
+ cleanup_restart_context(&restart);
+ BIF_ERROR(p,BADARG);
+ }
+
ASSERT(loop_count != 0xFFFFFFFF);
BUMP_REDS(p, loop_count / LOOP_FACTOR);
if (rc == PCRE_ERROR_LOOP_LIMIT) {
@@ -1255,7 +1320,7 @@ handle_iolist:
arg2 /* To avoid GC of precompiled code, XXX: not utilized yet */,
magic_bin);
}
-
+
res = build_exec_return(p, rc, &restart, arg1);
cleanup_restart_context(&restart);
@@ -1331,7 +1396,7 @@ BIF_RETTYPE
re_inspect_2(BIF_ALIST_2)
{
Eterm *tp,*tmp_vec,*hp;
- int rc,i,top,j;
+ int i,top,j;
int entrysize;
char *nametable, *last,*name;
int has_dupnames;
@@ -1340,6 +1405,10 @@ re_inspect_2(BIF_ALIST_2)
Eterm res;
const pcre *code;
byte *temp_alloc = NULL;
+#ifdef DEBUG
+ int infores;
+#endif
+
if (is_not_tuple(BIF_ARG_1) || (arityval(*tuple_val(BIF_ARG_1)) != 5)) {
goto error;
@@ -1362,18 +1431,33 @@ re_inspect_2(BIF_ALIST_2)
if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_OPTIONS, &options) != 0)
goto error;
- if ((rc = erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
- goto error;
+
+#ifdef DEBUG
+ infores =
+#endif
+ erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top);
+
+ ASSERT(infores == 0);
+
if (top <= 0) {
hp = HAlloc(BIF_P, 3);
res = TUPLE2(hp,am_namelist,NIL);
erts_free_aligned_binary_bytes(temp_alloc);
BIF_RET(res);
}
- if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize) != 0)
- goto error;
- if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, (unsigned char **) &nametable) != 0)
- goto error;
+#ifdef DEBUG
+ infores =
+#endif
+ erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize);
+
+ ASSERT(infores == 0);
+
+#ifdef DEBUG
+ infores =
+#endif
+ erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, (unsigned char **) &nametable);
+
+ ASSERT(infores == 0);
has_dupnames = ((options & PCRE_DUPNAMES) != 0);
/* First, count the names */
diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl
index 649e87ef2c..afc63496d0 100644
--- a/lib/stdlib/src/re.erl
+++ b/lib/stdlib/src/re.erl
@@ -72,6 +72,8 @@ run(_, _) ->
Option :: anchored | global | notbol | noteol | notempty
| notempty_atstart | report_errors
| {offset, non_neg_integer()} |
+ {match_limit, non_neg_integer()} |
+ {match_limit_recursion, non_neg_integer()} |
{newline, NLSpec :: nl_spec()} |
bsr_anycrlf | bsr_unicode | {capture, ValueSpec} |
{capture, ValueSpec, Type} | CompileOpt,
@@ -117,6 +119,8 @@ split(Subject,RE) ->
Options :: [ Option ],
Option :: anchored | notbol | noteol | notempty | notempty_atstart
| {offset, non_neg_integer()} | {newline, nl_spec()}
+ | {match_limit, non_neg_integer()}
+ | {match_limit_recursion, non_neg_integer()}
| bsr_anycrlf | bsr_unicode | {return, ReturnType}
| {parts, NumParts} | group | trim | CompileOpt,
NumParts :: non_neg_integer() | infinity,
@@ -311,6 +315,8 @@ replace(Subject,RE,Replacement) ->
Option :: anchored | global | notbol | noteol | notempty
| notempty_atstart
| {offset, non_neg_integer()} | {newline, NLSpec} | bsr_anycrlf
+ | {match_limit, non_neg_integer()}
+ | {match_limit_recursion, non_neg_integer()}
| bsr_unicode | {return, ReturnType} | CompileOpt,
ReturnType :: iodata | list | binary,
CompileOpt :: compile_option(),
@@ -887,6 +893,10 @@ runopt({capture,_}) ->
true;
runopt(global) ->
true;
+runopt({match_limit,_}) ->
+ true;
+runopt({match_limit_recursion,_}) ->
+ true;
runopt(_) ->
false.
diff --git a/lib/stdlib/test/re_SUITE.erl b/lib/stdlib/test/re_SUITE.erl
index 1a2f1e0ac5..911b8ef2f1 100644
--- a/lib/stdlib/test/re_SUITE.erl
+++ b/lib/stdlib/test/re_SUITE.erl
@@ -26,7 +26,8 @@
error_handling/1,pcre_cve_2008_2371/1,
pcre_compile_workspace_overflow/1,re_infinite_loop/1,
re_backwards_accented/1,opt_dupnames/1,opt_all_names/1,inspect/1,
- opt_no_start_optimize/1,opt_never_utf/1,opt_ucp/1]).
+ opt_no_start_optimize/1,opt_never_utf/1,opt_ucp/1,
+ match_limit/1]).
-include_lib("test_server/include/test_server.hrl").
-include_lib("kernel/include/file.hrl").
@@ -40,7 +41,8 @@ all() ->
split_specials, error_handling, pcre_cve_2008_2371,
pcre_compile_workspace_overflow, re_infinite_loop,
re_backwards_accented, opt_dupnames, opt_all_names,
- inspect, opt_no_start_optimize,opt_never_utf,opt_ucp].
+ inspect, opt_no_start_optimize,opt_never_utf,opt_ucp,
+ match_limit].
groups() ->
[].
@@ -788,3 +790,39 @@ opt_ucp(Config) when is_list(Config) ->
nomatch = re:run([1024],"\\w",[unicode]), % Latin1 word characters only, 1024 is not latin1
{match,[{0,2}]} = re:run([1024],"\\w",[unicode,ucp]), % Any Unicode word character works with 'ucp'
ok.
+match_limit(doc) ->
+ "Check that the match_limit and match_limit_recursion options work";
+match_limit(Config) when is_list(Config) ->
+ nomatch = re:run("aaaaaaaaaaaaaz","(a+)*zz",[]), % Baseline: subject ends in a single "z", the pattern requires "zz"
+ nomatch = re:run("aaaaaaaaaaaaaz","(a+)*zz",[{match_limit,3000}]), % Limit set but no report_errors: result is still nomatch
+ nomatch = re:run("aaaaaaaaaaaaaz","(a+)*zz",[{match_limit_recursion,10}]), % Likewise for the recursion limit
+ nomatch = re:run("aaaaaaaaaaaaaz","(a+)*zz",[report_errors]), % report_errors alone, no limit: plain nomatch
+ {error,match_limit} = re:run("aaaaaaaaaaaaaz","(a+)*zz",[{match_limit,3000}, % Same limit plus report_errors: an error tuple instead of nomatch
+ report_errors]),
+ {error,match_limit_recursion} =
+ re:run("aaaaaaaaaaaaaz","(a+)*zz",[{match_limit_recursion,10}, % Recursion limit has its own error reason
+ report_errors]),
+ {error,match_limit} = re:run("aaaaaaaaaaaaaz","(a+)*zz",[{match_limit,3000}, % Same error results when 'global' is added
+ report_errors,global]),
+ {error,match_limit_recursion} =
+ re:run("aaaaaaaaaaaaaz","(a+)*zz",[{match_limit_recursion,10},
+ report_errors,global]),
+ ["aaaaaaaaaaaaaz"] = re:split("aaaaaaaaaaaaaz","(a+)*zz", % re:split accepts the limit options; subject comes back unsplit
+ [{match_limit_recursion,10},{return,list}]),
+ ["aaaaaaaaaaaaaz"] = re:split("aaaaaaaaaaaaaz","(a+)*zz",
+ [{match_limit,3000},{return,list}]),
+ "aaaaaaaaaaaaaz" = re:replace("aaaaaaaaaaaaaz","(a+)*zz","!", % re:replace accepts the limit options; subject comes back unchanged
+ [{match_limit_recursion,10},{return,list}]),
+ "aaaaaaaaaaaaaz" = re:replace("aaaaaaaaaaaaaz","(a+)*zz","!",
+ [{match_limit,3000},{return,list}]),
+ {'EXIT', {badarg,_}} = (catch re:replace("aaaaaaaaaaaaaz","(a+)*zz","!", % Negative limit values must raise badarg, in re:replace ...
+ [{match_limit_recursion,-1},{return,list}])),
+ {'EXIT', {badarg,_}} = (catch re:replace("aaaaaaaaaaaaaz","(a+)*zz","!",
+ [{match_limit,-1},{return,list}])),
+ {'EXIT', {badarg,_}} = (catch re:run("aaaaaaaaaaaaaz","(a+)*zz", % ... and in re:run
+ [{match_limit_recursion,-1},
+ report_errors,global])),
+ {'EXIT', {badarg,_}} = (catch re:run("aaaaaaaaaaaaaz","(a+)*zz",
+ [{match_limit,-1},
+ report_errors,global])),
+ ok.