From 8cbc9296944b5d1397d15e5615890b61549d5064 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Wed, 31 Jul 2013 15:17:27 +0200 Subject: Add match_limit and match_limit_recursion options Added to re:run and sets the corresponding fields in 'extra' struct for the PCRE match engine. The result can be viewed by also setting 'report_errors' when matching. Some housekeeping was also done... The offset option also did not properly check for offset's >= 0. Change nomatch to BADARG when pre-compiled mp() is faked: By constructing a 5-tuple with faked content but the right data types, you could do a re:run which returned nomatch when in fact the mp() was bad. The cheapest solution is to check the return from pcre_exec better. Remove unreachable code in erts_bif_re.c: Replaced tests for things that logically simply cannot happen with ASSERT. --- lib/stdlib/src/re.erl | 10 ++++++++++ lib/stdlib/test/re_SUITE.erl | 42 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 50 insertions(+), 2 deletions(-) (limited to 'lib/stdlib') diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl index 649e87ef2c..afc63496d0 100644 --- a/lib/stdlib/src/re.erl +++ b/lib/stdlib/src/re.erl @@ -72,6 +72,8 @@ run(_, _) -> Option :: anchored | global | notbol | noteol | notempty | notempty_atstart | report_errors | {offset, non_neg_integer()} | + {match_limit, non_neg_integer()} | + {match_limit_recursion, non_neg_integer()} | {newline, NLSpec :: nl_spec()} | bsr_anycrlf | bsr_unicode | {capture, ValueSpec} | {capture, ValueSpec, Type} | CompileOpt, @@ -117,6 +119,8 @@ split(Subject,RE) -> Options :: [ Option ], Option :: anchored | notbol | noteol | notempty | notempty_atstart | {offset, non_neg_integer()} | {newline, nl_spec()} + | {match_limit, non_neg_integer()} + | {match_limit_recursion, non_neg_integer()} | bsr_anycrlf | bsr_unicode | {return, ReturnType} | {parts, NumParts} | group | trim | CompileOpt, NumParts :: non_neg_integer() | infinity, @@ -311,6 +315,8 @@ replace(Subject,RE,Replacement) -> Option :: anchored | global | notbol | noteol | notempty | notempty_atstart | {offset, non_neg_integer()} | {newline, NLSpec} | bsr_anycrlf + | {match_limit, non_neg_integer()} + | {match_limit_recursion, non_neg_integer()} | bsr_unicode | {return, ReturnType} | CompileOpt, ReturnType :: iodata | list | binary, CompileOpt :: compile_option(), @@ -887,6 +893,10 @@ runopt({capture,_}) -> true; runopt(global) -> true; +runopt({match_limit,_}) -> + true; +runopt({match_limit_recursion,_}) -> + true; runopt(_) -> false. diff --git a/lib/stdlib/test/re_SUITE.erl b/lib/stdlib/test/re_SUITE.erl index 1a2f1e0ac5..911b8ef2f1 100644 --- a/lib/stdlib/test/re_SUITE.erl +++ b/lib/stdlib/test/re_SUITE.erl @@ -26,7 +26,8 @@ error_handling/1,pcre_cve_2008_2371/1, pcre_compile_workspace_overflow/1,re_infinite_loop/1, re_backwards_accented/1,opt_dupnames/1,opt_all_names/1,inspect/1, - opt_no_start_optimize/1,opt_never_utf/1,opt_ucp/1]). + opt_no_start_optimize/1,opt_never_utf/1,opt_ucp/1, + match_limit/1]). -include_lib("test_server/include/test_server.hrl"). -include_lib("kernel/include/file.hrl"). @@ -40,7 +41,8 @@ all() -> split_specials, error_handling, pcre_cve_2008_2371, pcre_compile_workspace_overflow, re_infinite_loop, re_backwards_accented, opt_dupnames, opt_all_names, - inspect, opt_no_start_optimize,opt_never_utf,opt_ucp]. + inspect, opt_no_start_optimize,opt_never_utf,opt_ucp, + match_limit]. groups() -> []. @@ -788,3 +790,39 @@ opt_ucp(Config) when is_list(Config) -> nomatch = re:run([1024],"\\w",[unicode]), % Latin1 word characters only, 1024 is not latin1 {match,[{0,2}]} = re:run([1024],"\\w",[unicode,ucp]), % Any Unicode word character works with 'ucp' ok. +match_limit(doc) -> + "Check that the match_limit and match_limit_recursion options work"; +match_limit(Config) when is_list(Config) -> + nomatch = re:run("aaaaaaaaaaaaaz","(a+)*zz",[]), + nomatch = re:run("aaaaaaaaaaaaaz","(a+)*zz",[{match_limit,3000}]), + nomatch = re:run("aaaaaaaaaaaaaz","(a+)*zz",[{match_limit_recursion,10}]), + nomatch = re:run("aaaaaaaaaaaaaz","(a+)*zz",[report_errors]), + {error,match_limit} = re:run("aaaaaaaaaaaaaz","(a+)*zz",[{match_limit,3000}, + report_errors]), + {error,match_limit_recursion} = + re:run("aaaaaaaaaaaaaz","(a+)*zz",[{match_limit_recursion,10}, + report_errors]), + {error,match_limit} = re:run("aaaaaaaaaaaaaz","(a+)*zz",[{match_limit,3000}, + report_errors,global]), + {error,match_limit_recursion} = + re:run("aaaaaaaaaaaaaz","(a+)*zz",[{match_limit_recursion,10}, + report_errors,global]), + ["aaaaaaaaaaaaaz"] = re:split("aaaaaaaaaaaaaz","(a+)*zz", + [{match_limit_recursion,10},{return,list}]), + ["aaaaaaaaaaaaaz"] = re:split("aaaaaaaaaaaaaz","(a+)*zz", + [{match_limit,3000},{return,list}]), + "aaaaaaaaaaaaaz" = re:replace("aaaaaaaaaaaaaz","(a+)*zz","!", + [{match_limit_recursion,10},{return,list}]), + "aaaaaaaaaaaaaz" = re:replace("aaaaaaaaaaaaaz","(a+)*zz","!", + [{match_limit,3000},{return,list}]), + {'EXIT', {badarg,_}} = (catch re:replace("aaaaaaaaaaaaaz","(a+)*zz","!", + [{match_limit_recursion,-1},{return,list}])), + {'EXIT', {badarg,_}} = (catch re:replace("aaaaaaaaaaaaaz","(a+)*zz","!", + [{match_limit,-1},{return,list}])), + {'EXIT', {badarg,_}} = (catch re:run("aaaaaaaaaaaaaz","(a+)*zz", + [{match_limit_recursion,-1}, + report_errors,global])), + {'EXIT', {badarg,_}} = (catch re:run("aaaaaaaaaaaaaz","(a+)*zz", + [{match_limit,-1}, + report_errors,global])), + ok. -- cgit v1.2.3