From 07eb9659d726a9dfcf8f6f5e6a2265a995b409ec Mon Sep 17 00:00:00 2001 From: Rickard Green Date: Fri, 24 May 2019 15:20:02 +0200 Subject: Add global_unicode_validation() test --- lib/stdlib/test/re_SUITE.erl | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/stdlib/test/re_SUITE.erl b/lib/stdlib/test/re_SUITE.erl index 7b82647416..3cbc8abce2 100644 --- a/lib/stdlib/test/re_SUITE.erl +++ b/lib/stdlib/test/re_SUITE.erl @@ -28,7 +28,7 @@ pcre_compile_workspace_overflow/1,re_infinite_loop/1, re_backwards_accented/1,opt_dupnames/1,opt_all_names/1,inspect/1, opt_no_start_optimize/1,opt_never_utf/1,opt_ucp/1, - match_limit/1,sub_binaries/1,copt/1]). + match_limit/1,sub_binaries/1,copt/1,global_unicode_validation/1]). -include_lib("common_test/include/ct.hrl"). -include_lib("kernel/include/file.hrl"). @@ -45,7 +45,7 @@ all() -> pcre_compile_workspace_overflow, re_infinite_loop, re_backwards_accented, opt_dupnames, opt_all_names, inspect, opt_no_start_optimize,opt_never_utf,opt_ucp, - match_limit, sub_binaries, re_version]. + match_limit, sub_binaries, re_version, global_unicode_validation]. groups() -> []. @@ -200,7 +200,31 @@ re_version(_Config) -> {match,[Version]} = re:run(Version,"^[0-9]\\.[0-9]{2} 20[0-9]{2}-[0-9]{2}-[0-9]{2}",[{capture,all,binary}]), ok. +global_unicode_validation(Config) when is_list(Config) -> + %% Test that unicode validation of the subject is not done + %% for every match found... + Bin = binary:copy(<<"abc\n">>,100000), + {TimeAscii, _} = take_time(fun () -> + re:run(Bin, <<"b">>, [global]) + end), + {TimeUnicode, _} = take_time(fun () -> + re:run(Bin, <<"b">>, [unicode,global]) + end), + if TimeAscii == 0; TimeUnicode == 0 -> + {comment, "Not good enough resolution to compare results"}; + true -> + %% The time the operations takes should be in the + %% same order of magnitude. If validation of the + %% whole subject occurs for every match, the unicode + %% variant will take way longer time... + true = TimeUnicode div TimeAscii < 10 + end. +take_time(Fun) -> + Start = erlang:monotonic_time(nanosecond), + Res = Fun(), + End = erlang:monotonic_time(nanosecond), + {End-Start, Res}. %% Test compile options given directly to run. combined_options(Config) when is_list(Config) -> -- cgit v1.2.3 From 0c253ebdd7392e06045c0b1fb16f8aa55d8a7092 Mon Sep 17 00:00:00 2001 From: Rickard Green Date: Wed, 22 May 2019 19:02:21 +0200 Subject: Only validate subject once when global is used in re:run() --- lib/stdlib/src/re.erl | 52 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl index 726b409d4d..197564b895 100644 --- a/lib/stdlib/src/re.erl +++ b/lib/stdlib/src/re.erl @@ -33,6 +33,8 @@ %%% BIFs +-export([internal_run/4]). + -export([version/0, compile/1, compile/2, run/2, run/3, inspect/2]). -spec version() -> binary(). @@ -100,6 +102,40 @@ run(_, _) -> run(_, _, _) -> erlang:nif_error(undef). +-spec internal_run(Subject, RE, Options, FirstCall) -> {match, Captured} | + match | + nomatch | + {error, ErrType} when + Subject :: iodata() | unicode:charlist(), + RE :: mp() | iodata() | unicode:charlist(), + Options :: [Option], + Option :: anchored | global | notbol | noteol | notempty + | notempty_atstart | report_errors + | {offset, non_neg_integer()} | + {match_limit, non_neg_integer()} | + {match_limit_recursion, non_neg_integer()} | + {newline, NLSpec :: nl_spec()} | + bsr_anycrlf | bsr_unicode | {capture, ValueSpec} | + {capture, ValueSpec, Type} | CompileOpt, + Type :: index | list | binary, + ValueSpec :: all | all_but_first | all_names | first | none | ValueList, + ValueList :: [ValueID], + ValueID :: integer() | string() | atom(), + CompileOpt :: compile_option(), + Captured :: [CaptureData] | [[CaptureData]], + CaptureData :: {integer(), integer()} + | ListConversionData + | binary(), + ListConversionData :: string() + | {error, string(), binary()} + | {incomplete, string(), binary()}, + ErrType :: match_limit | match_limit_recursion | {compile, CompileErr}, + CompileErr :: {ErrString :: string(), Position :: non_neg_integer()}, + FirstCall :: boolean(). + +internal_run(_, _, _, _) -> + erlang:nif_error(undef). + -spec inspect(MP,Item) -> {namelist, [ binary() ]} when MP :: mp(), Item :: namelist. @@ -765,17 +801,17 @@ do_grun(FlatSubject,Subject,Unicode,CRLF,RE,{Options0,NeedClean}) -> try postprocess(loopexec(FlatSubject,RE,InitialOffset, byte_size(FlatSubject), - Unicode,CRLF,StrippedOptions), + Unicode,CRLF,StrippedOptions,true), SelectReturn,ConvertReturn,FlatSubject,Unicode) catch throw:ErrTuple -> ErrTuple end. -loopexec(_,_,X,Y,_,_,_) when X > Y -> +loopexec(_,_,X,Y,_,_,_,_) when X > Y -> {match,[]}; -loopexec(Subject,RE,X,Y,Unicode,CRLF,Options) -> - case re:run(Subject,RE,[{offset,X}]++Options) of +loopexec(Subject,RE,X,Y,Unicode,CRLF,Options, First) -> + case re:internal_run(Subject,RE,[{offset,X}]++Options,First) of {error, Err} -> throw({error,Err}); nomatch -> @@ -784,11 +820,11 @@ loopexec(Subject,RE,X,Y,Unicode,CRLF,Options) -> {match,Rest} = case B>0 of true -> - loopexec(Subject,RE,A+B,Y,Unicode,CRLF,Options); + loopexec(Subject,RE,A+B,Y,Unicode,CRLF,Options,false); false -> {match,M} = - case re:run(Subject,RE,[{offset,X},notempty_atstart, - anchored]++Options) of + case re:internal_run(Subject,RE,[{offset,X},notempty_atstart, + anchored]++Options,false) of nomatch -> {match,[]}; {match,Other} -> @@ -801,7 +837,7 @@ loopexec(Subject,RE,X,Y,Unicode,CRLF,Options) -> forward(Subject,A,1,Unicode,CRLF) end, {match,MM} = loopexec(Subject,RE,NewA,Y, - Unicode,CRLF,Options), + Unicode,CRLF,Options,false), case M of [] -> {match,MM}; -- cgit v1.2.3 From c1588491158f8e59b92936381e606b173488d17a Mon Sep 17 00:00:00 2001 From: Rickard Green Date: Wed, 22 May 2019 19:36:35 +0200 Subject: Update runtime dependency from stdlib to erts --- lib/stdlib/src/stdlib.app.src | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/stdlib/src/stdlib.app.src b/lib/stdlib/src/stdlib.app.src index 3c449d3cb9..6d3d5cc02b 100644 --- a/lib/stdlib/src/stdlib.app.src +++ b/lib/stdlib/src/stdlib.app.src @@ -107,7 +107,7 @@ dets]}, {applications, [kernel]}, {env, []}, - {runtime_dependencies, ["sasl-3.0","kernel-5.0","erts-9.0","crypto-3.3", + {runtime_dependencies, ["sasl-3.0","kernel-5.0","erts-@OTP-15831@","crypto-3.3", "compiler-5.0"]} ]}. -- cgit v1.2.3 From f9f857c8094e6bb50a944316ea120e53cd5552ed Mon Sep 17 00:00:00 2001 From: Rickard Green Date: Fri, 24 May 2019 14:50:00 +0200 Subject: Add yield_on_subject_validation() test --- lib/stdlib/test/re_SUITE.erl | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/stdlib/test/re_SUITE.erl b/lib/stdlib/test/re_SUITE.erl index 7b82647416..55ed99d4a7 100644 --- a/lib/stdlib/test/re_SUITE.erl +++ b/lib/stdlib/test/re_SUITE.erl @@ -28,7 +28,7 @@ pcre_compile_workspace_overflow/1,re_infinite_loop/1, re_backwards_accented/1,opt_dupnames/1,opt_all_names/1,inspect/1, opt_no_start_optimize/1,opt_never_utf/1,opt_ucp/1, - match_limit/1,sub_binaries/1,copt/1]). + match_limit/1,sub_binaries/1,copt/1,yield_on_subject_validation/1]). -include_lib("common_test/include/ct.hrl"). -include_lib("kernel/include/file.hrl"). @@ -45,7 +45,7 @@ all() -> pcre_compile_workspace_overflow, re_infinite_loop, re_backwards_accented, opt_dupnames, opt_all_names, inspect, opt_no_start_optimize,opt_never_utf,opt_ucp, - match_limit, sub_binaries, re_version]. + match_limit, sub_binaries, re_version, yield_on_subject_validation]. groups() -> []. @@ -200,7 +200,32 @@ re_version(_Config) -> {match,[Version]} = re:run(Version,"^[0-9]\\.[0-9]{2} 20[0-9]{2}-[0-9]{2}-[0-9]{2}",[{capture,all,binary}]), ok. +yield_on_subject_validation(Config) when is_list(Config) -> + Go = make_ref(), + Bin = binary:copy(<<"abc\n">>,100000), + {P, M} = spawn_opt(fun () -> + receive Go -> ok end, + {match,[{1,1}]} = re:run(Bin, <<"b">>, [unicode]) + end, + [link, monitor]), + 1 = erlang:trace(P, true, [running]), + P ! Go, + N = count_re_run_trap_out(P, M), + true = N >= 5, + ok. +count_re_run_trap_out(P, M) when is_reference(M) -> + receive {'DOWN',M,process,P,normal} -> ok end, + TD = erlang:trace_delivered(P), + receive {trace_delivered, P, TD} -> ok end, + count_re_run_trap_out(P, 0); +count_re_run_trap_out(P, N) when is_integer(N) -> + receive + {trace,P,out,{erlang,re_run_trap,3}} -> + count_re_run_trap_out(P, N+1) + after 0 -> + N + end. %% Test compile options given directly to run. combined_options(Config) when is_list(Config) -> -- cgit v1.2.3 From 6cae8e7a45970ceb7fb1dd5bf974bf9ebe26144f Mon Sep 17 00:00:00 2001 From: Rickard Green Date: Wed, 22 May 2019 19:36:35 +0200 Subject: Update runtime dependency from stdlib to erts --- lib/stdlib/src/stdlib.app.src | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/stdlib/src/stdlib.app.src b/lib/stdlib/src/stdlib.app.src index 3c449d3cb9..0ec38e2bd0 100644 --- a/lib/stdlib/src/stdlib.app.src +++ b/lib/stdlib/src/stdlib.app.src @@ -107,7 +107,7 @@ dets]}, {applications, [kernel]}, {env, []}, - {runtime_dependencies, ["sasl-3.0","kernel-5.0","erts-9.0","crypto-3.3", + {runtime_dependencies, ["sasl-3.0","kernel-5.0","erts-@OTP-15836@","crypto-3.3", "compiler-5.0"]} ]}. -- cgit v1.2.3