aboutsummaryrefslogtreecommitdiffstats
path: root/lib/stdlib
diff options
context:
space:
mode:
authorRickard Green <[email protected]>2019-06-18 22:24:47 +0200
committerRickard Green <[email protected]>2019-06-18 22:24:47 +0200
commit272ee2d47e7c648a1c5987429053d7a3ee208716 (patch)
treeb1a27655cd91d517e624c4d1a32089d8ddf138b1 /lib/stdlib
parent4319a7f8bc93388ec540e954eb9bb73ea1eec4ef (diff)
parentc1588491158f8e59b92936381e606b173488d17a (diff)
downloadotp-272ee2d47e7c648a1c5987429053d7a3ee208716.tar.gz
otp-272ee2d47e7c648a1c5987429053d7a3ee208716.tar.bz2
otp-272ee2d47e7c648a1c5987429053d7a3ee208716.zip
Merge branch 'rickard/re-global-unicode-validation/OTP-15831/ERL-876' into rickard/re-unicode-validation/OTP-15831/OTP-15836/ERL-876
* rickard/re-global-unicode-validation/OTP-15831/ERL-876: Update runtime dependency from stdlib to erts Only validate subject once when global is used in re:run() Add global_unicode_validation() test
Diffstat (limited to 'lib/stdlib')
-rw-r--r--lib/stdlib/src/re.erl52
-rw-r--r--lib/stdlib/src/stdlib.app.src2
-rw-r--r--lib/stdlib/test/re_SUITE.erl28
3 files changed, 71 insertions, 11 deletions
diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl
index 726b409d4d..197564b895 100644
--- a/lib/stdlib/src/re.erl
+++ b/lib/stdlib/src/re.erl
@@ -33,6 +33,8 @@
%%% BIFs
+-export([internal_run/4]).
+
-export([version/0, compile/1, compile/2, run/2, run/3, inspect/2]).
-spec version() -> binary().
@@ -100,6 +102,40 @@ run(_, _) ->
run(_, _, _) ->
erlang:nif_error(undef).
+-spec internal_run(Subject, RE, Options, FirstCall) -> {match, Captured} |
+ match |
+ nomatch |
+ {error, ErrType} when
+ Subject :: iodata() | unicode:charlist(),
+ RE :: mp() | iodata() | unicode:charlist(),
+ Options :: [Option],
+ Option :: anchored | global | notbol | noteol | notempty
+ | notempty_atstart | report_errors
+ | {offset, non_neg_integer()} |
+ {match_limit, non_neg_integer()} |
+ {match_limit_recursion, non_neg_integer()} |
+ {newline, NLSpec :: nl_spec()} |
+ bsr_anycrlf | bsr_unicode | {capture, ValueSpec} |
+ {capture, ValueSpec, Type} | CompileOpt,
+ Type :: index | list | binary,
+ ValueSpec :: all | all_but_first | all_names | first | none | ValueList,
+ ValueList :: [ValueID],
+ ValueID :: integer() | string() | atom(),
+ CompileOpt :: compile_option(),
+ Captured :: [CaptureData] | [[CaptureData]],
+ CaptureData :: {integer(), integer()}
+ | ListConversionData
+ | binary(),
+ ListConversionData :: string()
+ | {error, string(), binary()}
+ | {incomplete, string(), binary()},
+ ErrType :: match_limit | match_limit_recursion | {compile, CompileErr},
+ CompileErr :: {ErrString :: string(), Position :: non_neg_integer()},
+ FirstCall :: boolean().
+
+internal_run(_, _, _, _) ->
+ erlang:nif_error(undef).
+
-spec inspect(MP,Item) -> {namelist, [ binary() ]} when
MP :: mp(),
Item :: namelist.
@@ -765,17 +801,17 @@ do_grun(FlatSubject,Subject,Unicode,CRLF,RE,{Options0,NeedClean}) ->
try
postprocess(loopexec(FlatSubject,RE,InitialOffset,
byte_size(FlatSubject),
- Unicode,CRLF,StrippedOptions),
+ Unicode,CRLF,StrippedOptions,true),
SelectReturn,ConvertReturn,FlatSubject,Unicode)
catch
throw:ErrTuple ->
ErrTuple
end.
-loopexec(_,_,X,Y,_,_,_) when X > Y ->
+loopexec(_,_,X,Y,_,_,_,_) when X > Y ->
{match,[]};
-loopexec(Subject,RE,X,Y,Unicode,CRLF,Options) ->
- case re:run(Subject,RE,[{offset,X}]++Options) of
+loopexec(Subject,RE,X,Y,Unicode,CRLF,Options, First) ->
+ case re:internal_run(Subject,RE,[{offset,X}]++Options,First) of
{error, Err} ->
throw({error,Err});
nomatch ->
@@ -784,11 +820,11 @@ loopexec(Subject,RE,X,Y,Unicode,CRLF,Options) ->
{match,Rest} =
case B>0 of
true ->
- loopexec(Subject,RE,A+B,Y,Unicode,CRLF,Options);
+ loopexec(Subject,RE,A+B,Y,Unicode,CRLF,Options,false);
false ->
{match,M} =
- case re:run(Subject,RE,[{offset,X},notempty_atstart,
- anchored]++Options) of
+ case re:internal_run(Subject,RE,[{offset,X},notempty_atstart,
+ anchored]++Options,false) of
nomatch ->
{match,[]};
{match,Other} ->
@@ -801,7 +837,7 @@ loopexec(Subject,RE,X,Y,Unicode,CRLF,Options) ->
forward(Subject,A,1,Unicode,CRLF)
end,
{match,MM} = loopexec(Subject,RE,NewA,Y,
- Unicode,CRLF,Options),
+ Unicode,CRLF,Options,false),
case M of
[] ->
{match,MM};
diff --git a/lib/stdlib/src/stdlib.app.src b/lib/stdlib/src/stdlib.app.src
index ecb514e9f3..7bcefc3615 100644
--- a/lib/stdlib/src/stdlib.app.src
+++ b/lib/stdlib/src/stdlib.app.src
@@ -108,7 +108,7 @@
dets]},
{applications, [kernel]},
{env, []},
- {runtime_dependencies, ["sasl-3.0","kernel-6.0","erts-10.4","crypto-3.3",
+ {runtime_dependencies, ["sasl-3.0","kernel-6.0","erts-@OTP-15831@","crypto-3.3",
"compiler-5.0"]}
]}.
diff --git a/lib/stdlib/test/re_SUITE.erl b/lib/stdlib/test/re_SUITE.erl
index c9ef9da990..f026159b47 100644
--- a/lib/stdlib/test/re_SUITE.erl
+++ b/lib/stdlib/test/re_SUITE.erl
@@ -28,7 +28,7 @@
pcre_compile_workspace_overflow/1,re_infinite_loop/1,
re_backwards_accented/1,opt_dupnames/1,opt_all_names/1,inspect/1,
opt_no_start_optimize/1,opt_never_utf/1,opt_ucp/1,
- match_limit/1,sub_binaries/1,copt/1]).
+ match_limit/1,sub_binaries/1,copt/1,global_unicode_validation/1]).
-include_lib("common_test/include/ct.hrl").
-include_lib("kernel/include/file.hrl").
@@ -45,7 +45,7 @@ all() ->
pcre_compile_workspace_overflow, re_infinite_loop,
re_backwards_accented, opt_dupnames, opt_all_names,
inspect, opt_no_start_optimize,opt_never_utf,opt_ucp,
- match_limit, sub_binaries, re_version].
+ match_limit, sub_binaries, re_version, global_unicode_validation].
groups() ->
[].
@@ -200,7 +200,31 @@ re_version(_Config) ->
{match,[Version]} = re:run(Version,"^[0-9]\\.[0-9]{2} 20[0-9]{2}-[0-9]{2}-[0-9]{2}",[{capture,all,binary}]),
ok.
+global_unicode_validation(Config) when is_list(Config) ->
+ %% Test that unicode validation of the subject is not done
+ %% for every match found...
+ Bin = binary:copy(<<"abc\n">>,100000),
+ {TimeAscii, _} = take_time(fun () ->
+ re:run(Bin, <<"b">>, [global])
+ end),
+ {TimeUnicode, _} = take_time(fun () ->
+ re:run(Bin, <<"b">>, [unicode,global])
+ end),
+ if TimeAscii == 0; TimeUnicode == 0 ->
+ {comment, "Not good enough resolution to compare results"};
+ true ->
+ %% The time the operations takes should be in the
+ %% same order of magnitude. If validation of the
+ %% whole subject occurs for every match, the unicode
+ %% variant will take way longer time...
+ true = TimeUnicode div TimeAscii < 10
+ end.
+take_time(Fun) ->
+ Start = erlang:monotonic_time(nanosecond),
+ Res = Fun(),
+ End = erlang:monotonic_time(nanosecond),
+ {End-Start, Res}.
%% Test compile options given directly to run.
combined_options(Config) when is_list(Config) ->