about | summary | refs | log | tree | commit | diff | stats
path: root/lib/stdlib
diff options
context:
space:
mode:
author Rickard Green <[email protected]> 2019-06-25 16:13:04 +0200
committer GitHub <[email protected]> 2019-06-25 16:13:04 +0200
commit 1e9655cc7e51f3b043162fa7883228768bc5e9fb (patch)
tree 95c05a26a21111c965a989db7934c9c989af741a /lib/stdlib
parent 09abac433a59f0644316a54f61ade7132db673d1 (diff)
parent 413d6d694eaf7530fd10bdc4dcafa4c7efa24e3e (diff)
download otp-1e9655cc7e51f3b043162fa7883228768bc5e9fb.tar.gz
otp-1e9655cc7e51f3b043162fa7883228768bc5e9fb.tar.bz2
otp-1e9655cc7e51f3b043162fa7883228768bc5e9fb.zip
Merge pull request #2250 from rickard-green/rickard/re-unicode-validation/OTP-15831/OTP-15836/ERL-876
re unicode validation
Diffstat (limited to 'lib/stdlib')
-rw-r--r-- lib/stdlib/src/re.erl 52
-rw-r--r-- lib/stdlib/src/stdlib.app.src 2
-rw-r--r-- lib/stdlib/test/re_SUITE.erl 57
3 files changed, 100 insertions, 11 deletions
diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl
index 726b409d4d..197564b895 100644
--- a/lib/stdlib/src/re.erl
+++ b/lib/stdlib/src/re.erl
@@ -33,6 +33,8 @@
%%% BIFs
+-export([internal_run/4]).
+
-export([version/0, compile/1, compile/2, run/2, run/3, inspect/2]).
-spec version() -> binary().
@@ -100,6 +102,40 @@ run(_, _) ->
run(_, _, _) ->
erlang:nif_error(undef).
+-spec internal_run(Subject, RE, Options, FirstCall) -> {match, Captured} |
+ match |
+ nomatch |
+ {error, ErrType} when
+ Subject :: iodata() | unicode:charlist(),
+ RE :: mp() | iodata() | unicode:charlist(),
+ Options :: [Option],
+ Option :: anchored | global | notbol | noteol | notempty
+ | notempty_atstart | report_errors
+ | {offset, non_neg_integer()} |
+ {match_limit, non_neg_integer()} |
+ {match_limit_recursion, non_neg_integer()} |
+ {newline, NLSpec :: nl_spec()} |
+ bsr_anycrlf | bsr_unicode | {capture, ValueSpec} |
+ {capture, ValueSpec, Type} | CompileOpt,
+ Type :: index | list | binary,
+ ValueSpec :: all | all_but_first | all_names | first | none | ValueList,
+ ValueList :: [ValueID],
+ ValueID :: integer() | string() | atom(),
+ CompileOpt :: compile_option(),
+ Captured :: [CaptureData] | [[CaptureData]],
+ CaptureData :: {integer(), integer()}
+ | ListConversionData
+ | binary(),
+ ListConversionData :: string()
+ | {error, string(), binary()}
+ | {incomplete, string(), binary()},
+ ErrType :: match_limit | match_limit_recursion | {compile, CompileErr},
+ CompileErr :: {ErrString :: string(), Position :: non_neg_integer()},
+ FirstCall :: boolean().
+
+internal_run(_, _, _, _) ->
+ erlang:nif_error(undef).
+
-spec inspect(MP,Item) -> {namelist, [ binary() ]} when
MP :: mp(),
Item :: namelist.
@@ -765,17 +801,17 @@ do_grun(FlatSubject,Subject,Unicode,CRLF,RE,{Options0,NeedClean}) ->
try
postprocess(loopexec(FlatSubject,RE,InitialOffset,
byte_size(FlatSubject),
- Unicode,CRLF,StrippedOptions),
+ Unicode,CRLF,StrippedOptions,true),
SelectReturn,ConvertReturn,FlatSubject,Unicode)
catch
throw:ErrTuple ->
ErrTuple
end.
-loopexec(_,_,X,Y,_,_,_) when X > Y ->
+loopexec(_,_,X,Y,_,_,_,_) when X > Y ->
{match,[]};
-loopexec(Subject,RE,X,Y,Unicode,CRLF,Options) ->
- case re:run(Subject,RE,[{offset,X}]++Options) of
+loopexec(Subject,RE,X,Y,Unicode,CRLF,Options, First) ->
+ case re:internal_run(Subject,RE,[{offset,X}]++Options,First) of
{error, Err} ->
throw({error,Err});
nomatch ->
@@ -784,11 +820,11 @@ loopexec(Subject,RE,X,Y,Unicode,CRLF,Options) ->
{match,Rest} =
case B>0 of
true ->
- loopexec(Subject,RE,A+B,Y,Unicode,CRLF,Options);
+ loopexec(Subject,RE,A+B,Y,Unicode,CRLF,Options,false);
false ->
{match,M} =
- case re:run(Subject,RE,[{offset,X},notempty_atstart,
- anchored]++Options) of
+ case re:internal_run(Subject,RE,[{offset,X},notempty_atstart,
+ anchored]++Options,false) of
nomatch ->
{match,[]};
{match,Other} ->
@@ -801,7 +837,7 @@ loopexec(Subject,RE,X,Y,Unicode,CRLF,Options) ->
forward(Subject,A,1,Unicode,CRLF)
end,
{match,MM} = loopexec(Subject,RE,NewA,Y,
- Unicode,CRLF,Options),
+ Unicode,CRLF,Options,false),
case M of
[] ->
{match,MM};
diff --git a/lib/stdlib/src/stdlib.app.src b/lib/stdlib/src/stdlib.app.src
index ecb514e9f3..d7d57941c2 100644
--- a/lib/stdlib/src/stdlib.app.src
+++ b/lib/stdlib/src/stdlib.app.src
@@ -108,7 +108,7 @@
dets]},
{applications, [kernel]},
{env, []},
- {runtime_dependencies, ["sasl-3.0","kernel-6.0","erts-10.4","crypto-3.3",
+ {runtime_dependencies, ["sasl-3.0","kernel-6.0","erts-@OTP-15831:OTP-15836@","crypto-3.3",
"compiler-5.0"]}
]}.
diff --git a/lib/stdlib/test/re_SUITE.erl b/lib/stdlib/test/re_SUITE.erl
index c9ef9da990..06d8fe9255 100644
--- a/lib/stdlib/test/re_SUITE.erl
+++ b/lib/stdlib/test/re_SUITE.erl
@@ -28,7 +28,8 @@
pcre_compile_workspace_overflow/1,re_infinite_loop/1,
re_backwards_accented/1,opt_dupnames/1,opt_all_names/1,inspect/1,
opt_no_start_optimize/1,opt_never_utf/1,opt_ucp/1,
- match_limit/1,sub_binaries/1,copt/1]).
+ match_limit/1,sub_binaries/1,copt/1,global_unicode_validation/1,
+ yield_on_subject_validation/1]).
-include_lib("common_test/include/ct.hrl").
-include_lib("kernel/include/file.hrl").
@@ -45,7 +46,8 @@ all() ->
pcre_compile_workspace_overflow, re_infinite_loop,
re_backwards_accented, opt_dupnames, opt_all_names,
inspect, opt_no_start_optimize,opt_never_utf,opt_ucp,
- match_limit, sub_binaries, re_version].
+ match_limit, sub_binaries, re_version, global_unicode_validation,
+ yield_on_subject_validation].
groups() ->
[].
@@ -200,7 +202,58 @@ re_version(_Config) ->
{match,[Version]} = re:run(Version,"^[0-9]\\.[0-9]{2} 20[0-9]{2}-[0-9]{2}-[0-9]{2}",[{capture,all,binary}]),
ok.
+global_unicode_validation(Config) when is_list(Config) ->
+ %% Test that unicode validation of the subject is not done
+ %% for every match found...
+ Bin = binary:copy(<<"abc\n">>,100000),
+ {TimeAscii, _} = take_time(fun () ->
+ re:run(Bin, <<"b">>, [global])
+ end),
+ {TimeUnicode, _} = take_time(fun () ->
+ re:run(Bin, <<"b">>, [unicode,global])
+ end),
+ if TimeAscii == 0; TimeUnicode == 0 ->
+ {comment, "Not good enough resolution to compare results"};
+ true ->
+ %% The time the operations takes should be in the
+ %% same order of magnitude. If validation of the
+ %% whole subject occurs for every match, the unicode
+ %% variant will take way longer time...
+ true = TimeUnicode div TimeAscii < 10
+ end.
+
+take_time(Fun) ->
+ Start = erlang:monotonic_time(nanosecond),
+ Res = Fun(),
+ End = erlang:monotonic_time(nanosecond),
+ {End-Start, Res}.
+
+yield_on_subject_validation(Config) when is_list(Config) ->
+ Go = make_ref(),
+ Bin = binary:copy(<<"abc\n">>,100000),
+ {P, M} = spawn_opt(fun () ->
+ receive Go -> ok end,
+ {match,[{1,1}]} = re:run(Bin, <<"b">>, [unicode])
+ end,
+ [link, monitor]),
+ 1 = erlang:trace(P, true, [running]),
+ P ! Go,
+ N = count_re_run_trap_out(P, M),
+ true = N >= 5,
+ ok.
+count_re_run_trap_out(P, M) when is_reference(M) ->
+ receive {'DOWN',M,process,P,normal} -> ok end,
+ TD = erlang:trace_delivered(P),
+ receive {trace_delivered, P, TD} -> ok end,
+ count_re_run_trap_out(P, 0);
+count_re_run_trap_out(P, N) when is_integer(N) ->
+ receive
+ {trace,P,out,{erlang,re_run_trap,3}} ->
+ count_re_run_trap_out(P, N+1)
+ after 0 ->
+ N
+ end.
%% Test compile options given directly to run.
combined_options(Config) when is_list(Config) ->