aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--erts/emulator/beam/bif.tab1
-rw-r--r--erts/emulator/beam/erl_bif_re.c29
-rw-r--r--lib/stdlib/src/re.erl52
-rw-r--r--lib/stdlib/src/stdlib.app.src2
-rw-r--r--lib/stdlib/test/re_SUITE.erl28
5 files changed, 97 insertions, 15 deletions
diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab
index db9c258cb7..602db106b1 100644
--- a/erts/emulator/beam/bif.tab
+++ b/erts/emulator/beam/bif.tab
@@ -413,6 +413,7 @@ bif re:compile/1
bif re:compile/2
bif re:run/2
bif re:run/3
+bif re:internal_run/4
#
# Bifs in lists module.
diff --git a/erts/emulator/beam/erl_bif_re.c b/erts/emulator/beam/erl_bif_re.c
index e0b9202fe7..8f872ecf45 100644
--- a/erts/emulator/beam/erl_bif_re.c
+++ b/erts/emulator/beam/erl_bif_re.c
@@ -46,7 +46,7 @@ static Export *urun_trap_exportp = NULL;
static Export *ucompile_trap_exportp = NULL;
static BIF_RETTYPE re_exec_trap(BIF_ALIST_3);
-static BIF_RETTYPE re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3);
+static BIF_RETTYPE re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3, int first);
static void *erts_erts_pcre_malloc(size_t size) {
return erts_alloc(ERTS_ALC_T_RE_HEAP,size);
@@ -1094,7 +1094,7 @@ build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code)
* The actual re:run/2,3 BIFs
*/
static BIF_RETTYPE
-re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3)
+re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3, int first)
{
const pcre *code_tmp;
RestartContext restart;
@@ -1120,6 +1120,14 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3)
< 0) {
BIF_ERROR(p,BADARG);
}
+ if (!first) {
+ /*
+ * 'first' is false when re:grun() previously has called re:internal_run()
+ * with the same subject; i.e., no need to do yet another validation of
+ * the subject regarding utf8 encoding...
+ */
+ options |= PCRE_NO_UTF8_CHECK;
+ }
is_list_cap = ((pflags & PARSE_FLAG_CAPTURE_OPT) &&
(capture[CAPSPEC_TYPE] == am_list));
@@ -1360,15 +1368,28 @@ handle_iolist:
}
BIF_RETTYPE
+re_internal_run_4(BIF_ALIST_4)
+{
+ int first;
+ if (BIF_ARG_4 == am_false)
+ first = 0;
+ else if (BIF_ARG_4 == am_true)
+ first = !0;
+ else
+ BIF_ERROR(BIF_P,BADARG);
+ return re_run(BIF_P,BIF_ARG_1, BIF_ARG_2, BIF_ARG_3, first);
+}
+
+BIF_RETTYPE
re_run_3(BIF_ALIST_3)
{
- return re_run(BIF_P,BIF_ARG_1, BIF_ARG_2, BIF_ARG_3);
+ return re_run(BIF_P,BIF_ARG_1, BIF_ARG_2, BIF_ARG_3, !0);
}
BIF_RETTYPE
re_run_2(BIF_ALIST_2)
{
- return re_run(BIF_P,BIF_ARG_1, BIF_ARG_2, NIL);
+ return re_run(BIF_P,BIF_ARG_1, BIF_ARG_2, NIL, !0);
}
/*
diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl
index 726b409d4d..197564b895 100644
--- a/lib/stdlib/src/re.erl
+++ b/lib/stdlib/src/re.erl
@@ -33,6 +33,8 @@
%%% BIFs
+-export([internal_run/4]).
+
-export([version/0, compile/1, compile/2, run/2, run/3, inspect/2]).
-spec version() -> binary().
@@ -100,6 +102,40 @@ run(_, _) ->
run(_, _, _) ->
erlang:nif_error(undef).
+-spec internal_run(Subject, RE, Options, FirstCall) -> {match, Captured} |
+ match |
+ nomatch |
+ {error, ErrType} when
+ Subject :: iodata() | unicode:charlist(),
+ RE :: mp() | iodata() | unicode:charlist(),
+ Options :: [Option],
+ Option :: anchored | global | notbol | noteol | notempty
+ | notempty_atstart | report_errors
+ | {offset, non_neg_integer()} |
+ {match_limit, non_neg_integer()} |
+ {match_limit_recursion, non_neg_integer()} |
+ {newline, NLSpec :: nl_spec()} |
+ bsr_anycrlf | bsr_unicode | {capture, ValueSpec} |
+ {capture, ValueSpec, Type} | CompileOpt,
+ Type :: index | list | binary,
+ ValueSpec :: all | all_but_first | all_names | first | none | ValueList,
+ ValueList :: [ValueID],
+ ValueID :: integer() | string() | atom(),
+ CompileOpt :: compile_option(),
+ Captured :: [CaptureData] | [[CaptureData]],
+ CaptureData :: {integer(), integer()}
+ | ListConversionData
+ | binary(),
+ ListConversionData :: string()
+ | {error, string(), binary()}
+ | {incomplete, string(), binary()},
+ ErrType :: match_limit | match_limit_recursion | {compile, CompileErr},
+ CompileErr :: {ErrString :: string(), Position :: non_neg_integer()},
+ FirstCall :: boolean().
+
+internal_run(_, _, _, _) ->
+ erlang:nif_error(undef).
+
-spec inspect(MP,Item) -> {namelist, [ binary() ]} when
MP :: mp(),
Item :: namelist.
@@ -765,17 +801,17 @@ do_grun(FlatSubject,Subject,Unicode,CRLF,RE,{Options0,NeedClean}) ->
try
postprocess(loopexec(FlatSubject,RE,InitialOffset,
byte_size(FlatSubject),
- Unicode,CRLF,StrippedOptions),
+ Unicode,CRLF,StrippedOptions,true),
SelectReturn,ConvertReturn,FlatSubject,Unicode)
catch
throw:ErrTuple ->
ErrTuple
end.
-loopexec(_,_,X,Y,_,_,_) when X > Y ->
+loopexec(_,_,X,Y,_,_,_,_) when X > Y ->
{match,[]};
-loopexec(Subject,RE,X,Y,Unicode,CRLF,Options) ->
- case re:run(Subject,RE,[{offset,X}]++Options) of
+loopexec(Subject,RE,X,Y,Unicode,CRLF,Options, First) ->
+ case re:internal_run(Subject,RE,[{offset,X}]++Options,First) of
{error, Err} ->
throw({error,Err});
nomatch ->
@@ -784,11 +820,11 @@ loopexec(Subject,RE,X,Y,Unicode,CRLF,Options) ->
{match,Rest} =
case B>0 of
true ->
- loopexec(Subject,RE,A+B,Y,Unicode,CRLF,Options);
+ loopexec(Subject,RE,A+B,Y,Unicode,CRLF,Options,false);
false ->
{match,M} =
- case re:run(Subject,RE,[{offset,X},notempty_atstart,
- anchored]++Options) of
+ case re:internal_run(Subject,RE,[{offset,X},notempty_atstart,
+ anchored]++Options,false) of
nomatch ->
{match,[]};
{match,Other} ->
@@ -801,7 +837,7 @@ loopexec(Subject,RE,X,Y,Unicode,CRLF,Options) ->
forward(Subject,A,1,Unicode,CRLF)
end,
{match,MM} = loopexec(Subject,RE,NewA,Y,
- Unicode,CRLF,Options),
+ Unicode,CRLF,Options,false),
case M of
[] ->
{match,MM};
diff --git a/lib/stdlib/src/stdlib.app.src b/lib/stdlib/src/stdlib.app.src
index ecb514e9f3..7bcefc3615 100644
--- a/lib/stdlib/src/stdlib.app.src
+++ b/lib/stdlib/src/stdlib.app.src
@@ -108,7 +108,7 @@
dets]},
{applications, [kernel]},
{env, []},
- {runtime_dependencies, ["sasl-3.0","kernel-6.0","erts-10.4","crypto-3.3",
+ {runtime_dependencies, ["sasl-3.0","kernel-6.0","erts-@OTP-15831@","crypto-3.3",
"compiler-5.0"]}
]}.
diff --git a/lib/stdlib/test/re_SUITE.erl b/lib/stdlib/test/re_SUITE.erl
index c9ef9da990..f026159b47 100644
--- a/lib/stdlib/test/re_SUITE.erl
+++ b/lib/stdlib/test/re_SUITE.erl
@@ -28,7 +28,7 @@
pcre_compile_workspace_overflow/1,re_infinite_loop/1,
re_backwards_accented/1,opt_dupnames/1,opt_all_names/1,inspect/1,
opt_no_start_optimize/1,opt_never_utf/1,opt_ucp/1,
- match_limit/1,sub_binaries/1,copt/1]).
+ match_limit/1,sub_binaries/1,copt/1,global_unicode_validation/1]).
-include_lib("common_test/include/ct.hrl").
-include_lib("kernel/include/file.hrl").
@@ -45,7 +45,7 @@ all() ->
pcre_compile_workspace_overflow, re_infinite_loop,
re_backwards_accented, opt_dupnames, opt_all_names,
inspect, opt_no_start_optimize,opt_never_utf,opt_ucp,
- match_limit, sub_binaries, re_version].
+ match_limit, sub_binaries, re_version, global_unicode_validation].
groups() ->
[].
@@ -200,7 +200,31 @@ re_version(_Config) ->
{match,[Version]} = re:run(Version,"^[0-9]\\.[0-9]{2} 20[0-9]{2}-[0-9]{2}-[0-9]{2}",[{capture,all,binary}]),
ok.
+global_unicode_validation(Config) when is_list(Config) ->
+ %% Test that unicode validation of the subject is not done
+ %% for every match found...
+ Bin = binary:copy(<<"abc\n">>,100000),
+ {TimeAscii, _} = take_time(fun () ->
+ re:run(Bin, <<"b">>, [global])
+ end),
+ {TimeUnicode, _} = take_time(fun () ->
+ re:run(Bin, <<"b">>, [unicode,global])
+ end),
+ if TimeAscii == 0; TimeUnicode == 0 ->
+ {comment, "Not good enough resolution to compare results"};
+ true ->
+ %% The time the operations takes should be in the
+ %% same order of magnitude. If validation of the
+ %% whole subject occurs for every match, the unicode
+ %% variant will take way longer time...
+ true = TimeUnicode div TimeAscii < 10
+ end.
+take_time(Fun) ->
+ Start = erlang:monotonic_time(nanosecond),
+ Res = Fun(),
+ End = erlang:monotonic_time(nanosecond),
+ {End-Start, Res}.
%% Test compile options given directly to run.
combined_options(Config) when is_list(Config) ->