aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--erts/emulator/beam/erl_bif_re.c1
-rw-r--r--erts/emulator/pcre/pcre.h3
-rw-r--r--erts/emulator/pcre/pcre_exec.c115
-rw-r--r--erts/emulator/pcre/pcre_internal.h11
-rw-r--r--erts/emulator/pcre/pcre_valid_utf8.c73
-rw-r--r--lib/stdlib/src/stdlib.app.src2
-rw-r--r--lib/stdlib/test/re_SUITE.erl33
7 files changed, 202 insertions, 36 deletions
diff --git a/erts/emulator/beam/erl_bif_re.c b/erts/emulator/beam/erl_bif_re.c
index 8f872ecf45..b3bf1c7ee3 100644
--- a/erts/emulator/beam/erl_bif_re.c
+++ b/erts/emulator/beam/erl_bif_re.c
@@ -1428,6 +1428,7 @@ static BIF_RETTYPE re_exec_trap(BIF_ALIST_3)
loop_count = 0xFFFFFFFF;
#endif
rc = erts_pcre_exec(NULL, &(restartp->extra), NULL, 0, 0, 0, NULL, 0);
+
ASSERT(loop_count != 0xFFFFFFFF);
BUMP_REDS(BIF_P, loop_count / LOOP_FACTOR);
if (rc == PCRE_ERROR_LOOP_LIMIT) {
diff --git a/erts/emulator/pcre/pcre.h b/erts/emulator/pcre/pcre.h
index 3563791223..505e2ccce0 100644
--- a/erts/emulator/pcre/pcre.h
+++ b/erts/emulator/pcre/pcre.h
@@ -240,6 +240,9 @@ with J. */
#define PCRE_UTF8_ERR20 20
#define PCRE_UTF8_ERR21 21
#define PCRE_UTF8_ERR22 22 /* Unused (was non-character) */
+#if defined(ERLANG_INTEGRATION)
+#define PCRE_UTF8_YIELD 23
+#endif
/* Specific error codes for UTF-16 validity checks */
diff --git a/erts/emulator/pcre/pcre_exec.c b/erts/emulator/pcre/pcre_exec.c
index 1946e97a72..55a7b377bf 100644
--- a/erts/emulator/pcre/pcre_exec.c
+++ b/erts/emulator/pcre/pcre_exec.c
@@ -6642,10 +6642,16 @@ typedef struct {
REAL_PCRE *Xre;
heapframe Xframe_zero; /* Always NO_RECURSE */
+ /* for yield in valid_utf() */
+
+ struct PRIV(valid_utf_ystate) valid_utf_ystate;
+
/* Original function parameters that need be saved */
int Xstart_offset;
int Xoffsetcount;
int *Xoffsets;
+ int Xlength;
+ PCRE_SPTR Xsubject;
} PcreExecContext;
#endif
@@ -6675,6 +6681,7 @@ pcre32_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
#endif
{
#ifndef ERLANG_INTEGRATION
+#define ERTS_UPDATE_CONSUMED(X, MD)
int rc, ocount, arg_offset_max;
int newline;
BOOL using_temporary_offsets = FALSE;
@@ -6736,6 +6743,8 @@ heapframe frame_zero;
start_offset = exec_context->Xstart_offset; \
offsetcount = exec_context->Xoffsetcount; \
offsets = exec_context->Xoffsets; \
+ length = exec_context->Xlength; \
+ subject = exec_context->Xsubject; \
} while (0)
#define SWAPOUT() do { \
@@ -6750,8 +6759,30 @@ heapframe frame_zero;
exec_context->Xstart_offset = start_offset; \
exec_context->Xoffsetcount = offsetcount; \
exec_context->Xoffsets = offsets; \
+ exec_context->Xlength = length; \
+ exec_context->Xsubject = subject; \
} while (0)
+#define ERTS_UPDATE_CONSUMED(X, MD) /* store the work consumed so far in *(X)->loop_counter_return */ \
+do { \
+ if (((X)->flags & PCRE_EXTRA_LOOP_LIMIT) != 0) { /* only reported when a loop limit is in use */ \
+ unsigned long consumed__; \
+ if (!(X)->restart_data) { /* no saved execution context to read from */ \
+ consumed__ = 0; \
+ } \
+ else { \
+ PcreExecContext *ctx__ = (PcreExecContext *) \
+ (*(X)->restart_data); \
+ consumed__ = ctx__->valid_utf_ystate.cnt; /* count left behind by UTF validation */ \
+ ctx__->valid_utf_ystate.cnt = 0; /* cleared so it is only reported once */ \
+ } \
+ if ((MD)) { /* MD may be NULL, e.g. when called before match data is in use */ \
+ match_data *md__ = (MD); \
+ consumed__ += (X)->loop_limit - md__->loop_limit; /* configured limit minus the match data's current limit */ \
+ } \
+ *((X)->loop_counter_return) = consumed__; \
+ } \
+} while (0)
PcreExecContext *exec_context;
PcreExecContext internal_context;
@@ -6776,15 +6807,21 @@ pcre_uchar req_char;
/* we are restarting, every initialization is skipped and we jump directly into the loop */
exec_context = (PcreExecContext *) *(extra_data->restart_data);
SWAPIN();
-
+ if (exec_context->valid_utf_ystate.yielded)
+ goto restart_valid_utf;
goto RESTART_INTERRUPTED;
} else {
if (extra_data != NULL &&
(extra_data->flags & PCRE_EXTRA_LOOP_LIMIT)) {
exec_context = (PcreExecContext *) (erts_pcre_malloc)(sizeof(PcreExecContext));
- *(extra_data->restart_data) = (void *) exec_context;
+ *(extra_data->restart_data) = (void *) exec_context;
+ exec_context->valid_utf_ystate.yielded = 0;
/* need freeing by special routine from client */
} else {
+#if defined(ERLANG_INTEGRATION)
+ fprintf(stderr, "Unexpected execution path\n");
+ abort();
+#endif
exec_context = &internal_context;
}
@@ -6865,9 +6902,38 @@ code for an invalid string if a results vector is available. */
if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
{
int erroroffset;
- int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
+  int errorcode;
+
+  /* Validate the subject as UTF-8. PRIV(valid_utf) takes three arguments;
+     &erroroffset must be passed so that the offset stored in offsets[0]
+     on failure is actually set by the validator. */
+#if !defined(ERLANG_INTEGRATION)
+  errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
+#else
+  struct PRIV(valid_utf_ystate) *ystate;
+
+  if (!extra_data || !extra_data->restart_data) {
+    ystate = NULL; /* NULL state: the validator runs to completion without yielding */
+  }
+  else if (!(extra_data->flags & PCRE_EXTRA_LOOP_LIMIT)) {
+    exec_context->valid_utf_ystate.cnt = 10;
+    ystate = NULL;
+  }
+  else {
+    exec_context->valid_utf_ystate.yielded = 0;
+  restart_valid_utf: /* jumped to on restart when the previous call yielded in validation */
+    ystate = &exec_context->valid_utf_ystate;
+    ystate->cnt = (int) extra_data->loop_limit; /* budget for this invocation */
+  }
+  errorcode = PRIV(yielding_valid_utf)((PCRE_PUCHAR)subject, length,
+                                       &erroroffset, ystate);
+#endif
if (errorcode != 0)
{
+#if defined(ERLANG_INTEGRATION)
+ if (ystate && ystate->yielded) {
+ ERTS_UPDATE_CONSUMED(extra_data, NULL);
+ SWAPOUT();
+ return PCRE_ERROR_LOOP_LIMIT;
+ }
+#endif
if (offsetcount >= 2)
{
offsets[0] = erroroffset;
@@ -6890,6 +6956,11 @@ if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
return PCRE_ERROR_BADUTF8_OFFSET;
#endif
}
+#if defined(ERLANG_INTEGRATION)
+else {
+ exec_context->valid_utf_ystate.cnt = 0;
+}
+#endif
#endif
/* If the pattern was successfully studied with JIT support, run the JIT
@@ -6950,7 +7021,11 @@ if (extra_data != NULL)
#ifdef ERLANG_INTEGRATION
if ((flags & PCRE_EXTRA_LOOP_LIMIT) != 0)
{
- md->loop_limit = extra_data->loop_limit;
+ md->loop_limit = extra_data->loop_limit;
+ if (extra_data->restart_data)
+ md->loop_limit -= extra_data->loop_limit - exec_context->valid_utf_ystate.cnt;
+ if (md->loop_limit < 10)
+ md->loop_limit = 10; /* At least do something if we've come this far... */
}
#endif
}
@@ -7266,14 +7341,8 @@ for(;;)
#endif
if ((start_bits[c/8] & (1 << (c&7))) != 0)
{
-#ifdef ERLANG_INTEGRATION
- if ((extra_data->flags & PCRE_EXTRA_LOOP_LIMIT) != 0)
- {
- *extra_data->loop_counter_return =
- (extra_data->loop_limit - md->loop_limit);
- }
-#endif
- break;
+ ERTS_UPDATE_CONSUMED(extra_data, md);
+ break;
}
start_match++;
}
@@ -7298,13 +7367,7 @@ for(;;)
(pcre_uint32)(end_subject - start_match) < study->minlength)
{
rc = MATCH_NOMATCH;
-#ifdef ERLANG_INTEGRATION
- if ((extra_data->flags & PCRE_EXTRA_LOOP_LIMIT) != 0)
- {
- *extra_data->loop_counter_return =
- (extra_data->loop_limit - md->loop_limit);
- }
-#endif
+ ERTS_UPDATE_CONSUMED(extra_data, md);
break;
}
@@ -7353,13 +7416,7 @@ for(;;)
if (p >= end_subject)
{
rc = MATCH_NOMATCH;
-#ifdef ERLANG_INTEGRATION
- if ((extra_data->flags & PCRE_EXTRA_LOOP_LIMIT) != 0)
- {
- *extra_data->loop_counter_return =
- (extra_data->loop_limit - md->loop_limit);
- }
-#endif
+ ERTS_UPDATE_CONSUMED(extra_data, md);
break;
}
@@ -7390,11 +7447,7 @@ for(;;)
EDEBUGF(("Calling match..."));
rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
#ifdef ERLANG_INTEGRATION
- if ((extra_data->flags & PCRE_EXTRA_LOOP_LIMIT) != 0)
- {
- *extra_data->loop_counter_return =
- (extra_data->loop_limit - md->loop_limit);
- }
+ ERTS_UPDATE_CONSUMED(extra_data, md);
SWAPOUT();
while(rc == PCRE_ERROR_LOOP_LIMIT) {
EDEBUGF(("Loop limit break detected"));
diff --git a/erts/emulator/pcre/pcre_internal.h b/erts/emulator/pcre/pcre_internal.h
index c84dcb5a38..71f473e86f 100644
--- a/erts/emulator/pcre/pcre_internal.h
+++ b/erts/emulator/pcre/pcre_internal.h
@@ -2756,6 +2756,17 @@ extern int PRIV(strcmp_uc_c8_utf)(const pcre_uchar *,
#endif /* COMPILE_PCRE[8|16|32] */
+#if defined(ERLANG_INTEGRATION)
+struct PRIV(valid_utf_ystate) { /* resumable state for PRIV(yielding_valid_utf) */
+ unsigned int cnt; /* in: iteration budget; on completion: amount consumed (unchanged on yield) */
+ int length; /* value of the 'length' argument saved at the point of a yield */
+ int yielded; /* non-zero when validation returned early and must be resumed */
+ PCRE_PUCHAR p; /* position in the subject saved at the point of a yield */
+};
+extern int PRIV(yielding_valid_utf)(PCRE_PUCHAR, int, int *,
+ struct PRIV(valid_utf_ystate) *); /* a NULL state pointer disables yielding */
+#endif
+
extern const pcre_uchar *PRIV(find_bracket)(const pcre_uchar *, BOOL, int);
extern BOOL PRIV(is_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,
int *, BOOL);
diff --git a/erts/emulator/pcre/pcre_valid_utf8.c b/erts/emulator/pcre/pcre_valid_utf8.c
index 516d8f4725..1dc1f9ba0c 100644
--- a/erts/emulator/pcre/pcre_valid_utf8.c
+++ b/erts/emulator/pcre/pcre_valid_utf8.c
@@ -107,19 +107,80 @@ Returns: = 0 if the string is a valid UTF-8 string
int
PRIV(valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset)
{
+
+#if defined(ERLANG_INTEGRATION)
+ return PRIV(yielding_valid_utf)(string, length, erroroffset, NULL);
+}
+
+int
+PRIV(yielding_valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset, struct PRIV(valid_utf_ystate) *ystate)
+{
+#endif
+
#ifdef SUPPORT_UTF
register PCRE_PUCHAR p;
+#if defined(ERLANG_INTEGRATION)
+register long cnt;
+
+if (!ystate) {
+ cnt = -1;
+}
+else {
+ cnt = ystate->cnt;
+ if (ystate->yielded) {
+ p = ystate->p;
+ length = ystate->length;
+ if (length < 0)
+ goto restart_length;
+ else
+ goto restart_validate;
+ }
+}
+#endif
+
if (length < 0)
{
- for (p = string; *p != 0; p++);
- length = (int)(p - string);
+ for (p = string; *p != 0; p++) {
+#if defined(ERLANG_INTEGRATION)
+ if (cnt > 0 && --cnt == 0) {
+ /*
+ * Return with cnt set to amount consumed;
+ * i.e. same amount as at start...
+ */
+ ystate->yielded = !0;
+ ystate->length = length;
+ ystate->p = p;
+ return PCRE_UTF8_YIELD;
+ }
+ restart_length:
+ (void) !0;
+#endif
+ }
+ length = (int)(p - string);
}
for (p = string; length-- > 0; p++)
{
register pcre_uchar ab, c, d;
+#if defined(ERLANG_INTEGRATION)
+
+ if (cnt > 0 && --cnt == 0) {
+ /*
+ * Return with cnt set to amount consumed;
+ * i.e. same amount as at start...
+ */
+ ystate->yielded = !0;
+ ystate->length = length;
+ ystate->p = p;
+ return PCRE_UTF8_YIELD;
+ }
+
+ restart_validate:
+
+#endif
+
c = *p;
if (c < 128) continue; /* ASCII character */
@@ -290,6 +351,14 @@ for (p = string; length-- > 0; p++)
}
}
+#if defined(ERLANG_INTEGRATION)
+if (ystate) {
+ /* Return with cnt set to amount consumed... */
+ ystate->cnt -= cnt;
+ ystate->yielded = 0;
+}
+#endif
+
#else /* Not SUPPORT_UTF */
(void)(string); /* Keep picky compilers happy */
(void)(length);
diff --git a/lib/stdlib/src/stdlib.app.src b/lib/stdlib/src/stdlib.app.src
index 7bcefc3615..d7d57941c2 100644
--- a/lib/stdlib/src/stdlib.app.src
+++ b/lib/stdlib/src/stdlib.app.src
@@ -108,7 +108,7 @@
dets]},
{applications, [kernel]},
{env, []},
- {runtime_dependencies, ["sasl-3.0","kernel-6.0","erts-@OTP-15831@","crypto-3.3",
+ {runtime_dependencies, ["sasl-3.0","kernel-6.0","erts-@OTP-15831:OTP-15836@","crypto-3.3",
"compiler-5.0"]}
]}.
diff --git a/lib/stdlib/test/re_SUITE.erl b/lib/stdlib/test/re_SUITE.erl
index f026159b47..06d8fe9255 100644
--- a/lib/stdlib/test/re_SUITE.erl
+++ b/lib/stdlib/test/re_SUITE.erl
@@ -28,7 +28,8 @@
pcre_compile_workspace_overflow/1,re_infinite_loop/1,
re_backwards_accented/1,opt_dupnames/1,opt_all_names/1,inspect/1,
opt_no_start_optimize/1,opt_never_utf/1,opt_ucp/1,
- match_limit/1,sub_binaries/1,copt/1,global_unicode_validation/1]).
+ match_limit/1,sub_binaries/1,copt/1,global_unicode_validation/1,
+ yield_on_subject_validation/1]).
-include_lib("common_test/include/ct.hrl").
-include_lib("kernel/include/file.hrl").
@@ -45,7 +46,8 @@ all() ->
pcre_compile_workspace_overflow, re_infinite_loop,
re_backwards_accented, opt_dupnames, opt_all_names,
inspect, opt_no_start_optimize,opt_never_utf,opt_ucp,
- match_limit, sub_binaries, re_version, global_unicode_validation].
+ match_limit, sub_binaries, re_version, global_unicode_validation,
+ yield_on_subject_validation].
groups() ->
[].
@@ -226,6 +228,33 @@ take_time(Fun) ->
End = erlang:monotonic_time(nanosecond),
{End-Start, Res}.
+%% Run re:run/3 with the unicode option on a large subject in a traced
+%% process and require that the process was scheduled out in
+%% erlang:re_run_trap/3 at least five times.
+yield_on_subject_validation(Config) when is_list(Config) ->
+    Start = make_ref(),
+    Subject = binary:copy(<<"abc\n">>, 100000),
+    Worker = fun () ->
+                     %% Hold off until the parent has enabled tracing.
+                     receive Start -> ok end,
+                     {match,[{1,1}]} = re:run(Subject, <<"b">>, [unicode])
+             end,
+    {Pid, MRef} = spawn_opt(Worker, [link, monitor]),
+    1 = erlang:trace(Pid, true, [running]),
+    Pid ! Start,
+    TrapOuts = count_re_run_trap_out(Pid, MRef),
+    true = (TrapOuts >= 5),
+    ok.
+
+%% Called with a monitor reference: wait for the traced process to exit
+%% normally and for its trace messages to be delivered, then start counting.
+%% Called with an integer: count the queued 'out' trace messages whose
+%% current function is erlang:re_run_trap/3.
+count_re_run_trap_out(Pid, MRef) when is_reference(MRef) ->
+    receive
+        {'DOWN', MRef, process, Pid, normal} ->
+            ok
+    end,
+    DeliveredRef = erlang:trace_delivered(Pid),
+    receive
+        {trace_delivered, Pid, DeliveredRef} ->
+            ok
+    end,
+    count_re_run_trap_out(Pid, 0);
+count_re_run_trap_out(Pid, Count) when is_integer(Count) ->
+    receive
+        {trace, Pid, out, {erlang, re_run_trap, 3}} ->
+            count_re_run_trap_out(Pid, Count + 1)
+    after 0 ->
+        Count
+    end.
+
%% Test compile options given directly to run.
combined_options(Config) when is_list(Config) ->
ok = crtest("ABDabcdABCD","abcd",[],true,{match,[{3,4}]}),