aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/pcre
diff options
context:
space:
mode:
authorRickard Green <[email protected]>2019-05-22 17:51:42 +0200
committerRickard Green <[email protected]>2019-06-18 22:27:23 +0200
commitb4ee064b539958f6a13bc9554758a106a49f4bd6 (patch)
treef5557f2be227b884ae551b6914d269365da15db9 /erts/emulator/pcre
parentf9f857c8094e6bb50a944316ea120e53cd5552ed (diff)
downloadotp-b4ee064b539958f6a13bc9554758a106a49f4bd6.tar.gz
otp-b4ee064b539958f6a13bc9554758a106a49f4bd6.tar.bz2
otp-b4ee064b539958f6a13bc9554758a106a49f4bd6.zip
Yield when validating UTF8 for long subject in re:run()
Diffstat (limited to 'erts/emulator/pcre')
-rw-r--r--erts/emulator/pcre/pcre.h3
-rw-r--r--erts/emulator/pcre/pcre_exec.c115
-rw-r--r--erts/emulator/pcre/pcre_internal.h11
-rw-r--r--erts/emulator/pcre/pcre_valid_utf8.c73
4 files changed, 169 insertions, 33 deletions
diff --git a/erts/emulator/pcre/pcre.h b/erts/emulator/pcre/pcre.h
index ab8f40cfc1..686925bf04 100644
--- a/erts/emulator/pcre/pcre.h
+++ b/erts/emulator/pcre/pcre.h
@@ -240,6 +240,9 @@ with J. */
#define PCRE_UTF8_ERR20 20
#define PCRE_UTF8_ERR21 21
#define PCRE_UTF8_ERR22 22 /* Unused (was non-character) */
+#if defined(ERLANG_INTEGRATION)
+#define PCRE_UTF8_YIELD 23
+#endif
/* Specific error codes for UTF-16 validity checks */
diff --git a/erts/emulator/pcre/pcre_exec.c b/erts/emulator/pcre/pcre_exec.c
index 6708ba92a6..3602c3aaab 100644
--- a/erts/emulator/pcre/pcre_exec.c
+++ b/erts/emulator/pcre/pcre_exec.c
@@ -6642,10 +6642,16 @@ typedef struct {
REAL_PCRE *Xre;
heapframe Xframe_zero; /* Always NO_RECURSE */
+ /* for yield in valid_utf() */
+
+ struct PRIV(valid_utf_ystate) valid_utf_ystate;
+
/* Original function parameters that need be saved */
int Xstart_offset;
int Xoffsetcount;
int *Xoffsets;
+ int Xlength;
+ PCRE_SPTR Xsubject;
} PcreExecContext;
#endif
@@ -6675,6 +6681,7 @@ pcre32_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
#endif
{
#ifndef ERLANG_INTEGRATION
+#define ERTS_UPDATE_CONSUMED(X, MD)
int rc, ocount, arg_offset_max;
int newline;
BOOL using_temporary_offsets = FALSE;
@@ -6736,6 +6743,8 @@ heapframe frame_zero;
start_offset = exec_context->Xstart_offset; \
offsetcount = exec_context->Xoffsetcount; \
offsets = exec_context->Xoffsets; \
+ length = exec_context->Xlength; \
+ subject = exec_context->Xsubject; \
} while (0)
#define SWAPOUT() do { \
@@ -6750,8 +6759,30 @@ heapframe frame_zero;
exec_context->Xstart_offset = start_offset; \
exec_context->Xoffsetcount = offsetcount; \
exec_context->Xoffsets = offsets; \
+ exec_context->Xlength = length; \
+ exec_context->Xsubject = subject; \
} while (0)
+#define ERTS_UPDATE_CONSUMED(X, MD) \
+do { \
+ if (((X)->flags & PCRE_EXTRA_LOOP_LIMIT) != 0) { \
+ unsigned long consumed__; \
+ if (!(X)->restart_data) { \
+ consumed__ = 0; \
+ } \
+ else { \
+ PcreExecContext *ctx__ = (PcreExecContext *) \
+ (*(X)->restart_data); \
+ consumed__ = ctx__->valid_utf_ystate.cnt; \
+ ctx__->valid_utf_ystate.cnt = 0; \
+ } \
+ if ((MD)) { \
+ match_data *md__ = (MD); \
+ consumed__ += (X)->loop_limit - md__->loop_limit; \
+ } \
+ *((X)->loop_counter_return) = consumed__; \
+ } \
+} while (0)
PcreExecContext *exec_context;
PcreExecContext internal_context;
@@ -6776,15 +6807,21 @@ pcre_uchar req_char;
/* we are restarting, every initialization is skipped and we jump directly into the loop */
exec_context = (PcreExecContext *) *(extra_data->restart_data);
SWAPIN();
-
+ if (exec_context->valid_utf_ystate.yielded)
+ goto restart_valid_utf;
goto RESTART_INTERRUPTED;
} else {
if (extra_data != NULL &&
(extra_data->flags & PCRE_EXTRA_LOOP_LIMIT)) {
exec_context = (PcreExecContext *) (erts_pcre_malloc)(sizeof(PcreExecContext));
- *(extra_data->restart_data) = (void *) exec_context;
+ *(extra_data->restart_data) = (void *) exec_context;
+ exec_context->valid_utf_ystate.yielded = 0;
/* need freeing by special routine from client */
} else {
+#if defined(ERLANG_INTEGRATION)
+ fprintf(stderr, "Unexpected execution path\n");
+ abort();
+#endif
exec_context = &internal_context;
}
@@ -6865,9 +6902,38 @@ code for an invalid string if a results vector is available. */
if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
{
int erroroffset;
- int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
+ int errorcode;
+
+#if !defined(ERLANG_INTEGRATION)
+ errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length);
+#else
+ struct PRIV(valid_utf_ystate) *ystate;
+
+ if (!extra_data || !extra_data->restart_data) {
+ ystate = NULL;
+ }
+ else if (!(extra_data->flags & PCRE_EXTRA_LOOP_LIMIT)) {
+ exec_context->valid_utf_ystate.cnt = 10;
+ ystate = NULL;
+ }
+ else {
+ exec_context->valid_utf_ystate.yielded = 0;
+ restart_valid_utf:
+ ystate = &exec_context->valid_utf_ystate;
+ ystate->cnt = (int) extra_data->loop_limit;
+ }
+ errorcode = PRIV(yielding_valid_utf)((PCRE_PUCHAR)subject, length,
+ &erroroffset, ystate);
+#endif
if (errorcode != 0)
{
+#if defined(ERLANG_INTEGRATION)
+ if (ystate && ystate->yielded) {
+ ERTS_UPDATE_CONSUMED(extra_data, NULL);
+ SWAPOUT();
+ return PCRE_ERROR_LOOP_LIMIT;
+ }
+#endif
if (offsetcount >= 2)
{
offsets[0] = erroroffset;
@@ -6890,6 +6956,11 @@ if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
return PCRE_ERROR_BADUTF8_OFFSET;
#endif
}
+#if defined(ERLANG_INTEGRATION)
+else {
+ exec_context->valid_utf_ystate.cnt = 0;
+}
+#endif
#endif
/* If the pattern was successfully studied with JIT support, run the JIT
@@ -6950,7 +7021,11 @@ if (extra_data != NULL)
#ifdef ERLANG_INTEGRATION
if ((flags & PCRE_EXTRA_LOOP_LIMIT) != 0)
{
- md->loop_limit = extra_data->loop_limit;
+ md->loop_limit = extra_data->loop_limit;
+ if (extra_data->restart_data)
+ md->loop_limit -= extra_data->loop_limit - exec_context->valid_utf_ystate.cnt;
+ if (md->loop_limit < 10)
+ md->loop_limit = 10; /* At least do something if we've come this far... */
}
#endif
}
@@ -7266,14 +7341,8 @@ for(;;)
#endif
if ((start_bits[c/8] & (1 << (c&7))) != 0)
{
-#ifdef ERLANG_INTEGRATION
- if ((extra_data->flags & PCRE_EXTRA_LOOP_LIMIT) != 0)
- {
- *extra_data->loop_counter_return =
- (extra_data->loop_limit - md->loop_limit);
- }
-#endif
- break;
+ ERTS_UPDATE_CONSUMED(extra_data, md);
+ break;
}
start_match++;
}
@@ -7298,13 +7367,7 @@ for(;;)
(pcre_uint32)(end_subject - start_match) < study->minlength)
{
rc = MATCH_NOMATCH;
-#ifdef ERLANG_INTEGRATION
- if ((extra_data->flags & PCRE_EXTRA_LOOP_LIMIT) != 0)
- {
- *extra_data->loop_counter_return =
- (extra_data->loop_limit - md->loop_limit);
- }
-#endif
+ ERTS_UPDATE_CONSUMED(extra_data, md);
break;
}
@@ -7353,13 +7416,7 @@ for(;;)
if (p >= end_subject)
{
rc = MATCH_NOMATCH;
-#ifdef ERLANG_INTEGRATION
- if ((extra_data->flags & PCRE_EXTRA_LOOP_LIMIT) != 0)
- {
- *extra_data->loop_counter_return =
- (extra_data->loop_limit - md->loop_limit);
- }
-#endif
+ ERTS_UPDATE_CONSUMED(extra_data, md);
break;
}
@@ -7390,11 +7447,7 @@ for(;;)
EDEBUGF(("Calling match..."));
rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
#ifdef ERLANG_INTEGRATION
- if ((extra_data->flags & PCRE_EXTRA_LOOP_LIMIT) != 0)
- {
- *extra_data->loop_counter_return =
- (extra_data->loop_limit - md->loop_limit);
- }
+ ERTS_UPDATE_CONSUMED(extra_data, md);
SWAPOUT();
while(rc == PCRE_ERROR_LOOP_LIMIT) {
EDEBUGF(("Loop limit break detected"));
diff --git a/erts/emulator/pcre/pcre_internal.h b/erts/emulator/pcre/pcre_internal.h
index c84dcb5a38..71f473e86f 100644
--- a/erts/emulator/pcre/pcre_internal.h
+++ b/erts/emulator/pcre/pcre_internal.h
@@ -2756,6 +2756,17 @@ extern int PRIV(strcmp_uc_c8_utf)(const pcre_uchar *,
#endif /* COMPILE_PCRE[8|16|32] */
+#if defined(ERLANG_INTEGRATION)
+struct PRIV(valid_utf_ystate) {
+ unsigned int cnt;
+ int length;
+ int yielded;
+ PCRE_PUCHAR p;
+};
+extern int PRIV(yielding_valid_utf)(PCRE_PUCHAR, int, int *,
+ struct PRIV(valid_utf_ystate) *);
+#endif
+
extern const pcre_uchar *PRIV(find_bracket)(const pcre_uchar *, BOOL, int);
extern BOOL PRIV(is_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,
int *, BOOL);
diff --git a/erts/emulator/pcre/pcre_valid_utf8.c b/erts/emulator/pcre/pcre_valid_utf8.c
index 516d8f4725..1dc1f9ba0c 100644
--- a/erts/emulator/pcre/pcre_valid_utf8.c
+++ b/erts/emulator/pcre/pcre_valid_utf8.c
@@ -107,19 +107,80 @@ Returns: = 0 if the string is a valid UTF-8 string
int
PRIV(valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset)
{
+
+#if defined(ERLANG_INTEGRATION)
+ return PRIV(yielding_valid_utf)(string, length, erroroffset, NULL);
+}
+
+int
+PRIV(yielding_valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset, struct PRIV(valid_utf_ystate) *ystate)
+{
+#endif
+
#ifdef SUPPORT_UTF
register PCRE_PUCHAR p;
+#if defined(ERLANG_INTEGRATION)
+register long cnt;
+
+if (!ystate) {
+ cnt = -1;
+}
+else {
+ cnt = ystate->cnt;
+ if (ystate->yielded) {
+ p = ystate->p;
+ length = ystate->length;
+ if (length < 0)
+ goto restart_length;
+ else
+ goto restart_validate;
+ }
+}
+#endif
+
if (length < 0)
{
- for (p = string; *p != 0; p++);
- length = (int)(p - string);
+ for (p = string; *p != 0; p++) {
+#if defined(ERLANG_INTEGRATION)
+ if (cnt > 0 && --cnt == 0) {
+ /*
+ * Return with cnt set to amount consumed;
+ * i.e. same amount as at start...
+ */
+ ystate->yielded = !0;
+ ystate->length = length;
+ ystate->p = p;
+ return PCRE_UTF8_YIELD;
+ }
+ restart_length:
+ (void) !0;
+#endif
+ }
+ length = (int)(p - string);
}
for (p = string; length-- > 0; p++)
{
register pcre_uchar ab, c, d;
+#if defined(ERLANG_INTEGRATION)
+
+ if (cnt > 0 && --cnt == 0) {
+ /*
+ * Return with cnt set to amount consumed;
+ * i.e. same amount as at start...
+ */
+ ystate->yielded = !0;
+ ystate->length = length;
+ ystate->p = p;
+ return PCRE_UTF8_YIELD;
+ }
+
+ restart_validate:
+
+#endif
+
c = *p;
if (c < 128) continue; /* ASCII character */
@@ -290,6 +351,14 @@ for (p = string; length-- > 0; p++)
}
}
+#if defined(ERLANG_INTEGRATION)
+if (ystate) {
+ /* Return with cnt set to amount consumed... */
+ ystate->cnt -= cnt;
+ ystate->yielded = 0;
+}
+#endif
+
#else /* Not SUPPORT_UTF */
(void)(string); /* Keep picky compilers happy */
(void)(length);