diff options
Diffstat (limited to 'erts/emulator/pcre')
-rw-r--r-- | erts/emulator/pcre/LICENCE | 10 | ||||
-rw-r--r-- | erts/emulator/pcre/pcre-8.42.tar.bz2 | bin | 1570171 -> 0 bytes | |||
-rw-r--r-- | erts/emulator/pcre/pcre-8.43.tar.bz2 | bin | 0 -> 1576584 bytes | |||
-rw-r--r-- | erts/emulator/pcre/pcre.h | 7 | ||||
-rw-r--r-- | erts/emulator/pcre/pcre_compile.c | 18 | ||||
-rw-r--r-- | erts/emulator/pcre/pcre_exec.c | 115 | ||||
-rw-r--r-- | erts/emulator/pcre/pcre_internal.h | 11 | ||||
-rw-r--r-- | erts/emulator/pcre/pcre_jit_compile.c | 2 | ||||
-rw-r--r-- | erts/emulator/pcre/pcre_valid_utf8.c | 73 |
9 files changed, 191 insertions, 45 deletions
diff --git a/erts/emulator/pcre/LICENCE b/erts/emulator/pcre/LICENCE index f6ef7fd766..760a6666b6 100644 --- a/erts/emulator/pcre/LICENCE +++ b/erts/emulator/pcre/LICENCE @@ -25,7 +25,7 @@ Email domain: cam.ac.uk University of Cambridge Computing Service, Cambridge, England. -Copyright (c) 1997-2018 University of Cambridge +Copyright (c) 1997-2019 University of Cambridge All rights reserved. @@ -34,9 +34,9 @@ PCRE JUST-IN-TIME COMPILATION SUPPORT Written by: Zoltan Herczeg Email local part: hzmester -Emain domain: freemail.hu +Email domain: freemail.hu -Copyright(c) 2010-2018 Zoltan Herczeg +Copyright(c) 2010-2019 Zoltan Herczeg All rights reserved. @@ -45,9 +45,9 @@ STACK-LESS JUST-IN-TIME COMPILER Written by: Zoltan Herczeg Email local part: hzmester -Emain domain: freemail.hu +Email domain: freemail.hu -Copyright(c) 2009-2018 Zoltan Herczeg +Copyright(c) 2009-2019 Zoltan Herczeg All rights reserved. diff --git a/erts/emulator/pcre/pcre-8.42.tar.bz2 b/erts/emulator/pcre/pcre-8.42.tar.bz2 Binary files differdeleted file mode 100644 index 61bfa38970..0000000000 --- a/erts/emulator/pcre/pcre-8.42.tar.bz2 +++ /dev/null diff --git a/erts/emulator/pcre/pcre-8.43.tar.bz2 b/erts/emulator/pcre/pcre-8.43.tar.bz2 Binary files differnew file mode 100644 index 0000000000..e20c601f71 --- /dev/null +++ b/erts/emulator/pcre/pcre-8.43.tar.bz2 diff --git a/erts/emulator/pcre/pcre.h b/erts/emulator/pcre/pcre.h index 3563791223..49c9fc6dc8 100644 --- a/erts/emulator/pcre/pcre.h +++ b/erts/emulator/pcre/pcre.h @@ -43,9 +43,9 @@ POSSIBILITY OF SUCH DAMAGE. /* The current PCRE version information. */ #define PCRE_MAJOR 8 -#define PCRE_MINOR 42 +#define PCRE_MINOR 43 #define PCRE_PRERELEASE -#define PCRE_DATE 2018-03-20 +#define PCRE_DATE 2019-02-23 /* When an application links to a PCRE DLL in Windows, the symbols that are imported have to be identified as such. When building PCRE, the appropriate @@ -240,6 +240,9 @@ with J. */ #define PCRE_UTF8_ERR20 20 #define PCRE_UTF8_ERR21 21 #define PCRE_UTF8_ERR22 22 /* Unused (was non-character) */ +#if defined(ERLANG_INTEGRATION) +#define PCRE_UTF8_YIELD 23 +#endif /* Specific error codes for UTF-16 validity checks */ diff --git a/erts/emulator/pcre/pcre_compile.c b/erts/emulator/pcre/pcre_compile.c index ae7f6e2a2a..6ac222b27e 100644 --- a/erts/emulator/pcre/pcre_compile.c +++ b/erts/emulator/pcre/pcre_compile.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2016 University of Cambridge + Copyright (c) 1997-2018 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -3300,7 +3300,7 @@ for(;;) if ((*xclass_flags & XCL_MAP) == 0) { /* No bits are set for characters < 256. */ - if (list[1] == 0) return TRUE; + if (list[1] == 0) return (*xclass_flags & XCL_NOT) == 0; /* Might be an empty repeat. */ continue; } @@ -7643,6 +7643,8 @@ for (;; ptr++) /* Can't determine a first byte now */ if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; + zerofirstchar = firstchar; + zerofirstcharflags = firstcharflags; continue; @@ -8683,10 +8685,18 @@ do { if (!is_anchored(scode, new_map, cd, atomcount)) return FALSE; } - /* Positive forward assertions and conditions */ + /* Positive forward assertion */ - else if (op == OP_ASSERT || op == OP_COND) + else if (op == OP_ASSERT) + { + if (!is_anchored(scode, bracket_map, cd, atomcount)) return FALSE; + } + + /* Condition; not anchored if no second branch */ + + else if (op == OP_COND) { + if (scode[GET(scode,1)] != OP_ALT) return FALSE; if (!is_anchored(scode, bracket_map, cd, atomcount)) return FALSE; } diff --git a/erts/emulator/pcre/pcre_exec.c b/erts/emulator/pcre/pcre_exec.c index 1946e97a72..55a7b377bf 100644 --- a/erts/emulator/pcre/pcre_exec.c +++ b/erts/emulator/pcre/pcre_exec.c @@ -6642,10 +6642,16 @@ typedef struct { REAL_PCRE *Xre; heapframe Xframe_zero; /* Always NO_RECURSE */ + /* for yield in valid_utf() */ + + struct PRIV(valid_utf_ystate) valid_utf_ystate; + /* Original function parameters that need be saved */ int Xstart_offset; int Xoffsetcount; int *Xoffsets; + int Xlength; + PCRE_SPTR Xsubject; } PcreExecContext; #endif @@ -6675,6 +6681,7 @@ pcre32_exec(const pcre32 *argument_re, const pcre32_extra *extra_data, #endif { #ifndef ERLANG_INTEGRATION +#define ERTS_UPDATE_CONSUMED(X, MD) int rc, ocount, arg_offset_max; int newline; BOOL using_temporary_offsets = FALSE; @@ -6736,6 +6743,8 @@ heapframe frame_zero; start_offset = exec_context->Xstart_offset; \ offsetcount = exec_context->Xoffsetcount; \ offsets = exec_context->Xoffsets; \ + length = exec_context->Xlength; \ + subject = exec_context->Xsubject; \ } while (0) #define SWAPOUT() do { \ @@ -6750,8 +6759,30 @@ heapframe frame_zero; exec_context->Xstart_offset = start_offset; \ exec_context->Xoffsetcount = offsetcount; \ exec_context->Xoffsets = offsets; \ + exec_context->Xlength = length; \ + exec_context->Xsubject = subject; \ } while (0) +#define ERTS_UPDATE_CONSUMED(X, MD) \ +do { \ + if (((X)->flags & PCRE_EXTRA_LOOP_LIMIT) != 0) { \ + unsigned long consumed__; \ + if (!(X)->restart_data) { \ + consumed__ = 0; \ + } \ + else { \ + PcreExecContext *ctx__ = (PcreExecContext *) \ + (*(X)->restart_data); \ + consumed__ = ctx__->valid_utf_ystate.cnt; \ + ctx__->valid_utf_ystate.cnt = 0; \ + } \ + if ((MD)) { \ + match_data *md__ = (MD); \ + consumed__ += (X)->loop_limit - md__->loop_limit; \ + } \ + *((X)->loop_counter_return) = consumed__; \ + } \ +} while (0) PcreExecContext *exec_context; PcreExecContext internal_context; @@ -6776,15 +6807,21 @@ pcre_uchar req_char; /* we are restarting, every initialization is skipped and we jump directly into the loop */ exec_context = (PcreExecContext *) *(extra_data->restart_data); SWAPIN(); - + if (exec_context->valid_utf_ystate.yielded) + goto restart_valid_utf; goto RESTART_INTERRUPTED; } else { if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_LOOP_LIMIT)) { exec_context = (PcreExecContext *) (erts_pcre_malloc)(sizeof(PcreExecContext)); - *(extra_data->restart_data) = (void *) exec_context; + *(extra_data->restart_data) = (void *) exec_context; + exec_context->valid_utf_ystate.yielded = 0; /* need freeing by special routine from client */ } else { +#if defined(ERLANG_INTEGRATION) + fprintf(stderr, "Unexpected execution path\n"); + abort(); +#endif exec_context = &internal_context; } @@ -6865,9 +6902,38 @@ code for an invalid string if a results vector is available. */ if (utf && (options & PCRE_NO_UTF8_CHECK) == 0) { int erroroffset; - int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset); + int errorcode; + +#if !defined(ERLANG_INTEGRATION) + errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length); +#else + struct PRIV(valid_utf_ystate) *ystate; + + if (!extra_data || !extra_data->restart_data) { + ystate = NULL; + } + else if (!(extra_data->flags & PCRE_EXTRA_LOOP_LIMIT)) { + exec_context->valid_utf_ystate.cnt = 10; + ystate = NULL; + } + else { + exec_context->valid_utf_ystate.yielded = 0; + restart_valid_utf: + ystate = &exec_context->valid_utf_ystate; + ystate->cnt = (int) extra_data->loop_limit; + } + errorcode = PRIV(yielding_valid_utf)((PCRE_PUCHAR)subject, length, + &erroroffset, ystate); +#endif if (errorcode != 0) { +#if defined(ERLANG_INTEGRATION) + if (ystate && ystate->yielded) { + ERTS_UPDATE_CONSUMED(extra_data, NULL); + SWAPOUT(); + return PCRE_ERROR_LOOP_LIMIT; + } +#endif if (offsetcount >= 2) { offsets[0] = erroroffset; @@ -6890,6 +6956,11 @@ if (utf && (options & PCRE_NO_UTF8_CHECK) == 0) return PCRE_ERROR_BADUTF8_OFFSET; #endif } +#if defined(ERLANG_INTEGRATION) +else { + exec_context->valid_utf_ystate.cnt = 0; +} +#endif #endif /* If the pattern was successfully studied with JIT support, run the JIT @@ -6950,7 +7021,11 @@ if (extra_data != NULL) #ifdef ERLANG_INTEGRATION if ((flags & PCRE_EXTRA_LOOP_LIMIT) != 0) { - md->loop_limit = extra_data->loop_limit; + md->loop_limit = extra_data->loop_limit; + if (extra_data->restart_data) + md->loop_limit -= extra_data->loop_limit - exec_context->valid_utf_ystate.cnt; + if (md->loop_limit < 10) + md->loop_limit = 10; /* At least do something if we've come this far... */ } #endif } @@ -7266,14 +7341,8 @@ for(;;) #endif if ((start_bits[c/8] & (1 << (c&7))) != 0) { -#ifdef ERLANG_INTEGRATION - if ((extra_data->flags & PCRE_EXTRA_LOOP_LIMIT) != 0) - { - *extra_data->loop_counter_return = - (extra_data->loop_limit - md->loop_limit); - } -#endif - break; + ERTS_UPDATE_CONSUMED(extra_data, md); + break; } start_match++; } @@ -7298,13 +7367,7 @@ for(;;) (pcre_uint32)(end_subject - start_match) < study->minlength) { rc = MATCH_NOMATCH; -#ifdef ERLANG_INTEGRATION - if ((extra_data->flags & PCRE_EXTRA_LOOP_LIMIT) != 0) - { - *extra_data->loop_counter_return = - (extra_data->loop_limit - md->loop_limit); - } -#endif + ERTS_UPDATE_CONSUMED(extra_data, md); break; } @@ -7353,13 +7416,7 @@ for(;;) if (p >= end_subject) { rc = MATCH_NOMATCH; -#ifdef ERLANG_INTEGRATION - if ((extra_data->flags & PCRE_EXTRA_LOOP_LIMIT) != 0) - { - *extra_data->loop_counter_return = - (extra_data->loop_limit - md->loop_limit); - } -#endif + ERTS_UPDATE_CONSUMED(extra_data, md); break; } @@ -7390,11 +7447,7 @@ for(;;) EDEBUGF(("Calling match...")); rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0); #ifdef ERLANG_INTEGRATION - if ((extra_data->flags & PCRE_EXTRA_LOOP_LIMIT) != 0) - { - *extra_data->loop_counter_return = - (extra_data->loop_limit - md->loop_limit); - } + ERTS_UPDATE_CONSUMED(extra_data, md); SWAPOUT(); while(rc == PCRE_ERROR_LOOP_LIMIT) { EDEBUGF(("Loop limit break detected")); diff --git a/erts/emulator/pcre/pcre_internal.h b/erts/emulator/pcre/pcre_internal.h index c84dcb5a38..71f473e86f 100644 --- a/erts/emulator/pcre/pcre_internal.h +++ b/erts/emulator/pcre/pcre_internal.h @@ -2756,6 +2756,17 @@ extern int PRIV(strcmp_uc_c8_utf)(const pcre_uchar *, #endif /* COMPILE_PCRE[8|16|32] */ +#if defined(ERLANG_INTEGRATION) +struct PRIV(valid_utf_ystate) { + unsigned int cnt; + int length; + int yielded; + PCRE_PUCHAR p; +}; +extern int PRIV(yielding_valid_utf)(PCRE_PUCHAR, int, int *, + struct PRIV(valid_utf_ystate) *); +#endif + extern const pcre_uchar *PRIV(find_bracket)(const pcre_uchar *, BOOL, int); extern BOOL PRIV(is_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR, int *, BOOL); diff --git a/erts/emulator/pcre/pcre_jit_compile.c b/erts/emulator/pcre/pcre_jit_compile.c index 926e40f6d3..2d2288f81e 100644 --- a/erts/emulator/pcre/pcre_jit_compile.c +++ b/erts/emulator/pcre/pcre_jit_compile.c @@ -9002,7 +9002,7 @@ if (exact > 1) #ifdef SUPPORT_UTF && !common->utf #endif - ) + && type != OP_ANYNL && type != OP_EXTUNI) { OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact)); add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0)); diff --git a/erts/emulator/pcre/pcre_valid_utf8.c b/erts/emulator/pcre/pcre_valid_utf8.c index 516d8f4725..1dc1f9ba0c 100644 --- a/erts/emulator/pcre/pcre_valid_utf8.c +++ b/erts/emulator/pcre/pcre_valid_utf8.c @@ -107,19 +107,80 @@ Returns: = 0 if the string is a valid UTF-8 string int PRIV(valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset) { + +#if defined(ERLANG_INTEGRATION) + return PRIV(yielding_valid_utf)(string, length, erroroffset, NULL); +} + +int +PRIV(yielding_valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset, struct PRIV(valid_utf_ystate) *ystate) +{ +#endif + #ifdef SUPPORT_UTF register PCRE_PUCHAR p; +#if defined(ERLANG_INTEGRATION) +register long cnt; + +if (!ystate) { + cnt = -1; +} +else { + cnt = ystate->cnt; + if (ystate->yielded) { + p = ystate->p; + length = ystate->length; + if (length < 0) + goto restart_length; + else + goto restart_validate; + } +} +#endif + if (length < 0) { - for (p = string; *p != 0; p++); - length = (int)(p - string); + for (p = string; *p != 0; p++) { +#if defined(ERLANG_INTEGRATION) + if (cnt > 0 && --cnt == 0) { + /* + * Return with cnt set to amount consumed; + * i.e. same amount as at start... + */ + ystate->yielded = !0; + ystate->length = length; + ystate->p = p; + return PCRE_UTF8_YIELD; + } + restart_length: + (void) !0; +#endif + } + length = (int)(p - string); } for (p = string; length-- > 0; p++) { register pcre_uchar ab, c, d; +#if defined(ERLANG_INTEGRATION) + + if (cnt > 0 && --cnt == 0) { + /* + * Return with cnt set to amount consumed; + * i.e. same amount as at start... + */ + ystate->yielded = !0; + ystate->length = length; + ystate->p = p; + return PCRE_UTF8_YIELD; + } + + restart_validate: + +#endif + c = *p; if (c < 128) continue; /* ASCII character */ @@ -290,6 +351,14 @@ for (p = string; length-- > 0; p++) } } +#if defined(ERLANG_INTEGRATION) +if (ystate) { + /* Return with cnt set to amount consumed... */ + ystate->cnt -= cnt; + ystate->yielded = 0; +} +#endif + #else /* Not SUPPORT_UTF */ (void)(string); /* Keep picky compilers happy */ (void)(length); |