From 6138bb612cd23c0ca37faadbe3fc9944be275228 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Mon, 29 Jul 2013 10:54:20 +0200 Subject: Integrate patch for PCRE bug id 1370 --- erts/emulator/pcre/pcre-8.33_1370.diff | 60 ++++++++++++++++++++++++++++++++++ erts/emulator/pcre/pcre_exec.c | 38 ++++++++++++++++----- 2 files changed, 90 insertions(+), 8 deletions(-) create mode 100644 erts/emulator/pcre/pcre-8.33_1370.diff (limited to 'erts') diff --git a/erts/emulator/pcre/pcre-8.33_1370.diff b/erts/emulator/pcre/pcre-8.33_1370.diff new file mode 100644 index 0000000000..d62398985d --- /dev/null +++ b/erts/emulator/pcre/pcre-8.33_1370.diff @@ -0,0 +1,60 @@ +--- code/trunk/pcre_exec.c 2013/07/02 18:37:36 1346 ++++ code/trunk/pcre_exec.c 2013/07/26 10:03:38 1350 +@@ -5637,7 +5637,7 @@ + } + } + +- /* Match extended Unicode sequences. We will get here only if the ++ /* Match extended Unicode grapheme clusters. We will get here only if the + support is in the binary; otherwise a compile-time error occurs. */ + + else if (ctype == OP_EXTUNI) +@@ -5670,21 +5670,41 @@ + /* eptr is now past the end of the maximum run */ + + if (possessive) continue; /* No backtracking */ ++ + for(;;) + { +- if (eptr == pp) goto TAIL_RECURSE; ++ int lgb, rgb; ++ PCRE_PUCHAR fptr; ++ ++ if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */ + RMATCH(eptr, ecode, offset_top, md, eptrb, RM45); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); ++ ++ /* Backtracking over an extended grapheme cluster involves inspecting ++ the previous two characters (if present) to see if a break is ++ permitted between them. */ ++ + eptr--; +- for (;;) /* Move back over one extended */ ++ if (!utf) c = *eptr; else ++ { ++ BACKCHAR(eptr); ++ GETCHAR(c, eptr); ++ } ++ rgb = UCD_GRAPHBREAK(c); ++ ++ for (;;) + { +- if (!utf) c = *eptr; else ++ if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */ ++ fptr = eptr - 1; ++ if (!utf) c = *fptr; else + { +- BACKCHAR(eptr); +- GETCHAR(c, eptr); ++ BACKCHAR(fptr); ++ GETCHAR(c, fptr); + } +- if (UCD_CATEGORY(c) != ucp_M) break; +- eptr--; ++ lgb = UCD_GRAPHBREAK(c); ++ if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; ++ eptr = fptr; ++ rgb = lgb; + } + } + } diff --git a/erts/emulator/pcre/pcre_exec.c b/erts/emulator/pcre/pcre_exec.c index 0371f9efa8..1cab78cdd8 100644 --- a/erts/emulator/pcre/pcre_exec.c +++ b/erts/emulator/pcre/pcre_exec.c @@ -5818,7 +5818,7 @@ for (;;) } } - /* Match extended Unicode sequences. We will get here only if the + /* Match extended Unicode grapheme clusters. We will get here only if the support is in the binary; otherwise a compile-time error occurs. */ else if (ctype == OP_EXTUNI) @@ -5855,21 +5855,43 @@ for (;;) /* eptr is now past the end of the maximum run */ if (possessive) continue; /* No backtracking */ + for(;;) /* LOOP_COUNT: Ok */ { - if (eptr == pp) goto TAIL_RECURSE; +#ifndef ERLANG_INTEGRATION + int lgb, rgb; +#endif + PCRE_PUCHAR fptr; + + if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */ RMATCH(eptr, ecode, offset_top, md, eptrb, RM45); if (rrc != MATCH_NOMATCH) RRETURN(rrc); + + /* Backtracking over an extended grapheme cluster involves inspecting + the previous two characters (if present) to see if a break is + permitted between them. */ + eptr--; - for (;;) /* Move back over one extended */ /* LOOP_COUNT: COST */ + if (!utf) c = *eptr; else { - if (!utf) c = *eptr; else + BACKCHAR(eptr); + GETCHAR(c, eptr); + } + rgb = UCD_GRAPHBREAK(c); + + for (;;) /* LOOP_COUNT: COST */ + { + if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */ + fptr = eptr - 1; + if (!utf) c = *fptr; else { - BACKCHAR(eptr); - GETCHAR(c, eptr); + BACKCHAR(fptr); + GETCHAR(c, fptr); } - if (UCD_CATEGORY(c) != ucp_M) break; - eptr--; + lgb = UCD_GRAPHBREAK(c); + if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; + eptr = fptr; + rgb = lgb; COST(1); } } -- cgit v1.2.3