about summary refs log tree commit diff stats
path: root/erts/emulator/pcre
diff options
context:
space:
mode:
Diffstat (limited to 'erts/emulator/pcre')
-rw-r--r-- erts/emulator/pcre/pcre-8.33_1370.diff 60
-rw-r--r-- erts/emulator/pcre/pcre_exec.c 38
2 files changed, 90 insertions, 8 deletions
diff --git a/erts/emulator/pcre/pcre-8.33_1370.diff b/erts/emulator/pcre/pcre-8.33_1370.diff
new file mode 100644
index 0000000000..d62398985d
--- /dev/null
+++ b/erts/emulator/pcre/pcre-8.33_1370.diff
@@ -0,0 +1,60 @@
+--- code/trunk/pcre_exec.c 2013/07/02 18:37:36 1346
++++ code/trunk/pcre_exec.c 2013/07/26 10:03:38 1350
+@@ -5637,7 +5637,7 @@
+ }
+ }
+
+- /* Match extended Unicode sequences. We will get here only if the
++ /* Match extended Unicode grapheme clusters. We will get here only if the
+ support is in the binary; otherwise a compile-time error occurs. */
+
+ else if (ctype == OP_EXTUNI)
+@@ -5670,21 +5670,41 @@
+ /* eptr is now past the end of the maximum run */
+
+ if (possessive) continue; /* No backtracking */
++
+ for(;;)
+ {
+- if (eptr == pp) goto TAIL_RECURSE;
++ int lgb, rgb;
++ PCRE_PUCHAR fptr;
++
++ if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */
+ RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
++
++ /* Backtracking over an extended grapheme cluster involves inspecting
++ the previous two characters (if present) to see if a break is
++ permitted between them. */
++
+ eptr--;
+- for (;;) /* Move back over one extended */
++ if (!utf) c = *eptr; else
++ {
++ BACKCHAR(eptr);
++ GETCHAR(c, eptr);
++ }
++ rgb = UCD_GRAPHBREAK(c);
++
++ for (;;)
+ {
+- if (!utf) c = *eptr; else
++ if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */
++ fptr = eptr - 1;
++ if (!utf) c = *fptr; else
+ {
+- BACKCHAR(eptr);
+- GETCHAR(c, eptr);
++ BACKCHAR(fptr);
++ GETCHAR(c, fptr);
+ }
+- if (UCD_CATEGORY(c) != ucp_M) break;
+- eptr--;
++ lgb = UCD_GRAPHBREAK(c);
++ if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
++ eptr = fptr;
++ rgb = lgb;
+ }
+ }
+ }
diff --git a/erts/emulator/pcre/pcre_exec.c b/erts/emulator/pcre/pcre_exec.c
index 0371f9efa8..1cab78cdd8 100644
--- a/erts/emulator/pcre/pcre_exec.c
+++ b/erts/emulator/pcre/pcre_exec.c
@@ -5818,7 +5818,7 @@ for (;;)
}
}
- /* Match extended Unicode sequences. We will get here only if the
+ /* Match extended Unicode grapheme clusters. We will get here only if the
support is in the binary; otherwise a compile-time error occurs. */
else if (ctype == OP_EXTUNI)
@@ -5855,21 +5855,43 @@ for (;;)
/* eptr is now past the end of the maximum run */
if (possessive) continue; /* No backtracking */
+
for(;;) /* LOOP_COUNT: Ok */
{
- if (eptr == pp) goto TAIL_RECURSE;
+#ifndef ERLANG_INTEGRATION
+ int lgb, rgb;
+#endif
+ PCRE_PUCHAR fptr;
+
+ if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */
RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+
+ /* Backtracking over an extended grapheme cluster involves inspecting
+ the previous two characters (if present) to see if a break is
+ permitted between them. */
+
eptr--;
- for (;;) /* Move back over one extended */ /* LOOP_COUNT: COST */
+ if (!utf) c = *eptr; else
{
- if (!utf) c = *eptr; else
+ BACKCHAR(eptr);
+ GETCHAR(c, eptr);
+ }
+ rgb = UCD_GRAPHBREAK(c);
+
+ for (;;) /* LOOP_COUNT: COST */
+ {
+ if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */
+ fptr = eptr - 1;
+ if (!utf) c = *fptr; else
{
- BACKCHAR(eptr);
- GETCHAR(c, eptr);
+ BACKCHAR(fptr);
+ GETCHAR(c, fptr);
}
- if (UCD_CATEGORY(c) != ucp_M) break;
- eptr--;
+ lgb = UCD_GRAPHBREAK(c);
+ if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
+ eptr = fptr;
+ rgb = lgb;
COST(1);
}
}