about | summary | refs | log | tree | commit | diff | stats
path: root/erts/emulator/pcre/pcre-8.33_1370.diff
blob: d62398985de993b428ea2d62b7639a6335bff581 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
--- code/trunk/pcre_exec.c	2013/07/02 18:37:36	1346
+++ code/trunk/pcre_exec.c	2013/07/26 10:03:38	1350
@@ -5637,7 +5637,7 @@
           }
         }
 
-      /* Match extended Unicode sequences. We will get here only if the
+      /* Match extended Unicode grapheme clusters. We will get here only if the
       support is in the binary; otherwise a compile-time error occurs. */
 
       else if (ctype == OP_EXTUNI)
@@ -5670,21 +5670,41 @@
         /* eptr is now past the end of the maximum run */
 
         if (possessive) continue;    /* No backtracking */
+         
         for(;;)
           {
-          if (eptr == pp) goto TAIL_RECURSE;
+          int lgb, rgb; 
+          PCRE_PUCHAR fptr;
+            
+          if (eptr == pp) goto TAIL_RECURSE;   /* At start of char run */
           RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+
+          /* Backtracking over an extended grapheme cluster involves inspecting
+          the previous two characters (if present) to see if a break is
+          permitted between them. */
+ 
           eptr--;
-          for (;;)                        /* Move back over one extended */
+          if (!utf) c = *eptr; else
+            {
+            BACKCHAR(eptr);
+            GETCHAR(c, eptr);
+            }
+          rgb = UCD_GRAPHBREAK(c);
+
+          for (;;)
             {
-            if (!utf) c = *eptr; else
+            if (eptr == pp) goto TAIL_RECURSE;   /* At start of char run */
+            fptr = eptr - 1;
+            if (!utf) c = *fptr; else
               {
-              BACKCHAR(eptr);
-              GETCHAR(c, eptr);
+              BACKCHAR(fptr);
+              GETCHAR(c, fptr);
               }
-            if (UCD_CATEGORY(c) != ucp_M) break;
-            eptr--;
+            lgb = UCD_GRAPHBREAK(c);        
+            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
+            eptr = fptr;
+            rgb = lgb;
             }
           }
         }