1 files changed, 73 insertions, 9 deletions
diff --git a/erts/emulator/pcre/ucp.h b/erts/emulator/pcre/ucp.h
index 52f91f1a65..bbfe0f3ecb 100644
--- a/erts/emulator/pcre/ucp.h
+++ b/erts/emulator/pcre/ucp.h
@@ -8,9 +8,12 @@
 #define _UCP_H
 
 /* This file contains definitions of the property values that are returned by
-the function _erts_pcre_ucp_findprop(). New values that are added for new releases
-of Unicode should always be at the end of each enum, for backwards
-compatibility. */
+the UCD access macros. New values that are added for new releases of Unicode
+should always be at the end of each enum, for backwards compatibility.
+
+IMPORTANT: Note also that the specific numeric values of the enums have to be
+the same as the values that are generated by the maint/MultiStage2.py script,
+where the equivalent property descriptive names are listed in vectors. */
 
 /* These are the general character categories. */
 
@@ -24,7 +27,7 @@ enum {
   ucp_Z      /* Separator */
 };
 
-/* These are the particular character types. */
+/* These are the particular character categories. */
 
 enum {
   ucp_Cc,    /* Control */
@@ -59,6 +62,26 @@ enum {
   ucp_Zs     /* Space separator */
 };
 
+/* These are grapheme break properties. Note that the code for processing them
+assumes that the values are less than 16. If more values are added that take
+the number to 16 or more, the code will have to be rewritten. */
+
+enum {
+  ucp_gbCR,                /*  0 */
+  ucp_gbLF,                /*  1 */
+  ucp_gbControl,           /*  2 */
+  ucp_gbExtend,            /*  3 */
+  ucp_gbPrepend,           /*  4 */
+  ucp_gbSpacingMark,       /*  5 */
+  ucp_gbL,                 /*  6 Hangul syllable type L */
+  ucp_gbV,                 /*  7 Hangul syllable type V */
+  ucp_gbT,                 /*  8 Hangul syllable type T */
+  ucp_gbLV,                /*  9 Hangul syllable type LV */
+  ucp_gbLVT,               /* 10 Hangul syllable type LVT */
+  ucp_gbRegionalIndicator, /* 11 */
+  ucp_gbOther              /* 12 */
+};
+
 /* These are the script identifications. */
 
 enum {
@@ -123,11 +146,52 @@ enum {
   ucp_Tifinagh,
   ucp_Ugaritic,
   ucp_Yi,
-  ucp_Balinese,      /* New for Unicode 5.0.0 */
-  ucp_Cuneiform,     /* New for Unicode 5.0.0 */
-  ucp_Nko,           /* New for Unicode 5.0.0 */
-  ucp_Phags_Pa,      /* New for Unicode 5.0.0 */
-  ucp_Phoenician     /* New for Unicode 5.0.0 */
+  /* New for Unicode 5.0: */
+  ucp_Balinese,
+  ucp_Cuneiform,
+  ucp_Nko,
+  ucp_Phags_Pa,
+  ucp_Phoenician,
+  /* New for Unicode 5.1: */
+  ucp_Carian,
+  ucp_Cham,
+  ucp_Kayah_Li,
+  ucp_Lepcha,
+  ucp_Lycian,
+  ucp_Lydian,
+  ucp_Ol_Chiki,
+  ucp_Rejang,
+  ucp_Saurashtra,
+  ucp_Sundanese,
+  ucp_Vai,
+  /* New for Unicode 5.2: */
+  ucp_Avestan,
+  ucp_Bamum,
+  ucp_Egyptian_Hieroglyphs,
+  ucp_Imperial_Aramaic,
+  ucp_Inscriptional_Pahlavi,
+  ucp_Inscriptional_Parthian,
+  ucp_Javanese,
+  ucp_Kaithi,
+  ucp_Lisu,
+  ucp_Meetei_Mayek,
+  ucp_Old_South_Arabian,
+  ucp_Old_Turkic,
+  ucp_Samaritan,
+  ucp_Tai_Tham,
+  ucp_Tai_Viet,
+  /* New for Unicode 6.0.0: */
+  ucp_Batak,
+  ucp_Brahmi,
+  ucp_Mandaic,
+  /* New for Unicode 6.1.0: */
+  ucp_Chakma,
+  ucp_Meroitic_Cursive,
+  ucp_Meroitic_Hieroglyphs,
+  ucp_Miao,
+  ucp_Sharada,
+  ucp_Sora_Sompeng,
+  ucp_Takri
 };
 
 #endif