diff options
Diffstat (limited to 'erts/emulator/pcre/ucp.h')
| -rw-r--r-- | erts/emulator/pcre/ucp.h | 82 | 
1 files changed, 73 insertions, 9 deletions
| diff --git a/erts/emulator/pcre/ucp.h b/erts/emulator/pcre/ucp.h
index 52f91f1a65..bbfe0f3ecb 100644
--- a/erts/emulator/pcre/ucp.h
+++ b/erts/emulator/pcre/ucp.h
@@ -8,9 +8,12 @@
 #define _UCP_H
 
 /* This file contains definitions of the property values that are returned by
-the function _erts_pcre_ucp_findprop(). New values that are added for new releases
-of Unicode should always be at the end of each enum, for backwards
-compatibility. */
+the UCD access macros. New values that are added for new releases of Unicode
+should always be at the end of each enum, for backwards compatibility.
+
+IMPORTANT: Note also that the specific numeric values of the enums have to be
+the same as the values that are generated by the maint/MultiStage2.py script,
+where the equivalent property descriptive names are listed in vectors. */
 
 /* These are the general character categories. */
 
@@ -24,7 +27,7 @@ enum {
   ucp_Z      /* Separator */
 };
 
-/* These are the particular character types. */
+/* These are the particular character categories. */
 
 enum {
   ucp_Cc,    /* Control */
@@ -59,6 +62,26 @@ enum {
   ucp_Zs     /* Space separator */
 };
 
+/* These are grapheme break properties. Note that the code for processing them
+assumes that the values are less than 16. If more values are added that take
+the number to 16 or more, the code will have to be rewritten. */
+
+enum {
+  ucp_gbCR,                /*  0 */
+  ucp_gbLF,                /*  1 */
+  ucp_gbControl,           /*  2 */
+  ucp_gbExtend,            /*  3 */
+  ucp_gbPrepend,           /*  4 */
+  ucp_gbSpacingMark,       /*  5 */
+  ucp_gbL,                 /*  6 Hangul syllable type L */
+  ucp_gbV,                 /*  7 Hangul syllable type V */
+  ucp_gbT,                 /*  8 Hangul syllable type T */
+  ucp_gbLV,                /*  9 Hangul syllable type LV */
+  ucp_gbLVT,               /* 10 Hangul syllable type LVT */
+  ucp_gbRegionalIndicator, /* 11 */
+  ucp_gbOther              /* 12 */
+};
+
 /* These are the script identifications. */
 
 enum {
@@ -123,11 +146,52 @@ enum {
   ucp_Tifinagh,
   ucp_Ugaritic,
   ucp_Yi,
-  ucp_Balinese,      /* New for Unicode 5.0.0 */
-  ucp_Cuneiform,     /* New for Unicode 5.0.0 */
-  ucp_Nko,           /* New for Unicode 5.0.0 */
-  ucp_Phags_Pa,      /* New for Unicode 5.0.0 */
-  ucp_Phoenician     /* New for Unicode 5.0.0 */
+  /* New for Unicode 5.0: */
+  ucp_Balinese,
+  ucp_Cuneiform,
+  ucp_Nko,
+  ucp_Phags_Pa,
+  ucp_Phoenician,
+  /* New for Unicode 5.1: */
+  ucp_Carian,
+  ucp_Cham,
+  ucp_Kayah_Li,
+  ucp_Lepcha,
+  ucp_Lycian,
+  ucp_Lydian,
+  ucp_Ol_Chiki,
+  ucp_Rejang,
+  ucp_Saurashtra,
+  ucp_Sundanese,
+  ucp_Vai,
+  /* New for Unicode 5.2: */
+  ucp_Avestan,
+  ucp_Bamum,
+  ucp_Egyptian_Hieroglyphs,
+  ucp_Imperial_Aramaic,
+  ucp_Inscriptional_Pahlavi,
+  ucp_Inscriptional_Parthian,
+  ucp_Javanese,
+  ucp_Kaithi,
+  ucp_Lisu,
+  ucp_Meetei_Mayek,
+  ucp_Old_South_Arabian,
+  ucp_Old_Turkic,
+  ucp_Samaritan,
+  ucp_Tai_Tham,
+  ucp_Tai_Viet,
+  /* New for Unicode 6.0.0: */
+  ucp_Batak,
+  ucp_Brahmi,
+  ucp_Mandaic,
+  /* New for Unicode 6.1.0: */
+  ucp_Chakma,
+  ucp_Meroitic_Cursive,
+  ucp_Meroitic_Hieroglyphs,
+  ucp_Miao,
+  ucp_Sharada,
+  ucp_Sora_Sompeng,
+  ucp_Takri
 };
 
 #endif
|
