diff options
Diffstat (limited to 'erts/emulator/pcre/ucp.h')
-rw-r--r-- | erts/emulator/pcre/ucp.h | 82 |
1 files changed, 73 insertions, 9 deletions
diff --git a/erts/emulator/pcre/ucp.h b/erts/emulator/pcre/ucp.h index 52f91f1a65..bbfe0f3ecb 100644 --- a/erts/emulator/pcre/ucp.h +++ b/erts/emulator/pcre/ucp.h @@ -8,9 +8,12 @@ #define _UCP_H /* This file contains definitions of the property values that are returned by -the function _erts_pcre_ucp_findprop(). New values that are added for new releases -of Unicode should always be at the end of each enum, for backwards -compatibility. */ +the UCD access macros. New values that are added for new releases of Unicode +should always be at the end of each enum, for backwards compatibility. + +IMPORTANT: Note also that the specific numeric values of the enums have to be +the same as the values that are generated by the maint/MultiStage2.py script, +where the equivalent property descriptive names are listed in vectors. */ /* These are the general character categories. */ @@ -24,7 +27,7 @@ enum { ucp_Z /* Separator */ }; -/* These are the particular character types. */ +/* These are the particular character categories. */ enum { ucp_Cc, /* Control */ @@ -59,6 +62,26 @@ enum { ucp_Zs /* Space separator */ }; +/* These are grapheme break properties. Note that the code for processing them +assumes that the values are less than 16. If more values are added that take +the number to 16 or more, the code will have to be rewritten. */ + +enum { + ucp_gbCR, /* 0 */ + ucp_gbLF, /* 1 */ + ucp_gbControl, /* 2 */ + ucp_gbExtend, /* 3 */ + ucp_gbPrepend, /* 4 */ + ucp_gbSpacingMark, /* 5 */ + ucp_gbL, /* 6 Hangul syllable type L */ + ucp_gbV, /* 7 Hangul syllable type V */ + ucp_gbT, /* 8 Hangul syllable type T */ + ucp_gbLV, /* 9 Hangul syllable type LV */ + ucp_gbLVT, /* 10 Hangul syllable type LVT */ + ucp_gbRegionalIndicator, /* 11 */ + ucp_gbOther /* 12 */ +}; + /* These are the script identifications. */ enum { @@ -123,11 +146,52 @@ enum { ucp_Tifinagh, ucp_Ugaritic, ucp_Yi, - ucp_Balinese, /* New for Unicode 5.0.0 */ - ucp_Cuneiform, /* New for Unicode 5.0.0 */ - ucp_Nko, /* New for Unicode 5.0.0 */ - ucp_Phags_Pa, /* New for Unicode 5.0.0 */ - ucp_Phoenician /* New for Unicode 5.0.0 */ + /* New for Unicode 5.0: */ + ucp_Balinese, + ucp_Cuneiform, + ucp_Nko, + ucp_Phags_Pa, + ucp_Phoenician, + /* New for Unicode 5.1: */ + ucp_Carian, + ucp_Cham, + ucp_Kayah_Li, + ucp_Lepcha, + ucp_Lycian, + ucp_Lydian, + ucp_Ol_Chiki, + ucp_Rejang, + ucp_Saurashtra, + ucp_Sundanese, + ucp_Vai, + /* New for Unicode 5.2: */ + ucp_Avestan, + ucp_Bamum, + ucp_Egyptian_Hieroglyphs, + ucp_Imperial_Aramaic, + ucp_Inscriptional_Pahlavi, + ucp_Inscriptional_Parthian, + ucp_Javanese, + ucp_Kaithi, + ucp_Lisu, + ucp_Meetei_Mayek, + ucp_Old_South_Arabian, + ucp_Old_Turkic, + ucp_Samaritan, + ucp_Tai_Tham, + ucp_Tai_Viet, + /* New for Unicode 6.0.0: */ + ucp_Batak, + ucp_Brahmi, + ucp_Mandaic, + /* New for Unicode 6.1.0: */ + ucp_Chakma, + ucp_Meroitic_Cursive, + ucp_Meroitic_Hieroglyphs, + ucp_Miao, + ucp_Sharada, + ucp_Sora_Sompeng, + ucp_Takri }; #endif |