diff options
author | Patrik Nyblom <[email protected]> | 2013-08-16 11:37:54 +0200 |
---|---|---|
committer | Patrik Nyblom <[email protected]> | 2013-08-16 11:37:54 +0200 |
commit | 5d9a587a8fcc164e02f043959338edec2ff69381 (patch) | |
tree | 554f05a944777622b30031724010f6f31707565b /erts/emulator/pcre/ucp.h | |
parent | 23610dbfc1c409f83349e9e293dd3cfc1f74d497 (diff) | |
parent | 52cb62b7930d9c7b9e04a210ff6b02946f27ae79 (diff) | |
download | otp-5d9a587a8fcc164e02f043959338edec2ff69381.tar.gz otp-5d9a587a8fcc164e02f043959338edec2ff69381.tar.bz2 otp-5d9a587a8fcc164e02f043959338edec2ff69381.zip |
Merge branch 'pan/update_pcre_8.33'
* pan/update_pcre_8.33:
Workaround TR gnu/181328, GCC 4.2.1 20070831 on FreeBSD 9.1
Clarify relation between erts_iolist_{size|to_buf}
Fix backslash in titles of manpages
Correct UTF-8 in stdlib's notes.xml
Add more tests for corner error cases in erl_bif_re.c
Add documentation of report_errors and match_limit(_recursion)
Add match_limit and match_limit_recursion options
Add return_errors option to re:run/3
Add README for updating PCRE
Add documentation of extensions to re module
Add new options to Erlang re interface and mend dupnames
Update PCRE doc part of re.xml to PCRE 8.33 state
Integrate new PCRE test suites
Integrate patch for PCRE bug id 1370
Handle CRLF correctly in global regexp
Add erts_prefix to pcre_library and update erl_bif_re
Update to PCRE 8.33, w/o the erts_ prefix added
OTP-11204
OTP-11205
OTP-10285
Diffstat (limited to 'erts/emulator/pcre/ucp.h')
-rw-r--r-- | erts/emulator/pcre/ucp.h | 82 |
1 files changed, 73 insertions, 9 deletions
```diff
diff --git a/erts/emulator/pcre/ucp.h b/erts/emulator/pcre/ucp.h
index 52f91f1a65..bbfe0f3ecb 100644
--- a/erts/emulator/pcre/ucp.h
+++ b/erts/emulator/pcre/ucp.h
@@ -8,9 +8,12 @@
 #define _UCP_H
 
 /* This file contains definitions of the property values that are returned by
-the function _erts_pcre_ucp_findprop(). New values that are added for new releases
-of Unicode should always be at the end of each enum, for backwards
-compatibility. */
+the UCD access macros. New values that are added for new releases of Unicode
+should always be at the end of each enum, for backwards compatibility.
+
+IMPORTANT: Note also that the specific numeric values of the enums have to be
+the same as the values that are generated by the maint/MultiStage2.py script,
+where the equivalent property descriptive names are listed in vectors. */
 
 /* These are the general character categories. */
 
@@ -24,7 +27,7 @@ enum {
   ucp_Z /* Separator */
 };
 
-/* These are the particular character types. */
+/* These are the particular character categories. */
 
 enum {
   ucp_Cc, /* Control */
@@ -59,6 +62,26 @@ enum {
   ucp_Zs /* Space separator */
 };
 
+/* These are grapheme break properties. Note that the code for processing them
+assumes that the values are less than 16. If more values are added that take
+the number to 16 or more, the code will have to be rewritten. */
+
+enum {
+  ucp_gbCR, /* 0 */
+  ucp_gbLF, /* 1 */
+  ucp_gbControl, /* 2 */
+  ucp_gbExtend, /* 3 */
+  ucp_gbPrepend, /* 4 */
+  ucp_gbSpacingMark, /* 5 */
+  ucp_gbL, /* 6 Hangul syllable type L */
+  ucp_gbV, /* 7 Hangul syllable type V */
+  ucp_gbT, /* 8 Hangul syllable type T */
+  ucp_gbLV, /* 9 Hangul syllable type LV */
+  ucp_gbLVT, /* 10 Hangul syllable type LVT */
+  ucp_gbRegionalIndicator, /* 11 */
+  ucp_gbOther /* 12 */
+};
+
 /* These are the script identifications. */
 
 enum {
@@ -123,11 +146,52 @@ enum {
   ucp_Tifinagh,
   ucp_Ugaritic,
   ucp_Yi,
-  ucp_Balinese, /* New for Unicode 5.0.0 */
-  ucp_Cuneiform, /* New for Unicode 5.0.0 */
-  ucp_Nko, /* New for Unicode 5.0.0 */
-  ucp_Phags_Pa, /* New for Unicode 5.0.0 */
-  ucp_Phoenician /* New for Unicode 5.0.0 */
+  /* New for Unicode 5.0: */
+  ucp_Balinese,
+  ucp_Cuneiform,
+  ucp_Nko,
+  ucp_Phags_Pa,
+  ucp_Phoenician,
+  /* New for Unicode 5.1: */
+  ucp_Carian,
+  ucp_Cham,
+  ucp_Kayah_Li,
+  ucp_Lepcha,
+  ucp_Lycian,
+  ucp_Lydian,
+  ucp_Ol_Chiki,
+  ucp_Rejang,
+  ucp_Saurashtra,
+  ucp_Sundanese,
+  ucp_Vai,
+  /* New for Unicode 5.2: */
+  ucp_Avestan,
+  ucp_Bamum,
+  ucp_Egyptian_Hieroglyphs,
+  ucp_Imperial_Aramaic,
+  ucp_Inscriptional_Pahlavi,
+  ucp_Inscriptional_Parthian,
+  ucp_Javanese,
+  ucp_Kaithi,
+  ucp_Lisu,
+  ucp_Meetei_Mayek,
+  ucp_Old_South_Arabian,
+  ucp_Old_Turkic,
+  ucp_Samaritan,
+  ucp_Tai_Tham,
+  ucp_Tai_Viet,
+  /* New for Unicode 6.0.0: */
+  ucp_Batak,
+  ucp_Brahmi,
+  ucp_Mandaic,
+  /* New for Unicode 6.1.0: */
+  ucp_Chakma,
+  ucp_Meroitic_Cursive,
+  ucp_Meroitic_Hieroglyphs,
+  ucp_Miao,
+  ucp_Sharada,
+  ucp_Sora_Sompeng,
+  ucp_Takri
 };
 
 #endif
```