about | summary | refs | log | tree | commit | diff | stats
path: root/erts/emulator/pcre/ucp.h
diff options
context:
space:
mode:
author: Patrik Nyblom <[email protected]> 2013-08-16 11:37:54 +0200
committer: Patrik Nyblom <[email protected]> 2013-08-16 11:37:54 +0200
commit: 5d9a587a8fcc164e02f043959338edec2ff69381 (patch)
tree: 554f05a944777622b30031724010f6f31707565b /erts/emulator/pcre/ucp.h
parent: 23610dbfc1c409f83349e9e293dd3cfc1f74d497 (diff)
parent: 52cb62b7930d9c7b9e04a210ff6b02946f27ae79 (diff)
download: otp-5d9a587a8fcc164e02f043959338edec2ff69381.tar.gz
download: otp-5d9a587a8fcc164e02f043959338edec2ff69381.tar.bz2
download: otp-5d9a587a8fcc164e02f043959338edec2ff69381.zip
Merge branch 'pan/update_pcre_8.33'
* pan/update_pcre_8.33:
  Workaround TR gnu/181328, GCC 4.2.1 20070831 on FreeBSD 9.1
  Clarify relation between erts_iolist_{size|to_buf}
  Fix backslash in titles of manpages
  Correct UTF-8 in stdlib's notes.xml
  Add more tests for corner error cases in erl_bif_re.c
  Add documentation of report_errors and match_limit(_recursion)
  Add match_limit and match_limit_recursion options
  Add return_errors option to re:run/3
  Add README for updating PCRE
  Add documentation of extensions to re module
  Add new options to Erlang re interface and mend dupnames
  Update PCRE doc part of re.xml to PCRE 8.33 state
  Integrate new PCRE test suites
  Integrate patch for PCRE bug id 1370
  Handle CRLF correctly in global regexp
  Add erts_prefix to pcre_library and update erl_bif_re
  Update to PCRE 8.33, w/o the erts_ prefix added

OTP-11204 OTP-11205 OTP-10285
Diffstat (limited to 'erts/emulator/pcre/ucp.h')
-rw-r--r-- erts/emulator/pcre/ucp.h 82
1 files changed, 73 insertions, 9 deletions
diff --git a/erts/emulator/pcre/ucp.h b/erts/emulator/pcre/ucp.h
index 52f91f1a65..bbfe0f3ecb 100644
--- a/erts/emulator/pcre/ucp.h
+++ b/erts/emulator/pcre/ucp.h
@@ -8,9 +8,12 @@
#define _UCP_H
/* This file contains definitions of the property values that are returned by
-the function _erts_pcre_ucp_findprop(). New values that are added for new releases
-of Unicode should always be at the end of each enum, for backwards
-compatibility. */
+the UCD access macros. New values that are added for new releases of Unicode
+should always be at the end of each enum, for backwards compatibility.
+
+IMPORTANT: Note also that the specific numeric values of the enums have to be
+the same as the values that are generated by the maint/MultiStage2.py script,
+where the equivalent property descriptive names are listed in vectors. */
/* These are the general character categories. */
@@ -24,7 +27,7 @@ enum {
ucp_Z /* Separator */
};
-/* These are the particular character types. */
+/* These are the particular character categories. */
enum {
ucp_Cc, /* Control */
@@ -59,6 +62,26 @@ enum {
ucp_Zs /* Space separator */
};
+/* These are grapheme break properties. Note that the code for processing them
+assumes that the values are less than 16. If more values are added that take
+the number to 16 or more, the code will have to be rewritten. */
+
+enum {
+ ucp_gbCR, /* 0 */
+ ucp_gbLF, /* 1 */
+ ucp_gbControl, /* 2 */
+ ucp_gbExtend, /* 3 */
+ ucp_gbPrepend, /* 4 */
+ ucp_gbSpacingMark, /* 5 */
+ ucp_gbL, /* 6 Hangul syllable type L */
+ ucp_gbV, /* 7 Hangul syllable type V */
+ ucp_gbT, /* 8 Hangul syllable type T */
+ ucp_gbLV, /* 9 Hangul syllable type LV */
+ ucp_gbLVT, /* 10 Hangul syllable type LVT */
+ ucp_gbRegionalIndicator, /* 11 */
+ ucp_gbOther /* 12 */
+};
+
/* These are the script identifications. */
enum {
@@ -123,11 +146,52 @@ enum {
ucp_Tifinagh,
ucp_Ugaritic,
ucp_Yi,
- ucp_Balinese, /* New for Unicode 5.0.0 */
- ucp_Cuneiform, /* New for Unicode 5.0.0 */
- ucp_Nko, /* New for Unicode 5.0.0 */
- ucp_Phags_Pa, /* New for Unicode 5.0.0 */
- ucp_Phoenician /* New for Unicode 5.0.0 */
+ /* New for Unicode 5.0: */
+ ucp_Balinese,
+ ucp_Cuneiform,
+ ucp_Nko,
+ ucp_Phags_Pa,
+ ucp_Phoenician,
+ /* New for Unicode 5.1: */
+ ucp_Carian,
+ ucp_Cham,
+ ucp_Kayah_Li,
+ ucp_Lepcha,
+ ucp_Lycian,
+ ucp_Lydian,
+ ucp_Ol_Chiki,
+ ucp_Rejang,
+ ucp_Saurashtra,
+ ucp_Sundanese,
+ ucp_Vai,
+ /* New for Unicode 5.2: */
+ ucp_Avestan,
+ ucp_Bamum,
+ ucp_Egyptian_Hieroglyphs,
+ ucp_Imperial_Aramaic,
+ ucp_Inscriptional_Pahlavi,
+ ucp_Inscriptional_Parthian,
+ ucp_Javanese,
+ ucp_Kaithi,
+ ucp_Lisu,
+ ucp_Meetei_Mayek,
+ ucp_Old_South_Arabian,
+ ucp_Old_Turkic,
+ ucp_Samaritan,
+ ucp_Tai_Tham,
+ ucp_Tai_Viet,
+ /* New for Unicode 6.0.0: */
+ ucp_Batak,
+ ucp_Brahmi,
+ ucp_Mandaic,
+ /* New for Unicode 6.1.0: */
+ ucp_Chakma,
+ ucp_Meroitic_Cursive,
+ ucp_Meroitic_Hieroglyphs,
+ ucp_Miao,
+ ucp_Sharada,
+ ucp_Sora_Sompeng,
+ ucp_Takri
};
#endif