aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/pcre/pcre_newline.c
diff options
context:
space:
mode:
author Patrik Nyblom <[email protected]> 2013-08-16 11:37:54 +0200
committer Patrik Nyblom <[email protected]> 2013-08-16 11:37:54 +0200
commit 5d9a587a8fcc164e02f043959338edec2ff69381 (patch)
tree 554f05a944777622b30031724010f6f31707565b /erts/emulator/pcre/pcre_newline.c
parent 23610dbfc1c409f83349e9e293dd3cfc1f74d497 (diff)
parent 52cb62b7930d9c7b9e04a210ff6b02946f27ae79 (diff)
download otp-5d9a587a8fcc164e02f043959338edec2ff69381.tar.gz
otp-5d9a587a8fcc164e02f043959338edec2ff69381.tar.bz2
otp-5d9a587a8fcc164e02f043959338edec2ff69381.zip
Merge branch 'pan/update_pcre_8.33'
* pan/update_pcre_8.33:
  Workaround TR gnu/181328, GCC 4.2.1 20070831 on FreeBSD 9.1
  Clarify relation between erts_iolist_{size|to_buf}
  Fix backslash in titles of manpages
  Correct UTF-8 in stdlib's notes.xml
  Add more tests for corner error cases in erl_bif_re.c
  Add documentation of report_errors and match_limit(_recursion)
  Add match_limit and match_limit_recursion options
  Add return_errors option to re:run/3
  Add README for updating PCRE
  Add documentation of extensions to re module
  Add new options to Erlang re interface and mend dupnames
  Update PCRE doc part of re.xml to PCRE 8.33 state
  Integrate new PCRE test suites
  Integrate patch for PCRE bug id 1370
  Handle CRLF correctly in global regexp
  Add erts_prefix to pcre_library and update erl_bif_re
  Update to PCRE 8.33, w/o the erts_ prefix added

OTP-11204
OTP-11205
OTP-10285
Diffstat (limited to 'erts/emulator/pcre/pcre_newline.c')
-rw-r--r-- erts/emulator/pcre/pcre_newline.c 118
1 files changed, 82 insertions, 36 deletions
diff --git a/erts/emulator/pcre/pcre_newline.c b/erts/emulator/pcre/pcre_newline.c
index 7dbda88aff..02394078d5 100644
--- a/erts/emulator/pcre/pcre_newline.c
+++ b/erts/emulator/pcre/pcre_newline.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2008 University of Cambridge
+ Copyright (c) 1997-2012 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -68,23 +68,33 @@ Arguments:
type the newline type
endptr pointer to the end of the string
lenptr where to return the length
- utf8 TRUE if in utf8 mode
+ utf TRUE if in utf mode
Returns: TRUE or FALSE
*/
BOOL
-_erts_pcre_is_newline(const uschar *ptr, int type, const uschar *endptr,
- int *lenptr, BOOL utf8)
+PRIV(is_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR endptr, int *lenptr,
+ BOOL utf)
{
-int c;
-if (utf8) { GETCHAR(c, ptr); } else c = *ptr;
+pcre_uint32 c;
+(void)utf;
+#ifdef SUPPORT_UTF
+if (utf)
+ {
+ GETCHAR(c, ptr);
+ }
+else
+#endif /* SUPPORT_UTF */
+ c = *ptr;
+
+/* Note that this function is called only for ANY or ANYCRLF. */
if (type == NLTYPE_ANYCRLF) switch(c)
{
- case 0x000a: *lenptr = 1; return TRUE; /* LF */
- case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
- return TRUE; /* CR */
+ case CHAR_LF: *lenptr = 1; return TRUE;
+ case CHAR_CR: *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
+ return TRUE;
default: return FALSE;
}
@@ -92,14 +102,29 @@ if (type == NLTYPE_ANYCRLF) switch(c)
else switch(c)
{
- case 0x000a: /* LF */
- case 0x000b: /* VT */
- case 0x000c: *lenptr = 1; return TRUE; /* FF */
- case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
- return TRUE; /* CR */
- case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */
+#ifdef EBCDIC
+ case CHAR_NEL:
+#endif
+ case CHAR_LF:
+ case CHAR_VT:
+ case CHAR_FF: *lenptr = 1; return TRUE;
+
+ case CHAR_CR:
+ *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
+ return TRUE;
+
+#ifndef EBCDIC
+#ifdef COMPILE_PCRE8
+ case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
case 0x2028: /* LS */
case 0x2029: *lenptr = 3; return TRUE; /* PS */
+#else /* COMPILE_PCRE16 || COMPILE_PCRE32 */
+ case CHAR_NEL:
+ case 0x2028: /* LS */
+ case 0x2029: *lenptr = 1; return TRUE; /* PS */
+#endif /* COMPILE_PCRE8 */
+#endif /* Not EBCDIC */
+
default: return FALSE;
}
}
@@ -118,46 +143,67 @@ Arguments:
type the newline type
startptr pointer to the start of the string
lenptr where to return the length
- utf8 TRUE if in utf8 mode
+ utf TRUE if in utf mode
Returns: TRUE or FALSE
*/
BOOL
-_erts_pcre_was_newline(const uschar *ptr, int type, const uschar *startptr,
- int *lenptr, BOOL utf8)
+PRIV(was_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR startptr, int *lenptr,
+ BOOL utf)
{
-int c;
+pcre_uint32 c;
+(void)utf;
ptr--;
-#ifdef SUPPORT_UTF8
-if (utf8)
+#ifdef SUPPORT_UTF
+if (utf)
{
BACKCHAR(ptr);
GETCHAR(c, ptr);
}
-else c = *ptr;
-#else /* no UTF-8 support */
-c = *ptr;
-#endif /* SUPPORT_UTF8 */
+else
+#endif /* SUPPORT_UTF */
+ c = *ptr;
+
+/* Note that this function is called only for ANY or ANYCRLF. */
if (type == NLTYPE_ANYCRLF) switch(c)
{
- case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
- return TRUE; /* LF */
- case 0x000d: *lenptr = 1; return TRUE; /* CR */
+ case CHAR_LF:
+ *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
+ return TRUE;
+
+ case CHAR_CR: *lenptr = 1; return TRUE;
default: return FALSE;
}
+/* NLTYPE_ANY */
+
else switch(c)
{
- case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
- return TRUE; /* LF */
- case 0x000b: /* VT */
- case 0x000c: /* FF */
- case 0x000d: *lenptr = 1; return TRUE; /* CR */
- case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */
- case 0x2028: /* LS */
- case 0x2029: *lenptr = 3; return TRUE; /* PS */
+ case CHAR_LF:
+ *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
+ return TRUE;
+
+#ifdef EBCDIC
+ case CHAR_NEL:
+#endif
+ case CHAR_VT:
+ case CHAR_FF:
+ case CHAR_CR: *lenptr = 1; return TRUE;
+
+#ifndef EBCDIC
+#ifdef COMPILE_PCRE8
+ case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
+ case 0x2028: /* LS */
+ case 0x2029: *lenptr = 3; return TRUE; /* PS */
+#else /* COMPILE_PCRE16 || COMPILE_PCRE32 */
+ case CHAR_NEL:
+ case 0x2028: /* LS */
+ case 0x2029: *lenptr = 1; return TRUE; /* PS */
+#endif /* COMPILE_PCRE8 */
+#endif /* NotEBCDIC */
+
default: return FALSE;
}
}