about summary refs log tree commit diff stats
path: root/erts/emulator/pcre/pcre_newline.c
diff options
context:
space:
mode:
author Sverker Eriksson <[email protected]> 2017-08-30 20:55:08 +0200
committer Sverker Eriksson <[email protected]> 2017-08-30 20:55:08 +0200
commit 7c67bbddb53c364086f66260701bc54a61c9659c (patch)
tree 92ab0d4b91d5e2f6e7a3f9d61ea25089e8a71fe0 /erts/emulator/pcre/pcre_newline.c
parent 97dc5e7f396129222419811c173edc7fa767b0f8 (diff)
parent 3b7a6ffddc819bf305353a593904cea9e932e7dc (diff)
download otp-7c67bbddb53c364086f66260701bc54a61c9659c.tar.gz
otp-7c67bbddb53c364086f66260701bc54a61c9659c.tar.bz2
otp-7c67bbddb53c364086f66260701bc54a61c9659c.zip
Merge tag 'OTP-19.0' into sverker/19/binary_to_atom-utf8-crash/ERL-474/OTP-14590
Diffstat (limited to 'erts/emulator/pcre/pcre_newline.c')
-rw-r--r-- erts/emulator/pcre/pcre_newline.c 118
1 files changed, 82 insertions, 36 deletions
diff --git a/erts/emulator/pcre/pcre_newline.c b/erts/emulator/pcre/pcre_newline.c
index 7dbda88aff..02394078d5 100644
--- a/erts/emulator/pcre/pcre_newline.c
+++ b/erts/emulator/pcre/pcre_newline.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2008 University of Cambridge
+ Copyright (c) 1997-2012 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -68,23 +68,33 @@ Arguments:
type the newline type
endptr pointer to the end of the string
lenptr where to return the length
- utf8 TRUE if in utf8 mode
+ utf TRUE if in utf mode
Returns: TRUE or FALSE
*/
BOOL
-_erts_pcre_is_newline(const uschar *ptr, int type, const uschar *endptr,
- int *lenptr, BOOL utf8)
+PRIV(is_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR endptr, int *lenptr,
+ BOOL utf)
{
-int c;
-if (utf8) { GETCHAR(c, ptr); } else c = *ptr;
+pcre_uint32 c;
+(void)utf;
+#ifdef SUPPORT_UTF
+if (utf)
+ {
+ GETCHAR(c, ptr);
+ }
+else
+#endif /* SUPPORT_UTF */
+ c = *ptr;
+
+/* Note that this function is called only for ANY or ANYCRLF. */
if (type == NLTYPE_ANYCRLF) switch(c)
{
- case 0x000a: *lenptr = 1; return TRUE; /* LF */
- case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
- return TRUE; /* CR */
+ case CHAR_LF: *lenptr = 1; return TRUE;
+ case CHAR_CR: *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
+ return TRUE;
default: return FALSE;
}
@@ -92,14 +102,29 @@ if (type == NLTYPE_ANYCRLF) switch(c)
else switch(c)
{
- case 0x000a: /* LF */
- case 0x000b: /* VT */
- case 0x000c: *lenptr = 1; return TRUE; /* FF */
- case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
- return TRUE; /* CR */
- case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */
+#ifdef EBCDIC
+ case CHAR_NEL:
+#endif
+ case CHAR_LF:
+ case CHAR_VT:
+ case CHAR_FF: *lenptr = 1; return TRUE;
+
+ case CHAR_CR:
+ *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
+ return TRUE;
+
+#ifndef EBCDIC
+#ifdef COMPILE_PCRE8
+ case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
case 0x2028: /* LS */
case 0x2029: *lenptr = 3; return TRUE; /* PS */
+#else /* COMPILE_PCRE16 || COMPILE_PCRE32 */
+ case CHAR_NEL:
+ case 0x2028: /* LS */
+ case 0x2029: *lenptr = 1; return TRUE; /* PS */
+#endif /* COMPILE_PCRE8 */
+#endif /* Not EBCDIC */
+
default: return FALSE;
}
}
@@ -118,46 +143,67 @@ Arguments:
type the newline type
startptr pointer to the start of the string
lenptr where to return the length
- utf8 TRUE if in utf8 mode
+ utf TRUE if in utf mode
Returns: TRUE or FALSE
*/
BOOL
-_erts_pcre_was_newline(const uschar *ptr, int type, const uschar *startptr,
- int *lenptr, BOOL utf8)
+PRIV(was_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR startptr, int *lenptr,
+ BOOL utf)
{
-int c;
+pcre_uint32 c;
+(void)utf;
ptr--;
-#ifdef SUPPORT_UTF8
-if (utf8)
+#ifdef SUPPORT_UTF
+if (utf)
{
BACKCHAR(ptr);
GETCHAR(c, ptr);
}
-else c = *ptr;
-#else /* no UTF-8 support */
-c = *ptr;
-#endif /* SUPPORT_UTF8 */
+else
+#endif /* SUPPORT_UTF */
+ c = *ptr;
+
+/* Note that this function is called only for ANY or ANYCRLF. */
if (type == NLTYPE_ANYCRLF) switch(c)
{
- case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
- return TRUE; /* LF */
- case 0x000d: *lenptr = 1; return TRUE; /* CR */
+ case CHAR_LF:
+ *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
+ return TRUE;
+
+ case CHAR_CR: *lenptr = 1; return TRUE;
default: return FALSE;
}
+/* NLTYPE_ANY */
+
else switch(c)
{
- case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
- return TRUE; /* LF */
- case 0x000b: /* VT */
- case 0x000c: /* FF */
- case 0x000d: *lenptr = 1; return TRUE; /* CR */
- case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */
- case 0x2028: /* LS */
- case 0x2029: *lenptr = 3; return TRUE; /* PS */
+ case CHAR_LF:
+ *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
+ return TRUE;
+
+#ifdef EBCDIC
+ case CHAR_NEL:
+#endif
+ case CHAR_VT:
+ case CHAR_FF:
+ case CHAR_CR: *lenptr = 1; return TRUE;
+
+#ifndef EBCDIC
+#ifdef COMPILE_PCRE8
+ case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
+ case 0x2028: /* LS */
+ case 0x2029: *lenptr = 3; return TRUE; /* PS */
+#else /* COMPILE_PCRE16 || COMPILE_PCRE32 */
+ case CHAR_NEL:
+ case 0x2028: /* LS */
+ case 0x2029: *lenptr = 1; return TRUE; /* PS */
+#endif /* COMPILE_PCRE8 */
+#endif /* NotEBCDIC */
+
default: return FALSE;
}
}