diff options
Diffstat (limited to 'erts/emulator/pcre/pcre_newline.c')
-rw-r--r-- | erts/emulator/pcre/pcre_newline.c | 118 |
1 files changed, 82 insertions, 36 deletions
diff --git a/erts/emulator/pcre/pcre_newline.c b/erts/emulator/pcre/pcre_newline.c index 7dbda88aff..02394078d5 100644 --- a/erts/emulator/pcre/pcre_newline.c +++ b/erts/emulator/pcre/pcre_newline.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2008 University of Cambridge + Copyright (c) 1997-2012 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -68,23 +68,33 @@ Arguments: type the newline type endptr pointer to the end of the string lenptr where to return the length - utf8 TRUE if in utf8 mode + utf TRUE if in utf mode Returns: TRUE or FALSE */ BOOL -_erts_pcre_is_newline(const uschar *ptr, int type, const uschar *endptr, - int *lenptr, BOOL utf8) +PRIV(is_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR endptr, int *lenptr, + BOOL utf) { -int c; -if (utf8) { GETCHAR(c, ptr); } else c = *ptr; +pcre_uint32 c; +(void)utf; +#ifdef SUPPORT_UTF +if (utf) + { + GETCHAR(c, ptr); + } +else +#endif /* SUPPORT_UTF */ + c = *ptr; + +/* Note that this function is called only for ANY or ANYCRLF. */ if (type == NLTYPE_ANYCRLF) switch(c) { - case 0x000a: *lenptr = 1; return TRUE; /* LF */ - case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1; - return TRUE; /* CR */ + case CHAR_LF: *lenptr = 1; return TRUE; + case CHAR_CR: *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1; + return TRUE; default: return FALSE; } @@ -92,14 +102,29 @@ if (type == NLTYPE_ANYCRLF) switch(c) else switch(c) { - case 0x000a: /* LF */ - case 0x000b: /* VT */ - case 0x000c: *lenptr = 1; return TRUE; /* FF */ - case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1; - return TRUE; /* CR */ - case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */ +#ifdef EBCDIC + case CHAR_NEL: +#endif + case CHAR_LF: + case CHAR_VT: + case CHAR_FF: *lenptr = 1; return TRUE; + + case CHAR_CR: + *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1; + return TRUE; + +#ifndef EBCDIC +#ifdef COMPILE_PCRE8 + case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE; case 0x2028: /* LS */ case 0x2029: *lenptr = 3; return TRUE; /* PS */ +#else /* COMPILE_PCRE16 || COMPILE_PCRE32 */ + case CHAR_NEL: + case 0x2028: /* LS */ + case 0x2029: *lenptr = 1; return TRUE; /* PS */ +#endif /* COMPILE_PCRE8 */ +#endif /* Not EBCDIC */ + default: return FALSE; } } @@ -118,46 +143,67 @@ Arguments: type the newline type startptr pointer to the start of the string lenptr where to return the length - utf8 TRUE if in utf8 mode + utf TRUE if in utf mode Returns: TRUE or FALSE */ BOOL -_erts_pcre_was_newline(const uschar *ptr, int type, const uschar *startptr, - int *lenptr, BOOL utf8) +PRIV(was_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR startptr, int *lenptr, + BOOL utf) { -int c; +pcre_uint32 c; +(void)utf; ptr--; -#ifdef SUPPORT_UTF8 -if (utf8) +#ifdef SUPPORT_UTF +if (utf) { BACKCHAR(ptr); GETCHAR(c, ptr); } -else c = *ptr; -#else /* no UTF-8 support */ -c = *ptr; -#endif /* SUPPORT_UTF8 */ +else +#endif /* SUPPORT_UTF */ + c = *ptr; + +/* Note that this function is called only for ANY or ANYCRLF. */ if (type == NLTYPE_ANYCRLF) switch(c) { - case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1; - return TRUE; /* LF */ - case 0x000d: *lenptr = 1; return TRUE; /* CR */ + case CHAR_LF: + *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1; + return TRUE; + + case CHAR_CR: *lenptr = 1; return TRUE; default: return FALSE; } +/* NLTYPE_ANY */ + else switch(c) { - case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1; - return TRUE; /* LF */ - case 0x000b: /* VT */ - case 0x000c: /* FF */ - case 0x000d: *lenptr = 1; return TRUE; /* CR */ - case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */ - case 0x2028: /* LS */ - case 0x2029: *lenptr = 3; return TRUE; /* PS */ + case CHAR_LF: + *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1; + return TRUE; + +#ifdef EBCDIC + case CHAR_NEL: +#endif + case CHAR_VT: + case CHAR_FF: + case CHAR_CR: *lenptr = 1; return TRUE; + +#ifndef EBCDIC +#ifdef COMPILE_PCRE8 + case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE; + case 0x2028: /* LS */ + case 0x2029: *lenptr = 3; return TRUE; /* PS */ +#else /* COMPILE_PCRE16 || COMPILE_PCRE32 */ + case CHAR_NEL: + case 0x2028: /* LS */ + case 0x2029: *lenptr = 1; return TRUE; /* PS */ +#endif /* COMPILE_PCRE8 */ +#endif /* NotEBCDIC */ + default: return FALSE; } } |