aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/pcre/pcre_make_latin1_default.c
diff options
context:
space:
mode:
Diffstat (limited to 'erts/emulator/pcre/pcre_make_latin1_default.c')
-rw-r--r--erts/emulator/pcre/pcre_make_latin1_default.c367
1 files changed, 367 insertions, 0 deletions
diff --git a/erts/emulator/pcre/pcre_make_latin1_default.c b/erts/emulator/pcre/pcre_make_latin1_default.c
new file mode 100644
index 0000000000..b8a8062764
--- /dev/null
+++ b/erts/emulator/pcre/pcre_make_latin1_default.c
@@ -0,0 +1,367 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+/* This is a "hacked" version of pcre_maketables that
+ * will generate an acceptable character table for any
+ * iso-latin-1 language when running in 8-bit mode.
+ */
+
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Copyright (c) 1997-2008 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* %ExternalCopyright% */
+
+/* This module contains the external function pcre_make_latin1_tables(), a
+variant of pcre_maketables() which builds character tables for PCRE that cover
+the ISO-8859-1 (Latin-1) character set. The file is compiled on its
+own as part of the PCRE library. However, it is also included in the
+compilation of dftables.c, in which case the macro DFTABLES is defined. */
+
+
+#ifndef DFTABLES
+# ifdef HAVE_CONFIG_H
+# include "config.h"
+# endif
+# include "pcre_internal.h"
+#endif
+
+
+/*************************************************
+* Create PCRE character tables *
+*************************************************/
+
+/* This function builds a set of character tables for use by PCRE and returns
+a pointer to them. Characters below 128 are classified using the ctype
+functions, and consequently that part of the tables will depend upon the
+current locale setting. Characters 128-255 are classified from the fixed
+ISO-8859-1 data in this file. The store is obtained via malloc().
+
+Arguments: none
+Returns: pointer to the contiguous block of data, or NULL if malloc() fails
+*/
+
+/* Classification flags and case mappings for one character in the upper
+ * part of ISO-8859-1. The lookup table built from this type is indexed by
+ * (character code - 160). The field order must not change: the table is
+ * filled with positional initializers. */
+typedef struct {
+    int is_alpha;    /* non-zero: alphabetic */
+    int is_upper;    /* non-zero: upper-case letter */
+    int is_lower;    /* non-zero: lower-case letter */
+    int is_alnum;    /* non-zero: alphanumeric */
+    int is_space;    /* non-zero: white space */
+    int is_xdigit;   /* non-zero: hexadecimal digit */
+    int is_graph;    /* non-zero: graphic character */
+    int is_punct;    /* non-zero: punctuation */
+    int is_cntrl;    /* non-zero: control character */
+    int upcase;      /* code of the upper-case form, 0 when none is stored */
+    int lowcase;     /* code of the lower-case form, 0 when none is stored */
+} HiCharProp;
+
+/* Properties of the ISO-8859-1 characters 160..255, indexed by (code - 160).
+ * Column order follows HiCharProp: alpha, upper, lower, alnum, space, xdigit,
+ * graph, punct, cntrl, then upcase, lowcase. An upcase/lowcase of 0 means
+ * "no mapping stored"; the my_toupper()/my_tolower() helpers then return the
+ * character unchanged. The table is only ever read, hence const. Comments use
+ * decimal and hexadecimal codes instead of literal glyphs so that they
+ * survive any source encoding. */
+static const HiCharProp hicharprop[] = {
+    {0,0,0,0,0,0,1,1,0, 0,0},     /* 160 (0xA0) NO-BREAK SPACE */
+    {0,0,0,0,0,0,1,1,0, 0,0},     /* 161 (0xA1) INVERTED EXCLAMATION MARK */
+    {0,0,0,0,0,0,1,1,0, 0,0},     /* 162 (0xA2) CENT SIGN */
+    {0,0,0,0,0,0,1,1,0, 0,0},     /* 163 (0xA3) POUND SIGN */
+    {0,0,0,0,0,0,1,1,0, 0,0},     /* 164 (0xA4) CURRENCY SIGN */
+    {0,0,0,0,0,0,1,1,0, 0,0},     /* 165 (0xA5) YEN SIGN */
+    {0,0,0,0,0,0,1,1,0, 0,0},     /* 166 (0xA6) BROKEN BAR */
+    {0,0,0,0,0,0,1,1,0, 0,0},     /* 167 (0xA7) SECTION SIGN */
+    {0,0,0,0,0,0,1,1,0, 0,0},     /* 168 (0xA8) DIAERESIS */
+    {0,0,0,0,0,0,1,1,0, 0,0},     /* 169 (0xA9) COPYRIGHT SIGN */
+    {1,0,0,1,0,0,1,0,0, 0,0},     /* 170 (0xAA) FEMININE ORDINAL INDICATOR */
+    {0,0,0,0,0,0,1,1,0, 0,0},     /* 171 (0xAB) LEFT-POINTING DOUBLE ANGLE QUOTATION MARK */
+    {0,0,0,0,0,0,1,1,0, 0,0},     /* 172 (0xAC) NOT SIGN */
+    {0,0,0,0,0,0,1,1,0, 0,0},     /* 173 (0xAD) SOFT HYPHEN */
+    {0,0,0,0,0,0,1,1,0, 0,0},     /* 174 (0xAE) REGISTERED SIGN */
+    {0,0,0,0,0,0,1,1,0, 0,0},     /* 175 (0xAF) MACRON */
+    {0,0,0,0,0,0,1,1,0, 0,0},     /* 176 (0xB0) DEGREE SIGN */
+    {0,0,0,0,0,0,1,1,0, 0,0},     /* 177 (0xB1) PLUS-MINUS SIGN */
+    {0,0,0,0,0,0,1,1,0, 0,0},     /* 178 (0xB2) SUPERSCRIPT TWO */
+    {0,0,0,0,0,0,1,1,0, 0,0},     /* 179 (0xB3) SUPERSCRIPT THREE */
+    {0,0,0,0,0,0,1,1,0, 0,0},     /* 180 (0xB4) ACUTE ACCENT */
+    {1,0,1,1,0,0,1,0,0, 0,0},     /* 181 (0xB5) MICRO SIGN */
+    {0,0,0,0,0,0,1,1,0, 0,0},     /* 182 (0xB6) PILCROW SIGN */
+    {0,0,0,0,0,0,1,1,0, 0,0},     /* 183 (0xB7) MIDDLE DOT */
+    {0,0,0,0,0,0,1,1,0, 0,0},     /* 184 (0xB8) CEDILLA */
+    {0,0,0,0,0,0,1,1,0, 0,0},     /* 185 (0xB9) SUPERSCRIPT ONE */
+    {1,0,0,1,0,0,1,0,0, 0,0},     /* 186 (0xBA) MASCULINE ORDINAL INDICATOR */
+    {0,0,0,0,0,0,1,1,0, 0,0},     /* 187 (0xBB) RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK */
+    {0,0,0,0,0,0,1,1,0, 0,0},     /* 188 (0xBC) VULGAR FRACTION ONE QUARTER */
+    {0,0,0,0,0,0,1,1,0, 0,0},     /* 189 (0xBD) VULGAR FRACTION ONE HALF */
+    {0,0,0,0,0,0,1,1,0, 0,0},     /* 190 (0xBE) VULGAR FRACTION THREE QUARTERS */
+    {0,0,0,0,0,0,1,1,0, 0,0},     /* 191 (0xBF) INVERTED QUESTION MARK */
+    {1,1,0,1,0,0,1,0,0, 0,224},   /* 192 (0xC0) LATIN CAPITAL LETTER A WITH GRAVE */
+    {1,1,0,1,0,0,1,0,0, 0,225},   /* 193 (0xC1) LATIN CAPITAL LETTER A WITH ACUTE */
+    {1,1,0,1,0,0,1,0,0, 0,226},   /* 194 (0xC2) LATIN CAPITAL LETTER A WITH CIRCUMFLEX */
+    {1,1,0,1,0,0,1,0,0, 0,227},   /* 195 (0xC3) LATIN CAPITAL LETTER A WITH TILDE */
+    {1,1,0,1,0,0,1,0,0, 0,228},   /* 196 (0xC4) LATIN CAPITAL LETTER A WITH DIAERESIS */
+    {1,1,0,1,0,0,1,0,0, 0,229},   /* 197 (0xC5) LATIN CAPITAL LETTER A WITH RING ABOVE */
+    {1,1,0,1,0,0,1,0,0, 0,230},   /* 198 (0xC6) LATIN CAPITAL LETTER AE */
+    {1,1,0,1,0,0,1,0,0, 0,231},   /* 199 (0xC7) LATIN CAPITAL LETTER C WITH CEDILLA */
+    {1,1,0,1,0,0,1,0,0, 0,232},   /* 200 (0xC8) LATIN CAPITAL LETTER E WITH GRAVE */
+    {1,1,0,1,0,0,1,0,0, 0,233},   /* 201 (0xC9) LATIN CAPITAL LETTER E WITH ACUTE */
+    {1,1,0,1,0,0,1,0,0, 0,234},   /* 202 (0xCA) LATIN CAPITAL LETTER E WITH CIRCUMFLEX */
+    {1,1,0,1,0,0,1,0,0, 0,235},   /* 203 (0xCB) LATIN CAPITAL LETTER E WITH DIAERESIS */
+    {1,1,0,1,0,0,1,0,0, 0,236},   /* 204 (0xCC) LATIN CAPITAL LETTER I WITH GRAVE */
+    {1,1,0,1,0,0,1,0,0, 0,237},   /* 205 (0xCD) LATIN CAPITAL LETTER I WITH ACUTE */
+    {1,1,0,1,0,0,1,0,0, 0,238},   /* 206 (0xCE) LATIN CAPITAL LETTER I WITH CIRCUMFLEX */
+    {1,1,0,1,0,0,1,0,0, 0,239},   /* 207 (0xCF) LATIN CAPITAL LETTER I WITH DIAERESIS */
+    {1,1,0,1,0,0,1,0,0, 0,240},   /* 208 (0xD0) LATIN CAPITAL LETTER ETH */
+    {1,1,0,1,0,0,1,0,0, 0,241},   /* 209 (0xD1) LATIN CAPITAL LETTER N WITH TILDE */
+    {1,1,0,1,0,0,1,0,0, 0,242},   /* 210 (0xD2) LATIN CAPITAL LETTER O WITH GRAVE */
+    {1,1,0,1,0,0,1,0,0, 0,243},   /* 211 (0xD3) LATIN CAPITAL LETTER O WITH ACUTE */
+    {1,1,0,1,0,0,1,0,0, 0,244},   /* 212 (0xD4) LATIN CAPITAL LETTER O WITH CIRCUMFLEX */
+    {1,1,0,1,0,0,1,0,0, 0,245},   /* 213 (0xD5) LATIN CAPITAL LETTER O WITH TILDE */
+    {1,1,0,1,0,0,1,0,0, 0,246},   /* 214 (0xD6) LATIN CAPITAL LETTER O WITH DIAERESIS */
+    {0,0,0,0,0,0,1,1,0, 0,0},     /* 215 (0xD7) MULTIPLICATION SIGN */
+    {1,1,0,1,0,0,1,0,0, 0,248},   /* 216 (0xD8) LATIN CAPITAL LETTER O WITH STROKE */
+    {1,1,0,1,0,0,1,0,0, 0,249},   /* 217 (0xD9) LATIN CAPITAL LETTER U WITH GRAVE */
+    {1,1,0,1,0,0,1,0,0, 0,250},   /* 218 (0xDA) LATIN CAPITAL LETTER U WITH ACUTE */
+    {1,1,0,1,0,0,1,0,0, 0,251},   /* 219 (0xDB) LATIN CAPITAL LETTER U WITH CIRCUMFLEX */
+    {1,1,0,1,0,0,1,0,0, 0,252},   /* 220 (0xDC) LATIN CAPITAL LETTER U WITH DIAERESIS */
+    {1,1,0,1,0,0,1,0,0, 0,253},   /* 221 (0xDD) LATIN CAPITAL LETTER Y WITH ACUTE */
+    {1,1,0,1,0,0,1,0,0, 0,254},   /* 222 (0xDE) LATIN CAPITAL LETTER THORN */
+    {1,0,1,1,0,0,1,0,0, 223,0},   /* 223 (0xDF) LATIN SMALL LETTER SHARP S - Ouch! upper case maps to itself */
+    {1,0,1,1,0,0,1,0,0, 192,0},   /* 224 (0xE0) LATIN SMALL LETTER A WITH GRAVE */
+    {1,0,1,1,0,0,1,0,0, 193,0},   /* 225 (0xE1) LATIN SMALL LETTER A WITH ACUTE */
+    {1,0,1,1,0,0,1,0,0, 194,0},   /* 226 (0xE2) LATIN SMALL LETTER A WITH CIRCUMFLEX */
+    {1,0,1,1,0,0,1,0,0, 195,0},   /* 227 (0xE3) LATIN SMALL LETTER A WITH TILDE */
+    {1,0,1,1,0,0,1,0,0, 196,0},   /* 228 (0xE4) LATIN SMALL LETTER A WITH DIAERESIS */
+    {1,0,1,1,0,0,1,0,0, 197,0},   /* 229 (0xE5) LATIN SMALL LETTER A WITH RING ABOVE */
+    {1,0,1,1,0,0,1,0,0, 198,0},   /* 230 (0xE6) LATIN SMALL LETTER AE */
+    {1,0,1,1,0,0,1,0,0, 199,0},   /* 231 (0xE7) LATIN SMALL LETTER C WITH CEDILLA */
+    {1,0,1,1,0,0,1,0,0, 200,0},   /* 232 (0xE8) LATIN SMALL LETTER E WITH GRAVE */
+    {1,0,1,1,0,0,1,0,0, 201,0},   /* 233 (0xE9) LATIN SMALL LETTER E WITH ACUTE */
+    {1,0,1,1,0,0,1,0,0, 202,0},   /* 234 (0xEA) LATIN SMALL LETTER E WITH CIRCUMFLEX */
+    {1,0,1,1,0,0,1,0,0, 203,0},   /* 235 (0xEB) LATIN SMALL LETTER E WITH DIAERESIS */
+    {1,0,1,1,0,0,1,0,0, 204,0},   /* 236 (0xEC) LATIN SMALL LETTER I WITH GRAVE */
+    {1,0,1,1,0,0,1,0,0, 205,0},   /* 237 (0xED) LATIN SMALL LETTER I WITH ACUTE */
+    {1,0,1,1,0,0,1,0,0, 206,0},   /* 238 (0xEE) LATIN SMALL LETTER I WITH CIRCUMFLEX */
+    {1,0,1,1,0,0,1,0,0, 207,0},   /* 239 (0xEF) LATIN SMALL LETTER I WITH DIAERESIS */
+    {1,0,1,1,0,0,1,0,0, 208,0},   /* 240 (0xF0) LATIN SMALL LETTER ETH */
+    {1,0,1,1,0,0,1,0,0, 209,0},   /* 241 (0xF1) LATIN SMALL LETTER N WITH TILDE */
+    {1,0,1,1,0,0,1,0,0, 210,0},   /* 242 (0xF2) LATIN SMALL LETTER O WITH GRAVE */
+    {1,0,1,1,0,0,1,0,0, 211,0},   /* 243 (0xF3) LATIN SMALL LETTER O WITH ACUTE */
+    {1,0,1,1,0,0,1,0,0, 212,0},   /* 244 (0xF4) LATIN SMALL LETTER O WITH CIRCUMFLEX */
+    {1,0,1,1,0,0,1,0,0, 213,0},   /* 245 (0xF5) LATIN SMALL LETTER O WITH TILDE */
+    {1,0,1,1,0,0,1,0,0, 214,0},   /* 246 (0xF6) LATIN SMALL LETTER O WITH DIAERESIS */
+    {0,0,0,0,0,0,1,1,0, 0,0},     /* 247 (0xF7) DIVISION SIGN */
+    {1,0,1,1,0,0,1,0,0, 216,0},   /* 248 (0xF8) LATIN SMALL LETTER O WITH STROKE */
+    {1,0,1,1,0,0,1,0,0, 217,0},   /* 249 (0xF9) LATIN SMALL LETTER U WITH GRAVE */
+    {1,0,1,1,0,0,1,0,0, 218,0},   /* 250 (0xFA) LATIN SMALL LETTER U WITH ACUTE */
+    {1,0,1,1,0,0,1,0,0, 219,0},   /* 251 (0xFB) LATIN SMALL LETTER U WITH CIRCUMFLEX */
+    {1,0,1,1,0,0,1,0,0, 220,0},   /* 252 (0xFC) LATIN SMALL LETTER U WITH DIAERESIS */
+    {1,0,1,1,0,0,1,0,0, 221,0},   /* 253 (0xFD) LATIN SMALL LETTER Y WITH ACUTE */
+    {1,0,1,1,0,0,1,0,0, 222,0},   /* 254 (0xFE) LATIN SMALL LETTER THORN */
+    {1,0,1,1,0,0,1,0,0, 255,0}};  /* 255 (0xFF) LATIN SMALL LETTER Y WITH DIAERESIS - upper case maps to itself */
+
+
+/* Lower-case mapping used when building the Latin-1 tables.
+ *
+ * Argument:  x  character code, expected in 0..255
+ * Returns:   tolower(x) for codes below 128; the stored lowcase entry of
+ *            hicharprop for codes 160..255 when it is non-zero; otherwise x
+ *            unchanged. Values outside 0..255 are returned unchanged instead
+ *            of being passed to tolower() or used as a table index.
+ */
+static int my_tolower(int x) {
+    int mapped;
+
+    if (x < 0 || x > 255)   /* outside both ctype's domain and the table */
+        return x;
+    if (x < 128)
+        return tolower(x);
+    if (x < 160)            /* 128..159 have no case */
+        return x;
+    mapped = hicharprop[x - 160].lowcase;
+    return (mapped != 0) ? mapped : x;   /* 0 means "no mapping stored" */
+}
+
+/* Upper-case mapping used when building the Latin-1 tables.
+ *
+ * Argument:  x  character code, expected in 0..255
+ * Returns:   toupper(x) for codes below 128; the stored upcase entry of
+ *            hicharprop for codes 160..255 when it is non-zero; otherwise x
+ *            unchanged. Values outside 0..255 are returned unchanged instead
+ *            of being passed to toupper() or used as a table index.
+ */
+static int my_toupper(int x) {
+    int mapped;
+
+    if (x < 0 || x > 255)   /* outside both ctype's domain and the table */
+        return x;
+    if (x < 128)
+        return toupper(x);
+    if (x < 160)            /* 128..159 have no case */
+        return x;
+    mapped = hicharprop[x - 160].upcase;
+    return (mapped != 0) ? mapped : x;   /* 0 means "no mapping stored" */
+}
+
+/* Lower-case test used when building the Latin-1 tables.
+ *
+ * Argument:  x  character code, expected in 0..255
+ * Returns:   islower(x) for codes below 128, 0 for 128..159, and the
+ *            is_lower flag of hicharprop for 160..255. Values outside
+ *            0..255 yield 0 rather than indexing outside the table.
+ */
+static int my_islower(int x) {
+    if (x < 0 || x > 255)
+        return 0;
+    if (x < 128)
+        return islower(x);
+    if (x < 160)
+        return 0;
+    return hicharprop[x - 160].is_lower;
+}
+
+/* Upper-case test used when building the Latin-1 tables.
+ *
+ * Argument:  x  character code, expected in 0..255
+ * Returns:   isupper(x) for codes below 128, 0 for 128..159, and the
+ *            is_upper flag of hicharprop for 160..255. Values outside
+ *            0..255 yield 0 rather than indexing outside the table.
+ */
+static int my_isupper(int x) {
+    if (x < 0 || x > 255)
+        return 0;
+    if (x < 128)
+        return isupper(x);
+    if (x < 160)
+        return 0;
+    return hicharprop[x - 160].is_upper;
+}
+
+/* Decimal-digit test used when building the Latin-1 tables: codes below 128
+ * are delegated to isdigit(), every other code is reported as a non-digit. */
+static int my_isdigit(int x) {
+    return (x < 128) ? isdigit(x) : 0;
+}
+
+/* Alphabetic test used when building the Latin-1 tables.
+ *
+ * Argument:  x  character code, expected in 0..255
+ * Returns:   isalpha(x) for codes below 128, 0 for 128..159, and the
+ *            is_alpha flag of hicharprop for 160..255. Values outside
+ *            0..255 yield 0 rather than indexing outside the table.
+ */
+static int my_isalpha(int x) {
+    if (x < 0 || x > 255)
+        return 0;
+    if (x < 128)
+        return isalpha(x);
+    if (x < 160)
+        return 0;
+    return hicharprop[x - 160].is_alpha;
+}
+
+/* Alphanumeric test used when building the Latin-1 tables.
+ *
+ * Argument:  x  character code, expected in 0..255
+ * Returns:   isalnum(x) for codes below 128, 0 for 128..159, and the
+ *            is_alnum flag of hicharprop for 160..255. Values outside
+ *            0..255 yield 0 rather than indexing outside the table.
+ */
+static int my_isalnum(int x) {
+    if (x < 0 || x > 255)
+        return 0;
+    if (x < 128)
+        return isalnum(x);
+    if (x < 160)
+        return 0;
+    return hicharprop[x - 160].is_alnum;
+}
+
+/* White-space test used when building the Latin-1 tables.
+ *
+ * Argument:  x  character code, expected in 0..255
+ * Returns:   isspace(x) for codes below 128, 0 for 128..159, and the
+ *            is_space flag of hicharprop for 160..255. Values outside
+ *            0..255 yield 0 rather than indexing outside the table.
+ */
+static int my_isspace(int x) {
+    if (x < 0 || x > 255)
+        return 0;
+    if (x < 128)
+        return isspace(x);
+    if (x < 160)
+        return 0;
+    return hicharprop[x - 160].is_space;
+}
+
+/* Hexadecimal-digit test used when building the Latin-1 tables.
+ *
+ * Argument:  x  character code, expected in 0..255
+ * Returns:   isxdigit(x) for codes below 128, 0 for 128..159, and the
+ *            is_xdigit flag of hicharprop for 160..255. Values outside
+ *            0..255 yield 0 rather than indexing outside the table.
+ */
+static int my_isxdigit(int x) {
+    if (x < 0 || x > 255)
+        return 0;
+    if (x < 128)
+        return isxdigit(x);
+    if (x < 160)
+        return 0;
+    return hicharprop[x - 160].is_xdigit;
+}
+/* Graphic-character test used when building the Latin-1 tables.
+ *
+ * Argument:  x  character code, expected in 0..255
+ * Returns:   isgraph(x) for codes below 128, 0 for 128..159, and the
+ *            is_graph flag of hicharprop for 160..255. Values outside
+ *            0..255 yield 0 rather than indexing outside the table.
+ */
+static int my_isgraph(int x) {
+    if (x < 0 || x > 255)
+        return 0;
+    if (x < 128)
+        return isgraph(x);
+    if (x < 160)
+        return 0;
+    return hicharprop[x - 160].is_graph;
+}
+/* Printable-character test used when building the Latin-1 tables.
+ *
+ * Argument:  x  character code, expected in 0..255
+ * Returns:   isprint(x) for codes below 128 and 0 for 128..159. For
+ *            160..255 a character counts as printable when its hicharprop
+ *            entry is flagged as graphic or as white space. Values outside
+ *            0..255 yield 0 rather than indexing outside the table.
+ */
+static int my_isprint(int x) {
+    const HiCharProp *prop;
+
+    if (x < 0 || x > 255)
+        return 0;
+    if (x < 128)
+        return isprint(x);
+    if (x < 160)
+        return 0;
+    prop = &hicharprop[x - 160];
+    return prop->is_graph | prop->is_space;
+}
+
+/* Punctuation test used when building the Latin-1 tables.
+ *
+ * Argument:  x  character code, expected in 0..255
+ * Returns:   ispunct(x) for codes below 128, 0 for 128..159, and the
+ *            is_punct flag of hicharprop for 160..255. Values outside
+ *            0..255 yield 0 rather than indexing outside the table.
+ */
+static int my_ispunct(int x) {
+    if (x < 0 || x > 255)
+        return 0;
+    if (x < 128)
+        return ispunct(x);
+    if (x < 160)
+        return 0;
+    return hicharprop[x - 160].is_punct;
+}
+
+
+/* Control-character test used when building the Latin-1 tables.
+ *
+ * Argument:  x  character code, expected in 0..255
+ * Returns:   iscntrl(x) for codes below 128, 1 for every code in 128..159,
+ *            and the is_cntrl flag of hicharprop for 160..255. Values
+ *            outside 0..255 yield 0 rather than indexing outside the table.
+ */
+static int my_iscntrl(int x) {
+    if (x < 0 || x > 255)
+        return 0;
+    if (x < 128)
+        return iscntrl(x);
+    if (x < 160)            /* the whole 128..159 block is treated as control */
+        return 1;
+    return hicharprop[x - 160].is_cntrl;
+}
+/* Build the complete set of PCRE character tables using the my_* helpers.
+ *
+ * The block is allocated with malloc() and is filled, in order, with the
+ * lower-casing table, the case-flipping table, the character class bitmaps
+ * and the character type table.
+ *
+ * Arguments:  none
+ * Returns:    pointer to the contiguous block of data, or NULL when the
+ *             allocation fails
+ */
+const unsigned char *
+pcre_make_latin1_tables(void)
+{
+  unsigned char *tables, *out;
+  int c;
+
+  tables = (unsigned char*)malloc(tables_length);
+  if (tables == NULL)
+    return NULL;
+  out = tables;
+
+  /* Table 1: lower casing. */
+
+  for (c = 0; c < 256; c++)
+    {
+    *out++ = my_tolower(c);
+    }
+
+  /* Table 2: case flipping - lower-case characters map to their upper-case
+  form, everything else goes through the lower-casing function. */
+
+  for (c = 0; c < 256; c++)
+    {
+    if (my_islower(c))
+      *out++ = my_toupper(c);
+    else
+      *out++ = my_tolower(c);
+    }
+
+  /* Table 3: the character class bitmaps, one bit per character and class.
+  Every class is tested separately; no attempt is made to derive one class
+  from another. The "space" bitmap takes everything my_isspace() reports,
+  which keeps it usable for the POSIX class [:space:]. A character can be
+  alnum without being upper or lower (the ordinal indicators \xAA and \xBA
+  are flagged that way in the table above), so alnum is tested on its own.
+  The "word" bitmap is alnum plus underscore. */
+
+  memset(out, 0, cbit_length);
+  for (c = 0; c < 256; c++)
+    {
+    unsigned char *slot = out + c/8;   /* byte holding this character's bit */
+    int bit = 1 << (c&7);              /* mask selecting the bit in that byte */
+
+    if (my_isdigit(c))             slot[cbit_digit]  |= bit;
+    if (my_isupper(c))             slot[cbit_upper]  |= bit;
+    if (my_islower(c))             slot[cbit_lower]  |= bit;
+    if (my_isalnum(c) || c == '_') slot[cbit_word]   |= bit;
+    if (my_isspace(c))             slot[cbit_space]  |= bit;
+    if (my_isxdigit(c))            slot[cbit_xdigit] |= bit;
+    if (my_isgraph(c))             slot[cbit_graph]  |= bit;
+    if (my_isprint(c))             slot[cbit_print]  |= bit;
+    if (my_ispunct(c))             slot[cbit_punct]  |= bit;
+    if (my_iscntrl(c))             slot[cbit_cntrl]  |= bit;
+    }
+  out += cbit_length;
+
+  /* Table 4: the character type table. VT (0x0b) is left out of the white
+  space characters here, because Perl doesn't recognize it as such for \s and
+  for comments within regexes. */
+
+  for (c = 0; c < 256; c++)
+    {
+    int flags = 0;
+
+    if (c != 0x0b && my_isspace(c)) flags += ctype_space;
+    if (my_isalpha(c))              flags += ctype_letter;
+    if (my_isdigit(c))              flags += ctype_digit;
+    if (my_isxdigit(c))             flags += ctype_xdigit;
+    if (my_isalnum(c) || c == '_')  flags += ctype_word;
+
+    /* strchr also matches the terminating zero of the string it searches.
+    That is wanted here: binary zero must be flagged as a meta-character,
+    which in this sense is any character that terminates a run of data
+    characters. */
+
+    if (strchr("\\*+?{^.$|()[", c) != 0) flags += ctype_meta;
+    *out++ = flags;
+    }
+
+  return tables;
+}
+
+/* End of pcre_make_latin1_default.c */