aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/beam/erl_bif_re.c
diff options
context:
space:
mode:
Diffstat (limited to 'erts/emulator/beam/erl_bif_re.c')
-rw-r--r--erts/emulator/beam/erl_bif_re.c504
1 files changed, 436 insertions, 68 deletions
diff --git a/erts/emulator/beam/erl_bif_re.c b/erts/emulator/beam/erl_bif_re.c
index 3d34c2a77f..ff7746ce1d 100644
--- a/erts/emulator/beam/erl_bif_re.c
+++ b/erts/emulator/beam/erl_bif_re.c
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2008-2013. All Rights Reserved.
+ * Copyright Ericsson AB 2008-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
*
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
@@ -99,7 +100,7 @@ Sint erts_re_set_loop_limit(Sint limit)
static int term_to_int(Eterm term, int *sp)
{
-#if defined(ARCH_64) && !HALFWORD_HEAP
+#if defined(ARCH_64)
if (is_small(term)) {
Uint x = signed_val(term);
@@ -150,7 +151,7 @@ static int term_to_int(Eterm term, int *sp)
static Eterm make_signed_integer(int x, Process *p)
{
-#if defined(ARCH_64) && !HALFWORD_HEAP
+#if defined(ARCH_64)
return make_small(x);
#else
Eterm* hp;
@@ -180,6 +181,9 @@ static Eterm make_signed_integer(int x, Process *p)
#define PARSE_FLAG_STARTOFFSET 8
#define PARSE_FLAG_CAPTURE_OPT 16
#define PARSE_FLAG_GLOBAL 32
+#define PARSE_FLAG_REPORT_ERRORS 64
+#define PARSE_FLAG_MATCH_LIMIT 128
+#define PARSE_FLAG_MATCH_LIMIT_RECURSION 256
#define CAPSPEC_VALUES 0
#define CAPSPEC_TYPE 1
@@ -192,7 +196,9 @@ parse_options(Eterm listp, /* in */
int *exec_options, /* out */
int *flags,/* out */
int *startoffset, /* out */
- Eterm *capture_spec) /* capture_spec[CAPSPEC_SIZE] */ /* out */
+ Eterm *capture_spec, /* capture_spec[CAPSPEC_SIZE] */ /* out */
+ int *match_limit, /* out */
+ int *match_limit_recursion) /* out */
{
int copt,eopt,fl;
Eterm item;
@@ -234,7 +240,7 @@ parse_options(Eterm listp, /* in */
case am_offset:
{
int tmp;
- if (!term_to_int(tp[2],&tmp)) {
+ if (!term_to_int(tp[2],&tmp) || tmp < 0) {
return -1;
}
if (startoffset != NULL) {
@@ -243,6 +249,31 @@ parse_options(Eterm listp, /* in */
}
fl |= (PARSE_FLAG_UNIQUE_EXEC_OPT|PARSE_FLAG_STARTOFFSET);
break;
+ case am_match_limit:
+ {
+ int tmp;
+ if (!term_to_int(tp[2],&tmp) || tmp < 0) {
+ return -1;
+ }
+ if (match_limit != NULL) {
+ *match_limit = tmp;
+ }
+ }
+ fl |= (PARSE_FLAG_UNIQUE_EXEC_OPT|PARSE_FLAG_MATCH_LIMIT);
+ break;
+ case am_match_limit_recursion:
+ {
+ int tmp;
+ if (!term_to_int(tp[2],&tmp) || tmp < 0) {
+ return -1;
+ }
+ if (match_limit_recursion != NULL) {
+ *match_limit_recursion = tmp;
+ }
+ }
+ fl |= (PARSE_FLAG_UNIQUE_EXEC_OPT|
+ PARSE_FLAG_MATCH_LIMIT_RECURSION);
+ break;
case am_newline:
if (!is_atom(tp[2])) {
return -1;
@@ -276,7 +307,7 @@ parse_options(Eterm listp, /* in */
default:
return -1;
}
- }else if (is_not_atom(item)) {
+ } else if (is_not_atom(item)) {
return -1;
} else {
switch(item) {
@@ -288,6 +319,10 @@ parse_options(Eterm listp, /* in */
eopt |= PCRE_NOTEMPTY;
fl |= PARSE_FLAG_UNIQUE_EXEC_OPT;
break;
+ case am_notempty_atstart:
+ eopt |= PCRE_NOTEMPTY_ATSTART;
+ fl |= PARSE_FLAG_UNIQUE_EXEC_OPT;
+ break;
case am_notbol:
eopt |= PCRE_NOTBOL;
fl |= PARSE_FLAG_UNIQUE_EXEC_OPT;
@@ -296,6 +331,10 @@ parse_options(Eterm listp, /* in */
eopt |= PCRE_NOTEOL;
fl |= PARSE_FLAG_UNIQUE_EXEC_OPT;
break;
+ case am_no_start_optimize:
+ copt |= PCRE_NO_START_OPTIMIZE;
+ fl |= PARSE_FLAG_UNIQUE_COMPILE_OPT;
+ break;
case am_caseless:
copt |= PCRE_CASELESS;
fl |= PARSE_FLAG_UNIQUE_COMPILE_OPT;
@@ -332,6 +371,18 @@ parse_options(Eterm listp, /* in */
copt |= PCRE_UNGREEDY;
fl |= PARSE_FLAG_UNIQUE_COMPILE_OPT;
break;
+ case am_ucp:
+ copt |= PCRE_UCP;
+ fl |= PARSE_FLAG_UNIQUE_COMPILE_OPT;
+ break;
+ case am_never_utf:
+ copt |= PCRE_NEVER_UTF;
+ fl |= PARSE_FLAG_UNIQUE_COMPILE_OPT;
+ break;
+ case am_report_errors:
+ fl |= (PARSE_FLAG_UNIQUE_EXEC_OPT |
+ PARSE_FLAG_REPORT_ERRORS);
+ break;
case am_unicode:
copt |= PCRE_UTF8;
fl |= (PARSE_FLAG_UNIQUE_COMPILE_OPT | PARSE_FLAG_UNICODE);
@@ -359,7 +410,7 @@ parse_options(Eterm listp, /* in */
if (compile_options != NULL) {
*compile_options = copt;
}
- if (exec_options != NULL) {
+ if (exec_options != NULL) {
*exec_options = eopt;
}
if (flags != NULL) {
@@ -373,34 +424,49 @@ parse_options(Eterm listp, /* in */
*/
static Eterm
-build_compile_result(Process *p, Eterm error_tag, pcre *result, int errcode, const char *errstr, int errofset, int unicode, int with_ok)
+build_compile_result(Process *p, Eterm error_tag, pcre *result, int errcode, const char *errstr, int errofset, int unicode, int with_ok, Eterm extra_err_tag)
{
Eterm *hp;
Eterm ret;
size_t pattern_size;
int capture_count;
+ int use_crlf;
+ unsigned long options;
if (!result) {
/* Return {error_tag, {Code, String, Offset}} */
int elen = sys_strlen(errstr);
int need = 3 /* tuple of 2 */ +
3 /* tuple of 2 */ +
- (2 * elen) /* The error string list */;
+ (2 * elen) /* The error string list */ +
+ ((extra_err_tag != NIL) ? 3 : 0);
hp = HAlloc(p, need);
ret = buf_to_intlist(&hp, (char *) errstr, elen, NIL);
ret = TUPLE2(hp, ret, make_small(errofset));
hp += 3;
+ if (extra_err_tag != NIL) {
+ /* Return {error_tag, {extra_tag,
+ {Code, String, Offset}}} instead */
+ ret = TUPLE2(hp, extra_err_tag, ret);
+ hp += 3;
+ }
ret = TUPLE2(hp, error_tag, ret);
} else {
erts_pcre_fullinfo(result, NULL, PCRE_INFO_SIZE, &pattern_size);
erts_pcre_fullinfo(result, NULL, PCRE_INFO_CAPTURECOUNT, &capture_count);
+ erts_pcre_fullinfo(result, NULL, PCRE_INFO_OPTIONS, &options);
+ options &= PCRE_NEWLINE_CR|PCRE_NEWLINE_LF | PCRE_NEWLINE_CRLF |
+ PCRE_NEWLINE_ANY | PCRE_NEWLINE_ANYCRLF;
+ use_crlf = (options == PCRE_NEWLINE_ANY ||
+ options == PCRE_NEWLINE_CRLF ||
+ options == PCRE_NEWLINE_ANYCRLF);
/* XXX: Optimize - keep in offheap binary to allow this to
be kept across traps w/o need of copying */
ret = new_binary(p, (byte *) result, pattern_size);
erts_pcre_free(result);
- hp = HAlloc(p, (with_ok) ? (3+5) : 5);
- ret = TUPLE4(hp,am_re_pattern, make_small(capture_count), make_small(unicode),ret);
+ hp = HAlloc(p, (with_ok) ? (3+6) : 6);
+ ret = TUPLE5(hp,am_re_pattern, make_small(capture_count), make_small(unicode),make_small(use_crlf),ret);
if (with_ok) {
- hp += 5;
+ hp += 6;
ret = TUPLE2(hp,am_ok,ret);
}
}
@@ -424,9 +490,12 @@ re_compile(Process* p, Eterm arg1, Eterm arg2)
int options = 0;
int pflags = 0;
int unicode = 0;
+#ifdef DEBUG
+ int buffres;
+#endif
- if (parse_options(arg2,&options,NULL,&pflags,NULL,NULL)
+ if (parse_options(arg2,&options,NULL,&pflags,NULL,NULL,NULL,NULL)
< 0) {
BIF_ERROR(p,BADARG);
}
@@ -445,16 +514,19 @@ re_compile(Process* p, Eterm arg1, Eterm arg2)
BIF_ERROR(p,BADARG);
}
expr = erts_alloc(ERTS_ALC_T_RE_TMP_BUF, slen + 1);
- if (erts_iolist_to_buf(arg1, expr, slen) != 0) {
- erts_free(ERTS_ALC_T_RE_TMP_BUF, expr);
- BIF_ERROR(p,BADARG);
- }
+#ifdef DEBUG
+ buffres =
+#endif
+ erts_iolist_to_buf(arg1, expr, slen);
+
+ ASSERT(buffres >= 0);
+
expr[slen]='\0';
result = erts_pcre_compile2(expr, options, &errcode,
&errstr, &errofset, default_table);
ret = build_compile_result(p, am_error, result, errcode,
- errstr, errofset, unicode, 1);
+ errstr, errofset, unicode, 1, NIL);
erts_free(ERTS_ALC_T_RE_TMP_BUF, expr);
BIF_RET(ret);
}
@@ -492,7 +564,7 @@ typedef struct _return_info {
} ReturnInfo;
typedef struct _restart_context {
- pcre_extra extra;
+ erts_pcre_extra extra;
void *restart_data;
Uint32 flags;
char *subject; /* to be able to free it when done */
@@ -502,6 +574,7 @@ typedef struct _restart_context {
} RestartContext;
#define RESTART_FLAG_SUBJECT_IN_BINARY 0x1
+#define RESTART_FLAG_REPORT_MATCH_LIMIT 0x2
static void cleanup_restart_context(RestartContext *rc)
{
@@ -542,13 +615,35 @@ static Eterm build_exec_return(Process *p, int rc, RestartContext *restartp, Ete
Eterm res;
Eterm *hp;
if (rc <= 0) {
- res = am_nomatch;
+ if (restartp->flags & RESTART_FLAG_REPORT_MATCH_LIMIT) {
+ if (rc == PCRE_ERROR_MATCHLIMIT) {
+ hp = HAlloc(p,3);
+ res = TUPLE2(hp,am_error,am_match_limit);
+ } else if (rc == PCRE_ERROR_RECURSIONLIMIT) {
+ hp = HAlloc(p,3);
+ res = TUPLE2(hp,am_error,am_match_limit_recursion);
+ } else {
+ res = am_nomatch;
+ }
+ } else {
+ res = am_nomatch;
+ }
} else {
- ReturnInfo *ri = restartp->ret_info;
- ReturnInfo defri = {RetIndex,0,{0}};
- if (ri == NULL) {
+ ReturnInfo *ri;
+ ReturnInfo defri;
+
+ if (restartp->ret_info == NULL) {
+ /* OpenBSD 5.8 gcc compiler for some reason creates
+ bad code if the above initialization is done
+ inline with the struct. So don't do that. */
+ defri.type = RetIndex;
+ defri.num_spec = 0;
+ defri.v[0] = 0;
ri = &defri;
+ } else {
+ ri = restartp->ret_info;
}
+
if (ri->type == RetNone) {
res = am_match;
} else if (ri->type == RetIndex){
@@ -577,6 +672,17 @@ static Eterm build_exec_return(Process *p, int rc, RestartContext *restartp, Ete
ri->num_spec * 2 * sizeof(Eterm));
for (i = 0; i < ri->num_spec; ++i) {
x = ri->v[i];
+ if (x < -1) {
+ int n = i-x+1;
+ int j;
+ for (j = i+1; j < ri->num_spec && j < n; ++j) {
+ if (restartp->ovector[(ri->v[j])*2] >= 0) {
+ x = ri->v[j];
+ break;
+ }
+ }
+ i = n-1;
+ }
if (x < rc && x >= 0) {
tmp_vect[n*2] = make_signed_integer(restartp->ovector[x*2],p);
tmp_vect[n*2+1] = make_signed_integer(restartp->ovector[x*2+1]-restartp->ovector[x*2],p);
@@ -658,6 +764,17 @@ static Eterm build_exec_return(Process *p, int rc, RestartContext *restartp, Ete
ri->num_spec * sizeof(Eterm));
for (i = 0; i < ri->num_spec; ++i) {
x = ri->v[i];
+ if (x < -1) {
+ int n = i-x+1;
+ int j;
+ for (j = i+1; j < ri->num_spec && j < n; ++j) {
+ if (restartp->ovector[(ri->v[j])*2] >= 0) {
+ x = ri->v[j];
+ break;
+ }
+ }
+ i = n-1;
+ }
if (x < rc && x >= 0) {
char *cp;
int len;
@@ -722,6 +839,49 @@ static Eterm build_exec_return(Process *p, int rc, RestartContext *restartp, Ete
*/
#define RINFO_SIZ(Num) (sizeof(ReturnInfo) + (sizeof(int) * (Num - 1)))
+#define PICK_INDEX(NameEntry) \
+ ((int) ((((unsigned) ((unsigned char *) (NameEntry))[0]) << 8) + \
+ ((unsigned) ((unsigned char *) (NameEntry))[1])))
+
+
+static void build_one_capture(const pcre *code, ReturnInfo **ri, int *sallocated, int has_dupnames, char *name)
+{
+ ReturnInfo *r = (*ri);
+ if (has_dupnames) {
+ /* Build a sequence of positions, starting with -size if
+ more than one, otherwise just put the index there... */
+ char *first,*last;
+ int esize = erts_pcre_get_stringtable_entries(code,name,&first,&last);
+ if (esize == PCRE_ERROR_NOSUBSTRING) {
+ r->v[r->num_spec - 1] = -1;
+ } else if(last == first) {
+ r->v[r->num_spec - 1] = PICK_INDEX(first);
+ } else {
+ int num = ((last - first) / esize) + 1;
+ int i;
+ ASSERT(num > 1);
+ r->v[r->num_spec - 1] = -num; /* A value less than -1 means
+ multiple indexes for same name */
+ for (i = 0; i < num; ++i) {
+ ++(r->num_spec);
+ if(r->num_spec > (*sallocated)) {
+ (*sallocated) += 10;
+ r = erts_realloc(ERTS_ALC_T_RE_SUBJECT, r,
+ RINFO_SIZ((*sallocated)));
+ }
+ r->v[r->num_spec - 1] = PICK_INDEX(first);
+ first += esize;
+ }
+ }
+ } else {
+ /* Use the faster binary search if no duplicate names are present */
+ if ((r->v[r->num_spec - 1] = erts_pcre_get_stringnumber(code,name)) ==
+ PCRE_ERROR_NOSUBSTRING) {
+ r->v[r->num_spec - 1] = -1;
+ }
+ }
+ *ri = r;
+}
static ReturnInfo *
build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code)
@@ -770,13 +930,58 @@ build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code)
}
ri->v[ri->num_spec - 1] = 0;
break;
+ case am_all_names:
+ {
+ int rc,i,top;
+ int entrysize;
+ unsigned char *nametable, *last = NULL;
+ int has_dupnames;
+ unsigned long options;
+
+ if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_OPTIONS, &options) != 0)
+ goto error;
+ if ((rc = erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
+ goto error;
+ if (top <= 0) {
+ ri->num_spec = 0;
+ ri->type = RetNone;
+ break;
+ }
+ if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize) != 0)
+ goto error;
+ if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable) != 0)
+ goto error;
+
+ has_dupnames = ((options & PCRE_DUPNAMES) != 0);
+
+ for(i=0;i<top;++i) {
+ if (last == NULL || !has_dupnames || strcmp((char *) last+2,(char *) nametable+2)) {
+ ASSERT(ri->num_spec >= 0);
+ ++(ri->num_spec);
+ if(ri->num_spec > sallocated) {
+ sallocated += 10;
+ ri = erts_realloc(ERTS_ALC_T_RE_SUBJECT, ri, RINFO_SIZ(sallocated));
+ }
+ if (has_dupnames) {
+ /* This could be more effective, we actually have
+ the names and could fill in the vector
+ immediately. Now we lookup the name again. */
+ build_one_capture(code,&ri,&sallocated,has_dupnames,(char *) nametable+2);
+ } else {
+ ri->v[ri->num_spec - 1] = PICK_INDEX(nametable);
+ }
+ }
+ last = nametable;
+ nametable += entrysize;
+ }
+ break;
+ }
default:
if (is_list(capture_spec[CAPSPEC_VALUES])) {
for(l=capture_spec[CAPSPEC_VALUES];is_list(l);l = CDR(list_val(l))) {
int x;
Eterm val = CAR(list_val(l));
- if (ri->num_spec < 0)
- ri->num_spec = 0;
+ ASSERT(ri->num_spec >= 0);
++(ri->num_spec);
if(ri->num_spec > sallocated) {
sallocated += 10;
@@ -785,6 +990,11 @@ build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code)
if (term_to_int(val,&x)) {
ri->v[ri->num_spec - 1] = x;
} else if (is_atom(val) || is_binary(val) || is_list(val)) {
+ int has_dupnames;
+ unsigned long options;
+ if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_OPTIONS, &options) != 0)
+ goto error;
+ has_dupnames = ((options & PCRE_DUPNAMES) != 0);
if (is_atom(val)) {
Atom *ap = atom_tab(atom_val(val));
if ((ap->len + 1) > tmpbsiz) {
@@ -799,6 +1009,10 @@ build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code)
tmpb[ap->len] = '\0';
} else {
ErlDrvSizeT slen;
+#ifdef DEBUG
+ int buffres;
+#endif
+
if (erts_iolist_size(val, &slen)) {
goto error;
}
@@ -810,15 +1024,15 @@ build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code)
(tmpbsiz = slen + 1));
}
}
- if (erts_iolist_to_buf(val, tmpb, slen) != 0) {
- goto error;
- }
+
+#ifdef DEBUG
+ buffres =
+#endif
+ erts_iolist_to_buf(val, tmpb, slen);
+ ASSERT(buffres >= 0);
tmpb[slen] = '\0';
}
- if ((ri->v[ri->num_spec - 1] = erts_pcre_get_stringnumber(code,tmpb)) ==
- PCRE_ERROR_NOSUBSTRING) {
- ri->v[ri->num_spec - 1] = -1;
- }
+ build_one_capture(code,&ri,&sallocated,has_dupnames,tmpb);
} else {
goto error;
}
@@ -867,15 +1081,18 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3)
unsigned long loop_count;
Eterm capture[CAPSPEC_SIZE] = CAPSPEC_INIT;
int is_list_cap;
+ int match_limit = 0;
+ int match_limit_recursion = 0;
- if (parse_options(arg3,&comp_options,&options,&pflags,&startoffset,capture)
+ if (parse_options(arg3,&comp_options,&options,&pflags,&startoffset,capture,
+ &match_limit,&match_limit_recursion)
< 0) {
BIF_ERROR(p,BADARG);
}
is_list_cap = ((pflags & PARSE_FLAG_CAPTURE_OPT) &&
(capture[CAPSPEC_TYPE] == am_list));
- if (is_not_tuple(arg2) || (arityval(*tuple_val(arg2)) != 4)) {
+ if (is_not_tuple(arg2) || (arityval(*tuple_val(arg2)) != 5)) {
if (is_binary(arg2) || is_list(arg2) || is_nil(arg2)) {
/* Compile from textual RE */
ErlDrvSizeT slen;
@@ -885,6 +1102,9 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3)
const char *errstr = "";
int errofset = 0;
int capture_count;
+#ifdef DEBUG
+ int buffres;
+#endif
if (pflags & PARSE_FLAG_UNICODE &&
(!is_binary(arg2) || !is_binary(arg1) ||
@@ -897,18 +1117,32 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3)
}
expr = erts_alloc(ERTS_ALC_T_RE_TMP_BUF, slen + 1);
- if (erts_iolist_to_buf(arg2, expr, slen) != 0) {
- erts_free(ERTS_ALC_T_RE_TMP_BUF, expr);
- BIF_ERROR(p,BADARG);
- }
+
+#ifdef DEBUG
+ buffres =
+#endif
+ erts_iolist_to_buf(arg2, expr, slen);
+
+ ASSERT(buffres >= 0);
+
expr[slen]='\0';
result = erts_pcre_compile2(expr, comp_options, &errcode,
&errstr, &errofset, default_table);
if (!result) {
- erts_free(ERTS_ALC_T_RE_TMP_BUF, expr);
/* Compilation error gives badarg except in the compile
- function */
- BIF_ERROR(p,BADARG);
+ function or if we have PARSE_FLAG_REPORT_ERRORS */
+ if (pflags & PARSE_FLAG_REPORT_ERRORS) {
+ res = build_compile_result(p, am_error, result, errcode,
+ errstr, errofset,
+ (pflags &
+ PARSE_FLAG_UNICODE) ? 1 : 0,
+ 1, am_compile);
+ erts_free(ERTS_ALC_T_RE_TMP_BUF, expr);
+ BIF_RET(res);
+ } else {
+ erts_free(ERTS_ALC_T_RE_TMP_BUF, expr);
+ BIF_ERROR(p,BADARG);
+ }
}
if (pflags & PARSE_FLAG_GLOBAL) {
Eterm precompiled =
@@ -917,7 +1151,7 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3)
errstr, errofset,
(pflags &
PARSE_FLAG_UNICODE) ? 1 : 0,
- 0);
+ 0, NIL);
Eterm *hp,r;
erts_free(ERTS_ALC_T_RE_TMP_BUF, expr);
hp = HAlloc(p,4);
@@ -947,7 +1181,8 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3)
tp = tuple_val(arg2);
if (tp[1] != am_re_pattern || is_not_small(tp[2]) ||
- is_not_small(tp[3]) || is_not_binary(tp[4])) {
+ is_not_small(tp[3]) || is_not_small(tp[4]) ||
+ is_not_binary(tp[5])) {
BIF_ERROR(p,BADARG);
}
@@ -967,9 +1202,9 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3)
}
ovsize = 3*(unsigned_val(tp[2])+1);
- code_size = binary_size(tp[4]);
- if ((code_tmp = (const pcre *)
- erts_get_aligned_binary_bytes(tp[4], &temp_alloc)) == NULL) {
+ code_size = binary_size(tp[5]);
+ code_tmp = (const pcre *) erts_get_aligned_binary_bytes(tp[5], &temp_alloc);
+ if (code_tmp == NULL || code_size < 4) {
erts_free_aligned_binary_bytes(temp_alloc);
BIF_ERROR(p, BADARG);
}
@@ -994,6 +1229,16 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3)
restart.extra.restart_flags = 0;
restart.extra.loop_counter_return = &loop_count;
restart.ret_info = NULL;
+
+ if (pflags & PARSE_FLAG_MATCH_LIMIT) {
+ restart.extra.flags |= PCRE_EXTRA_MATCH_LIMIT;
+ restart.extra.match_limit = match_limit;
+ }
+
+ if (pflags & PARSE_FLAG_MATCH_LIMIT_RECURSION) {
+ restart.extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
+ restart.extra.match_limit_recursion = match_limit_recursion;
+ }
if (pflags & PARSE_FLAG_CAPTURE_OPT) {
if ((restart.ret_info = build_capture(capture,restart.code)) == NULL) {
@@ -1002,7 +1247,7 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3)
BIF_ERROR(p,BADARG);
}
}
-
+
/* Optimized - if already in binary off heap, keep that and avoid
copying, also binary returns can be sub binaries in that case */
@@ -1029,6 +1274,9 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3)
restart.subject = (char *) (pb->bytes+offset);
restart.flags |= RESTART_FLAG_SUBJECT_IN_BINARY;
} else {
+#ifdef DEBUG
+ int buffres;
+#endif
handle_iolist:
if (erts_iolist_size(arg1, &slength)) {
erts_free(ERTS_ALC_T_RE_SUBJECT, restart.ovector);
@@ -1040,24 +1288,30 @@ handle_iolist:
}
restart.subject = erts_alloc(ERTS_ALC_T_RE_SUBJECT, slength);
- if (erts_iolist_to_buf(arg1, restart.subject, slength) != 0) {
- erts_free(ERTS_ALC_T_RE_SUBJECT, restart.ovector);
- erts_free(ERTS_ALC_T_RE_SUBJECT, restart.code);
- erts_free(ERTS_ALC_T_RE_SUBJECT, restart.subject);
- if (restart.ret_info != NULL) {
- erts_free(ERTS_ALC_T_RE_SUBJECT, restart.ret_info);
- }
- BIF_ERROR(p,BADARG);
- }
+#ifdef DEBUG
+ buffres =
+#endif
+ erts_iolist_to_buf(arg1, restart.subject, slength);
+ ASSERT(buffres >= 0);
}
+ if (pflags & PARSE_FLAG_REPORT_ERRORS) {
+ restart.flags |= RESTART_FLAG_REPORT_MATCH_LIMIT;
+ }
#ifdef DEBUG
loop_count = 0xFFFFFFFF;
#endif
+
+ rc = erts_pcre_exec(restart.code, &(restart.extra), restart.subject,
+ slength, startoffset,
+ options, restart.ovector, ovsize);
+
+ if (rc == PCRE_ERROR_BADENDIANNESS || rc == PCRE_ERROR_BADMAGIC) {
+ cleanup_restart_context(&restart);
+ BIF_ERROR(p,BADARG);
+ }
- rc = erts_pcre_exec(restart.code, &(restart.extra), restart.subject, slength, startoffset,
- options, restart.ovector, ovsize);
ASSERT(loop_count != 0xFFFFFFFF);
BUMP_REDS(p, loop_count / LOOP_FACTOR);
if (rc == PCRE_ERROR_LOOP_LIMIT) {
@@ -1077,7 +1331,7 @@ handle_iolist:
arg2 /* To avoid GC of precompiled code, XXX: not utilized yet */,
magic_bin);
}
-
+
res = build_exec_return(p, rc, &restart, arg1);
cleanup_restart_context(&restart);
@@ -1149,6 +1403,120 @@ static BIF_RETTYPE re_exec_trap(BIF_ALIST_3)
BIF_RET(res);
}
+BIF_RETTYPE
+re_inspect_2(BIF_ALIST_2)
+{
+ Eterm *tp,*tmp_vec,*hp;
+ int i,top,j;
+ int entrysize;
+ unsigned char *nametable, *last,*name;
+ int has_dupnames;
+ unsigned long options;
+ int num_names;
+ Eterm res;
+ const pcre *code;
+ byte *temp_alloc = NULL;
+#ifdef DEBUG
+ int infores;
+#endif
+
+
+ if (is_not_tuple(BIF_ARG_1) || (arityval(*tuple_val(BIF_ARG_1)) != 5)) {
+ goto error;
+ }
+ tp = tuple_val(BIF_ARG_1);
+ if (tp[1] != am_re_pattern || is_not_small(tp[2]) ||
+ is_not_small(tp[3]) || is_not_small(tp[4]) ||
+ is_not_binary(tp[5])) {
+ goto error;
+ }
+ if (BIF_ARG_2 != am_namelist) {
+ goto error;
+ }
+ if ((code = (const pcre *)
+ erts_get_aligned_binary_bytes(tp[5], &temp_alloc)) == NULL) {
+ goto error;
+ }
+
+ /* OK, so let's try to get some info */
+
+ if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_OPTIONS, &options) != 0)
+ goto error;
+
+#ifdef DEBUG
+ infores =
+#endif
+ erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top);
+
+ ASSERT(infores == 0);
+
+ if (top <= 0) {
+ hp = HAlloc(BIF_P, 3);
+ res = TUPLE2(hp,am_namelist,NIL);
+ erts_free_aligned_binary_bytes(temp_alloc);
+ BIF_RET(res);
+ }
+#ifdef DEBUG
+ infores =
+#endif
+ erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize);
+
+ ASSERT(infores == 0);
+
+#ifdef DEBUG
+ infores =
+#endif
+ erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable);
+
+ ASSERT(infores == 0);
+
+ has_dupnames = ((options & PCRE_DUPNAMES) != 0);
+ /* First, count the names */
+ num_names = 0;
+ last = NULL;
+ name = nametable;
+ for(i=0;i<top;++i) {
+ if (last == NULL || !has_dupnames || strcmp((char *) last+2,
+ (char *) name+2)) {
+ ++num_names;
+ }
+ last = name;
+ name += entrysize;
+ }
+ tmp_vec = erts_alloc(ERTS_ALC_T_RE_TMP_BUF,
+ num_names * sizeof(Eterm));
+ /* Re-iterate and fill tmp_vec */
+ last = NULL;
+ name = nametable;
+ j = 0;
+ for(i=0;i<top;++i) {
+ if (last == NULL || !has_dupnames || strcmp((char *) last+2,
+ (char *) name+2)) {
+ tmp_vec[j++] = new_binary(BIF_P, (byte *) name+2, strlen((char *) name+2));
+ }
+ last = name;
+ name += entrysize;
+ }
+ ASSERT(j == num_names);
+ hp = HAlloc(BIF_P, 3+2*j);
+ res = NIL;
+ for(i = j-1 ;i >= 0; --i) {
+ res = CONS(hp,tmp_vec[i],res);
+ hp += 2;
+ }
+ res = TUPLE2(hp,am_namelist,res);
+ erts_free_aligned_binary_bytes(temp_alloc);
+ erts_free(ERTS_ALC_T_RE_TMP_BUF, tmp_vec);
+ BIF_RET(res);
+
+ error:
+ /* tmp_vec never allocated when we reach here */
+ erts_free_aligned_binary_bytes(temp_alloc);
+ BIF_ERROR(BIF_P,BADARG);
+}
+
+
+