diff options
Diffstat (limited to 'erts/emulator/beam/erl_bif_re.c')
-rw-r--r-- | erts/emulator/beam/erl_bif_re.c | 1142 |
1 files changed, 1142 insertions, 0 deletions
diff --git a/erts/emulator/beam/erl_bif_re.c b/erts/emulator/beam/erl_bif_re.c new file mode 100644 index 0000000000..16abab65b0 --- /dev/null +++ b/erts/emulator/beam/erl_bif_re.c @@ -0,0 +1,1142 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2008-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif +#include "sys.h" +#include "erl_vm.h" +#include "global.h" +#include "erl_process.h" +#include "error.h" +#include "bif.h" +#include "erl_binary.h" +#include "big.h" +#define ERLANG_INTEGRATION 1 +#define PCRE_STATIC +#include "pcre.h" + +#define PCRE_DEFAULT_COMPILE_OPTS 0 +#define PCRE_DEFAULT_EXEC_OPTS 0 +#define LOOP_FACTOR 10 + + +static const unsigned char *default_table; +static Uint max_loop_limit; +static Export re_exec_trap_export; +static Export *grun_trap_exportp = NULL; +static Export *urun_trap_exportp = NULL; +static Export *ucompile_trap_exportp = NULL; + +static BIF_RETTYPE re_exec_trap(BIF_ALIST_3); + +static void *erts_erts_pcre_malloc(size_t size) { + return erts_alloc(ERTS_ALC_T_RE_HEAP,size); +} + +static void erts_erts_pcre_free(void *ptr) { + erts_free(ERTS_ALC_T_RE_HEAP,ptr); +} + +static void *erts_erts_pcre_stack_malloc(size_t size) { + return erts_alloc(ERTS_ALC_T_RE_STACK,size); +} + +static void erts_erts_pcre_stack_free(void *ptr) { + erts_free(ERTS_ALC_T_RE_STACK,ptr); +} + +void erts_init_bif_re(void) +{ + erts_pcre_malloc = &erts_erts_pcre_malloc; + erts_pcre_free = &erts_erts_pcre_free; + erts_pcre_stack_malloc = &erts_erts_pcre_stack_malloc; + erts_pcre_stack_free = &erts_erts_pcre_stack_free; + default_table = NULL; /* ISO8859-1 default, forced into pcre */ + max_loop_limit = CONTEXT_REDS * LOOP_FACTOR; + + sys_memset((void *) &re_exec_trap_export, 0, sizeof(Export)); + re_exec_trap_export.address = &re_exec_trap_export.code[3]; + re_exec_trap_export.code[0] = am_erlang; + re_exec_trap_export.code[1] = am_re_run_trap; + re_exec_trap_export.code[2] = 3; + re_exec_trap_export.code[3] = (Eterm) em_apply_bif; + re_exec_trap_export.code[4] = (Eterm) &re_exec_trap; + + grun_trap_exportp = erts_export_put(am_re,am_grun,3); + urun_trap_exportp = erts_export_put(am_re,am_urun,3); + ucompile_trap_exportp = erts_export_put(am_re,am_ucompile,2); + + return; +} + +Sint erts_re_set_loop_limit(Sint limit) +{ + Sint save = (Sint) max_loop_limit; + if (limit <= 0) { + max_loop_limit = CONTEXT_REDS * LOOP_FACTOR; + } else { + max_loop_limit = (Uint) limit; + } + return save; +} + +/* + * Deal with plain int's and so on for the library interface + */ + +static int term_to_int(Eterm term, int *sp) +{ +#ifdef ARCH_64 + + if (is_small(term)) { + Uint x = signed_val(term); + if (x > INT_MAX) { + return 0; + } + *sp = (int) x; + return 1; + } + return 0; + +#else + + if (is_small(term)) { + *sp = signed_val(term); + return 1; + } else if (is_big(term)) { + ErtsDigit* xr = big_v(term); + dsize_t xl = big_size(term); + int sign = big_sign(term); + unsigned uval = 0; + int n = 0; + + if (xl*D_EXP > sizeof(unsigned)*8) { + return 0; + } + while (xl-- > 0) { + uval |= ((unsigned)(*xr++)) << n; + n += D_EXP; + } + if (sign) { + uval = -uval; + if ((int)uval > 0) + return 0; + } else { + if ((int)uval < 0) + return 0; + } + *sp = uval; + return 1; + } else { + return 0; + } + +#endif + +} + +static Eterm make_signed_integer(int x, Process *p) +{ +#ifdef ARCH_64 + return make_small(x); +#else + Eterm* hp; + if (IS_SSMALL(x)) + return make_small(x); + else { + hp = HAlloc(p, BIG_UINT_HEAP_SIZE); + if (x >= 0) { + *hp = make_pos_bignum_header(1); + } else { + x = -x; + *hp = make_neg_bignum_header(1); + } + BIG_DIGIT(hp, 0) = x; + return make_big(hp); + } +#endif +} + +/* + * Parse option lists + */ + +#define PARSE_FLAG_UNIQUE_COMPILE_OPT 1 +#define PARSE_FLAG_UNIQUE_EXEC_OPT 2 +#define PARSE_FLAG_UNICODE 4 +#define PARSE_FLAG_STARTOFFSET 8 +#define PARSE_FLAG_CAPTURE_OPT 16 +#define PARSE_FLAG_GLOBAL 32 + +#define CAPSPEC_VALUES 0 +#define CAPSPEC_TYPE 1 +#define CAPSPEC_SIZE 2 + +static int /* 0 == ok, < 0 == error */ +parse_options(Eterm listp, /* in */ + int *compile_options, /* out */ + int *exec_options, /* out */ + int *flags,/* out */ + int *startoffset, /* out */ + Eterm *capture_spec) /* capture_spec[CAPSPEC_SIZE] */ /* out */ +{ + int copt,eopt,fl; + Eterm item; + + if (listp == NIL) { + copt = PCRE_DEFAULT_COMPILE_OPTS; + eopt = PCRE_DEFAULT_EXEC_OPTS; + fl = 0; + } else { + copt = 0; + eopt = 0; + fl = 0; + for (;is_list(listp); listp = CDR(list_val(listp))) { + item = CAR(list_val(listp)); + if (is_tuple(item)) { + Eterm *tp = tuple_val(item); + if (arityval(*tp) != 2 || is_not_atom(tp[1])) { + if (arityval(*tp) == 3 && tp[1] == am_capture) { + if (capture_spec != NULL) { + capture_spec[CAPSPEC_VALUES] = tp[2]; + capture_spec[CAPSPEC_TYPE] = tp[3]; + } + fl |= (PARSE_FLAG_CAPTURE_OPT | + PARSE_FLAG_UNIQUE_EXEC_OPT); + continue; + } else { + return -1; + } + } + switch(tp[1]) { + case am_capture: + if (capture_spec != NULL) { + capture_spec[CAPSPEC_VALUES] = tp[2]; + capture_spec[CAPSPEC_TYPE] = am_index; + } + fl |= (PARSE_FLAG_CAPTURE_OPT | + PARSE_FLAG_UNIQUE_EXEC_OPT); + break; + case am_offset: + { + int tmp; + if (!term_to_int(tp[2],&tmp)) { + return -1; + } + if (startoffset != NULL) { + *startoffset = tmp; + } + } + fl |= (PARSE_FLAG_UNIQUE_EXEC_OPT|PARSE_FLAG_STARTOFFSET); + break; + case am_newline: + if (!is_atom(tp[2])) { + return -1; + } + switch (tp[2]) { + case am_cr: + copt |= PCRE_NEWLINE_CR; + eopt |= PCRE_NEWLINE_CR; + break; + case am_crlf: + copt |= PCRE_NEWLINE_CRLF; + eopt |= PCRE_NEWLINE_CRLF; + break; + case am_lf: + copt |= PCRE_NEWLINE_LF; + eopt |= PCRE_NEWLINE_LF; + break; + case am_anycrlf: + copt |= PCRE_NEWLINE_ANYCRLF; + eopt |= PCRE_NEWLINE_ANYCRLF; + break; + case am_any: + eopt |= PCRE_NEWLINE_ANY; + copt |= PCRE_NEWLINE_ANY; + break; + default: + return -1; + break; + } + break; + default: + return -1; + } + }else if (is_not_atom(item)) { + return -1; + } else { + switch(item) { + case am_anchored: + copt |= PCRE_ANCHORED; + eopt |= PCRE_ANCHORED; + break; + case am_notempty: + eopt |= PCRE_NOTEMPTY; + fl |= PARSE_FLAG_UNIQUE_EXEC_OPT; + break; + case am_notbol: + eopt |= PCRE_NOTBOL; + fl |= PARSE_FLAG_UNIQUE_EXEC_OPT; + break; + case am_noteol: + eopt |= PCRE_NOTEOL; + fl |= PARSE_FLAG_UNIQUE_EXEC_OPT; + break; + case am_caseless: + copt |= PCRE_CASELESS; + fl |= PARSE_FLAG_UNIQUE_COMPILE_OPT; + break; + case am_dollar_endonly: + copt |= PCRE_DOLLAR_ENDONLY; + fl |= PARSE_FLAG_UNIQUE_COMPILE_OPT; + break; + case am_dotall: + copt |= PCRE_DOTALL; + fl |= PARSE_FLAG_UNIQUE_COMPILE_OPT; + break; + case am_extended: + copt |= PCRE_EXTENDED; + fl |= PARSE_FLAG_UNIQUE_COMPILE_OPT; + break; + case am_firstline: + copt |= PCRE_FIRSTLINE; + fl |= PARSE_FLAG_UNIQUE_COMPILE_OPT; + break; + case am_multiline: + copt |= PCRE_MULTILINE; + fl |= PARSE_FLAG_UNIQUE_COMPILE_OPT; + break; + case am_no_auto_capture: + copt |= PCRE_NO_AUTO_CAPTURE; + fl |= PARSE_FLAG_UNIQUE_COMPILE_OPT; + break; + case am_dupnames: + copt |= PCRE_DUPNAMES; + fl |= PARSE_FLAG_UNIQUE_COMPILE_OPT; + break; + case am_ungreedy: + copt |= PCRE_UNGREEDY; + fl |= PARSE_FLAG_UNIQUE_COMPILE_OPT; + break; + case am_unicode: + copt |= PCRE_UTF8; + fl |= (PARSE_FLAG_UNIQUE_COMPILE_OPT | PARSE_FLAG_UNICODE); + break; + case am_global: + fl |= (PARSE_FLAG_UNIQUE_EXEC_OPT | PARSE_FLAG_GLOBAL); + break; + case am_bsr_anycrlf: + eopt |= PCRE_BSR_ANYCRLF; + copt |= PCRE_BSR_ANYCRLF; + break; + case am_bsr_unicode: + eopt |= PCRE_BSR_UNICODE; + copt |= PCRE_BSR_UNICODE; + break; + default: + return -1; + } + } + } + if (is_not_nil(listp)) { + return -1; + } + } + if (compile_options != NULL) { + *compile_options = copt; + } + if (exec_options != NULL) { + *exec_options = eopt; + } + if (flags != NULL) { + *flags = fl; + } + return 0; +} + +/* + * Build Erlang term result from compilation + */ + +static Eterm +build_compile_result(Process *p, Eterm error_tag, pcre *result, int errcode, const char *errstr, int errofset, int unicode, int with_ok) +{ + Eterm *hp; + Eterm ret; + size_t pattern_size; + int capture_count; + if (!result) { + /* Return {error_tag, {Code, String, Offset}} */ + int elen = sys_strlen(errstr); + int need = 3 /* tuple of 2 */ + + 3 /* tuple of 2 */ + + (2 * elen) /* The error string list */; + hp = HAlloc(p, need); + ret = buf_to_intlist(&hp, (char *) errstr, elen, NIL); + ret = TUPLE2(hp, ret, make_small(errofset)); + hp += 3; + ret = TUPLE2(hp, error_tag, ret); + } else { + erts_pcre_fullinfo(result, NULL, PCRE_INFO_SIZE, &pattern_size); + erts_pcre_fullinfo(result, NULL, PCRE_INFO_CAPTURECOUNT, &capture_count); + /* XXX: Optimize - keep in offheap binary to allow this to + be kept across traps w/o need of copying */ + ret = new_binary(p, (byte *) result, pattern_size); + erts_pcre_free(result); + hp = HAlloc(p, (with_ok) ? (3+5) : 5); + ret = TUPLE4(hp,am_re_pattern, make_small(capture_count), make_small(unicode),ret); + if (with_ok) { + hp += 5; + ret = TUPLE2(hp,am_ok,ret); + } + } + return ret; +} + +/* + * Compile BIFs + */ + +BIF_RETTYPE +re_compile_2(BIF_ALIST_2) +{ + int slen; + char *expr; + pcre *result; + int errcode = 0; + const char *errstr = ""; + int errofset = 0; + Eterm ret; + int options = 0; + int pflags = 0; + int unicode = 0; + + + if (parse_options(BIF_ARG_2,&options,NULL,&pflags,NULL,NULL) + < 0) { + BIF_ERROR(BIF_P,BADARG); + } + + if (pflags & PARSE_FLAG_UNIQUE_EXEC_OPT) { + BIF_ERROR(BIF_P,BADARG); + } + + unicode = (pflags & PARSE_FLAG_UNICODE) ? 1 : 0; + + if (pflags & PARSE_FLAG_UNICODE && !is_binary(BIF_ARG_1)) { + BIF_TRAP2(ucompile_trap_exportp, BIF_P, BIF_ARG_1, BIF_ARG_2); + } + + if ((slen = io_list_len(BIF_ARG_1)) < 0) { + BIF_ERROR(BIF_P,BADARG); + } + expr = erts_alloc(ERTS_ALC_T_RE_TMP_BUF, slen + 1); + if (io_list_to_buf(BIF_ARG_1, expr, slen) != 0) { + erts_free(ERTS_ALC_T_RE_TMP_BUF, expr); + BIF_ERROR(BIF_P,BADARG); + } + expr[slen]='\0'; + result = erts_pcre_compile2(expr, options, &errcode, + &errstr, &errofset, default_table); + + ret = build_compile_result(BIF_P, am_error, result, errcode, + errstr, errofset, unicode, 1); + erts_free(ERTS_ALC_T_RE_TMP_BUF, expr); + BIF_RET(ret); +} + +BIF_RETTYPE +re_compile_1(BIF_ALIST_1) +{ + return re_compile_2(BIF_P,BIF_ARG_1,NIL); +} + +/* + * Restart contexts for the re:run bif + */ + +/* + * When erts_pcre_exec is restarted, only the actual extra-structure with + * it's restart-data need to be kept. The match is then called with + * watever is saved. The code is pointed out by this and cannot be + * reallocated or GC'ed, why it's passed along as a off-heap-binary, + * but not actually passed in the erts_pcre_exec restart calls. + */ + +typedef enum { RetIndex, RetString, RetBin, RetNone } ReturnType; + +typedef struct _return_info { + ReturnType type; + int num_spec; /* 0 == all, -1 == all_but first, > 0 specified in vector */ + int v[1]; +} ReturnInfo; + +typedef struct _restart_context { + pcre_extra extra; + void *restart_data; + Uint32 flags; + char *subject; /* to be able to free it when done */ + pcre *code; /* Keep a copy */ + int *ovector; /* Keep until done */ + ReturnInfo *ret_info; +} RestartContext; + +#define RESTART_FLAG_SUBJECT_IN_BINARY 0x1 + +static void cleanup_restart_context(RestartContext *rc) +{ + if (rc->restart_data != NULL) { + erts_pcre_free_restart_data(rc->restart_data); + rc->restart_data = NULL; + } + if (rc->ovector != NULL) { + erts_free(ERTS_ALC_T_RE_SUBJECT, rc->ovector); + rc->ovector = NULL; + } + if (rc->subject != NULL && !(rc->flags & RESTART_FLAG_SUBJECT_IN_BINARY)) { + erts_free(ERTS_ALC_T_RE_SUBJECT, rc->subject); + } + rc->subject = NULL; + if (rc->code != NULL) { + erts_free(ERTS_ALC_T_RE_SUBJECT, rc->code); + rc->code = NULL; + } + if (rc->ret_info != NULL) { + erts_free(ERTS_ALC_T_RE_SUBJECT, rc->ret_info); + rc->ret_info = NULL; + } +} + +static void cleanup_restart_context_bin(Binary *bp) +{ + RestartContext *rc = ERTS_MAGIC_BIN_DATA(bp); + cleanup_restart_context(rc); +} + +/* + * Build the return value for Erlang from result and restart context + */ + +static Eterm build_exec_return(Process *p, int rc, RestartContext *restartp, Eterm orig_subject) +{ + Eterm res; + Eterm *hp; + if (rc <= 0) { + res = am_nomatch; + } else { + ReturnInfo *ri = restartp->ret_info; + ReturnInfo defri = {RetIndex,0,{0}}; + if (ri == NULL) { + ri = &defri; + } + if (ri->type == RetNone) { + res = am_match; + } else if (ri->type == RetIndex){ + Eterm *tmp_vect; + Eterm tpl; + int i; + if (ri->num_spec <= 0) { + tmp_vect = erts_alloc(ERTS_ALC_T_RE_TMP_BUF, + rc * 2 * sizeof(Eterm)); + for(i = -(ri->num_spec) ;i < rc; ++i) { + tmp_vect[i*2] = make_signed_integer(restartp->ovector[i*2],p); + tmp_vect[i*2+1] = make_signed_integer(restartp->ovector[i*2+1] - restartp->ovector[i*2],p); + } + hp = HAlloc(p, 3+(3+2)*(rc + ri->num_spec)); + res = NIL; + for(i = rc-1 ;i >= -(ri->num_spec); --i) { + tpl = TUPLE2(hp,tmp_vect[i*2],tmp_vect[i*2+1]); + hp += 3; + res = CONS(hp,tpl,res); + hp += 2; + } + } else { + int n = 0; + int x; + tmp_vect = erts_alloc(ERTS_ALC_T_RE_TMP_BUF, + ri->num_spec * 2 * sizeof(Eterm)); + for (i = 0; i < ri->num_spec; ++i) { + x = ri->v[i]; + if (x < rc && x >= 0) { + tmp_vect[n*2] = make_signed_integer(restartp->ovector[x*2],p); + tmp_vect[n*2+1] = make_signed_integer(restartp->ovector[x*2+1]-restartp->ovector[x*2],p); + } else { + tmp_vect[n*2] = make_small(-1); + tmp_vect[n*2+1] = make_small(0); + } + ++n; + } + hp = HAlloc(p, 3+(3+2)*n); + res = NIL; + for(i = n-1 ;i >= 0; --i) { + tpl = TUPLE2(hp,tmp_vect[i*2],tmp_vect[i*2+1]); + hp += 3; + res = CONS(hp,tpl,res); + hp += 2; + } + } + res = TUPLE2(hp,am_match,res); + erts_free(ERTS_ALC_T_RE_TMP_BUF, tmp_vect); + } else { + Eterm *tmp_vect; + int i; + Eterm orig = NIL; + Uint offset = 0; + Uint bitoffs = 0; + Uint bitsize = 0; + if (restartp->flags & RESTART_FLAG_SUBJECT_IN_BINARY) { + ERTS_GET_REAL_BIN(orig_subject, orig, offset, bitoffs, bitsize); + } + if (ri->num_spec <= 0) { + tmp_vect = erts_alloc(ERTS_ALC_T_RE_TMP_BUF, + rc * sizeof(Eterm)); + for(i = -(ri->num_spec) ;i < rc; ++i) { /* XXX: Unicode */ + char *cp; + int len; + if (restartp->ovector[i*2] < 0) { + cp = restartp->subject; + len = 0; + } else { + cp = restartp->subject + restartp->ovector[i*2]; + len = restartp->ovector[i*2+1] - restartp->ovector[i*2]; + } + if (ri->type == RetBin) { + if (restartp->flags & RESTART_FLAG_SUBJECT_IN_BINARY) { + /* Optimized - if subject was binary to begin + with, we can make sub-binaries. */ + ErlSubBin *sb; + Uint virtual_offset = cp - restartp->subject; + hp = HAlloc(p, ERL_SUB_BIN_SIZE); + sb = (ErlSubBin *) hp; + sb->thing_word = HEADER_SUB_BIN; + sb->size = len; + sb->offs = offset + virtual_offset; + sb->orig = orig; + sb->bitoffs = bitoffs; + sb->bitsize = bitsize; + sb->is_writable = 0; + tmp_vect[i] = make_binary(sb); + } else { + tmp_vect[i] = new_binary(p, (byte *) cp, len); + } + } else { + Eterm *hp2; + hp2 = HAlloc(p,(2*len)); + tmp_vect[i] = buf_to_intlist(&hp2, cp, len, NIL); + } + } + hp = HAlloc(p, 3+2*(rc + ri->num_spec)); + res = NIL; + for(i = rc-1 ;i >= -(ri->num_spec); --i) { + res = CONS(hp,tmp_vect[i],res); + hp += 2; + } + } else { + int n = 0; + int x; + tmp_vect = erts_alloc(ERTS_ALC_T_RE_TMP_BUF, + ri->num_spec * sizeof(Eterm)); + for (i = 0; i < ri->num_spec; ++i) { + x = ri->v[i]; + if (x < rc && x >= 0) { + char *cp; + int len; + if (restartp->ovector[x*2] < 0) { + cp = restartp->subject; + len = 0; + } else { + cp = restartp->subject + restartp->ovector[x*2]; + len = restartp->ovector[x*2+1] - restartp->ovector[x*2]; + } + if (ri->type == RetBin) { + if (restartp->flags & RESTART_FLAG_SUBJECT_IN_BINARY) { + /* Optimized - if subject was binary to begin + with, we could make sub-binaries. */ + ErlSubBin *sb; + Uint virtual_offset = cp - restartp->subject; + hp = HAlloc(p, ERL_SUB_BIN_SIZE); + sb = (ErlSubBin *) hp; + sb->thing_word = HEADER_SUB_BIN; + sb->size = len; + sb->offs = offset + virtual_offset; + sb->orig = orig; + sb->bitoffs = bitoffs; + sb->bitsize = bitsize; + sb->is_writable = 0; + tmp_vect[n] = make_binary(sb); + } else { + tmp_vect[n] = new_binary(p, (byte *) cp, len); + } + } else { + Eterm *hp2; + hp2 = HAlloc(p,(2*len)); + tmp_vect[n] = buf_to_intlist(&hp2, cp, len, NIL); + } + } else { + if (ri->type == RetBin) { + tmp_vect[n] = new_binary(p, (byte *) "", 0); + } else { + tmp_vect[n] = NIL; + } + } + ++n; + } + hp = HAlloc(p, 3+2*n); + res = NIL; + for(i = n-1 ;i >= 0; --i) { + res = CONS(hp,tmp_vect[i],res); + hp += 2; + } + + } + res = TUPLE2(hp,am_match,res); + erts_free(ERTS_ALC_T_RE_TMP_BUF, tmp_vect); + } + } + return res; +} + +/* + * Extra parsing function, build the ReturnInfo structure from + * a capture specification in the option list + */ + +#define RINFO_SIZ(Num) (sizeof(ReturnInfo) + (sizeof(int) * (Num - 1))) + +static ReturnInfo * +build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code) +{ + ReturnInfo *ri = erts_alloc(ERTS_ALC_T_RE_SUBJECT, RINFO_SIZ(0)); + int sallocated = 0; + char *tmpb = NULL; + int tmpbsiz = 0; + Eterm l; + + ri->type = RetIndex; + ri->num_spec = 0; + + + switch(capture_spec[CAPSPEC_TYPE]) { + case am_index: + ri->type = RetIndex; + break; + case am_list: + ri->type = RetString; + break; + case am_binary: + ri->type = RetBin; + break; + default: + goto error; + } + + switch(capture_spec[CAPSPEC_VALUES]) { + case am_all: + ri->num_spec = 0; + break; + case am_none: + case NIL: + ri->num_spec = 0; + ri->type = RetNone; + break; + case am_all_but_first: + ri->num_spec = -1; + break; + case am_first: + ri->num_spec = 1; + if(ri->num_spec > sallocated) { + sallocated = ri->num_spec; + ri = erts_realloc(ERTS_ALC_T_RE_SUBJECT, ri, RINFO_SIZ(sallocated)); + } + ri->v[ri->num_spec - 1] = 0; + break; + default: + if (is_list(capture_spec[CAPSPEC_VALUES])) { + for(l=capture_spec[CAPSPEC_VALUES];is_list(l);l = CDR(list_val(l))) { + int x; + Eterm val = CAR(list_val(l)); + if (ri->num_spec < 0) + ri->num_spec = 0; + ++(ri->num_spec); + if(ri->num_spec > sallocated) { + sallocated += 10; + ri = erts_realloc(ERTS_ALC_T_RE_SUBJECT, ri, RINFO_SIZ(sallocated)); + } + if (term_to_int(val,&x)) { + ri->v[ri->num_spec - 1] = x; + } else if (is_atom(val) || is_binary(val) || is_list(val)) { + if (is_atom(val)) { + Atom *ap = atom_tab(atom_val(val)); + if ((ap->len + 1) > tmpbsiz) { + if (!tmpbsiz) { + tmpb = erts_alloc(ERTS_ALC_T_RE_TMP_BUF,(tmpbsiz = ap->len + 1)); + } else { + tmpb = erts_realloc(ERTS_ALC_T_RE_TMP_BUF,tmpb, + (tmpbsiz = ap->len + 1)); + } + } + memcpy(tmpb,ap->name,ap->len); + tmpb[ap->len] = '\0'; + } else { + int slen = io_list_len(val); + if (slen < 0) { + goto error; + } + if ((slen + 1) > tmpbsiz) { + if (!tmpbsiz) { + tmpb = erts_alloc(ERTS_ALC_T_RE_TMP_BUF,(tmpbsiz = slen + 1)); + } else { + tmpb = erts_realloc(ERTS_ALC_T_RE_TMP_BUF,tmpb, + (tmpbsiz = slen + 1)); + } + } + if (io_list_to_buf(val, tmpb, slen) != 0) { + goto error; + } + tmpb[slen] = '\0'; + } + if ((ri->v[ri->num_spec - 1] = erts_pcre_get_stringnumber(code,tmpb)) == + PCRE_ERROR_NOSUBSTRING) { + ri->v[ri->num_spec - 1] = -1; + } + } else { + goto error; + } + } + if (l != NIL) { + goto error; + } + } else { + goto error; + } + break; + } + + if(tmpb != NULL) { + erts_free(ERTS_ALC_T_RE_TMP_BUF,tmpb); + } + return ri; + error: + if(tmpb != NULL) { + erts_free(ERTS_ALC_T_RE_TMP_BUF,tmpb); + } + erts_free(ERTS_ALC_T_RE_SUBJECT, ri); + return NULL; +} + + +/* + * The actual re:run/2,3 BIFs + */ +BIF_RETTYPE +re_run_3(BIF_ALIST_3) +{ + const pcre *code_tmp; + RestartContext restart; + byte *temp_alloc = NULL; + int slength; + int startoffset = 0; + int options = 0, comp_options = 0; + int ovsize; + int pflags; + Eterm *tp; + int rc; + Eterm res; + size_t code_size; + Uint loop_limit_tmp; + unsigned long loop_count; + Eterm capture[CAPSPEC_SIZE]; + int is_list_cap; + + if (parse_options(BIF_ARG_3,&comp_options,&options,&pflags,&startoffset,capture) + < 0) { + BIF_ERROR(BIF_P,BADARG); + } + is_list_cap = ((pflags & PARSE_FLAG_CAPTURE_OPT) && + (capture[CAPSPEC_TYPE] == am_list)); + + if (is_not_tuple(BIF_ARG_2) || (arityval(*tuple_val(BIF_ARG_2)) != 4)) { + if (is_binary(BIF_ARG_2) || is_list(BIF_ARG_2) || is_nil(BIF_ARG_2)) { + /* Compile from textual RE */ + int slen; + char *expr; + pcre *result; + int errcode = 0; + const char *errstr = ""; + int errofset = 0; + int capture_count; + + if (pflags & PARSE_FLAG_UNICODE && + (!is_binary(BIF_ARG_1) || + (is_list_cap && !(pflags & PARSE_FLAG_GLOBAL)))) { + BIF_TRAP3(urun_trap_exportp, BIF_P, BIF_ARG_1, BIF_ARG_2, BIF_ARG_3); + } + + if ((slen = io_list_len(BIF_ARG_2)) < 0) { + BIF_ERROR(BIF_P,BADARG); + } + + expr = erts_alloc(ERTS_ALC_T_RE_TMP_BUF, slen + 1); + if (io_list_to_buf(BIF_ARG_2, expr, slen) != 0) { + erts_free(ERTS_ALC_T_RE_TMP_BUF, expr); + BIF_ERROR(BIF_P,BADARG); + } + expr[slen]='\0'; + result = erts_pcre_compile2(expr, comp_options, &errcode, + &errstr, &errofset, default_table); + if (!result) { + erts_free(ERTS_ALC_T_RE_TMP_BUF, expr); + /* Compilation error gives badarg except in the compile + function */ + BIF_ERROR(BIF_P,BADARG); + } + if (pflags & PARSE_FLAG_GLOBAL) { + Eterm precompiled = + build_compile_result(BIF_P, am_error, + result, errcode, + errstr, errofset, + (pflags & + PARSE_FLAG_UNICODE) ? 1 : 0, + 0); + Eterm *hp,r; + erts_free(ERTS_ALC_T_RE_TMP_BUF, expr); + hp = HAlloc(BIF_P,4); + /* BIF_ARG_2 is in the tuple just to make exceptions right */ + r = TUPLE3(hp,BIF_ARG_3, + ((pflags & PARSE_FLAG_UNIQUE_COMPILE_OPT) ? + am_true : + am_false), BIF_ARG_2); + BIF_TRAP3(grun_trap_exportp, BIF_P, BIF_ARG_1, precompiled, r); + } + + erts_pcre_fullinfo(result, NULL, PCRE_INFO_SIZE, &code_size); + erts_pcre_fullinfo(result, NULL, PCRE_INFO_CAPTURECOUNT, &capture_count); + ovsize = 3*(capture_count+1); + restart.code = erts_alloc(ERTS_ALC_T_RE_SUBJECT, code_size); + memcpy(restart.code, result, code_size); + erts_pcre_free(result); + erts_free(ERTS_ALC_T_RE_TMP_BUF, expr); + /*unicode = (pflags & PARSE_FLAG_UNICODE) ? 1 : 0;*/ + } else { + BIF_ERROR(BIF_P,BADARG); + } + } else { + if (pflags & PARSE_FLAG_UNIQUE_COMPILE_OPT) { + BIF_ERROR(BIF_P,BADARG); + } + + tp = tuple_val(BIF_ARG_2); + if (tp[1] != am_re_pattern || is_not_small(tp[2]) || + is_not_small(tp[3]) || is_not_binary(tp[4])) { + BIF_ERROR(BIF_P,BADARG); + } + + if (unsigned_val(tp[3]) && + (!is_binary(BIF_ARG_1) || + (is_list_cap && !(pflags & PARSE_FLAG_GLOBAL)))) { /* unicode */ + BIF_TRAP3(urun_trap_exportp, BIF_P, BIF_ARG_1, BIF_ARG_2, + BIF_ARG_3); + } + + if (pflags & PARSE_FLAG_GLOBAL) { + Eterm *hp,r; + hp = HAlloc(BIF_P,3); + r = TUPLE2(hp,BIF_ARG_3,am_false); + BIF_TRAP3(grun_trap_exportp, BIF_P, BIF_ARG_1, BIF_ARG_2, + r); + } + + ovsize = 3*(unsigned_val(tp[2])+1); + code_size = binary_size(tp[4]); + if ((code_tmp = (const pcre *) + erts_get_aligned_binary_bytes(tp[4], &temp_alloc)) == NULL) { + erts_free_aligned_binary_bytes(temp_alloc); + BIF_ERROR(BIF_P, BADARG); + } + restart.code = erts_alloc(ERTS_ALC_T_RE_SUBJECT, code_size); + memcpy(restart.code, code_tmp, code_size); + erts_free_aligned_binary_bytes(temp_alloc); + + } + + + restart.ovector = erts_alloc(ERTS_ALC_T_RE_SUBJECT, ovsize * sizeof(int)); + restart.extra.flags = PCRE_EXTRA_TABLES | PCRE_EXTRA_LOOP_LIMIT; + restart.extra.tables = default_table; + restart.extra.loop_limit = ERTS_BIF_REDS_LEFT(BIF_P) * LOOP_FACTOR; + loop_limit_tmp = max_loop_limit; /* To lesser probability of race in debug + situation (erts_debug) */ + if (restart.extra.loop_limit > loop_limit_tmp) { + restart.extra.loop_limit = loop_limit_tmp; + } + restart.restart_data = NULL; + restart.extra.restart_data = &restart.restart_data; + restart.extra.restart_flags = 0; + restart.extra.loop_counter_return = &loop_count; + restart.ret_info = NULL; + + if (pflags & PARSE_FLAG_CAPTURE_OPT) { + if ((restart.ret_info = build_capture(capture,restart.code)) == NULL) { + erts_free(ERTS_ALC_T_RE_SUBJECT, restart.ovector); + erts_free(ERTS_ALC_T_RE_SUBJECT, restart.code); + BIF_ERROR(BIF_P,BADARG); + } + } + + /* Optimized - if already in binary off heap, keep that and avoid + copying, also binary returns can be sub binaries in that case */ + + restart.flags = 0; + if (is_binary(BIF_ARG_1)) { + Eterm real_bin; + Uint offset; + Eterm* bptr; + int bitoffs; + int bitsize; + ProcBin* pb; + + ERTS_GET_REAL_BIN(BIF_ARG_1, real_bin, offset, bitoffs, bitsize); + + slength = binary_size(BIF_ARG_1); + bptr = binary_val(real_bin); + if (bitsize != 0 || bitoffs != 0 || (*bptr != HEADER_PROC_BIN)) { + goto handle_iolist; + } + pb = (ProcBin *) bptr; + restart.subject = (char *) (pb->bytes+offset); + restart.flags |= RESTART_FLAG_SUBJECT_IN_BINARY; + } else { +handle_iolist: + if ((slength = io_list_len(BIF_ARG_1)) < 0) { + erts_free(ERTS_ALC_T_RE_SUBJECT, restart.ovector); + erts_free(ERTS_ALC_T_RE_SUBJECT, restart.code); + if (restart.ret_info != NULL) { + erts_free(ERTS_ALC_T_RE_SUBJECT, restart.ret_info); + } + BIF_ERROR(BIF_P,BADARG); + } + restart.subject = erts_alloc(ERTS_ALC_T_RE_SUBJECT, slength); + + if (io_list_to_buf(BIF_ARG_1, restart.subject, slength) != 0) { + erts_free(ERTS_ALC_T_RE_SUBJECT, restart.ovector); + erts_free(ERTS_ALC_T_RE_SUBJECT, restart.code); + erts_free(ERTS_ALC_T_RE_SUBJECT, restart.subject); + if (restart.ret_info != NULL) { + erts_free(ERTS_ALC_T_RE_SUBJECT, restart.ret_info); + } + BIF_ERROR(BIF_P,BADARG); + } + } + + +#ifdef DEBUG + loop_count = 0xFFFFFFFF; +#endif + + rc = erts_pcre_exec(restart.code, &(restart.extra), restart.subject, slength, startoffset, + options, restart.ovector, ovsize); + ASSERT(loop_count != 0xFFFFFFFF); + BUMP_REDS(BIF_P, loop_count / LOOP_FACTOR); + if (rc == PCRE_ERROR_LOOP_LIMIT) { + /* Trap */ + Binary *mbp = erts_create_magic_binary(sizeof(RestartContext), + cleanup_restart_context_bin); + RestartContext *restartp = ERTS_MAGIC_BIN_DATA(mbp); + Eterm magic_bin; + Eterm *hp; + memcpy(restartp,&restart,sizeof(RestartContext)); + BUMP_ALL_REDS(BIF_P); + hp = HAlloc(BIF_P, PROC_BIN_SIZE); + magic_bin = erts_mk_magic_binary_term(&hp, &MSO(BIF_P), mbp); + BIF_TRAP3(&re_exec_trap_export, + BIF_P, + BIF_ARG_1, + BIF_ARG_2 /* To avoid GC of precompiled code, XXX: not utilized yet */, + magic_bin); + } + + res = build_exec_return(BIF_P, rc, &restart, BIF_ARG_1); + + cleanup_restart_context(&restart); + + BIF_RET(res); +} + +BIF_RETTYPE +re_run_2(BIF_ALIST_2) +{ + return re_run_3(BIF_P,BIF_ARG_1, BIF_ARG_2, NIL); +} + +/* + * The "magic" trap target, continue a re:run + */ + +static BIF_RETTYPE re_exec_trap(BIF_ALIST_3) + /* XXX: Optimize - arg 1 and 2 to be utilized for keeping binary + code and subject */ +{ + Binary *mbp; + RestartContext *restartp; + int rc; + unsigned long loop_count; + Uint loop_limit_tmp; + Eterm res; + + ASSERT(ERTS_TERM_IS_MAGIC_BINARY(BIF_ARG_3)); + + mbp = ((ProcBin *) binary_val(BIF_ARG_3))->val; + + ASSERT(ERTS_MAGIC_BIN_DESTRUCTOR(mbp) + == cleanup_restart_context_bin); + + restartp = (RestartContext *) ERTS_MAGIC_BIN_DATA(mbp); + + restartp->extra.loop_limit = ERTS_BIF_REDS_LEFT(BIF_P) * LOOP_FACTOR; + loop_limit_tmp = max_loop_limit; /* To lesser probability of race in debug + situation (erts_debug) */ + if (restartp->extra.loop_limit > loop_limit_tmp) { + restartp->extra.loop_limit = loop_limit_tmp; + } + restartp->extra.loop_counter_return = &loop_count; + restartp->extra.restart_data = &restartp->restart_data; + restartp->extra.restart_flags = 0; + +#ifdef DEBUG + loop_count = 0xFFFFFFFF; +#endif + rc = erts_pcre_exec(NULL, &(restartp->extra), NULL, 0, 0, 0, NULL, 0); + ASSERT(loop_count != 0xFFFFFFFF); + BUMP_REDS(BIF_P, loop_count / LOOP_FACTOR); + if (rc == PCRE_ERROR_LOOP_LIMIT) { + /* Trap */ + BUMP_ALL_REDS(BIF_P); + BIF_TRAP3(&re_exec_trap_export, BIF_P, BIF_ARG_1, BIF_ARG_2, BIF_ARG_3); + } + res = build_exec_return(BIF_P, rc, restartp, BIF_ARG_1); + + cleanup_restart_context(restartp); + + BIF_RET(res); +} + + + + |