aboutsummaryrefslogtreecommitdiffstats
path: root/erts
diff options
context:
space:
mode:
authorPatrik Nyblom <[email protected]>2013-07-18 10:18:58 +0200
committerPatrik Nyblom <[email protected]>2013-08-09 12:10:30 +0200
commit6146e7642d4bb9f7c9bb5f8cbca548c1d9667e5c (patch)
tree024c55cef26cdf0ad167b23d61fee9737177da7b /erts
parent9cd8b5d2af163f29cf77ae74057789be977f6414 (diff)
downloadotp-6146e7642d4bb9f7c9bb5f8cbca548c1d9667e5c.tar.gz
otp-6146e7642d4bb9f7c9bb5f8cbca548c1d9667e5c.tar.bz2
otp-6146e7642d4bb9f7c9bb5f8cbca548c1d9667e5c.zip
Add new options to Erlang re interface and mend dupnames
Add the notempty_atstart, no_start_optimize, ucp and never_utf options from the new PCRE version. Use the new notempty_atstart option in global matching. Add the inspect/2 function. Correctly handle dupnames when capturing by name: as in Perl, return the leftmost matching occurrence. Also add all_names, which returns all the names in the pattern in alphabetical (name) order. To make this usable in global matching, add an inspect function that can dig out a namelist.
Diffstat (limited to 'erts')
-rw-r--r--erts/emulator/beam/atom.names6
-rw-r--r--erts/emulator/beam/bif.tab6
-rw-r--r--erts/emulator/beam/erl_bif_re.c233
3 files changed, 240 insertions, 5 deletions
diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names
index eba1d0fa23..cf8f511b85 100644
--- a/erts/emulator/beam/atom.names
+++ b/erts/emulator/beam/atom.names
@@ -71,6 +71,7 @@ atom ac
atom active
atom all
atom all_but_first
+atom all_names
atom alloc_info
atom alloc_sizes
atom allocated
@@ -348,11 +349,13 @@ atom multi_scheduling
atom multiline
atom name
atom named_table
+atom namelist
atom native_addresses
atom Neq='=/='
atom Neqeq='/='
atom net_kernel
atom net_kernel_terminated
+atom never_utf
atom new
atom new_index
atom new_uniq
@@ -378,6 +381,7 @@ atom nosuspend
atom no_float
atom no_integer
atom no_network
+atom no_start_optimize
atom not
atom not_a_list
atom not_loaded
@@ -388,6 +392,7 @@ atom notalive
atom notbol
atom noteol
atom notempty
+atom notempty_atstart
atom notify
atom notsup
atom nouse_stdio
@@ -554,6 +559,7 @@ atom true
atom tuple
atom type
atom ucompile
+atom ucp
atom undef
atom ungreedy
atom unicode
diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab
index dc8e9101de..7c8e4b31cf 100644
--- a/erts/emulator/beam/bif.tab
+++ b/erts/emulator/beam/bif.tab
@@ -574,6 +574,12 @@ bif erlang:binary_to_float/1
bif io:printable_range/0
#
+# New in R17A
+#
+
+bif re:inspect/2
+
+#
# Obsolete
#
diff --git a/erts/emulator/beam/erl_bif_re.c b/erts/emulator/beam/erl_bif_re.c
index 12fc834685..c74125ae41 100644
--- a/erts/emulator/beam/erl_bif_re.c
+++ b/erts/emulator/beam/erl_bif_re.c
@@ -288,6 +288,10 @@ parse_options(Eterm listp, /* in */
eopt |= PCRE_NOTEMPTY;
fl |= PARSE_FLAG_UNIQUE_EXEC_OPT;
break;
+ case am_notempty_atstart:
+ eopt |= PCRE_NOTEMPTY_ATSTART;
+ fl |= PARSE_FLAG_UNIQUE_EXEC_OPT;
+ break;
case am_notbol:
eopt |= PCRE_NOTBOL;
fl |= PARSE_FLAG_UNIQUE_EXEC_OPT;
@@ -296,6 +300,10 @@ parse_options(Eterm listp, /* in */
eopt |= PCRE_NOTEOL;
fl |= PARSE_FLAG_UNIQUE_EXEC_OPT;
break;
+ case am_no_start_optimize:
+ copt |= PCRE_NO_START_OPTIMIZE;
+ fl |= PARSE_FLAG_UNIQUE_COMPILE_OPT;
+ break;
case am_caseless:
copt |= PCRE_CASELESS;
fl |= PARSE_FLAG_UNIQUE_COMPILE_OPT;
@@ -332,6 +340,14 @@ parse_options(Eterm listp, /* in */
copt |= PCRE_UNGREEDY;
fl |= PARSE_FLAG_UNIQUE_COMPILE_OPT;
break;
+ case am_ucp:
+ copt |= PCRE_UCP;
+ fl |= PARSE_FLAG_UNIQUE_COMPILE_OPT;
+ break;
+ case am_never_utf:
+ copt |= PCRE_NEVER_UTF;
+ fl |= PARSE_FLAG_UNIQUE_COMPILE_OPT;
+ break;
case am_unicode:
copt |= PCRE_UTF8;
fl |= (PARSE_FLAG_UNIQUE_COMPILE_OPT | PARSE_FLAG_UNICODE);
@@ -359,7 +375,7 @@ parse_options(Eterm listp, /* in */
if (compile_options != NULL) {
*compile_options = copt;
}
- if (exec_options != NULL) {
+ if (exec_options != NULL) {
*exec_options = eopt;
}
if (flags != NULL) {
@@ -585,6 +601,17 @@ static Eterm build_exec_return(Process *p, int rc, RestartContext *restartp, Ete
ri->num_spec * 2 * sizeof(Eterm));
for (i = 0; i < ri->num_spec; ++i) {
x = ri->v[i];
+ if (x < -1) {
+ int n = i-x+1;
+ int j;
+ for (j = i+1; j < ri->num_spec && j < n; ++j) {
+ if (restartp->ovector[(ri->v[j])*2] >= 0) {
+ x = ri->v[j];
+ break;
+ }
+ }
+ i = n-1;
+ }
if (x < rc && x >= 0) {
tmp_vect[n*2] = make_signed_integer(restartp->ovector[x*2],p);
tmp_vect[n*2+1] = make_signed_integer(restartp->ovector[x*2+1]-restartp->ovector[x*2],p);
@@ -666,6 +693,17 @@ static Eterm build_exec_return(Process *p, int rc, RestartContext *restartp, Ete
ri->num_spec * sizeof(Eterm));
for (i = 0; i < ri->num_spec; ++i) {
x = ri->v[i];
+ if (x < -1) {
+ int n = i-x+1;
+ int j;
+ for (j = i+1; j < ri->num_spec && j < n; ++j) {
+ if (restartp->ovector[(ri->v[j])*2] >= 0) {
+ x = ri->v[j];
+ break;
+ }
+ }
+ i = n-1;
+ }
if (x < rc && x >= 0) {
char *cp;
int len;
@@ -730,6 +768,49 @@ static Eterm build_exec_return(Process *p, int rc, RestartContext *restartp, Ete
*/
#define RINFO_SIZ(Num) (sizeof(ReturnInfo) + (sizeof(int) * (Num - 1))) /* Byte size handed to erts_realloc for a ReturnInfo whose v[] holds Num ints; the "- 1" presumably accounts for one slot declared inside ReturnInfo itself -- confirm against the struct definition. NOTE(review): Num is not parenthesized in the expansion, so pass a simple or parenthesized expression. */
+#define PICK_INDEX(NameEntry) \
+ ((int) ((((unsigned) ((unsigned char *) (NameEntry))[0]) << 8) + \
+ ((unsigned) ((unsigned char *) (NameEntry))[1]))) /* Reads the first two bytes at NameEntry as a 16-bit value, most significant byte first, and yields it as an int; callers store it as the capture index of a name-table entry. */
+
+
+static void build_one_capture(const pcre *code, ReturnInfo **ri, int *sallocated, int has_dupnames, char *name)
+{ /* Resolve the capture name `name` against the compiled pattern `code`
+   * and store the result in the last slot of (*ri)->v, v[num_spec - 1].
+   *
+   * Slot encoding written here:
+   *   -1     the pattern has no group with that name;
+   *   >= 0   the index of the single group carrying that name;
+   *   -num   (num > 1, has_dupnames only) the name maps to num groups;
+   *          the num slots appended right after it hold their indexes,
+   *          in the order the entries appear between first and last.
+   *
+   * When slots are appended, num_spec is incremented per slot and v[] is
+   * grown with erts_realloc in steps of 10 slots as needed; *sallocated
+   * is updated and the (possibly moved) block is written back through
+   * ri before returning.
+   */
+ ReturnInfo *r = (*ri);
+ if (has_dupnames) {
+ /* Build a sequence of positions, starting with -size if
+ more than one, otherwise just put the index there... */
+ char *first,*last;
+ int esize = erts_pcre_get_stringtable_entries(code,name,&first,&last); /* stride between entries, or an error code */
+ if (esize == PCRE_ERROR_NOSUBSTRING) { /* name does not occur in the pattern */
+ r->v[r->num_spec - 1] = -1;
+ } else if(last == first) { /* exactly one entry for this name */
+ r->v[r->num_spec - 1] = PICK_INDEX(first);
+ } else {
+ int num = ((last - first) / esize) + 1; /* entries from first to last, inclusive */
+ int i;
+ ASSERT(num > 1);
+ r->v[r->num_spec - 1] = -num; /* A value less than -1 means
+ multiple indexes for same name */
+ for (i = 0; i < num; ++i) { /* append one slot per duplicate entry */
+ ++(r->num_spec);
+ if(r->num_spec > (*sallocated)) { /* out of room: grow by 10 slots */
+ (*sallocated) += 10;
+ r = erts_realloc(ERTS_ALC_T_RE_SUBJECT, r,
+ RINFO_SIZ((*sallocated))); /* r may have moved; *ri is refreshed below */
+ }
+ r->v[r->num_spec - 1] = PICK_INDEX(first);
+ first += esize; /* step to the next entry with the same name */
+ }
+ }
+ } else {
+ /* Use the faster binary search if no duplicate names are present */
+ if ((r->v[r->num_spec - 1] = erts_pcre_get_stringnumber(code,name)) ==
+ PCRE_ERROR_NOSUBSTRING) {
+ r->v[r->num_spec - 1] = -1; /* normalize "no such name" to -1 */
+ }
+ }
+ *ri = r; /* hand the current block back to the caller */
+}
static ReturnInfo *
build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code)
@@ -778,6 +859,53 @@ build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code)
}
ri->v[ri->num_spec - 1] = 0;
break;
+ case am_all_names:
+ {
+ int rc,i,top;
+ int entrysize;
+ char *nametable, *last = NULL;
+ int has_dupnames;
+ unsigned long options;
+
+ if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_OPTIONS, &options) != 0)
+ goto error;
+ if ((rc = erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
+ goto error;
+ if (top <= 0) {
+ ri->num_spec = 0;
+ ri->type = RetNone;
+ break;
+ }
+ if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize) != 0)
+ goto error;
+ if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, (unsigned char **) &nametable) != 0)
+ goto error;
+
+ has_dupnames = ((options & PCRE_DUPNAMES) != 0);
+
+ for(i=0;i<top;++i) {
+ if (last == NULL || !has_dupnames || strcmp(last+2,nametable+2)) {
+ if (ri->num_spec < 0)
+ ri->num_spec = 0;
+ ++(ri->num_spec);
+ if(ri->num_spec > sallocated) {
+ sallocated += 10;
+ ri = erts_realloc(ERTS_ALC_T_RE_SUBJECT, ri, RINFO_SIZ(sallocated));
+ }
+ if (has_dupnames) {
+ /* This could be more effective, we actually have
+ the names and could fill in the vector
+ immediately. Now we lookup the name again. */
+ build_one_capture(code,&ri,&sallocated,has_dupnames,nametable+2);
+ } else {
+ ri->v[ri->num_spec - 1] = PICK_INDEX(nametable);
+ }
+ }
+ last = nametable;
+ nametable += entrysize;
+ }
+ break;
+ }
default:
if (is_list(capture_spec[CAPSPEC_VALUES])) {
for(l=capture_spec[CAPSPEC_VALUES];is_list(l);l = CDR(list_val(l))) {
@@ -793,6 +921,11 @@ build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code)
if (term_to_int(val,&x)) {
ri->v[ri->num_spec - 1] = x;
} else if (is_atom(val) || is_binary(val) || is_list(val)) {
+ int has_dupnames;
+ unsigned long options;
+ if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_OPTIONS, &options) != 0)
+ goto error;
+ has_dupnames = ((options & PCRE_DUPNAMES) != 0);
if (is_atom(val)) {
Atom *ap = atom_tab(atom_val(val));
if ((ap->len + 1) > tmpbsiz) {
@@ -823,10 +956,7 @@ build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code)
}
tmpb[slen] = '\0';
}
- if ((ri->v[ri->num_spec - 1] = erts_pcre_get_stringnumber(code,tmpb)) ==
- PCRE_ERROR_NOSUBSTRING) {
- ri->v[ri->num_spec - 1] = -1;
- }
+ build_one_capture(code,&ri,&sallocated,has_dupnames,tmpb);
} else {
goto error;
}
@@ -1159,6 +1289,99 @@ static BIF_RETTYPE re_exec_trap(BIF_ALIST_3)
BIF_RET(res);
}
+BIF_RETTYPE
+re_inspect_2(BIF_ALIST_2)
+{ /* BIF behind re:inspect/2.
+   *
+   * BIF_ARG_1 must be a 5-tuple whose first element is the atom
+   * re_pattern, whose elements 2..4 are small integers and whose
+   * element 5 is a binary (used here as the compiled pattern).
+   * BIF_ARG_2 must be the atom namelist.
+   *
+   * Returns {namelist, Names}, where Names is a list of binaries, one per
+   * name found in the pattern's name table, in table order. When the
+   * pattern was compiled with PCRE_DUPNAMES, consecutive table entries
+   * that carry the same name are reported once. Names is [] when the
+   * pattern has no named groups.
+   *
+   * Fails with BADARG if either argument has the wrong shape or if any
+   * of the erts_pcre_fullinfo queries fails.
+   */
+ Eterm *tp,*tmp_vec,*hp;
+ int rc,i,top,j;
+ int entrysize;
+ char *nametable, *last,*name;
+ int has_dupnames;
+ unsigned long options;
+ int num_names;
+ Eterm res;
+ const pcre *code;
+ byte *temp_alloc = NULL; /* released with erts_free_aligned_binary_bytes on every exit path */
+
+ if (is_not_tuple(BIF_ARG_1) || (arityval(*tuple_val(BIF_ARG_1)) != 5)) {
+ goto error;
+ }
+ tp = tuple_val(BIF_ARG_1);
+ if (tp[1] != am_re_pattern || is_not_small(tp[2]) ||
+ is_not_small(tp[3]) || is_not_small(tp[4]) ||
+ is_not_binary(tp[5])) {
+ goto error;
+ }
+ if (BIF_ARG_2 != am_namelist) { /* namelist is the only item accepted here */
+ goto error;
+ }
+ if ((code = (const pcre *)
+ erts_get_aligned_binary_bytes(tp[5], &temp_alloc)) == NULL) {
+ goto error;
+ }
+
+ /* OK, so let's try to get some info */
+
+ if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_OPTIONS, &options) != 0)
+ goto error;
+ if ((rc = erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) /* top = number of name-table entries */
+ goto error;
+ if (top <= 0) { /* no named groups: answer {namelist, []} right away */
+ hp = HAlloc(BIF_P, 3);
+ res = TUPLE2(hp,am_namelist,NIL);
+ erts_free_aligned_binary_bytes(temp_alloc);
+ BIF_RET(res);
+ }
+ if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize) != 0) /* stride between table entries */
+ goto error;
+ if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, (unsigned char **) &nametable) != 0)
+ goto error;
+
+ has_dupnames = ((options & PCRE_DUPNAMES) != 0);
+ /* First, count the names */
+ num_names = 0;
+ last = NULL;
+ name = nametable;
+ for(i=0;i<top;++i) {
+ if (last == NULL || !has_dupnames || strcmp(last+2,name+2)) { /* the name string starts 2 bytes into an entry; skip an entry only when dupnames is on and it repeats the previous entry's name */
+ ++num_names;
+ }
+ last = name;
+ name += entrysize;
+ }
+ tmp_vec = erts_alloc(ERTS_ALC_T_RE_TMP_BUF,
+ num_names * sizeof(Eterm)); /* scratch array for the name binaries; freed before returning */
+ /* Re-iterate and fill tmp_vec */
+ last = NULL;
+ name = nametable;
+ j = 0;
+ for(i=0;i<top;++i) {
+ if (last == NULL || !has_dupnames || strcmp(last+2,name+2)) { /* same test as in the counting pass */
+ tmp_vec[j++] = new_binary(BIF_P, (byte *) name+2, strlen(name+2));
+ }
+ last = name;
+ name += entrysize;
+ }
+ ASSERT(j == num_names);
+ hp = HAlloc(BIF_P, 3+2*j); /* 3 words for the result 2-tuple plus 2 per list cell */
+ res = NIL;
+ for(i = j-1 ;i >= 0; --i) { /* cons from the back so the list keeps table order */
+ res = CONS(hp,tmp_vec[i],res);
+ hp += 2;
+ }
+ res = TUPLE2(hp,am_namelist,res);
+ erts_free_aligned_binary_bytes(temp_alloc);
+ erts_free(ERTS_ALC_T_RE_TMP_BUF, tmp_vec);
+ BIF_RET(res);
+
+ error:
+ /* tmp_vec never allocated when we reach here */
+ erts_free_aligned_binary_bytes(temp_alloc);
+ BIF_ERROR(BIF_P,BADARG);
+}
+
+
+