Add new options to Erlang re interface and mend dupnames

Add notempty_atstart, no_start_optimize, ucp and never_utf options from new PCRE version. Use the new notempty_atstart in global matching. Add inspect/2 function. Correctly handle dupnames when capturing a name: as in Perl, get the leftmost matching occurrence. Also added all_names, to get all the names in the pattern in alphabetical (name) order. To be able to use this in global matching, an inspect function that can dig out a namelist was added.
author: Patrik Nyblom <[email protected]> 2013-07-18 10:18:58 +0200
committer: Patrik Nyblom <[email protected]> 2013-08-09 12:10:30 +0200
commit: 6146e7642d4bb9f7c9bb5f8cbca548c1d9667e5c (patch)
tree: 024c55cef26cdf0ad167b23d61fee9737177da7b
parent: 9cd8b5d2af163f29cf77ae74057789be977f6414 (diff)
download: otp-6146e7642d4bb9f7c9bb5f8cbca548c1d9667e5c.tar.gz
otp-6146e7642d4bb9f7c9bb5f8cbca548c1d9667e5c.tar.bz2
otp-6146e7642d4bb9f7c9bb5f8cbca548c1d9667e5c.zip
5 files changed, 446 insertions, 30 deletions
diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names
index eba1d0fa23..cf8f511b85 100644
--- a/erts/emulator/beam/atom.names
+++ b/erts/emulator/beam/atom.names
@@ -71,6 +71,7 @@ atom ac
 atom active
 atom all
 atom all_but_first
+atom all_names
 atom alloc_info
 atom alloc_sizes
 atom allocated
@@ -348,11 +349,13 @@ atom multi_scheduling
 atom multiline
 atom name
 atom named_table
+atom namelist
 atom native_addresses
 atom Neq='=/='
 atom Neqeq='/='
 atom net_kernel
 atom net_kernel_terminated
+atom never_utf
 atom new
 atom new_index
 atom new_uniq
@@ -378,6 +381,7 @@ atom nosuspend
 atom no_float
 atom no_integer
 atom no_network
+atom no_start_optimize
 atom not
 atom not_a_list
 atom not_loaded
@@ -388,6 +392,7 @@ atom notalive
 atom notbol
 atom noteol
 atom notempty
+atom notempty_atstart
 atom notify
 atom notsup
 atom nouse_stdio
@@ -554,6 +559,7 @@ atom true
 atom tuple
 atom type
 atom ucompile
+atom ucp
 atom undef
 atom ungreedy
 atom unicode
diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab
index dc8e9101de..7c8e4b31cf 100644
--- a/erts/emulator/beam/bif.tab
+++ b/erts/emulator/beam/bif.tab
@@ -574,6 +574,12 @@ bif erlang:binary_to_float/1
 bif io:printable_range/0
 
 #
+# New in R17A
+#
+
+bif re:inspect/2
+
+#
 # Obsolete
 #
 
diff --git a/erts/emulator/beam/erl_bif_re.c b/erts/emulator/beam/erl_bif_re.c
index 12fc834685..c74125ae41 100644
--- a/erts/emulator/beam/erl_bif_re.c
+++ b/erts/emulator/beam/erl_bif_re.c
@@ -288,6 +288,10 @@ parse_options(Eterm listp, /* in */
 		    eopt |= PCRE_NOTEMPTY; 
 		    fl |= PARSE_FLAG_UNIQUE_EXEC_OPT;
 		    break;
+		case am_notempty_atstart:
+		    eopt |= PCRE_NOTEMPTY_ATSTART; 
+		    fl |= PARSE_FLAG_UNIQUE_EXEC_OPT;
+		    break;
 		case am_notbol:
 		    eopt |= PCRE_NOTBOL; 
 		    fl |= PARSE_FLAG_UNIQUE_EXEC_OPT;
@@ -296,6 +300,10 @@ parse_options(Eterm listp, /* in */
 		    eopt |= PCRE_NOTEOL; 
 		    fl |= PARSE_FLAG_UNIQUE_EXEC_OPT;
 		    break;
+		case am_no_start_optimize:
+		    copt |= PCRE_NO_START_OPTIMIZE; 
+		    fl |= PARSE_FLAG_UNIQUE_COMPILE_OPT;
+		    break;
 		case am_caseless:
 		    copt |= PCRE_CASELESS; 
 		    fl |= PARSE_FLAG_UNIQUE_COMPILE_OPT;
@@ -332,6 +340,14 @@ parse_options(Eterm listp, /* in */
 		    copt |= PCRE_UNGREEDY; 
 		    fl |= PARSE_FLAG_UNIQUE_COMPILE_OPT;
 		    break;
+		case am_ucp:
+		    copt |= PCRE_UCP; 
+		    fl |= PARSE_FLAG_UNIQUE_COMPILE_OPT;
+		    break;
+		case am_never_utf:
+		    copt |= PCRE_NEVER_UTF; 
+		    fl |= PARSE_FLAG_UNIQUE_COMPILE_OPT;
+		    break;
 		case am_unicode:
 		    copt |= PCRE_UTF8; 
 		    fl |= (PARSE_FLAG_UNIQUE_COMPILE_OPT | PARSE_FLAG_UNICODE);
@@ -359,7 +375,7 @@ parse_options(Eterm listp, /* in */
     if (compile_options != NULL) {
 	*compile_options = copt;
     }
-    if (exec_options != NULL) {
+   if (exec_options != NULL) {
 	*exec_options = eopt;
     }
     if (flags != NULL) {
@@ -585,6 +601,17 @@ static Eterm build_exec_return(Process *p, int rc, RestartContext *restartp, Ete
 				      ri->num_spec * 2 * sizeof(Eterm));
 		for (i = 0; i < ri->num_spec; ++i) {
 		    x = ri->v[i];
+		    if (x < -1) {
+			int n = i-x+1;
+			int j;
+			for (j = i+1; j < ri->num_spec && j < n; ++j) {
+			    if (restartp->ovector[(ri->v[j])*2] >= 0) {
+				x = ri->v[j];
+				break;
+			    }
+			}
+			i = n-1;
+		    }
 		    if (x < rc && x >= 0) {
 			tmp_vect[n*2] = make_signed_integer(restartp->ovector[x*2],p);
 			tmp_vect[n*2+1] = make_signed_integer(restartp->ovector[x*2+1]-restartp->ovector[x*2],p);
@@ -666,6 +693,17 @@ static Eterm build_exec_return(Process *p, int rc, RestartContext *restartp, Ete
 				      ri->num_spec * sizeof(Eterm));
 		for (i = 0; i < ri->num_spec; ++i) {
 		    x = ri->v[i];
+		    if (x < -1) {
+			int n = i-x+1;
+			int j;
+			for (j = i+1; j < ri->num_spec && j < n; ++j) {
+			    if (restartp->ovector[(ri->v[j])*2] >= 0) {
+				x = ri->v[j];
+				break;
+			    }
+			}
+			i = n-1;
+		    }
 		    if (x < rc && x >= 0) {
 			char *cp;
 			int len;
@@ -730,6 +768,49 @@ static Eterm build_exec_return(Process *p, int rc, RestartContext *restartp, Ete
  */
 
 #define RINFO_SIZ(Num) (sizeof(ReturnInfo) + (sizeof(int) * (Num - 1)))
+#define PICK_INDEX(NameEntry)					        \
+    ((int) ((((unsigned) ((unsigned char *) (NameEntry))[0]) << 8) +	\
+	    ((unsigned) ((unsigned char *) (NameEntry))[1])))
+
+
+static void build_one_capture(const pcre *code, ReturnInfo **ri, int *sallocated, int has_dupnames, char *name) 
+{
+    ReturnInfo *r = (*ri);
+    if (has_dupnames) {
+	/* Build a sequence of positions, starting with -size if
+	   more than one, otherwise just put the index there... */
+	char *first,*last;
+	int esize = erts_pcre_get_stringtable_entries(code,name,&first,&last);
+	if (esize == PCRE_ERROR_NOSUBSTRING) {
+	    r->v[r->num_spec - 1] = -1;
+	} else if(last == first) {
+	    r->v[r->num_spec - 1] = PICK_INDEX(first);
+	} else {
+	    int num = ((last - first) / esize) + 1;
+	    int i;
+	    ASSERT(num > 1);
+	    r->v[r->num_spec - 1] = -num; /* A value less than -1 means
+					       multiple indexes for same name */
+	    for (i = 0; i < num; ++i) {
+		++(r->num_spec);
+		if(r->num_spec > (*sallocated)) {
+		    (*sallocated) += 10;
+		    r = erts_realloc(ERTS_ALC_T_RE_SUBJECT, r, 
+				      RINFO_SIZ((*sallocated)));
+		}
+		r->v[r->num_spec - 1] = PICK_INDEX(first);
+		first += esize;
+	    }
+	}
+    } else {
+	/* Use the faster binary search if no duplicate names are present */  
+	if ((r->v[r->num_spec - 1] = erts_pcre_get_stringnumber(code,name)) ==
+	    PCRE_ERROR_NOSUBSTRING) {
+	    r->v[r->num_spec - 1] = -1;
+	}
+    }
+    *ri = r;
+}    
 
 static ReturnInfo *
 build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code)
@@ -778,6 +859,53 @@ build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code)
 	}
 	ri->v[ri->num_spec - 1] = 0;
 	break;
+    case am_all_names:
+	{
+	    int rc,i,top;
+	    int entrysize;
+	    char *nametable, *last = NULL;
+	    int has_dupnames;
+	    unsigned long options;
+
+	    if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_OPTIONS, &options) != 0)
+		goto error;
+	    if ((rc = erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
+		goto error;
+	    if (top <= 0) {
+		ri->num_spec = 0;
+		ri->type = RetNone;
+		break;
+	    }
+	    if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize) != 0)
+		goto error;
+	    if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, (unsigned char **) &nametable) != 0)
+		goto error;
+	    
+	    has_dupnames = ((options & PCRE_DUPNAMES) != 0);
+
+	    for(i=0;i<top;++i) {
+		if (last == NULL || !has_dupnames || strcmp(last+2,nametable+2)) {
+		    if (ri->num_spec < 0)
+			ri->num_spec = 0;
+		    ++(ri->num_spec);
+		    if(ri->num_spec > sallocated) {
+			sallocated += 10;
+			ri = erts_realloc(ERTS_ALC_T_RE_SUBJECT, ri, RINFO_SIZ(sallocated));
+		    }
+		    if (has_dupnames) {
+			/* This could be more effective, we actually have 
+			   the names and could fill in the vector
+			   immediately. Now we lookup the name again. */
+			build_one_capture(code,&ri,&sallocated,has_dupnames,nametable+2);
+		    } else {
+			ri->v[ri->num_spec - 1] = PICK_INDEX(nametable);	
+		    }
+		}
+		last = nametable;
+		nametable += entrysize;
+	    }
+	    break;
+	}
     default:
 	if (is_list(capture_spec[CAPSPEC_VALUES])) {
 	    for(l=capture_spec[CAPSPEC_VALUES];is_list(l);l = CDR(list_val(l))) {
@@ -793,6 +921,11 @@ build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code)
 		if (term_to_int(val,&x)) {
 		    ri->v[ri->num_spec - 1] = x;
 		} else if (is_atom(val) || is_binary(val) || is_list(val)) {
+		    int has_dupnames;
+		    unsigned long options;
+		    if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_OPTIONS, &options) != 0)
+			goto error;
+		    has_dupnames = ((options & PCRE_DUPNAMES) != 0);
 		    if (is_atom(val)) {
 			Atom *ap = atom_tab(atom_val(val));
 			if ((ap->len + 1) > tmpbsiz) {
@@ -823,10 +956,7 @@ build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code)
 			}
 			tmpb[slen] = '\0';
 		    }
-		    if ((ri->v[ri->num_spec - 1] = erts_pcre_get_stringnumber(code,tmpb)) ==
-			PCRE_ERROR_NOSUBSTRING) {
-			ri->v[ri->num_spec - 1] = -1;
-		    }
+		    build_one_capture(code,&ri,&sallocated,has_dupnames,tmpb);
 		} else {
 		    goto error;
 		}
@@ -1159,6 +1289,99 @@ static BIF_RETTYPE re_exec_trap(BIF_ALIST_3)
     BIF_RET(res);
 }
     
+BIF_RETTYPE
+re_inspect_2(BIF_ALIST_2) 
+{
+    Eterm *tp,*tmp_vec,*hp;
+    int rc,i,top,j;
+    int entrysize;
+    char *nametable, *last,*name;
+    int has_dupnames;
+    unsigned long options;
+    int num_names;
+    Eterm res;
+    const pcre *code;
+    byte *temp_alloc = NULL;
+
+    if (is_not_tuple(BIF_ARG_1) || (arityval(*tuple_val(BIF_ARG_1)) != 5)) {
+	goto error;
+    }
+    tp = tuple_val(BIF_ARG_1);
+    if (tp[1] != am_re_pattern || is_not_small(tp[2]) || 
+	is_not_small(tp[3]) || is_not_small(tp[4]) || 
+	is_not_binary(tp[5])) {
+	goto error;
+    }
+    if (BIF_ARG_2 != am_namelist) {
+	goto error;
+    }
+    if ((code = (const pcre *) 
+	 erts_get_aligned_binary_bytes(tp[5], &temp_alloc)) == NULL) {
+	goto error;
+    }
+
+    /* OK, so let's try to get some info */
+    
+    if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_OPTIONS, &options) != 0)
+	goto error;
+    if ((rc = erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
+	goto error;
+    if (top <= 0) {
+	hp = HAlloc(BIF_P, 3);
+	res = TUPLE2(hp,am_namelist,NIL);
+	erts_free_aligned_binary_bytes(temp_alloc);
+	BIF_RET(res);
+    }
+    if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize) != 0)
+	goto error;
+    if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, (unsigned char **) &nametable) != 0)
+	goto error;
+    
+    has_dupnames = ((options & PCRE_DUPNAMES) != 0);
+    /* First, count the names */
+    num_names = 0;
+    last = NULL;
+    name = nametable;
+    for(i=0;i<top;++i) {
+	if (last == NULL || !has_dupnames || strcmp(last+2,name+2)) {
+	    ++num_names;
+	}
+	last = name;
+	name += entrysize;
+    }
+    tmp_vec =  erts_alloc(ERTS_ALC_T_RE_TMP_BUF, 
+			  num_names * sizeof(Eterm));
+    /* Re-iterate and fill tmp_vec */
+    last = NULL;
+    name = nametable;
+    j = 0;
+    for(i=0;i<top;++i) {
+	if (last == NULL || !has_dupnames || strcmp(last+2,name+2)) {
+	    tmp_vec[j++] = new_binary(BIF_P, (byte *) name+2, strlen(name+2));
+	}
+	last = name;
+	name += entrysize;
+    }
+    ASSERT(j == num_names);
+    hp = HAlloc(BIF_P, 3+2*j);
+    res = NIL;
+    for(i = j-1 ;i >= 0; --i) {
+	res = CONS(hp,tmp_vec[i],res);
+	hp += 2;
+    }
+    res = TUPLE2(hp,am_namelist,res);
+    erts_free_aligned_binary_bytes(temp_alloc);
+    erts_free(ERTS_ALC_T_RE_TMP_BUF, tmp_vec);
+    BIF_RET(res);
+
+ error:
+    /* tmp_vec never allocated when we reach here */
+    erts_free_aligned_binary_bytes(temp_alloc);
+    BIF_ERROR(BIF_P,BADARG);
+}
+    
+
+	
     
 
 	
diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl
index d8d529e6a4..4d6de1100d 100644
--- a/lib/stdlib/src/re.erl
+++ b/lib/stdlib/src/re.erl
@@ -28,11 +28,12 @@
                         | dotall | extended | firstline | multiline
                         | no_auto_capture | dupnames | ungreedy
                         | {newline, nl_spec()}| bsr_anycrlf
+                        | no_start_optimize | ucp | never_utf
                         | bsr_unicode.
 
 %%% BIFs
 
--export([compile/1, compile/2, run/2, run/3]).
+-export([compile/1, compile/2, run/2, run/3, inspect/2]).
 
 -spec compile(Regexp) -> {ok, MP} | {error, ErrSpec} when
       Regexp :: iodata(),
@@ -67,13 +68,13 @@ run(_, _) ->
       Subject :: iodata() | unicode:charlist(),
       RE :: mp() | iodata() | unicode:charlist(),
       Options :: [Option],
-      Option :: anchored | global | notbol | noteol | notempty
+      Option :: anchored | global | notbol | noteol | notempty | notempty_atstart
               | {offset, non_neg_integer()} |
                 {newline, NLSpec :: nl_spec()} |
                 bsr_anycrlf | bsr_unicode | {capture, ValueSpec} |
                 {capture, ValueSpec, Type} | CompileOpt,
       Type :: index | list | binary,
-      ValueSpec :: all | all_but_first | first | none | ValueList,
+      ValueSpec :: all | all_but_first | all_names | first | none | ValueList,
       ValueList :: [ValueID],
       ValueID :: integer() | string() | atom(),
       CompileOpt :: compile_option(),
@@ -88,6 +89,14 @@ run(_, _) ->
 run(_, _, _) ->
     erlang:nif_error(undef).
 
+-spec inspect(MP,Item) -> {namelist, [ binary() ]} when
+      MP :: mp(),
+      Item :: namelist.
+
+inspect(_,_) ->
+    erlang:nif_error(undef).
+    
+
 %%% End of BIFs
 
 -spec split(Subject, RE) -> SplitList when
@@ -102,7 +111,7 @@ split(Subject,RE) ->
       Subject :: iodata() | unicode:charlist(),
       RE :: mp() | iodata() | unicode:charlist(),
       Options :: [ Option ],
-      Option :: anchored | notbol | noteol | notempty
+      Option :: anchored | notbol | noteol | notempty | notempty_atstart
               | {offset, non_neg_integer()} | {newline, nl_spec()}
               | bsr_anycrlf | bsr_unicode | {return, ReturnType}
               | {parts, NumParts} | group | trim | CompileOpt,
@@ -295,7 +304,7 @@ replace(Subject,RE,Replacement) ->
       RE :: mp() | iodata() | unicode:charlist(),
       Replacement :: iodata() | unicode:charlist(),
       Options :: [Option],
-      Option :: anchored | global | notbol | noteol | notempty
+      Option :: anchored | global | notbol | noteol | notempty | notempty_atstart
               | {offset, non_neg_integer()} | {newline, NLSpec} | bsr_anycrlf
               | bsr_unicode | {return, ReturnType} | CompileOpt,
       ReturnType :: iodata | list | binary,
@@ -509,7 +518,9 @@ check_for_crlf(_,L) ->
 % SelectReturn = false | all | stirpfirst | none 
 % ConvertReturn = index | list | binary
 % {capture, all} -> all (untouchded)
-% {capture, first} -> kept in argumentt list and Select all
+% {capture, all_names} -> if names are present: treated as a name {capture, [...]} 
+%                                      else:    same as {capture, []}
+% {capture, first} -> kept in argument list and Select all
 % {capture, all_but_first} -> removed from argument list and selects stripfirst
 % {capture, none} ->  removed from argument list and selects none
 % {capture, []} -> removed from argument list and selects none
@@ -518,23 +529,30 @@ check_for_crlf(_,L) ->
 
 % Call as process_parameters([],0,false,index,NeedClean)
 
-process_parameters([],InitialOffset, SelectReturn, ConvertReturn,_) ->
+process_parameters([],InitialOffset, SelectReturn, ConvertReturn,_,_) ->
     {[], InitialOffset, SelectReturn, ConvertReturn};
-process_parameters([{offset, N} | T],_Init0,Select0,Return0,CC) ->
-    process_parameters(T,N,Select0,Return0,CC);
-process_parameters([global | T],Init0,Select0,Return0,CC) ->
-    process_parameters(T,Init0,Select0,Return0,CC);
-process_parameters([{capture,Values,Type}|T],Init0,Select0,_Return0,CC) ->
-    process_parameters([{capture,Values}|T],Init0,Select0,Type,CC);
-process_parameters([{capture,Values}|T],Init0,Select0,Return0,CC) ->
+process_parameters([{offset, N} | T],_Init0,Select0,Return0,CC,RE) ->
+    process_parameters(T,N,Select0,Return0,CC,RE);
+process_parameters([global | T],Init0,Select0,Return0,CC,RE) ->
+    process_parameters(T,Init0,Select0,Return0,CC,RE);
+process_parameters([{capture,Values,Type}|T],Init0,Select0,_Return0,CC,RE) ->
+    process_parameters([{capture,Values}|T],Init0,Select0,Type,CC,RE);
+process_parameters([{capture,Values}|T],Init0,Select0,Return0,CC,RE) ->
     % First process the rest to see if capture was already present
     {NewTail, Init1, Select1, Return1} = 
-	process_parameters(T,Init0,Select0,Return0,CC),
+	process_parameters(T,Init0,Select0,Return0,CC,RE),
     case Select1 of
 	false ->
 	    case Values of
 		all ->
 		    {[{capture,all} | NewTail], Init1, all, Return0}; 
+		all_names ->
+		    case re:inspect(RE,namelist) of
+			{namelist, []} ->
+			    {[{capture,first} | NewTail], Init1, none, Return0};
+			{namelist, List} ->
+			    {[{capture,[0|List]} | NewTail], Init1, stripfirst, Return0}
+		    end; 
 		first ->
 		    {[{capture,first} | NewTail], Init1, all, Return0};
 		all_but_first ->
@@ -553,20 +571,20 @@ process_parameters([{capture,Values}|T],Init0,Select0,Return0,CC) ->
 	    % Found overriding further down list, ignore this one
 	    {NewTail, Init1, Select1, Return1}
     end;
-process_parameters([H|T],Init0,Select0,Return0,true) ->
+process_parameters([H|T],Init0,Select0,Return0,true,RE) ->
     case copt(H) of
 	true ->
-	    process_parameters(T,Init0,Select0,Return0,true);
+	    process_parameters(T,Init0,Select0,Return0,true,RE);
 	false ->
 	    {NewT,Init,Select,Return} =
-		process_parameters(T,Init0,Select0,Return0,true),	
+		process_parameters(T,Init0,Select0,Return0,true,RE),	
 	    {[H|NewT],Init,Select,Return}
     end;
-process_parameters([H|T],Init0,Select0,Return0,false) ->
+process_parameters([H|T],Init0,Select0,Return0,false,RE) ->
     {NewT,Init,Select,Return} =
-		process_parameters(T,Init0,Select0,Return0,false),
+		process_parameters(T,Init0,Select0,Return0,false,RE),
     {[H|NewT],Init,Select,Return};
-process_parameters(_,_,_,_,_) ->
+process_parameters(_,_,_,_,_,_) ->
     throw(badlist).
 
 postprocess({match,[]},_,_,_,_) ->
@@ -723,7 +741,7 @@ do_grun(FlatSubject,Subject,Unicode,CRLF,RE,{Options0,NeedClean}) ->
     {StrippedOptions, InitialOffset,
      SelectReturn, ConvertReturn} = 
 	case (catch 
-		  process_parameters(Options0, 0, false, index, NeedClean)) of
+		  process_parameters(Options0, 0, false, index, NeedClean,RE)) of
 	    badlist ->
 		erlang:error(badarg,[Subject,RE,Options0]);
 	    CorrectReturn ->
@@ -747,7 +765,7 @@ loopexec(Subject,RE,X,Y,Unicode,CRLF,Options) ->
 			loopexec(Subject,RE,A+B,Y,Unicode,CRLF,Options);
 		    false ->
 			{match,M} = 
-			    case re:run(Subject,RE,[{offset,X},notempty,
+			    case re:run(Subject,RE,[{offset,X},notempty_atstart,
 						anchored]++Options) of
 				nomatch ->
 				    {match,[]};
@@ -803,6 +821,12 @@ forward2(Chal,A,N,true,CRLF) ->
 
 copt(caseless) ->
     true;
+copt(no_start_optimize) ->
+    true;
+copt(never_utf) ->
+    true;
+copt(ucp) ->
+    true;
 copt(dollar_endonly) ->
     true;
 copt(dotall) ->
@@ -833,6 +857,8 @@ copt(_) ->
 
 runopt(notempty) ->
     true;
+runopt(notempty_atstart) ->
+    true;
 runopt(notbol) ->
     true;
 runopt(noteol) ->
diff --git a/lib/stdlib/test/re_SUITE.erl b/lib/stdlib/test/re_SUITE.erl
index d86e5f5b91..129f2b3e4c 100644
--- a/lib/stdlib/test/re_SUITE.erl
+++ b/lib/stdlib/test/re_SUITE.erl
@@ -25,7 +25,8 @@
 	 split_autogen/1,split_options/1,split_specials/1,
 	 error_handling/1,pcre_cve_2008_2371/1,
 	 pcre_compile_workspace_overflow/1,re_infinite_loop/1, 
-	 re_backwards_accented/1]).
+	 re_backwards_accented/1,opt_dupnames/1,opt_all_names/1,inspect/1,
+	 opt_no_start_optimize/1,opt_never_utf/1,opt_ucp/1]).
 
 -include_lib("test_server/include/test_server.hrl").
 -include_lib("kernel/include/file.hrl").
@@ -37,7 +38,9 @@ all() ->
      replace_autogen, global_capture, replace_input_types,
      replace_return, split_autogen, split_options,
      split_specials, error_handling, pcre_cve_2008_2371,
-     pcre_compile_workspace_overflow, re_infinite_loop, re_backwards_accented].
+     pcre_compile_workspace_overflow, re_infinite_loop, 
+     re_backwards_accented, opt_dupnames, opt_all_names, 
+     inspect, opt_no_start_optimize,opt_never_utf,opt_ucp].
 
 groups() -> 
     [].
@@ -620,3 +623,155 @@ re_backwards_accented(Config) when is_list(Config) ->
 			 [unicode,{capture,none}]),
     ?t:timetrap_cancel(Dog),
     ok.
+opt_dupnames(doc) ->
+    "Check correct handling of dupnames option to re";
+opt_dupnames(Config) when is_list(Config) ->
+    Days = ["Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday"],
+    _ = [ begin
+	      Short = lists:sublist(Day,3),
+	      {match,[Short]} =
+		  re:run(Day,
+			 "(?<DN>Mon|Fri|Sun)(?:day)?|(?<DN>Tue)(?:sday)?|"
+			 "(?<DN>Wed)(?:nesday)?|(?<DN>Thu)(?:rsday)?|"
+			 "(?<DN>Sat)(?:urday)?",
+			 [dupnames, {capture, ['DN'], list}])
+	  end || Day <- Days ],
+    _ = [ begin
+	      Short = list_to_binary(lists:sublist(Day,3)),
+	      {match,[Short]} =
+		  re:run(Day,
+			 "(?<DN>Mon|Fri|Sun)(?:day)?|(?<DN>Tue)(?:sday)?|"
+			 "(?<DN>Wed)(?:nesday)?|(?<DN>Thu)(?:rsday)?|"
+			 "(?<DN>Sat)(?:urday)?",
+			 [dupnames, {capture, ['DN'], binary}])
+	  end || Day <- Days ],
+    _ = [ begin
+	      {match,[{0,3}]} =
+		  re:run(Day,
+			 "(?<DN>Mon|Fri|Sun)(?:day)?|(?<DN>Tue)(?:sday)?|"
+			 "(?<DN>Wed)(?:nesday)?|(?<DN>Thu)(?:rsday)?|"
+			 "(?<DN>Sat)(?:urday)?",
+			 [dupnames, {capture, ['DN'], index}])
+	  end || Day <- Days ],
+    {match,[{0,1},{1,3},{7,1}]} = re:run("SMondayX","(?<Skrap>.)(?<DN>Mon|Fri|Sun)(?:day)?(?<Skrap2>.)|"
+					 "(?<DN>Tue)(?:sday)?|(?<DN>Wed)nesday|(?<DN>Thu)(?:rsday)?|"
+					 "(?<DN>Sat)(?:urday)?",
+					 [dupnames, {capture, ['Skrap','DN','Skrap2'],index}]),
+    {match,[{-1,0},{0,3},{-1,0}]} = re:run("Wednesday","(?<Skrap>.)(?<DN>Mon|Fri|Sun)(?:day)?(?<Skrap2>.)|"
+					 "(?<DN>Tue)(?:sday)?|(?<DN>Wed)nesday|(?<DN>Thu)(?:rsday)?|"
+					 "(?<DN>Sat)(?:urday)?",
+					 [dupnames, {capture, ['Skrap','DN','Skrap2'],index}]),
+    nomatch = re:run("Wednsday","(?<Skrap>.)(?<DN>Mon|Fri|Sun)(?:day)?(?<Skrap2>.)|"
+		     "(?<DN>Tue)(?:sday)?|(?<DN>Wed)nesday|(?<DN>Thu)(?:rsday)?|"
+		     "(?<DN>Sat)(?:urday)?",
+		     [dupnames, {capture, ['Skrap','DN','Skrap2'],index}]),
+    ok.
+
+opt_all_names(doc) ->
+    "Test capturing of all_names";
+opt_all_names(Config) when is_list(Config) ->
+    Days = ["Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday"],
+    {match,[{1,3},{0,1},{7,1}]} = re:run("SMondayX","(?<Skrap>.)(?<DN>Mon|Fri|Sun)(?:day)?(?<Skrap2>.)|"
+					 "(?<DN>Tue)(?:sday)?|(?<DN>Wed)nesday|(?<DN>Thu)(?:rsday)?|"
+					 "(?<DN>Sat)(?:urday)?",
+					 [dupnames, {capture, all_names,index}]),
+    {match,[{0,3},{-1,0},{-1,0}]} = re:run("Wednesday","(?<Skrap>.)(?<DN>Mon|Fri|Sun)(?:day)?(?<Skrap2>.)|"
+					 "(?<DN>Tue)(?:sday)?|(?<DN>Wed)nesday|(?<DN>Thu)(?:rsday)?|"
+					 "(?<DN>Sat)(?:urday)?",
+					 [dupnames, {capture, all_names,index}]),
+    
+    _ = [ begin
+	      {match,[{0,3}]} =
+		  re:run(Day,
+			 "(?<DN>Mon|Fri|Sun)(?:day)?|(?<DN>Tue)(?:sday)?|"
+			 "(?<DN>Wed)(?:nesday)?|(?<DN>Thu)(?:rsday)?|"
+			 "(?<DN>Sat)(?:urday)?",
+			 [dupnames, {capture, all_names, index}])
+	  end || Day <- Days ],
+    _ = [ begin
+	      match =
+		  re:run(Day,
+			 "(Mon|Fri|Sun)(?:day)?|(Tue)(?:sday)?|"
+			 "(Wed)(?:nesday)?|(Thu)(?:rsday)?|"
+			 "(Sat)(?:urday)?",
+			 [dupnames, {capture, all_names, index}])
+	  end || Day <- Days ],
+    {match,[{0,1},{-1,0},{-1,0}]} = re:run("A","(?<A>A)|(?<B>B)|(?<C>C)",[{capture, all_names, index}]),
+    {match,[{-1,0},{0,1},{-1,0}]} = re:run("B","(?<A>A)|(?<B>B)|(?<C>C)",[{capture, all_names, index}]),
+    {match,[{-1,0},{-1,0},{0,1}]} = re:run("C","(?<A>A)|(?<B>B)|(?<C>C)",[{capture, all_names, index}]),
+    {match,[<<"A">>,<<>>,<<>>]} = re:run("A","(?<A>A)|(?<B>B)|(?<C>C)",[{capture, all_names, binary}]),
+    {match,[<<>>,<<"B">>,<<>>]} = re:run("B","(?<A>A)|(?<B>B)|(?<C>C)",[{capture, all_names, binary}]),
+    {match,[<<>>,<<>>,<<"C">>]} = re:run("C","(?<A>A)|(?<B>B)|(?<C>C)",[{capture, all_names, binary}]),
+    {match,["A",[],[]]} = re:run("A","(?<A>A)|(?<B>B)|(?<C>C)",[{capture, all_names, list}]),
+    {match,[[],"B",[]]} = re:run("B","(?<A>A)|(?<B>B)|(?<C>C)",[{capture, all_names, list}]),
+    {match,[[],[],"C"]} = re:run("C","(?<A>A)|(?<B>B)|(?<C>C)",[{capture, all_names, list}]),
+    {match,[{-1,0},{-1,0},{0,1}]} = re:run("A","(?<C>A)|(?<B>B)|(?<A>C)",[{capture, all_names, index}]),
+    {match,[{-1,0},{0,1},{-1,0}]} = re:run("B","(?<C>A)|(?<B>B)|(?<A>C)",[{capture, all_names, index}]),
+    {match,[{0,1},{-1,0},{-1,0}]} = re:run("C","(?<C>A)|(?<B>B)|(?<A>C)",[{capture, all_names, index}]),
+    {match,[<<>>,<<>>,<<"A">>]} = re:run("A","(?<C>A)|(?<B>B)|(?<A>C)",[{capture, all_names, binary}]),
+    {match,[<<>>,<<"B">>,<<>>]} = re:run("B","(?<C>A)|(?<B>B)|(?<A>C)",[{capture, all_names, binary}]),
+    {match,[<<"C">>,<<>>,<<>>]} = re:run("C","(?<C>A)|(?<B>B)|(?<A>C)",[{capture, all_names, binary}]),
+    {match,[[],[],"A"]} = re:run("A","(?<C>A)|(?<B>B)|(?<A>C)",[{capture, all_names, list}]),
+    {match,[[],"B",[]]} = re:run("B","(?<C>A)|(?<B>B)|(?<A>C)",[{capture, all_names, list}]),
+    {match,["C",[],[]]} = re:run("C","(?<C>A)|(?<B>B)|(?<A>C)",[{capture, all_names, list}]),
+    {match,[[<<>>,<<>>,<<"C">>],
+	    [<<>>,<<>>,<<"C">>],
+	    [<<>>,<<>>,<<"C">>]]} = re:run("CCC","(?<A>A)|(?<B>B)|(?<C>C)",
+				       [global,{capture, all_names, binary}]),
+    {match,[[<<"C">>,<<>>],
+	    [<<>>,<<"B">>],
+	    [<<"C">>,<<>>]]} = re:run("CBC","(?<A>A)|(?<B>B)|(?<A>C)",
+				      [global,dupnames,{capture, all_names, binary}]),
+    {match,[[]]} = re:run("ABCE","(?<A>D)|(?<B>E)|(?<A>F)",[dupnames,{capture,['A'],list}]),
+    {match,["D"]} = re:run("ABCDE","(?<A>D)|(?<B>E)|(?<A>F)",[dupnames,{capture,['A'],list}]),
+    {match,["F"]} = re:run("ABCFE","(?<A>D)|(?<B>E)|(?<A>F)",[dupnames,{capture,['A'],list}]),
+    {match,["F",[]]} = re:run("ABCFE","(?<A>D)|(?<B>E)|(?<A>F)",[dupnames,{capture,['A','B'],list}]),
+    {match,[[],"E"]} = re:run("ABCE","(?<A>D)|(?<B>E)|(?<A>F)",[dupnames,{capture,['A','B'],list}]),
+    {match,[[],"E"]} = re:run("ABCE","(?<A>D)|(?<B>E)|(?<A>F)",[dupnames,{capture,all_names,list}]),
+    {match,[{-1,0},{3,1}]}  = re:run("ABCE","(?<A>D)|(?<B>E)|(?<A>F)",[dupnames,{capture,all_names,index}]),
+    ok.
+
+inspect(doc) ->
+    "Test the minimal inspect function";
+inspect(Config) when is_list(Config)->
+    {ok,MP} = re:compile("(?<A>A)|(?<B>B)|(?<C>C)."),
+    {namelist,[<<"A">>,<<"B">>,<<"C">>]} = re:inspect(MP,namelist),
+    {ok,MPD} = re:compile("(?<A>A)|(?<B>B)|(?<A>C).",[dupnames]),
+    {namelist,[<<"A">>,<<"B">>]} = re:inspect(MPD,namelist),
+    {ok,MPN} = re:compile("(A)|(B)|(C)."),
+    {namelist,[]} = re:inspect(MPN,namelist),
+    {'EXIT',{badarg,_}} = (catch re:inspect(MPD,namelistk)),
+    {'EXIT',{badarg,_}} = (catch re:inspect({re_pattern,3,0,0,<<"kalle">>},namelist)),
+    ok.
+
+opt_no_start_optimize(doc) ->
+    "Test that the no_start_optimize compilation flag works";
+opt_no_start_optimize(Config) when is_list(Config) ->
+    {match, [{3,3}]} = re:run("DEFABC","(*COMMIT)ABC",[]), % Start optimization makes this result wrong!
+    nomatch = re:run("DEFABC","(*COMMIT)ABC",[no_start_optimize]), % This is the correct result...
+    ok.
+
+opt_never_utf(doc) ->
+    "Check that the never_utf option works";
+opt_never_utf(Config) when is_list(Config) ->
+    {match,[{0,3}]} = re:run("ABC","ABC",[never_utf]),
+    {match,[{0,3}]} = re:run("ABC","(*UTF)ABC",[]),
+    {ok,_} = re:compile("(*UTF)ABC"),
+    {ok,_} = re:compile("(*UTF)ABC",[unicode]),
+    {ok,_} = re:compile("(*UTF8)ABC"),
+    {'EXIT',{badarg,_}} = (catch re:run("ABC","ABC",[unicode,never_utf])),
+    {'EXIT',{badarg,_}} = (catch re:run("ABC","(*UTF)ABC",[never_utf])),
+    {'EXIT',{badarg,_}} = (catch re:run("ABC","(*UTF8)ABC",[never_utf])),
+    {error,_} = (catch re:compile("ABC",[unicode,never_utf])),
+    {error,_} = (catch re:compile("(*UTF)ABC",[never_utf])),
+    {error,_} = (catch re:compile("(*UTF8)ABC",[never_utf])),
+    ok.
+opt_ucp(doc) ->
+    "Check that the ucp option is passed to PCRE";
+opt_ucp(Config) when is_list(Config) ->
+    {match,[{0,1}]} = re:run([$a],"\\w",[unicode]),
+    {match,[{0,2}]} = re:run([229],"\\w",[unicode]), % Latin1 works without UCP, as we have a default 
+						     % Latin1 table
+    nomatch = re:run([1024],"\\w",[unicode]), % Latin1 word characters only, 1024 is not latin1
+    {match,[{0,2}]} = re:run([1024],"\\w",[unicode,ucp]), % Any Unicode word character works with 'ucp'
+    ok.
author	Patrik Nyblom <[email protected]>	2013-07-18 10:18:58 +0200
committer	Patrik Nyblom <[email protected]>	2013-08-09 12:10:30 +0200
commit	6146e7642d4bb9f7c9bb5f8cbca548c1d9667e5c (patch)
tree	024c55cef26cdf0ad167b23d61fee9737177da7b
parent	9cd8b5d2af163f29cf77ae74057789be977f6414 (diff)
download	otp-6146e7642d4bb9f7c9bb5f8cbca548c1d9667e5c.tar.gz otp-6146e7642d4bb9f7c9bb5f8cbca548c1d9667e5c.tar.bz2 otp-6146e7642d4bb9f7c9bb5f8cbca548c1d9667e5c.zip