diff options
author | Patrik Nyblom <[email protected]> | 2013-07-18 10:18:58 +0200 |
---|---|---|
committer | Patrik Nyblom <[email protected]> | 2013-08-09 12:10:30 +0200 |
commit | 6146e7642d4bb9f7c9bb5f8cbca548c1d9667e5c (patch) | |
tree | 024c55cef26cdf0ad167b23d61fee9737177da7b | |
parent | 9cd8b5d2af163f29cf77ae74057789be977f6414 (diff) | |
download | otp-6146e7642d4bb9f7c9bb5f8cbca548c1d9667e5c.tar.gz otp-6146e7642d4bb9f7c9bb5f8cbca548c1d9667e5c.tar.bz2 otp-6146e7642d4bb9f7c9bb5f8cbca548c1d9667e5c.zip |
Add new options to Erlang re interface and mend dupnames
Add notempty_atstart, no_start_optimize, ucp and
never_utf options from new PCRE version.
Use the new notempty_atstart in global matching.
Add inspect/2 function
Correctly handle dupnames when capturing a name, as
in Perl, get the leftmost matching occurence.
Also added all_names, to get all the names in the pattern
in alphabetical (name) order.
To be able to use this in global matching, an inspect
function that can dig out a namelist was added.
-rw-r--r-- | erts/emulator/beam/atom.names | 6 | ||||
-rw-r--r-- | erts/emulator/beam/bif.tab | 6 | ||||
-rw-r--r-- | erts/emulator/beam/erl_bif_re.c | 233 | ||||
-rw-r--r-- | lib/stdlib/src/re.erl | 72 | ||||
-rw-r--r-- | lib/stdlib/test/re_SUITE.erl | 159 |
5 files changed, 446 insertions, 30 deletions
diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names index eba1d0fa23..cf8f511b85 100644 --- a/erts/emulator/beam/atom.names +++ b/erts/emulator/beam/atom.names @@ -71,6 +71,7 @@ atom ac atom active atom all atom all_but_first +atom all_names atom alloc_info atom alloc_sizes atom allocated @@ -348,11 +349,13 @@ atom multi_scheduling atom multiline atom name atom named_table +atom namelist atom native_addresses atom Neq='=/=' atom Neqeq='/=' atom net_kernel atom net_kernel_terminated +atom never_utf atom new atom new_index atom new_uniq @@ -378,6 +381,7 @@ atom nosuspend atom no_float atom no_integer atom no_network +atom no_start_optimize atom not atom not_a_list atom not_loaded @@ -388,6 +392,7 @@ atom notalive atom notbol atom noteol atom notempty +atom notempty_atstart atom notify atom notsup atom nouse_stdio @@ -554,6 +559,7 @@ atom true atom tuple atom type atom ucompile +atom ucp atom undef atom ungreedy atom unicode diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index dc8e9101de..7c8e4b31cf 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -574,6 +574,12 @@ bif erlang:binary_to_float/1 bif io:printable_range/0 # +# New in R17A +# + +bif re:inspect/2 + +# # Obsolete # diff --git a/erts/emulator/beam/erl_bif_re.c b/erts/emulator/beam/erl_bif_re.c index 12fc834685..c74125ae41 100644 --- a/erts/emulator/beam/erl_bif_re.c +++ b/erts/emulator/beam/erl_bif_re.c @@ -288,6 +288,10 @@ parse_options(Eterm listp, /* in */ eopt |= PCRE_NOTEMPTY; fl |= PARSE_FLAG_UNIQUE_EXEC_OPT; break; + case am_notempty_atstart: + eopt |= PCRE_NOTEMPTY_ATSTART; + fl |= PARSE_FLAG_UNIQUE_EXEC_OPT; + break; case am_notbol: eopt |= PCRE_NOTBOL; fl |= PARSE_FLAG_UNIQUE_EXEC_OPT; @@ -296,6 +300,10 @@ parse_options(Eterm listp, /* in */ eopt |= PCRE_NOTEOL; fl |= PARSE_FLAG_UNIQUE_EXEC_OPT; break; + case am_no_start_optimize: + copt |= PCRE_NO_START_OPTIMIZE; + fl |= PARSE_FLAG_UNIQUE_COMPILE_OPT; + break; case am_caseless: copt |= PCRE_CASELESS; fl |= PARSE_FLAG_UNIQUE_COMPILE_OPT; @@ -332,6 +340,14 @@ parse_options(Eterm listp, /* in */ copt |= PCRE_UNGREEDY; fl |= PARSE_FLAG_UNIQUE_COMPILE_OPT; break; + case am_ucp: + copt |= PCRE_UCP; + fl |= PARSE_FLAG_UNIQUE_COMPILE_OPT; + break; + case am_never_utf: + copt |= PCRE_NEVER_UTF; + fl |= PARSE_FLAG_UNIQUE_COMPILE_OPT; + break; case am_unicode: copt |= PCRE_UTF8; fl |= (PARSE_FLAG_UNIQUE_COMPILE_OPT | PARSE_FLAG_UNICODE); @@ -359,7 +375,7 @@ parse_options(Eterm listp, /* in */ if (compile_options != NULL) { *compile_options = copt; } - if (exec_options != NULL) { + if (exec_options != NULL) { *exec_options = eopt; } if (flags != NULL) { @@ -585,6 +601,17 @@ static Eterm build_exec_return(Process *p, int rc, RestartContext *restartp, Ete ri->num_spec * 2 * sizeof(Eterm)); for (i = 0; i < ri->num_spec; ++i) { x = ri->v[i]; + if (x < -1) { + int n = i-x+1; + int j; + for (j = i+1; j < ri->num_spec && j < n; ++j) { + if (restartp->ovector[(ri->v[j])*2] >= 0) { + x = ri->v[j]; + break; + } + } + i = n-1; + } if (x < rc && x >= 0) { tmp_vect[n*2] = make_signed_integer(restartp->ovector[x*2],p); tmp_vect[n*2+1] = make_signed_integer(restartp->ovector[x*2+1]-restartp->ovector[x*2],p); @@ -666,6 +693,17 @@ static Eterm build_exec_return(Process *p, int rc, RestartContext *restartp, Ete ri->num_spec * sizeof(Eterm)); for (i = 0; i < ri->num_spec; ++i) { x = ri->v[i]; + if (x < -1) { + int n = i-x+1; + int j; + for (j = i+1; j < ri->num_spec && j < n; ++j) { + if (restartp->ovector[(ri->v[j])*2] >= 0) { + x = ri->v[j]; + break; + } + } + i = n-1; + } if (x < rc && x >= 0) { char *cp; int len; @@ -730,6 +768,49 @@ static Eterm build_exec_return(Process *p, int rc, RestartContext *restartp, Ete */ #define RINFO_SIZ(Num) (sizeof(ReturnInfo) + (sizeof(int) * (Num - 1))) +#define PICK_INDEX(NameEntry) \ + ((int) ((((unsigned) ((unsigned char *) (NameEntry))[0]) << 8) + \ + ((unsigned) ((unsigned char *) (NameEntry))[1]))) + + +static void build_one_capture(const pcre *code, ReturnInfo **ri, int *sallocated, int has_dupnames, char *name) +{ + ReturnInfo *r = (*ri); + if (has_dupnames) { + /* Build a sequence of positions, starting with -size if + more than one, otherwise just put the index there... */ + char *first,*last; + int esize = erts_pcre_get_stringtable_entries(code,name,&first,&last); + if (esize == PCRE_ERROR_NOSUBSTRING) { + r->v[r->num_spec - 1] = -1; + } else if(last == first) { + r->v[r->num_spec - 1] = PICK_INDEX(first); + } else { + int num = ((last - first) / esize) + 1; + int i; + ASSERT(num > 1); + r->v[r->num_spec - 1] = -num; /* A value less than -1 means + multiple indexes for same name */ + for (i = 0; i < num; ++i) { + ++(r->num_spec); + if(r->num_spec > (*sallocated)) { + (*sallocated) += 10; + r = erts_realloc(ERTS_ALC_T_RE_SUBJECT, r, + RINFO_SIZ((*sallocated))); + } + r->v[r->num_spec - 1] = PICK_INDEX(first); + first += esize; + } + } + } else { + /* Use the faster binary search if no duplicate names are present */ + if ((r->v[r->num_spec - 1] = erts_pcre_get_stringnumber(code,name)) == + PCRE_ERROR_NOSUBSTRING) { + r->v[r->num_spec - 1] = -1; + } + } + *ri = r; +} static ReturnInfo * build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code) @@ -778,6 +859,53 @@ build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code) } ri->v[ri->num_spec - 1] = 0; break; + case am_all_names: + { + int rc,i,top; + int entrysize; + char *nametable, *last = NULL; + int has_dupnames; + unsigned long options; + + if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_OPTIONS, &options) != 0) + goto error; + if ((rc = erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) + goto error; + if (top <= 0) { + ri->num_spec = 0; + ri->type = RetNone; + break; + } + if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize) != 0) + goto error; + if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, (unsigned char **) &nametable) != 0) + goto error; + + has_dupnames = ((options & PCRE_DUPNAMES) != 0); + + for(i=0;i<top;++i) { + if (last == NULL || !has_dupnames || strcmp(last+2,nametable+2)) { + if (ri->num_spec < 0) + ri->num_spec = 0; + ++(ri->num_spec); + if(ri->num_spec > sallocated) { + sallocated += 10; + ri = erts_realloc(ERTS_ALC_T_RE_SUBJECT, ri, RINFO_SIZ(sallocated)); + } + if (has_dupnames) { + /* This could be more effective, we actually have + the names and could fill in the vector + immediately. Now we lookup the name again. */ + build_one_capture(code,&ri,&sallocated,has_dupnames,nametable+2); + } else { + ri->v[ri->num_spec - 1] = PICK_INDEX(nametable); + } + } + last = nametable; + nametable += entrysize; + } + break; + } default: if (is_list(capture_spec[CAPSPEC_VALUES])) { for(l=capture_spec[CAPSPEC_VALUES];is_list(l);l = CDR(list_val(l))) { @@ -793,6 +921,11 @@ build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code) if (term_to_int(val,&x)) { ri->v[ri->num_spec - 1] = x; } else if (is_atom(val) || is_binary(val) || is_list(val)) { + int has_dupnames; + unsigned long options; + if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_OPTIONS, &options) != 0) + goto error; + has_dupnames = ((options & PCRE_DUPNAMES) != 0); if (is_atom(val)) { Atom *ap = atom_tab(atom_val(val)); if ((ap->len + 1) > tmpbsiz) { @@ -823,10 +956,7 @@ build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code) } tmpb[slen] = '\0'; } - if ((ri->v[ri->num_spec - 1] = erts_pcre_get_stringnumber(code,tmpb)) == - PCRE_ERROR_NOSUBSTRING) { - ri->v[ri->num_spec - 1] = -1; - } + build_one_capture(code,&ri,&sallocated,has_dupnames,tmpb); } else { goto error; } @@ -1159,6 +1289,99 @@ static BIF_RETTYPE re_exec_trap(BIF_ALIST_3) BIF_RET(res); } +BIF_RETTYPE +re_inspect_2(BIF_ALIST_2) +{ + Eterm *tp,*tmp_vec,*hp; + int rc,i,top,j; + int entrysize; + char *nametable, *last,*name; + int has_dupnames; + unsigned long options; + int num_names; + Eterm res; + const pcre *code; + byte *temp_alloc = NULL; + + if (is_not_tuple(BIF_ARG_1) || (arityval(*tuple_val(BIF_ARG_1)) != 5)) { + goto error; + } + tp = tuple_val(BIF_ARG_1); + if (tp[1] != am_re_pattern || is_not_small(tp[2]) || + is_not_small(tp[3]) || is_not_small(tp[4]) || + is_not_binary(tp[5])) { + goto error; + } + if (BIF_ARG_2 != am_namelist) { + goto error; + } + if ((code = (const pcre *) + erts_get_aligned_binary_bytes(tp[5], &temp_alloc)) == NULL) { + goto error; + } + + /* OK, so let's try to get some info */ + + if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_OPTIONS, &options) != 0) + goto error; + if ((rc = erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) + goto error; + if (top <= 0) { + hp = HAlloc(BIF_P, 3); + res = TUPLE2(hp,am_namelist,NIL); + erts_free_aligned_binary_bytes(temp_alloc); + BIF_RET(res); + } + if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize) != 0) + goto error; + if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, (unsigned char **) &nametable) != 0) + goto error; + + has_dupnames = ((options & PCRE_DUPNAMES) != 0); + /* First, count the names */ + num_names = 0; + last = NULL; + name = nametable; + for(i=0;i<top;++i) { + if (last == NULL || !has_dupnames || strcmp(last+2,name+2)) { + ++num_names; + } + last = name; + name += entrysize; + } + tmp_vec = erts_alloc(ERTS_ALC_T_RE_TMP_BUF, + num_names * sizeof(Eterm)); + /* Re-iterate and fill tmp_vec */ + last = NULL; + name = nametable; + j = 0; + for(i=0;i<top;++i) { + if (last == NULL || !has_dupnames || strcmp(last+2,name+2)) { + tmp_vec[j++] = new_binary(BIF_P, (byte *) name+2, strlen(name+2)); + } + last = name; + name += entrysize; + } + ASSERT(j == num_names); + hp = HAlloc(BIF_P, 3+2*j); + res = NIL; + for(i = j-1 ;i >= 0; --i) { + res = CONS(hp,tmp_vec[i],res); + hp += 2; + } + res = TUPLE2(hp,am_namelist,res); + erts_free_aligned_binary_bytes(temp_alloc); + erts_free(ERTS_ALC_T_RE_TMP_BUF, tmp_vec); + BIF_RET(res); + + error: + /* tmp_vec never allocated when we reach here */ + erts_free_aligned_binary_bytes(temp_alloc); + BIF_ERROR(BIF_P,BADARG); +} + + + diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl index d8d529e6a4..4d6de1100d 100644 --- a/lib/stdlib/src/re.erl +++ b/lib/stdlib/src/re.erl @@ -28,11 +28,12 @@ | dotall | extended | firstline | multiline | no_auto_capture | dupnames | ungreedy | {newline, nl_spec()}| bsr_anycrlf + | no_start_optimize | ucp | never_utf | bsr_unicode. %%% BIFs --export([compile/1, compile/2, run/2, run/3]). +-export([compile/1, compile/2, run/2, run/3, inspect/2]). -spec compile(Regexp) -> {ok, MP} | {error, ErrSpec} when Regexp :: iodata(), @@ -67,13 +68,13 @@ run(_, _) -> Subject :: iodata() | unicode:charlist(), RE :: mp() | iodata() | unicode:charlist(), Options :: [Option], - Option :: anchored | global | notbol | noteol | notempty + Option :: anchored | global | notbol | noteol | notempty | notempty_atstart | {offset, non_neg_integer()} | {newline, NLSpec :: nl_spec()} | bsr_anycrlf | bsr_unicode | {capture, ValueSpec} | {capture, ValueSpec, Type} | CompileOpt, Type :: index | list | binary, - ValueSpec :: all | all_but_first | first | none | ValueList, + ValueSpec :: all | all_but_first | all_names | first | none | ValueList, ValueList :: [ValueID], ValueID :: integer() | string() | atom(), CompileOpt :: compile_option(), @@ -88,6 +89,14 @@ run(_, _) -> run(_, _, _) -> erlang:nif_error(undef). +-spec inspect(MP,Item) -> {namelist, [ binary() ]} when + MP :: mp(), + Item :: namelist. + +inspect(_,_) -> + erlang:nif_error(undef). + + %%% End of BIFs -spec split(Subject, RE) -> SplitList when @@ -102,7 +111,7 @@ split(Subject,RE) -> Subject :: iodata() | unicode:charlist(), RE :: mp() | iodata() | unicode:charlist(), Options :: [ Option ], - Option :: anchored | notbol | noteol | notempty + Option :: anchored | notbol | noteol | notempty | notempty_atstart | {offset, non_neg_integer()} | {newline, nl_spec()} | bsr_anycrlf | bsr_unicode | {return, ReturnType} | {parts, NumParts} | group | trim | CompileOpt, @@ -295,7 +304,7 @@ replace(Subject,RE,Replacement) -> RE :: mp() | iodata() | unicode:charlist(), Replacement :: iodata() | unicode:charlist(), Options :: [Option], - Option :: anchored | global | notbol | noteol | notempty + Option :: anchored | global | notbol | noteol | notempty | notempty_atstart | {offset, non_neg_integer()} | {newline, NLSpec} | bsr_anycrlf | bsr_unicode | {return, ReturnType} | CompileOpt, ReturnType :: iodata | list | binary, @@ -509,7 +518,9 @@ check_for_crlf(_,L) -> % SelectReturn = false | all | stirpfirst | none % ConvertReturn = index | list | binary % {capture, all} -> all (untouchded) -% {capture, first} -> kept in argumentt list and Select all +% {capture, all_names} -> if names are present: treated as a name {capture, [...]} +% else: same as {capture, []} +% {capture, first} -> kept in argument list and Select all % {capture, all_but_first} -> removed from argument list and selects stripfirst % {capture, none} -> removed from argument list and selects none % {capture, []} -> removed from argument list and selects none @@ -518,23 +529,30 @@ check_for_crlf(_,L) -> % Call as process_parameters([],0,false,index,NeedClean) -process_parameters([],InitialOffset, SelectReturn, ConvertReturn,_) -> +process_parameters([],InitialOffset, SelectReturn, ConvertReturn,_,_) -> {[], InitialOffset, SelectReturn, ConvertReturn}; -process_parameters([{offset, N} | T],_Init0,Select0,Return0,CC) -> - process_parameters(T,N,Select0,Return0,CC); -process_parameters([global | T],Init0,Select0,Return0,CC) -> - process_parameters(T,Init0,Select0,Return0,CC); -process_parameters([{capture,Values,Type}|T],Init0,Select0,_Return0,CC) -> - process_parameters([{capture,Values}|T],Init0,Select0,Type,CC); -process_parameters([{capture,Values}|T],Init0,Select0,Return0,CC) -> +process_parameters([{offset, N} | T],_Init0,Select0,Return0,CC,RE) -> + process_parameters(T,N,Select0,Return0,CC,RE); +process_parameters([global | T],Init0,Select0,Return0,CC,RE) -> + process_parameters(T,Init0,Select0,Return0,CC,RE); +process_parameters([{capture,Values,Type}|T],Init0,Select0,_Return0,CC,RE) -> + process_parameters([{capture,Values}|T],Init0,Select0,Type,CC,RE); +process_parameters([{capture,Values}|T],Init0,Select0,Return0,CC,RE) -> % First process the rest to see if capture was already present {NewTail, Init1, Select1, Return1} = - process_parameters(T,Init0,Select0,Return0,CC), + process_parameters(T,Init0,Select0,Return0,CC,RE), case Select1 of false -> case Values of all -> {[{capture,all} | NewTail], Init1, all, Return0}; + all_names -> + case re:inspect(RE,namelist) of + {namelist, []} -> + {[{capture,first} | NewTail], Init1, none, Return0}; + {namelist, List} -> + {[{capture,[0|List]} | NewTail], Init1, stripfirst, Return0} + end; first -> {[{capture,first} | NewTail], Init1, all, Return0}; all_but_first -> @@ -553,20 +571,20 @@ process_parameters([{capture,Values}|T],Init0,Select0,Return0,CC) -> % Found overriding further down list, ignore this one {NewTail, Init1, Select1, Return1} end; -process_parameters([H|T],Init0,Select0,Return0,true) -> +process_parameters([H|T],Init0,Select0,Return0,true,RE) -> case copt(H) of true -> - process_parameters(T,Init0,Select0,Return0,true); + process_parameters(T,Init0,Select0,Return0,true,RE); false -> {NewT,Init,Select,Return} = - process_parameters(T,Init0,Select0,Return0,true), + process_parameters(T,Init0,Select0,Return0,true,RE), {[H|NewT],Init,Select,Return} end; -process_parameters([H|T],Init0,Select0,Return0,false) -> +process_parameters([H|T],Init0,Select0,Return0,false,RE) -> {NewT,Init,Select,Return} = - process_parameters(T,Init0,Select0,Return0,false), + process_parameters(T,Init0,Select0,Return0,false,RE), {[H|NewT],Init,Select,Return}; -process_parameters(_,_,_,_,_) -> +process_parameters(_,_,_,_,_,_) -> throw(badlist). postprocess({match,[]},_,_,_,_) -> @@ -723,7 +741,7 @@ do_grun(FlatSubject,Subject,Unicode,CRLF,RE,{Options0,NeedClean}) -> {StrippedOptions, InitialOffset, SelectReturn, ConvertReturn} = case (catch - process_parameters(Options0, 0, false, index, NeedClean)) of + process_parameters(Options0, 0, false, index, NeedClean,RE)) of badlist -> erlang:error(badarg,[Subject,RE,Options0]); CorrectReturn -> @@ -747,7 +765,7 @@ loopexec(Subject,RE,X,Y,Unicode,CRLF,Options) -> loopexec(Subject,RE,A+B,Y,Unicode,CRLF,Options); false -> {match,M} = - case re:run(Subject,RE,[{offset,X},notempty, + case re:run(Subject,RE,[{offset,X},notempty_atstart, anchored]++Options) of nomatch -> {match,[]}; @@ -803,6 +821,12 @@ forward2(Chal,A,N,true,CRLF) -> copt(caseless) -> true; +copt(no_start_optimize) -> + true; +copt(never_utf) -> + true; +copt(ucp) -> + true; copt(dollar_endonly) -> true; copt(dotall) -> @@ -833,6 +857,8 @@ copt(_) -> runopt(notempty) -> true; +runopt(notempty_atstart) -> + true; runopt(notbol) -> true; runopt(noteol) -> diff --git a/lib/stdlib/test/re_SUITE.erl b/lib/stdlib/test/re_SUITE.erl index d86e5f5b91..129f2b3e4c 100644 --- a/lib/stdlib/test/re_SUITE.erl +++ b/lib/stdlib/test/re_SUITE.erl @@ -25,7 +25,8 @@ split_autogen/1,split_options/1,split_specials/1, error_handling/1,pcre_cve_2008_2371/1, pcre_compile_workspace_overflow/1,re_infinite_loop/1, - re_backwards_accented/1]). + re_backwards_accented/1,opt_dupnames/1,opt_all_names/1,inspect/1, + opt_no_start_optimize/1,opt_never_utf/1,opt_ucp/1]). -include_lib("test_server/include/test_server.hrl"). -include_lib("kernel/include/file.hrl"). @@ -37,7 +38,9 @@ all() -> replace_autogen, global_capture, replace_input_types, replace_return, split_autogen, split_options, split_specials, error_handling, pcre_cve_2008_2371, - pcre_compile_workspace_overflow, re_infinite_loop, re_backwards_accented]. + pcre_compile_workspace_overflow, re_infinite_loop, + re_backwards_accented, opt_dupnames, opt_all_names, + inspect, opt_no_start_optimize,opt_never_utf,opt_ucp]. groups() -> []. @@ -620,3 +623,155 @@ re_backwards_accented(Config) when is_list(Config) -> [unicode,{capture,none}]), ?t:timetrap_cancel(Dog), ok. +opt_dupnames(doc) -> + "Check correct handling of dupnames option to re"; +opt_dupnames(Config) when is_list(Config) -> + Days = ["Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday"], + _ = [ begin + Short = lists:sublist(Day,3), + {match,[Short]} = + re:run(Day, + "(?<DN>Mon|Fri|Sun)(?:day)?|(?<DN>Tue)(?:sday)?|" + "(?<DN>Wed)(?:nesday)?|(?<DN>Thu)(?:rsday)?|" + "(?<DN>Sat)(?:urday)?", + [dupnames, {capture, ['DN'], list}]) + end || Day <- Days ], + _ = [ begin + Short = list_to_binary(lists:sublist(Day,3)), + {match,[Short]} = + re:run(Day, + "(?<DN>Mon|Fri|Sun)(?:day)?|(?<DN>Tue)(?:sday)?|" + "(?<DN>Wed)(?:nesday)?|(?<DN>Thu)(?:rsday)?|" + "(?<DN>Sat)(?:urday)?", + [dupnames, {capture, ['DN'], binary}]) + end || Day <- Days ], + _ = [ begin + {match,[{0,3}]} = + re:run(Day, + "(?<DN>Mon|Fri|Sun)(?:day)?|(?<DN>Tue)(?:sday)?|" + "(?<DN>Wed)(?:nesday)?|(?<DN>Thu)(?:rsday)?|" + "(?<DN>Sat)(?:urday)?", + [dupnames, {capture, ['DN'], index}]) + end || Day <- Days ], + {match,[{0,1},{1,3},{7,1}]} = re:run("SMondayX","(?<Skrap>.)(?<DN>Mon|Fri|Sun)(?:day)?(?<Skrap2>.)|" + "(?<DN>Tue)(?:sday)?|(?<DN>Wed)nesday|(?<DN>Thu)(?:rsday)?|" + "(?<DN>Sat)(?:urday)?", + [dupnames, {capture, ['Skrap','DN','Skrap2'],index}]), + {match,[{-1,0},{0,3},{-1,0}]} = re:run("Wednesday","(?<Skrap>.)(?<DN>Mon|Fri|Sun)(?:day)?(?<Skrap2>.)|" + "(?<DN>Tue)(?:sday)?|(?<DN>Wed)nesday|(?<DN>Thu)(?:rsday)?|" + "(?<DN>Sat)(?:urday)?", + [dupnames, {capture, ['Skrap','DN','Skrap2'],index}]), + nomatch = re:run("Wednsday","(?<Skrap>.)(?<DN>Mon|Fri|Sun)(?:day)?(?<Skrap2>.)|" + "(?<DN>Tue)(?:sday)?|(?<DN>Wed)nesday|(?<DN>Thu)(?:rsday)?|" + "(?<DN>Sat)(?:urday)?", + [dupnames, {capture, ['Skrap','DN','Skrap2'],index}]), + ok. + +opt_all_names(doc) -> + "Test capturing of all_names"; +opt_all_names(Config) when is_list(Config) -> + Days = ["Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday"], + {match,[{1,3},{0,1},{7,1}]} = re:run("SMondayX","(?<Skrap>.)(?<DN>Mon|Fri|Sun)(?:day)?(?<Skrap2>.)|" + "(?<DN>Tue)(?:sday)?|(?<DN>Wed)nesday|(?<DN>Thu)(?:rsday)?|" + "(?<DN>Sat)(?:urday)?", + [dupnames, {capture, all_names,index}]), + {match,[{0,3},{-1,0},{-1,0}]} = re:run("Wednesday","(?<Skrap>.)(?<DN>Mon|Fri|Sun)(?:day)?(?<Skrap2>.)|" + "(?<DN>Tue)(?:sday)?|(?<DN>Wed)nesday|(?<DN>Thu)(?:rsday)?|" + "(?<DN>Sat)(?:urday)?", + [dupnames, {capture, all_names,index}]), + + _ = [ begin + {match,[{0,3}]} = + re:run(Day, + "(?<DN>Mon|Fri|Sun)(?:day)?|(?<DN>Tue)(?:sday)?|" + "(?<DN>Wed)(?:nesday)?|(?<DN>Thu)(?:rsday)?|" + "(?<DN>Sat)(?:urday)?", + [dupnames, {capture, all_names, index}]) + end || Day <- Days ], + _ = [ begin + match = + re:run(Day, + "(Mon|Fri|Sun)(?:day)?|(Tue)(?:sday)?|" + "(Wed)(?:nesday)?|(Thu)(?:rsday)?|" + "(Sat)(?:urday)?", + [dupnames, {capture, all_names, index}]) + end || Day <- Days ], + {match,[{0,1},{-1,0},{-1,0}]} = re:run("A","(?<A>A)|(?<B>B)|(?<C>C)",[{capture, all_names, index}]), + {match,[{-1,0},{0,1},{-1,0}]} = re:run("B","(?<A>A)|(?<B>B)|(?<C>C)",[{capture, all_names, index}]), + {match,[{-1,0},{-1,0},{0,1}]} = re:run("C","(?<A>A)|(?<B>B)|(?<C>C)",[{capture, all_names, index}]), + {match,[<<"A">>,<<>>,<<>>]} = re:run("A","(?<A>A)|(?<B>B)|(?<C>C)",[{capture, all_names, binary}]), + {match,[<<>>,<<"B">>,<<>>]} = re:run("B","(?<A>A)|(?<B>B)|(?<C>C)",[{capture, all_names, binary}]), + {match,[<<>>,<<>>,<<"C">>]} = re:run("C","(?<A>A)|(?<B>B)|(?<C>C)",[{capture, all_names, binary}]), + {match,["A",[],[]]} = re:run("A","(?<A>A)|(?<B>B)|(?<C>C)",[{capture, all_names, list}]), + {match,[[],"B",[]]} = re:run("B","(?<A>A)|(?<B>B)|(?<C>C)",[{capture, all_names, list}]), + {match,[[],[],"C"]} = re:run("C","(?<A>A)|(?<B>B)|(?<C>C)",[{capture, all_names, list}]), + {match,[{-1,0},{-1,0},{0,1}]} = re:run("A","(?<C>A)|(?<B>B)|(?<A>C)",[{capture, all_names, index}]), + {match,[{-1,0},{0,1},{-1,0}]} = re:run("B","(?<C>A)|(?<B>B)|(?<A>C)",[{capture, all_names, index}]), + {match,[{0,1},{-1,0},{-1,0}]} = re:run("C","(?<C>A)|(?<B>B)|(?<A>C)",[{capture, all_names, index}]), + {match,[<<>>,<<>>,<<"A">>]} = re:run("A","(?<C>A)|(?<B>B)|(?<A>C)",[{capture, all_names, binary}]), + {match,[<<>>,<<"B">>,<<>>]} = re:run("B","(?<C>A)|(?<B>B)|(?<A>C)",[{capture, all_names, binary}]), + {match,[<<"C">>,<<>>,<<>>]} = re:run("C","(?<C>A)|(?<B>B)|(?<A>C)",[{capture, all_names, binary}]), + {match,[[],[],"A"]} = re:run("A","(?<C>A)|(?<B>B)|(?<A>C)",[{capture, all_names, list}]), + {match,[[],"B",[]]} = re:run("B","(?<C>A)|(?<B>B)|(?<A>C)",[{capture, all_names, list}]), + {match,["C",[],[]]} = re:run("C","(?<C>A)|(?<B>B)|(?<A>C)",[{capture, all_names, list}]), + {match,[[<<>>,<<>>,<<"C">>], + [<<>>,<<>>,<<"C">>], + [<<>>,<<>>,<<"C">>]]} = re:run("CCC","(?<A>A)|(?<B>B)|(?<C>C)", + [global,{capture, all_names, binary}]), + {match,[[<<"C">>,<<>>], + [<<>>,<<"B">>], + [<<"C">>,<<>>]]} = re:run("CBC","(?<A>A)|(?<B>B)|(?<A>C)", + [global,dupnames,{capture, all_names, binary}]), + {match,[[]]} = re:run("ABCE","(?<A>D)|(?<B>E)|(?<A>F)",[dupnames,{capture,['A'],list}]), + {match,["D"]} = re:run("ABCDE","(?<A>D)|(?<B>E)|(?<A>F)",[dupnames,{capture,['A'],list}]), + {match,["F"]} = re:run("ABCFE","(?<A>D)|(?<B>E)|(?<A>F)",[dupnames,{capture,['A'],list}]), + {match,["F",[]]} = re:run("ABCFE","(?<A>D)|(?<B>E)|(?<A>F)",[dupnames,{capture,['A','B'],list}]), + {match,[[],"E"]} = re:run("ABCE","(?<A>D)|(?<B>E)|(?<A>F)",[dupnames,{capture,['A','B'],list}]), + {match,[[],"E"]} = re:run("ABCE","(?<A>D)|(?<B>E)|(?<A>F)",[dupnames,{capture,all_names,list}]), + {match,[{-1,0},{3,1}]} = re:run("ABCE","(?<A>D)|(?<B>E)|(?<A>F)",[dupnames,{capture,all_names,index}]), + ok. + +inspect(doc) -> + "Test the minimal inspect function"; +inspect(Config) when is_list(Config)-> + {ok,MP} = re:compile("(?<A>A)|(?<B>B)|(?<C>C)."), + {namelist,[<<"A">>,<<"B">>,<<"C">>]} = re:inspect(MP,namelist), + {ok,MPD} = re:compile("(?<A>A)|(?<B>B)|(?<A>C).",[dupnames]), + {namelist,[<<"A">>,<<"B">>]} = re:inspect(MPD,namelist), + {ok,MPN} = re:compile("(A)|(B)|(C)."), + {namelist,[]} = re:inspect(MPN,namelist), + {'EXIT',{badarg,_}} = (catch re:inspect(MPD,namelistk)), + {'EXIT',{badarg,_}} = (catch re:inspect({re_pattern,3,0,0,<<"kalle">>},namelist)), + ok. + +opt_no_start_optimize(doc) -> + "Test that the no_start_optimize compilation flag works"; +opt_no_start_optimize(Config) when is_list(Config) -> + {match, [{3,3}]} = re:run("DEFABC","(*COMMIT)ABC",[]), % Start optimization makes this result wrong! + nomatch = re:run("DEFABC","(*COMMIT)ABC",[no_start_optimize]), % This is the correct result... + ok. + +opt_never_utf(doc) -> + "Check that the never_utf option works"; +opt_never_utf(Config) when is_list(Config) -> + {match,[{0,3}]} = re:run("ABC","ABC",[never_utf]), + {match,[{0,3}]} = re:run("ABC","(*UTF)ABC",[]), + {ok,_} = re:compile("(*UTF)ABC"), + {ok,_} = re:compile("(*UTF)ABC",[unicode]), + {ok,_} = re:compile("(*UTF8)ABC"), + {'EXIT',{badarg,_}} = (catch re:run("ABC","ABC",[unicode,never_utf])), + {'EXIT',{badarg,_}} = (catch re:run("ABC","(*UTF)ABC",[never_utf])), + {'EXIT',{badarg,_}} = (catch re:run("ABC","(*UTF8)ABC",[never_utf])), + {error,_} = (catch re:compile("ABC",[unicode,never_utf])), + {error,_} = (catch re:compile("(*UTF)ABC",[never_utf])), + {error,_} = (catch re:compile("(*UTF8)ABC",[never_utf])), + ok. +opt_ucp(doc) -> + "Check that the ucp option is passed to PCRE"; +opt_ucp(Config) when is_list(Config) -> + {match,[{0,1}]} = re:run([$a],"\\w",[unicode]), + {match,[{0,2}]} = re:run([229],"\\w",[unicode]), % Latin1 works without UCP, as we have a default + % Latin1 table + nomatch = re:run([1024],"\\w",[unicode]), % Latin1 word characters only, 1024 is not latin1 + {match,[{0,2}]} = re:run([1024],"\\w",[unicode,ucp]), % Any Unicode word character works with 'ucp' + ok. |