diff options
author | Erlang/OTP <[email protected]> | 2010-01-27 12:33:35 +0000 |
---|---|---|
committer | Erlang/OTP <[email protected]> | 2010-01-27 15:12:34 +0100 |
commit | 8beea46d65375d09ba9af07676894dc720eda6df (patch) | |
tree | f3586eaba444e699894c92fec75206a243cc53cc | |
parent | 50cc08bfabb0510ebf42170650217dfeec229ce7 (diff) | |
parent | 6b83b643a20bf502eca696f63445049e0838731a (diff) | |
download | otp-8beea46d65375d09ba9af07676894dc720eda6df.tar.gz otp-8beea46d65375d09ba9af07676894dc720eda6df.tar.bz2 otp-8beea46d65375d09ba9af07676894dc720eda6df.zip |
Merge branch 'rb/stdlib_re_unicode_fixes' into ccase/r13b04_dev
* rb/stdlib_re_unicode_fixes:
Fix lost unicode option in re:compile()
Refactor out repeated block in re module
Fix re:replace/4 to handle unicode charlist Replacement argument
Fix re:replace/4 to handle unicode charlist RE argument
Fix re:replace/4 to handle binary unicode output when nothing replaced
OTP-8394 A number of bugs concerning re and unicode are corrected:
- re:compile no longer loses the unicode option, which also fixes a bug
in re:split.
- re:replace now handles a unicode charlist Replacement argument correctly.
- re:replace now handles a unicode charlist RE argument correctly.
- re:replace now handles binary unicode output correctly when
nothing is replaced.
Most code, test cases and error isolation were done by Rory Byrne.
-rw-r--r-- | erts/emulator/beam/erl_bif_re.c | 2 | ||||
-rw-r--r-- | lib/stdlib/src/re.erl | 71 | ||||
-rw-r--r-- | lib/stdlib/test/re_SUITE.erl | 30 | ||||
-rw-r--r-- | lib/stdlib/test/re_SUITE_data/mod_testoutput8 | 877 | ||||
-rw-r--r-- | lib/stdlib/test/run_pcre_tests.erl | 50 |
5 files changed, 957 insertions, 73 deletions
diff --git a/erts/emulator/beam/erl_bif_re.c b/erts/emulator/beam/erl_bif_re.c index 6efc19597b..c027cd5984 100644 --- a/erts/emulator/beam/erl_bif_re.c +++ b/erts/emulator/beam/erl_bif_re.c @@ -884,7 +884,7 @@ re_run_3(BIF_ALIST_3) int capture_count; if (pflags & PARSE_FLAG_UNICODE && - (!is_binary(BIF_ARG_1) || + (!is_binary(BIF_ARG_2) || !is_binary(BIF_ARG_1) || (is_list_cap && !(pflags & PARSE_FLAG_GLOBAL)))) { BIF_TRAP3(urun_trap_exportp, BIF_P, BIF_ARG_1, BIF_ARG_2, BIF_ARG_3); } diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl index 5417ac02e5..296a6b3d23 100644 --- a/lib/stdlib/src/re.erl +++ b/lib/stdlib/src/re.erl @@ -1,19 +1,19 @@ %% %% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2008-2009. All Rights Reserved. -%% +%% +%% Copyright Ericsson AB 2008-2010. All Rights Reserved. +%% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in %% compliance with the License. You should have received a copy of the %% Erlang Public License along with this software. If not, it can be %% retrieved online at http://www.erlang.org/. -%% +%% %% Software distributed under the License is distributed on an "AS IS" %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See %% the License for the specific language governing rights and limitations %% under the License. -%% +%% %% %CopyrightEnd% %% -module(re). 
@@ -32,18 +32,7 @@ split(Subject,RE,Options) -> try {NewOpt,Convert,Unicode,Limit,Strip,Group} = process_split_params(Options,iodata,false,-1,false,false), - FlatSubject = - case is_binary(Subject) of - true -> - Subject; - false -> - case Unicode of - true -> - unicode:characters_to_binary(Subject,unicode); - false -> - iolist_to_binary(Subject) - end - end, + FlatSubject = to_binary(Subject, Unicode), case compile_split(RE,NewOpt) of {error,_Err} -> throw(badre); @@ -217,19 +206,9 @@ replace(Subject,RE,Replacement,Options) -> try {NewOpt,Convert,Unicode} = process_repl_params(Options,iodata,false), - FlatSubject = - case is_binary(Subject) of - true -> - Subject; - false -> - case Unicode of - true -> - unicode:characters_to_binary(Subject,unicode); - false -> - iolist_to_binary(Subject) - end - end, - case do_replace(FlatSubject,Subject,RE,Replacement,NewOpt) of + FlatSubject = to_binary(Subject, Unicode), + FlatReplacement = to_binary(Replacement, Unicode), + case do_replace(FlatSubject,Subject,RE,FlatReplacement,NewOpt) of {error,_Err} -> throw(badre); IoList -> @@ -237,7 +216,12 @@ replace(Subject,RE,Replacement,Options) -> iodata -> IoList; binary -> - iolist_to_binary(IoList); + case Unicode of + false -> + iolist_to_binary(IoList); + true -> + unicode:characters_to_binary(IoList,unicode) + end; list -> case Unicode of false -> @@ -324,8 +308,7 @@ process_split_params([H|T],C,U,L,S,G) -> {[H|NT],NC,NU,NL,NS,NG}. apply_mlist(Subject,Replacement,Mlist) -> - do_mlist(Subject,Subject,0,precomp_repl(iolist_to_binary(Replacement)), - Mlist). + do_mlist(Subject,Subject,0,precomp_repl(Replacement), Mlist). 
precomp_repl(<<>>) -> @@ -545,7 +528,7 @@ process_uparams([],Type) -> ucompile(RE,Options) -> try - re:compile(unicode:characters_to_binary(RE,unicode)) + re:compile(unicode:characters_to_binary(RE,unicode),Options) catch error:AnyError -> {'EXIT',{new_stacktrace,[{Mod,_,L}|Rest]}} = @@ -618,18 +601,7 @@ grun(Subject,RE,{Options,NeedClean,OrigRE}) -> grun2(Subject,RE,{Options,NeedClean}) -> Unicode = check_for_unicode(RE,Options), - FlatSubject = - case is_binary(Subject) of - true -> - Subject; - false -> - case Unicode of - true -> - unicode:characters_to_binary(Subject,unicode); - false -> - iolist_to_binary(Subject) - end - end, + FlatSubject = to_binary(Subject, Unicode), do_grun(FlatSubject,Subject,Unicode,RE,{Options,NeedClean}). do_grun(FlatSubject,Subject,Unicode,RE,{Options0,NeedClean}) -> @@ -749,3 +721,10 @@ runopt(global) -> true; runopt(_) -> false. + +to_binary(Bin, _IsUnicode) when is_binary(Bin) -> + Bin; +to_binary(Data, true) -> + unicode:characters_to_binary(Data,unicode); +to_binary(Data, false) -> + iolist_to_binary(Data). diff --git a/lib/stdlib/test/re_SUITE.erl b/lib/stdlib/test/re_SUITE.erl index 98eb66d1fb..fa50ba3b7a 100644 --- a/lib/stdlib/test/re_SUITE.erl +++ b/lib/stdlib/test/re_SUITE.erl @@ -1,29 +1,29 @@ %% %% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2008-2009. All Rights Reserved. -%% +%% +%% Copyright Ericsson AB 2008-2010. All Rights Reserved. +%% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in %% compliance with the License. You should have received a copy of the %% Erlang Public License along with this software. If not, it can be %% retrieved online at http://www.erlang.org/. -%% +%% %% Software distributed under the License is distributed on an "AS IS" %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See %% the License for the specific language governing rights and limitations %% under the License. 
-%% +%% %% %CopyrightEnd% %% -module(re_SUITE). --export([all/1, pcre/1,compile_options/1,run_options/1,combined_options/1,replace_autogen/1,global_capture/1,replace_return/1,split_autogen/1,split_options/1,split_specials/1,error_handling/1]). +-export([all/1, pcre/1,compile_options/1,run_options/1,combined_options/1,replace_autogen/1,global_capture/1,replace_input_types/1,replace_return/1,split_autogen/1,split_options/1,split_specials/1,error_handling/1]). -include("test_server.hrl"). -include_lib("kernel/include/file.hrl"). -all(suite) -> [pcre,compile_options,run_options,combined_options,replace_autogen,global_capture,replace_return,split_autogen,split_options,split_specials,error_handling]. +all(suite) -> [pcre,compile_options,run_options,combined_options,replace_autogen,global_capture,replace_input_types,replace_return,split_autogen,split_options,split_specials,error_handling]. pcre(doc) -> ["Run all applicable tests from the PCRE testsuites."]; @@ -268,7 +268,17 @@ global_capture(Config) when is_list(Config) -> ?line {match,[[{3,5},{5,3}],[{11,4},{12,3}]]} = re:run("ABC�bcdABCabcdA",".(?<FOO>bcd)",[global,{capture,all,index},unicode]), ?t:timetrap_cancel(Dog), ok. - + +replace_input_types(doc) -> + ["Tests replace with different input types"]; +replace_input_types(Config) when is_list(Config) -> + Dog = ?t:timetrap(?t:minutes(3)), + ?line <<"abcd">> = re:replace("abcd","Z","X",[{return,binary},unicode]), + ?line <<"abcd">> = re:replace("abcd","\x{400}","X",[{return,binary},unicode]), + ?line <<"a",208,128,"cd">> = re:replace(<<"abcd">>,"b","\x{400}",[{return,binary},unicode]), + ?t:timetrap_cancel(Dog), + ok. 
+ replace_return(doc) -> ["Tests return options of replace together with global searching"]; replace_return(Config) when is_list(Config) -> @@ -289,6 +299,10 @@ replace_return(Config) when is_list(Config) -> ?line <<"iXk">> = re:replace("abcdefghijk","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\9X",[{return,binary}]), ?line <<"jXk">> = re:replace("abcdefghijk","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\10X",[{return,binary}]), ?line <<"Xk">> = re:replace("abcdefghijk","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\11X",[{return,binary}]), + ?line "a\x{400}bcX" = re:replace("a\x{400}bcd","d","X",[global,{return,list},unicode]), + ?line <<"a",208,128,"bcX">> = re:replace("a\x{400}bcd","d","X",[global,{return,binary},unicode]), + ?line "a\x{400}bcd" = re:replace("a\x{400}bcd","Z","X",[global,{return,list},unicode]), + ?line <<"a",208,128,"bcd">> = re:replace("a\x{400}bcd","Z","X",[global,{return,binary},unicode]), ?t:timetrap_cancel(Dog), ok. diff --git a/lib/stdlib/test/re_SUITE_data/mod_testoutput8 b/lib/stdlib/test/re_SUITE_data/mod_testoutput8 new file mode 100644 index 0000000000..b7e7b02d6c --- /dev/null +++ b/lib/stdlib/test/re_SUITE_data/mod_testoutput8 @@ -0,0 +1,877 @@ +/-- Do not use the \x{} construct except with patterns that have the --/ +/-- /8 option set, because PCRE doesn't recognize them as UTF-8 unless --/ +No match +/-- that option is set. However, the latest Perls recognize them always. 
--/ +No match + +\x{100}ab/8 + \x{100}ab + 0: \x{100}ab + +/a\x{100}*b/8 + ab + 0: ab + a\x{100}b + 0: a\x{100}b + a\x{100}\x{100}b + 0: a\x{100}\x{100}b + +/a\x{100}+b/8 + a\x{100}b + 0: a\x{100}b + a\x{100}\x{100}b + 0: a\x{100}\x{100}b + *** Failers +No match + ab +No match + +/\bX/8 + Xoanon + 0: X + +Xoanon + 0: X + \x{300}Xoanon + 0: X + *** Failers +No match + YXoanon +No match + +/\BX/8 + YXoanon + 0: X + *** Failers +No match + Xoanon +No match + +Xoanon +No match + \x{300}Xoanon +No match + +/X\b/8 + X+oanon + 0: X + ZX\x{300}oanon + 0: X + FAX + 0: X + *** Failers +No match + Xoanon +No match + +/X\B/8 + Xoanon + 0: X + *** Failers +No match + X+oanon +No match + ZX\x{300}oanon +No match + FAX +No match + +/[^a]/8 + abcd + 0: b + a\x{100} + 0: \x{100} + +/^[abc\x{123}\x{400}-\x{402}]{2,3}\d/8 + ab99 + 0: ab9 + \x{123}\x{123}45 + 0: \x{123}\x{123}4 + \x{400}\x{401}\x{402}6 + 0: \x{400}\x{401}\x{402}6 + *** Failers +No match + d99 +No match + \x{123}\x{122}4 +No match + \x{400}\x{403}6 +No match + \x{400}\x{401}\x{402}\x{402}6 +No match + +/abc/8 + �] +Error -10 + � +Error -10 + ��� +Error -10 + ���\? 
+No match + +/a.b/8 + acb + 0: acb + a\x7fb + 0: a\x{7f}b + a\x{100}b + 0: a\x{100}b + *** Failers +No match + a\nb +No match + +/^[a\x{c0}]/8 + *** Failers +No match + \x{100} +No match + +/(?<=aXb)cd/8 + aXbcd + 0: cd + +/(?<=a\x{100}b)cd/8 + a\x{100}bcd + 0: cd + +/(?<=a\x{100000}b)cd/8 + a\x{100000}bcd + 0: cd + +/(?:\x{100}){3}b/8 + \x{100}\x{100}\x{100}b + 0: \x{100}\x{100}\x{100}b + *** Failers +No match + \x{100}\x{100}b +No match + +/\x{ab}/8 + \x{ab} + 0: \x{ab} + \xc2\xab + 0: \x{ab} + *** Failers +No match + \x00{ab} +No match + +/^[^a]{2}/8 + \x{100}bc + 0: \x{100}b + +/^[^a]{2,}/8 + \x{100}bcAa + 0: \x{100}bcA + +/^[^a]{2,}?/8 + \x{100}bca + 0: \x{100}b + +/^[^a]{2}/8i + \x{100}bc + 0: \x{100}b + +/^[^a]{2,}/8i + \x{100}bcAa + 0: \x{100}bc + +/^[^a]{2,}?/8iU + \x{100}bca + 0: \x{100}bc + +/\x{100}{0,0}/8 + abcd + 0: + +/\x{100}?/8 + abcd + 0: + \x{100}\x{100} + 0: \x{100} + +/\x{100}{0,3}/8 + \x{100}\x{100} + 0: \x{100}\x{100} + \x{100}\x{100}\x{100}\x{100} + 0: \x{100}\x{100}\x{100} + +/\x{100}*/8 + abce + 0: + \x{100}\x{100}\x{100}\x{100} + 0: \x{100}\x{100}\x{100}\x{100} + +/\x{100}{1,1}/8 + abcd\x{100}\x{100}\x{100}\x{100} + 0: \x{100} + +/\x{100}{1,3}/8 + abcd\x{100}\x{100}\x{100}\x{100} + 0: \x{100}\x{100}\x{100} + +/\x{100}+/8 + abcd\x{100}\x{100}\x{100}\x{100} + 0: \x{100}\x{100}\x{100}\x{100} + +/\x{100}{3}/8 + abcd\x{100}\x{100}\x{100}XX + 0: \x{100}\x{100}\x{100} + +/\x{100}{3,5}/8 + abcd\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}XX + 0: \x{100}\x{100}\x{100}\x{100}\x{100} + +/\x{100}{3,}/8 + abcd\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}XX + 0: \x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} + +/(?<=a\x{100}{2}b)X/8 + Xyyya\x{100}\x{100}bXzzz + 0: X + +/\D*/8 + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 0: 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + +/\D*/8 + \x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} + 0: 
\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} + +/\D/8 + 1X2 + 0: X + 1\x{100}2 + 0: \x{100} + +/>\S/8 + > >X Y + 0: >X + > >\x{100} Y + 0: >\x{100} + +/\d/8 + \x{100}3 + 0: 3 + +/\s/8 + \x{100} X + 0: + +/\D+/8 + 12abcd34 + 0: abcd + *** Failers + 0: *** Failers + 1234 +No match + +/\d+/8 + 12abcd34 + 0: 12 + *** Failers +No match + +/\d{2,3}/8 + 12abcd34 + 0: 12 + 1234abcd + 0: 123 + *** Failers +No match + 1.4 +No match + +/\S+/8 + 12abcd34 + 0: 12abcd34 + *** Failers + 0: *** + \ \ 
+No match + +/>\s+</8 + 12> <34 + 0: > < + *** Failers +No match + +/>\s{2,3}</8 + ab> <cd + 0: > < + ab> <ce + 0: > < + *** Failers +No match + ab> <cd +No match + +/>\s{2,3}?</8 + ab> <cd + 0: > < + ab> <ce + 0: > < + *** Failers +No match + ab> <cd +No match + +/\w+/8 + 12 34 + 0: 12 + *** Failers + 0: Failers + +++=*! +No match + +/\w{2,3}/8 + ab cd + 0: ab + abcd ce + 0: abc + *** Failers + 0: Fai + a.b.c +No match + +/\W+/8 + 12====34 + 0: ==== + *** Failers + 0: *** + abcd +No match + +/\W{2,3}/8 + ab====cd + 0: === + ab==cd + 0: == + *** Failers + 0: *** + a.b.c +No match + +/\W{2,3}?/8U + ab====cd + 0: === + ab==cd + 0: == + *** Failers + 0: *** + a.b.c +No match + +/[\x{100}]/8 + \x{100} + 0: \x{100} + Z\x{100} + 0: \x{100} + \x{100}Z + 0: \x{100} + *** Failers +No match + +/[Z\x{100}]/8 + Z\x{100} + 0: Z + \x{100} + 0: \x{100} + \x{100}Z + 0: \x{100} + *** Failers +No match + +/[\x{100}\x{200}]/8 + ab\x{100}cd + 0: \x{100} + ab\x{200}cd + 0: \x{200} + *** Failers +No match + +/[\x{100}-\x{200}]/8 + ab\x{100}cd + 0: \x{100} + ab\x{200}cd + 0: \x{200} + ab\x{111}cd + 0: \x{111} + *** Failers +No match + +/[z-\x{200}]/8 + ab\x{100}cd + 0: \x{100} + ab\x{200}cd + 0: \x{200} + ab\x{111}cd + 0: \x{111} + abzcd + 0: z + ab|cd + 0: | + *** Failers +No match + +/[Q\x{100}\x{200}]/8 + ab\x{100}cd + 0: \x{100} + ab\x{200}cd + 0: \x{200} + Q? + 0: Q + *** Failers +No match + +/[Q\x{100}-\x{200}]/8 + ab\x{100}cd + 0: \x{100} + ab\x{200}cd + 0: \x{200} + ab\x{111}cd + 0: \x{111} + Q? + 0: Q + *** Failers +No match + +/[Qz-\x{200}]/8 + ab\x{100}cd + 0: \x{100} + ab\x{200}cd + 0: \x{200} + ab\x{111}cd + 0: \x{111} + abzcd + 0: z + ab|cd + 0: | + Q? 
+ 0: Q + *** Failers +No match + +/[\x{100}\x{200}]{1,3}/8 + ab\x{100}cd + 0: \x{100} + ab\x{200}cd + 0: \x{200} + ab\x{200}\x{100}\x{200}\x{100}cd + 0: \x{200}\x{100}\x{200} + *** Failers +No match + +/[\x{100}\x{200}]{1,3}?/8U + ab\x{100}cd + 0: \x{100} + ab\x{200}cd + 0: \x{200} + ab\x{200}\x{100}\x{200}\x{100}cd + 0: \x{200}\x{100}\x{200} + *** Failers +No match + +/[Q\x{100}\x{200}]{1,3}/8 + ab\x{100}cd + 0: \x{100} + ab\x{200}cd + 0: \x{200} + ab\x{200}\x{100}\x{200}\x{100}cd + 0: \x{200}\x{100}\x{200} + *** Failers +No match + +/[Q\x{100}\x{200}]{1,3}?/8U + ab\x{100}cd + 0: \x{100} + ab\x{200}cd + 0: \x{200} + ab\x{200}\x{100}\x{200}\x{100}cd + 0: \x{200}\x{100}\x{200} + *** Failers +No match + +/(?<=[\x{100}\x{200}])X/8 + abc\x{200}X + 0: X + abc\x{100}X + 0: X + *** Failers +No match + X +No match + +/(?<=[Q\x{100}\x{200}])X/8 + abc\x{200}X + 0: X + abc\x{100}X + 0: X + abQX + 0: X + *** Failers +No match + X +No match + +/(?<=[\x{100}\x{200}]{3})X/8 + abc\x{100}\x{200}\x{100}X + 0: X + *** Failers +No match + abc\x{200}X +No match + X +No match + +/[^\x{100}\x{200}]X/8 + AX + 0: AX + \x{150}X + 0: \x{150}X + \x{500}X + 0: \x{500}X + *** Failers +No match + \x{100}X +No match + \x{200}X +No match + +/[^Q\x{100}\x{200}]X/8 + AX + 0: AX + \x{150}X + 0: \x{150}X + \x{500}X + 0: \x{500}X + *** Failers +No match + \x{100}X +No match + \x{200}X +No match + QX +No match + +/[^\x{100}-\x{200}]X/8 + AX + 0: AX + \x{500}X + 0: \x{500}X + *** Failers +No match + \x{100}X +No match + \x{150}X +No match + \x{200}X +No match + +/[z-\x{100}]/8i + z + 0: z + Z + 0: Z + \x{100} + 0: \x{100} + *** Failers +No match + \x{102} +No match + y +No match + +/[\xFF]/ + >\xff< + 0: \xff + +/[\xff]/8 + >\x{ff}< + 0: \x{ff} + +/[^\xFF]/ + XYZ + 0: X + +/[^\xff]/8 + XYZ + 0: X + \x{123} + 0: \x{123} + +/^[ac]*b/8 + xb +No match + +/^[ac\x{100}]*b/8 + xb +No match + +/^[^x]*b/8i + xb +No match + +/^[^x]*b/8 + xb +No match + +/^\d*b/8 + xb +No match + +/^\x{85}$/8i + \x{85} + 0: \x{85} 
+ +/^abc./mgx8<any> + abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x{0085}abc7 \x{2028}abc8 \x{2029}abc9 JUNK + 0: abc1 + 0: abc2 + 0: abc3 + 0: abc4 + 0: abc5 + 0: abc6 + 0: abc7 + 0: abc8 + 0: abc9 + +/abc.$/mgx8<any> + abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x{0085} abc7\x{2028} abc8\x{2029} abc9 + 0: abc1 + 0: abc2 + 0: abc3 + 0: abc4 + 0: abc5 + 0: abc6 + 0: abc7 + 0: abc8 + 0: abc9 + +/^a\Rb/8<bsr_unicode> + a\nb + 0: a\x{0a}b + a\rb + 0: a\x{0d}b + a\r\nb + 0: a\x{0d}\x{0a}b + a\x0bb + 0: a\x{0b}b + a\x0cb + 0: a\x{0c}b + a\x{85}b + 0: a\x{85}b + a\x{2028}b + 0: a\x{2028}b + a\x{2029}b + 0: a\x{2029}b + ** Failers +No match + a\n\rb +No match + +/^a\R*b/8<bsr_unicode> + ab + 0: ab + a\nb + 0: a\x{0a}b + a\rb + 0: a\x{0d}b + a\r\nb + 0: a\x{0d}\x{0a}b + a\x0bb + 0: a\x{0b}b + a\x0c\x{2028}\x{2029}b + 0: a\x{0c}\x{2028}\x{2029}b + a\x{85}b + 0: a\x{85}b + a\n\rb + 0: a\x{0a}\x{0d}b + a\n\r\x{85}\x0cb + 0: a\x{0a}\x{0d}\x{85}\x{0c}b + +/^a\R+b/8<bsr_unicode> + a\nb + 0: a\x{0a}b + a\rb + 0: a\x{0d}b + a\r\nb + 0: a\x{0d}\x{0a}b + a\x0bb + 0: a\x{0b}b + a\x0c\x{2028}\x{2029}b + 0: a\x{0c}\x{2028}\x{2029}b + a\x{85}b + 0: a\x{85}b + a\n\rb + 0: a\x{0a}\x{0d}b + a\n\r\x{85}\x0cb + 0: a\x{0a}\x{0d}\x{85}\x{0c}b + ** Failers +No match + ab +No match + +/^a\R{1,3}b/8<bsr_unicode> + a\nb + 0: a\x{0a}b + a\n\rb + 0: a\x{0a}\x{0d}b + a\n\r\x{85}b + 0: a\x{0a}\x{0d}\x{85}b + a\r\n\r\nb + 0: a\x{0d}\x{0a}\x{0d}\x{0a}b + a\r\n\r\n\r\nb + 0: a\x{0d}\x{0a}\x{0d}\x{0a}\x{0d}\x{0a}b + a\n\r\n\rb + 0: a\x{0a}\x{0d}\x{0a}\x{0d}b + a\n\n\r\nb + 0: a\x{0a}\x{0a}\x{0d}\x{0a}b + ** Failers +No match + a\n\n\n\rb +No match + a\r +No match + +/\h+\V?\v{3,4}/8 + \x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a + 0: \x{09} \x{a0}X\x{0a}\x{0b}\x{0c}\x{0d} + +/\V?\v{3,4}/8 + \x20\x{a0}X\x0a\x0b\x0c\x0d\x0a + 0: X\x{0a}\x{0b}\x{0c}\x{0d} + +/\h+\V?\v{3,4}/8 + >\x09\x20\x{a0}X\x0a\x0a\x0a< + 0: \x{09} \x{a0}X\x{0a}\x{0a}\x{0a} + +/\V?\v{3,4}/8 + >\x09\x20\x{a0}X\x0a\x0a\x0a< 
+ 0: X\x{0a}\x{0a}\x{0a} + +/\H\h\V\v/8 + X X\x0a + 0: X X\x{0a} + X\x09X\x0b + 0: X\x{09}X\x{0b} + ** Failers +No match + \x{a0} X\x0a +No match + +/\H*\h+\V?\v{3,4}/8 + \x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a + 0: \x{09} \x{a0}X\x{0a}\x{0b}\x{0c}\x{0d} + \x09\x20\x{a0}\x0a\x0b\x0c\x0d\x0a + 0: \x{09} \x{a0}\x{0a}\x{0b}\x{0c}\x{0d} + \x09\x20\x{a0}\x0a\x0b\x0c + 0: \x{09} \x{a0}\x{0a}\x{0b}\x{0c} + ** Failers +No match + \x09\x20\x{a0}\x0a\x0b +No match + +/\H\h\V\v/8 + \x{3001}\x{3000}\x{2030}\x{2028} + 0: \x{3001}\x{3000}\x{2030}\x{2028} + X\x{180e}X\x{85} + 0: X\x{180e}X\x{85} + ** Failers +No match + \x{2009} X\x0a +No match + +/\H*\h+\V?\v{3,4}/8 + \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x0c\x0d\x0a + 0: \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x{0c}\x{0d} + \x09\x{205f}\x{a0}\x0a\x{2029}\x0c\x{2028}\x0a + 0: \x{09}\x{205f}\x{a0}\x{0a}\x{2029}\x{0c}\x{2028} + \x09\x20\x{202f}\x0a\x0b\x0c + 0: \x{09} \x{202f}\x{0a}\x{0b}\x{0c} + ** Failers +No match + \x09\x{200a}\x{a0}\x{2028}\x0b +No match + +/a\Rb/I8<bsr_anycrlf> +Capturing subpattern count = 0 +Options: bsr_anycrlf utf8 +First char = 'a' +Need char = 'b' + a\rb + 0: a\x{0d}b + a\nb + 0: a\x{0a}b + a\r\nb + 0: a\x{0d}\x{0a}b + ** Failers +No match + a\x{85}b +No match + a\x0bb +No match + +/a\Rb/I8<bsr_unicode> +Capturing subpattern count = 0 +Options: bsr_unicode utf8 +First char = 'a' +Need char = 'b' + a\rb + 0: a\x{0d}b + a\nb + 0: a\x{0a}b + a\r\nb + 0: a\x{0d}\x{0a}b + a\x{85}b + 0: a\x{85}b + a\x0bb + 0: a\x{0b}b + ** Failers +No match + a\x{85}b\<bsr_anycrlf> +No match + a\x0bb\<bsr_anycrlf> +No match + +/a\R?b/I8<bsr_anycrlf> +Capturing subpattern count = 0 +Options: bsr_anycrlf utf8 +First char = 'a' +Need char = 'b' + a\rb + 0: a\x{0d}b + a\nb + 0: a\x{0a}b + a\r\nb + 0: a\x{0d}\x{0a}b + ** Failers +No match + a\x{85}b +No match + a\x0bb +No match + +/a\R?b/I8<bsr_unicode> +Capturing subpattern count = 0 +Options: bsr_unicode utf8 +First char = 'a' +Need char = 'b' + a\rb + 0: a\x{0d}b + a\nb + 0: 
a\x{0a}b + a\r\nb + 0: a\x{0d}\x{0a}b + a\x{85}b + 0: a\x{85}b + a\x0bb + 0: a\x{0b}b + ** Failers +No match + a\x{85}b\<bsr_anycrlf> +No match + a\x0bb\<bsr_anycrlf> +No match + +/ End of testinput 8 / diff --git a/lib/stdlib/test/run_pcre_tests.erl b/lib/stdlib/test/run_pcre_tests.erl index 0ef3986918..8c6424e708 100644 --- a/lib/stdlib/test/run_pcre_tests.erl +++ b/lib/stdlib/test/run_pcre_tests.erl @@ -1,19 +1,19 @@ %% %% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2008-2009. All Rights Reserved. -%% +%% +%% Copyright Ericsson AB 2008-2010. All Rights Reserved. +%% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in %% compliance with the License. You should have received a copy of the %% Erlang Public License along with this software. If not, it can be %% retrieved online at http://www.erlang.org/. -%% +%% %% Software distributed under the License is distributed on an "AS IS" %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See %% the License for the specific language governing rights and limitations %% under the License. -%% +%% %% %CopyrightEnd% %% -module(run_pcre_tests). 
@@ -25,7 +25,7 @@ test(RootDir) -> erts_debug:set_internal_state(available_internal_state,true), io:format("oldlimit: ~p~n",[ erts_debug:set_internal_state(re_loop_limit,10)]), Testfiles0 = ["testoutput1", "testoutput2", "testoutput3", "testoutput4", - "testoutput5", "testoutput6", "testoutput10"], + "testoutput5", "testoutput6","mod_testoutput8","testoutput10"], Testfiles = [ filename:join([RootDir,FN]) || FN <- Testfiles0 ], Res = [ begin io:format("~s~n",[X]), t(X) end || X <- Testfiles ], io:format("limit was: ~p~n",[ erts_debug:set_internal_state(re_loop_limit,default)]), @@ -42,12 +42,14 @@ t(OneFile,Num) -> put(error_limit,Num), put(skipped,0), Res = - [test(Structured,true,index), - test(Structured,false,index), - test(Structured,true,binary), - test(Structured,false,binary), - test(Structured,true,list), - test(Structured,false,list)], + [test(Structured,true,index,false), + test(Structured,false,index,false), + test(Structured,true,index,true), + test(Structured,false,index,true), + test(Structured,true,binary,false), + test(Structured,false,binary,false), + test(Structured,true,list,false), + test(Structured,false,list,false)], {lists:sum(Res),length(Structured)*6,get(skipped)}. @@ -63,11 +65,21 @@ pick_exec_options([Opt|T]) -> pick_exec_options([]) -> {[],[]}. 
-test([],_,_) -> +test([],_,_,_) -> 0; -test([{RE,Line,Options0,Tests}|T],PreCompile,XMode) -> +test([{RE0,Line,Options0,Tests}|T],PreCompile,XMode,REAsList) -> %io:format("."), %case RE of <<>> -> io:format("Empty re:~w~n",[Line]); _ -> ok end, + Unicode = lists:member(unicode,Options0), + RE = case REAsList of + true -> + if + Unicode -> unicode:characters_to_list(RE0); + true -> binary_to_list(RE0) + end; + false -> + RE0 + end, {Options,ExecOptions} = pick_exec_options(Options0), {Cres, Xopt} = case PreCompile of true -> @@ -80,7 +92,7 @@ test([{RE,Line,Options0,Tests}|T],PreCompile,XMode) -> %erlang:display({testrun,RE,P,Tests,ExecOptions,Xopt,XMode}), case (catch testrun(RE,P,Tests,ExecOptions,Xopt,XMode)) of N when is_integer(N) -> - N + test(T,PreCompile,XMode); + N + test(T,PreCompile,XMode,REAsList); limit -> io:format("Error limit reached.~n"), 1; @@ -91,12 +103,12 @@ test([{RE,Line,Options0,Tests}|T],PreCompile,XMode) -> _ -> put(skipped,1) end, - test(T,PreCompile,XMode) + test(T,PreCompile,XMode,REAsList) end; {error,Err} -> io:format("Compile error(~w): ~w~n",[Line,Err]), case get(error_limit) of - infinite -> 1 + test(T,PreCompile,XMode); + infinite -> 1 + test(T,PreCompile,XMode,REAsList); X -> case X-1 of Y when Y =< 0 -> @@ -104,7 +116,7 @@ test([{RE,Line,Options0,Tests}|T],PreCompile,XMode) -> 1; Y -> put(error_limit,Y), - 1 + test(T,PreCompile,XMode) + 1 + test(T,PreCompile,XMode,REAsList) end end end. @@ -549,6 +561,8 @@ tr_option($N) -> [no_auto_capture]; tr_option($8) -> [unicode]; +tr_option($U) -> + [ungreedy]; tr_option($g) -> [{exec_option,g}]; tr_option(_) -> |