about | summary | refs | log | tree | commit | diff | stats
path: root/lib
diff options
context:
space:
mode:
author: Erlang/OTP <otp@erlang.org> 2010-01-27 12:33:35 +0000
committer: Erlang/OTP <otp@erlang.org> 2010-01-27 15:12:34 +0100
commit: 8beea46d65375d09ba9af07676894dc720eda6df (patch)
tree: f3586eaba444e699894c92fec75206a243cc53cc /lib
parent: 50cc08bfabb0510ebf42170650217dfeec229ce7 (diff)
parent: 6b83b643a20bf502eca696f63445049e0838731a (diff)
download: otp-8beea46d65375d09ba9af07676894dc720eda6df.tar.gz
otp-8beea46d65375d09ba9af07676894dc720eda6df.tar.bz2
otp-8beea46d65375d09ba9af07676894dc720eda6df.zip
Merge branch 'rb/stdlib_re_unicode_fixes' into ccase/r13b04_dev
* rb/stdlib_re_unicode_fixes:
  Fix lost unicode option in re:compile()
  Refactor out repeated block in re module
  Fix re:replace/4 to handle unicode charlist Replacement argument
  Fix re:replace/4 to handle unicode charlist RE argument
  Fix re:replace/4 to handle binary unicode output when nothing replaced

OTP-8394 A number of bugs concerning re and unicode are corrected:
- re:compile no longer loses unicode option, which also fixes bug in re:split.
- re:replace now handles unicode charlist replacement argument
- re:replace now handles unicode RE charlist argument correctly
- re:replace now handles binary unicode output correctly when nothing is replaced.

Most code, testcases and error isolation done by Rory Byrne.
Diffstat (limited to 'lib')
-rw-r--r-- lib/stdlib/src/re.erl 71
-rw-r--r-- lib/stdlib/test/re_SUITE.erl 30
-rw-r--r-- lib/stdlib/test/re_SUITE_data/mod_testoutput8 877
-rw-r--r-- lib/stdlib/test/run_pcre_tests.erl 50
4 files changed, 956 insertions, 72 deletions
diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl
index 5417ac02e5..296a6b3d23 100644
--- a/lib/stdlib/src/re.erl
+++ b/lib/stdlib/src/re.erl
@@ -1,19 +1,19 @@
%%
%% %CopyrightBegin%
-%%
-%% Copyright Ericsson AB 2008-2009. All Rights Reserved.
-%%
+%%
+%% Copyright Ericsson AB 2008-2010. All Rights Reserved.
+%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
-%%
+%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
-%%
+%%
%% %CopyrightEnd%
%%
-module(re).
@@ -32,18 +32,7 @@ split(Subject,RE,Options) ->
try
{NewOpt,Convert,Unicode,Limit,Strip,Group} =
process_split_params(Options,iodata,false,-1,false,false),
- FlatSubject =
- case is_binary(Subject) of
- true ->
- Subject;
- false ->
- case Unicode of
- true ->
- unicode:characters_to_binary(Subject,unicode);
- false ->
- iolist_to_binary(Subject)
- end
- end,
+ FlatSubject = to_binary(Subject, Unicode),
case compile_split(RE,NewOpt) of
{error,_Err} ->
throw(badre);
@@ -217,19 +206,9 @@ replace(Subject,RE,Replacement,Options) ->
try
{NewOpt,Convert,Unicode} =
process_repl_params(Options,iodata,false),
- FlatSubject =
- case is_binary(Subject) of
- true ->
- Subject;
- false ->
- case Unicode of
- true ->
- unicode:characters_to_binary(Subject,unicode);
- false ->
- iolist_to_binary(Subject)
- end
- end,
- case do_replace(FlatSubject,Subject,RE,Replacement,NewOpt) of
+ FlatSubject = to_binary(Subject, Unicode),
+ FlatReplacement = to_binary(Replacement, Unicode),
+ case do_replace(FlatSubject,Subject,RE,FlatReplacement,NewOpt) of
{error,_Err} ->
throw(badre);
IoList ->
@@ -237,7 +216,12 @@ replace(Subject,RE,Replacement,Options) ->
iodata ->
IoList;
binary ->
- iolist_to_binary(IoList);
+ case Unicode of
+ false ->
+ iolist_to_binary(IoList);
+ true ->
+ unicode:characters_to_binary(IoList,unicode)
+ end;
list ->
case Unicode of
false ->
@@ -324,8 +308,7 @@ process_split_params([H|T],C,U,L,S,G) ->
{[H|NT],NC,NU,NL,NS,NG}.
apply_mlist(Subject,Replacement,Mlist) ->
- do_mlist(Subject,Subject,0,precomp_repl(iolist_to_binary(Replacement)),
- Mlist).
+ do_mlist(Subject,Subject,0,precomp_repl(Replacement), Mlist).
precomp_repl(<<>>) ->
@@ -545,7 +528,7 @@ process_uparams([],Type) ->
ucompile(RE,Options) ->
try
- re:compile(unicode:characters_to_binary(RE,unicode))
+ re:compile(unicode:characters_to_binary(RE,unicode),Options)
catch
error:AnyError ->
{'EXIT',{new_stacktrace,[{Mod,_,L}|Rest]}} =
@@ -618,18 +601,7 @@ grun(Subject,RE,{Options,NeedClean,OrigRE}) ->
grun2(Subject,RE,{Options,NeedClean}) ->
Unicode = check_for_unicode(RE,Options),
- FlatSubject =
- case is_binary(Subject) of
- true ->
- Subject;
- false ->
- case Unicode of
- true ->
- unicode:characters_to_binary(Subject,unicode);
- false ->
- iolist_to_binary(Subject)
- end
- end,
+ FlatSubject = to_binary(Subject, Unicode),
do_grun(FlatSubject,Subject,Unicode,RE,{Options,NeedClean}).
do_grun(FlatSubject,Subject,Unicode,RE,{Options0,NeedClean}) ->
@@ -749,3 +721,10 @@ runopt(global) ->
true;
runopt(_) ->
false.
+
+to_binary(Bin, _IsUnicode) when is_binary(Bin) ->
+ Bin;
+to_binary(Data, true) ->
+ unicode:characters_to_binary(Data,unicode);
+to_binary(Data, false) ->
+ iolist_to_binary(Data).
diff --git a/lib/stdlib/test/re_SUITE.erl b/lib/stdlib/test/re_SUITE.erl
index 98eb66d1fb..fa50ba3b7a 100644
--- a/lib/stdlib/test/re_SUITE.erl
+++ b/lib/stdlib/test/re_SUITE.erl
@@ -1,29 +1,29 @@
%%
%% %CopyrightBegin%
-%%
-%% Copyright Ericsson AB 2008-2009. All Rights Reserved.
-%%
+%%
+%% Copyright Ericsson AB 2008-2010. All Rights Reserved.
+%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
-%%
+%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
-%%
+%%
%% %CopyrightEnd%
%%
-module(re_SUITE).
--export([all/1, pcre/1,compile_options/1,run_options/1,combined_options/1,replace_autogen/1,global_capture/1,replace_return/1,split_autogen/1,split_options/1,split_specials/1,error_handling/1]).
+-export([all/1, pcre/1,compile_options/1,run_options/1,combined_options/1,replace_autogen/1,global_capture/1,replace_input_types/1,replace_return/1,split_autogen/1,split_options/1,split_specials/1,error_handling/1]).
-include("test_server.hrl").
-include_lib("kernel/include/file.hrl").
-all(suite) -> [pcre,compile_options,run_options,combined_options,replace_autogen,global_capture,replace_return,split_autogen,split_options,split_specials,error_handling].
+all(suite) -> [pcre,compile_options,run_options,combined_options,replace_autogen,global_capture,replace_input_types,replace_return,split_autogen,split_options,split_specials,error_handling].
pcre(doc) ->
["Run all applicable tests from the PCRE testsuites."];
@@ -268,7 +268,17 @@ global_capture(Config) when is_list(Config) ->
?line {match,[[{3,5},{5,3}],[{11,4},{12,3}]]} = re:run("ABC�bcdABCabcdA",".(?<FOO>bcd)",[global,{capture,all,index},unicode]),
?t:timetrap_cancel(Dog),
ok.
-
+
+replace_input_types(doc) ->
+ ["Tests replace with different input types"];
+replace_input_types(Config) when is_list(Config) ->
+ Dog = ?t:timetrap(?t:minutes(3)),
+ ?line <<"abcd">> = re:replace("abcd","Z","X",[{return,binary},unicode]),
+ ?line <<"abcd">> = re:replace("abcd","\x{400}","X",[{return,binary},unicode]),
+ ?line <<"a",208,128,"cd">> = re:replace(<<"abcd">>,"b","\x{400}",[{return,binary},unicode]),
+ ?t:timetrap_cancel(Dog),
+ ok.
+
replace_return(doc) ->
["Tests return options of replace together with global searching"];
replace_return(Config) when is_list(Config) ->
@@ -289,6 +299,10 @@ replace_return(Config) when is_list(Config) ->
?line <<"iXk">> = re:replace("abcdefghijk","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\9X",[{return,binary}]),
?line <<"jXk">> = re:replace("abcdefghijk","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\10X",[{return,binary}]),
?line <<"Xk">> = re:replace("abcdefghijk","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\11X",[{return,binary}]),
+ ?line "a\x{400}bcX" = re:replace("a\x{400}bcd","d","X",[global,{return,list},unicode]),
+ ?line <<"a",208,128,"bcX">> = re:replace("a\x{400}bcd","d","X",[global,{return,binary},unicode]),
+ ?line "a\x{400}bcd" = re:replace("a\x{400}bcd","Z","X",[global,{return,list},unicode]),
+ ?line <<"a",208,128,"bcd">> = re:replace("a\x{400}bcd","Z","X",[global,{return,binary},unicode]),
?t:timetrap_cancel(Dog),
ok.
diff --git a/lib/stdlib/test/re_SUITE_data/mod_testoutput8 b/lib/stdlib/test/re_SUITE_data/mod_testoutput8
new file mode 100644
index 0000000000..b7e7b02d6c
--- /dev/null
+++ b/lib/stdlib/test/re_SUITE_data/mod_testoutput8
@@ -0,0 +1,877 @@
+/-- Do not use the \x{} construct except with patterns that have the --/
+/-- /8 option set, because PCRE doesn't recognize them as UTF-8 unless --/
+No match
+/-- that option is set. However, the latest Perls recognize them always. --/
+No match
+
+\x{100}ab/8
+ \x{100}ab
+ 0: \x{100}ab
+
+/a\x{100}*b/8
+ ab
+ 0: ab
+ a\x{100}b
+ 0: a\x{100}b
+ a\x{100}\x{100}b
+ 0: a\x{100}\x{100}b
+
+/a\x{100}+b/8
+ a\x{100}b
+ 0: a\x{100}b
+ a\x{100}\x{100}b
+ 0: a\x{100}\x{100}b
+ *** Failers
+No match
+ ab
+No match
+
+/\bX/8
+ Xoanon
+ 0: X
+ +Xoanon
+ 0: X
+ \x{300}Xoanon
+ 0: X
+ *** Failers
+No match
+ YXoanon
+No match
+
+/\BX/8
+ YXoanon
+ 0: X
+ *** Failers
+No match
+ Xoanon
+No match
+ +Xoanon
+No match
+ \x{300}Xoanon
+No match
+
+/X\b/8
+ X+oanon
+ 0: X
+ ZX\x{300}oanon
+ 0: X
+ FAX
+ 0: X
+ *** Failers
+No match
+ Xoanon
+No match
+
+/X\B/8
+ Xoanon
+ 0: X
+ *** Failers
+No match
+ X+oanon
+No match
+ ZX\x{300}oanon
+No match
+ FAX
+No match
+
+/[^a]/8
+ abcd
+ 0: b
+ a\x{100}
+ 0: \x{100}
+
+/^[abc\x{123}\x{400}-\x{402}]{2,3}\d/8
+ ab99
+ 0: ab9
+ \x{123}\x{123}45
+ 0: \x{123}\x{123}4
+ \x{400}\x{401}\x{402}6
+ 0: \x{400}\x{401}\x{402}6
+ *** Failers
+No match
+ d99
+No match
+ \x{123}\x{122}4
+No match
+ \x{400}\x{403}6
+No match
+ \x{400}\x{401}\x{402}\x{402}6
+No match
+
+/abc/8
+ �]
+Error -10
+ �
+Error -10
+ ���
+Error -10
+ ���\?
+No match
+
+/a.b/8
+ acb
+ 0: acb
+ a\x7fb
+ 0: a\x{7f}b
+ a\x{100}b
+ 0: a\x{100}b
+ *** Failers
+No match
+ a\nb
+No match
+
+/^[a\x{c0}]/8
+ *** Failers
+No match
+ \x{100}
+No match
+
+/(?<=aXb)cd/8
+ aXbcd
+ 0: cd
+
+/(?<=a\x{100}b)cd/8
+ a\x{100}bcd
+ 0: cd
+
+/(?<=a\x{100000}b)cd/8
+ a\x{100000}bcd
+ 0: cd
+
+/(?:\x{100}){3}b/8
+ \x{100}\x{100}\x{100}b
+ 0: \x{100}\x{100}\x{100}b
+ *** Failers
+No match
+ \x{100}\x{100}b
+No match
+
+/\x{ab}/8
+ \x{ab}
+ 0: \x{ab}
+ \xc2\xab
+ 0: \x{ab}
+ *** Failers
+No match
+ \x00{ab}
+No match
+
+/^[^a]{2}/8
+ \x{100}bc
+ 0: \x{100}b
+
+/^[^a]{2,}/8
+ \x{100}bcAa
+ 0: \x{100}bcA
+
+/^[^a]{2,}?/8
+ \x{100}bca
+ 0: \x{100}b
+
+/^[^a]{2}/8i
+ \x{100}bc
+ 0: \x{100}b
+
+/^[^a]{2,}/8i
+ \x{100}bcAa
+ 0: \x{100}bc
+
+/^[^a]{2,}?/8iU
+ \x{100}bca
+ 0: \x{100}bc
+
+/\x{100}{0,0}/8
+ abcd
+ 0:
+
+/\x{100}?/8
+ abcd
+ 0:
+ \x{100}\x{100}
+ 0: \x{100}
+
+/\x{100}{0,3}/8
+ \x{100}\x{100}
+ 0: \x{100}\x{100}
+ \x{100}\x{100}\x{100}\x{100}
+ 0: \x{100}\x{100}\x{100}
+
+/\x{100}*/8
+ abce
+ 0:
+ \x{100}\x{100}\x{100}\x{100}
+ 0: \x{100}\x{100}\x{100}\x{100}
+
+/\x{100}{1,1}/8
+ abcd\x{100}\x{100}\x{100}\x{100}
+ 0: \x{100}
+
+/\x{100}{1,3}/8
+ abcd\x{100}\x{100}\x{100}\x{100}
+ 0: \x{100}\x{100}\x{100}
+
+/\x{100}+/8
+ abcd\x{100}\x{100}\x{100}\x{100}
+ 0: \x{100}\x{100}\x{100}\x{100}
+
+/\x{100}{3}/8
+ abcd\x{100}\x{100}\x{100}XX
+ 0: \x{100}\x{100}\x{100}
+
+/\x{100}{3,5}/8
+ abcd\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}XX
+ 0: \x{100}\x{100}\x{100}\x{100}\x{100}
+
+/\x{100}{3,}/8
+ abcd\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}XX
+ 0: \x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}
+
+/(?<=a\x{100}{2}b)X/8
+ Xyyya\x{100}\x{100}bXzzz
+ 0: X
+
+/\D*/8
+ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+
+/\D*/8
+ \x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}
+ 0: \x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}
+
+/\D/8
+ 1X2
+ 0: X
+ 1\x{100}2
+ 0: \x{100}
+
+/>\S/8
+ > >X Y
+ 0: >X
+ > >\x{100} Y
+ 0: >\x{100}
+
+/\d/8
+ \x{100}3
+ 0: 3
+
+/\s/8
+ \x{100} X
+ 0:
+
+/\D+/8
+ 12abcd34
+ 0: abcd
+ *** Failers
+ 0: *** Failers
+ 1234
+No match
+
+/\d+/8
+ 12abcd34
+ 0: 12
+ *** Failers
+No match
+
+/\d{2,3}/8
+ 12abcd34
+ 0: 12
+ 1234abcd
+ 0: 123
+ *** Failers
+No match
+ 1.4
+No match
+
+/\S+/8
+ 12abcd34
+ 0: 12abcd34
+ *** Failers
+ 0: ***
+ \ \
+No match
+
+/>\s+</8
+ 12> <34
+ 0: > <
+ *** Failers
+No match
+
+/>\s{2,3}</8
+ ab> <cd
+ 0: > <
+ ab> <ce
+ 0: > <
+ *** Failers
+No match
+ ab> <cd
+No match
+
+/>\s{2,3}?</8
+ ab> <cd
+ 0: > <
+ ab> <ce
+ 0: > <
+ *** Failers
+No match
+ ab> <cd
+No match
+
+/\w+/8
+ 12 34
+ 0: 12
+ *** Failers
+ 0: Failers
+ +++=*!
+No match
+
+/\w{2,3}/8
+ ab cd
+ 0: ab
+ abcd ce
+ 0: abc
+ *** Failers
+ 0: Fai
+ a.b.c
+No match
+
+/\W+/8
+ 12====34
+ 0: ====
+ *** Failers
+ 0: ***
+ abcd
+No match
+
+/\W{2,3}/8
+ ab====cd
+ 0: ===
+ ab==cd
+ 0: ==
+ *** Failers
+ 0: ***
+ a.b.c
+No match
+
+/\W{2,3}?/8U
+ ab====cd
+ 0: ===
+ ab==cd
+ 0: ==
+ *** Failers
+ 0: ***
+ a.b.c
+No match
+
+/[\x{100}]/8
+ \x{100}
+ 0: \x{100}
+ Z\x{100}
+ 0: \x{100}
+ \x{100}Z
+ 0: \x{100}
+ *** Failers
+No match
+
+/[Z\x{100}]/8
+ Z\x{100}
+ 0: Z
+ \x{100}
+ 0: \x{100}
+ \x{100}Z
+ 0: \x{100}
+ *** Failers
+No match
+
+/[\x{100}\x{200}]/8
+ ab\x{100}cd
+ 0: \x{100}
+ ab\x{200}cd
+ 0: \x{200}
+ *** Failers
+No match
+
+/[\x{100}-\x{200}]/8
+ ab\x{100}cd
+ 0: \x{100}
+ ab\x{200}cd
+ 0: \x{200}
+ ab\x{111}cd
+ 0: \x{111}
+ *** Failers
+No match
+
+/[z-\x{200}]/8
+ ab\x{100}cd
+ 0: \x{100}
+ ab\x{200}cd
+ 0: \x{200}
+ ab\x{111}cd
+ 0: \x{111}
+ abzcd
+ 0: z
+ ab|cd
+ 0: |
+ *** Failers
+No match
+
+/[Q\x{100}\x{200}]/8
+ ab\x{100}cd
+ 0: \x{100}
+ ab\x{200}cd
+ 0: \x{200}
+ Q?
+ 0: Q
+ *** Failers
+No match
+
+/[Q\x{100}-\x{200}]/8
+ ab\x{100}cd
+ 0: \x{100}
+ ab\x{200}cd
+ 0: \x{200}
+ ab\x{111}cd
+ 0: \x{111}
+ Q?
+ 0: Q
+ *** Failers
+No match
+
+/[Qz-\x{200}]/8
+ ab\x{100}cd
+ 0: \x{100}
+ ab\x{200}cd
+ 0: \x{200}
+ ab\x{111}cd
+ 0: \x{111}
+ abzcd
+ 0: z
+ ab|cd
+ 0: |
+ Q?
+ 0: Q
+ *** Failers
+No match
+
+/[\x{100}\x{200}]{1,3}/8
+ ab\x{100}cd
+ 0: \x{100}
+ ab\x{200}cd
+ 0: \x{200}
+ ab\x{200}\x{100}\x{200}\x{100}cd
+ 0: \x{200}\x{100}\x{200}
+ *** Failers
+No match
+
+/[\x{100}\x{200}]{1,3}?/8U
+ ab\x{100}cd
+ 0: \x{100}
+ ab\x{200}cd
+ 0: \x{200}
+ ab\x{200}\x{100}\x{200}\x{100}cd
+ 0: \x{200}\x{100}\x{200}
+ *** Failers
+No match
+
+/[Q\x{100}\x{200}]{1,3}/8
+ ab\x{100}cd
+ 0: \x{100}
+ ab\x{200}cd
+ 0: \x{200}
+ ab\x{200}\x{100}\x{200}\x{100}cd
+ 0: \x{200}\x{100}\x{200}
+ *** Failers
+No match
+
+/[Q\x{100}\x{200}]{1,3}?/8U
+ ab\x{100}cd
+ 0: \x{100}
+ ab\x{200}cd
+ 0: \x{200}
+ ab\x{200}\x{100}\x{200}\x{100}cd
+ 0: \x{200}\x{100}\x{200}
+ *** Failers
+No match
+
+/(?<=[\x{100}\x{200}])X/8
+ abc\x{200}X
+ 0: X
+ abc\x{100}X
+ 0: X
+ *** Failers
+No match
+ X
+No match
+
+/(?<=[Q\x{100}\x{200}])X/8
+ abc\x{200}X
+ 0: X
+ abc\x{100}X
+ 0: X
+ abQX
+ 0: X
+ *** Failers
+No match
+ X
+No match
+
+/(?<=[\x{100}\x{200}]{3})X/8
+ abc\x{100}\x{200}\x{100}X
+ 0: X
+ *** Failers
+No match
+ abc\x{200}X
+No match
+ X
+No match
+
+/[^\x{100}\x{200}]X/8
+ AX
+ 0: AX
+ \x{150}X
+ 0: \x{150}X
+ \x{500}X
+ 0: \x{500}X
+ *** Failers
+No match
+ \x{100}X
+No match
+ \x{200}X
+No match
+
+/[^Q\x{100}\x{200}]X/8
+ AX
+ 0: AX
+ \x{150}X
+ 0: \x{150}X
+ \x{500}X
+ 0: \x{500}X
+ *** Failers
+No match
+ \x{100}X
+No match
+ \x{200}X
+No match
+ QX
+No match
+
+/[^\x{100}-\x{200}]X/8
+ AX
+ 0: AX
+ \x{500}X
+ 0: \x{500}X
+ *** Failers
+No match
+ \x{100}X
+No match
+ \x{150}X
+No match
+ \x{200}X
+No match
+
+/[z-\x{100}]/8i
+ z
+ 0: z
+ Z
+ 0: Z
+ \x{100}
+ 0: \x{100}
+ *** Failers
+No match
+ \x{102}
+No match
+ y
+No match
+
+/[\xFF]/
+ >\xff<
+ 0: \xff
+
+/[\xff]/8
+ >\x{ff}<
+ 0: \x{ff}
+
+/[^\xFF]/
+ XYZ
+ 0: X
+
+/[^\xff]/8
+ XYZ
+ 0: X
+ \x{123}
+ 0: \x{123}
+
+/^[ac]*b/8
+ xb
+No match
+
+/^[ac\x{100}]*b/8
+ xb
+No match
+
+/^[^x]*b/8i
+ xb
+No match
+
+/^[^x]*b/8
+ xb
+No match
+
+/^\d*b/8
+ xb
+No match
+
+/^\x{85}$/8i
+ \x{85}
+ 0: \x{85}
+
+/^abc./mgx8<any>
+ abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x{0085}abc7 \x{2028}abc8 \x{2029}abc9 JUNK
+ 0: abc1
+ 0: abc2
+ 0: abc3
+ 0: abc4
+ 0: abc5
+ 0: abc6
+ 0: abc7
+ 0: abc8
+ 0: abc9
+
+/abc.$/mgx8<any>
+ abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x{0085} abc7\x{2028} abc8\x{2029} abc9
+ 0: abc1
+ 0: abc2
+ 0: abc3
+ 0: abc4
+ 0: abc5
+ 0: abc6
+ 0: abc7
+ 0: abc8
+ 0: abc9
+
+/^a\Rb/8<bsr_unicode>
+ a\nb
+ 0: a\x{0a}b
+ a\rb
+ 0: a\x{0d}b
+ a\r\nb
+ 0: a\x{0d}\x{0a}b
+ a\x0bb
+ 0: a\x{0b}b
+ a\x0cb
+ 0: a\x{0c}b
+ a\x{85}b
+ 0: a\x{85}b
+ a\x{2028}b
+ 0: a\x{2028}b
+ a\x{2029}b
+ 0: a\x{2029}b
+ ** Failers
+No match
+ a\n\rb
+No match
+
+/^a\R*b/8<bsr_unicode>
+ ab
+ 0: ab
+ a\nb
+ 0: a\x{0a}b
+ a\rb
+ 0: a\x{0d}b
+ a\r\nb
+ 0: a\x{0d}\x{0a}b
+ a\x0bb
+ 0: a\x{0b}b
+ a\x0c\x{2028}\x{2029}b
+ 0: a\x{0c}\x{2028}\x{2029}b
+ a\x{85}b
+ 0: a\x{85}b
+ a\n\rb
+ 0: a\x{0a}\x{0d}b
+ a\n\r\x{85}\x0cb
+ 0: a\x{0a}\x{0d}\x{85}\x{0c}b
+
+/^a\R+b/8<bsr_unicode>
+ a\nb
+ 0: a\x{0a}b
+ a\rb
+ 0: a\x{0d}b
+ a\r\nb
+ 0: a\x{0d}\x{0a}b
+ a\x0bb
+ 0: a\x{0b}b
+ a\x0c\x{2028}\x{2029}b
+ 0: a\x{0c}\x{2028}\x{2029}b
+ a\x{85}b
+ 0: a\x{85}b
+ a\n\rb
+ 0: a\x{0a}\x{0d}b
+ a\n\r\x{85}\x0cb
+ 0: a\x{0a}\x{0d}\x{85}\x{0c}b
+ ** Failers
+No match
+ ab
+No match
+
+/^a\R{1,3}b/8<bsr_unicode>
+ a\nb
+ 0: a\x{0a}b
+ a\n\rb
+ 0: a\x{0a}\x{0d}b
+ a\n\r\x{85}b
+ 0: a\x{0a}\x{0d}\x{85}b
+ a\r\n\r\nb
+ 0: a\x{0d}\x{0a}\x{0d}\x{0a}b
+ a\r\n\r\n\r\nb
+ 0: a\x{0d}\x{0a}\x{0d}\x{0a}\x{0d}\x{0a}b
+ a\n\r\n\rb
+ 0: a\x{0a}\x{0d}\x{0a}\x{0d}b
+ a\n\n\r\nb
+ 0: a\x{0a}\x{0a}\x{0d}\x{0a}b
+ ** Failers
+No match
+ a\n\n\n\rb
+No match
+ a\r
+No match
+
+/\h+\V?\v{3,4}/8
+ \x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a
+ 0: \x{09} \x{a0}X\x{0a}\x{0b}\x{0c}\x{0d}
+
+/\V?\v{3,4}/8
+ \x20\x{a0}X\x0a\x0b\x0c\x0d\x0a
+ 0: X\x{0a}\x{0b}\x{0c}\x{0d}
+
+/\h+\V?\v{3,4}/8
+ >\x09\x20\x{a0}X\x0a\x0a\x0a<
+ 0: \x{09} \x{a0}X\x{0a}\x{0a}\x{0a}
+
+/\V?\v{3,4}/8
+ >\x09\x20\x{a0}X\x0a\x0a\x0a<
+ 0: X\x{0a}\x{0a}\x{0a}
+
+/\H\h\V\v/8
+ X X\x0a
+ 0: X X\x{0a}
+ X\x09X\x0b
+ 0: X\x{09}X\x{0b}
+ ** Failers
+No match
+ \x{a0} X\x0a
+No match
+
+/\H*\h+\V?\v{3,4}/8
+ \x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a
+ 0: \x{09} \x{a0}X\x{0a}\x{0b}\x{0c}\x{0d}
+ \x09\x20\x{a0}\x0a\x0b\x0c\x0d\x0a
+ 0: \x{09} \x{a0}\x{0a}\x{0b}\x{0c}\x{0d}
+ \x09\x20\x{a0}\x0a\x0b\x0c
+ 0: \x{09} \x{a0}\x{0a}\x{0b}\x{0c}
+ ** Failers
+No match
+ \x09\x20\x{a0}\x0a\x0b
+No match
+
+/\H\h\V\v/8
+ \x{3001}\x{3000}\x{2030}\x{2028}
+ 0: \x{3001}\x{3000}\x{2030}\x{2028}
+ X\x{180e}X\x{85}
+ 0: X\x{180e}X\x{85}
+ ** Failers
+No match
+ \x{2009} X\x0a
+No match
+
+/\H*\h+\V?\v{3,4}/8
+ \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x0c\x0d\x0a
+ 0: \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x{0c}\x{0d}
+ \x09\x{205f}\x{a0}\x0a\x{2029}\x0c\x{2028}\x0a
+ 0: \x{09}\x{205f}\x{a0}\x{0a}\x{2029}\x{0c}\x{2028}
+ \x09\x20\x{202f}\x0a\x0b\x0c
+ 0: \x{09} \x{202f}\x{0a}\x{0b}\x{0c}
+ ** Failers
+No match
+ \x09\x{200a}\x{a0}\x{2028}\x0b
+No match
+
+/a\Rb/I8<bsr_anycrlf>
+Capturing subpattern count = 0
+Options: bsr_anycrlf utf8
+First char = 'a'
+Need char = 'b'
+ a\rb
+ 0: a\x{0d}b
+ a\nb
+ 0: a\x{0a}b
+ a\r\nb
+ 0: a\x{0d}\x{0a}b
+ ** Failers
+No match
+ a\x{85}b
+No match
+ a\x0bb
+No match
+
+/a\Rb/I8<bsr_unicode>
+Capturing subpattern count = 0
+Options: bsr_unicode utf8
+First char = 'a'
+Need char = 'b'
+ a\rb
+ 0: a\x{0d}b
+ a\nb
+ 0: a\x{0a}b
+ a\r\nb
+ 0: a\x{0d}\x{0a}b
+ a\x{85}b
+ 0: a\x{85}b
+ a\x0bb
+ 0: a\x{0b}b
+ ** Failers
+No match
+ a\x{85}b\<bsr_anycrlf>
+No match
+ a\x0bb\<bsr_anycrlf>
+No match
+
+/a\R?b/I8<bsr_anycrlf>
+Capturing subpattern count = 0
+Options: bsr_anycrlf utf8
+First char = 'a'
+Need char = 'b'
+ a\rb
+ 0: a\x{0d}b
+ a\nb
+ 0: a\x{0a}b
+ a\r\nb
+ 0: a\x{0d}\x{0a}b
+ ** Failers
+No match
+ a\x{85}b
+No match
+ a\x0bb
+No match
+
+/a\R?b/I8<bsr_unicode>
+Capturing subpattern count = 0
+Options: bsr_unicode utf8
+First char = 'a'
+Need char = 'b'
+ a\rb
+ 0: a\x{0d}b
+ a\nb
+ 0: a\x{0a}b
+ a\r\nb
+ 0: a\x{0d}\x{0a}b
+ a\x{85}b
+ 0: a\x{85}b
+ a\x0bb
+ 0: a\x{0b}b
+ ** Failers
+No match
+ a\x{85}b\<bsr_anycrlf>
+No match
+ a\x0bb\<bsr_anycrlf>
+No match
+
+/ End of testinput 8 /
diff --git a/lib/stdlib/test/run_pcre_tests.erl b/lib/stdlib/test/run_pcre_tests.erl
index 0ef3986918..8c6424e708 100644
--- a/lib/stdlib/test/run_pcre_tests.erl
+++ b/lib/stdlib/test/run_pcre_tests.erl
@@ -1,19 +1,19 @@
%%
%% %CopyrightBegin%
-%%
-%% Copyright Ericsson AB 2008-2009. All Rights Reserved.
-%%
+%%
+%% Copyright Ericsson AB 2008-2010. All Rights Reserved.
+%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
-%%
+%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
-%%
+%%
%% %CopyrightEnd%
%%
-module(run_pcre_tests).
@@ -25,7 +25,7 @@ test(RootDir) ->
erts_debug:set_internal_state(available_internal_state,true),
io:format("oldlimit: ~p~n",[ erts_debug:set_internal_state(re_loop_limit,10)]),
Testfiles0 = ["testoutput1", "testoutput2", "testoutput3", "testoutput4",
- "testoutput5", "testoutput6", "testoutput10"],
+ "testoutput5", "testoutput6","mod_testoutput8","testoutput10"],
Testfiles = [ filename:join([RootDir,FN]) || FN <- Testfiles0 ],
Res = [ begin io:format("~s~n",[X]), t(X) end || X <- Testfiles ],
io:format("limit was: ~p~n",[ erts_debug:set_internal_state(re_loop_limit,default)]),
@@ -42,12 +42,14 @@ t(OneFile,Num) ->
put(error_limit,Num),
put(skipped,0),
Res =
- [test(Structured,true,index),
- test(Structured,false,index),
- test(Structured,true,binary),
- test(Structured,false,binary),
- test(Structured,true,list),
- test(Structured,false,list)],
+ [test(Structured,true,index,false),
+ test(Structured,false,index,false),
+ test(Structured,true,index,true),
+ test(Structured,false,index,true),
+ test(Structured,true,binary,false),
+ test(Structured,false,binary,false),
+ test(Structured,true,list,false),
+ test(Structured,false,list,false)],
{lists:sum(Res),length(Structured)*6,get(skipped)}.
@@ -63,11 +65,21 @@ pick_exec_options([Opt|T]) ->
pick_exec_options([]) ->
{[],[]}.
-test([],_,_) ->
+test([],_,_,_) ->
0;
-test([{RE,Line,Options0,Tests}|T],PreCompile,XMode) ->
+test([{RE0,Line,Options0,Tests}|T],PreCompile,XMode,REAsList) ->
%io:format("."),
%case RE of <<>> -> io:format("Empty re:~w~n",[Line]); _ -> ok end,
+ Unicode = lists:member(unicode,Options0),
+ RE = case REAsList of
+ true ->
+ if
+ Unicode -> unicode:characters_to_list(RE0);
+ true -> binary_to_list(RE0)
+ end;
+ false ->
+ RE0
+ end,
{Options,ExecOptions} = pick_exec_options(Options0),
{Cres, Xopt} = case PreCompile of
true ->
@@ -80,7 +92,7 @@ test([{RE,Line,Options0,Tests}|T],PreCompile,XMode) ->
%erlang:display({testrun,RE,P,Tests,ExecOptions,Xopt,XMode}),
case (catch testrun(RE,P,Tests,ExecOptions,Xopt,XMode)) of
N when is_integer(N) ->
- N + test(T,PreCompile,XMode);
+ N + test(T,PreCompile,XMode,REAsList);
limit ->
io:format("Error limit reached.~n"),
1;
@@ -91,12 +103,12 @@ test([{RE,Line,Options0,Tests}|T],PreCompile,XMode) ->
_ ->
put(skipped,1)
end,
- test(T,PreCompile,XMode)
+ test(T,PreCompile,XMode,REAsList)
end;
{error,Err} ->
io:format("Compile error(~w): ~w~n",[Line,Err]),
case get(error_limit) of
- infinite -> 1 + test(T,PreCompile,XMode);
+ infinite -> 1 + test(T,PreCompile,XMode,REAsList);
X ->
case X-1 of
Y when Y =< 0 ->
@@ -104,7 +116,7 @@ test([{RE,Line,Options0,Tests}|T],PreCompile,XMode) ->
1;
Y ->
put(error_limit,Y),
- 1 + test(T,PreCompile,XMode)
+ 1 + test(T,PreCompile,XMode,REAsList)
end
end
end.
@@ -549,6 +561,8 @@ tr_option($N) ->
[no_auto_capture];
tr_option($8) ->
[unicode];
+tr_option($U) ->
+ [ungreedy];
tr_option($g) ->
[{exec_option,g}];
tr_option(_) ->