aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorBjörn Gustavsson <[email protected]>2015-09-10 13:11:37 +0200
committerBjörn Gustavsson <[email protected]>2015-09-10 13:11:37 +0200
commitc3e4e985d4f3c99d93f7d7f830e65e54d89fc880 (patch)
tree44854c5b5506f25d288f4818063a75a7e283a25e /lib
parente62a1735cb593fe1dac8b93b7573a0f64c6cca74 (diff)
parentb3c7bf872f7fbe304cdcb449a2d501933805aa48 (diff)
downloadotp-c3e4e985d4f3c99d93f7d7f830e65e54d89fc880.tar.gz
otp-c3e4e985d4f3c99d93f7d7f830e65e54d89fc880.tar.bz2
otp-c3e4e985d4f3c99d93f7d7f830e65e54d89fc880.zip
Merge branch 'howleysv/stdlib/compiled-unicode/OTP-12977' into maint
* howleysv/stdlib/compiled-unicode/OTP-12977: stdlib: Fix bug with unicode detection in re
Diffstat (limited to 'lib')
-rw-r--r--lib/stdlib/src/re.erl101
-rw-r--r--lib/stdlib/test/re_SUITE.erl129
2 files changed, 114 insertions, 116 deletions
diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl
index e5d9fc51d2..80bfe38970 100644
--- a/lib/stdlib/src/re.erl
+++ b/lib/stdlib/src/re.erl
@@ -132,8 +132,9 @@ split(Subject,RE) ->
split(Subject,RE,Options) ->
try
- {NewOpt,Convert,Unicode,Limit,Strip,Group} =
- process_split_params(Options,iodata,false,-1,false,false),
+ {NewOpt,Convert,Limit,Strip,Group} =
+ process_split_params(Options,iodata,-1,false,false),
+ Unicode = check_for_unicode(RE, Options),
FlatSubject = to_binary(Subject, Unicode),
case compile_split(RE,NewOpt) of
{error,_Err} ->
@@ -324,8 +325,8 @@ replace(Subject,RE,Replacement) ->
replace(Subject,RE,Replacement,Options) ->
try
- {NewOpt,Convert,Unicode} =
- process_repl_params(Options,iodata,false),
+ {NewOpt,Convert} = process_repl_params(Options,iodata),
+ Unicode = check_for_unicode(RE, Options),
FlatSubject = to_binary(Subject, Unicode),
FlatReplacement = to_binary(Replacement, Unicode),
IoList = do_replace(FlatSubject,Subject,RE,FlatReplacement,NewOpt),
@@ -367,65 +368,59 @@ do_replace(FlatSubject,Subject,RE,Replacement,Options) ->
apply_mlist(FlatSubject,Replacement,[Slist])
end.
-process_repl_params([],Convert,Unicode) ->
- {[],Convert,Unicode};
-process_repl_params([unicode|T],C,_U) ->
- {NT,NC,NU} = process_repl_params(T,C,true),
- {[unicode|NT],NC,NU};
-process_repl_params([report_errors|_],_,_) ->
+process_repl_params([],Convert) ->
+ {[],Convert};
+process_repl_params([report_errors|_],_) ->
throw(badopt);
-process_repl_params([{capture,_,_}|_],_,_) ->
+process_repl_params([{capture,_,_}|_],_) ->
throw(badopt);
-process_repl_params([{capture,_}|_],_,_) ->
+process_repl_params([{capture,_}|_],_) ->
throw(badopt);
-process_repl_params([{return,iodata}|T],_C,U) ->
- process_repl_params(T,iodata,U);
-process_repl_params([{return,list}|T],_C,U) ->
- process_repl_params(T,list,U);
-process_repl_params([{return,binary}|T],_C,U) ->
- process_repl_params(T,binary,U);
-process_repl_params([{return,_}|_],_,_) ->
+process_repl_params([{return,iodata}|T],_C) ->
+ process_repl_params(T,iodata);
+process_repl_params([{return,list}|T],_C) ->
+ process_repl_params(T,list);
+process_repl_params([{return,binary}|T],_C) ->
+ process_repl_params(T,binary);
+process_repl_params([{return,_}|_],_) ->
throw(badopt);
-process_repl_params([H|T],C,U) ->
- {NT,NC,NU} = process_repl_params(T,C,U),
- {[H|NT],NC,NU}.
-
-process_split_params([],Convert,Unicode,Limit,Strip,Group) ->
- {[],Convert,Unicode,Limit,Strip,Group};
-process_split_params([unicode|T],C,_U,L,S,G) ->
- {NT,NC,NU,NL,NS,NG} = process_split_params(T,C,true,L,S,G),
- {[unicode|NT],NC,NU,NL,NS,NG};
-process_split_params([trim|T],C,U,_L,_S,G) ->
- process_split_params(T,C,U,-1,true,G);
-process_split_params([{parts,0}|T],C,U,_L,_S,G) ->
- process_split_params(T,C,U,-1,true,G);
-process_split_params([{parts,N}|T],C,U,_L,_S,G) when is_integer(N), N >= 1 ->
- process_split_params(T,C,U,N-1,false,G);
-process_split_params([{parts,infinity}|T],C,U,_L,_S,G) ->
- process_split_params(T,C,U,-1,false,G);
-process_split_params([{parts,_}|_],_,_,_,_,_) ->
+process_repl_params([H|T],C) ->
+ {NT,NC} = process_repl_params(T,C),
+ {[H|NT],NC}.
+
+process_split_params([],Convert,Limit,Strip,Group) ->
+ {[],Convert,Limit,Strip,Group};
+process_split_params([trim|T],C,_L,_S,G) ->
+ process_split_params(T,C,-1,true,G);
+process_split_params([{parts,0}|T],C,_L,_S,G) ->
+ process_split_params(T,C,-1,true,G);
+process_split_params([{parts,N}|T],C,_L,_S,G) when is_integer(N), N >= 1 ->
+ process_split_params(T,C,N-1,false,G);
+process_split_params([{parts,infinity}|T],C,_L,_S,G) ->
+ process_split_params(T,C,-1,false,G);
+process_split_params([{parts,_}|_],_,_,_,_) ->
throw(badopt);
-process_split_params([group|T],C,U,L,S,_G) ->
- process_split_params(T,C,U,L,S,true);
-process_split_params([global|_],_,_,_,_,_) ->
+process_split_params([group|T],C,L,S,_G) ->
+ process_split_params(T,C,L,S,true);
+process_split_params([global|_],_,_,_,_) ->
throw(badopt);
-process_split_params([report_errors|_],_,_,_,_,_) ->
+process_split_params([report_errors|_],_,_,_,_) ->
throw(badopt);
-process_split_params([{capture,_,_}|_],_,_,_,_,_) ->
+process_split_params([{capture,_,_}|_],_,_,_,_) ->
throw(badopt);
-process_split_params([{capture,_}|_],_,_,_,_,_) ->
+process_split_params([{capture,_}|_],_,_,_,_) ->
throw(badopt);
-process_split_params([{return,iodata}|T],_C,U,L,S,G) ->
- process_split_params(T,iodata,U,L,S,G);
-process_split_params([{return,list}|T],_C,U,L,S,G) ->
- process_split_params(T,list,U,L,S,G);
-process_split_params([{return,binary}|T],_C,U,L,S,G) ->
- process_split_params(T,binary,U,L,S,G);
-process_split_params([{return,_}|_],_,_,_,_,_) ->
+process_split_params([{return,iodata}|T],_C,L,S,G) ->
+ process_split_params(T,iodata,L,S,G);
+process_split_params([{return,list}|T],_C,L,S,G) ->
+ process_split_params(T,list,L,S,G);
+process_split_params([{return,binary}|T],_C,L,S,G) ->
+ process_split_params(T,binary,L,S,G);
+process_split_params([{return,_}|_],_,_,_,_) ->
throw(badopt);
-process_split_params([H|T],C,U,L,S,G) ->
- {NT,NC,NU,NL,NS,NG} = process_split_params(T,C,U,L,S,G),
- {[H|NT],NC,NU,NL,NS,NG}.
+process_split_params([H|T],C,L,S,G) ->
+ {NT,NC,NL,NS,NG} = process_split_params(T,C,L,S,G),
+ {[H|NT],NC,NL,NS,NG}.
apply_mlist(Subject,Replacement,Mlist) ->
do_mlist(Subject,Subject,0,precomp_repl(Replacement), Mlist).
diff --git a/lib/stdlib/test/re_SUITE.erl b/lib/stdlib/test/re_SUITE.erl
index b8c20d9745..d78d6153da 100644
--- a/lib/stdlib/test/re_SUITE.erl
+++ b/lib/stdlib/test/re_SUITE.erl
@@ -28,7 +28,7 @@
pcre_compile_workspace_overflow/1,re_infinite_loop/1,
re_backwards_accented/1,opt_dupnames/1,opt_all_names/1,inspect/1,
opt_no_start_optimize/1,opt_never_utf/1,opt_ucp/1,
- match_limit/1,sub_binaries/1]).
+ match_limit/1,sub_binaries/1,copt/1]).
-include_lib("test_server/include/test_server.hrl").
-include_lib("kernel/include/file.hrl").
@@ -319,32 +319,26 @@ replace_return(doc) ->
["Tests return options of replace together with global searching"];
replace_return(Config) when is_list(Config) ->
Dog = ?t:timetrap(?t:minutes(3)),
- ?line {'EXIT',{badarg,_}} = (catch re:replace("na","(a","")),
- ?line <<"nasse">> = re:replace(<<"nisse">>,"i","a",[{return,binary}]),
- ?line <<"ABCÅXABCXA">> = re:replace("ABC\305abcdABCabcdA","a(?<FOO>bcd)","X",[global,{return,binary}]),
-
- ?line [<<"ABCÅ">>,
- <<"X">>,
- <<"ABC">>,
- <<"X">> |
- <<"A">> ] =
- re:replace("ABCÅabcdABCabcdA","a(?<FOO>bcd)","X",[global,{return,iodata}]),
- ?line "ABCÅXABCXA" = re:replace("ABCÅabcdABCabcdA","a(?<FOO>bcd)","X",[global,{return,list},unicode]),
- ?line <<65,66,67,195,133,88,65,66,67,88,65>> = re:replace("ABCÅabcdABCabcdA","a(?<FOO>bcd)","X",[global,{return,binary},unicode]),
- ?line <<65,66,67,195,133,88,65,66,67,97,98,99,100,65>> = re:replace("ABCÅabcdABCabcdA","a(?<FOO>bcd)","X",[{return,binary},unicode]),
- ?line <<"iXk">> = re:replace("abcdefghijk","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\9X",[{return,binary}]),
- ?line <<"jXk">> = re:replace("abcdefghijk","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\10X",[{return,binary}]),
- ?line <<"Xk">> = re:replace("abcdefghijk","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\11X",[{return,binary}]),
- ?line <<"9X1">> = re:replace("12345678901","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\g9X",[{return,binary}]),
- ?line <<"0X1">> = re:replace("12345678901","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\g10X",[{return,binary}]),
- ?line <<"X1">> = re:replace("12345678901","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\g11X",[{return,binary}]),
- ?line <<"971">> = re:replace("12345678901","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\g{9}7",[{return,binary}]),
- ?line <<"071">> = re:replace("12345678901","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\g{10}7",[{return,binary}]),
- ?line <<"71">> = re:replace("12345678901","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\g{11}7",[{return,binary}]),
- ?line "a\x{400}bcX" = re:replace("a\x{400}bcd","d","X",[global,{return,list},unicode]),
- ?line <<"a",208,128,"bcX">> = re:replace("a\x{400}bcd","d","X",[global,{return,binary},unicode]),
- ?line "a\x{400}bcd" = re:replace("a\x{400}bcd","Z","X",[global,{return,list},unicode]),
- ?line <<"a",208,128,"bcd">> = re:replace("a\x{400}bcd","Z","X",[global,{return,binary},unicode]),
+ {'EXIT',{badarg,_}} = (catch re:replace("na","(a","")),
+ ok = replacetest(<<"nisse">>,"i","a",[{return,binary}],<<"nasse">>),
+ ok = replacetest("ABC\305abcdABCabcdA","a(?<FOO>bcd)","X",[global,{return,binary}],<<"ABCÅXABCXA">>),
+ ok = replacetest("ABCÅabcdABCabcdA","a(?<FOO>bcd)","X",[global,{return,iodata}],[<<"ABCÅ">>,<<"X">>,<<"ABC">>,<<"X">>|<<"A">>]),
+ ok = replacetest("ABCÅabcdABCabcdA","a(?<FOO>bcd)","X",[global,{return,list},unicode],"ABCÅXABCXA"),
+ ok = replacetest("ABCÅabcdABCabcdA","a(?<FOO>bcd)","X",[global,{return,binary},unicode],<<65,66,67,195,133,88,65,66,67,88,65>>),
+ ok = replacetest("ABCÅabcdABCabcdA","a(?<FOO>bcd)","X",[{return,binary},unicode],<<65,66,67,195,133,88,65,66,67,97,98,99,100,65>>),
+ ok = replacetest("abcdefghijk","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\9X",[{return,binary}],<<"iXk">>),
+ ok = replacetest("abcdefghijk","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\10X",[{return,binary}],<<"jXk">>),
+ ok = replacetest("abcdefghijk","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\11X",[{return,binary}],<<"Xk">>),
+ ok = replacetest("12345678901","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\g9X",[{return,binary}],<<"9X1">>),
+ ok = replacetest("12345678901","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\g10X",[{return,binary}],<<"0X1">>),
+ ok = replacetest("12345678901","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\g11X",[{return,binary}],<<"X1">>),
+ ok = replacetest("12345678901","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\g{9}7",[{return,binary}],<<"971">>),
+ ok = replacetest("12345678901","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\g{10}7",[{return,binary}],<<"071">>),
+ ok = replacetest("12345678901","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\g{11}7",[{return,binary}],<<"71">>),
+ ok = replacetest("a\x{400}bcd","d","X",[global,{return,list},unicode],"a\x{400}bcX"),
+ ok = replacetest("a\x{400}bcd","d","X",[global,{return,binary},unicode],<<"a",208,128,"bcX">>),
+ ok = replacetest("a\x{400}bcd","Z","X",[global,{return,list},unicode],"a\x{400}bcd"),
+ ok = replacetest("a\x{400}bcd","Z","X",[global,{return,binary},unicode],<<"a",208,128,"bcd">>),
?t:timetrap_cancel(Dog),
ok.
@@ -389,6 +383,35 @@ crtest(Subject,RE,Options,true,Result) ->
error
end.
+replacetest(Subject,RE,Replacement,Options,Result) ->
+ Result = re:replace(Subject,RE,Replacement,Options),
+ {CompileOptions,ReplaceOptions} = lists:partition(fun copt/1, Options),
+ {ok,MP} = re:compile(RE,CompileOptions),
+ Result = re:replace(Subject,MP,Replacement,ReplaceOptions),
+ ok.
+
+splittest(Subject,RE,Options,Result) ->
+ Result = re:split(Subject,RE,Options),
+ {CompileOptions,SplitOptions} = lists:partition(fun copt/1, Options),
+ {ok,MP} = re:compile(RE,CompileOptions),
+ Result = re:split(Subject,MP,SplitOptions),
+ ok.
+
+copt(caseless) -> true;
+copt(no_start_optimize) -> true;
+copt(never_utf) -> true;
+copt(ucp) -> true;
+copt(dollar_endonly) -> true;
+copt(dotall) -> true;
+copt(extended) -> true;
+copt(firstline) -> true;
+copt(multiline) -> true;
+copt(no_auto_capture) -> true;
+copt(dupnames) -> true;
+copt(ungreedy) -> true;
+copt(unicode) -> true;
+copt(_) -> false.
+
split_autogen(doc) ->
["Test split with autogenerated erlang module"];
split_autogen(Config) when is_list(Config) ->
@@ -401,43 +424,23 @@ split_options(doc) ->
["Test special options to split."];
split_options(Config) when is_list(Config) ->
Dog = ?t:timetrap(?t:minutes(1)),
- ?line [[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],[<<"c">>,<<" ">>]] = re:split("a b c ","( )",[group,trim]),
- ?line [[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],[<<"c">>,<<" ">>]] = re:split("a b c ","( )",[group,{parts,0}]),
- ?line [[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],[<<"c">>,<<" ">>],[<<>>]] =
- re:split("a b c ","( )",[{parts,infinity},group]),
- ?line [[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],[<<"c">>,<<" ">>],[<<>>]] =
- re:split("a b c ","( )",[group]),
- ?line [[<<>>,<<" ">>],[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],
- [<<"c">>,<<" ">>],[<<"d">>,<<" ">>]] =
- re:split(" a b c d ","( +)",[group,trim]),
- ?line [[<<>>,<<" ">>],[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],
- [<<"c">>,<<" ">>],[<<"d">>,<<" ">>]] =
- re:split(" a b c d ","( +)",[{parts,0},group]),
- ?line [[<<>>,<<" ">>],[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],
- [<<"c">>,<<" ">>],[<<"d">>,<<" ">>],[<<>>]] =
- re:split(" a b c d ","( +)",[{parts,infinity},group]),
- ?line [[<<"a">>,<<" ">>],[<<"b c d">>]] =
- re:split("a b c d","( +)",[{parts,2},group]),
- ?line [[[967]," "],["b c d"]] =
- re:split([967]++" b c d","( +)",
- [{parts,2},group,{return,list},unicode]),
- ?line [[<<207,135>>,<<" ">>],[<<"b c d">>]] =
- re:split([967]++" b c d","( +)",
- [{parts,2},group,{return,binary},unicode]),
- ?line {'EXIT',{badarg,_}} =
- (catch re:split([967]++" b c d","( +)",
- [{parts,2},group,{return,binary}])),
- ?line {'EXIT',{badarg,_}} =
- (catch re:split("a b c d","( +)",[{parts,-2}])),
- ?line {'EXIT',{badarg,_}} =
- (catch re:split("a b c d","( +)",[{parts,banan}])),
- ?line {'EXIT',{badarg,_}} =
- (catch re:split("a b c d","( +)",[{capture,all}])),
- ?line {'EXIT',{badarg,_}} =
- (catch re:split("a b c d","( +)",[{capture,[],binary}])),
+ ok = splittest("a b c ","( )",[group,trim],[[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],[<<"c">>,<<" ">>]]),
+ ok = splittest("a b c ","( )",[group,{parts,0}],[[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],[<<"c">>,<<" ">>]]),
+ ok = splittest("a b c ","( )",[{parts,infinity},group],[[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],[<<"c">>,<<" ">>],[<<>>]]),
+ ok = splittest("a b c ","( )",[group],[[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],[<<"c">>,<<" ">>],[<<>>]]),
+ ok = splittest(" a b c d ","( +)",[group,trim],[[<<>>,<<" ">>],[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],[<<"c">>,<<" ">>],[<<"d">>,<<" ">>]]),
+ ok = splittest(" a b c d ","( +)",[{parts,0},group],[[<<>>,<<" ">>],[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],[<<"c">>,<<" ">>],[<<"d">>,<<" ">>]]),
+ ok = splittest(" a b c d ","( +)",[{parts,infinity},group],[[<<>>,<<" ">>],[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],[<<"c">>,<<" ">>],[<<"d">>,<<" ">>],[<<>>]]),
+ ok = splittest("a b c d","( +)",[{parts,2},group],[[<<"a">>,<<" ">>],[<<"b c d">>]]),
+ ok = splittest([967]++" b c d","( +)",[{parts,2},group,{return,list},unicode],[[[967]," "],["b c d"]]),
+ ok = splittest([967]++" b c d","( +)",[{parts,2},group,{return,binary},unicode],[[<<207,135>>,<<" ">>],[<<"b c d">>]]),
+ {'EXIT',{badarg,_}} = (catch re:split([967]++" b c d","( +)",[{parts,2},group,{return,binary}])),
+ {'EXIT',{badarg,_}} = (catch re:split("a b c d","( +)",[{parts,-2}])),
+ {'EXIT',{badarg,_}} = (catch re:split("a b c d","( +)",[{parts,banan}])),
+ {'EXIT',{badarg,_}} = (catch re:split("a b c d","( +)",[{capture,all}])),
+ {'EXIT',{badarg,_}} = (catch re:split("a b c d","( +)",[{capture,[],binary}])),
% Parts 0 is equal to no parts specification (implicit strip)
- ?line ["a"," ","b"," ","c"," ","d"] =
- re:split("a b c d","( *)",[{parts,0},{return,list}]),
+ ok = splittest("a b c d","( *)",[{parts,0},{return,list}],["a"," ","b"," ","c"," ","d"]),
?t:timetrap_cancel(Dog),
ok.