diff options
author | Björn Gustavsson <[email protected]> | 2015-09-10 13:11:37 +0200 |
---|---|---|
committer | Björn Gustavsson <[email protected]> | 2015-09-10 13:11:37 +0200 |
commit | c3e4e985d4f3c99d93f7d7f830e65e54d89fc880 (patch) | |
tree | 44854c5b5506f25d288f4818063a75a7e283a25e /lib/stdlib | |
parent | e62a1735cb593fe1dac8b93b7573a0f64c6cca74 (diff) | |
parent | b3c7bf872f7fbe304cdcb449a2d501933805aa48 (diff) | |
download | otp-c3e4e985d4f3c99d93f7d7f830e65e54d89fc880.tar.gz otp-c3e4e985d4f3c99d93f7d7f830e65e54d89fc880.tar.bz2 otp-c3e4e985d4f3c99d93f7d7f830e65e54d89fc880.zip |
Merge branch 'howleysv/stdlib/compiled-unicode/OTP-12977' into maint
* howleysv/stdlib/compiled-unicode/OTP-12977:
stdlib: Fix bug with unicode detection in re
Diffstat (limited to 'lib/stdlib')
-rw-r--r-- | lib/stdlib/src/re.erl | 101 | ||||
-rw-r--r-- | lib/stdlib/test/re_SUITE.erl | 129 |
2 files changed, 114 insertions, 116 deletions
diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl index e5d9fc51d2..80bfe38970 100644 --- a/lib/stdlib/src/re.erl +++ b/lib/stdlib/src/re.erl @@ -132,8 +132,9 @@ split(Subject,RE) -> split(Subject,RE,Options) -> try - {NewOpt,Convert,Unicode,Limit,Strip,Group} = - process_split_params(Options,iodata,false,-1,false,false), + {NewOpt,Convert,Limit,Strip,Group} = + process_split_params(Options,iodata,-1,false,false), + Unicode = check_for_unicode(RE, Options), FlatSubject = to_binary(Subject, Unicode), case compile_split(RE,NewOpt) of {error,_Err} -> @@ -324,8 +325,8 @@ replace(Subject,RE,Replacement) -> replace(Subject,RE,Replacement,Options) -> try - {NewOpt,Convert,Unicode} = - process_repl_params(Options,iodata,false), + {NewOpt,Convert} = process_repl_params(Options,iodata), + Unicode = check_for_unicode(RE, Options), FlatSubject = to_binary(Subject, Unicode), FlatReplacement = to_binary(Replacement, Unicode), IoList = do_replace(FlatSubject,Subject,RE,FlatReplacement,NewOpt), @@ -367,65 +368,59 @@ do_replace(FlatSubject,Subject,RE,Replacement,Options) -> apply_mlist(FlatSubject,Replacement,[Slist]) end. -process_repl_params([],Convert,Unicode) -> - {[],Convert,Unicode}; -process_repl_params([unicode|T],C,_U) -> - {NT,NC,NU} = process_repl_params(T,C,true), - {[unicode|NT],NC,NU}; -process_repl_params([report_errors|_],_,_) -> +process_repl_params([],Convert) -> + {[],Convert}; +process_repl_params([report_errors|_],_) -> throw(badopt); -process_repl_params([{capture,_,_}|_],_,_) -> +process_repl_params([{capture,_,_}|_],_) -> throw(badopt); -process_repl_params([{capture,_}|_],_,_) -> +process_repl_params([{capture,_}|_],_) -> throw(badopt); -process_repl_params([{return,iodata}|T],_C,U) -> - process_repl_params(T,iodata,U); -process_repl_params([{return,list}|T],_C,U) -> - process_repl_params(T,list,U); -process_repl_params([{return,binary}|T],_C,U) -> - process_repl_params(T,binary,U); -process_repl_params([{return,_}|_],_,_) -> +process_repl_params([{return,iodata}|T],_C) -> + process_repl_params(T,iodata); +process_repl_params([{return,list}|T],_C) -> + process_repl_params(T,list); +process_repl_params([{return,binary}|T],_C) -> + process_repl_params(T,binary); +process_repl_params([{return,_}|_],_) -> throw(badopt); -process_repl_params([H|T],C,U) -> - {NT,NC,NU} = process_repl_params(T,C,U), - {[H|NT],NC,NU}. - -process_split_params([],Convert,Unicode,Limit,Strip,Group) -> - {[],Convert,Unicode,Limit,Strip,Group}; -process_split_params([unicode|T],C,_U,L,S,G) -> - {NT,NC,NU,NL,NS,NG} = process_split_params(T,C,true,L,S,G), - {[unicode|NT],NC,NU,NL,NS,NG}; -process_split_params([trim|T],C,U,_L,_S,G) -> - process_split_params(T,C,U,-1,true,G); -process_split_params([{parts,0}|T],C,U,_L,_S,G) -> - process_split_params(T,C,U,-1,true,G); -process_split_params([{parts,N}|T],C,U,_L,_S,G) when is_integer(N), N >= 1 -> - process_split_params(T,C,U,N-1,false,G); -process_split_params([{parts,infinity}|T],C,U,_L,_S,G) -> - process_split_params(T,C,U,-1,false,G); -process_split_params([{parts,_}|_],_,_,_,_,_) -> +process_repl_params([H|T],C) -> + {NT,NC} = process_repl_params(T,C), + {[H|NT],NC}. + +process_split_params([],Convert,Limit,Strip,Group) -> + {[],Convert,Limit,Strip,Group}; +process_split_params([trim|T],C,_L,_S,G) -> + process_split_params(T,C,-1,true,G); +process_split_params([{parts,0}|T],C,_L,_S,G) -> + process_split_params(T,C,-1,true,G); +process_split_params([{parts,N}|T],C,_L,_S,G) when is_integer(N), N >= 1 -> + process_split_params(T,C,N-1,false,G); +process_split_params([{parts,infinity}|T],C,_L,_S,G) -> + process_split_params(T,C,-1,false,G); +process_split_params([{parts,_}|_],_,_,_,_) -> throw(badopt); -process_split_params([group|T],C,U,L,S,_G) -> - process_split_params(T,C,U,L,S,true); -process_split_params([global|_],_,_,_,_,_) -> +process_split_params([group|T],C,L,S,_G) -> + process_split_params(T,C,L,S,true); +process_split_params([global|_],_,_,_,_) -> throw(badopt); -process_split_params([report_errors|_],_,_,_,_,_) -> +process_split_params([report_errors|_],_,_,_,_) -> throw(badopt); -process_split_params([{capture,_,_}|_],_,_,_,_,_) -> +process_split_params([{capture,_,_}|_],_,_,_,_) -> throw(badopt); -process_split_params([{capture,_}|_],_,_,_,_,_) -> +process_split_params([{capture,_}|_],_,_,_,_) -> throw(badopt); -process_split_params([{return,iodata}|T],_C,U,L,S,G) -> - process_split_params(T,iodata,U,L,S,G); -process_split_params([{return,list}|T],_C,U,L,S,G) -> - process_split_params(T,list,U,L,S,G); -process_split_params([{return,binary}|T],_C,U,L,S,G) -> - process_split_params(T,binary,U,L,S,G); -process_split_params([{return,_}|_],_,_,_,_,_) -> +process_split_params([{return,iodata}|T],_C,L,S,G) -> + process_split_params(T,iodata,L,S,G); +process_split_params([{return,list}|T],_C,L,S,G) -> + process_split_params(T,list,L,S,G); +process_split_params([{return,binary}|T],_C,L,S,G) -> + process_split_params(T,binary,L,S,G); +process_split_params([{return,_}|_],_,_,_,_) -> throw(badopt); -process_split_params([H|T],C,U,L,S,G) -> - {NT,NC,NU,NL,NS,NG} = process_split_params(T,C,U,L,S,G), - {[H|NT],NC,NU,NL,NS,NG}. +process_split_params([H|T],C,L,S,G) -> + {NT,NC,NL,NS,NG} = process_split_params(T,C,L,S,G), + {[H|NT],NC,NL,NS,NG}. apply_mlist(Subject,Replacement,Mlist) -> do_mlist(Subject,Subject,0,precomp_repl(Replacement), Mlist). diff --git a/lib/stdlib/test/re_SUITE.erl b/lib/stdlib/test/re_SUITE.erl index b8c20d9745..d78d6153da 100644 --- a/lib/stdlib/test/re_SUITE.erl +++ b/lib/stdlib/test/re_SUITE.erl @@ -28,7 +28,7 @@ pcre_compile_workspace_overflow/1,re_infinite_loop/1, re_backwards_accented/1,opt_dupnames/1,opt_all_names/1,inspect/1, opt_no_start_optimize/1,opt_never_utf/1,opt_ucp/1, - match_limit/1,sub_binaries/1]). + match_limit/1,sub_binaries/1,copt/1]). -include_lib("test_server/include/test_server.hrl"). -include_lib("kernel/include/file.hrl"). @@ -319,32 +319,26 @@ replace_return(doc) -> ["Tests return options of replace together with global searching"]; replace_return(Config) when is_list(Config) -> Dog = ?t:timetrap(?t:minutes(3)), - ?line {'EXIT',{badarg,_}} = (catch re:replace("na","(a","")), - ?line <<"nasse">> = re:replace(<<"nisse">>,"i","a",[{return,binary}]), - ?line <<"ABCÅXABCXA">> = re:replace("ABC\305abcdABCabcdA","a(?<FOO>bcd)","X",[global,{return,binary}]), - - ?line [<<"ABCÅ">>, - <<"X">>, - <<"ABC">>, - <<"X">> | - <<"A">> ] = - re:replace("ABCÅabcdABCabcdA","a(?<FOO>bcd)","X",[global,{return,iodata}]), - ?line "ABCÅXABCXA" = re:replace("ABCÅabcdABCabcdA","a(?<FOO>bcd)","X",[global,{return,list},unicode]), - ?line <<65,66,67,195,133,88,65,66,67,88,65>> = re:replace("ABCÅabcdABCabcdA","a(?<FOO>bcd)","X",[global,{return,binary},unicode]), - ?line <<65,66,67,195,133,88,65,66,67,97,98,99,100,65>> = re:replace("ABCÅabcdABCabcdA","a(?<FOO>bcd)","X",[{return,binary},unicode]), - ?line <<"iXk">> = re:replace("abcdefghijk","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\9X",[{return,binary}]), - ?line <<"jXk">> = re:replace("abcdefghijk","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\10X",[{return,binary}]), - ?line <<"Xk">> = re:replace("abcdefghijk","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\11X",[{return,binary}]), - ?line <<"9X1">> = re:replace("12345678901","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\g9X",[{return,binary}]), - ?line <<"0X1">> = re:replace("12345678901","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\g10X",[{return,binary}]), - ?line <<"X1">> = re:replace("12345678901","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\g11X",[{return,binary}]), - ?line <<"971">> = re:replace("12345678901","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\g{9}7",[{return,binary}]), - ?line <<"071">> = re:replace("12345678901","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\g{10}7",[{return,binary}]), - ?line <<"71">> = re:replace("12345678901","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\g{11}7",[{return,binary}]), - ?line "a\x{400}bcX" = re:replace("a\x{400}bcd","d","X",[global,{return,list},unicode]), - ?line <<"a",208,128,"bcX">> = re:replace("a\x{400}bcd","d","X",[global,{return,binary},unicode]), - ?line "a\x{400}bcd" = re:replace("a\x{400}bcd","Z","X",[global,{return,list},unicode]), - ?line <<"a",208,128,"bcd">> = re:replace("a\x{400}bcd","Z","X",[global,{return,binary},unicode]), + {'EXIT',{badarg,_}} = (catch re:replace("na","(a","")), + ok = replacetest(<<"nisse">>,"i","a",[{return,binary}],<<"nasse">>), + ok = replacetest("ABC\305abcdABCabcdA","a(?<FOO>bcd)","X",[global,{return,binary}],<<"ABCÅXABCXA">>), + ok = replacetest("ABCÅabcdABCabcdA","a(?<FOO>bcd)","X",[global,{return,iodata}],[<<"ABCÅ">>,<<"X">>,<<"ABC">>,<<"X">>|<<"A">>]), + ok = replacetest("ABCÅabcdABCabcdA","a(?<FOO>bcd)","X",[global,{return,list},unicode],"ABCÅXABCXA"), + ok = replacetest("ABCÅabcdABCabcdA","a(?<FOO>bcd)","X",[global,{return,binary},unicode],<<65,66,67,195,133,88,65,66,67,88,65>>), + ok = replacetest("ABCÅabcdABCabcdA","a(?<FOO>bcd)","X",[{return,binary},unicode],<<65,66,67,195,133,88,65,66,67,97,98,99,100,65>>), + ok = replacetest("abcdefghijk","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\9X",[{return,binary}],<<"iXk">>), + ok = replacetest("abcdefghijk","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\10X",[{return,binary}],<<"jXk">>), + ok = replacetest("abcdefghijk","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\11X",[{return,binary}],<<"Xk">>), + ok = replacetest("12345678901","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\g9X",[{return,binary}],<<"9X1">>), + ok = replacetest("12345678901","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\g10X",[{return,binary}],<<"0X1">>), + ok = replacetest("12345678901","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\g11X",[{return,binary}],<<"X1">>), + ok = replacetest("12345678901","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\g{9}7",[{return,binary}],<<"971">>), + ok = replacetest("12345678901","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\g{10}7",[{return,binary}],<<"071">>), + ok = replacetest("12345678901","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\g{11}7",[{return,binary}],<<"71">>), + ok = replacetest("a\x{400}bcd","d","X",[global,{return,list},unicode],"a\x{400}bcX"), + ok = replacetest("a\x{400}bcd","d","X",[global,{return,binary},unicode],<<"a",208,128,"bcX">>), + ok = replacetest("a\x{400}bcd","Z","X",[global,{return,list},unicode],"a\x{400}bcd"), + ok = replacetest("a\x{400}bcd","Z","X",[global,{return,binary},unicode],<<"a",208,128,"bcd">>), ?t:timetrap_cancel(Dog), ok. @@ -389,6 +383,35 @@ crtest(Subject,RE,Options,true,Result) -> error end. +replacetest(Subject,RE,Replacement,Options,Result) -> + Result = re:replace(Subject,RE,Replacement,Options), + {CompileOptions,ReplaceOptions} = lists:partition(fun copt/1, Options), + {ok,MP} = re:compile(RE,CompileOptions), + Result = re:replace(Subject,MP,Replacement,ReplaceOptions), + ok. + +splittest(Subject,RE,Options,Result) -> + Result = re:split(Subject,RE,Options), + {CompileOptions,SplitOptions} = lists:partition(fun copt/1, Options), + {ok,MP} = re:compile(RE,CompileOptions), + Result = re:split(Subject,MP,SplitOptions), + ok. + +copt(caseless) -> true; +copt(no_start_optimize) -> true; +copt(never_utf) -> true; +copt(ucp) -> true; +copt(dollar_endonly) -> true; +copt(dotall) -> true; +copt(extended) -> true; +copt(firstline) -> true; +copt(multiline) -> true; +copt(no_auto_capture) -> true; +copt(dupnames) -> true; +copt(ungreedy) -> true; +copt(unicode) -> true; +copt(_) -> false. + split_autogen(doc) -> ["Test split with autogenerated erlang module"]; split_autogen(Config) when is_list(Config) -> @@ -401,43 +424,23 @@ split_options(doc) -> ["Test special options to split."]; split_options(Config) when is_list(Config) -> Dog = ?t:timetrap(?t:minutes(1)), - ?line [[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],[<<"c">>,<<" ">>]] = re:split("a b c ","( )",[group,trim]), - ?line [[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],[<<"c">>,<<" ">>]] = re:split("a b c ","( )",[group,{parts,0}]), - ?line [[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],[<<"c">>,<<" ">>],[<<>>]] = - re:split("a b c ","( )",[{parts,infinity},group]), - ?line [[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],[<<"c">>,<<" ">>],[<<>>]] = - re:split("a b c ","( )",[group]), - ?line [[<<>>,<<" ">>],[<<"a">>,<<" ">>],[<<"b">>,<<" ">>], - [<<"c">>,<<" ">>],[<<"d">>,<<" ">>]] = - re:split(" a b c d ","( +)",[group,trim]), - ?line [[<<>>,<<" ">>],[<<"a">>,<<" ">>],[<<"b">>,<<" ">>], - [<<"c">>,<<" ">>],[<<"d">>,<<" ">>]] = - re:split(" a b c d ","( +)",[{parts,0},group]), - ?line [[<<>>,<<" ">>],[<<"a">>,<<" ">>],[<<"b">>,<<" ">>], - [<<"c">>,<<" ">>],[<<"d">>,<<" ">>],[<<>>]] = - re:split(" a b c d ","( +)",[{parts,infinity},group]), - ?line [[<<"a">>,<<" ">>],[<<"b c d">>]] = - re:split("a b c d","( +)",[{parts,2},group]), - ?line [[[967]," "],["b c d"]] = - re:split([967]++" b c d","( +)", - [{parts,2},group,{return,list},unicode]), - ?line [[<<207,135>>,<<" ">>],[<<"b c d">>]] = - re:split([967]++" b c d","( +)", - [{parts,2},group,{return,binary},unicode]), - ?line {'EXIT',{badarg,_}} = - (catch re:split([967]++" b c d","( +)", - [{parts,2},group,{return,binary}])), - ?line {'EXIT',{badarg,_}} = - (catch re:split("a b c d","( +)",[{parts,-2}])), - ?line {'EXIT',{badarg,_}} = - (catch re:split("a b c d","( +)",[{parts,banan}])), - ?line {'EXIT',{badarg,_}} = - (catch re:split("a b c d","( +)",[{capture,all}])), - ?line {'EXIT',{badarg,_}} = - (catch re:split("a b c d","( +)",[{capture,[],binary}])), + ok = splittest("a b c ","( )",[group,trim],[[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],[<<"c">>,<<" ">>]]), + ok = splittest("a b c ","( )",[group,{parts,0}],[[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],[<<"c">>,<<" ">>]]), + ok = splittest("a b c ","( )",[{parts,infinity},group],[[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],[<<"c">>,<<" ">>],[<<>>]]), + ok = splittest("a b c ","( )",[group],[[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],[<<"c">>,<<" ">>],[<<>>]]), + ok = splittest(" a b c d ","( +)",[group,trim],[[<<>>,<<" ">>],[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],[<<"c">>,<<" ">>],[<<"d">>,<<" ">>]]), + ok = splittest(" a b c d ","( +)",[{parts,0},group],[[<<>>,<<" ">>],[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],[<<"c">>,<<" ">>],[<<"d">>,<<" ">>]]), + ok = splittest(" a b c d ","( +)",[{parts,infinity},group],[[<<>>,<<" ">>],[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],[<<"c">>,<<" ">>],[<<"d">>,<<" ">>],[<<>>]]), + ok = splittest("a b c d","( +)",[{parts,2},group],[[<<"a">>,<<" ">>],[<<"b c d">>]]), + ok = splittest([967]++" b c d","( +)",[{parts,2},group,{return,list},unicode],[[[967]," "],["b c d"]]), + ok = splittest([967]++" b c d","( +)",[{parts,2},group,{return,binary},unicode],[[<<207,135>>,<<" ">>],[<<"b c d">>]]), + {'EXIT',{badarg,_}} = (catch re:split([967]++" b c d","( +)",[{parts,2},group,{return,binary}])), + {'EXIT',{badarg,_}} = (catch re:split("a b c d","( +)",[{parts,-2}])), + {'EXIT',{badarg,_}} = (catch re:split("a b c d","( +)",[{parts,banan}])), + {'EXIT',{badarg,_}} = (catch re:split("a b c d","( +)",[{capture,all}])), + {'EXIT',{badarg,_}} = (catch re:split("a b c d","( +)",[{capture,[],binary}])), % Parts 0 is equal to no parts specification (implicit strip) - ?line ["a"," ","b"," ","c"," ","d"] = - re:split("a b c d","( *)",[{parts,0},{return,list}]), + ok = splittest("a b c d","( *)",[{parts,0},{return,list}],["a"," ","b"," ","c"," ","d"]), ?t:timetrap_cancel(Dog), ok. |