From b3c7bf872f7fbe304cdcb449a2d501933805aa48 Mon Sep 17 00:00:00 2001 From: Shane Howley Date: Wed, 15 Jul 2015 18:46:21 +0100 Subject: stdlib: Fix bug with unicode detection in re Fix bug with unrecognised 'unicode' option in re:split/2,3 & re:replace/3,4 when using pre-compiled regex. --- lib/stdlib/src/re.erl | 101 ++++++++++++++++++++++++-------------------------- 1 file changed, 48 insertions(+), 53 deletions(-) (limited to 'lib/stdlib/src') diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl index e5d9fc51d2..80bfe38970 100644 --- a/lib/stdlib/src/re.erl +++ b/lib/stdlib/src/re.erl @@ -132,8 +132,9 @@ split(Subject,RE) -> split(Subject,RE,Options) -> try - {NewOpt,Convert,Unicode,Limit,Strip,Group} = - process_split_params(Options,iodata,false,-1,false,false), + {NewOpt,Convert,Limit,Strip,Group} = + process_split_params(Options,iodata,-1,false,false), + Unicode = check_for_unicode(RE, Options), FlatSubject = to_binary(Subject, Unicode), case compile_split(RE,NewOpt) of {error,_Err} -> @@ -324,8 +325,8 @@ replace(Subject,RE,Replacement) -> replace(Subject,RE,Replacement,Options) -> try - {NewOpt,Convert,Unicode} = - process_repl_params(Options,iodata,false), + {NewOpt,Convert} = process_repl_params(Options,iodata), + Unicode = check_for_unicode(RE, Options), FlatSubject = to_binary(Subject, Unicode), FlatReplacement = to_binary(Replacement, Unicode), IoList = do_replace(FlatSubject,Subject,RE,FlatReplacement,NewOpt), @@ -367,65 +368,59 @@ do_replace(FlatSubject,Subject,RE,Replacement,Options) -> apply_mlist(FlatSubject,Replacement,[Slist]) end. -process_repl_params([],Convert,Unicode) -> - {[],Convert,Unicode}; -process_repl_params([unicode|T],C,_U) -> - {NT,NC,NU} = process_repl_params(T,C,true), - {[unicode|NT],NC,NU}; -process_repl_params([report_errors|_],_,_) -> +process_repl_params([],Convert) -> + {[],Convert}; +process_repl_params([report_errors|_],_) -> throw(badopt); -process_repl_params([{capture,_,_}|_],_,_) -> +process_repl_params([{capture,_,_}|_],_) -> throw(badopt); -process_repl_params([{capture,_}|_],_,_) -> +process_repl_params([{capture,_}|_],_) -> throw(badopt); -process_repl_params([{return,iodata}|T],_C,U) -> - process_repl_params(T,iodata,U); -process_repl_params([{return,list}|T],_C,U) -> - process_repl_params(T,list,U); -process_repl_params([{return,binary}|T],_C,U) -> - process_repl_params(T,binary,U); -process_repl_params([{return,_}|_],_,_) -> +process_repl_params([{return,iodata}|T],_C) -> + process_repl_params(T,iodata); +process_repl_params([{return,list}|T],_C) -> + process_repl_params(T,list); +process_repl_params([{return,binary}|T],_C) -> + process_repl_params(T,binary); +process_repl_params([{return,_}|_],_) -> throw(badopt); -process_repl_params([H|T],C,U) -> - {NT,NC,NU} = process_repl_params(T,C,U), - {[H|NT],NC,NU}. - -process_split_params([],Convert,Unicode,Limit,Strip,Group) -> - {[],Convert,Unicode,Limit,Strip,Group}; -process_split_params([unicode|T],C,_U,L,S,G) -> - {NT,NC,NU,NL,NS,NG} = process_split_params(T,C,true,L,S,G), - {[unicode|NT],NC,NU,NL,NS,NG}; -process_split_params([trim|T],C,U,_L,_S,G) -> - process_split_params(T,C,U,-1,true,G); -process_split_params([{parts,0}|T],C,U,_L,_S,G) -> - process_split_params(T,C,U,-1,true,G); -process_split_params([{parts,N}|T],C,U,_L,_S,G) when is_integer(N), N >= 1 -> - process_split_params(T,C,U,N-1,false,G); -process_split_params([{parts,infinity}|T],C,U,_L,_S,G) -> - process_split_params(T,C,U,-1,false,G); -process_split_params([{parts,_}|_],_,_,_,_,_) -> +process_repl_params([H|T],C) -> + {NT,NC} = process_repl_params(T,C), + {[H|NT],NC}. + +process_split_params([],Convert,Limit,Strip,Group) -> + {[],Convert,Limit,Strip,Group}; +process_split_params([trim|T],C,_L,_S,G) -> + process_split_params(T,C,-1,true,G); +process_split_params([{parts,0}|T],C,_L,_S,G) -> + process_split_params(T,C,-1,true,G); +process_split_params([{parts,N}|T],C,_L,_S,G) when is_integer(N), N >= 1 -> + process_split_params(T,C,N-1,false,G); +process_split_params([{parts,infinity}|T],C,_L,_S,G) -> + process_split_params(T,C,-1,false,G); +process_split_params([{parts,_}|_],_,_,_,_) -> throw(badopt); -process_split_params([group|T],C,U,L,S,_G) -> - process_split_params(T,C,U,L,S,true); -process_split_params([global|_],_,_,_,_,_) -> +process_split_params([group|T],C,L,S,_G) -> + process_split_params(T,C,L,S,true); +process_split_params([global|_],_,_,_,_) -> throw(badopt); -process_split_params([report_errors|_],_,_,_,_,_) -> +process_split_params([report_errors|_],_,_,_,_) -> throw(badopt); -process_split_params([{capture,_,_}|_],_,_,_,_,_) -> +process_split_params([{capture,_,_}|_],_,_,_,_) -> throw(badopt); -process_split_params([{capture,_}|_],_,_,_,_,_) -> +process_split_params([{capture,_}|_],_,_,_,_) -> throw(badopt); -process_split_params([{return,iodata}|T],_C,U,L,S,G) -> - process_split_params(T,iodata,U,L,S,G); -process_split_params([{return,list}|T],_C,U,L,S,G) -> - process_split_params(T,list,U,L,S,G); -process_split_params([{return,binary}|T],_C,U,L,S,G) -> - process_split_params(T,binary,U,L,S,G); -process_split_params([{return,_}|_],_,_,_,_,_) -> +process_split_params([{return,iodata}|T],_C,L,S,G) -> + process_split_params(T,iodata,L,S,G); +process_split_params([{return,list}|T],_C,L,S,G) -> + process_split_params(T,list,L,S,G); +process_split_params([{return,binary}|T],_C,L,S,G) -> + process_split_params(T,binary,L,S,G); +process_split_params([{return,_}|_],_,_,_,_) -> throw(badopt); -process_split_params([H|T],C,U,L,S,G) -> - {NT,NC,NU,NL,NS,NG} = process_split_params(T,C,U,L,S,G), - {[H|NT],NC,NU,NL,NS,NG}. +process_split_params([H|T],C,L,S,G) -> + {NT,NC,NL,NS,NG} = process_split_params(T,C,L,S,G), + {[H|NT],NC,NL,NS,NG}. apply_mlist(Subject,Replacement,Mlist) -> do_mlist(Subject,Subject,0,precomp_repl(Replacement), Mlist). -- cgit v1.2.3