From 98cb178fe80be7ee560c16e02dc31bf3df7700c8 Mon Sep 17 00:00:00 2001 From: Rory Byrne Date: Sat, 16 Jan 2010 11:18:39 +0000 Subject: Fix re:replace/4 to handle binary unicode output when nothing replaced A bug with re:replace/4 causes an exception when: (a) it's given a unicode charlist as input; (b) it's set to {return,binary}; and (c) it finds nothing to replace. The problem is: when re:replace/4 does not find anything to replace in its Subject input, it calls iolist_to_binary on this data. This fails if the original input is a charlist with non-ascii codepoints. --- lib/stdlib/src/re.erl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'lib/stdlib/src/re.erl') diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl index 5417ac02e5..f934fdcba1 100644 --- a/lib/stdlib/src/re.erl +++ b/lib/stdlib/src/re.erl @@ -237,7 +237,12 @@ replace(Subject,RE,Replacement,Options) -> iodata -> IoList; binary -> - iolist_to_binary(IoList); + case Unicode of + false -> + iolist_to_binary(IoList); + true -> + unicode:characters_to_binary(IoList,unicode) + end; list -> case Unicode of false -> -- cgit v1.2.3 From cf7e585bb45970fe0b5a8a6aa6653cd50583d052 Mon Sep 17 00:00:00 2001 From: Rory Byrne Date: Sat, 16 Jan 2010 14:47:43 +0000 Subject: Fix re:replace/4 to handle unicode charlist Replacement argument A bug in re:replace/4 causes a badarg exception to be thrown when the Replacement argument is a charlist containing non-ascii codepoints. The problem is that the code incorrectly assumes that the Replacement text is iodata() and calls iolist_to_binary/1 on it. This patch fixes it to obey the 'unicode' option and handle charlist() Replacement arguments correctly. --- lib/stdlib/src/re.erl | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'lib/stdlib/src/re.erl') diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl index f934fdcba1..889d273f6f 100644 --- a/lib/stdlib/src/re.erl +++ b/lib/stdlib/src/re.erl @@ -229,7 +229,19 @@ replace(Subject,RE,Replacement,Options) -> iolist_to_binary(Subject) end end, - case do_replace(FlatSubject,Subject,RE,Replacement,NewOpt) of + FlatReplacement = + case is_binary(Replacement) of + true -> + Replacement; + false -> + case Unicode of + true -> + unicode:characters_to_binary(Replacement,unicode); + false -> + iolist_to_binary(Replacement) + end + end, + case do_replace(FlatSubject,Subject,RE,FlatReplacement,NewOpt) of {error,_Err} -> throw(badre); IoList -> @@ -329,8 +341,7 @@ process_split_params([H|T],C,U,L,S,G) -> {[H|NT],NC,NU,NL,NS,NG}. apply_mlist(Subject,Replacement,Mlist) -> - do_mlist(Subject,Subject,0,precomp_repl(iolist_to_binary(Replacement)), - Mlist). + do_mlist(Subject,Subject,0,precomp_repl(Replacement), Mlist). precomp_repl(<<>>) -> -- cgit v1.2.3 From ff65b85c9c81b9497a5e3342b9e995937cc286ee Mon Sep 17 00:00:00 2001 From: Rory Byrne Date: Sat, 16 Jan 2010 15:42:50 +0000 Subject: Refactor out repeated block in re module --- lib/stdlib/src/re.erl | 59 ++++++++++----------------------------------------- 1 file changed, 11 insertions(+), 48 deletions(-) (limited to 'lib/stdlib/src/re.erl') diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl index 889d273f6f..86da9a26f4 100644 --- a/lib/stdlib/src/re.erl +++ b/lib/stdlib/src/re.erl @@ -32,18 +32,7 @@ split(Subject,RE,Options) -> try {NewOpt,Convert,Unicode,Limit,Strip,Group} = process_split_params(Options,iodata,false,-1,false,false), - FlatSubject = - case is_binary(Subject) of - true -> - Subject; - false -> - case Unicode of - true -> - unicode:characters_to_binary(Subject,unicode); - false -> - iolist_to_binary(Subject) - end - end, + FlatSubject = to_binary(Subject, Unicode), case compile_split(RE,NewOpt) of {error,_Err} -> throw(badre); @@ -217,30 +206,8 @@ replace(Subject,RE,Replacement,Options) -> try {NewOpt,Convert,Unicode} = process_repl_params(Options,iodata,false), - FlatSubject = - case is_binary(Subject) of - true -> - Subject; - false -> - case Unicode of - true -> - unicode:characters_to_binary(Subject,unicode); - false -> - iolist_to_binary(Subject) - end - end, - FlatReplacement = - case is_binary(Replacement) of - true -> - Replacement; - false -> - case Unicode of - true -> - unicode:characters_to_binary(Replacement,unicode); - false -> - iolist_to_binary(Replacement) - end - end, + FlatSubject = to_binary(Subject, Unicode), + FlatReplacement = to_binary(Replacement, Unicode), case do_replace(FlatSubject,Subject,RE,FlatReplacement,NewOpt) of {error,_Err} -> throw(badre); @@ -634,18 +601,7 @@ grun(Subject,RE,{Options,NeedClean,OrigRE}) -> grun2(Subject,RE,{Options,NeedClean}) -> Unicode = check_for_unicode(RE,Options), - FlatSubject = - case is_binary(Subject) of - true -> - Subject; - false -> - case Unicode of - true -> - unicode:characters_to_binary(Subject,unicode); - false -> - iolist_to_binary(Subject) - end - end, + FlatSubject = to_binary(Subject, Unicode), do_grun(FlatSubject,Subject,Unicode,RE,{Options,NeedClean}). do_grun(FlatSubject,Subject,Unicode,RE,{Options0,NeedClean}) -> @@ -765,3 +721,10 @@ runopt(global) -> true; runopt(_) -> false. + +to_binary(Bin, _IsUnicode) when is_binary(Bin) -> + Bin; +to_binary(Data, true) -> + unicode:characters_to_binary(Data,unicode); +to_binary(Data, false) -> + iolist_to_binary(Data). -- cgit v1.2.3 From 6b83b643a20bf502eca696f63445049e0838731a Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Mon, 25 Jan 2010 17:06:08 +0100 Subject: Fix lost unicode option in re:compile() Noticed-by: Rory Byrne --- lib/stdlib/src/re.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/stdlib/src/re.erl') diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl index 86da9a26f4..724c768de9 100644 --- a/lib/stdlib/src/re.erl +++ b/lib/stdlib/src/re.erl @@ -528,7 +528,7 @@ process_uparams([],Type) -> ucompile(RE,Options) -> try - re:compile(unicode:characters_to_binary(RE,unicode)) + re:compile(unicode:characters_to_binary(RE,unicode),Options) catch error:AnyError -> {'EXIT',{new_stacktrace,[{Mod,_,L}|Rest]}} = -- cgit v1.2.3