From cf7e585bb45970fe0b5a8a6aa6653cd50583d052 Mon Sep 17 00:00:00 2001
From: Rory Byrne
Date: Sat, 16 Jan 2010 14:47:43 +0000
Subject: Fix re:replace/4 to handle unicode charlist Replacement argument

A bug in re:replace/4 causes a badarg exception to be thrown when the
Replacement argument is a charlist containing non-ascii codepoints. The
problem is that the code incorrectly assumes that the Replacement text
is iodata() and calls iolist_to_binary/1 on it. This patch fixes it to
obey the 'unicode' option and handle charlist() Replacement arguments
correctly.
---
 lib/stdlib/src/re.erl        | 17 ++++++++++++++---
 lib/stdlib/test/re_SUITE.erl |  1 +
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl
index f934fdcba1..889d273f6f 100644
--- a/lib/stdlib/src/re.erl
+++ b/lib/stdlib/src/re.erl
@@ -229,7 +229,19 @@ replace(Subject,RE,Replacement,Options) ->
                     iolist_to_binary(Subject)
             end
         end,
-    case do_replace(FlatSubject,Subject,RE,Replacement,NewOpt) of
+    FlatReplacement =
+        case is_binary(Replacement) of
+            true ->
+                Replacement;
+            false ->
+                case Unicode of
+                    true ->
+                        unicode:characters_to_binary(Replacement,unicode);
+                    false ->
+                        iolist_to_binary(Replacement)
+                end
+        end,
+    case do_replace(FlatSubject,Subject,RE,FlatReplacement,NewOpt) of
         {error,_Err} ->
             throw(badre);
         IoList ->
@@ -329,8 +341,7 @@ process_split_params([H|T],C,U,L,S,G) ->
     {[H|NT],NC,NU,NL,NS,NG}.
 
 apply_mlist(Subject,Replacement,Mlist) ->
-    do_mlist(Subject,Subject,0,precomp_repl(iolist_to_binary(Replacement)),
-             Mlist).
+    do_mlist(Subject,Subject,0,precomp_repl(Replacement), Mlist).
 
 
 precomp_repl(<<>>) ->
diff --git a/lib/stdlib/test/re_SUITE.erl b/lib/stdlib/test/re_SUITE.erl
index f8667bfcee..e0e0670676 100644
--- a/lib/stdlib/test/re_SUITE.erl
+++ b/lib/stdlib/test/re_SUITE.erl
@@ -275,6 +275,7 @@ replace_input_types(Config) when is_list(Config) ->
     Dog = ?t:timetrap(?t:minutes(3)),
     ?line <<"abcd">> = re:replace("abcd","Z","X",[{return,binary},unicode]),
     ?line <<"abcd">> = re:replace("abcd","\x{400}","X",[{return,binary},unicode]),
+    ?line <<"a",208,128,"cd">> = re:replace(<<"abcd">>,"b","\x{400}",[{return,binary},unicode]),
     ?t:timetrap_cancel(Dog),
     ok.
 
-- 
cgit v1.2.3