diff options
author | Erlang/OTP <[email protected]> | 2010-01-27 12:33:35 +0000 |
---|---|---|
committer | Erlang/OTP <[email protected]> | 2010-01-27 15:12:34 +0100 |
commit | 8beea46d65375d09ba9af07676894dc720eda6df (patch) | |
tree | f3586eaba444e699894c92fec75206a243cc53cc /lib/stdlib/src | |
parent | 50cc08bfabb0510ebf42170650217dfeec229ce7 (diff) | |
parent | 6b83b643a20bf502eca696f63445049e0838731a (diff) | |
download | otp-8beea46d65375d09ba9af07676894dc720eda6df.tar.gz otp-8beea46d65375d09ba9af07676894dc720eda6df.tar.bz2 otp-8beea46d65375d09ba9af07676894dc720eda6df.zip |
Merge branch 'rb/stdlib_re_unicode_fixes' into ccase/r13b04_dev
* rb/stdlib_re_unicode_fixes:
Fix lost unicode option in re:compile()
Refactor out repeated block in re module
Fix re:replace/4 to handle unicode charlist Replacement argument
Fix re:replace/4 to handle unicode charlist RE argument
Fix re:replace/4 to handle binary unicode output when nothing replaced
OTP-8394 A number of bugs concerning re and unicode are corrected:
- re:compile no longer loses unicode option, which also fixes bug
in re:split.
- re:replace now handles unicode charlist replacement argument
- re:replace now handles unicode RE charlist argument correctly
- re:replace now handles binary unicode output correctly when
nothing is replaced.
Most code, testcases and error isolation done by Rory Byrne.
Diffstat (limited to 'lib/stdlib/src')
-rw-r--r-- | lib/stdlib/src/re.erl | 71 |
1 files changed, 25 insertions, 46 deletions
diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl index 5417ac02e5..296a6b3d23 100644 --- a/lib/stdlib/src/re.erl +++ b/lib/stdlib/src/re.erl @@ -1,19 +1,19 @@ %% %% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2008-2009. All Rights Reserved. -%% +%% +%% Copyright Ericsson AB 2008-2010. All Rights Reserved. +%% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in %% compliance with the License. You should have received a copy of the %% Erlang Public License along with this software. If not, it can be %% retrieved online at http://www.erlang.org/. -%% +%% %% Software distributed under the License is distributed on an "AS IS" %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See %% the License for the specific language governing rights and limitations %% under the License. -%% +%% %% %CopyrightEnd% %% -module(re). @@ -32,18 +32,7 @@ split(Subject,RE,Options) -> try {NewOpt,Convert,Unicode,Limit,Strip,Group} = process_split_params(Options,iodata,false,-1,false,false), - FlatSubject = - case is_binary(Subject) of - true -> - Subject; - false -> - case Unicode of - true -> - unicode:characters_to_binary(Subject,unicode); - false -> - iolist_to_binary(Subject) - end - end, + FlatSubject = to_binary(Subject, Unicode), case compile_split(RE,NewOpt) of {error,_Err} -> throw(badre); @@ -217,19 +206,9 @@ replace(Subject,RE,Replacement,Options) -> try {NewOpt,Convert,Unicode} = process_repl_params(Options,iodata,false), - FlatSubject = - case is_binary(Subject) of - true -> - Subject; - false -> - case Unicode of - true -> - unicode:characters_to_binary(Subject,unicode); - false -> - iolist_to_binary(Subject) - end - end, - case do_replace(FlatSubject,Subject,RE,Replacement,NewOpt) of + FlatSubject = to_binary(Subject, Unicode), + FlatReplacement = to_binary(Replacement, Unicode), + case do_replace(FlatSubject,Subject,RE,FlatReplacement,NewOpt) of {error,_Err} -> throw(badre); IoList -> @@ -237,7 +216,12 @@ replace(Subject,RE,Replacement,Options) -> iodata -> IoList; binary -> - iolist_to_binary(IoList); + case Unicode of + false -> + iolist_to_binary(IoList); + true -> + unicode:characters_to_binary(IoList,unicode) + end; list -> case Unicode of false -> @@ -324,8 +308,7 @@ process_split_params([H|T],C,U,L,S,G) -> {[H|NT],NC,NU,NL,NS,NG}. apply_mlist(Subject,Replacement,Mlist) -> - do_mlist(Subject,Subject,0,precomp_repl(iolist_to_binary(Replacement)), - Mlist). + do_mlist(Subject,Subject,0,precomp_repl(Replacement), Mlist). precomp_repl(<<>>) -> @@ -545,7 +528,7 @@ process_uparams([],Type) -> ucompile(RE,Options) -> try - re:compile(unicode:characters_to_binary(RE,unicode)) + re:compile(unicode:characters_to_binary(RE,unicode),Options) catch error:AnyError -> {'EXIT',{new_stacktrace,[{Mod,_,L}|Rest]}} = @@ -618,18 +601,7 @@ grun(Subject,RE,{Options,NeedClean,OrigRE}) -> grun2(Subject,RE,{Options,NeedClean}) -> Unicode = check_for_unicode(RE,Options), - FlatSubject = - case is_binary(Subject) of - true -> - Subject; - false -> - case Unicode of - true -> - unicode:characters_to_binary(Subject,unicode); - false -> - iolist_to_binary(Subject) - end - end, + FlatSubject = to_binary(Subject, Unicode), do_grun(FlatSubject,Subject,Unicode,RE,{Options,NeedClean}). do_grun(FlatSubject,Subject,Unicode,RE,{Options0,NeedClean}) -> @@ -749,3 +721,10 @@ runopt(global) -> true; runopt(_) -> false. + +to_binary(Bin, _IsUnicode) when is_binary(Bin) -> + Bin; +to_binary(Data, true) -> + unicode:characters_to_binary(Data,unicode); +to_binary(Data, false) -> + iolist_to_binary(Data). |