aboutsummaryrefslogtreecommitdiffstats
path: root/lib/stdlib/src
diff options
context:
space:
mode:
authorErlang/OTP <[email protected]>2010-01-27 12:33:35 +0000
committerErlang/OTP <[email protected]>2010-01-27 15:12:34 +0100
commit8beea46d65375d09ba9af07676894dc720eda6df (patch)
treef3586eaba444e699894c92fec75206a243cc53cc /lib/stdlib/src
parent50cc08bfabb0510ebf42170650217dfeec229ce7 (diff)
parent6b83b643a20bf502eca696f63445049e0838731a (diff)
downloadotp-8beea46d65375d09ba9af07676894dc720eda6df.tar.gz
otp-8beea46d65375d09ba9af07676894dc720eda6df.tar.bz2
otp-8beea46d65375d09ba9af07676894dc720eda6df.zip
Merge branch 'rb/stdlib_re_unicode_fixes' into ccase/r13b04_dev
* rb/stdlib_re_unicode_fixes: Fix lost unicode option in re:compile() Refactor out repeated block in re module Fix re:replace/4 to handle unicode charlist Replacement argument Fix re:replace/4 to handle unicode charlist RE argument Fix re:replace/4 to handle binary unicode output when nothing replaced OTP-8394 A number of bugs concerning re and unicode are corrected: - re:compile no longer loses unicode option, which also fixes bug in re:split. - re:replace now handles unicode charlist replacement argument - re:replace now handles unicode RE charlist argument correctly - re:replace now handles binary unicode output correctly when nothing is replaced. Most code, testcases and error isolation done by Rory Byrne.
Diffstat (limited to 'lib/stdlib/src')
-rw-r--r--lib/stdlib/src/re.erl71
1 files changed, 25 insertions, 46 deletions
diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl
index 5417ac02e5..296a6b3d23 100644
--- a/lib/stdlib/src/re.erl
+++ b/lib/stdlib/src/re.erl
@@ -1,19 +1,19 @@
%%
%% %CopyrightBegin%
-%%
-%% Copyright Ericsson AB 2008-2009. All Rights Reserved.
-%%
+%%
+%% Copyright Ericsson AB 2008-2010. All Rights Reserved.
+%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
-%%
+%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
-%%
+%%
%% %CopyrightEnd%
%%
-module(re).
@@ -32,18 +32,7 @@ split(Subject,RE,Options) ->
try
{NewOpt,Convert,Unicode,Limit,Strip,Group} =
process_split_params(Options,iodata,false,-1,false,false),
- FlatSubject =
- case is_binary(Subject) of
- true ->
- Subject;
- false ->
- case Unicode of
- true ->
- unicode:characters_to_binary(Subject,unicode);
- false ->
- iolist_to_binary(Subject)
- end
- end,
+ FlatSubject = to_binary(Subject, Unicode),
case compile_split(RE,NewOpt) of
{error,_Err} ->
throw(badre);
@@ -217,19 +206,9 @@ replace(Subject,RE,Replacement,Options) ->
try
{NewOpt,Convert,Unicode} =
process_repl_params(Options,iodata,false),
- FlatSubject =
- case is_binary(Subject) of
- true ->
- Subject;
- false ->
- case Unicode of
- true ->
- unicode:characters_to_binary(Subject,unicode);
- false ->
- iolist_to_binary(Subject)
- end
- end,
- case do_replace(FlatSubject,Subject,RE,Replacement,NewOpt) of
+ FlatSubject = to_binary(Subject, Unicode),
+ FlatReplacement = to_binary(Replacement, Unicode),
+ case do_replace(FlatSubject,Subject,RE,FlatReplacement,NewOpt) of
{error,_Err} ->
throw(badre);
IoList ->
@@ -237,7 +216,12 @@ replace(Subject,RE,Replacement,Options) ->
iodata ->
IoList;
binary ->
- iolist_to_binary(IoList);
+ case Unicode of
+ false ->
+ iolist_to_binary(IoList);
+ true ->
+ unicode:characters_to_binary(IoList,unicode)
+ end;
list ->
case Unicode of
false ->
@@ -324,8 +308,7 @@ process_split_params([H|T],C,U,L,S,G) ->
{[H|NT],NC,NU,NL,NS,NG}.
apply_mlist(Subject,Replacement,Mlist) ->
- do_mlist(Subject,Subject,0,precomp_repl(iolist_to_binary(Replacement)),
- Mlist).
+ do_mlist(Subject,Subject,0,precomp_repl(Replacement), Mlist).
precomp_repl(<<>>) ->
@@ -545,7 +528,7 @@ process_uparams([],Type) ->
ucompile(RE,Options) ->
try
- re:compile(unicode:characters_to_binary(RE,unicode))
+ re:compile(unicode:characters_to_binary(RE,unicode),Options)
catch
error:AnyError ->
{'EXIT',{new_stacktrace,[{Mod,_,L}|Rest]}} =
@@ -618,18 +601,7 @@ grun(Subject,RE,{Options,NeedClean,OrigRE}) ->
grun2(Subject,RE,{Options,NeedClean}) ->
Unicode = check_for_unicode(RE,Options),
- FlatSubject =
- case is_binary(Subject) of
- true ->
- Subject;
- false ->
- case Unicode of
- true ->
- unicode:characters_to_binary(Subject,unicode);
- false ->
- iolist_to_binary(Subject)
- end
- end,
+ FlatSubject = to_binary(Subject, Unicode),
do_grun(FlatSubject,Subject,Unicode,RE,{Options,NeedClean}).
do_grun(FlatSubject,Subject,Unicode,RE,{Options0,NeedClean}) ->
@@ -749,3 +721,10 @@ runopt(global) ->
true;
runopt(_) ->
false.
+
+to_binary(Bin, _IsUnicode) when is_binary(Bin) ->
+ Bin;
+to_binary(Data, true) ->
+ unicode:characters_to_binary(Data,unicode);
+to_binary(Data, false) ->
+ iolist_to_binary(Data).