aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--erts/emulator/beam/erl_bif_re.c30
-rw-r--r--lib/stdlib/src/re.erl64
2 files changed, 64 insertions, 30 deletions
diff --git a/erts/emulator/beam/erl_bif_re.c b/erts/emulator/beam/erl_bif_re.c
index 4289c79ba2..12fc834685 100644
--- a/erts/emulator/beam/erl_bif_re.c
+++ b/erts/emulator/beam/erl_bif_re.c
@@ -379,6 +379,8 @@ build_compile_result(Process *p, Eterm error_tag, pcre *result, int errcode, con
Eterm ret;
size_t pattern_size;
int capture_count;
+ int use_crlf;
+ unsigned long options;
if (!result) {
/* Return {error_tag, {Code, String, Offset}} */
int elen = sys_strlen(errstr);
@@ -393,14 +395,20 @@ build_compile_result(Process *p, Eterm error_tag, pcre *result, int errcode, con
} else {
erts_pcre_fullinfo(result, NULL, PCRE_INFO_SIZE, &pattern_size);
erts_pcre_fullinfo(result, NULL, PCRE_INFO_CAPTURECOUNT, &capture_count);
+ erts_pcre_fullinfo(result, NULL, PCRE_INFO_OPTIONS, &options);
+ options &= PCRE_NEWLINE_CR|PCRE_NEWLINE_LF | PCRE_NEWLINE_CRLF |
+ PCRE_NEWLINE_ANY | PCRE_NEWLINE_ANYCRLF;
+ use_crlf = (options == PCRE_NEWLINE_ANY ||
+ options == PCRE_NEWLINE_CRLF ||
+ options == PCRE_NEWLINE_ANYCRLF);
/* XXX: Optimize - keep in offheap binary to allow this to
be kept across traps w/o need of copying */
ret = new_binary(p, (byte *) result, pattern_size);
erts_pcre_free(result);
- hp = HAlloc(p, (with_ok) ? (3+5) : 5);
- ret = TUPLE4(hp,am_re_pattern, make_small(capture_count), make_small(unicode),ret);
+ hp = HAlloc(p, (with_ok) ? (3+6) : 6);
+ ret = TUPLE5(hp,am_re_pattern, make_small(capture_count), make_small(unicode),make_small(use_crlf),ret);
if (with_ok) {
- hp += 5;
+ hp += 6;
ret = TUPLE2(hp,am_ok,ret);
}
}
@@ -875,7 +883,7 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3)
is_list_cap = ((pflags & PARSE_FLAG_CAPTURE_OPT) &&
(capture[CAPSPEC_TYPE] == am_list));
- if (is_not_tuple(arg2) || (arityval(*tuple_val(arg2)) != 4)) {
+ if (is_not_tuple(arg2) || (arityval(*tuple_val(arg2)) != 5)) {
if (is_binary(arg2) || is_list(arg2) || is_nil(arg2)) {
/* Compile from textual RE */
ErlDrvSizeT slen;
@@ -947,7 +955,8 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3)
tp = tuple_val(arg2);
if (tp[1] != am_re_pattern || is_not_small(tp[2]) ||
- is_not_small(tp[3]) || is_not_binary(tp[4])) {
+ is_not_small(tp[3]) || is_not_small(tp[4]) ||
+ is_not_binary(tp[5])) {
BIF_ERROR(p,BADARG);
}
@@ -967,9 +976,9 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3)
}
ovsize = 3*(unsigned_val(tp[2])+1);
- code_size = binary_size(tp[4]);
+ code_size = binary_size(tp[5]);
if ((code_tmp = (const pcre *)
- erts_get_aligned_binary_bytes(tp[4], &temp_alloc)) == NULL) {
+ erts_get_aligned_binary_bytes(tp[5], &temp_alloc)) == NULL) {
erts_free_aligned_binary_bytes(temp_alloc);
BIF_ERROR(p, BADARG);
}
@@ -1055,9 +1064,10 @@ handle_iolist:
#ifdef DEBUG
loop_count = 0xFFFFFFFF;
#endif
-
- rc = erts_pcre_exec(restart.code, &(restart.extra), restart.subject, slength, startoffset,
- options, restart.ovector, ovsize);
+
+ rc = erts_pcre_exec(restart.code, &(restart.extra), restart.subject,
+ slength, startoffset,
+ options, restart.ovector, ovsize);
ASSERT(loop_count != 0xFFFFFFFF);
BUMP_REDS(p, loop_count / LOOP_FACTOR);
if (rc == PCRE_ERROR_LOOP_LIMIT) {
diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl
index c5109ec455..d8d529e6a4 100644
--- a/lib/stdlib/src/re.erl
+++ b/lib/stdlib/src/re.erl
@@ -19,8 +19,8 @@
-module(re).
-export([grun/3,urun/3,ucompile/2,replace/3,replace/4,split/2,split/3]).
-%-opaque mp() :: {re_pattern, _, _, _}.
--type mp() :: {re_pattern, _, _, _}.
+%-opaque mp() :: {re_pattern, _, _, _, _}.
+-type mp() :: {re_pattern, _, _, _, _}.
-type nl_spec() :: cr | crlf | lf | anycrlf | any.
@@ -266,7 +266,7 @@ extend_subpatterns([],N) ->
extend_subpatterns([H|T],N) ->
[H | extend_subpatterns(T,N-1)].
-compile_split({re_pattern,N,_,_} = Comp, Options) ->
+compile_split({re_pattern,N,_,_,_} = Comp, Options) ->
{Comp,N,Options};
compile_split(Pat,Options0) when not is_tuple(Pat) ->
Options = lists:filter(fun(O) ->
@@ -275,7 +275,7 @@ compile_split(Pat,Options0) when not is_tuple(Pat) ->
case re:compile(Pat,Options) of
{error,Err} ->
{error,Err};
- {ok, {re_pattern,N,_,_} = Comp} ->
+ {ok, {re_pattern,N,_,_,_} = Comp} ->
NewOpt = lists:filter(fun(OO) -> (not copt(OO)) end, Options0),
{Comp,N,NewOpt}
end;
@@ -487,12 +487,24 @@ do_replace(Subject,Repl,SubExprs0) ->
end || Part <- Repl ].
-check_for_unicode({re_pattern,_,1,_},_) ->
+check_for_unicode({re_pattern,_,1,_,_},_) ->
true;
-check_for_unicode({re_pattern,_,0,_},_) ->
+check_for_unicode({re_pattern,_,0,_,_},_) ->
false;
check_for_unicode(_,L) ->
lists:member(unicode,L).
+
+check_for_crlf({re_pattern,_,_,1,_},_) ->
+ true;
+check_for_crlf({re_pattern,_,_,0,_},_) ->
+ false;
+check_for_crlf(_,L) ->
+ case lists:keysearch(newline,1,L) of
+ {value,{newline,any}} -> true;
+ {value,{newline,crlf}} -> true;
+ {value,{newline,anycrlf}} -> true;
+ _ -> false
+ end.
% SelectReturn = false | all | stirpfirst | none
% ConvertReturn = index | list | binary
@@ -662,7 +674,7 @@ urun2(Subject0,RE0,Options0) ->
RE = case RE0 of
BinRE when is_binary(BinRE) ->
BinRE;
- {re_pattern,_,_,_} = ReCompiled ->
+ {re_pattern,_,_,_,_} = ReCompiled ->
ReCompiled;
ListRE ->
unicode:characters_to_binary(ListRE,unicode)
@@ -703,10 +715,11 @@ grun(Subject,RE,{Options,NeedClean,OrigRE}) ->
grun2(Subject,RE,{Options,NeedClean}) ->
Unicode = check_for_unicode(RE,Options),
+ CRLF = check_for_crlf(RE,Options),
FlatSubject = to_binary(Subject, Unicode),
- do_grun(FlatSubject,Subject,Unicode,RE,{Options,NeedClean}).
+ do_grun(FlatSubject,Subject,Unicode,CRLF,RE,{Options,NeedClean}).
-do_grun(FlatSubject,Subject,Unicode,RE,{Options0,NeedClean}) ->
+do_grun(FlatSubject,Subject,Unicode,CRLF,RE,{Options0,NeedClean}) ->
{StrippedOptions, InitialOffset,
SelectReturn, ConvertReturn} =
case (catch
@@ -718,12 +731,12 @@ do_grun(FlatSubject,Subject,Unicode,RE,{Options0,NeedClean}) ->
end,
postprocess(loopexec(FlatSubject,RE,InitialOffset,
byte_size(FlatSubject),
- Unicode,StrippedOptions),
+ Unicode,CRLF,StrippedOptions),
SelectReturn,ConvertReturn,FlatSubject,Unicode).
-loopexec(_,_,X,Y,_,_) when X > Y ->
+loopexec(_,_,X,Y,_,_,_) when X > Y ->
{match,[]};
-loopexec(Subject,RE,X,Y,Unicode,Options) ->
+loopexec(Subject,RE,X,Y,Unicode,CRLF,Options) ->
case re:run(Subject,RE,[{offset,X}]++Options) of
nomatch ->
{match,[]};
@@ -731,7 +744,7 @@ loopexec(Subject,RE,X,Y,Unicode,Options) ->
{match,Rest} =
case B>0 of
true ->
- loopexec(Subject,RE,A+B,Y,Unicode,Options);
+ loopexec(Subject,RE,A+B,Y,Unicode,CRLF,Options);
false ->
{match,M} =
case re:run(Subject,RE,[{offset,X},notempty,
@@ -745,10 +758,10 @@ loopexec(Subject,RE,X,Y,Unicode,Options) ->
[{_,NStep}|_] when NStep > 0 ->
A+NStep;
_ ->
- forward(Subject,A,1,Unicode)
+ forward(Subject,A,1,Unicode,CRLF)
end,
{match,MM} = loopexec(Subject,RE,NewA,Y,
- Unicode,Options),
+ Unicode,CRLF,Options),
case M of
[] ->
{match,MM};
@@ -759,11 +772,22 @@ loopexec(Subject,RE,X,Y,Unicode,Options) ->
{match,[[{A,B}|More] | Rest]}
end.
-forward(_Chal,A,0,_) ->
+forward(_Chal,A,0,_,_) ->
A;
-forward(_Chal,A,N,false) ->
- A+N;
-forward(Chal,A,N,true) ->
+forward(Chal,A,N,U,true) ->
+ <<_:A/binary,Tl/binary>> = Chal,
+ case Tl of
+ <<$\r,$\n,_/binary>> ->
+ forward(Chal,A+2,N-1,U,true);
+ _ ->
+ forward2(Chal,A,N,U,true)
+ end;
+forward(Chal,A,N,U,false) ->
+ forward2(Chal,A,N,U,false).
+
+forward2(Chal,A,N,false,CRLF) ->
+ forward(Chal,A+1,N-1,false,CRLF);
+forward2(Chal,A,N,true,CRLF) ->
<<_:A/binary,Tl/binary>> = Chal,
Forw = case Tl of
<<1:1,1:1,0:1,_:5,_/binary>> ->
@@ -775,7 +799,7 @@ forward(Chal,A,N,true) ->
_ ->
1
end,
- forward(Chal,A+Forw,N-1,true).
+ forward(Chal,A+Forw,N-1,true,CRLF).
copt(caseless) ->
true;