diff options
Diffstat (limited to 'lib/stdlib/src/re.erl')
| -rw-r--r-- | lib/stdlib/src/re.erl | 177 | 
1 files changed, 126 insertions, 51 deletions
diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl index c5109ec455..7f3cd8f592 100644 --- a/lib/stdlib/src/re.erl +++ b/lib/stdlib/src/re.erl @@ -1,7 +1,7 @@  %%  %% %CopyrightBegin%  %% -%% Copyright Ericsson AB 2008-2012. All Rights Reserved. +%% Copyright Ericsson AB 2008-2014. All Rights Reserved.  %%  %% The contents of this file are subject to the Erlang Public License,  %% Version 1.1, (the "License"); you may not use this file except in @@ -19,20 +19,20 @@  -module(re).  -export([grun/3,urun/3,ucompile/2,replace/3,replace/4,split/2,split/3]). -%-opaque mp() :: {re_pattern, _, _, _}. --type mp() :: {re_pattern, _, _, _}. +-type mp() :: {re_pattern, _, _, _, _}.  -type nl_spec() :: cr | crlf | lf | anycrlf | any.  -type compile_option() :: unicode | anchored | caseless | dollar_endonly                          | dotall | extended | firstline | multiline                          | no_auto_capture | dupnames | ungreedy -                        | {newline, nl_spec()}| bsr_anycrlf -                        | bsr_unicode. +                        | {newline, nl_spec()} +                        | bsr_anycrlf | bsr_unicode +                        | no_start_optimize | ucp | never_utf.  %%% BIFs --export([compile/1, compile/2, run/2, run/3]). +-export([compile/1, compile/2, run/2, run/3, inspect/2]).  -spec compile(Regexp) -> {ok, MP} | {error, ErrSpec} when        Regexp :: iodata(), @@ -63,17 +63,21 @@ run(_, _) ->  -spec run(Subject, RE, Options) -> {match, Captured} |                                     match | -                                   nomatch when +                                   nomatch | +				   {error, ErrType} when        Subject :: iodata() | unicode:charlist(),        RE :: mp() | iodata() | unicode:charlist(),        Options :: [Option], -      Option :: anchored | global | notbol | noteol | notempty +      Option :: anchored | global | notbol | noteol | notempty  +	      | notempty_atstart | report_errors                | {offset, non_neg_integer()} | +		{match_limit, non_neg_integer()} | +		{match_limit_recursion, non_neg_integer()} |                  {newline, NLSpec :: nl_spec()} |                  bsr_anycrlf | bsr_unicode | {capture, ValueSpec} |                  {capture, ValueSpec, Type} | CompileOpt,        Type :: index | list | binary, -      ValueSpec :: all | all_but_first | first | none | ValueList, +      ValueSpec :: all | all_but_first | all_names | first | none | ValueList,        ValueList :: [ValueID],        ValueID :: integer() | string() | atom(),        CompileOpt :: compile_option(), @@ -83,11 +87,21 @@ run(_, _) ->                     | binary(),        ListConversionData :: string()                            | {error, string(), binary()} -                          | {incomplete, string(), binary()}. +                          | {incomplete, string(), binary()}, +      ErrType :: match_limit | match_limit_recursion | {compile,  CompileErr},  +      CompileErr :: {ErrString :: string(), Position :: non_neg_integer()}.  run(_, _, _) ->      erlang:nif_error(undef). +-spec inspect(MP,Item) -> {namelist, [ binary() ]} when +      MP :: mp(), +      Item :: namelist. + +inspect(_,_) -> +    erlang:nif_error(undef). +     +  %%% End of BIFs  -spec split(Subject, RE) -> SplitList when @@ -102,8 +116,10 @@ split(Subject,RE) ->        Subject :: iodata() | unicode:charlist(),        RE :: mp() | iodata() | unicode:charlist(),        Options :: [ Option ], -      Option :: anchored | notbol | noteol | notempty +      Option :: anchored | notbol | noteol | notempty | notempty_atstart                | {offset, non_neg_integer()} | {newline, nl_spec()} +              | {match_limit, non_neg_integer()}  +              | {match_limit_recursion, non_neg_integer()}                | bsr_anycrlf | bsr_unicode | {return, ReturnType}                | {parts, NumParts} | group | trim | CompileOpt,        NumParts :: non_neg_integer() | infinity, @@ -266,7 +282,7 @@ extend_subpatterns([],N) ->  extend_subpatterns([H|T],N) ->      [H | extend_subpatterns(T,N-1)]. -compile_split({re_pattern,N,_,_} = Comp, Options) -> +compile_split({re_pattern,N,_,_,_} = Comp, Options) ->      {Comp,N,Options};  compile_split(Pat,Options0) when not is_tuple(Pat) ->      Options = lists:filter(fun(O) -> @@ -275,7 +291,7 @@ compile_split(Pat,Options0) when not is_tuple(Pat) ->      case re:compile(Pat,Options) of  	{error,Err} ->  	    {error,Err}; -	{ok, {re_pattern,N,_,_} = Comp} -> +	{ok, {re_pattern,N,_,_,_} = Comp} ->  	    NewOpt = lists:filter(fun(OO) -> (not copt(OO)) end, Options0),  	    {Comp,N,NewOpt}      end; @@ -295,8 +311,11 @@ replace(Subject,RE,Replacement) ->        RE :: mp() | iodata() | unicode:charlist(),        Replacement :: iodata() | unicode:charlist(),        Options :: [Option], -      Option :: anchored | global | notbol | noteol | notempty +      Option :: anchored | global | notbol | noteol | notempty  +	      | notempty_atstart                | {offset, non_neg_integer()} | {newline, NLSpec} | bsr_anycrlf +              | {match_limit, non_neg_integer()}  +              | {match_limit_recursion, non_neg_integer()}                | bsr_unicode | {return, ReturnType} | CompileOpt,        ReturnType :: iodata | list | binary,        CompileOpt :: compile_option(), @@ -352,6 +371,8 @@ process_repl_params([],Convert,Unicode) ->  process_repl_params([unicode|T],C,_U) ->      {NT,NC,NU} = process_repl_params(T,C,true),       {[unicode|NT],NC,NU}; +process_repl_params([report_errors|_],_,_) -> +    throw(badopt);  process_repl_params([{capture,_,_}|_],_,_) ->      throw(badopt);  process_repl_params([{capture,_}|_],_,_) -> @@ -387,6 +408,8 @@ process_split_params([group|T],C,U,L,S,_G) ->      process_split_params(T,C,U,L,S,true);   process_split_params([global|_],_,_,_,_,_) ->      throw(badopt); +process_split_params([report_errors|_],_,_,_,_,_) -> +    throw(badopt);  process_split_params([{capture,_,_}|_],_,_,_,_,_) ->      throw(badopt);  process_split_params([{capture,_}|_],_,_,_,_,_) -> @@ -487,17 +510,31 @@ do_replace(Subject,Repl,SubExprs0) ->        end || Part <- Repl ]. -check_for_unicode({re_pattern,_,1,_},_) -> +check_for_unicode({re_pattern,_,1,_,_},_) ->      true; -check_for_unicode({re_pattern,_,0,_},_) -> +check_for_unicode({re_pattern,_,0,_,_},_) ->      false;  check_for_unicode(_,L) ->      lists:member(unicode,L). + +check_for_crlf({re_pattern,_,_,1,_},_) -> +    true; +check_for_crlf({re_pattern,_,_,0,_},_) -> +    false; +check_for_crlf(_,L) -> +    case lists:keysearch(newline,1,L) of +	{value,{newline,any}} -> true; +	{value,{newline,crlf}} -> true; +	{value,{newline,anycrlf}} -> true; +	_ -> false +    end.  % SelectReturn = false | all | stirpfirst | none   % ConvertReturn = index | list | binary  % {capture, all} -> all (untouchded) -% {capture, first} -> kept in argumentt list and Select all +% {capture, all_names} -> if names are present: treated as a name {capture, [...]}  +%                                      else:    same as {capture, []} +% {capture, first} -> kept in argument list and Select all  % {capture, all_but_first} -> removed from argument list and selects stripfirst  % {capture, none} ->  removed from argument list and selects none  % {capture, []} -> removed from argument list and selects none @@ -506,23 +543,30 @@ check_for_unicode(_,L) ->  % Call as process_parameters([],0,false,index,NeedClean) -process_parameters([],InitialOffset, SelectReturn, ConvertReturn,_) -> +process_parameters([],InitialOffset, SelectReturn, ConvertReturn,_,_) ->      {[], InitialOffset, SelectReturn, ConvertReturn}; -process_parameters([{offset, N} | T],_Init0,Select0,Return0,CC) -> -    process_parameters(T,N,Select0,Return0,CC); -process_parameters([global | T],Init0,Select0,Return0,CC) -> -    process_parameters(T,Init0,Select0,Return0,CC); -process_parameters([{capture,Values,Type}|T],Init0,Select0,_Return0,CC) -> -    process_parameters([{capture,Values}|T],Init0,Select0,Type,CC); -process_parameters([{capture,Values}|T],Init0,Select0,Return0,CC) -> +process_parameters([{offset, N} | T],_Init0,Select0,Return0,CC,RE) -> +    process_parameters(T,N,Select0,Return0,CC,RE); +process_parameters([global | T],Init0,Select0,Return0,CC,RE) -> +    process_parameters(T,Init0,Select0,Return0,CC,RE); +process_parameters([{capture,Values,Type}|T],Init0,Select0,_Return0,CC,RE) -> +    process_parameters([{capture,Values}|T],Init0,Select0,Type,CC,RE); +process_parameters([{capture,Values}|T],Init0,Select0,Return0,CC,RE) ->      % First process the rest to see if capture was already present      {NewTail, Init1, Select1, Return1} =  -	process_parameters(T,Init0,Select0,Return0,CC), +	process_parameters(T,Init0,Select0,Return0,CC,RE),      case Select1 of  	false ->  	    case Values of  		all ->  		    {[{capture,all} | NewTail], Init1, all, Return0};  +		all_names -> +		    case re:inspect(RE,namelist) of +			{namelist, []} -> +			    {[{capture,first} | NewTail], Init1, none, Return0}; +			{namelist, List} -> +			    {[{capture,[0|List]} | NewTail], Init1, stripfirst, Return0} +		    end;   		first ->  		    {[{capture,first} | NewTail], Init1, all, Return0};  		all_but_first -> @@ -541,20 +585,20 @@ process_parameters([{capture,Values}|T],Init0,Select0,Return0,CC) ->  	    % Found overriding further down list, ignore this one  	    {NewTail, Init1, Select1, Return1}      end; -process_parameters([H|T],Init0,Select0,Return0,true) -> +process_parameters([H|T],Init0,Select0,Return0,true,RE) ->      case copt(H) of  	true -> -	    process_parameters(T,Init0,Select0,Return0,true); +	    process_parameters(T,Init0,Select0,Return0,true,RE);  	false ->  	    {NewT,Init,Select,Return} = -		process_parameters(T,Init0,Select0,Return0,true),	 +		process_parameters(T,Init0,Select0,Return0,true,RE),	  	    {[H|NewT],Init,Select,Return}      end; -process_parameters([H|T],Init0,Select0,Return0,false) -> +process_parameters([H|T],Init0,Select0,Return0,false,RE) ->      {NewT,Init,Select,Return} = -		process_parameters(T,Init0,Select0,Return0,false), +		process_parameters(T,Init0,Select0,Return0,false,RE),      {[H|NewT],Init,Select,Return}; -process_parameters(_,_,_,_,_) -> +process_parameters(_,_,_,_,_,_) ->      throw(badlist).  postprocess({match,[]},_,_,_,_) -> @@ -662,7 +706,7 @@ urun2(Subject0,RE0,Options0) ->      RE = case RE0 of  	     BinRE when is_binary(BinRE) ->  		 BinRE; -	     {re_pattern,_,_,_} = ReCompiled -> +	     {re_pattern,_,_,_,_} = ReCompiled ->  		 ReCompiled;  	     ListRE ->  		 unicode:characters_to_binary(ListRE,unicode) @@ -703,38 +747,46 @@ grun(Subject,RE,{Options,NeedClean,OrigRE}) ->  grun2(Subject,RE,{Options,NeedClean}) ->      Unicode = check_for_unicode(RE,Options), +    CRLF = check_for_crlf(RE,Options),      FlatSubject = to_binary(Subject, Unicode), -    do_grun(FlatSubject,Subject,Unicode,RE,{Options,NeedClean}). +    do_grun(FlatSubject,Subject,Unicode,CRLF,RE,{Options,NeedClean}). -do_grun(FlatSubject,Subject,Unicode,RE,{Options0,NeedClean}) -> +do_grun(FlatSubject,Subject,Unicode,CRLF,RE,{Options0,NeedClean}) ->      {StrippedOptions, InitialOffset,       SelectReturn, ConvertReturn} =   	case (catch  -		  process_parameters(Options0, 0, false, index, NeedClean)) of +		  process_parameters(Options0, 0, false, index, NeedClean,RE)) of  	    badlist ->  		erlang:error(badarg,[Subject,RE,Options0]);  	    CorrectReturn ->  		CorrectReturn  	end, -    postprocess(loopexec(FlatSubject,RE,InitialOffset, -			 byte_size(FlatSubject), -			 Unicode,StrippedOptions), -		SelectReturn,ConvertReturn,FlatSubject,Unicode). +    try +	postprocess(loopexec(FlatSubject,RE,InitialOffset, +			     byte_size(FlatSubject), +			     Unicode,CRLF,StrippedOptions), +		    SelectReturn,ConvertReturn,FlatSubject,Unicode) +    catch +	throw:ErrTuple -> +	    ErrTuple +    end. -loopexec(_,_,X,Y,_,_) when X > Y -> +loopexec(_,_,X,Y,_,_,_) when X > Y ->      {match,[]}; -loopexec(Subject,RE,X,Y,Unicode,Options) -> +loopexec(Subject,RE,X,Y,Unicode,CRLF,Options) ->      case re:run(Subject,RE,[{offset,X}]++Options) of +	{error, Err} -> +	    throw({error,Err});  	nomatch ->  	    {match,[]};  	{match,[{A,B}|More]} ->  	    {match,Rest} =   		case B>0 of  		    true -> -			loopexec(Subject,RE,A+B,Y,Unicode,Options); +			loopexec(Subject,RE,A+B,Y,Unicode,CRLF,Options);  		    false ->  			{match,M} =  -			    case re:run(Subject,RE,[{offset,X},notempty, +			    case re:run(Subject,RE,[{offset,X},notempty_atstart,  						anchored]++Options) of  				nomatch ->  				    {match,[]}; @@ -745,10 +797,10 @@ loopexec(Subject,RE,X,Y,Unicode,Options) ->  				   [{_,NStep}|_] when NStep > 0 ->  				       A+NStep;  				   _ -> -				       forward(Subject,A,1,Unicode) +				       forward(Subject,A,1,Unicode,CRLF)  			       end,  			{match,MM} = loopexec(Subject,RE,NewA,Y, -					      Unicode,Options), +					      Unicode,CRLF,Options),  			case M of   			    [] ->  				{match,MM}; @@ -759,11 +811,22 @@ loopexec(Subject,RE,X,Y,Unicode,Options) ->  	    {match,[[{A,B}|More] | Rest]}      end. -forward(_Chal,A,0,_) -> +forward(_Chal,A,0,_,_) ->      A; -forward(_Chal,A,N,false) -> -    A+N; -forward(Chal,A,N,true) -> +forward(Chal,A,N,U,true) -> +    <<_:A/binary,Tl/binary>> = Chal, +    case Tl of +	<<$\r,$\n,_/binary>> -> +	    forward(Chal,A+2,N-1,U,true); +	_ ->  +	    forward2(Chal,A,N,U,true) +    end; +forward(Chal,A,N,U,false) -> +    forward2(Chal,A,N,U,false). + +forward2(Chal,A,N,false,CRLF) -> +    forward(Chal,A+1,N-1,false,CRLF); +forward2(Chal,A,N,true,CRLF) ->      <<_:A/binary,Tl/binary>> = Chal,      Forw = case Tl of  	       <<1:1,1:1,0:1,_:5,_/binary>>  -> @@ -775,10 +838,16 @@ forward(Chal,A,N,true) ->  	       _ ->  		   1  	   end, -    forward(Chal,A+Forw,N-1,true). +    forward(Chal,A+Forw,N-1,true,CRLF).  copt(caseless) ->      true; +copt(no_start_optimize) -> +    true; +copt(never_utf) -> +    true; +copt(ucp) -> +    true;  copt(dollar_endonly) ->      true;  copt(dotall) -> @@ -809,6 +878,8 @@ copt(_) ->  runopt(notempty) ->      true; +runopt(notempty_atstart) -> +    true;  runopt(notbol) ->      true;  runopt(noteol) -> @@ -821,6 +892,10 @@ runopt({capture,_}) ->      true;  runopt(global) ->      true; +runopt({match_limit,_}) -> +    true; +runopt({match_limit_recursion,_}) -> +    true;  runopt(_) ->      false.  | 
