%% %% %CopyrightBegin% %% %% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at %% %% http://www.apache.org/licenses/LICENSE-2.0 %% %% Unless required by applicable law or agreed to in writing, software %% distributed under the License is distributed on an "AS IS" BASIS, %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. %% %% %CopyrightEnd% %% -module(run_pcre_tests). -export([test/1,gen_split_test/1,gen_repl_test/1]). test(RootDir) -> put(verbose,false), erts_debug:set_internal_state(available_internal_state,true), io:format("oldlimit: ~p~n",[ erts_debug:set_internal_state(re_loop_limit,10)]), Testfiles0 = ["testoutput1", "testoutput2", "testoutput3", "testoutput4", "testoutput5", "testoutput6","mod_testoutput8","testoutput10"], Testfiles = [ filename:join([RootDir,FN]) || FN <- Testfiles0 ], Res = [ begin io:format("~s~n",[X]), t(X) end || X <- Testfiles ], io:format("limit was: ~p~n",[ erts_debug:set_internal_state(re_loop_limit,default)]), Res2 = Res ++ [ begin io:format("~s~n",[X]), t(X) end || X <- Testfiles ], erts_debug:set_internal_state(available_internal_state,false), put(verbose,true), Res2. t(OneFile) -> t(OneFile,infinite). t(OneFile,Num) -> {ok,Bin} = file:read_file(OneFile), Lines = splitfile(0,Bin,1), Structured = stru(Lines), put(error_limit,Num), put(skipped,0), Res = [test(Structured,true,index,false), test(Structured,false,index,false), test(Structured,true,index,true), test(Structured,false,index,true), test(Structured,true,binary,false), test(Structured,false,binary,false), test(Structured,true,list,false), test(Structured,false,list,false)], {lists:sum(Res),length(Structured)*6,get(skipped)}. pick_exec_options([{exec_option,Opt}|T]) -> {O,E} = pick_exec_options(T), {O,[Opt|E]}; pick_exec_options([unicode|T]) -> {O,E} = pick_exec_options(T), {[unicode|O],[unicode|E]}; pick_exec_options([Opt|T]) -> {O,E} = pick_exec_options(T), {[Opt|O],E}; pick_exec_options([]) -> {[],[]}. test([],_,_,_) -> 0; test([{RE0,Line,Options0,Tests}|T],PreCompile,XMode,REAsList) -> Unicode = lists:member(unicode,Options0), RE = case REAsList of true -> if Unicode -> unicode:characters_to_list(RE0); true -> binary_to_list(RE0) end; false -> RE0 end, {Options,ExecOptions} = pick_exec_options(Options0), {Cres, Xopt} = case PreCompile of true -> {re:compile(RE,Options),[]}; _ -> {{ok,RE},Options} end, case Cres of {ok,P} -> case (catch testrun(RE,P,Tests,ExecOptions,Xopt,XMode)) of N when is_integer(N) -> N + test(T,PreCompile,XMode,REAsList); limit -> io:format("Error limit reached.~n"), 1; skip -> case get(skipped) of N when is_integer(N) -> put(skipped,N+1); _ -> put(skipped,1) end, test(T,PreCompile,XMode,REAsList) end; {error,Err} -> io:format("Compile error(~w): ~w~n",[Line,Err]), case get(error_limit) of infinite -> 1 + test(T,PreCompile,XMode,REAsList); X -> case X-1 of Y when Y =< 0 -> io:format("Error limit reached.~n"), 1; Y -> put(error_limit,Y), 1 + test(T,PreCompile,XMode,REAsList) end end end. contains_eightbit(<<>>) -> false; contains_eightbit(<>) when X >= 128 -> true; contains_eightbit(<<_,R/binary>>) -> contains_eightbit(R). clean_duplicates([],_) -> []; clean_duplicates([{X,Y}|T],L) -> case lists:keymember(X,1,L) of true -> clean_duplicates(T,L); false -> [{X,Y}|clean_duplicates(T,L)] end; clean_duplicates([bsr_anycrlf|T],L) -> case (lists:member(bsr_anycrlf,L) orelse lists:member(bsr_unicode,L)) of true -> clean_duplicates(T,L); false -> [bsr_anycrlf|clean_duplicates(T,L)] end; clean_duplicates([bsr_unicode|T],L) -> case (lists:member(bsr_anycrlf,L) orelse lists:member(bsr_unicode,L)) of true -> clean_duplicates(T,L); false -> [bsr_unicode|clean_duplicates(T,L)] end; clean_duplicates([X|T],L) -> case lists:member(X,L) of true -> clean_duplicates(T,L); false -> [X|clean_duplicates(T,L)] end. press([]) -> []; press([H|T]) -> H++press(T). testrun(_,_,[],_,_,_) -> 0; testrun(RE,P,[{Chal,Line,ExecOpt,Responses}|T],EO,Xopt0,XMode) -> Xopt = clean_duplicates(Xopt0,ExecOpt), case lists:keymember(newline,1,Xopt) of true -> info("skipping inconsistent newlines " "when compiling and running in one go (~p)~n", [Line]), throw(skip); false -> ok end, Res = case lists:member(g,EO) of true -> case XMode of binary -> case re:run(Chal,P,ExecOpt++Xopt++ [global,{capture,all,binary}]) of nomatch -> nomatch; {match, Reslist} -> {match,press([bfix(R)|| R <- Reslist])} end; list -> case re:run(Chal,P,ExecOpt++Xopt++ [global,{capture,all,list}]) of nomatch -> nomatch; {match, Reslist} -> UFix = lists:member(unicode,EO), {match,press([bfix([if UFix =:= true -> list_to_utf8(L); true -> list_to_binary(L) end || L <- R]) || R <- Reslist])} end; index -> case re:run(Chal,P,ExecOpt++Xopt++[global]) of nomatch -> nomatch; {match, Reslist} -> {match,press([fixup(Chal,R,0) || R <- Reslist])} end end; false -> case EO -- [accept_nonascii] of EO -> case contains_eightbit(Chal) of true -> info("skipping 8bit without LANG (~p)~n", [Line]), throw(skip); false -> ok end, case XMode of binary -> case re:run(Chal,P,ExecOpt++Xopt++ [{capture,all,binary}]) of nomatch -> nomatch; {match, Reslist} -> {match,bfix(Reslist)} end; list -> case re:run(Chal,P,ExecOpt++Xopt++ [{capture,all,list}]) of nomatch -> nomatch; {match, Reslist} -> UFix = lists:member(unicode,EO), {match,bfix([if UFix =:= true -> list_to_utf8(L); true -> list_to_binary(L) end || L <- Reslist])} end; index -> case re:run(Chal,P,ExecOpt++Xopt) of nomatch -> nomatch; {match, Reslist} -> {match,fixup(Chal,Reslist,0)} end end; _LesserOpt -> case XMode of binary -> case re:run(Chal,P,ExecOpt++Xopt++ [{capture,all,binary}]) of nomatch -> nomatch; {match, Reslist} -> {match,bfix(Reslist)} end; list -> case re:run(Chal,P,ExecOpt++Xopt++ [{capture,all,list}]) of nomatch -> nomatch; {match, Reslist} -> UFix = lists:member(unicode,EO), {match,bfix([if UFix =:= true -> list_to_utf8(L); true -> list_to_binary(L) end || L <- Reslist])} end; index -> case re:run(Chal,P,ExecOpt++Xopt) of nomatch -> nomatch; {match, Reslist} -> {match,fixup(Chal,Reslist,0)} end end end end, case compare_sloppy(Res,Responses) of true -> testrun(RE,P,T,EO,Xopt0,XMode); false -> io:format("FAIL(~w): re = ~p, ~nmatched against = ~p(~w), ~nwith options = ~p. ~nexpected = ~p, ~ngot = ~p~n", [Line,RE,Chal,binary_to_list(Chal),{ExecOpt,EO,Xopt},Responses,Res]), case get(error_limit) of infinite -> ok; X -> case X-1 of Y when Y =< 0 -> throw(limit); Y -> put(error_limit,Y) end end, 1 end. compare_sloppy({A,L1},{A,L2}) -> compare_sloppy(L1,L2); compare_sloppy(A,A) -> true; compare_sloppy([{X,Y}|T1],[{X,Y}|T2]) -> compare_sloppy(T1,T2); compare_sloppy([{X,[Y,_]}|T1],[{X,Y}|T2]) -> compare_sloppy(T1,T2); compare_sloppy([{X,[_,Y]}|T1],[{X,Y}|T2]) -> compare_sloppy(T1,T2); compare_sloppy(_,_) -> false. bfix(RL) -> bfix(RL,0). bfix([],_) -> []; bfix([<<>>|T],N) -> [{N,[<<>>,<<"">>]}|bfix(T,N+1)]; % indeterminable bfix([H|T],N) -> [{N,H}|bfix(T,N+1)]. fixup(List,Any,Any2) when is_list(List)-> fixup(unicode:characters_to_binary(List,unicode,unicode),Any,Any2); fixup(_,[],_) -> []; fixup(Bin,[{-1,0}|T],N) -> [{N,<<"">>}|fixup(Bin,T,N+1)]; fixup(Bin,[{X,Y}|T],N) -> <<_:X/binary,Res:Y/binary,_/binary>> = Bin, [{N,Res}|fixup(Bin,T,N+1)]. splitfile(N,Bin,_Line) when N >= size(Bin) -> []; splitfile(N,Bin,Line) -> {Res,NewN} = pickline(N,N,Bin), case emptyline(Res) of true -> [{Line,<<>>}|splitfile(NewN,Bin,Line+1)]; false -> [{Line,Res}|splitfile(NewN,Bin,Line+1)] end. emptyline(<<>>) -> true; emptyline(<<$ ,R/binary>>) -> emptyline(R); emptyline(_) -> false. pickline(Start,Stop,Bin) when Stop >= size(Bin) -> Len = Stop - Start - 1, <<_:Start/binary,Res:Len/binary,_/binary>> = Bin, {Res,Stop}; pickline(Start,Stop,Bin) -> <<_:Stop/binary,Ch,_/binary>> = Bin, case Ch of $\n -> Len = Stop - Start, <<_:Start/binary,Res:Len/binary,_/binary>> = Bin, {Res,Stop+1}; _ -> pickline(Start,Stop+1,Bin) end. skip_until_empty([]) -> []; skip_until_empty([{_,<<>>}|T]) -> T; skip_until_empty([{_,_}|T]) -> skip_until_empty(T). skip_debug([{_,<<$-,_/binary>>}|Con]) -> Con; skip_debug([_|T]) -> skip_debug(T); skip_debug([]) -> []. skip_extra_info([{_,<<$ ,$ ,$ ,_/binary>>}=H|Con]) -> [H|Con]; skip_extra_info([{_,<<>>}|Con]) -> Con; skip_extra_info([_|T]) -> skip_extra_info(T); skip_extra_info([]) -> []. stru([]) -> []; stru([{_,<<>>}|T]) -> stru(T); stru([{_Line,<<"< forbid ", _Rest/binary>>}|T0]) -> %% We do not handle lockout of modifiers from the tests... stru(T0); stru([{Line,<>}|T0]) -> {T,Re} = find_rest_re(Ch,[{Line,Re0}|T0]), {NewRe,<< Ch, Options/binary >>} = end_of_re(Ch,Re), case interpret_options_x(backstrip(frontstrip(Options)),NewRe) of {Olist,<<>>} -> U = lists:member(unicode,Olist), case T of [{_,<<$-,_/binary>>}|Con] -> %%Debug output, we skip those TmpT = skip_debug(Con), {NewT,Matches} = stru2(TmpT,U), [{NewRe,Line,Olist,Matches}|stru(NewT)]; [{_,<<$C,$a,$p,$t,$u,$r,$i,$n,$g,_/binary>>}|_] -> NewT0 = skip_extra_info(T), {NewT,Matches} = stru2(NewT0,U), [{NewRe,Line,Olist,Matches}|stru(NewT)]; [{_,<>}|_] when Bla =/= $ -> NewT = skip_until_empty(T), stru(NewT); _ -> {NewT,Matches} = stru2(T,U), Matches1 = case U of true -> Matches ++ [ {unicode:characters_to_list(E1,unicode),E2,E3,E4} || {E1,E2,E3,E4} <- Matches]; false -> Matches end, [{NewRe,Line,Olist,Matches1}|stru(NewT)] end; {_,Rest} -> NewT = skip_until_empty(T), info("Skipping options ~s for now (~w)~n",[binary_to_list(Rest),Line]), case NewT of [{Li,_}|_] -> info("Skip to line ~p~n",[Li]); _ -> ok end, stru(NewT) end. contains_lang_sens(<<>>) -> false; contains_lang_sens(<<$\\,$W,_/binary>>) -> true; contains_lang_sens(<<$\\,$w,_/binary>>) -> true; contains_lang_sens(<<$\\,$b,_/binary>>) -> true; contains_lang_sens(<<_,R/binary>>) -> contains_lang_sens(R). interpret_options_x(Options,RE) -> {O,R} = interpret_options(Options), case (contains_lang_sens(RE) or lists:member(caseless,O)) of false -> {[{exec_option,accept_nonascii}|O],R}; true -> case lists:member(unicode,O) of true -> {[{exec_option,accept_nonascii}|O],R}; false -> {O,R} end end. tr_option($i) -> [caseless]; tr_option($I) -> []; tr_option($B) -> []; tr_option($Z) -> []; tr_option($x) -> [extended]; tr_option($s) -> [dotall]; tr_option($m) -> [multiline]; tr_option($J) -> [dupnames]; tr_option($N) -> [no_auto_capture]; tr_option($8) -> [unicode]; tr_option($U) -> [ungreedy]; tr_option($g) -> [{exec_option,g}]; tr_option(_) -> false. interpret_options(<<$<,Rest0/binary>>) -> {Option,Rest} = pinch_cr(Rest0), case Option of {not_supported,{newline,_Offender}} -> {[],<<$<,Rest0/binary>>}; _ -> {Olist,NRest} = interpret_options(Rest), {[Option | Olist], NRest} end; interpret_options(<<$L,$f,$r,$_,$F,$R,Rest/binary>>) -> info("Accepting (and ignoring) french locale~n",[]), {Olist,NRest} = interpret_options(Rest), {[{exec_option, accept_nonascii}|Olist],NRest}; interpret_options(<>) -> {Olist,NRest} = interpret_options(Rest), case tr_option(Ch) of false -> {Olist,<>}; Option -> {Option ++ Olist, NRest} end; interpret_options(<<>>) -> {[],<<>>}. find_unsupported([{not_supported,X}|T]) -> [X | find_unsupported(T)]; find_unsupported([_|T]) -> find_unsupported(T); find_unsupported([]) -> []. backslash_end(<<>>) -> false; backslash_end(<<$\\>>) -> true; backslash_end(<<_>>) -> false; backslash_end(<<_,R/binary>>) -> backslash_end(R). stru2([{Line,<<$ ,Rest/binary>>} | T],U) -> %% A challenge case (catch responses(T,U)) of {NewT,Rlist} -> {NewNewT,StrList} = stru2(NewT,U), %% Hack... FS = case backstrip(frontstrip(Rest)) of <<"\\">> -> %% Single backslash is to be considered %% an empty line in challenge <<>>; OFS -> case backslash_end(OFS) of true -> <>; _ -> OFS end end, {ExecOpts,NFS} = escape(FS,U), case find_unsupported(ExecOpts) of [] -> {NewNewT,[{NFS,Line,ExecOpts, case Rlist of nomatch -> nomatch; RR -> {match,RR} end} | StrList]}; UList -> info("WARNING(~w): the exec-option(s) ~p are unsupported, skipping challenge.~n",[Line,UList]), {NewNewT,StrList} end; fail -> NewT = skip_until_empty(T), {NewT,[]} end; stru2(X,_) -> {X,[]}. responses([{_Line,<< X:2/binary,$:,$ ,Resp/binary>>}|T],U) -> {NT,R2} = responses(T,U), NX=list_to_integer(binary_to_list(frontstrip(X))), {NT,[{NX,escape2(Resp,U)} | R2]}; responses([{_Line,<< X:3/binary,$:,$ ,Resp/binary>>}|T],U) -> {NT,R2} = responses(T,U), NX=list_to_integer(binary_to_list(frontstrip(X))), {NT,[{NX,escape2(Resp,U)} | R2]}; responses([{_Line,<<$N,$o,$ ,$m,$a,$t,$c,$h,_/binary>>}|T],_) -> {T,nomatch}; responses([{Line,<<$ ,No,Ch,_/binary>>}|T],U) when No >= $0, No =< $9, Ch >= $A, Ch =< $Z -> info("Skipping strange debug response at line ~p~n",[Line]), responses(T,U); responses([{Line,<<$ ,$ ,Ch,_/binary>>}|T],U) when Ch =:= $G; Ch =:= $C -> info("Skipping stranger debug response at line ~p~n",[Line]), responses(T,U); responses([{Line,<>=X}|_],_) when C =/= $ -> info("Offending response line(~w)! ~p~n",[Line,X]), throw(fail); responses(X,_) -> {X,[]}. end_of_re(_,<<>>) -> {<<>>,<<>>}; end_of_re(C,<> = R) -> {<<>>,R}; end_of_re(C,<<$\\,C,Rest/binary>>) -> {Sub,Rest2} = end_of_re(C,Rest), {<>,Rest2}; end_of_re(C,<>) -> {Sub,Rest2} = end_of_re(C,Rest), {<>,Rest2}. frontstrip(<<>>) -> <<>>; frontstrip(<< $ ,Rest/binary>>) -> frontstrip(Rest); frontstrip(Bin) -> Bin. backstrip(<<>>) -> <<>>; backstrip(<<$ >>) -> <<>>; backstrip(<>) -> case backstrip(Rest) of Rest -> <>; Other -> NRest = backstrip(Other), <> end. find_rest_re(_,[]) -> {<<>>,<<>>}; find_rest_re(Ch,[{_,H}|T]) -> case end_of_re(Ch,H) of {_,<<>>} -> {NewT,Rest} = find_rest_re(Ch,T), {NewT,<>}; {_,_} -> {T,H} end. eopt($A) -> [anchored]; eopt($B) -> [notbol]; eopt(X) -> [{not_supported,X}]. pinch_cr(<<$c,$r,$>,Rest/binary>>) -> {{newline,cr},Rest}; pinch_cr(<<$l,$f,$>,Rest/binary>>) -> {{newline,lf},Rest}; pinch_cr(<<$c,$r,$l,$f,$>,Rest/binary>>) -> {{newline,crlf},Rest}; pinch_cr(<<$C,$R,$>,Rest/binary>>) -> {{newline,cr},Rest}; pinch_cr(<<$L,$F,$>,Rest/binary>>) -> {{newline,lf},Rest}; pinch_cr(<<$C,$R,$L,$F,$>,Rest/binary>>) -> {{newline,crlf},Rest}; pinch_cr(<<$a,$n,$y,$c,$r,$l,$f,$>,Rest/binary>>) -> {{newline,anycrlf},Rest}; pinch_cr(<<$b,$s,$r,$_,$a,$n,$y,$c,$r,$l,$f,$>,Rest/binary>>) -> {bsr_anycrlf,Rest}; pinch_cr(<<$b,$s,$r,$_,$u,$n,$i,$c,$o,$d,$e,$>,Rest/binary>>) -> {bsr_unicode,Rest}; pinch_cr(<<$a,$n,$y,$>,Rest/binary>>) -> {{newline,any},Rest}; pinch_cr(<<$A,$N,$Y,$>,Rest/binary>>) -> {{newline,any},Rest}; pinch_cr(Other) -> case splitby($>,Other,<<>>) of {Unk,Rest} -> {{not_supported,{newline,Unk}},Rest}; no -> {{not_supported,$<},Other} end. splitby(_,<<>>,_) -> no; splitby(Ch,<>,Acc) -> {Acc,Rest}; splitby(Ch,<>,Acc) -> splitby(Ch,Rest,<>). pick_number(N,<>) when Ch >= $0, Ch =< $9 -> pick_number(N*10+(Ch - $0),Rest); pick_number(N,Rest) -> {N,Rest}. pick_offset(Rest) -> {Int,NRest} = pick_number(0,Rest), {{offset,Int},NRest}. escape(<<>>,_) -> {[],<<>>}; escape(<<$\\, Ch, Rest/binary>>,U) when Ch >= $A, Ch =< $Z; Ch =:= $? -> %%Options in the string... NewOpts = eopt(Ch), {MoreOpts,Tail} = escape(Rest,U), {NewOpts ++ MoreOpts,Tail}; escape(<<$\\, $>, Rest/binary>>,U) -> %%Offset Options in the string... {NewOpt,NewRest} = pick_offset(Rest), {MoreOpts,Tail} = escape(NewRest,U), {[NewOpt|MoreOpts],Tail}; escape(<<$\\, $<, Rest/binary>>,U) -> %%CR Options in the string... {NewOpt,NewRest} = pinch_cr(Rest), {MoreOpts,Tail} = escape(NewRest,U), {[NewOpt|MoreOpts],Tail}; escape(<<$\\, Ch, Rest/binary>>,U) -> {C,NR} = case single_esc(Ch) of no -> case multi_esc(<>,U) of {CharBin,NewRest} -> {CharBin,NewRest}; no -> {<<$\\>>,<>} end; CCC -> {<>,Rest} end, {MoreOpts,Tail} = escape(NR,U), {MoreOpts,<>}; escape(<>,U) -> {X,RR} = escape(<>,U), {X,<>}; escape(Any,_) -> {[],Any}. escape2(<<>>,_) -> <<>>; escape2(<<$\\, Ch, Rest/binary>>,U) -> {C,NR} = case multi_esc(<>,U) of {CharBin,NewRest} -> {CharBin,NewRest}; no -> {<<$\\>>,<>} end, Tail = escape2(NR,U), <>; escape2(<>,U) -> RR = escape2(<>,U), <>; escape2(Any,_) -> Any. trx(N) when ((N >= $0) and (N =< $9)) -> N - $0; trx($A) -> 10; trx($B) -> 11; trx($C) -> 12; trx($D) -> 13; trx($E) -> 14; trx($F) -> 15; trx($a) -> 10; trx($b) -> 11; trx($c) -> 12; trx($d) -> 13; trx($e) -> 14; trx($f) -> 15. int_to_utf8(I) when I =< 16#7F -> <>; int_to_utf8(I) when I =< 16#7FF -> B2 = I band 16#3f, B1 = (I bsr 6) band 16#1f, <<1:1,1:1,0:1,B1:5,1:1,0:1,B2:6>>; int_to_utf8(I) when I =< 16#FFFF -> B3 = I band 16#3f, B2 = (I bsr 6) band 16#3f, B1 = (I bsr 12) band 16#f, <<1:1,1:1,1:1,0:1,B1:4,1:1,0:1,B2:6,1:1,0:1,B3:6>>; int_to_utf8(I) when I =< 16#10FFFF -> B4 = I band 16#3f, B3 = (I bsr 6) band 16#3f, B2 = (I bsr 12) band 16#3f, B1 = (I bsr 18) band 16#7, <<1:1,1:1,1:1,1:1,0:1,B1:3,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6>>; int_to_utf8(_) -> exit(unsupported_utf8). list_to_utf8(L) when is_list(L); is_binary(L) -> iolist_to_binary([int_to_utf8(I) || I <- L]); list_to_utf8({Tag,_,_}) when Tag =:= incomplete ; Tag =:= error -> throw(skip). multi_esc(<>,_) when M >= $0, M =< $7, N >= $0, N =< $7, O >= $0, O =< $7 -> Cha = ((M - $0) bsl 6) bor ((N - $0) bsl 3) bor (O - $0), {<>,Rest}; multi_esc(<>,_) when N >= $0, N =< $7, O >= $0, O =< $7 -> Cha = ((N - $0) bsl 3) bor (O - $0), {<>,Rest}; multi_esc(<>,_) when O >= $0, O =< $7 -> Cha = (O - $0), {<>,Rest}; multi_esc(<<$x,${,N,O,$},Rest/binary>>,Unicode) when ((((N >= $0) and (N =< $9)) or ((N >= $A) and (N =< $F)) or ((N >= $a) and (N =< $f))) and (((O >= $0) and (O =< $9)) or ((O >= $A) and (O =< $F)) or ((O >= $a) and (O =< $f)))) -> Cha = (trx(N) bsl 4) bor trx(O), case Unicode of false -> {<>,Rest}; _ -> {int_to_utf8(Cha),Rest} end; multi_esc(<<$x,${,N,O,P,$},Rest/binary>>,_) when ((((N >= $0) and (N =< $9)) or ((N >= $A) and (N =< $F)) or ((N >= $a) and (N =< $f))) and (((O >= $0) and (O =< $9)) or ((O >= $A) and (O =< $F)) or ((O >= $a) and (O =< $f)))and (((P >= $0) and (P =< $9)) or ((P >= $A) and (P =< $F)) or ((P >= $a) and (P =< $f)))) -> Cha = (trx(N) bsl 8) bor (trx(O) bsl 4) bor trx(P), {int_to_utf8(Cha),Rest}; multi_esc(<<$x,${,N,O,P,Q,$},Rest/binary>>,_) when ((((N >= $0) and (N =< $9)) or ((N >= $A) and (N =< $F)) or ((N >= $a) and (N =< $f))) and (((O >= $0) and (O =< $9)) or ((O >= $A) and (O =< $F)) or ((O >= $a) and (O =< $f))) and (((P >= $0) and (P =< $9)) or ((P >= $A) and (P =< $F)) or ((P >= $a) and (P =< $f))) and (((Q >= $0) and (Q =< $9)) or ((Q >= $A) and (Q =< $F)) or ((Q >= $a) and (Q =< $f)))) -> Cha = (trx(N) bsl 12) bor (trx(O) bsl 8) bor (trx(P) bsl 4) bor trx(Q), {int_to_utf8(Cha),Rest}; multi_esc(<<$x,${,N,O,P,Q,R,$},Rest/binary>>,_) when ((((N >= $0) and (N =< $9)) or ((N >= $A) and (N =< $F)) or ((N >= $a) and (N =< $f))) and (((O >= $0) and (O =< $9)) or ((O >= $A) and (O =< $F)) or ((O >= $a) and (O =< $f))) and (((P >= $0) and (P =< $9)) or ((P >= $A) and (P =< $F)) or ((P >= $a) and (P =< $f))) and (((Q >= $0) and (Q =< $9)) or ((Q >= $A) and (Q =< $F)) or ((Q >= $a) and (Q =< $f))) and (((R >= $0) and (R =< $9)) or ((R >= $A) and (R =< $F)) or ((R >= $a) and (R =< $f)))) -> Cha = (trx(N) bsl 16) bor (trx(O) bsl 12) bor (trx(P) bsl 8) bor (trx(Q) bsl 4) bor trx(R), {int_to_utf8(Cha),Rest}; multi_esc(<<$x,${,N,O,P,Q,R,S,$},Rest/binary>>,_) when ((((N >= $0) and (N =< $9)) or ((N >= $A) and (N =< $F)) or ((N >= $a) and (N =< $f))) and (((O >= $0) and (O =< $9)) or ((O >= $A) and (O =< $F)) or ((O >= $a) and (O =< $f))) and (((P >= $0) and (P =< $9)) or ((P >= $A) and (P =< $F)) or ((P >= $a) and (P =< $f))) and (((Q >= $0) and (Q =< $9)) or ((Q >= $A) and (Q =< $F)) or ((Q >= $a) and (Q =< $f))) and (((R >= $0) and (R =< $9)) or ((R >= $A) and (R =< $F)) or ((R >= $a) and (R =< $f))) and (((S >= $0) and (S =< $9)) or ((S >= $A) and (S =< $F)) or ((S >= $a) and (S =< $f)))) -> Cha = (trx(N) bsl 20) bor (trx(O) bsl 16) bor (trx(P) bsl 12) bor (trx(Q) bsl 8) bor (trx(R) bsl 4) bor trx(S), {int_to_utf8(Cha),Rest}; multi_esc(<<$x,N,O,Rest/binary>>,_) when ((((N >= $0) and (N =< $9)) or ((N >= $A) and (N =< $F)) or ((N >= $a) and (N =< $f))) and (((O >= $0) and (O =< $9)) or ((O >= $A) and (O =< $F)) or ((O >= $a) and (O =< $f)))) -> Cha = (trx(N) bsl 4) bor trx(O), {<>,Rest}; multi_esc(<<$x,N,Rest/binary>>,_) when (((N >= $0) and (N =< $9)) or ((N >= $A) and (N =< $F)) or ((N >= $a) and (N =< $f))) -> Cha = trx(N), {<>,Rest}; multi_esc(_,_) -> no. single_esc($") -> $"; single_esc($ ) -> $ ; single_esc($') -> $'; single_esc($@) -> $@; single_esc($t) -> $\t; single_esc($n) -> $\n; single_esc($r) -> $\r; single_esc($f) -> $\f; single_esc($e) -> $\e; single_esc($b) -> $\b; single_esc($$) -> $$; single_esc($\\) -> $\\; single_esc($a) -> 7; %%single_esc(Ch) when Ch >= $A, Ch =< $Z -> % eh? %% Ch; single_esc(_) -> no. info(Str,Lst) -> case get(verbose) of true -> io:format(Str,Lst); _ -> ok end. %% Generate split tests from indatafile, %% you will need perl on the machine gen_split_test(OneFile) -> {ok,Bin} = file:read_file(OneFile), Lines = splitfile(0,Bin,1), Structured = stru(Lines), PerlShellScript = OneFile++"_split_test_gen.sh", FunList = dumpsplit(Structured,PerlShellScript), ErlModule = "re_"++filename:basename(OneFile)++"_split_test", ErlFileName = ErlModule++".erl", {ok,F}= file:open(ErlFileName,[write]), io:format(F,"-module(~s).~n",[ErlModule]), io:format(F,"-export([run/0]).~n",[]), io:format(F,"-compile(no_native).~n",[]), io:format(F,"%% This file is generated by running ~w:gen_split_test(~p)~n", [?MODULE,OneFile]), io:format(F,"join([]) -> [];~n",[]), io:format(F,"join([A]) -> [A];~n",[]), io:format(F,"join([H|T]) -> [H,<<\":\">>|join(T)].~n",[]), io:format(F,"run() ->~n",[]), [ io:format(F," ~s(),~n",[FunName]) || FunName <- FunList ], file:close(F), os:cmd("sh "++ PerlShellScript++" 2>/dev/null >> "++ErlFileName), io:format("~s~n",[os:cmd("wc -l "++ErlFileName)]), ok. dumpsplit(S,Fname) -> {ok,F}= file:open(Fname,[write]), Res = dodumpsplit(F,S,0,[],0), file:close(F), Res. dodumpsplit(F,[],_,Acc,_) -> io:format(F,"echo \" ok.\"~n",[]), lists:reverse(Acc); dodumpsplit(F,L,0,Acc,FunNum) -> NewFun = "run"++integer_to_list(FunNum), io:format(F,"echo \" ok.\"~n",[]), io:format(F,"echo \"~s() ->\"~n",[NewFun]), dodumpsplit(F,L,20,[NewFun|Acc],FunNum+1); dodumpsplit(F,[H|T],N,Acc,FunNum) -> dumponesplit(F,H), dodumpsplit(F,T,N-1,Acc,FunNum). dumponesplit(F,{RE,Line,O,TS}) -> [begin {NO,_} = pick_exec_options(O++Op), SSS = opt_to_string(NO), LLL = unicode:characters_to_list(RE), case (catch iolist_to_binary(LLL)) of X when is_binary(X) -> io:format(F,"perl -e '$x = join(\":\",split(/~s/~s,\"~s\")); " "$x =~~ s/\\\\/\\\\\\\\/g; $x =~~ s/\\\"/\\\\\"/g; " "print \" <<\\\"$x\\\">> = " "iolist_to_binary(join(re:split(\\\"~s\\\"," "\\\"~s\\\",~p))),\\n\";'~n", [zsafe(safe(RE)), SSS, ysafe(safe(Str)), dsafe(safe(Str)), dsafe2(safe(RE)), NO++[trim]]), io:format(F,"perl -e '$x = join(\":\",split(/~s/~s,\"~s\",2)); " "$x =~~ s/\\\\/\\\\\\\\/g; $x =~~ s/\\\"/\\\\\"/g; " "print \" <<\\\"$x\\\">> = " "iolist_to_binary(join(re:split(\\\"~s\\\"," "\\\"~s\\\",~p))),\\n\";'~n", [zsafe(safe(RE)), SSS, ysafe(safe(Str)), dsafe(safe(Str)), dsafe2(safe(RE)), NO++[{parts,2}]]), io:format(F,"perl -e '$x = join(\":\",split(/~s/~s,\"~s\",-1)); " "$x =~~ s/\\\\/\\\\\\\\/g; $x =~~ s/\\\"/\\\\\"/g; " "print \" <<\\\"$x\\\">> = " "iolist_to_binary(join(re:split(\\\"~s\\\"," "\\\"~s\\\",~p))),\\n\";'~n", [zsafe(safe(RE)), SSS, ysafe(safe(Str)), dsafe(safe(Str)), dsafe2(safe(RE)), NO]); _ -> io:format("Found fishy character at line ~w~n",[Line]) end end || {Str,_,Op,_} <- TS]. %% Generate replacement tests from indatafile, %% you will need perl on the machine gen_repl_test(OneFile) -> rand:seed(exsplus, {1219,687731,62804}), {ok,Bin} = file:read_file(OneFile), Lines = splitfile(0,Bin,1), Structured = stru(Lines), PerlShellScript = OneFile++"_replacement_test_gen.sh", FunList = dump(Structured,PerlShellScript), ErlModule = "re_"++filename:basename(OneFile)++"_replacement_test", ErlFileName = ErlModule++".erl", {ok,F}= file:open(ErlFileName,[write]), io:format(F,"-module(~s).~n",[ErlModule]), io:format(F,"-export([run/0]).~n",[]), io:format(F,"-compile(no_native).~n",[]), io:format(F,"%% This file is generated by running ~w:gen_repl_test(~p)~n", [?MODULE,OneFile]), io:format(F,"run() ->~n",[]), [ io:format(F," ~s(),~n",[FunName]) || FunName <- FunList ], file:close(F), os:cmd("sh "++ PerlShellScript++" 2>/dev/null >> "++ErlFileName), io:format("~s~n",[os:cmd("wc -l "++ErlFileName)]), ok. dump(S,Fname) -> {ok,F}= file:open(Fname,[write]), Res = dodump(F,S,0,[],0), file:close(F), Res. dodump(F,[],_,Acc,_) -> io:format(F,"echo \" ok.\"~n",[]), lists:reverse(Acc); dodump(F,L,0,Acc,FunNum) -> NewFun = "run"++integer_to_list(FunNum), io:format(F,"echo \" ok.\"~n",[]), io:format(F,"echo \"~s() ->\"~n",[NewFun]), dodump(F,L,20,[NewFun|Acc],FunNum+1); dodump(F,[H|T],N,Acc,FunNum) -> dumpone(F,H), dodump(F,T,N-1,Acc,FunNum). dumpone(F,{RE,Line,O,TS}) -> [begin {NO,_} = pick_exec_options(O++Op), SSS = opt_to_string(NO), RS = ranstring(), LLL = unicode:characters_to_list(RE), case (catch iolist_to_binary(LLL)) of X when is_binary(X) -> io:format(F,"perl -e '$x = \"~s\"; $x =~~ s/~s/~s/~s; $x =~~ s/\\\\/\\\\\\\\/g; $x =~~ s/\\\"/\\\\\"/g; print \" <<\\\"$x\\\">> = iolist_to_binary(re:replace(\\\"~s\\\",\\\"~s\\\",\\\"~s\\\",~p)), \\n\";'~n",[ysafe(safe(Str)),zsafe(safe(RE)),perlify(binary_to_list(RS)),SSS,dsafe(safe(Str)),dsafe(safe(RE)),xsafe(RS),NO]), io:format(F,"perl -e '$x = \"~s\"; $x =~~ s/~s/~s/g~s; $x =~~ s/\\\\/\\\\\\\\/g; $x =~~ s/\\\"/\\\\\"/g; print \" <<\\\"$x\\\">> = iolist_to_binary(re:replace(\\\"~s\\\",\\\"~s\\\",\\\"~s\\\",~p)), \\n\";'~n",[ysafe(safe(Str)),zsafe(safe(RE)),perlify(binary_to_list(RS)),SSS,dsafe(safe(Str)),dsafe(safe(RE)),xsafe(RS),NO++[global]]); _ -> io:format("Found fishy character at line ~w~n",[Line]) end end || {Str,_,Op,_} <- TS]. dsafe2([]) -> []; dsafe2([$\',$\",$\',$\",$\'|T]) -> [$\',$\",$\',$\",$\' |dsafe2(T)]; dsafe2([$\"|T]) -> [$\\,$\\,$\\,$\" |dsafe2(T)]; dsafe2([$\\, $G|T]) -> [$\\,$\\,$\\,$\\,$A |dsafe2(T)]; dsafe2([$\\|T]) -> [$\\,$\\,$\\,$\\ |dsafe2(T)]; dsafe2([$$|T]) -> [$\\,$$|dsafe2(T)]; dsafe2([H|T]) -> [H|dsafe2(T)]. dsafe([]) -> []; dsafe([$\',$\",$\',$\",$\'|T]) -> [$\',$\",$\',$\",$\' |dsafe(T)]; dsafe([$\"|T]) -> [$\\,$\\,$\\,$\" |dsafe(T)]; dsafe([$\\|T]) -> [$\\,$\\,$\\,$\\ |dsafe(T)]; dsafe([$$|T]) -> [$\\,$$|dsafe(T)]; dsafe([H|T]) -> [H|dsafe(T)]. xsafe(<<>>) -> []; xsafe(<<$\\,R/binary>>) -> [$\\,$\\,$\\,$\\ | xsafe(R)]; xsafe(<>) -> [X|xsafe(R)]. zsafe([]) -> []; zsafe([$$, $b|T]) -> [$\\,$$, $b | zsafe(T)]; zsafe([X|R]) -> [X|zsafe(R)]. ysafe([]) -> []; ysafe([$\',$\",$\',$\",$\'|T]) -> [$\',$\",$\',$\",$\' |ysafe(T)]; ysafe([$\"|T]) -> [$\\,$\" |ysafe(T)]; ysafe([$\\|T]) -> [$\\,$\\ |ysafe(T)]; ysafe([$$|T]) -> [$\\,$$|ysafe(T)]; ysafe([H|T]) -> [H|ysafe(T)]. safe(<<>>) -> []; safe(<<$\n>>) -> %chomp []; safe(<<$\',R/binary>>) -> [$\',$\",$\',$\",$\' | safe(R)]; safe(<>) -> [X|safe(R)]. perlify([$\\,N|Rest]) when N >= $0, N =< $9 -> [$$,N|perlify(Rest)]; perlify([$& | Rest]) -> [$$,$& | perlify(Rest)]; perlify([H|T]) -> [H|perlify(T)]; perlify([]) -> []. opt_to_string([]) -> []; opt_to_string([A|T]) -> case btr(A) of false -> opt_to_string(T); Ch -> [Ch | opt_to_string(T)] end. btr(caseless) -> $i; btr(extended) -> $x; btr(dotall) -> $s; btr(multiline) -> $m; btr(dupnames) -> $J; btr(no_auto_capture) -> $N; btr(unicode) -> $8; btr(_) -> false. ranchar() -> case rand:uniform(10) of 9 -> $&; 10 -> <<"\\1">>; N when N < 5 -> rand:uniform($Z-$A)+$A-1; M when M < 9 -> rand:uniform($z-$a)+$a-1 end. ranstring() -> iolist_to_binary([ranchar() || _ <- lists:duplicate(rand:uniform(20),0) ]).