diff options
Diffstat (limited to 'lib/stdlib/test/run_pcre_tests.erl')
-rw-r--r-- | lib/stdlib/test/run_pcre_tests.erl | 1201 |
1 files changed, 1201 insertions, 0 deletions
diff --git a/lib/stdlib/test/run_pcre_tests.erl b/lib/stdlib/test/run_pcre_tests.erl new file mode 100644 index 0000000000..0ef3986918 --- /dev/null +++ b/lib/stdlib/test/run_pcre_tests.erl @@ -0,0 +1,1201 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2008-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(run_pcre_tests). + +-compile(export_all). + +test(RootDir) -> + put(verbose,false), + erts_debug:set_internal_state(available_internal_state,true), + io:format("oldlimit: ~p~n",[ erts_debug:set_internal_state(re_loop_limit,10)]), + Testfiles0 = ["testoutput1", "testoutput2", "testoutput3", "testoutput4", + "testoutput5", "testoutput6", "testoutput10"], + Testfiles = [ filename:join([RootDir,FN]) || FN <- Testfiles0 ], + Res = [ begin io:format("~s~n",[X]), t(X) end || X <- Testfiles ], + io:format("limit was: ~p~n",[ erts_debug:set_internal_state(re_loop_limit,default)]), + Res2 = Res ++ [ begin io:format("~s~n",[X]), t(X) end || X <- Testfiles ], + erts_debug:set_internal_state(available_internal_state,false), + put(verbose,true), + Res2. +t(OneFile) -> + t(OneFile,infinite). +t(OneFile,Num) -> + {ok,Bin} = file:read_file(OneFile), + Lines = splitfile(0,Bin,1), + Structured = stru(Lines), + put(error_limit,Num), + put(skipped,0), + Res = + [test(Structured,true,index), + test(Structured,false,index), + test(Structured,true,binary), + test(Structured,false,binary), + test(Structured,true,list), + test(Structured,false,list)], + {lists:sum(Res),length(Structured)*6,get(skipped)}. + + +pick_exec_options([{exec_option,Opt}|T]) -> + {O,E} = pick_exec_options(T), + {O,[Opt|E]}; +pick_exec_options([unicode|T]) -> + {O,E} = pick_exec_options(T), + {[unicode|O],[unicode|E]}; +pick_exec_options([Opt|T]) -> + {O,E} = pick_exec_options(T), + {[Opt|O],E}; +pick_exec_options([]) -> + {[],[]}. + +test([],_,_) -> + 0; +test([{RE,Line,Options0,Tests}|T],PreCompile,XMode) -> + %io:format("."), + %case RE of <<>> -> io:format("Empty re:~w~n",[Line]); _ -> ok end, + {Options,ExecOptions} = pick_exec_options(Options0), + {Cres, Xopt} = case PreCompile of + true -> + {re:compile(RE,Options),[]}; + _ -> + {{ok,RE},Options} + end, + case Cres of + {ok,P} -> + %erlang:display({testrun,RE,P,Tests,ExecOptions,Xopt,XMode}), + case (catch testrun(RE,P,Tests,ExecOptions,Xopt,XMode)) of + N when is_integer(N) -> + N + test(T,PreCompile,XMode); + limit -> + io:format("Error limit reached.~n"), + 1; + skip -> + case get(skipped) of + N when is_integer(N) -> + put(skipped,N+1); + _ -> + put(skipped,1) + end, + test(T,PreCompile,XMode) + end; + {error,Err} -> + io:format("Compile error(~w): ~w~n",[Line,Err]), + case get(error_limit) of + infinite -> 1 + test(T,PreCompile,XMode); + X -> + case X-1 of + Y when Y =< 0 -> + io:format("Error limit reached.~n"), + 1; + Y -> + put(error_limit,Y), + 1 + test(T,PreCompile,XMode) + end + end + end. + +loopexec(_,_,X,Y,_,_) when X > Y -> + {match,[]}; +loopexec(P,Chal,X,Y,Unicode,Xopt) -> + %io:format("~p~n",[X]), + case re:run(Chal,P,[{offset,X}]++Xopt) of + nomatch -> + %io:format(" re:exec(~p,~p,[{offset,~p}]) -> ~p~n", + % [P,Chal,X,no]), + {match,[]}; + %loopexec(P,Chal,X+1,Y); + {match,[{A,B}|More]} -> + %io:format(" re:exec(~p,~p,[{offset,~p}]) -> ~p~n", + % [P,Chal,X,{match,[{A,B}|More]}]), + {match,Rest} = + case B>0 of + true -> + loopexec(P,Chal,A+B,Y,Unicode,Xopt); + false -> + {match,M} = case re:run(Chal,P,[{offset,X},notempty,anchored]++Xopt) of + nomatch -> + {match,[]}; + {match,Other} -> + {match,fixup(Chal,Other,0)} + end, + NewA = forward(Chal,A,1,Unicode), + {match,MM} = loopexec(P,Chal,NewA,Y,Unicode,Xopt), + {match,M ++ MM} + end, + {match,fixup(Chal,[{A,B}|More],0)++Rest} + end. + +forward(_Chal,A,0,_) -> + A; +forward(_Chal,A,N,false) -> + A+N; +forward(Chal,A,N,true) -> + <<_:A/binary,Tl/binary>> = Chal, + Forw = case Tl of + <<1:1,1:1,0:1,_:5,_/binary>> -> + 2; + <<1:1,1:1,1:1,0:1,_:4,_/binary>> -> + 3; + <<1:1,1:1,1:1,1:1,0:1,_:3,_/binary>> -> + 4; + _ -> + 1 + end, + %io:format("Forward ~p~n",[Forw]), + forward(Chal,A+Forw,N-1,true). + +contains_eightbit(<<>>) -> + false; +contains_eightbit(<<X:8,_/binary>>) when X >= 128 -> + true; +contains_eightbit(<<_,R/binary>>) -> + contains_eightbit(R). + +clean_duplicates([],_) -> + []; +clean_duplicates([{X,Y}|T],L) -> + case lists:keymember(X,1,L) of + true -> + clean_duplicates(T,L); + false -> + [{X,Y}|clean_duplicates(T,L)] + end; +clean_duplicates([bsr_anycrlf|T],L) -> + case (lists:member(bsr_anycrlf,L) orelse lists:member(bsr_unicode,L)) of + true -> + clean_duplicates(T,L); + false -> + [bsr_anycrlf|clean_duplicates(T,L)] + end; +clean_duplicates([bsr_unicode|T],L) -> + case (lists:member(bsr_anycrlf,L) orelse lists:member(bsr_unicode,L)) of + true -> + clean_duplicates(T,L); + false -> + [bsr_unicode|clean_duplicates(T,L)] + end; +clean_duplicates([X|T],L) -> + case lists:member(X,L) of + true -> + clean_duplicates(T,L); + false -> + [X|clean_duplicates(T,L)] + end. + + +global_fixup(_,nomatch) -> + nomatch; +global_fixup(P,{match,M}) -> + {match,lists:flatten(global_fixup2(P,M))}. + +global_fixup2(_,[]) -> + []; +global_fixup2(P,[H|T]) -> + [gfixup_one(P,0,H)|global_fixup2(P,T)]. + +gfixup_one(_,_,[]) -> + []; +gfixup_one(P,I,[{Start,Len}|T]) -> + <<_:Start/binary,R:Len/binary,_/binary>> = P, + [{I,R}|gfixup_one(P,I+1,T)]. + + +press([]) -> + []; +press([H|T]) -> + H++press(T). + +testrun(_,_,[],_,_,_) -> + 0; +testrun(RE,P,[{Chal,Line,ExecOpt,Responses}|T],EO,Xopt0,XMode) -> + Xopt = clean_duplicates(Xopt0,ExecOpt), + + case lists:keymember(newline,1,Xopt) of + true -> + info("skipping inconsistent newlines " + "when compiling and running in one go (~p)~n", + [Line]), + throw(skip); + false -> + ok + end, + + Res = + case lists:member(g,EO) of + true -> + case XMode of + binary -> + case re:run(Chal,P,ExecOpt++Xopt++ + [global,{capture,all,binary}]) of + nomatch -> + nomatch; + {match, Reslist} -> + {match,press([bfix(R)|| R <- Reslist])} + end; + list -> + case re:run(Chal,P,ExecOpt++Xopt++ + [global,{capture,all,list}]) of + nomatch -> + nomatch; + {match, Reslist} -> + UFix = lists:member(unicode,EO), + {match,press([bfix([if UFix =:= true -> list_to_utf8(L); true -> list_to_binary(L) end || L <- R]) || R <- Reslist])} + end; + index -> + case re:run(Chal,P,ExecOpt++Xopt++[global]) of + nomatch -> + nomatch; + {match, Reslist} -> + {match,press([fixup(Chal,R,0) || R <- Reslist])} + end + end; + false -> + case EO -- [accept_nonascii] of + EO -> + case contains_eightbit(Chal) of + true -> + info("skipping 8bit without LANG (~p)~n", + [Line]), + throw(skip); + false -> + ok + end, + + case XMode of + binary -> + case re:run(Chal,P,ExecOpt++Xopt++ + [{capture,all,binary}]) of + nomatch -> + nomatch; + {match, Reslist} -> + {match,bfix(Reslist)} + end; + list -> + case re:run(Chal,P,ExecOpt++Xopt++ + [{capture,all,list}]) of + nomatch -> + nomatch; + {match, Reslist} -> + UFix = lists:member(unicode,EO), + {match,bfix([if + UFix =:= true -> list_to_utf8(L); + true -> list_to_binary(L) + end || L <- Reslist])} + end; + index -> + case re:run(Chal,P,ExecOpt++Xopt) of + nomatch -> + nomatch; + {match, Reslist} -> + {match,fixup(Chal,Reslist,0)} + end + end; + _LesserOpt -> + case XMode of + binary -> + case re:run(Chal,P,ExecOpt++Xopt++ + [{capture,all,binary}]) of + nomatch -> + nomatch; + {match, Reslist} -> + {match,bfix(Reslist)} + end; + list -> + case re:run(Chal,P,ExecOpt++Xopt++ + [{capture,all,list}]) of + nomatch -> + nomatch; + {match, Reslist} -> + %io:format("re:run(~w,~w,~w) -> ~w~n",[Chal,P,ExecOpt++Xopt++ + % [{capture,all,list}],Reslist]), + UFix = lists:member(unicode,EO), + {match,bfix([if + UFix =:= true -> list_to_utf8(L); + true -> list_to_binary(L) + end || L <- Reslist])} + end; + index -> + case re:run(Chal,P,ExecOpt++Xopt) of + nomatch -> + nomatch; + {match, Reslist} -> + {match,fixup(Chal,Reslist,0)} + end + end + end + end, + case compare_sloppy(Res,Responses) of + true -> + testrun(RE,P,T,EO,Xopt0,XMode); + false -> + io:format("FAIL(~w): re = ~p, ~nmatched against = ~p(~w), ~nwith options = ~p. ~nexpected = ~p, ~ngot = ~p~n", + [Line,RE,Chal,binary_to_list(Chal),{ExecOpt,EO,Xopt},Responses,Res]), + case get(error_limit) of + infinite -> ok; + X -> + case X-1 of + Y when Y =< 0 -> + throw(limit); + Y -> + put(error_limit,Y) + end + end, + 1 + end. +compare_sloppy({A,L1},{A,L2}) -> + compare_sloppy(L1,L2); +compare_sloppy(A,A) -> + true; +compare_sloppy([{X,Y}|T1],[{X,Y}|T2]) -> + compare_sloppy(T1,T2); +compare_sloppy([{X,[Y,_]}|T1],[{X,Y}|T2]) -> + compare_sloppy(T1,T2); +compare_sloppy([{X,[_,Y]}|T1],[{X,Y}|T2]) -> + compare_sloppy(T1,T2); +compare_sloppy(_,_) -> + false. + +bfix(RL) -> + bfix(RL,0). +bfix([],_) -> + []; +bfix([<<>>|T],N) -> + [{N,[<<>>,<<"<unset>">>]}|bfix(T,N+1)]; % indeterminable +bfix([H|T],N) -> + [{N,H}|bfix(T,N+1)]. + +fixup(List,Any,Any2) when is_list(List)-> + fixup(unicode:characters_to_binary(List,unicode,unicode),Any,Any2); +fixup(_,[],_) -> + []; +fixup(Bin,[{-1,0}|T],N) -> + [{N,<<"<unset>">>}|fixup(Bin,T,N+1)]; +fixup(Bin,[{X,Y}|T],N) -> + <<_:X/binary,Res:Y/binary,_/binary>> = Bin, + [{N,Res}|fixup(Bin,T,N+1)]. + +splitfile(N,Bin,_Line) when N >= size(Bin) -> + []; +splitfile(N,Bin,Line) -> + {Res,NewN} = pickline(N,N,Bin), + case emptyline(Res) of + true -> + [{Line,<<>>}|splitfile(NewN,Bin,Line+1)]; + false -> + [{Line,Res}|splitfile(NewN,Bin,Line+1)] + end. + +emptyline(<<>>) -> + true; +emptyline(<<$ ,R/binary>>) -> + emptyline(R); +emptyline(_) -> + false. +pickline(Start,Stop,Bin) when Stop >= size(Bin) -> + Len = Stop - Start - 1, + <<_:Start/binary,Res:Len/binary,_/binary>> = Bin, + {Res,Stop}; + +pickline(Start,Stop,Bin) -> + %erlang:display({Start,Stop,size(Bin)}), + <<_:Stop/binary,Ch,_/binary>> = Bin, + case Ch of + $\n -> + Len = Stop - Start, + <<_:Start/binary,Res:Len/binary,_/binary>> = Bin, + {Res,Stop+1}; + _ -> + pickline(Start,Stop+1,Bin) + end. + +skip_until_empty([]) -> + []; +skip_until_empty([{_,<<>>}|T]) -> + T; +skip_until_empty([{_,_}|T]) -> + skip_until_empty(T). + +skip_debug([{_,<<$-,_/binary>>}|Con]) -> + Con; +skip_debug([_|T]) -> + skip_debug(T); +skip_debug([]) -> + []. + +skip_extra_info([{_,<<$ ,$ ,$ ,_/binary>>}=H|Con]) -> + [H|Con]; +skip_extra_info([{_,<<>>}|Con]) -> + Con; +skip_extra_info([_|T]) -> + skip_extra_info(T); +skip_extra_info([]) -> + []. + +stru([]) -> + []; +stru([{_,<<>>}|T]) -> + stru(T); +stru([{Line,<<Ch,Re0/binary>>}|T0]) -> + {T,Re} = find_rest_re(Ch,[{Line,Re0}|T0]), + %io:format("DBG: ~p~n",[Re]), + {NewRe,<< Ch, Options/binary >>} = end_of_re(Ch,Re), + case interpret_options_x(backstrip(frontstrip(Options)),NewRe) of + {Olist,<<>>} -> + case T of + [{_,<<$-,_/binary>>}|Con] -> + %Debug output, we skip those + %io:format("Skipping debug (~w)~n",[Line]), + TmpT = skip_debug(Con), + {NewT,Matches} = stru2(TmpT), + [{NewRe,Line,Olist,Matches}|stru(NewT)]; + [{_,<<$C,$a,$p,$t,$u,$r,$i,$n,$g,_/binary>>}|_] -> + NewT0 = skip_extra_info(T), + {NewT,Matches} = stru2(NewT0), + [{NewRe,Line,Olist,Matches}|stru(NewT)]; + [{_,<<Bla,_/binary>>}|_] when Bla =/= $ -> + %io:format("Skipping blabla (~w)~n",[Line]), + NewT = skip_until_empty(T), + stru(NewT); + _ -> + {NewT,Matches} = stru2(T), + %erlang:display({NewRe,Line,Olist,Matches}), + Matches1 = case lists:member(unicode,Olist) of + true -> + Matches ++ + [ {unicode:characters_to_list(E1,unicode),E2,E3,E4} || + {E1,E2,E3,E4} <- Matches]; + false -> + Matches + end, + %erlang:display({NewRe,Line,Olist,Matches1}), + [{NewRe,Line,Olist,Matches1}|stru(NewT)] + end; + {_,Rest} -> +%% case T of +%% [{_,<<Bla,_/binary>>}|_] when Bla =/= $ -> +%% io:format("Skipping blabla (~w)~n",[Line]); +%% _ -> +%% ok +%% end, + NewT = skip_until_empty(T), + %{NewT,_Matches} = stru2(T), + info("Skipping options ~s for now (~w)~n",[binary_to_list(Rest),Line]), + case NewT of + [{Li,_}|_] -> + info("Skip to line ~p~n",[Li]); + _ -> + ok + end, + stru(NewT) + end. + +contains_lang_sens(<<>>) -> + false; +contains_lang_sens(<<$\\,$W,_/binary>>) -> + true; +contains_lang_sens(<<$\\,$w,_/binary>>) -> + true; +contains_lang_sens(<<$\\,$b,_/binary>>) -> + true; +contains_lang_sens(<<_,R/binary>>) -> + contains_lang_sens(R). + + +interpret_options_x(Options,RE) -> + {O,R} = interpret_options(Options), + case (contains_lang_sens(RE) or lists:member(caseless,O)) of + false -> + {[{exec_option,accept_nonascii}|O],R}; + true -> + case lists:member(unicode,O) of + true -> + {[{exec_option,accept_nonascii}|O],R}; + false -> + {O,R} + end + end. +tr_option($i) -> + [caseless]; +tr_option($I) -> + []; +tr_option($B) -> + []; +tr_option($Z) -> + []; +tr_option($x) -> + [extended]; +tr_option($s) -> + [dotall]; +tr_option($m) -> + [multiline]; +tr_option($J) -> + [dupnames]; +tr_option($N) -> + [no_auto_capture]; +tr_option($8) -> + [unicode]; +tr_option($g) -> + [{exec_option,g}]; +tr_option(_) -> + false. + +interpret_options(<<$<,Rest0/binary>>) -> + {Option,Rest} = pinch_cr(Rest0), + {Olist,NRest} = interpret_options(Rest), + {[Option | Olist], NRest}; +interpret_options(<<$L,$f,$r,$_,$F,$R,Rest/binary>>) -> + info("Accepting (and ignoring) french locale~n",[]), + {Olist,NRest} = interpret_options(Rest), + {[{exec_option, accept_nonascii}|Olist],NRest}; +interpret_options(<<Ch,Rest/binary>>) -> + {Olist,NRest} = interpret_options(Rest), + case tr_option(Ch) of + false -> + {Olist,<<Ch,NRest/binary>>}; + Option -> + {Option ++ Olist, NRest} + end; +interpret_options(<<>>) -> + {[],<<>>}. + +find_unsupported([{not_supported,X}|T]) -> + [X | find_unsupported(T)]; +find_unsupported([_|T]) -> + find_unsupported(T); +find_unsupported([]) -> + []. + +backslash_end(<<>>) -> + false; +backslash_end(<<$\\>>) -> + true; +backslash_end(<<_>>) -> + false; +backslash_end(<<_,R/binary>>) -> + backslash_end(R). + +%stru2([<<$ ,$ ,$ ,$ , $*,$*,$*,$ ,_/binary>> | T]) -> +% stru2(T); +stru2([{Line,<<$ ,Rest/binary>>} | T]) -> + % A challenge + case (catch responses(T)) of + {NewT,Rlist} -> + {NewNewT,StrList} = stru2(NewT), + %% Hack... + FS = case backstrip(frontstrip(Rest)) of + <<"\\">> -> + %% Single backslash is to be considered + %% an empty line in challenge + <<>>; + OFS -> + case backslash_end(OFS) of + true -> + <<OFS/binary,$ >>; + _ -> + OFS + end + end, + {ExecOpts,NFS} = escape(FS), + case find_unsupported(ExecOpts) of + [] -> + {NewNewT,[{NFS,Line,ExecOpts, + case + Rlist of nomatch -> nomatch; + RR -> {match,RR} + end} | StrList]}; + UList -> + info("WARNING(~w): the exec-option(s) ~p are unsupported, skipping challenge.~n",[Line,UList]), + {NewNewT,StrList} + end; + fail -> + NewT = skip_until_empty(T), + {NewT,[]} + end; + +stru2(X) -> + {X,[]}. +%responses([<< $ ,$ ,$ ,$ ,$*,$*,$*,$ ,_/binary>>|T]) -> +% responses(T); +responses([{_Line,<< X:2/binary,$:,$ ,Resp/binary>>}|T]) -> + {NT,R2} = responses(T), + NX=list_to_integer(binary_to_list(frontstrip(X))), + {NT,[{NX,escape2(Resp)} | R2]}; +responses([{_Line,<< X:3/binary,$:,$ ,Resp/binary>>}|T]) -> + {NT,R2} = responses(T), + NX=list_to_integer(binary_to_list(frontstrip(X))), + {NT,[{NX,escape2(Resp)} | R2]}; +responses([{_Line,<<$N,$o,$ ,$m,$a,$t,$c,$h,_/binary>>}|T]) -> + {T,nomatch}; +responses([{Line,<<$ ,No,Ch,_/binary>>}|T]) when No >= $0, No =< $9, Ch >= $A, Ch =< $Z -> + info("Skipping strange debug response at line ~p~n",[Line]), + responses(T); +responses([{Line,<<$ ,$ ,Ch,_/binary>>}|T]) when Ch =:= $G; Ch =:= $C -> + info("Skipping stranger debug response at line ~p~n",[Line]), + responses(T); +responses([{Line,<<C,_/binary>>=X}|_]) when C =/= $ -> + info("Offending response line(~w)! ~p~n",[Line,X]), + throw(fail); +responses(X) -> + {X,[]}. + + +end_of_re(_,<<>>) -> + {<<>>,<<>>}; +end_of_re(C,<<C,_Rest/binary>> = R) -> + {<<>>,R}; +end_of_re(C,<<$\\,C,Rest/binary>>) -> + {Sub,Rest2} = end_of_re(C,Rest), + {<<C,Sub/binary>>,Rest2}; +end_of_re(C,<<Ch,Rest/binary>>) -> + {Sub,Rest2} = end_of_re(C,Rest), + {<<Ch,Sub/binary>>,Rest2}. + +frontstrip(<<>>) -> + <<>>; +frontstrip(<< $ ,Rest/binary>>) -> + frontstrip(Rest); +frontstrip(Bin) -> + Bin. + +backstrip(<<>>) -> + <<>>; +backstrip(<<$ >>) -> + <<>>; +backstrip(<<X,Rest/binary>>) -> + case backstrip(Rest) of + Rest -> + <<X,Rest/binary>>; + Other -> + NRest = backstrip(Other), + <<X,NRest/binary>> + end. + +find_rest_re(_,[]) -> + {<<>>,<<>>}; +find_rest_re(Ch,[{_,H}|T]) -> + case end_of_re(Ch,H) of + {_,<<>>} -> + {NewT,Rest} = find_rest_re(Ch,T), + {NewT,<<H/binary,$\n,Rest/binary>>}; + {_,_} -> + {T,H} + end. + +eopt($A) -> + [anchored]; +eopt($B) -> + [notbol]; +eopt(X) -> + [{not_supported,X}]. + +pinch_cr(<<$c,$r,$>,Rest/binary>>) -> + {{newline,cr},Rest}; +pinch_cr(<<$l,$f,$>,Rest/binary>>) -> + {{newline,lf},Rest}; +pinch_cr(<<$c,$r,$l,$f,$>,Rest/binary>>) -> + {{newline,crlf},Rest}; +pinch_cr(<<$C,$R,$>,Rest/binary>>) -> + {{newline,cr},Rest}; +pinch_cr(<<$L,$F,$>,Rest/binary>>) -> + {{newline,lf},Rest}; +pinch_cr(<<$C,$R,$L,$F,$>,Rest/binary>>) -> + {{newline,crlf},Rest}; +pinch_cr(<<$a,$n,$y,$c,$r,$l,$f,$>,Rest/binary>>) -> + {{newline,anycrlf},Rest}; +pinch_cr(<<$b,$s,$r,$_,$a,$n,$y,$c,$r,$l,$f,$>,Rest/binary>>) -> + {bsr_anycrlf,Rest}; +pinch_cr(<<$b,$s,$r,$_,$u,$n,$i,$c,$o,$d,$e,$>,Rest/binary>>) -> + {bsr_unicode,Rest}; +pinch_cr(<<$a,$n,$y,$>,Rest/binary>>) -> + {{newline,any},Rest}; +pinch_cr(<<$A,$N,$Y,$>,Rest/binary>>) -> + {{newline,any},Rest}; +pinch_cr(Other) -> + case splitby($>,Other,<<>>) of + {Unk,Rest} -> + {{not_supported,{newline,Unk}},Rest}; + no -> + {{not_supported,$<},Other} + end. + +splitby(_,<<>>,_) -> + no; +splitby(Ch,<<Ch,Rest/binary>>,Acc) -> + {Acc,Rest}; +splitby(Ch,<<OCh,Rest/binary>>,Acc) -> + splitby(Ch,Rest,<<Acc/binary,OCh>>). + + +escape(<<>>) -> + {[],<<>>}; +escape(<<$\\, Ch, Rest/binary>>) when Ch >= $A, Ch =< $Z; Ch =:= $? -> + %Options in the string... + NewOpts = eopt(Ch), + {MoreOpts,Tail} = escape(Rest), + {NewOpts ++ MoreOpts,Tail}; +escape(<<$\\, $<, Rest/binary>>) -> + %CR Options in the string... + {NewOpt,NewRest} = pinch_cr(Rest), + {MoreOpts,Tail} = escape(NewRest), + {[NewOpt|MoreOpts],Tail}; +escape(<<$\\, Ch, Rest/binary>>) -> + {C,NR} = case single_esc(Ch) of + no -> + case multi_esc(<<Ch,Rest/binary>>) of + {CharBin,NewRest} -> + {CharBin,NewRest}; + no -> + {<<$\\>>,<<Ch,Rest/binary>>} + end; + CCC -> + %erlang:display({escape,CCC}), + {<<CCC>>,Rest} + end, + {MoreOpts,Tail} = escape(NR), + {MoreOpts,<<C/binary,Tail/binary>>}; +%escape(<<$\\,Rest/binary>>) -> +% escape(<<Rest/binary>>); +escape(<<Ch,Rest/binary>>) -> + {X,RR} = escape(<<Rest/binary>>), + {X,<<Ch,RR/binary>>}; +escape(Any) -> + {[],Any}. +escape2(<<>>) -> + <<>>; +escape2(<<$\\, Ch, Rest/binary>>) -> + {C,NR} = case multi_esc(<<Ch,Rest/binary>>) of + {CharBin,NewRest} -> + {CharBin,NewRest}; + no -> + {<<$\\>>,<<Ch,Rest/binary>>} + end, + Tail = escape2(NR), + <<C/binary,Tail/binary>>; +escape2(<<Ch,Rest/binary>>) -> + RR = escape2(<<Rest/binary>>), + <<Ch,RR/binary>>; +escape2(Any) -> + Any. + + +trx(N) when ((N >= $0) and (N =< $9)) -> + N - $0; +trx($A) -> 10; +trx($B) -> 11; +trx($C) -> 12; +trx($D) -> 13; +trx($E) -> 14; +trx($F) -> 15; +trx($a) -> 10; +trx($b) -> 11; +trx($c) -> 12; +trx($d) -> 13; +trx($e) -> 14; +trx($f) -> 15. + + +int_to_utf8(I) when I =< 16#7F -> + <<I>>; +int_to_utf8(I) when I =< 16#7FF -> + B2 = I band 16#3f, + B1 = (I bsr 6) band 16#1f, + <<1:1,1:1,0:1,B1:5,1:1,0:1,B2:6>>; +int_to_utf8(I) when I =< 16#FFFF -> + B3 = I band 16#3f, + B2 = (I bsr 6) band 16#3f, + B1 = (I bsr 12) band 16#f, + <<1:1,1:1,1:1,0:1,B1:4,1:1,0:1,B2:6,1:1,0:1,B3:6>>; +int_to_utf8(I) when I =< 16#10FFFF -> + B4 = I band 16#3f, + B3 = (I bsr 6) band 16#3f, + B2 = (I bsr 12) band 16#3f, + B1 = (I bsr 18) band 16#7, + <<1:1,1:1,1:1,1:1,0:1,B1:3,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6>>; +int_to_utf8(_) -> + exit(unsupported_utf8). + +list_to_utf8(L) when is_list(L); is_binary(L) -> + iolist_to_binary([int_to_utf8(I) || I <- L]); +list_to_utf8({Tag,_,_}) when Tag =:= incomplete ; Tag =:= error -> + throw(skip). + +multi_esc(<<M,N,O,Rest/binary>>) + when M >= $0, M =< $7, N >= $0, N =< $7, O >= $0, O =< $7 -> + Cha = ((M - $0) bsl 6) bor ((N - $0) bsl 3) bor (O - $0), + {<<Cha>>,Rest}; +multi_esc(<<N,O,Rest/binary>>) + when N >= $0, N =< $7, O >= $0, O =< $7 -> + Cha = ((N - $0) bsl 3) bor (O - $0), + {<<Cha>>,Rest}; +multi_esc(<<O,Rest/binary>>) + when O >= $0, O =< $7 -> + Cha = (O - $0), + {<<Cha>>,Rest}; + + +multi_esc(<<$x,${,N,O,$},Rest/binary>>) + when ((((N >= $0) and (N =< $9)) or ((N >= $A) and (N =< $F)) or + ((N >= $a) and (N =< $f))) and + (((O >= $0) and (O =< $9)) or ((O >= $A) and (O =< $F)) or + ((O >= $a) and (O =< $f)))) -> + Cha = (trx(N) bsl 4) bor trx(O), + {int_to_utf8(Cha),Rest}; +multi_esc(<<$x,${,N,O,P,$},Rest/binary>>) + when ((((N >= $0) and (N =< $9)) or ((N >= $A) and (N =< $F)) or + ((N >= $a) and (N =< $f))) and + (((O >= $0) and (O =< $9)) or ((O >= $A) and (O =< $F)) or + ((O >= $a) and (O =< $f)))and + (((P >= $0) and (P =< $9)) or ((P >= $A) and (P =< $F)) or + ((P >= $a) and (P =< $f)))) -> + Cha = (trx(N) bsl 8) bor (trx(O) bsl 4) bor trx(P), + {int_to_utf8(Cha),Rest}; +multi_esc(<<$x,${,N,O,P,Q,$},Rest/binary>>) + when ((((N >= $0) and (N =< $9)) or ((N >= $A) and (N =< $F)) or + ((N >= $a) and (N =< $f))) and + (((O >= $0) and (O =< $9)) or ((O >= $A) and (O =< $F)) or + ((O >= $a) and (O =< $f))) and + (((P >= $0) and (P =< $9)) or ((P >= $A) and (P =< $F)) or + ((P >= $a) and (P =< $f))) and + (((Q >= $0) and (Q =< $9)) or ((Q >= $A) and (Q =< $F)) or + ((Q >= $a) and (Q =< $f)))) -> + Cha = (trx(N) bsl 12) bor (trx(O) bsl 8) bor (trx(P) bsl 4) bor trx(Q), + {int_to_utf8(Cha),Rest}; +multi_esc(<<$x,${,N,O,P,Q,R,$},Rest/binary>>) + when ((((N >= $0) and (N =< $9)) or ((N >= $A) and (N =< $F)) or + ((N >= $a) and (N =< $f))) and + (((O >= $0) and (O =< $9)) or ((O >= $A) and (O =< $F)) or + ((O >= $a) and (O =< $f))) and + (((P >= $0) and (P =< $9)) or ((P >= $A) and (P =< $F)) or + ((P >= $a) and (P =< $f))) and + (((Q >= $0) and (Q =< $9)) or ((Q >= $A) and (Q =< $F)) or + ((Q >= $a) and (Q =< $f))) and + (((R >= $0) and (R =< $9)) or ((R >= $A) and (R =< $F)) or + ((R >= $a) and (R =< $f)))) -> + Cha = (trx(N) bsl 16) bor (trx(O) bsl 12) bor (trx(P) bsl 8) bor (trx(Q) bsl 4) bor trx(R), + {int_to_utf8(Cha),Rest}; +multi_esc(<<$x,${,N,O,P,Q,R,S,$},Rest/binary>>) + when ((((N >= $0) and (N =< $9)) or ((N >= $A) and (N =< $F)) or + ((N >= $a) and (N =< $f))) and + (((O >= $0) and (O =< $9)) or ((O >= $A) and (O =< $F)) or + ((O >= $a) and (O =< $f))) and + (((P >= $0) and (P =< $9)) or ((P >= $A) and (P =< $F)) or + ((P >= $a) and (P =< $f))) and + (((Q >= $0) and (Q =< $9)) or ((Q >= $A) and (Q =< $F)) or + ((Q >= $a) and (Q =< $f))) and + (((R >= $0) and (R =< $9)) or ((R >= $A) and (R =< $F)) or + ((R >= $a) and (R =< $f))) and + (((S >= $0) and (S =< $9)) or ((S >= $A) and (S =< $F)) or + ((S >= $a) and (S =< $f)))) -> + Cha = (trx(N) bsl 20) bor (trx(O) bsl 16) bor (trx(P) bsl 12) bor (trx(Q) bsl 8) bor (trx(R) bsl 4) bor trx(S), + {int_to_utf8(Cha),Rest}; +multi_esc(<<$x,N,O,Rest/binary>>) + when ((((N >= $0) and (N =< $9)) or ((N >= $A) and (N =< $F)) or + ((N >= $a) and (N =< $f))) and + (((O >= $0) and (O =< $9)) or ((O >= $A) and (O =< $F)) or + ((O >= $a) and (O =< $f)))) -> + Cha = (trx(N) bsl 4) bor trx(O), + {<<Cha>>,Rest}; +multi_esc(<<$x,N,Rest/binary>>) + when (((N >= $0) and (N =< $9)) or ((N >= $A) and (N =< $F)) or + ((N >= $a) and (N =< $f))) -> + Cha = trx(N), + {<<Cha>>,Rest}; +multi_esc(_) -> + no. + +single_esc($") -> + $"; +single_esc($ ) -> + $ ; +single_esc($') -> + $'; +single_esc($@) -> + $@; +single_esc($t) -> + $\t; +single_esc($n) -> + $\n; +single_esc($r) -> + $\r; +single_esc($f) -> + $\f; +single_esc($e) -> + $\e; +single_esc($b) -> + $\b; +single_esc($$) -> + $$; +single_esc($\\) -> + $\\; +single_esc($a) -> + 7; +%single_esc(Ch) when Ch >= $A, Ch =< $Z -> % eh? +% Ch; + +single_esc(_) -> + no. + +info(Str,Lst) -> + case get(verbose) of + true -> + io:format(Str,Lst); + _ -> + ok + end. + + +%% Generate split tests from indatafile, +%% you will need perl on the machine +gen_split_test(OneFile) -> + {ok,Bin} = file:read_file(OneFile), + Lines = splitfile(0,Bin,1), + Structured = stru(Lines), + PerlShellScript = OneFile++"_split_test_gen.sh", + dumpsplit(Structured,PerlShellScript), + PerlShellScript, + ErlModule = "re_"++filename:basename(OneFile)++"_split_test", + ErlFileName = ErlModule++".erl", + {ok,F}= file:open(ErlFileName,[write]), + io:format(F,"-module(~s).~n",[ErlModule]), + io:format(F,"-compile(export_all).~n",[]), + io:format(F,"-include(\"test_server.hrl\").~n",[]), + %io:format(F,"-define(line,erlang:display(?LINE),).~n",[]), + io:format(F,"%% This file is generated by running ~w:gen_split_test(~p)~n", + [?MODULE,OneFile]), + io:format(F,"join([]) -> [];~n",[]), + io:format(F,"join([A]) -> [A];~n",[]), + io:format(F,"join([H|T]) -> [H,<<\":\">>|join(T)].~n",[]), + io:format(F,"run() ->~n",[]), + file:close(F), + os:cmd("sh "++ PerlShellScript++" 2>/dev/null >> "++ErlFileName), + {ok,F2}= file:open(ErlFileName,[append]), + io:format(F2,"ok.~n",[]), + file:close(F2), + io:format("~s~n",[os:cmd("wc -l "++ErlFileName)]), + ok. + +dumpsplit(S,Fname) -> + {ok,F}= file:open(Fname,[write]), + dodumpsplit(F,S), + file:close(F). + +dodumpsplit(_,[]) -> + ok; +dodumpsplit(F,[H|T]) -> + dumponesplit(F,H), + dodumpsplit(F,T). + +dumponesplit(F,{RE,_,O,TS}) -> + [begin + {NO,_} = pick_exec_options(O++Op), + SSS = opt_to_string(NO), + io:format(F,"perl -e '$x = join(\":\",split(/~s/~s,\"~s\")); " + "$x =~~ s/\\\\/\\\\\\\\/g; $x =~~ s/\\\"/\\\\\"/g; " + "print \"?line <<\\\"$x\\\">> = " + "iolist_to_binary(join(re:split(\\\"~s\\\"," + "\\\"~s\\\",~p))), \\n\";'~n", + [zsafe(safe(RE)), + SSS, + ysafe(safe(Str)), + dsafe(safe(Str)), + dsafe2(safe(RE)), + NO++[trim]]), + io:format(F,"perl -e '$x = join(\":\",split(/~s/~s,\"~s\",2)); " + "$x =~~ s/\\\\/\\\\\\\\/g; $x =~~ s/\\\"/\\\\\"/g; " + "print \"?line <<\\\"$x\\\">> = " + "iolist_to_binary(join(re:split(\\\"~s\\\"," + "\\\"~s\\\",~p))), \\n\";'~n", + [zsafe(safe(RE)), + SSS, + ysafe(safe(Str)), + dsafe(safe(Str)), + dsafe2(safe(RE)), + NO++[{parts,2}]]), + io:format(F,"perl -e '$x = join(\":\",split(/~s/~s,\"~s\",-1)); " + "$x =~~ s/\\\\/\\\\\\\\/g; $x =~~ s/\\\"/\\\\\"/g; " + "print \"?line <<\\\"$x\\\">> = " + "iolist_to_binary(join(re:split(\\\"~s\\\"," + "\\\"~s\\\",~p))), \\n\";'~n", + [zsafe(safe(RE)), + SSS, + ysafe(safe(Str)), + dsafe(safe(Str)), + dsafe2(safe(RE)), + NO]) + end || + {Str,_,Op,_} <- TS]. + +%% Generate replacement tests from indatafile, +%% you will need perl on the machine +gen_repl_test(OneFile) -> + random:seed(1219,687731,62804), + {ok,Bin} = file:read_file(OneFile), + Lines = splitfile(0,Bin,1), + Structured = stru(Lines), + PerlShellScript = OneFile++"_replacement_test_gen.sh", + dump(Structured,PerlShellScript), + ErlModule = "re_"++filename:basename(OneFile)++"_replacement_test", + ErlFileName = ErlModule++".erl", + {ok,F}= file:open(ErlFileName,[write]), + io:format(F,"-module(~s).~n",[ErlModule]), + io:format(F,"-compile(export_all).~n",[]), + io:format(F,"-include(\"test_server.hrl\").~n",[]), + io:format(F,"%% This file is generated by running ~w:gen_repl_test(~p)~n", + [?MODULE,OneFile]), + io:format(F,"run() ->~n",[]), + file:close(F), + os:cmd("sh "++ PerlShellScript++" 2>/dev/null >> "++ErlFileName), + {ok,F2}= file:open(ErlFileName,[append]), + io:format(F2,"ok.~n",[]), + file:close(F2), + io:format("~s~n",[os:cmd("wc -l "++ErlFileName)]), + ok. +dump(S,Fname) -> + {ok,F}= file:open(Fname,[write]), + dodump(F,S), + file:close(F). + +dodump(_,[]) -> + ok; +dodump(F,[H|T]) -> + dumpone(F,H), + dodump(F,T). + +dumpone(F,{RE,_,O,TS}) -> + [begin + {NO,_} = pick_exec_options(O++Op), + SSS = opt_to_string(NO), + RS = ranstring(), + io:format(F,"perl -e '$x = \"~s\"; $x =~~ s/~s/~s/~s; $x =~~ s/\\\\/\\\\\\\\/g; $x =~~ s/\\\"/\\\\\"/g; print \"?line <<\\\"$x\\\">> = iolist_to_binary(re:replace(\\\"~s\\\",\\\"~s\\\",\\\"~s\\\",~p)), \\n\";'~n",[ysafe(safe(Str)),zsafe(safe(RE)),perlify(binary_to_list(RS)),SSS,dsafe(safe(Str)),dsafe(safe(RE)),xsafe(RS),NO]), + io:format(F,"perl -e '$x = \"~s\"; $x =~~ s/~s/~s/g~s; $x =~~ s/\\\\/\\\\\\\\/g; $x =~~ s/\\\"/\\\\\"/g; print \"?line <<\\\"$x\\\">> = iolist_to_binary(re:replace(\\\"~s\\\",\\\"~s\\\",\\\"~s\\\",~p)), \\n\";'~n",[ysafe(safe(Str)),zsafe(safe(RE)),perlify(binary_to_list(RS)),SSS,dsafe(safe(Str)),dsafe(safe(RE)),xsafe(RS),NO++[global]]) + end || + {Str,_,Op,_} <- TS]. + +dsafe2([]) -> + []; +dsafe2([$\',$\",$\',$\",$\'|T]) -> + [$\',$\",$\',$\",$\' |dsafe2(T)]; +dsafe2([$\"|T]) -> + [$\\,$\\,$\\,$\" |dsafe2(T)]; +dsafe2([$\\, $G|T]) -> + [$\\,$\\,$\\,$\\,$A |dsafe2(T)]; +dsafe2([$\\|T]) -> + [$\\,$\\,$\\,$\\ |dsafe2(T)]; +dsafe2([$$|T]) -> + [$\\,$$|dsafe2(T)]; +dsafe2([H|T]) -> + [H|dsafe2(T)]. + +dsafe([]) -> + []; +dsafe([$\',$\",$\',$\",$\'|T]) -> + [$\',$\",$\',$\",$\' |dsafe(T)]; +dsafe([$\"|T]) -> + [$\\,$\\,$\\,$\" |dsafe(T)]; +dsafe([$\\|T]) -> + [$\\,$\\,$\\,$\\ |dsafe(T)]; +dsafe([$$|T]) -> + [$\\,$$|dsafe(T)]; +dsafe([H|T]) -> + [H|dsafe(T)]. + +xsafe(<<>>) -> + []; +xsafe(<<$\\,R/binary>>) -> + [$\\,$\\,$\\,$\\ | xsafe(R)]; +xsafe(<<X,R/binary>>) -> + [X|xsafe(R)]. + +zsafe([]) -> + []; +zsafe([$$, $b|T]) -> + [$\\,$$, $b | zsafe(T)]; +zsafe([X|R]) -> + [X|zsafe(R)]. + +ysafe([]) -> + []; +ysafe([$\',$\",$\',$\",$\'|T]) -> + [$\',$\",$\',$\",$\' |ysafe(T)]; +ysafe([$\"|T]) -> + [$\\,$\" |ysafe(T)]; +ysafe([$\\|T]) -> + [$\\,$\\ |ysafe(T)]; +ysafe([$$|T]) -> + [$\\,$$|ysafe(T)]; +ysafe([H|T]) -> + [H|ysafe(T)]. + +safe(<<>>) -> + []; +safe(<<$\n>>) -> %chomp + []; +safe(<<$\',R/binary>>) -> + [$\',$\",$\',$\",$\' | safe(R)]; +safe(<<X,R/binary>>) -> + [X|safe(R)]. +perlify([$\\,N|Rest]) when N >= $0, N =< $9 -> + [$$,N|perlify(Rest)]; +perlify([$& | Rest]) -> + [$$,$& | perlify(Rest)]; +perlify([H|T]) -> + [H|perlify(T)]; +perlify([]) -> + []. + +opt_to_string([]) -> + []; +opt_to_string([A|T]) -> + case btr(A) of + false -> + opt_to_string(T); + Ch -> + [Ch | opt_to_string(T)] + end. + +btr(caseless) -> + $i; +btr(extended) -> + $x; +btr(dotall) -> + $s; +btr(multiline) -> + $m; +btr(dupnames) -> + $J; +btr(no_auto_capture) -> + $N; +btr(unicode) -> + $8; +btr(_) -> + false. + + +ranchar() -> + case random:uniform(10) of + 9 -> $&; + 10 -> <<"\\1">>; + N when N < 5 -> + random:uniform($Z-$A)+$A-1; + M when M < 9 -> + random:uniform($z-$a)+$a-1 + end. + +ranstring() -> + iolist_to_binary([ranchar() || _ <- lists:duplicate(random:uniform(20),0) ]). + |