From 943d65dac5aed83b2c0d6335bc792002bef374a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 6 Feb 2013 17:01:15 +0100 Subject: Strengthen test suite --- lib/stdlib/test/filelib_SUITE.erl | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/lib/stdlib/test/filelib_SUITE.erl b/lib/stdlib/test/filelib_SUITE.erl index 27078f0914..2ef2b2bcaa 100644 --- a/lib/stdlib/test/filelib_SUITE.erl +++ b/lib/stdlib/test/filelib_SUITE.erl @@ -65,8 +65,18 @@ wildcard_one(Config) when is_list(Config) -> ?line {ok,OldCwd} = file:get_cwd(), ?line Dir = filename:join(?config(priv_dir, Config), "wildcard_one"), ?line ok = file:make_dir(Dir), + do_wildcard_1(Dir, + fun(Wc) -> + filelib:wildcard(Wc, Dir, erl_prim_loader) + end), ?line file:set_cwd(Dir), - ?line do_wildcard_1(Dir, fun(Wc) -> filelib:wildcard(Wc) end), + do_wildcard_1(Dir, + fun(Wc) -> + L = filelib:wildcard(Wc), + L = filelib:wildcard(Wc, erl_prim_loader), + L = filelib:wildcard(Wc, "."), + L = filelib:wildcard(Wc, Dir) + end), ?line file:set_cwd(OldCwd), ?line ok = file:del_dir(Dir), ok. @@ -130,6 +140,9 @@ do_wildcard_2(Dir, Wcf) -> ?line ["abc","abcdef"] = Wcf("a*{def,}"), ?line ["abc","abcdef"] = Wcf("a*{,def}"), + %% Constant wildcard. + ["abcdef"] = Wcf("abcdef"), + %% Negative tests. ?line [] = Wcf("b*"), ?line [] = Wcf("bufflig"), @@ -157,6 +170,8 @@ do_wildcard_4(Dir, Wcf) -> All = ["a-","aA","aB","aC","a[","a]"], ?line Files = mkfiles(lists:reverse(All), Dir), ?line All = Wcf("a[][A-C-]"), + ["a-"] = Wcf("a[-]"), + ["a["] = Wcf("a["), ?line del(Files), do_wildcard_5(Dir, Wcf). @@ -173,6 +188,7 @@ do_wildcard_5(Dir, Wcf) -> ?line ["blurf/nisse"] = Wcf("*/nisse"), ?line [] = Wcf("mountain/*"), ?line [] = Wcf("xa/gurka"), + ["blurf/nisse"] = Wcf("blurf/nisse"), %% Cleanup ?line del(Files), @@ -233,7 +249,24 @@ do_wildcard_8(Dir, Wcf) -> del(Files), foreach(fun(D) -> ok = file:del_dir(filename:join(Dir, D)) - end, Dirs2 ++ Dirs1 ++ Dirs0). + end, Dirs2 ++ Dirs1 ++ Dirs0), + do_wildcard_9(Dir, Wcf). + +do_wildcard_9(Dir, Wcf) -> + Dirs0 = ["lib","lib/app","lib/app/ebin"], + Dirs = [filename:join(Dir, D) || D <- Dirs0], + [ok = file:make_dir(D) || D <- Dirs], + Files0 = [filename:join("lib/app/ebin", F++".bar") || + F <- ["abc","foo","foobar"]], + Files = [filename:join(Dir, F) || F <- Files0], + [ok = file:write_file(F, <<"some content\n">>) || F <- Files], + Files0 = Wcf("lib/app/ebin/*.bar"), + + %% Cleanup. + del(Files), + [ok = file:del_dir(D) || D <- lists:reverse(Dirs)], + ok. + fold_files(Config) when is_list(Config) -> ?line Dir = filename:join(?config(priv_dir, Config), "fold_files"), -- cgit v1.2.3 From 03b7c52a75c4af8d33b6b2a13bf7697709d3f6a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 12 Feb 2013 12:27:39 +0100 Subject: Don't handle binaries from file:list_dir/1 In R16B, file:list_dir/1 will never return binaries. --- lib/stdlib/src/filelib.erl | 7 ------- 1 file changed, 7 deletions(-) diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl index 318f3b87b8..ecc48e129d 100644 --- a/lib/stdlib/src/filelib.erl +++ b/lib/stdlib/src/filelib.erl @@ -315,13 +315,6 @@ do_wildcard_3(Base, [Pattern|Rest], Result, Mod) -> do_wildcard_3(Base, [], Result, _Mod) -> [Base|Result]. -wildcard_4(Pattern, [File|Rest], Base, Result) when is_binary(File) -> - case wildcard_5(Pattern, binary_to_list(File)) of - true -> - wildcard_4(Pattern, Rest, Base, [join(Base, File)|Result]); - false -> - wildcard_4(Pattern, Rest, Base, Result) - end; wildcard_4(Pattern, [File|Rest], Base, Result) -> case wildcard_5(Pattern, File) of true -> -- cgit v1.2.3 From e96b2bc546b13a30707479838f1a5891147a5603 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 13 Feb 2013 11:54:31 +0100 Subject: Don't allow binaries as the Cwd argument for filelib:wildcard() filelib:wildcard() will no longer support "raw filenames", so it makes no sense for it to accept a binary Cwd argument. --- lib/stdlib/src/filelib.erl | 4 ++-- lib/stdlib/test/filelib_SUITE.erl | 3 --- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl index ecc48e129d..853cf6a6fa 100644 --- a/lib/stdlib/src/filelib.erl +++ b/lib/stdlib/src/filelib.erl @@ -53,14 +53,14 @@ wildcard(Pattern) when is_list(Pattern) -> -spec wildcard(Wildcard, Cwd) -> [file:filename()] when Wildcard :: filename() | dirname(), Cwd :: dirname(). -wildcard(Pattern, Cwd) when is_list(Pattern), (is_list(Cwd) or is_binary(Cwd)) -> +wildcard(Pattern, Cwd) when is_list(Pattern), is_list(Cwd) -> ?HANDLE_ERROR(do_wildcard(Pattern, Cwd, file)); wildcard(Pattern, Mod) when is_list(Pattern), is_atom(Mod) -> ?HANDLE_ERROR(do_wildcard(Pattern, Mod)). -spec wildcard(file:name(), file:name(), atom()) -> [file:filename()]. wildcard(Pattern, Cwd, Mod) - when is_list(Pattern), (is_list(Cwd) or is_binary(Cwd)), is_atom(Mod) -> + when is_list(Pattern), is_list(Cwd), is_atom(Mod) -> ?HANDLE_ERROR(do_wildcard(Pattern, Cwd, Mod)). -spec is_dir(Name) -> boolean() when diff --git a/lib/stdlib/test/filelib_SUITE.erl b/lib/stdlib/test/filelib_SUITE.erl index 2ef2b2bcaa..4a67d68428 100644 --- a/lib/stdlib/test/filelib_SUITE.erl +++ b/lib/stdlib/test/filelib_SUITE.erl @@ -83,11 +83,8 @@ wildcard_one(Config) when is_list(Config) -> wildcard_two(Config) when is_list(Config) -> ?line Dir = filename:join(?config(priv_dir, Config), "wildcard_two"), - ?line DirB = unicode:characters_to_binary(Dir, file:native_name_encoding()), ?line ok = file:make_dir(Dir), ?line do_wildcard_1(Dir, fun(Wc) -> io:format("~p~n",[{Wc,Dir, X = filelib:wildcard(Wc, Dir)}]),X end), - ?line do_wildcard_1(Dir, fun(Wc) -> io:format("~p~n",[{Wc,DirB, X = filelib:wildcard(Wc, DirB)}]), - [unicode:characters_to_list(Y,file:native_name_encoding()) || Y <- X] end), ?line do_wildcard_1(Dir, fun(Wc) -> filelib:wildcard(Wc, Dir++"/") end), case os:type() of {win32,_} -> -- cgit v1.2.3 From f8b9d04789cb307c519f7c99493ebc9118a2b8c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 12 Feb 2013 14:56:05 +0100 Subject: Don't redefine error/1 It is confusing. Rename our own exception-generating function to badpattern/1. --- lib/stdlib/src/filelib.erl | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl index 853cf6a6fa..fec9d0ac23 100644 --- a/lib/stdlib/src/filelib.erl +++ b/lib/stdlib/src/filelib.erl @@ -19,9 +19,6 @@ -module(filelib). %% File utilities. - -%% Avoid warning for local function error/1 clashing with autoimported BIF. --compile({no_auto_import,[error/1]}). -export([wildcard/1, wildcard/2, is_dir/1, is_file/1, is_regular/1, compile_wildcard/1]). -export([fold_files/5, last_modified/1, file_size/1, ensure_dir/1]). @@ -37,7 +34,7 @@ catch error:{badpattern,_}=UnUsUalVaRiAbLeNaMe -> %% Get the stack backtrace correct. - erlang:error(UnUsUalVaRiAbLeNaMe) + error(UnUsUalVaRiAbLeNaMe) end). -type filename() :: file:name(). @@ -430,7 +427,7 @@ compile_part_to_sep(Part) -> compile_part(Part, true, []). compile_part([], true, _) -> - error(missing_delimiter); + badpattern(missing_delimiter); compile_part([$,|Rest], true, Result) -> {ok, $,, lists:reverse(Result), Rest}; compile_part([$}|Rest], true, Result) -> @@ -502,8 +499,8 @@ compile_alt(Pattern, Result) -> error end. -error(Reason) -> - erlang:error({badpattern,Reason}). +badpattern(Reason) -> + error({badpattern,Reason}). eval_read_file_info(File, file) -> file:read_file_info(File); -- cgit v1.2.3 From 3c40105f4666e1c38752a5bbf1c282c221dda967 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 12 Feb 2013 09:35:49 +0100 Subject: Rethink the filelib:wildcard() functions Commit 70b5e24c9498225fadc08d19503269c8aad851bf broke filelib:wildcard(Pattern, "."). Over the years, the logic for filelib:wilcard() has become a mess of special cases. Probably because of premature optimization, filelib:wildcard(Pattern) and filelib:wildcard(Pattern, Cwd) are handled differently. They can be consolidated if we use a "." as the default Cwd argument. We can also simplify things by compiling the Cwd argument into the wildcard. We did not that in the initial implementation because it used to be possible to pre-compile a wildcard and pass it to filelib:wildcard/{1,2}. Since that is no longer possible, there is no point in keeping the compiled wildcard "portable" (not dependent on the Cwd argument). --- lib/kernel/test/file_name_SUITE.erl | 11 ---- lib/stdlib/src/filelib.erl | 124 ++++++++++++++++-------------------- 2 files changed, 55 insertions(+), 80 deletions(-) diff --git a/lib/kernel/test/file_name_SUITE.erl b/lib/kernel/test/file_name_SUITE.erl index 40bde8a736..e8529db1dc 100644 --- a/lib/kernel/test/file_name_SUITE.erl +++ b/lib/kernel/test/file_name_SUITE.erl @@ -575,17 +575,6 @@ check_very_icky(Mod) -> end, ?line {NumOK,NumNOK} = filelib:fold_files(".",".*",true,fun(_F,{N,M}) when is_list(_F) -> io:format("~ts~n",[_F]),{N+1,M}; (_F,{N,M}) -> io:format("~p~n",[_F]),{N,M+1} end,{0,0}), ?line ok = filelib:fold_files(".",[1076,1089,1072,124,46,42],true,fun(_F,_) -> ok end,false), - ?line SF3 = unicode:characters_to_binary("åäösubfil3", - file:native_name_encoding()), - ?line SF2 = case treat_icky(<<"åäösubfil2">>) of - LF2 when is_list(LF2) -> - unicode:characters_to_binary(LF2, - file:native_name_encoding()); - BF2 -> - BF2 - end, - ?line Sorted = lists:sort([SF3,SF2]), - ?line Sorted = lists:sort(filelib:wildcard("*",<<"åäösubdir2">>)), ok catch throw:need_unicode_mode -> diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl index fec9d0ac23..b1a5b7a8a9 100644 --- a/lib/stdlib/src/filelib.erl +++ b/lib/stdlib/src/filelib.erl @@ -19,13 +19,14 @@ -module(filelib). %% File utilities. --export([wildcard/1, wildcard/2, is_dir/1, is_file/1, is_regular/1, - compile_wildcard/1]). +-export([wildcard/1, wildcard/2, is_dir/1, is_file/1, is_regular/1]). -export([fold_files/5, last_modified/1, file_size/1, ensure_dir/1]). - -export([wildcard/3, is_dir/2, is_file/2, is_regular/2]). -export([fold_files/6, last_modified/2, file_size/2]). +%% For debugging/testing. +-export([compile_wildcard/1]). + -include_lib("kernel/include/file.hrl"). -define(HANDLE_ERROR(Expr), @@ -45,7 +46,7 @@ -spec wildcard(Wildcard) -> [file:filename()] when Wildcard :: filename() | dirname(). wildcard(Pattern) when is_list(Pattern) -> - ?HANDLE_ERROR(do_wildcard(Pattern, file)). + ?HANDLE_ERROR(do_wildcard(Pattern, ".", file)). -spec wildcard(Wildcard, Cwd) -> [file:filename()] when Wildcard :: filename() | dirname(), @@ -53,7 +54,7 @@ wildcard(Pattern) when is_list(Pattern) -> wildcard(Pattern, Cwd) when is_list(Pattern), is_list(Cwd) -> ?HANDLE_ERROR(do_wildcard(Pattern, Cwd, file)); wildcard(Pattern, Mod) when is_list(Pattern), is_atom(Mod) -> - ?HANDLE_ERROR(do_wildcard(Pattern, Mod)). + ?HANDLE_ERROR(do_wildcard(Pattern, ".", Mod)). -spec wildcard(file:name(), file:name(), atom()) -> [file:filename()]. wildcard(Pattern, Cwd, Mod) @@ -121,47 +122,6 @@ file_size(File, Mod) when is_atom(Mod) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -do_wildcard(Pattern, Mod) when is_list(Pattern) -> - do_wildcard_comp(do_compile_wildcard(Pattern), Mod). - -do_wildcard_comp({compiled_wildcard,{exists,File}}, Mod) -> - case eval_read_file_info(File, Mod) of - {ok,_} -> [File]; - _ -> [] - end; -do_wildcard_comp({compiled_wildcard,[cwd,Base|Rest]}, Mod) -> - do_wildcard_1([Base], Rest, Mod); -do_wildcard_comp({compiled_wildcard,[Base|Rest]}, Mod) -> - do_wildcard_1([Base], Rest, Mod). - -do_wildcard(Pattern, Cwd, Mod) when is_list(Pattern), (is_list(Cwd) or is_binary(Cwd)) -> - do_wildcard_comp(do_compile_wildcard(Pattern), Cwd, Mod). - -do_wildcard_comp({compiled_wildcard,{exists,File}}, Cwd, Mod) -> - case eval_read_file_info(filename:absname(File, Cwd), Mod) of - {ok,_} -> [File]; - _ -> [] - end; -do_wildcard_comp({compiled_wildcard,[cwd|Rest0]}, Cwd0, Mod) -> - case Rest0 of - [current|Rest] -> ok; - Rest -> ok - end, - {Cwd,PrefixLen} = case filename:join([Cwd0]) of - Bin when is_binary(Bin) -> {Bin,byte_size(Bin)+1}; - Other -> {Other,length(Other)+1} - end, %Slash away redundant slashes. - [ - if - is_binary(N) -> - <<_:PrefixLen/binary,Res/binary>> = N, - Res; - true -> - lists:nthtail(PrefixLen, N) - end || N <- do_wildcard_1([Cwd], Rest, Mod)]; -do_wildcard_comp({compiled_wildcard,[Base|Rest]}, _Cwd, Mod) -> - do_wildcard_1([Base], Rest, Mod). - do_is_dir(Dir, Mod) -> case eval_read_file_info(Dir, Mod) of {ok, #file_info{type=directory}} -> @@ -290,8 +250,23 @@ ensure_dir(F) -> %%% Pattern matching using a compiled wildcard. %%% -do_wildcard_1(Files, Pattern, Mod) -> - do_wildcard_2(Files, Pattern, [], Mod). +do_wildcard(Pattern, Cwd, Mod) -> + {Compiled,PrefixLen} = compile_wildcard(Pattern, Cwd), + Files = do_wildcard_1(Compiled, Mod), + if + PrefixLen =:= 0 -> + Files; + true -> + [lists:nthtail(PrefixLen, File) || File <- Files] + end. + +do_wildcard_1({exists,File}, Mod) -> + case eval_read_file_info(File, Mod) of + {ok,_} -> [File]; + _ -> [] + end; +do_wildcard_1([Base|Rest], Mod) -> + do_wildcard_2([Base], Rest, [], Mod). do_wildcard_2([File|Rest], Pattern, Result, Mod) -> do_wildcard_2(Rest, Pattern, do_wildcard_3(File, Pattern, Result, Mod), Mod); @@ -299,7 +274,7 @@ do_wildcard_2([], _, Result, _Mod) -> Result. do_wildcard_3(Base, [[double_star]|Rest], Result, Mod) -> - lists:sort(do_double_star(current, [Base], Rest, Result, Mod, true)); + lists:sort(do_double_star(".", [Base], Rest, Result, Mod, true)); do_wildcard_3(Base, [Pattern|Rest], Result, Mod) -> case do_list_dir(Base, Mod) of {ok, Files0} -> @@ -315,7 +290,7 @@ do_wildcard_3(Base, [], Result, _Mod) -> wildcard_4(Pattern, [File|Rest], Base, Result) -> case wildcard_5(Pattern, File) of true -> - wildcard_4(Pattern, Rest, Base, [join(Base, File)|Result]); + wildcard_4(Pattern, Rest, Base, [filename:join(Base, File)|Result]); false -> wildcard_4(Pattern, Rest, Base, Result) end; @@ -349,7 +324,10 @@ wildcard_5([_|_], []) -> false. do_double_star(Base, [H|T], Rest, Result, Mod, Root) -> - Full = join(Base, H), + Full = case Root of + false -> filename:join(Base, H); + true -> H + end, Result1 = case do_list_dir(Full, Mod) of {ok, Files} -> do_double_star(Full, Files, Rest, Result, Mod, false); @@ -379,46 +357,54 @@ do_alt([Alt|Rest], File) -> do_alt([], _File) -> false. -do_list_dir(current, Mod) -> eval_list_dir(".", Mod); do_list_dir(Dir, Mod) -> eval_list_dir(Dir, Mod). -join(current, File) -> File; -join(Base, File) -> filename:join(Base, File). - %%% Compiling a wildcard. -compile_wildcard(Pattern) -> - ?HANDLE_ERROR(do_compile_wildcard(Pattern)). - -do_compile_wildcard(Pattern) -> - {compiled_wildcard,compile_wildcard_1(Pattern)}. +%% Only for debugging. +compile_wildcard(Pattern) when is_list(Pattern) -> + {compiled_wildcard,?HANDLE_ERROR(compile_wildcard(Pattern, "."))}. -compile_wildcard_1(Pattern) -> +compile_wildcard(Pattern, Cwd0) -> [Root|Rest] = filename:split(Pattern), case filename:pathtype(Root) of relative -> - case compile_wildcard_2([Root|Rest], current) of - {exists,_}=Wc -> Wc; - [_|_]=Wc -> [cwd|Wc] - end; + Cwd = filename:join([Cwd0]), + compile_wildcard_2([Root|Rest], {cwd,Cwd}); _ -> - compile_wildcard_2(Rest, [Root]) + compile_wildcard_2(Rest, {root,0,Root}) end. compile_wildcard_2([Part|Rest], Root) -> case compile_part(Part) of Part -> - compile_wildcard_2(Rest, join(Root, Part)); + compile_wildcard_2(Rest, compile_join(Root, Part)); Pattern -> compile_wildcard_3(Rest, [Pattern,Root]) end; -compile_wildcard_2([], Root) -> {exists,Root}. +compile_wildcard_2([], {root,PrefixLen,Root}) -> + {{exists,Root},PrefixLen}. compile_wildcard_3([Part|Rest], Result) -> compile_wildcard_3(Rest, [compile_part(Part)|Result]); compile_wildcard_3([], Result) -> - lists:reverse(Result). + case lists:reverse(Result) of + [{root,PrefixLen,Root}|Compiled] -> + {[Root|Compiled],PrefixLen}; + [{cwd,Root}|Compiled] -> + {[Root|Compiled],length(filename:join(Root, "x"))-1} + end. + +compile_join({cwd,"."}, File) -> + {root,0,File}; +compile_join({cwd,Cwd}, File0) -> + File = filename:join([File0]), + Root = filename:join(Cwd, File), + PrefixLen = length(Root) - length(File), + {root,PrefixLen,Root}; +compile_join({root,PrefixLen,Root}, File) -> + {root,PrefixLen,filename:join(Root, File)}. compile_part(Part) -> compile_part(Part, false, []). -- cgit v1.2.3 From c84311ffe08d12c96d38b6464b2f0ba8cd567e42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 12 Feb 2013 10:54:03 +0100 Subject: Replace filename:join/2 with '++' Under controlled circumstances (and we are in control of the circumstances), filename:join/2 can be replaced with '++'. '++' is faster because it is implmented in C, does not need to look at the list elements it copies, and does not need to copy its right operand. --- lib/stdlib/src/filelib.erl | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl index b1a5b7a8a9..8a32220a7b 100644 --- a/lib/stdlib/src/filelib.erl +++ b/lib/stdlib/src/filelib.erl @@ -275,9 +275,10 @@ do_wildcard_2([], _, Result, _Mod) -> do_wildcard_3(Base, [[double_star]|Rest], Result, Mod) -> lists:sort(do_double_star(".", [Base], Rest, Result, Mod, true)); -do_wildcard_3(Base, [Pattern|Rest], Result, Mod) -> - case do_list_dir(Base, Mod) of +do_wildcard_3(Base0, [Pattern|Rest], Result, Mod) -> + case do_list_dir(Base0, Mod) of {ok, Files0} -> + Base = prepare_base(Base0), Files = lists:sort(Files0), Matches = wildcard_4(Pattern, Files, Base, []), do_wildcard_2(Matches, Rest, Result, Mod); @@ -290,7 +291,7 @@ do_wildcard_3(Base, [], Result, _Mod) -> wildcard_4(Pattern, [File|Rest], Base, Result) -> case wildcard_5(Pattern, File) of true -> - wildcard_4(Pattern, Rest, Base, [filename:join(Base, File)|Result]); + wildcard_4(Pattern, Rest, Base, [Base++File|Result]); false -> wildcard_4(Pattern, Rest, Base, Result) end; @@ -323,6 +324,11 @@ wildcard_5([], [_|_]) -> wildcard_5([_|_], []) -> false. +prepare_base(Base0) -> + Base1 = filename:join(Base0, "x"), + "x"++Base2 = lists:reverse(Base1), + lists:reverse(Base2). + do_double_star(Base, [H|T], Rest, Result, Mod, Root) -> Full = case Root of false -> filename:join(Base, H); -- cgit v1.2.3 From 3fafadfc76d061d5d2f30afde3e647f8b8282e37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 12 Feb 2013 11:13:04 +0100 Subject: Only sort the result list once, just before returning There does not seem to be any advantage of sorting the result of file:list_dir/1 directly. Disadvantages are that we'll need to be careful to keep the result sorted, and that we could waste time sorting filenames that the pattern matching will discard anyway. --- lib/stdlib/src/filelib.erl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl index 8a32220a7b..c936d08db4 100644 --- a/lib/stdlib/src/filelib.erl +++ b/lib/stdlib/src/filelib.erl @@ -252,13 +252,14 @@ ensure_dir(F) -> do_wildcard(Pattern, Cwd, Mod) -> {Compiled,PrefixLen} = compile_wildcard(Pattern, Cwd), - Files = do_wildcard_1(Compiled, Mod), - if - PrefixLen =:= 0 -> - Files; - true -> - [lists:nthtail(PrefixLen, File) || File <- Files] - end. + Files0 = do_wildcard_1(Compiled, Mod), + Files = if + PrefixLen =:= 0 -> + Files0; + true -> + [lists:nthtail(PrefixLen, File) || File <- Files0] + end, + lists:sort(Files). do_wildcard_1({exists,File}, Mod) -> case eval_read_file_info(File, Mod) of @@ -274,12 +275,11 @@ do_wildcard_2([], _, Result, _Mod) -> Result. do_wildcard_3(Base, [[double_star]|Rest], Result, Mod) -> - lists:sort(do_double_star(".", [Base], Rest, Result, Mod, true)); + do_double_star(".", [Base], Rest, Result, Mod, true); do_wildcard_3(Base0, [Pattern|Rest], Result, Mod) -> case do_list_dir(Base0, Mod) of - {ok, Files0} -> + {ok, Files} -> Base = prepare_base(Base0), - Files = lists:sort(Files0), Matches = wildcard_4(Pattern, Files, Base, []), do_wildcard_2(Matches, Rest, Result, Mod); _ -> -- cgit v1.2.3 From e306b333a7a01b145c328521111c8e4cc388be19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 12 Feb 2013 12:25:11 +0100 Subject: Clean up and simplify the inner matching loop Use list comprehensions and 'orelse' to make the code more concise. Rename wildcard_5/2 to match_part/2 to make it clearer what it does. If the pattern always matches, don't test it in every iteration of the loop. --- lib/stdlib/src/filelib.erl | 61 +++++++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 33 deletions(-) diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl index c936d08db4..de49e5df6b 100644 --- a/lib/stdlib/src/filelib.erl +++ b/lib/stdlib/src/filelib.erl @@ -280,7 +280,7 @@ do_wildcard_3(Base0, [Pattern|Rest], Result, Mod) -> case do_list_dir(Base0, Mod) of {ok, Files} -> Base = prepare_base(Base0), - Matches = wildcard_4(Pattern, Files, Base, []), + Matches = do_wildcard_4(Pattern, Base, Files), do_wildcard_2(Matches, Rest, Result, Mod); _ -> Result @@ -288,42 +288,43 @@ do_wildcard_3(Base0, [Pattern|Rest], Result, Mod) -> do_wildcard_3(Base, [], Result, _Mod) -> [Base|Result]. -wildcard_4(Pattern, [File|Rest], Base, Result) -> - case wildcard_5(Pattern, File) of - true -> - wildcard_4(Pattern, Rest, Base, [Base++File|Result]); +do_wildcard_4(Pattern, Base, Files) -> + case will_always_match(Pattern) of false -> - wildcard_4(Pattern, Rest, Base, Result) - end; -wildcard_4(_Patt, [], _Base, Result) -> - Result. + [Base++F || F <- Files, match_part(Pattern, F)]; + true -> + [Base++F || F <- Files] + end. -wildcard_5([question|Rest1], [_|Rest2]) -> - wildcard_5(Rest1, Rest2); -wildcard_5([accept], _) -> +match_part([question|Rest1], [_|Rest2]) -> + match_part(Rest1, Rest2); +match_part([accept], _) -> true; -wildcard_5([double_star], _) -> +match_part([double_star], _) -> true; -wildcard_5([star|Rest], File) -> +match_part([star|Rest], File) -> do_star(Rest, File); -wildcard_5([{one_of, Ordset}|Rest], [C|File]) -> +match_part([{one_of, Ordset}|Rest], [C|File]) -> case ordsets:is_element(C, Ordset) of - true -> wildcard_5(Rest, File); + true -> match_part(Rest, File); false -> false end; -wildcard_5([{alt, Alts}], File) -> +match_part([{alt, Alts}], File) -> do_alt(Alts, File); -wildcard_5([C|Rest1], [C|Rest2]) when is_integer(C) -> - wildcard_5(Rest1, Rest2); -wildcard_5([X|_], [Y|_]) when is_integer(X), is_integer(Y) -> +match_part([C|Rest1], [C|Rest2]) when is_integer(C) -> + match_part(Rest1, Rest2); +match_part([X|_], [Y|_]) when is_integer(X), is_integer(Y) -> false; -wildcard_5([], []) -> +match_part([], []) -> true; -wildcard_5([], [_|_]) -> +match_part([], [_|_]) -> false; -wildcard_5([_|_], []) -> +match_part([_|_], []) -> false. +will_always_match([accept]) -> true; +will_always_match(_) -> false. + prepare_base(Base0) -> Base1 = filename:join(Base0, "x"), "x"++Base2 = lists:reverse(Base1), @@ -347,19 +348,13 @@ do_double_star(Base, [H|T], Rest, Result, Mod, Root) -> do_double_star(_Base, [], _Rest, Result, _Mod, _Root) -> Result. -do_star(Pattern, [X|Rest]) -> - case wildcard_5(Pattern, [X|Rest]) of - true -> true; - false -> do_star(Pattern, Rest) - end; +do_star(Pattern, [_|Rest]=File) -> + match_part(Pattern, File) orelse do_star(Pattern, Rest); do_star(Pattern, []) -> - wildcard_5(Pattern, []). + match_part(Pattern, []). do_alt([Alt|Rest], File) -> - case wildcard_5(Alt, File) of - true -> true; - false -> do_alt(Rest, File) - end; + match_part(Alt, File) orelse do_alt(Rest, File); do_alt([], _File) -> false. -- cgit v1.2.3 From 7d22cccf18dd44507ee072315036d5023d09d47a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 12 Feb 2013 14:10:22 +0100 Subject: Optimize character sets using gb_sets --- lib/stdlib/src/filelib.erl | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl index de49e5df6b..93e700284c 100644 --- a/lib/stdlib/src/filelib.erl +++ b/lib/stdlib/src/filelib.erl @@ -305,10 +305,7 @@ match_part([double_star], _) -> match_part([star|Rest], File) -> do_star(Rest, File); match_part([{one_of, Ordset}|Rest], [C|File]) -> - case ordsets:is_element(C, Ordset) of - true -> match_part(Rest, File); - false -> false - end; + gb_sets:is_element(C, Ordset) andalso match_part(Rest, File); match_part([{alt, Alts}], File) -> do_alt(Alts, File); match_part([C|Rest1], [C|Rest2]) when is_integer(C) -> @@ -460,7 +457,7 @@ compile_charset(List, Ordset) -> compile_charset1([Lower, $-, Upper|Rest], Ordset) when Lower =< Upper -> compile_charset1(Rest, compile_range(Lower, Upper, Ordset)); compile_charset1([$]|Rest], Ordset) -> - {ok, {one_of, Ordset}, Rest}; + {ok, {one_of, gb_sets:from_ordset(Ordset)}, Rest}; compile_charset1([X|Rest], Ordset) -> compile_charset1(Rest, ordsets:add_element(X, Ordset)); compile_charset1([], _Ordset) -> -- cgit v1.2.3 From 5abcc77292e59a951e5a53c1641081f973d25c90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 13 Feb 2013 07:39:06 +0100 Subject: Remove special case handling "-" at the beginning of a charset We don't need this special case, since "-" is only special between two other characters. --- lib/stdlib/src/filelib.erl | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl index 93e700284c..7df72a93e5 100644 --- a/lib/stdlib/src/filelib.erl +++ b/lib/stdlib/src/filelib.erl @@ -447,8 +447,6 @@ compile_part([], _Upto, Result) -> compile_charset([$]|Rest], Ordset) -> compile_charset1(Rest, ordsets:add_element($], Ordset)); -compile_charset([$-|Rest], Ordset) -> - compile_charset1(Rest, ordsets:add_element($-, Ordset)); compile_charset([], _Ordset) -> error; compile_charset(List, Ordset) -> -- cgit v1.2.3