diff options
author | Björn Gustavsson <[email protected]> | 2017-10-05 10:40:52 +0200 |
---|---|---|
committer | GitHub <[email protected]> | 2017-10-05 10:40:52 +0200 |
commit | f7476ef04d6cad95a54aff3aae734c25a283e3fd (patch) | |
tree | feb60a7333102e60fce579a3a50685272a0825a9 | |
parent | b6c52ec1a344d7ee42b10feddb10c1d03b3a7c27 (diff) | |
parent | 36dc96339e2b2b692e6dfe6de43db3a2348732bd (diff) | |
download | otp-f7476ef04d6cad95a54aff3aae734c25a283e3fd.tar.gz otp-f7476ef04d6cad95a54aff3aae734c25a283e3fd.tar.bz2 otp-f7476ef04d6cad95a54aff3aae734c25a283e3fd.zip |
Merge pull request #1567 from bjorng/bjorn/stdlib/wildcard-escaping/ERL-451/OTP-14577
Implement escaping of special characters in wildcards
-rw-r--r-- | lib/stdlib/doc/src/filelib.xml | 5 | ||||
-rw-r--r-- | lib/stdlib/src/filelib.erl | 42 | ||||
-rw-r--r-- | lib/stdlib/test/filelib_SUITE.erl | 33 |
3 files changed, 76 insertions, 4 deletions
diff --git a/lib/stdlib/doc/src/filelib.xml b/lib/stdlib/doc/src/filelib.xml index 57c4348745..1b69e84d31 100644 --- a/lib/stdlib/doc/src/filelib.xml +++ b/lib/stdlib/doc/src/filelib.xml @@ -217,6 +217,11 @@ <p>Other characters represent themselves. Only filenames that have exactly the same character in the same position match. Matching is case-sensitive, for example, "a" does not match "A".</p> + <p>Directory separators must always be written as <c>/</c>, even on + Windows.</p> + <p>A character preceded by <c>\</c> loses its special meaning. Note + that <c>\</c> must be written as <c>\\</c> in a string literal. + For example, "\\?*" will match any filename starting with <c>?</c>.</p> <p>Notice that multiple "*" characters are allowed (as in Unix wildcards, but opposed to Windows/DOS wildcards).</p> <p><em>Examples:</em></p> diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl index d7c313f214..0f90b3fc33 100644 --- a/lib/stdlib/src/filelib.erl +++ b/lib/stdlib/src/filelib.erl @@ -365,11 +365,18 @@ do_list_dir(Dir, Mod) -> eval_list_dir(Dir, Mod). %%% Compiling a wildcard. + +%% Define characters used for escaping a \. +-define(ESCAPE_PREFIX, $@). +-define(ESCAPE_CHARACTER, [?ESCAPE_PREFIX,$e]). +-define(ESCAPED_ESCAPE_PREFIX, [?ESCAPE_PREFIX,?ESCAPE_PREFIX]). + %% Only for debugging. compile_wildcard(Pattern) when is_list(Pattern) -> {compiled_wildcard,?HANDLE_ERROR(compile_wildcard(Pattern, "."))}. -compile_wildcard(Pattern, Cwd0) -> +compile_wildcard(Pattern0, Cwd0) -> + Pattern = convert_escapes(Pattern0), [Root|Rest] = filename:split(Pattern), case filename:pathtype(Root) of relative -> @@ -409,7 +416,8 @@ compile_join({cwd,Cwd}, File0) -> compile_join({root,PrefixLen,Root}, File) -> {root,PrefixLen,filename:join(Root, File)}. -compile_part(Part) -> +compile_part(Part0) -> + Part = wrap_escapes(Part0), compile_part(Part, false, []). compile_part_to_sep(Part) -> @@ -445,6 +453,8 @@ compile_part([${|Rest], Upto, Result) -> error -> compile_part(Rest, Upto, [${|Result]) end; +compile_part([{escaped,X}|Rest], Upto, Result) -> + compile_part(Rest, Upto, [X|Result]); compile_part([X|Rest], Upto, Result) -> compile_part(Rest, Upto, [X|Result]); compile_part([], _Upto, Result) -> @@ -461,6 +471,8 @@ compile_charset1([Lower, $-, Upper|Rest], Ordset) when Lower =< Upper -> compile_charset1(Rest, compile_range(Lower, Upper, Ordset)); compile_charset1([$]|Rest], Ordset) -> {ok, {one_of, gb_sets:from_ordset(Ordset)}, Rest}; +compile_charset1([{escaped,X}|Rest], Ordset) -> + compile_charset1(Rest, ordsets:add_element(X, Ordset)); compile_charset1([X|Rest], Ordset) -> compile_charset1(Rest, ordsets:add_element(X, Ordset)); compile_charset1([], _Ordset) -> @@ -486,6 +498,32 @@ compile_alt(Pattern, Result) -> error end. +%% Convert backslashes to an illegal Unicode character to +%% protect in from filename:split/1. + +convert_escapes([?ESCAPE_PREFIX|T]) -> + ?ESCAPED_ESCAPE_PREFIX ++ convert_escapes(T); +convert_escapes([$\\|T]) -> + ?ESCAPE_CHARACTER ++ convert_escapes(T); +convert_escapes([H|T]) -> + [H|convert_escapes(T)]; +convert_escapes([]) -> + []. + +%% Wrap each escape in a tuple to remove the special meaning for +%% the character that follows. + +wrap_escapes(?ESCAPED_ESCAPE_PREFIX ++ T) -> + [?ESCAPE_PREFIX|wrap_escapes(T)]; +wrap_escapes(?ESCAPE_CHARACTER ++ [C|T]) -> + [{escaped,C}|wrap_escapes(T)]; +wrap_escapes(?ESCAPE_CHARACTER) -> + []; +wrap_escapes([H|T]) -> + [H|wrap_escapes(T)]; +wrap_escapes([]) -> + []. + badpattern(Reason) -> error({badpattern,Reason}). diff --git a/lib/stdlib/test/filelib_SUITE.erl b/lib/stdlib/test/filelib_SUITE.erl index c94821bc75..1236fe45f4 100644 --- a/lib/stdlib/test/filelib_SUITE.erl +++ b/lib/stdlib/test/filelib_SUITE.erl @@ -120,7 +120,7 @@ wcc(Wc, Error) -> do_wildcard_1(Dir, Wcf0) -> do_wildcard_2(Dir, Wcf0), Wcf = fun(Wc0) -> - Wc = filename:join(Dir, Wc0), + Wc = Dir ++ "/" ++ Wc0, L = Wcf0(Wc), [subtract_dir(N, Dir) || N <- L] end, @@ -268,8 +268,37 @@ do_wildcard_9(Dir, Wcf) -> %% Cleanup. del(Files), [ok = file:del_dir(D) || D <- lists:reverse(Dirs)], - ok. + do_wildcard_10(Dir, Wcf). + +%% ERL-451/OTP-14577: Escape characters using \\. +do_wildcard_10(Dir, Wcf) -> + All0 = ["{abc}","abc","def","---","z--","@a,b","@c"], + All = case os:type() of + {unix,_} -> + %% '?' is allowed in file names on Unix, but + %% not on Windows. + ["?q"|All0]; + _ -> + All0 + end, + Files = mkfiles(lists:reverse(All), Dir), + + ["{abc}"] = Wcf("\\{a*"), + ["{abc}"] = Wcf("\\{abc}"), + ["abc","def","z--"] = Wcf("[a-z]*"), + ["---","abc","z--"] = Wcf("[a\\-z]*"), + ["@a,b","@c"] = Wcf("@{a\\,b,c}"), + ["@c"] = Wcf("@{a,b,c}"), + + case os:type() of + {unix,_} -> + ["?q"] = Wcf("\\?q"); + _ -> + [] = Wcf("\\?q") + end, + del(Files), + ok. fold_files(Config) when is_list(Config) -> Dir = filename:join(proplists:get_value(priv_dir, Config), "fold_files"), |