diff options
author | Björn Gustavsson <[email protected]> | 2017-09-14 12:52:02 +0200 |
---|---|---|
committer | Björn Gustavsson <[email protected]> | 2017-10-04 11:37:43 +0200 |
commit | 36dc96339e2b2b692e6dfe6de43db3a2348732bd (patch) | |
tree | ef62a03b52cc627de29fb5ad7250984c0a8a41f9 /lib | |
parent | 49d0857634fc1a990ab97a144787288570fa2507 (diff) | |
download | otp-36dc96339e2b2b692e6dfe6de43db3a2348732bd.tar.gz otp-36dc96339e2b2b692e6dfe6de43db3a2348732bd.tar.bz2 otp-36dc96339e2b2b692e6dfe6de43db3a2348732bd.zip |
Implement escaping of special characters in wildcards
Allow characters with special meaning to be escaped using
\ (which must be writen as \\ in a string). That allows
matching of filenames containing characters that are special
in wildcards.
This is an incompatible change, but note that the use of backslashes
in wildcards would already work differently on Windows and Unix. Take
for example this call:
filelib:wildcard("a\\b")
On Windows, filelib:wildcard/1 would look for a directory named
"a", and a file or directory named "b" inside it.
On Unix, filelib:wildcard/1 would look for a file named "a\\b".
With this commit applied, filelib:wildcard/1 will look for
a file named "ab" on both Windows and Unix.
https://bugs.erlang.org/browse/ERL-451
Diffstat (limited to 'lib')
-rw-r--r-- | lib/stdlib/doc/src/filelib.xml | 5 | ||||
-rw-r--r-- | lib/stdlib/src/filelib.erl | 42 | ||||
-rw-r--r-- | lib/stdlib/test/filelib_SUITE.erl | 33 |
3 files changed, 76 insertions, 4 deletions
diff --git a/lib/stdlib/doc/src/filelib.xml b/lib/stdlib/doc/src/filelib.xml index 57c4348745..1b69e84d31 100644 --- a/lib/stdlib/doc/src/filelib.xml +++ b/lib/stdlib/doc/src/filelib.xml @@ -217,6 +217,11 @@ <p>Other characters represent themselves. Only filenames that have exactly the same character in the same position match. Matching is case-sensitive, for example, "a" does not match "A".</p> + <p>Directory separators must always be written as <c>/</c>, even on + Windows.</p> + <p>A character preceded by <c>\</c> loses its special meaning. Note + that <c>\</c> must be written as <c>\\</c> in a string literal. + For example, "\\?*" will match any filename starting with <c>?</c>.</p> <p>Notice that multiple "*" characters are allowed (as in Unix wildcards, but opposed to Windows/DOS wildcards).</p> <p><em>Examples:</em></p> diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl index d7c313f214..0f90b3fc33 100644 --- a/lib/stdlib/src/filelib.erl +++ b/lib/stdlib/src/filelib.erl @@ -365,11 +365,18 @@ do_list_dir(Dir, Mod) -> eval_list_dir(Dir, Mod). %%% Compiling a wildcard. + +%% Define characters used for escaping a \. +-define(ESCAPE_PREFIX, $@). +-define(ESCAPE_CHARACTER, [?ESCAPE_PREFIX,$e]). +-define(ESCAPED_ESCAPE_PREFIX, [?ESCAPE_PREFIX,?ESCAPE_PREFIX]). + %% Only for debugging. compile_wildcard(Pattern) when is_list(Pattern) -> {compiled_wildcard,?HANDLE_ERROR(compile_wildcard(Pattern, "."))}. -compile_wildcard(Pattern, Cwd0) -> +compile_wildcard(Pattern0, Cwd0) -> + Pattern = convert_escapes(Pattern0), [Root|Rest] = filename:split(Pattern), case filename:pathtype(Root) of relative -> @@ -409,7 +416,8 @@ compile_join({cwd,Cwd}, File0) -> compile_join({root,PrefixLen,Root}, File) -> {root,PrefixLen,filename:join(Root, File)}. -compile_part(Part) -> +compile_part(Part0) -> + Part = wrap_escapes(Part0), compile_part(Part, false, []). compile_part_to_sep(Part) -> @@ -445,6 +453,8 @@ compile_part([${|Rest], Upto, Result) -> error -> compile_part(Rest, Upto, [${|Result]) end; +compile_part([{escaped,X}|Rest], Upto, Result) -> + compile_part(Rest, Upto, [X|Result]); compile_part([X|Rest], Upto, Result) -> compile_part(Rest, Upto, [X|Result]); compile_part([], _Upto, Result) -> @@ -461,6 +471,8 @@ compile_charset1([Lower, $-, Upper|Rest], Ordset) when Lower =< Upper -> compile_charset1(Rest, compile_range(Lower, Upper, Ordset)); compile_charset1([$]|Rest], Ordset) -> {ok, {one_of, gb_sets:from_ordset(Ordset)}, Rest}; +compile_charset1([{escaped,X}|Rest], Ordset) -> + compile_charset1(Rest, ordsets:add_element(X, Ordset)); compile_charset1([X|Rest], Ordset) -> compile_charset1(Rest, ordsets:add_element(X, Ordset)); compile_charset1([], _Ordset) -> @@ -486,6 +498,32 @@ compile_alt(Pattern, Result) -> error end. +%% Convert backslashes to an illegal Unicode character to +%% protect in from filename:split/1. + +convert_escapes([?ESCAPE_PREFIX|T]) -> + ?ESCAPED_ESCAPE_PREFIX ++ convert_escapes(T); +convert_escapes([$\\|T]) -> + ?ESCAPE_CHARACTER ++ convert_escapes(T); +convert_escapes([H|T]) -> + [H|convert_escapes(T)]; +convert_escapes([]) -> + []. + +%% Wrap each escape in a tuple to remove the special meaning for +%% the character that follows. + +wrap_escapes(?ESCAPED_ESCAPE_PREFIX ++ T) -> + [?ESCAPE_PREFIX|wrap_escapes(T)]; +wrap_escapes(?ESCAPE_CHARACTER ++ [C|T]) -> + [{escaped,C}|wrap_escapes(T)]; +wrap_escapes(?ESCAPE_CHARACTER) -> + []; +wrap_escapes([H|T]) -> + [H|wrap_escapes(T)]; +wrap_escapes([]) -> + []. + badpattern(Reason) -> error({badpattern,Reason}). diff --git a/lib/stdlib/test/filelib_SUITE.erl b/lib/stdlib/test/filelib_SUITE.erl index c94821bc75..1236fe45f4 100644 --- a/lib/stdlib/test/filelib_SUITE.erl +++ b/lib/stdlib/test/filelib_SUITE.erl @@ -120,7 +120,7 @@ wcc(Wc, Error) -> do_wildcard_1(Dir, Wcf0) -> do_wildcard_2(Dir, Wcf0), Wcf = fun(Wc0) -> - Wc = filename:join(Dir, Wc0), + Wc = Dir ++ "/" ++ Wc0, L = Wcf0(Wc), [subtract_dir(N, Dir) || N <- L] end, @@ -268,8 +268,37 @@ do_wildcard_9(Dir, Wcf) -> %% Cleanup. del(Files), [ok = file:del_dir(D) || D <- lists:reverse(Dirs)], - ok. + do_wildcard_10(Dir, Wcf). + +%% ERL-451/OTP-14577: Escape characters using \\. +do_wildcard_10(Dir, Wcf) -> + All0 = ["{abc}","abc","def","---","z--","@a,b","@c"], + All = case os:type() of + {unix,_} -> + %% '?' is allowed in file names on Unix, but + %% not on Windows. + ["?q"|All0]; + _ -> + All0 + end, + Files = mkfiles(lists:reverse(All), Dir), + + ["{abc}"] = Wcf("\\{a*"), + ["{abc}"] = Wcf("\\{abc}"), + ["abc","def","z--"] = Wcf("[a-z]*"), + ["---","abc","z--"] = Wcf("[a\\-z]*"), + ["@a,b","@c"] = Wcf("@{a\\,b,c}"), + ["@c"] = Wcf("@{a,b,c}"), + + case os:type() of + {unix,_} -> + ["?q"] = Wcf("\\?q"); + _ -> + [] = Wcf("\\?q") + end, + del(Files), + ok. fold_files(Config) when is_list(Config) -> Dir = filename:join(proplists:get_value(priv_dir, Config), "fold_files"), |