aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBjörn Gustavsson <[email protected]>2017-09-14 12:52:02 +0200
committerBjörn Gustavsson <[email protected]>2017-10-04 11:37:43 +0200
commit36dc96339e2b2b692e6dfe6de43db3a2348732bd (patch)
treeef62a03b52cc627de29fb5ad7250984c0a8a41f9
parent49d0857634fc1a990ab97a144787288570fa2507 (diff)
downloadotp-36dc96339e2b2b692e6dfe6de43db3a2348732bd.tar.gz
otp-36dc96339e2b2b692e6dfe6de43db3a2348732bd.tar.bz2
otp-36dc96339e2b2b692e6dfe6de43db3a2348732bd.zip
Implement escaping of special characters in wildcards
Allow characters with special meaning to be escaped using \ (which must be writen as \\ in a string). That allows matching of filenames containing characters that are special in wildcards. This is an incompatible change, but note that the use of backslashes in wildcards would already work differently on Windows and Unix. Take for example this call: filelib:wildcard("a\\b") On Windows, filelib:wildcard/1 would look for a directory named "a", and a file or directory named "b" inside it. On Unix, filelib:wildcard/1 would look for a file named "a\\b". With this commit applied, filelib:wildcard/1 will look for a file named "ab" on both Windows and Unix. https://bugs.erlang.org/browse/ERL-451
-rw-r--r--lib/stdlib/doc/src/filelib.xml5
-rw-r--r--lib/stdlib/src/filelib.erl42
-rw-r--r--lib/stdlib/test/filelib_SUITE.erl33
3 files changed, 76 insertions, 4 deletions
diff --git a/lib/stdlib/doc/src/filelib.xml b/lib/stdlib/doc/src/filelib.xml
index 57c4348745..1b69e84d31 100644
--- a/lib/stdlib/doc/src/filelib.xml
+++ b/lib/stdlib/doc/src/filelib.xml
@@ -217,6 +217,11 @@
<p>Other characters represent themselves. Only filenames that
have exactly the same character in the same position match.
Matching is case-sensitive, for example, "a" does not match "A".</p>
+ <p>Directory separators must always be written as <c>/</c>, even on
+ Windows.</p>
+ <p>A character preceded by <c>\</c> loses its special meaning. Note
+ that <c>\</c> must be written as <c>\\</c> in a string literal.
+ For example, "\\?*" will match any filename starting with <c>?</c>.</p>
<p>Notice that multiple "*" characters are allowed
(as in Unix wildcards, but opposed to Windows/DOS wildcards).</p>
<p><em>Examples:</em></p>
diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl
index d7c313f214..0f90b3fc33 100644
--- a/lib/stdlib/src/filelib.erl
+++ b/lib/stdlib/src/filelib.erl
@@ -365,11 +365,18 @@ do_list_dir(Dir, Mod) -> eval_list_dir(Dir, Mod).
%%% Compiling a wildcard.
+
+%% Define characters used for escaping a \.
+-define(ESCAPE_PREFIX, $@).
+-define(ESCAPE_CHARACTER, [?ESCAPE_PREFIX,$e]).
+-define(ESCAPED_ESCAPE_PREFIX, [?ESCAPE_PREFIX,?ESCAPE_PREFIX]).
+
%% Only for debugging.
compile_wildcard(Pattern) when is_list(Pattern) ->
{compiled_wildcard,?HANDLE_ERROR(compile_wildcard(Pattern, "."))}.
-compile_wildcard(Pattern, Cwd0) ->
+compile_wildcard(Pattern0, Cwd0) ->
+ Pattern = convert_escapes(Pattern0),
[Root|Rest] = filename:split(Pattern),
case filename:pathtype(Root) of
relative ->
@@ -409,7 +416,8 @@ compile_join({cwd,Cwd}, File0) ->
compile_join({root,PrefixLen,Root}, File) ->
{root,PrefixLen,filename:join(Root, File)}.
-compile_part(Part) ->
+compile_part(Part0) ->
+ Part = wrap_escapes(Part0),
compile_part(Part, false, []).
compile_part_to_sep(Part) ->
@@ -445,6 +453,8 @@ compile_part([${|Rest], Upto, Result) ->
error ->
compile_part(Rest, Upto, [${|Result])
end;
+compile_part([{escaped,X}|Rest], Upto, Result) ->
+ compile_part(Rest, Upto, [X|Result]);
compile_part([X|Rest], Upto, Result) ->
compile_part(Rest, Upto, [X|Result]);
compile_part([], _Upto, Result) ->
@@ -461,6 +471,8 @@ compile_charset1([Lower, $-, Upper|Rest], Ordset) when Lower =< Upper ->
compile_charset1(Rest, compile_range(Lower, Upper, Ordset));
compile_charset1([$]|Rest], Ordset) ->
{ok, {one_of, gb_sets:from_ordset(Ordset)}, Rest};
+compile_charset1([{escaped,X}|Rest], Ordset) ->
+ compile_charset1(Rest, ordsets:add_element(X, Ordset));
compile_charset1([X|Rest], Ordset) ->
compile_charset1(Rest, ordsets:add_element(X, Ordset));
compile_charset1([], _Ordset) ->
@@ -486,6 +498,32 @@ compile_alt(Pattern, Result) ->
error
end.
+%% Convert backslashes to an illegal Unicode character to
+%% protect in from filename:split/1.
+
+convert_escapes([?ESCAPE_PREFIX|T]) ->
+ ?ESCAPED_ESCAPE_PREFIX ++ convert_escapes(T);
+convert_escapes([$\\|T]) ->
+ ?ESCAPE_CHARACTER ++ convert_escapes(T);
+convert_escapes([H|T]) ->
+ [H|convert_escapes(T)];
+convert_escapes([]) ->
+ [].
+
+%% Wrap each escape in a tuple to remove the special meaning for
+%% the character that follows.
+
+wrap_escapes(?ESCAPED_ESCAPE_PREFIX ++ T) ->
+ [?ESCAPE_PREFIX|wrap_escapes(T)];
+wrap_escapes(?ESCAPE_CHARACTER ++ [C|T]) ->
+ [{escaped,C}|wrap_escapes(T)];
+wrap_escapes(?ESCAPE_CHARACTER) ->
+ [];
+wrap_escapes([H|T]) ->
+ [H|wrap_escapes(T)];
+wrap_escapes([]) ->
+ [].
+
badpattern(Reason) ->
error({badpattern,Reason}).
diff --git a/lib/stdlib/test/filelib_SUITE.erl b/lib/stdlib/test/filelib_SUITE.erl
index c94821bc75..1236fe45f4 100644
--- a/lib/stdlib/test/filelib_SUITE.erl
+++ b/lib/stdlib/test/filelib_SUITE.erl
@@ -120,7 +120,7 @@ wcc(Wc, Error) ->
do_wildcard_1(Dir, Wcf0) ->
do_wildcard_2(Dir, Wcf0),
Wcf = fun(Wc0) ->
- Wc = filename:join(Dir, Wc0),
+ Wc = Dir ++ "/" ++ Wc0,
L = Wcf0(Wc),
[subtract_dir(N, Dir) || N <- L]
end,
@@ -268,8 +268,37 @@ do_wildcard_9(Dir, Wcf) ->
%% Cleanup.
del(Files),
[ok = file:del_dir(D) || D <- lists:reverse(Dirs)],
- ok.
+ do_wildcard_10(Dir, Wcf).
+
+%% ERL-451/OTP-14577: Escape characters using \\.
+do_wildcard_10(Dir, Wcf) ->
+ All0 = ["{abc}","abc","def","---","z--","@a,b","@c"],
+ All = case os:type() of
+ {unix,_} ->
+ %% '?' is allowed in file names on Unix, but
+ %% not on Windows.
+ ["?q"|All0];
+ _ ->
+ All0
+ end,
+ Files = mkfiles(lists:reverse(All), Dir),
+
+ ["{abc}"] = Wcf("\\{a*"),
+ ["{abc}"] = Wcf("\\{abc}"),
+ ["abc","def","z--"] = Wcf("[a-z]*"),
+ ["---","abc","z--"] = Wcf("[a\\-z]*"),
+ ["@a,b","@c"] = Wcf("@{a\\,b,c}"),
+ ["@c"] = Wcf("@{a,b,c}"),
+
+ case os:type() of
+ {unix,_} ->
+ ["?q"] = Wcf("\\?q");
+ _ ->
+ [] = Wcf("\\?q")
+ end,
+ del(Files),
+ ok.
fold_files(Config) when is_list(Config) ->
Dir = filename:join(proplists:get_value(priv_dir, Config), "fold_files"),