aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBjörn Gustavsson <[email protected]>2017-10-05 10:40:52 +0200
committerGitHub <[email protected]>2017-10-05 10:40:52 +0200
commitf7476ef04d6cad95a54aff3aae734c25a283e3fd (patch)
treefeb60a7333102e60fce579a3a50685272a0825a9
parentb6c52ec1a344d7ee42b10feddb10c1d03b3a7c27 (diff)
parent36dc96339e2b2b692e6dfe6de43db3a2348732bd (diff)
downloadotp-f7476ef04d6cad95a54aff3aae734c25a283e3fd.tar.gz
otp-f7476ef04d6cad95a54aff3aae734c25a283e3fd.tar.bz2
otp-f7476ef04d6cad95a54aff3aae734c25a283e3fd.zip
Merge pull request #1567 from bjorng/bjorn/stdlib/wildcard-escaping/ERL-451/OTP-14577
Implement escaping of special characters in wildcards
-rw-r--r--lib/stdlib/doc/src/filelib.xml5
-rw-r--r--lib/stdlib/src/filelib.erl42
-rw-r--r--lib/stdlib/test/filelib_SUITE.erl33
3 files changed, 76 insertions, 4 deletions
diff --git a/lib/stdlib/doc/src/filelib.xml b/lib/stdlib/doc/src/filelib.xml
index 57c4348745..1b69e84d31 100644
--- a/lib/stdlib/doc/src/filelib.xml
+++ b/lib/stdlib/doc/src/filelib.xml
@@ -217,6 +217,11 @@
<p>Other characters represent themselves. Only filenames that
have exactly the same character in the same position match.
Matching is case-sensitive, for example, "a" does not match "A".</p>
+ <p>Directory separators must always be written as <c>/</c>, even on
+ Windows.</p>
+ <p>A character preceded by <c>\</c> loses its special meaning. Note
+ that <c>\</c> must be written as <c>\\</c> in a string literal.
+ For example, "\\?*" will match any filename starting with <c>?</c>.</p>
<p>Notice that multiple "*" characters are allowed
(as in Unix wildcards, but opposed to Windows/DOS wildcards).</p>
<p><em>Examples:</em></p>
diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl
index d7c313f214..0f90b3fc33 100644
--- a/lib/stdlib/src/filelib.erl
+++ b/lib/stdlib/src/filelib.erl
@@ -365,11 +365,18 @@ do_list_dir(Dir, Mod) -> eval_list_dir(Dir, Mod).
%%% Compiling a wildcard.
+
+%% Define the internal encoding of an escaping \ and of a literal prefix character.
+-define(ESCAPE_PREFIX, $@).
+-define(ESCAPE_CHARACTER, [?ESCAPE_PREFIX,$e]).
+-define(ESCAPED_ESCAPE_PREFIX, [?ESCAPE_PREFIX,?ESCAPE_PREFIX]).
+
%% Only for debugging.
compile_wildcard(Pattern) when is_list(Pattern) ->
{compiled_wildcard,?HANDLE_ERROR(compile_wildcard(Pattern, "."))}.
-compile_wildcard(Pattern, Cwd0) ->
+compile_wildcard(Pattern0, Cwd0) ->
+ Pattern = convert_escapes(Pattern0),
[Root|Rest] = filename:split(Pattern),
case filename:pathtype(Root) of
relative ->
@@ -409,7 +416,8 @@ compile_join({cwd,Cwd}, File0) ->
compile_join({root,PrefixLen,Root}, File) ->
{root,PrefixLen,filename:join(Root, File)}.
-compile_part(Part) ->
+compile_part(Part0) ->
+ Part = wrap_escapes(Part0),
compile_part(Part, false, []).
compile_part_to_sep(Part) ->
@@ -445,6 +453,8 @@ compile_part([${|Rest], Upto, Result) ->
error ->
compile_part(Rest, Upto, [${|Result])
end;
+compile_part([{escaped,X}|Rest], Upto, Result) ->
+ compile_part(Rest, Upto, [X|Result]);
compile_part([X|Rest], Upto, Result) ->
compile_part(Rest, Upto, [X|Result]);
compile_part([], _Upto, Result) ->
@@ -461,6 +471,8 @@ compile_charset1([Lower, $-, Upper|Rest], Ordset) when Lower =< Upper ->
compile_charset1(Rest, compile_range(Lower, Upper, Ordset));
compile_charset1([$]|Rest], Ordset) ->
{ok, {one_of, gb_sets:from_ordset(Ordset)}, Rest};
+compile_charset1([{escaped,X}|Rest], Ordset) ->
+ compile_charset1(Rest, ordsets:add_element(X, Ordset));
compile_charset1([X|Rest], Ordset) ->
compile_charset1(Rest, ordsets:add_element(X, Ordset));
compile_charset1([], _Ordset) ->
@@ -486,6 +498,32 @@ compile_alt(Pattern, Result) ->
error
end.
+%% Convert each backslash to the escape sequence "@e" (and each
+%% literal "@" to "@@") to protect it from filename:split/1.
+
+convert_escapes([?ESCAPE_PREFIX|T]) ->
+ ?ESCAPED_ESCAPE_PREFIX ++ convert_escapes(T);
+convert_escapes([$\\|T]) ->
+ ?ESCAPE_CHARACTER ++ convert_escapes(T);
+convert_escapes([H|T]) ->
+ [H|convert_escapes(T)];
+convert_escapes([]) ->
+ [].
+
+%% Wrap the character that follows each escape in an {escaped,C} tuple
+%% so that it loses its special meaning. A trailing escape is dropped.
+
+wrap_escapes(?ESCAPED_ESCAPE_PREFIX ++ T) ->
+ [?ESCAPE_PREFIX|wrap_escapes(T)];
+wrap_escapes(?ESCAPE_CHARACTER ++ [C|T]) ->
+ [{escaped,C}|wrap_escapes(T)];
+wrap_escapes(?ESCAPE_CHARACTER) ->
+ [];
+wrap_escapes([H|T]) ->
+ [H|wrap_escapes(T)];
+wrap_escapes([]) ->
+ [].
+
badpattern(Reason) ->
error({badpattern,Reason}).
diff --git a/lib/stdlib/test/filelib_SUITE.erl b/lib/stdlib/test/filelib_SUITE.erl
index c94821bc75..1236fe45f4 100644
--- a/lib/stdlib/test/filelib_SUITE.erl
+++ b/lib/stdlib/test/filelib_SUITE.erl
@@ -120,7 +120,7 @@ wcc(Wc, Error) ->
do_wildcard_1(Dir, Wcf0) ->
do_wildcard_2(Dir, Wcf0),
Wcf = fun(Wc0) ->
- Wc = filename:join(Dir, Wc0),
+ Wc = Dir ++ "/" ++ Wc0,
L = Wcf0(Wc),
[subtract_dir(N, Dir) || N <- L]
end,
@@ -268,8 +268,37 @@ do_wildcard_9(Dir, Wcf) ->
%% Cleanup.
del(Files),
[ok = file:del_dir(D) || D <- lists:reverse(Dirs)],
- ok.
+ do_wildcard_10(Dir, Wcf).
+
+%% ERL-451/OTP-14577: Escape characters using \\.
+do_wildcard_10(Dir, Wcf) ->
+ All0 = ["{abc}","abc","def","---","z--","@a,b","@c"],
+ All = case os:type() of
+ {unix,_} ->
+ %% '?' is allowed in file names on Unix, but
+ %% not on Windows.
+ ["?q"|All0];
+ _ ->
+ All0
+ end,
+ Files = mkfiles(lists:reverse(All), Dir),
+
+ ["{abc}"] = Wcf("\\{a*"),
+ ["{abc}"] = Wcf("\\{abc}"),
+ ["abc","def","z--"] = Wcf("[a-z]*"),
+ ["---","abc","z--"] = Wcf("[a\\-z]*"),
+ ["@a,b","@c"] = Wcf("@{a\\,b,c}"),
+ ["@c"] = Wcf("@{a,b,c}"),
+
+ case os:type() of
+ {unix,_} ->
+ ["?q"] = Wcf("\\?q");
+ _ ->
+ [] = Wcf("\\?q")
+ end,
+ del(Files),
+ ok.
fold_files(Config) when is_list(Config) ->
Dir = filename:join(proplists:get_value(priv_dir, Config), "fold_files"),