diff options
author | Patrik Nyblom <[email protected]> | 2010-12-03 15:38:14 +0100 |
---|---|---|
committer | Patrik Nyblom <[email protected]> | 2010-12-03 15:38:14 +0100 |
commit | 73b53fe24ebc595a81c4ee023bcfef5a71e59db4 (patch) | |
tree | ed75b935318dbef275d62ab141a5f59b5b36bce0 /lib/stdlib/src/filelib.erl | |
parent | 52b0fbbc4daa4e22fcad6fe39dd8f8cec8bb8d1b (diff) | |
parent | 159d79b6657e5bb4f3accb6a0d74b74f06da4ba2 (diff) | |
download | otp-73b53fe24ebc595a81c4ee023bcfef5a71e59db4.tar.gz otp-73b53fe24ebc595a81c4ee023bcfef5a71e59db4.tar.bz2 otp-73b53fe24ebc595a81c4ee023bcfef5a71e59db4.zip |
Merge branch 'pan/unicode-filenames/OTP-8887' into dev
* pan/unicode-filenames/OTP-8887: (27 commits)
Test and correct filelib and filename
Add documentation to erlang.xml and slight correction to unicode_usage.xml
Add section about Unicode file names to stdlib users guide
Correct bug in file_name_SUITE making it fail on Unix instead of Windows7
Add documentation about raw filenames and Unicode file name translation mode
Make filelib not crash on re codepoints beyond 255 in re when filename is raw
Mend on_load_embedded testcase which did not handle windows links
Correct testcase regarding windows versions supporting soft links.
Teach filelib to use re in unicode mode when filenames are not raw
Treat soft links on Windows correctly in file_name_SUITE
Adapt new soft and hard link routines on Windos to Unicode
Corrected testcases broken by unicode filenames
Update preloaded prim_file
Teach prim_file not to accept atoms and not to throw exceptions
Adapt inet_drv to Visual Studio 2008
Teach spawn_executable about Unicode
Convert filenames read on MacOSX to canonical form
Teach file to accept codepoints beyond 255.
Add testcases
Correct shell utilities to handle unicode and possibly binaries
...
Diffstat (limited to 'lib/stdlib/src/filelib.erl')
-rw-r--r-- | lib/stdlib/src/filelib.erl | 60 |
1 files changed, 42 insertions, 18 deletions
diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl index d5ddf9ed7e..c845b61204 100644 --- a/lib/stdlib/src/filelib.erl +++ b/lib/stdlib/src/filelib.erl @@ -47,14 +47,14 @@ wildcard(Pattern) when is_list(Pattern) -> ?HANDLE_ERROR(do_wildcard(Pattern, file)). -spec wildcard(file:name(), file:name() | atom()) -> [file:filename()]. -wildcard(Pattern, Cwd) when is_list(Pattern), is_list(Cwd) -> +wildcard(Pattern, Cwd) when is_list(Pattern), (is_list(Cwd) or is_binary(Cwd)) -> ?HANDLE_ERROR(do_wildcard(Pattern, Cwd, file)); wildcard(Pattern, Mod) when is_list(Pattern), is_atom(Mod) -> ?HANDLE_ERROR(do_wildcard(Pattern, Mod)). -spec wildcard(file:name(), file:name(), atom()) -> [file:filename()]. wildcard(Pattern, Cwd, Mod) - when is_list(Pattern), is_list(Cwd), is_atom(Mod) -> + when is_list(Pattern), (is_list(Cwd) or is_binary(Cwd)), is_atom(Mod) -> ?HANDLE_ERROR(do_wildcard(Pattern, Cwd, Mod)). -spec is_dir(file:name()) -> boolean(). @@ -118,7 +118,7 @@ do_wildcard_comp({compiled_wildcard,{exists,File}}, Mod) -> do_wildcard_comp({compiled_wildcard,[Base|Rest]}, Mod) -> do_wildcard_1([Base], Rest, Mod). -do_wildcard(Pattern, Cwd, Mod) when is_list(Pattern), is_list(Cwd) -> +do_wildcard(Pattern, Cwd, Mod) when is_list(Pattern), (is_list(Cwd) or is_binary(Cwd)) -> do_wildcard_comp(do_compile_wildcard(Pattern), Cwd, Mod). do_wildcard_comp({compiled_wildcard,{exists,File}}, Cwd, Mod) -> @@ -127,9 +127,18 @@ do_wildcard_comp({compiled_wildcard,{exists,File}}, Cwd, Mod) -> _ -> [] end; do_wildcard_comp({compiled_wildcard,[current|Rest]}, Cwd0, Mod) -> - Cwd = filename:join([Cwd0]), %Slash away redundant slashes. - PrefixLen = length(Cwd)+1, - [lists:nthtail(PrefixLen, N) || N <- do_wildcard_1([Cwd], Rest, Mod)]; + {Cwd,PrefixLen} = case filename:join([Cwd0]) of + Bin when is_binary(Bin) -> {Bin,byte_size(Bin)+1}; + Other -> {Other,length(Other)+1} + end, %Slash away redundant slashes. + [ + if + is_binary(N) -> + <<_:PrefixLen/binary,Res/binary>> = N, + Res; + true -> + lists:nthtail(PrefixLen, N) + end || N <- do_wildcard_1([Cwd], Rest, Mod)]; do_wildcard_comp({compiled_wildcard,[Base|Rest]}, _Cwd, Mod) -> do_wildcard_1([Base], Rest, Mod). @@ -166,36 +175,44 @@ do_is_regular(File, Mod) -> %% If <Recursive> is true all sub-directories to <Dir> are processed do_fold_files(Dir, RegExp, Recursive, Fun, Acc, Mod) -> - {ok, Re1} = re:compile(RegExp), - do_fold_files1(Dir, Re1, Recursive, Fun, Acc, Mod). + {ok, Re1} = re:compile(RegExp,[unicode]), + do_fold_files1(Dir, Re1, RegExp, Recursive, Fun, Acc, Mod). -do_fold_files1(Dir, RegExp, Recursive, Fun, Acc, Mod) -> +do_fold_files1(Dir, RegExp, OrigRE, Recursive, Fun, Acc, Mod) -> case eval_list_dir(Dir, Mod) of - {ok, Files} -> do_fold_files2(Files, Dir, RegExp, Recursive, Fun, Acc, Mod); + {ok, Files} -> do_fold_files2(Files, Dir, RegExp, OrigRE, + Recursive, Fun, Acc, Mod); {error, _} -> Acc end. -do_fold_files2([], _Dir, _RegExp, _Recursive, _Fun, Acc, _Mod) -> +%% OrigRE is not to be compiled as it's for non conforming filenames, +%% i.e. for filenames that does not comply to the current encoding, which should +%% be very rare. We use it only in those cases and do not want to precompile. +do_fold_files2([], _Dir, _RegExp, _OrigRE, _Recursive, _Fun, Acc, _Mod) -> Acc; -do_fold_files2([File|T], Dir, RegExp, Recursive, Fun, Acc0, Mod) -> +do_fold_files2([File|T], Dir, RegExp, OrigRE, Recursive, Fun, Acc0, Mod) -> FullName = filename:join(Dir, File), case do_is_regular(FullName, Mod) of true -> - case re:run(File, RegExp, [{capture,none}]) of + case (catch re:run(File, if is_binary(File) -> OrigRE; + true -> RegExp end, + [{capture,none}])) of match -> Acc = Fun(FullName, Acc0), - do_fold_files2(T, Dir, RegExp, Recursive, Fun, Acc, Mod); + do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc, Mod); + {'EXIT',_} -> + do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc0, Mod); nomatch -> - do_fold_files2(T, Dir, RegExp, Recursive, Fun, Acc0, Mod) + do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc0, Mod) end; false -> case Recursive andalso do_is_dir(FullName, Mod) of true -> - Acc1 = do_fold_files1(FullName, RegExp, Recursive, + Acc1 = do_fold_files1(FullName, RegExp, OrigRE, Recursive, Fun, Acc0, Mod), - do_fold_files2(T, Dir, RegExp, Recursive, Fun, Acc1, Mod); + do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc1, Mod); false -> - do_fold_files2(T, Dir, RegExp, Recursive, Fun, Acc0, Mod) + do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc0, Mod) end end. @@ -268,6 +285,13 @@ do_wildcard_3(Base, [Pattern|Rest], Result, Mod) -> do_wildcard_3(Base, [], Result, _Mod) -> [Base|Result]. +wildcard_4(Pattern, [File|Rest], Base, Result) when is_binary(File) -> + case wildcard_5(Pattern, binary_to_list(File)) of + true -> + wildcard_4(Pattern, Rest, Base, [join(Base, File)|Result]); + false -> + wildcard_4(Pattern, Rest, Base, Result) + end; wildcard_4(Pattern, [File|Rest], Base, Result) -> case wildcard_5(Pattern, File) of true -> |