From 4cf08709189ea8b7e2ae20f85c390abd04ae48ae Mon Sep 17 00:00:00 2001
From: Patrik Nyblom
Date: Wed, 13 Oct 2010 17:08:32 +0200
Subject: Teach filename to accept raw data and add filename enc option to emu
---
lib/stdlib/src/filename.erl | 335 +++++++++++++++++++++++++-------------------
1 file changed, 190 insertions(+), 145 deletions(-)
(limited to 'lib/stdlib')
diff --git a/lib/stdlib/src/filename.erl b/lib/stdlib/src/filename.erl
index 01c06e4596..40df54fe54 100644
--- a/lib/stdlib/src/filename.erl
+++ b/lib/stdlib/src/filename.erl
@@ -41,6 +41,9 @@
-include_lib("kernel/include/file.hrl").
+-define(IS_DRIVELETTER(Letter),(((Letter >= $A) andalso (Letter =< $Z)) orelse
+ ((Letter >= $a) andalso (Letter =< $z)))).
+
%% Converts a relative filename to an absolute filename
%% or the filename itself if it already is an absolute filename
%% Note that no attempt is made to create the most beatiful
@@ -57,12 +60,18 @@
%% (for Unix) : absname("/") -> "/"
%% (for WIN32): absname("/") -> "D:/"
--spec absname(file:name()) -> string().
+
+-spec absname(file:name()) -> file:filename().
absname(Name) ->
{ok, Cwd} = file:get_cwd(),
absname(Name, Cwd).
--spec absname(file:name(), string()) -> string().
+-spec absname(file:name(), file:filename()) -> file:filename().
+absname(Name, AbsBase) when is_binary(Name), is_list(AbsBase) ->
+ absname(Name,filename_string_to_binary(AbsBase));
+absname(Name, AbsBase) when is_list(Name), is_binary(AbsBase) ->
+ absname(filename_string_to_binary(Name),AbsBase);
+
absname(Name, AbsBase) ->
case pathtype(Name) of
relative ->
@@ -77,6 +86,20 @@ absname(Name, AbsBase) ->
%% Handles volumerelative names (on Windows only).
+absname_vr([<<"/">>|Rest1], [Volume|_], _AbsBase) ->
+ %% Absolute path on current drive.
+ join([Volume|Rest1]);
+absname_vr([<<X, $:>>|Rest1], [<<X,_/binary>>|_], AbsBase) ->
+ %% Relative to current directory on current drive.
+ absname(join(Rest1), AbsBase);
+absname_vr([<<X, $:>>|Name], _, _AbsBase) ->
+ %% Relative to current directory on another drive.
+ Dcwd =
+ case file:get_cwd([X, $:]) of
+ {ok, Dir} -> filename_string_to_binary(Dir);
+ {error, _} -> <<X, $:, $/>>
+ end,
+ absname(join(Name), Dcwd);
absname_vr(["/"|Rest1], [Volume|_], _AbsBase) ->
%% Absolute path on current drive.
join([Volume|Rest1]);
@@ -92,41 +115,13 @@ absname_vr([[X, $:]|Name], _, _AbsBase) ->
end,
absname(join(Name), Dcwd).
-%% Joins a relative filename to an absolute base. For VxWorks the
-%% resulting name is fixed to minimize the length by collapsing
-%% ".." directories.
-%% For other systems this is just a join/2, but assumes that
+%% Joins a relative filename to an absolute base.
+%% This is just a join/2, but assumes that
%% AbsBase must be absolute and Name must be relative.
--spec absname_join(string(), file:name()) -> string().
+-spec absname_join(file:filename(), file:name()) -> file:filename().
absname_join(AbsBase, Name) ->
- case major_os_type() of
- vxworks ->
- absname_pretty(AbsBase, split(Name), lists:reverse(split(AbsBase)));
- _Else ->
- join(AbsBase, flatten(Name))
- end.
-
-%% Handles absolute filenames for VxWorks - these are 'pretty-printed',
-%% since a C function call chdir("/erlang/lib/../bin") really sets
-%% cwd to '/erlang/lib/../bin' which also works, but the long term
-%% effect is potentially not so good ...
-%%
-%% absname_pretty("../bin", "/erlang/lib") -> "/erlang/bin"
-%% absname_pretty("../../../..", "/erlang") -> "/erlang"
-
-absname_pretty(Abspath, Relpath, []) ->
- %% AbsBase _must_ begin with a vxworks device name
- {device, _Rest, Dev} = vxworks_first(Abspath),
- absname_pretty(Abspath, Relpath, [lists:reverse(Dev)]);
-absname_pretty(_Abspath, [], AbsBase) ->
- join(lists:reverse(AbsBase));
-absname_pretty(Abspath, [[$.]|Rest], AbsBase) ->
- absname_pretty(Abspath, Rest, AbsBase);
-absname_pretty(Abspath, [[$.,$.]|Rest], [_|AbsRest]) ->
- absname_pretty(Abspath, Rest, AbsRest);
-absname_pretty(Abspath, [First|Rest], AbsBase) ->
- absname_pretty(Abspath, Rest, [First|AbsBase]).
+ join(AbsBase, flatten(Name)).
%% Returns the part of the filename after the last directory separator,
%% or the filename itself if it has no separators.
@@ -136,12 +131,36 @@ absname_pretty(Abspath, [First|Rest], AbsBase) ->
%% basename("/usr/foo/") -> "foo" (trailing slashes ignored)
%% basename("/") -> []
--spec basename(file:name()) -> string().
+-spec basename(file:name()) -> file:filename().
+basename(Name) when is_binary(Name) ->
+ case os:type() of
+ {win32,_} ->
+ win_basenameb(Name);
+ _ ->
+ basenameb(Name,[<<"/">>])
+ end;
+
basename(Name0) ->
Name = flatten(Name0),
{DirSep2, DrvSep} = separators(),
basename1(skip_prefix(Name, DrvSep), [], DirSep2).
+%% Binary basename for Windows: a leading drive prefix such as "c:" is
+%% dropped before the last path component is extracted. Both "/" and "\"
+%% are passed to basenameb/2 as directory separators.
+win_basenameb(<<Letter,$:,Rest/binary>>) when ?IS_DRIVELETTER(Letter) ->
+    basenameb(Rest,[<<"/">>,<<"\\">>]);
+win_basenameb(O) ->
+    basenameb(O,[<<"/">>,<<"\\">>]).
+%% Returns the last non-empty component of the binary path Bin, where Sep
+%% is the list of separator binaries handed to binary:split/3. Empty
+%% components (produced by leading, trailing or repeated separators) are
+%% ignored. When nothing remains, <<>> is returned so that the result is
+%% always a binary (basename/2 takes byte_size/1 of it).
+basenameb(Bin,Sep) ->
+    %% Keep only the non-empty parts; the last one is the basename.
+    case [ X || X <- binary:split(Bin,Sep,[global]), X =/= <<>> ] of
+        [] ->
+            <<>>;
+        Parts ->
+            lists:last(Parts)
+    end.
+
+
+
basename1([$/|[]], Tail, DirSep2) ->
basename1([], Tail, DirSep2);
basename1([$/|Rest], _Tail, DirSep2) ->
@@ -155,26 +174,11 @@ basename1([Char|Rest], Tail, DirSep2) when is_integer(Char) ->
basename1([], Tail, _DirSep2) ->
lists:reverse(Tail).
-skip_prefix(Name, false) -> % No prefix for unix, but for VxWorks.
- case major_os_type() of
- vxworks ->
- case vxworks_first(Name) of
- {device, Rest, _Device} ->
- Rest;
- {not_device, _Rest, _First} ->
- Name
- end;
- _Else ->
- Name
- end;
-skip_prefix(Name, DrvSep) ->
- skip_prefix1(Name, DrvSep).
-
-skip_prefix1([L, DrvSep|Name], DrvSep) when is_integer(L) ->
+skip_prefix(Name, false) ->
+ Name;
+skip_prefix([L, DrvSep|Name], DrvSep) when ?IS_DRIVELETTER(L) ->
Name;
-skip_prefix1([L], _) when is_integer(L) ->
- [L];
-skip_prefix1(Name, _) ->
+skip_prefix(Name, _) ->
Name.
%% Returns the last component of the filename, with the given
@@ -190,7 +194,27 @@ skip_prefix1(Name, _) ->
%% rootname(basename("xxx.jam")) -> "xxx"
%% rootname(basename("xxx.erl")) -> "xxx"
--spec basename(file:name(), file:name()) -> string().
+-spec basename(file:name(), file:name()) -> file:filename().
+basename(Name, Ext) when is_binary(Name), is_list(Ext) ->
+ basename(Name,filename_string_to_binary(Ext));
+basename(Name, Ext) when is_list(Name), is_binary(Ext) ->
+ basename(filename_string_to_binary(Name),Ext);
+basename(Name, Ext) when is_binary(Name), is_binary(Ext) ->
+ BName = basename(Name),
+ LN = byte_size(BName),
+ LE = byte_size(Ext),
+ case LN - LE of
+ Neg when Neg < 0 ->
+ BName;
+ Pos ->
+ case BName of
+ <<Part:Pos/binary,Ext:LE/binary>> ->
+ Part;
+ Other ->
+ Other
+ end
+ end;
+
basename(Name0, Ext0) ->
Name = flatten(Name0),
Ext = flatten(Ext0),
@@ -216,21 +240,10 @@ basename([], _Ext, Tail, _DrvSep2) ->
%% Example: dirname("/usr/src/kalle.erl") -> "/usr/src",
%% dirname("kalle.erl") -> "."
--spec dirname(file:name()) -> string().
+-spec dirname(file:name()) -> file:filename().
dirname(Name0) ->
Name = flatten(Name0),
- case os:type() of
- vxworks ->
- {Devicep, Restname, FirstComp} = vxworks_first(Name),
- case Devicep of
- device ->
- dirname(Restname, FirstComp, [], separators());
- _ ->
- dirname(Name, [], [], separators())
- end;
- _ ->
- dirname(Name, [], [], separators())
- end.
+ dirname(Name, [], [], separators()).
dirname([[_|_]=List|Rest], Dir, File, Seps) ->
dirname(List++Rest, Dir, File, Seps);
@@ -268,7 +281,7 @@ dirname([], Dir, _, _) ->
%%
%% On Windows: fn:dirname("\\usr\\src/kalle.erl") -> "/usr/src"
--spec extension(file:name()) -> string().
+-spec extension(file:name()) -> file:filename().
extension(Name0) ->
Name = flatten(Name0),
extension(Name, [], major_os_type()).
@@ -281,8 +294,6 @@ extension([$/|Rest], _Result, OsType) ->
extension(Rest, [], OsType);
extension([$\\|Rest], _Result, win32) ->
extension(Rest, [], win32);
-extension([$\\|Rest], _Result, vxworks) ->
- extension(Rest, [], vxworks);
extension([Char|Rest], Result, OsType) when is_integer(Char) ->
extension(Rest, [Char|Result], OsType);
extension([], Result, _OsType) ->
@@ -290,23 +301,36 @@ extension([], Result, _OsType) ->
%% Joins a list of filenames with directory separators.
--spec join([string()]) -> string().
+-spec join([file:filename()]) -> file:filename().
join([Name1, Name2|Rest]) ->
join([join(Name1, Name2)|Rest]);
join([Name]) when is_list(Name) ->
join1(Name, [], [], major_os_type());
+join([Name]) when is_binary(Name) ->
+ join1b(Name, <<>>, [], major_os_type());
join([Name]) when is_atom(Name) ->
join([atom_to_list(Name)]).
%% Joins two filenames with directory separators.
--spec join(string(), string()) -> string().
+-spec join(file:filename(), file:filename()) -> file:filename().
join(Name1, Name2) when is_list(Name1), is_list(Name2) ->
OsType = major_os_type(),
case pathtype(Name2) of
relative -> join1(Name1, Name2, [], OsType);
_Other -> join1(Name2, [], [], OsType)
end;
+join(Name1, Name2) when is_binary(Name1), is_list(Name2) ->
+ join(Name1,filename_string_to_binary(Name2));
+join(Name1, Name2) when is_list(Name1), is_binary(Name2) ->
+ join(filename_string_to_binary(Name1),Name2);
+join(Name1, Name2) when is_binary(Name1), is_binary(Name2) ->
+ OsType = major_os_type(),
+ case pathtype(Name2) of
+ relative -> join1b(Name1, Name2, [], OsType);
+ _Other -> join1b(Name2, <<>>, [], OsType)
+ end;
+
join(Name1, Name2) when is_atom(Name1) ->
join(atom_to_list(Name1), Name2);
join(Name1, Name2) when is_atom(Name2) ->
@@ -321,8 +345,6 @@ when is_integer(UcLetter), UcLetter >= $A, UcLetter =< $Z ->
join1(Rest, RelativeName, [$:, UcLetter+$a-$A], win32);
join1([$\\|Rest], RelativeName, Result, win32) ->
join1([$/|Rest], RelativeName, Result, win32);
-join1([$\\|Rest], RelativeName, Result, vxworks) ->
- join1([$/|Rest], RelativeName, Result, vxworks);
join1([$/|Rest], RelativeName, [$., $/|Result], OsType) ->
join1(Rest, RelativeName, [$/|Result], OsType);
join1([$/|Rest], RelativeName, [$/|Result], OsType) ->
@@ -344,6 +366,26 @@ join1([Char|Rest], RelativeName, Result, OsType) when is_integer(Char) ->
join1([Atom|Rest], RelativeName, Result, OsType) when is_atom(Atom) ->
join1(atom_to_list(Atom)++Rest, RelativeName, Result, OsType).
+%% Binary counterpart of join1/4.
+%% The first argument is consumed byte by byte and pushed onto Result
+%% (a reversed list of bytes); when it is exhausted the work continues
+%% with RelativeName. Along the way:
+%%   * an upper-case drive letter at the very start of a win32 name is
+%%     lower-cased,
+%%   * "\" is treated as "/" on win32,
+%%   * "/./" and repeated "/" are collapsed into a single "/".
+%% The final result is converted back to a binary after
+%% maybe_remove_dirsep/2 has been applied.
+join1b(<<UcLetter, $:, Rest/binary>>, RelativeName, [], win32)
+when is_integer(UcLetter), UcLetter >= $A, UcLetter =< $Z ->
+    join1b(Rest, RelativeName, [$:, UcLetter+$a-$A], win32);
+join1b(<<$\\,Rest/binary>>, RelativeName, Result, win32) ->
+    %% Rest is a binary, so it must be spliced in with /binary; a bare
+    %% segment would be built as an integer and raise badarg.
+    join1b(<<$/,Rest/binary>>, RelativeName, Result, win32);
+join1b(<<$/,Rest/binary>>, RelativeName, [$., $/|Result], OsType) ->
+    join1b(Rest, RelativeName, [$/|Result], OsType);
+join1b(<<$/,Rest/binary>>, RelativeName, [$/|Result], OsType) ->
+    join1b(Rest, RelativeName, [$/|Result], OsType);
+join1b(<<>>, <<>>, Result, OsType) ->
+    list_to_binary(maybe_remove_dirsep(Result, OsType));
+join1b(<<>>, RelativeName, [$:|Rest], win32) ->
+    join1b(RelativeName, <<>>, [$:|Rest], win32);
+join1b(<<>>, RelativeName, [$/|Result], OsType) ->
+    join1b(RelativeName, <<>>, [$/|Result], OsType);
+join1b(<<>>, RelativeName, Result, OsType) ->
+    %% First name did not end in a separator: insert one before the rest.
+    join1b(RelativeName, <<>>, [$/|Result], OsType);
+join1b(<<Char,Rest/binary>>, RelativeName, Result, OsType) when is_integer(Char) ->
+    join1b(Rest, RelativeName, [Char|Result], OsType).
+
maybe_remove_dirsep([$/, $:, Letter], win32) ->
[Letter, $:, $/];
maybe_remove_dirsep([$/], _) ->
@@ -357,7 +399,7 @@ maybe_remove_dirsep(Name, _) ->
%% a given base directory, which is assumed to be normalised
%% by a previous call to join/{1,2}.
--spec append(string(), file:name()) -> string().
+-spec append(file:filename(), file:name()) -> file:filename().
append(Dir, Name) ->
Dir ++ [$/|Name].
@@ -376,19 +418,14 @@ append(Dir, Name) ->
-spec pathtype(file:name()) -> 'absolute' | 'relative' | 'volumerelative'.
pathtype(Atom) when is_atom(Atom) ->
pathtype(atom_to_list(Atom));
-pathtype(Name) when is_list(Name) ->
+pathtype(Name) when is_list(Name) or is_binary(Name) ->
case os:type() of
{unix, _} -> unix_pathtype(Name);
- {win32, _} -> win32_pathtype(Name);
- vxworks -> case vxworks_first(Name) of
- {device, _Rest, _Dev} ->
- absolute;
- _ ->
- relative
- end;
- {ose,_} -> unix_pathtype(Name)
+ {win32, _} -> win32_pathtype(Name)
end.
+unix_pathtype(<<$/,_/binary>>) ->
+ absolute;
unix_pathtype([$/|_]) ->
absolute;
unix_pathtype([List|Rest]) when is_list(List) ->
@@ -404,6 +441,15 @@ win32_pathtype([Atom|Rest]) when is_atom(Atom) ->
win32_pathtype(atom_to_list(Atom)++Rest);
win32_pathtype([Char, List|Rest]) when is_list(List) ->
win32_pathtype([Char|List++Rest]);
+win32_pathtype(<<$/, $/, _/binary>>) -> absolute;
+win32_pathtype(<<$\\, $/, _/binary>>) -> absolute;
+win32_pathtype(<<$/, $\\, _/binary>>) -> absolute;
+win32_pathtype(<<$\\, $\\, _/binary>>) -> absolute;
+win32_pathtype(<<$/, _/binary>>) -> volumerelative;
+win32_pathtype(<<$\\, _/binary>>) -> volumerelative;
+win32_pathtype(<<_Letter, $:, $/, _/binary>>) -> absolute;
+win32_pathtype(<<_Letter, $:, $\\, _/binary>>) -> absolute;
+win32_pathtype(<<_Letter, $:, _/binary>>) -> volumerelative;
win32_pathtype([$/, $/|_]) -> absolute;
win32_pathtype([$\\, $/|_]) -> absolute;
win32_pathtype([$/, $\\|_]) -> absolute;
@@ -422,7 +468,7 @@ win32_pathtype(_) -> relative.
%% Examples: rootname("/jam.src/kalle") -> "/jam.src/kalle"
%% rootname("/jam.src/foo.erl") -> "/jam.src/foo"
--spec rootname(file:name()) -> string().
+-spec rootname(file:name()) -> file:filename().
rootname(Name0) ->
Name = flatten(Name0),
rootname(Name, [], [], major_os_type()).
@@ -431,8 +477,6 @@ rootname([$/|Rest], Root, Ext, OsType) ->
rootname(Rest, [$/]++Ext++Root, [], OsType);
rootname([$\\|Rest], Root, Ext, win32) ->
rootname(Rest, [$/]++Ext++Root, [], win32);
-rootname([$\\|Rest], Root, Ext, vxworks) ->
- rootname(Rest, [$/]++Ext++Root, [], vxworks);
rootname([$.|Rest], Root, [], OsType) ->
rootname(Rest, Root, ".", OsType);
rootname([$.|Rest], Root, Ext, OsType) ->
@@ -451,7 +495,7 @@ rootname([], Root, _Ext, _OsType) ->
%% Examples: rootname("/jam.src/kalle.jam", ".erl") -> "/jam.src/kalle.jam"
%% rootname("/jam.src/foo.erl", ".erl") -> "/jam.src/foo"
--spec rootname(file:name(), file:name()) -> string().
+-spec rootname(file:name(), file:name()) -> file:filename().
rootname(Name0, Ext0) ->
Name = flatten(Name0),
Ext = flatten(Ext0),
@@ -471,27 +515,55 @@ rootname2([Char|Rest], Ext, Result) when is_integer(Char) ->
%% split("foo/bar") -> ["foo", "bar"]
%% split("a:\\msdev\\include") -> ["a:/", "msdev", "include"]
--spec split(file:name()) -> [string()].
+-spec split(file:name()) -> [file:filename()].
+split(Name) when is_binary(Name) ->
+ case os:type() of
+ {win32, _} -> win32_splitb(Name);
+ _ -> unix_splitb(Name)
+ end;
+
split(Name0) ->
Name = flatten(Name0),
case os:type() of
- {unix, _} -> unix_split(Name);
{win32, _} -> win32_split(Name);
- vxworks -> vxworks_split(Name);
- {ose,_} -> unix_split(Name)
+ _ -> unix_split(Name)
end.
-%% If a VxWorks filename starts with '[/\].*[^/\]' '[/\].*:' or '.*:'
-%% that part of the filename is considered a device.
-%% The rest of the name is interpreted exactly as for win32.
-%% XXX - dirty solution to make filename:split([]) return the same thing on
-%% VxWorks as on unix and win32.
-vxworks_split([]) ->
- [];
-vxworks_split(L) ->
- {_Devicep, Rest, FirstComp} = vxworks_first(L),
- split(Rest, [], [lists:reverse(FirstComp)], win32).
+%% Splits a binary Unix path into its components.
+%% A leading "/" is reported as the component <<"/">>; empty components
+%% caused by repeated or trailing slashes are dropped. The empty name
+%% yields [].
+unix_splitb(Name) ->
+    L = binary:split(Name,[<<"/">>],[global]),
+    LL = case L of
+             %% binary:split/3 returns [<<>>] for the empty name, so a
+             %% leading empty part only denotes the root directory when
+             %% more parts follow it.
+             [<<>>|Rest] when Rest =/= [] ->
+                 [<<"/">>|Rest];
+             _ ->
+                 L
+         end,
+    [ X || X <- LL, X =/= <<>>].
+
+
+%% Lower-cases an upper-case ASCII drive letter; every other value is
+%% returned unchanged.
+fix_driveletter(Upper) when Upper >= $A, Upper =< $Z ->
+    Upper+$a-$A;
+fix_driveletter(Other) ->
+    Other.
+%% Splits a binary Windows path into its components; "/" and "\" are both
+%% separators and empty components are dropped.
+%%   <<"C:\\a\\b">> -> [<<"c:/">>, <<"a">>, <<"b">>]  (absolute, drive letter lower-cased)
+%%   <<"C:a\\b">>   -> [<<"c:">>, <<"a">>, <<"b">>]   (relative to cwd on drive)
+%%   <<"\\a\\b">>   -> [<<"/">>, <<"a">>, <<"b">>]    (rooted on current drive)
+%%   <<"a\\b">>     -> [<<"a">>, <<"b">>]             (plain relative name)
+win32_splitb(<<Letter0,$:,Slash,Rest/binary>>) when (((Slash =:= $\\) orelse (Slash =:= $/)) andalso
+                                                     ?IS_DRIVELETTER(Letter0)) ->
+    Letter = fix_driveletter(Letter0),
+    L = binary:split(Rest,[<<"/">>,<<"\\">>],[global]),
+    [<<Letter,$:,$/>> | [ X || X <- L, X =/= <<>> ]];
+win32_splitb(<<Letter0,$:,Rest/binary>>) when ?IS_DRIVELETTER(Letter0) ->
+    Letter = fix_driveletter(Letter0),
+    L = binary:split(Rest,[<<"/">>,<<"\\">>],[global]),
+    [<<Letter,$:>> | [ X || X <- L, X =/= <<>> ]];
+win32_splitb(<<Slash,Rest/binary>>) when ((Slash =:= $\\) orelse (Slash =:= $/)) ->
+    L = binary:split(Rest,[<<"/">>,<<"\\">>],[global]),
+    [<<$/>> | [ X || X <- L, X =/= <<>> ]];
+win32_splitb(Name) ->
+    %% Relative name: there is no root component to report, so nothing is
+    %% prepended (matches the list variant, split("foo/bar") -> ["foo","bar"]).
+    L = binary:split(Name,[<<"/">>,<<"\\">>],[global]),
+    [ X || X <- L, X =/= <<>> ].
+
unix_split(Name) ->
split(Name, [], unix).
@@ -502,8 +574,6 @@ win32_split([X, $\\|Rest]) when is_integer(X) ->
win32_split([X, $/|Rest]);
win32_split([X, Y, $\\|Rest]) when is_integer(X), is_integer(Y) ->
win32_split([X, Y, $/|Rest]);
-win32_split([$/, $/|Rest]) ->
- split(Rest, [], [[$/, $/]]);
win32_split([UcLetter, $:|Rest]) when UcLetter >= $A, UcLetter =< $Z ->
win32_split([UcLetter+$a-$A, $:|Rest]);
win32_split([Letter, $:, $/|Rest]) ->
@@ -540,7 +610,7 @@ split([], Comp, Components, OsType) ->
%% will be converted to backslashes. On all platforms, the
%% name will be normalized as done by join/1.
--spec nativename(string()) -> string().
+-spec nativename(file:filename()) -> file:filename().
nativename(Name0) ->
Name = join([Name0]), %Normalize.
case os:type() of
@@ -557,13 +627,12 @@ win32_nativename([]) ->
separators() ->
case os:type() of
- {unix, _} -> {false, false};
{win32, _} -> {$\\, $:};
- vxworks -> {$\\, false};
- {ose,_} -> {false, false}
+ _ -> {false, false}
end.
+
%% find_src(Module) --
%% find_src(Module, Rules) --
%%
@@ -733,45 +802,12 @@ major_os_type() ->
OsT -> OsT
end.
-%% Need to take care of the first pathname component separately
-%% due to VxWorks less than good device naming rules.
-%% (i.e. this is VxWorks specific ...)
-%% The following four all starts with device names
-%% elrond:/foo -> elrond:
-%% elrond:\\foo.bar -> elrond:
-%% /DISK1:foo -> /DISK1:
-%% /usr/include -> /usr
-%% This one doesn't:
-%% foo/bar
-
-vxworks_first([]) ->
- {not_device, [], []};
-vxworks_first([$/|T]) ->
- vxworks_first2(device, T, [$/]);
-vxworks_first([$\\|T]) ->
- vxworks_first2(device, T, [$/]);
-vxworks_first([H|T]) when is_list(H) ->
- vxworks_first(H++T);
-vxworks_first([H|T]) ->
- vxworks_first2(not_device, T, [H]).
-
-vxworks_first2(Devicep, [], FirstComp) ->
- {Devicep, [], FirstComp};
-vxworks_first2(Devicep, [$/|T], FirstComp) ->
- {Devicep, [$/|T], FirstComp};
-vxworks_first2(Devicep, [$\\|T], FirstComp) ->
- {Devicep, [$/|T], FirstComp};
-vxworks_first2(_Devicep, [$:|T], FirstComp)->
- {device, T, [$:|FirstComp]};
-vxworks_first2(Devicep, [H|T], FirstComp) when is_list(H) ->
- vxworks_first2(Devicep, H++T, FirstComp);
-vxworks_first2(Devicep, [H|T], FirstComp) ->
- vxworks_first2(Devicep, T, [H|FirstComp]).
-
%% flatten(List)
%% Flatten a list, also accepting atoms.
--spec flatten(file:name()) -> string().
+-spec flatten(file:name()) -> file:filename().
+flatten(Bin) when is_binary(Bin) ->
+ Bin;
flatten(List) ->
do_flatten(List, []).
@@ -785,3 +821,12 @@ do_flatten([], Tail) ->
Tail;
do_flatten(Atom, Tail) when is_atom(Atom) ->
atom_to_list(Atom) ++ flatten(Tail).
+
+%% Encodes a filename given as a character list into a binary, using the
+%% encoding reported by file:native_name_encoding/0. Characters that
+%% cannot be encoded make the call fail with badarg.
+filename_string_to_binary(List) ->
+    Encoding = file:native_name_encoding(),
+    case unicode:characters_to_binary(List,unicode,Encoding) of
+        Bin when is_binary(Bin) ->
+            Bin;
+        {error,_,_} ->
+            erlang:error(badarg)
+    end.
+
--
cgit v1.2.3
From 63eeba2f6829aac2644eaf212ebef9cdf4b59e8d Mon Sep 17 00:00:00 2001
From: Patrik Nyblom
Date: Thu, 14 Oct 2010 10:12:57 +0200
Subject: Handle binary file names and conversion of unicode strings
---
lib/stdlib/src/c.erl | 20 ++++++++++--
lib/stdlib/src/filename.erl | 79 ++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 96 insertions(+), 3 deletions(-)
(limited to 'lib/stdlib')
diff --git a/lib/stdlib/src/c.erl b/lib/stdlib/src/c.erl
index 6d50a575eb..399b91b92f 100644
--- a/lib/stdlib/src/c.erl
+++ b/lib/stdlib/src/c.erl
@@ -659,7 +659,7 @@ portformat(Name, Id, Cmd) ->
pwd() ->
case file:get_cwd() of
{ok, Str} ->
- ok = io:format("~s\n", [Str]);
+ ok = io:format("~s\n", [fixup_one_bin(Str)]);
{error, _} ->
ok = io:format("Cannot determine current directory\n")
end.
@@ -684,11 +684,27 @@ ls() ->
ls(Dir) ->
case file:list_dir(Dir) of
{ok, Entries} ->
- ls_print(sort(Entries));
+ ls_print(sort(fixup_bin(Entries)));
{error,_E} ->
format("Invalid directory\n")
end.
+%% Makes a raw (binary) file name printable: the binary is turned into a
+%% list of bytes in which every byte above 127 is replaced by $?.
+%% Anything that is not a binary is returned untouched.
+fixup_one_bin(Bin) when is_binary(Bin) ->
+    [ case Byte > 127 of
+          true -> $?;
+          false -> Byte
+      end || <<Byte>> <= Bin ];
+fixup_one_bin(Other) ->
+    Other.
+%% Applies fixup_one_bin/1 to every entry of a list of file names,
+%% preserving their order.
+fixup_bin(Entries) ->
+    [ fixup_one_bin(Entry) || Entry <- Entries ].
+
+
ls_print([]) -> ok;
ls_print(L) ->
Width = min([max(lengths(L, [])), 40]) + 5,
diff --git a/lib/stdlib/src/filename.erl b/lib/stdlib/src/filename.erl
index 40df54fe54..9ca4b808e1 100644
--- a/lib/stdlib/src/filename.erl
+++ b/lib/stdlib/src/filename.erl
@@ -228,7 +228,7 @@ basename([$/|[]], Ext, Tail, DrvSep2) ->
basename([], Ext, Tail, DrvSep2);
basename([$/|Rest], Ext, _Tail, DrvSep2) ->
basename(Rest, Ext, [], DrvSep2);
-basename([$\\|Rest], Ext, Tail, DirSep2) when is_integer(DirSep2) ->
+basename([DirSep2|Rest], Ext, Tail, DirSep2) when is_integer(DirSep2) ->
basename([$/|Rest], Ext, Tail, DirSep2);
basename([Char|Rest], Ext, Tail, DrvSep2) when is_integer(Char) ->
basename(Rest, Ext, [Char|Tail], DrvSep2);
@@ -241,6 +241,39 @@ basename([], _Ext, Tail, _DrvSep2) ->
%% dirname("kalle.erl") -> "."
-spec dirname(file:name()) -> file:filename().
+dirname(Name) when is_binary(Name) ->
+ {Dsep,Drivesep} = separators(),
+ SList = case Dsep of
+ Sep when is_integer(Sep) ->
+ [ <<Sep>> ];
+ _ ->
+ []
+ end,
+ {XPart0,Dirs} = case Drivesep of
+ X when is_integer(X) ->
+ case Name of
+ <<DL,X,Rest/binary>> when ?IS_DRIVELETTER(DL) ->
+ {<<DL,X>>,Rest};
+ _ ->
+ {<<>>,Name}
+ end;
+ _ ->
+ {<<>>,Name}
+ end,
+ Parts0 = binary:split(Dirs,[<<"/">>|SList],[global]),
+ %% Fairly short lists of parts, OK to reverse twice...
+ Parts = case Parts0 of
+ [] -> [];
+ _ -> lists:reverse(fstrip(tl(lists:reverse(Parts0))))
+ end,
+ XPart = case {Parts,XPart0} of
+ {[],<<>>} ->
+ <<".">>;
+ _ ->
+ XPart0
+ end,
+ dirjoin(Parts,XPart,<<"/">>);
+
dirname(Name0) ->
Name = flatten(Name0),
dirname(Name, [], [], separators()).
@@ -271,6 +304,26 @@ dirname([], [DrvSep,Dl], File, {_,DrvSep}) ->
end;
dirname([], Dir, _, _) ->
lists:reverse(Dir).
+
+%% Compatibility with the list variant of dirname: drops empty binaries
+%% from the front of the list for as long as another element follows.
+%% A single remaining element is kept even if it is empty.
+fstrip([<<>>|[_|_]=Rest]) ->
+    fstrip(Rest);
+fstrip(Parts) ->
+    Parts.
+
+
+%% dirjoin(Parts, Acc, Sep) appends the path components in Parts to the
+%% binary Acc, placing Sep between consecutive components. An empty
+%% component at the head of Parts contributes a "/" instead of a name.
+dirjoin([<<>>|T],Acc,Sep) ->
+    dirjoin1(T,<<Acc/binary,"/">>,Sep);
+dirjoin(A,B,C) ->
+    dirjoin1(A,B,C).
+
+%% Worker for dirjoin/3: the last component is appended without a
+%% trailing separator, every other component is followed by Sep.
+dirjoin1([],Acc,_) ->
+    Acc;
+dirjoin1([One],Acc,_) ->
+    <<Acc/binary,One/binary>>;
+dirjoin1([H|T],Acc,Sep) ->
+    dirjoin(T,<<Acc/binary,H/binary,Sep/binary>>,Sep).
+
%% Given a filename string, returns the file extension,
%% including the period. Returns an empty list if there
@@ -282,6 +335,30 @@ dirname([], Dir, _, _) ->
%% On Windows: fn:dirname("\\usr\\src/kalle.erl") -> "/usr/src"
-spec extension(file:name()) -> file:filename().
+extension(Name) when is_binary(Name) ->
+ {Dsep,_} = separators(),
+ SList = case Dsep of
+ Sep when is_integer(Sep) ->
+ [ <<Sep>> ];
+ _ ->
+ []
+ end,
+ case binary:matches(Name,[<<".">>]) of
+ nomatch -> % Bug in binary workaround :(
+ <<>>;
+ [] ->
+ <<>>;
+ List ->
+ {Pos,_} = lists:last(List),
+ <<_:Pos/binary,Part/binary>> = Name,
+ case binary:match(Part,[<<"/">>|SList]) of
+ nomatch ->
+ Part;
+ _ ->
+ <<>>
+ end
+ end;
+
extension(Name0) ->
Name = flatten(Name0),
extension(Name, [], major_os_type()).
--
cgit v1.2.3
From 5da29a8c5fe9584e274d2fe9b95dead8334d419a Mon Sep 17 00:00:00 2001
From: Patrik Nyblom
Date: Mon, 8 Nov 2010 17:34:41 +0100
Subject: Correct shell utilities to handle unicode and possibly binaries
---
lib/stdlib/src/c.erl | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'lib/stdlib')
diff --git a/lib/stdlib/src/c.erl b/lib/stdlib/src/c.erl
index 399b91b92f..d04d8f191f 100644
--- a/lib/stdlib/src/c.erl
+++ b/lib/stdlib/src/c.erl
@@ -659,7 +659,7 @@ portformat(Name, Id, Cmd) ->
pwd() ->
case file:get_cwd() of
{ok, Str} ->
- ok = io:format("~s\n", [fixup_one_bin(Str)]);
+ ok = io:format("~ts\n", [fixup_one_bin(Str)]);
{error, _} ->
ok = io:format("Cannot determine current directory\n")
end.
@@ -714,7 +714,7 @@ ls_print(X, Width, Len) when Width + Len >= 80 ->
io:nl(),
ls_print(X, Width, 0);
ls_print([H|T], Width, Len) ->
- io:format("~-*s",[Width,H]),
+ io:format("~-*ts",[Width,H]),
ls_print(T, Width, Len+Width);
ls_print([], _, _) ->
io:nl().
--
cgit v1.2.3
From 0c18c4ba3f561ae6d9ff943863bfc62e5e9099e1 Mon Sep 17 00:00:00 2001
From: Patrik Nyblom
Date: Fri, 26 Nov 2010 14:13:25 +0100
Subject: Corrected testcases broken by unicode filenames
Also corrected type-info for bifs
---
lib/stdlib/test/binary_module_SUITE.erl | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'lib/stdlib')
diff --git a/lib/stdlib/test/binary_module_SUITE.erl b/lib/stdlib/test/binary_module_SUITE.erl
index 16ed9a2c26..e4cdcf6125 100644
--- a/lib/stdlib/test/binary_module_SUITE.erl
+++ b/lib/stdlib/test/binary_module_SUITE.erl
@@ -186,7 +186,7 @@ badargs(Config) when is_list(Config) ->
binary:match(<<1,2,3>>,
{ac,ets:match_spec_compile([{'_',[],['$_']}])},
[{scope,{0,1}}])),
- ?line nomatch =
+ ?line [] =
?MASK_ERROR(binary:matches(<<1,2,3>>,<<1>>,[{scope,{0,0}}])),
?line badarg =
?MASK_ERROR(binary:matches(<<1,2,3>>,{bm,<<>>},[{scope,{0,1}}])),
--
cgit v1.2.3
From b21d33041ef30182cbb8e74c0023dc282d069a26 Mon Sep 17 00:00:00 2001
From: Patrik Nyblom
Date: Tue, 30 Nov 2010 12:31:33 +0100
Subject: Teach filelib to use re in unicode mode when filenames are not raw
---
lib/stdlib/src/filelib.erl | 29 +++++++++++++++++------------
1 file changed, 17 insertions(+), 12 deletions(-)
(limited to 'lib/stdlib')
diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl
index d5ddf9ed7e..eaaca750ab 100644
--- a/lib/stdlib/src/filelib.erl
+++ b/lib/stdlib/src/filelib.erl
@@ -166,36 +166,41 @@ do_is_regular(File, Mod) ->
%% If <Recursive> is true all sub-directories to <Dir> are processed
do_fold_files(Dir, RegExp, Recursive, Fun, Acc, Mod) ->
- {ok, Re1} = re:compile(RegExp),
- do_fold_files1(Dir, Re1, Recursive, Fun, Acc, Mod).
+ {ok, Re1} = re:compile(RegExp,[unicode]),
+ do_fold_files1(Dir, Re1, RegExp, Recursive, Fun, Acc, Mod).
-do_fold_files1(Dir, RegExp, Recursive, Fun, Acc, Mod) ->
+do_fold_files1(Dir, RegExp, OrigRE, Recursive, Fun, Acc, Mod) ->
case eval_list_dir(Dir, Mod) of
- {ok, Files} -> do_fold_files2(Files, Dir, RegExp, Recursive, Fun, Acc, Mod);
+ {ok, Files} -> do_fold_files2(Files, Dir, RegExp, OrigRE,
+ Recursive, Fun, Acc, Mod);
{error, _} -> Acc
end.
-do_fold_files2([], _Dir, _RegExp, _Recursive, _Fun, Acc, _Mod) ->
+%% OrigRE is not to be compiled as it's for non conforming filenames,
+%% i.e. for filenames that do not comply with the current encoding, which should
+%% be very rare. We use it only in those cases and do not want to precompile.
+do_fold_files2([], _Dir, _RegExp, _OrigRE, _Recursive, _Fun, Acc, _Mod) ->
Acc;
-do_fold_files2([File|T], Dir, RegExp, Recursive, Fun, Acc0, Mod) ->
+do_fold_files2([File|T], Dir, RegExp, OrigRE, Recursive, Fun, Acc0, Mod) ->
FullName = filename:join(Dir, File),
case do_is_regular(FullName, Mod) of
true ->
- case re:run(File, RegExp, [{capture,none}]) of
+ case re:run(File, if is_binary(File) -> OrigRE; true -> RegExp end,
+ [{capture,none}]) of
match ->
Acc = Fun(FullName, Acc0),
- do_fold_files2(T, Dir, RegExp, Recursive, Fun, Acc, Mod);
+ do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc, Mod);
nomatch ->
- do_fold_files2(T, Dir, RegExp, Recursive, Fun, Acc0, Mod)
+ do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc0, Mod)
end;
false ->
case Recursive andalso do_is_dir(FullName, Mod) of
true ->
- Acc1 = do_fold_files1(FullName, RegExp, Recursive,
+ Acc1 = do_fold_files1(FullName, RegExp, OrigRE, Recursive,
Fun, Acc0, Mod),
- do_fold_files2(T, Dir, RegExp, Recursive, Fun, Acc1, Mod);
+ do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc1, Mod);
false ->
- do_fold_files2(T, Dir, RegExp, Recursive, Fun, Acc0, Mod)
+ do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc0, Mod)
end
end.
--
cgit v1.2.3
From 1ab2f46d91bce11bdc5b7ec65d3b3df46eadb105 Mon Sep 17 00:00:00 2001
From: Patrik Nyblom
Date: Wed, 1 Dec 2010 17:34:49 +0100
Subject: Make filelib not crash on re codepoints beyond 255 in re when
filename is raw
---
lib/stdlib/src/filelib.erl | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
(limited to 'lib/stdlib')
diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl
index eaaca750ab..04147d40d1 100644
--- a/lib/stdlib/src/filelib.erl
+++ b/lib/stdlib/src/filelib.erl
@@ -185,11 +185,14 @@ do_fold_files2([File|T], Dir, RegExp, OrigRE, Recursive, Fun, Acc0, Mod) ->
FullName = filename:join(Dir, File),
case do_is_regular(FullName, Mod) of
true ->
- case re:run(File, if is_binary(File) -> OrigRE; true -> RegExp end,
- [{capture,none}]) of
+ case (catch re:run(File, if is_binary(File) -> OrigRE;
+ true -> RegExp end,
+ [{capture,none}])) of
match ->
Acc = Fun(FullName, Acc0),
do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc, Mod);
+ {'EXIT',_} ->
+ do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc0, Mod);
nomatch ->
do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc0, Mod)
end;
--
cgit v1.2.3
From 3e6877b06ae395a9d4310ef664d0360867a47f62 Mon Sep 17 00:00:00 2001
From: Patrik Nyblom
Date: Wed, 1 Dec 2010 17:35:40 +0100
Subject: Add documentation about raw filenames and Unicode file name
translation mode
---
lib/stdlib/doc/src/filelib.xml | 27 ++++++++++++++++++++++++---
lib/stdlib/doc/src/filename.xml | 15 ++++++++++++---
2 files changed, 36 insertions(+), 6 deletions(-)
(limited to 'lib/stdlib')
diff --git a/lib/stdlib/doc/src/filelib.xml b/lib/stdlib/doc/src/filelib.xml
index 4ff3b22f32..969aff4fcb 100644
--- a/lib/stdlib/doc/src/filelib.xml
+++ b/lib/stdlib/doc/src/filelib.xml
@@ -36,14 +36,23 @@
This module contains utilities on a higher level than the file
module.
+ The module supports Unicode file names, so that it will match against regular expressions given in Unicode and that it will find and process raw file names (i.e. files named in a way that does not conform to the expected encoding).
+ If the VM operates in Unicode file naming mode on a machine with transparent file naming, the fun() provided to fold_files/5 needs to be prepared to handle binary file names.
+ For more information about raw file names, see the file module.
DATA TYPES
-filename() = string() | atom() | DeepList
-dirname() = filename()
-DeepList = [char() | atom() | DeepList]
+filename() = string() | atom() | DeepList | RawFilename
+ DeepList = [char() | atom() | DeepList]
+ RawFilename = binary()
+ If VM is in unicode filename mode, string() and char() are allowed to be > 255.
+ RawFilename is a filename not subject to Unicode translation, meaning that it
+ can contain characters not conforming to the Unicode encoding expected from the
+ filesystem (i.e. non-UTF-8 characters although the VM is started in Unicode
+ filename mode).
+dirname() = filename()
@@ -90,6 +99,18 @@ DeepList = [char() | atom() | DeepList]
If Recursive is true all sub-directories to Dir
are processed. The regular expression matching is done on just
the filename without the directory part.
+
+ If Unicode file name translation is in effect and the file
+ system is completely transparent, file names that cannot be
+ interpreted as Unicode may be encountered, in which case the
+ fun() must be prepared to handle raw file names
+ (i.e. binaries). If the regular expression contains
+ codepoints beyond 255, it will not match file names that do
+ not conform to the expected character encoding (i.e. are not
+ encoded in valid UTF-8).
+
+ For more information about raw file names, see the
+ file module.
diff --git a/lib/stdlib/doc/src/filename.xml b/lib/stdlib/doc/src/filename.xml
index fe6c6f898e..cdee6e4a81 100644
--- a/lib/stdlib/doc/src/filename.xml
+++ b/lib/stdlib/doc/src/filename.xml
@@ -4,7 +4,7 @@