From 4cf08709189ea8b7e2ae20f85c390abd04ae48ae Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Wed, 13 Oct 2010 17:08:32 +0200 Subject: Teach filename to accept raw data and add filename enc option to emu --- lib/stdlib/src/filename.erl | 335 +++++++++++++++++++++++++------------------- 1 file changed, 190 insertions(+), 145 deletions(-) (limited to 'lib/stdlib/src') diff --git a/lib/stdlib/src/filename.erl b/lib/stdlib/src/filename.erl index 01c06e4596..40df54fe54 100644 --- a/lib/stdlib/src/filename.erl +++ b/lib/stdlib/src/filename.erl @@ -41,6 +41,9 @@ -include_lib("kernel/include/file.hrl"). +-define(IS_DRIVELETTER(Letter),(((Letter >= $A) andalso (Letter =< $Z)) orelse + ((Letter >= $a) andalso (Letter =< $z)))). + %% Converts a relative filename to an absolute filename %% or the filename itself if it already is an absolute filename %% Note that no attempt is made to create the most beatiful @@ -57,12 +60,18 @@ %% (for Unix) : absname("/") -> "/" %% (for WIN32): absname("/") -> "D:/" --spec absname(file:name()) -> string(). + +-spec absname(file:name()) -> file:filename(). absname(Name) -> {ok, Cwd} = file:get_cwd(), absname(Name, Cwd). --spec absname(file:name(), string()) -> string(). +-spec absname(file:name(), file:filename()) -> file:filename(). +absname(Name, AbsBase) when is_binary(Name), is_list(AbsBase) -> + absname(Name,filename_string_to_binary(AbsBase)); +absname(Name, AbsBase) when is_list(Name), is_binary(AbsBase) -> + absname(filename_string_to_binary(Name),AbsBase); + absname(Name, AbsBase) -> case pathtype(Name) of relative -> @@ -77,6 +86,20 @@ absname(Name, AbsBase) -> %% Handles volumerelative names (on Windows only). +absname_vr([<<"/">>|Rest1], [Volume|_], _AbsBase) -> + %% Absolute path on current drive. + join([Volume|Rest1]); +absname_vr([<>|Rest1], [<>|_], AbsBase) -> + %% Relative to current directory on current drive. + absname(join(Rest1), AbsBase); +absname_vr([<>|Name], _, _AbsBase) -> + %% Relative to current directory on another drive. + Dcwd = + case file:get_cwd([X, $:]) of + {ok, Dir} -> filename_string_to_binary(Dir); + {error, _} -> <> + end, + absname(join(Name), Dcwd); absname_vr(["/"|Rest1], [Volume|_], _AbsBase) -> %% Absolute path on current drive. join([Volume|Rest1]); @@ -92,41 +115,13 @@ absname_vr([[X, $:]|Name], _, _AbsBase) -> end, absname(join(Name), Dcwd). -%% Joins a relative filename to an absolute base. For VxWorks the -%% resulting name is fixed to minimize the length by collapsing -%% ".." directories. -%% For other systems this is just a join/2, but assumes that +%% Joins a relative filename to an absolute base. +%% This is just a join/2, but assumes that %% AbsBase must be absolute and Name must be relative. --spec absname_join(string(), file:name()) -> string(). +-spec absname_join(file:filename(), file:name()) -> file:filename(). absname_join(AbsBase, Name) -> - case major_os_type() of - vxworks -> - absname_pretty(AbsBase, split(Name), lists:reverse(split(AbsBase))); - _Else -> - join(AbsBase, flatten(Name)) - end. - -%% Handles absolute filenames for VxWorks - these are 'pretty-printed', -%% since a C function call chdir("/erlang/lib/../bin") really sets -%% cwd to '/erlang/lib/../bin' which also works, but the long term -%% effect is potentially not so good ... -%% -%% absname_pretty("../bin", "/erlang/lib") -> "/erlang/bin" -%% absname_pretty("../../../..", "/erlang") -> "/erlang" - -absname_pretty(Abspath, Relpath, []) -> - %% AbsBase _must_ begin with a vxworks device name - {device, _Rest, Dev} = vxworks_first(Abspath), - absname_pretty(Abspath, Relpath, [lists:reverse(Dev)]); -absname_pretty(_Abspath, [], AbsBase) -> - join(lists:reverse(AbsBase)); -absname_pretty(Abspath, [[$.]|Rest], AbsBase) -> - absname_pretty(Abspath, Rest, AbsBase); -absname_pretty(Abspath, [[$.,$.]|Rest], [_|AbsRest]) -> - absname_pretty(Abspath, Rest, AbsRest); -absname_pretty(Abspath, [First|Rest], AbsBase) -> - absname_pretty(Abspath, Rest, [First|AbsBase]). + join(AbsBase, flatten(Name)). %% Returns the part of the filename after the last directory separator, %% or the filename itself if it has no separators. @@ -136,12 +131,36 @@ absname_pretty(Abspath, [First|Rest], AbsBase) -> %% basename("/usr/foo/") -> "foo" (trailing slashes ignored) %% basename("/") -> [] --spec basename(file:name()) -> string(). +-spec basename(file:name()) -> file:filename(). +basename(Name) when is_binary(Name) -> + case os:type() of + {win32,_} -> + win_basenameb(Name); + _ -> + basenameb(Name,[<<"/">>]) + end; + basename(Name0) -> Name = flatten(Name0), {DirSep2, DrvSep} = separators(), basename1(skip_prefix(Name, DrvSep), [], DirSep2). +win_basenameb(<>) when ?IS_DRIVELETTER(Letter) -> + basenameb(Rest,[<<"/">>,<<"\\">>]); +win_basenameb(O) -> + basenameb(O,[<<"/">>,<<"\\">>]). +basenameb(Bin,Sep) -> + Parts = [ X || X <- binary:split(Bin,Sep,[global]), + X =:= <<>> ], + if + Parts =:= [] -> + []; + true -> + lists:last(Parts) + end. + + + basename1([$/|[]], Tail, DirSep2) -> basename1([], Tail, DirSep2); basename1([$/|Rest], _Tail, DirSep2) -> @@ -155,26 +174,11 @@ basename1([Char|Rest], Tail, DirSep2) when is_integer(Char) -> basename1([], Tail, _DirSep2) -> lists:reverse(Tail). -skip_prefix(Name, false) -> % No prefix for unix, but for VxWorks. - case major_os_type() of - vxworks -> - case vxworks_first(Name) of - {device, Rest, _Device} -> - Rest; - {not_device, _Rest, _First} -> - Name - end; - _Else -> - Name - end; -skip_prefix(Name, DrvSep) -> - skip_prefix1(Name, DrvSep). - -skip_prefix1([L, DrvSep|Name], DrvSep) when is_integer(L) -> +skip_prefix(Name, false) -> + Name; +skip_prefix([L, DrvSep|Name], DrvSep) when ?IS_DRIVELETTER(L) -> Name; -skip_prefix1([L], _) when is_integer(L) -> - [L]; -skip_prefix1(Name, _) -> +skip_prefix(Name, _) -> Name. %% Returns the last component of the filename, with the given @@ -190,7 +194,27 @@ skip_prefix1(Name, _) -> %% rootname(basename("xxx.jam")) -> "xxx" %% rootname(basename("xxx.erl")) -> "xxx" --spec basename(file:name(), file:name()) -> string(). +-spec basename(file:name(), file:name()) -> file:filename(). +basename(Name, Ext) when is_binary(Name), is_list(Ext) -> + basename(Name,filename_string_to_binary(Ext)); +basename(Name, Ext) when is_list(Name), is_binary(Ext) -> + basename(filename_string_to_binary(Name),Ext); +basename(Name, Ext) when is_binary(Name), is_binary(Ext) -> + BName = basename(Name), + LN = byte_size(BName), + LE = byte_size(Ext), + case LN - LE of + Neg when Neg < 0 -> + BName; + Pos -> + case BName of + <> -> + Part; + Other -> + Other + end + end; + basename(Name0, Ext0) -> Name = flatten(Name0), Ext = flatten(Ext0), @@ -216,21 +240,10 @@ basename([], _Ext, Tail, _DrvSep2) -> %% Example: dirname("/usr/src/kalle.erl") -> "/usr/src", %% dirname("kalle.erl") -> "." --spec dirname(file:name()) -> string(). +-spec dirname(file:name()) -> file:filename(). dirname(Name0) -> Name = flatten(Name0), - case os:type() of - vxworks -> - {Devicep, Restname, FirstComp} = vxworks_first(Name), - case Devicep of - device -> - dirname(Restname, FirstComp, [], separators()); - _ -> - dirname(Name, [], [], separators()) - end; - _ -> - dirname(Name, [], [], separators()) - end. + dirname(Name, [], [], separators()). dirname([[_|_]=List|Rest], Dir, File, Seps) -> dirname(List++Rest, Dir, File, Seps); @@ -268,7 +281,7 @@ dirname([], Dir, _, _) -> %% %% On Windows: fn:dirname("\\usr\\src/kalle.erl") -> "/usr/src" --spec extension(file:name()) -> string(). +-spec extension(file:name()) -> file:filename(). extension(Name0) -> Name = flatten(Name0), extension(Name, [], major_os_type()). @@ -281,8 +294,6 @@ extension([$/|Rest], _Result, OsType) -> extension(Rest, [], OsType); extension([$\\|Rest], _Result, win32) -> extension(Rest, [], win32); -extension([$\\|Rest], _Result, vxworks) -> - extension(Rest, [], vxworks); extension([Char|Rest], Result, OsType) when is_integer(Char) -> extension(Rest, [Char|Result], OsType); extension([], Result, _OsType) -> @@ -290,23 +301,36 @@ extension([], Result, _OsType) -> %% Joins a list of filenames with directory separators. --spec join([string()]) -> string(). +-spec join([file:filename()]) -> file:filename(). join([Name1, Name2|Rest]) -> join([join(Name1, Name2)|Rest]); join([Name]) when is_list(Name) -> join1(Name, [], [], major_os_type()); +join([Name]) when is_binary(Name) -> + join1b(Name, <<>>, [], major_os_type()); join([Name]) when is_atom(Name) -> join([atom_to_list(Name)]). %% Joins two filenames with directory separators. --spec join(string(), string()) -> string(). +-spec join(file:filename(), file:filename()) -> file:filename(). join(Name1, Name2) when is_list(Name1), is_list(Name2) -> OsType = major_os_type(), case pathtype(Name2) of relative -> join1(Name1, Name2, [], OsType); _Other -> join1(Name2, [], [], OsType) end; +join(Name1, Name2) when is_binary(Name1), is_list(Name2) -> + join(Name1,filename_string_to_binary(Name2)); +join(Name1, Name2) when is_list(Name1), is_binary(Name2) -> + join(filename_string_to_binary(Name1),Name2); +join(Name1, Name2) when is_binary(Name1), is_binary(Name2) -> + OsType = major_os_type(), + case pathtype(Name2) of + relative -> join1b(Name1, Name2, [], OsType); + _Other -> join1b(Name2, <<>>, [], OsType) + end; + join(Name1, Name2) when is_atom(Name1) -> join(atom_to_list(Name1), Name2); join(Name1, Name2) when is_atom(Name2) -> @@ -321,8 +345,6 @@ when is_integer(UcLetter), UcLetter >= $A, UcLetter =< $Z -> join1(Rest, RelativeName, [$:, UcLetter+$a-$A], win32); join1([$\\|Rest], RelativeName, Result, win32) -> join1([$/|Rest], RelativeName, Result, win32); -join1([$\\|Rest], RelativeName, Result, vxworks) -> - join1([$/|Rest], RelativeName, Result, vxworks); join1([$/|Rest], RelativeName, [$., $/|Result], OsType) -> join1(Rest, RelativeName, [$/|Result], OsType); join1([$/|Rest], RelativeName, [$/|Result], OsType) -> @@ -344,6 +366,26 @@ join1([Char|Rest], RelativeName, Result, OsType) when is_integer(Char) -> join1([Atom|Rest], RelativeName, Result, OsType) when is_atom(Atom) -> join1(atom_to_list(Atom)++Rest, RelativeName, Result, OsType). +join1b(<>, RelativeName, [], win32) +when is_integer(UcLetter), UcLetter >= $A, UcLetter =< $Z -> + join1b(Rest, RelativeName, [$:, UcLetter+$a-$A], win32); +join1b(<<$\\,Rest/binary>>, RelativeName, Result, win32) -> + join1b(<<$/,Rest>>, RelativeName, Result, win32); +join1b(<<$/,Rest/binary>>, RelativeName, [$., $/|Result], OsType) -> + join1b(Rest, RelativeName, [$/|Result], OsType); +join1b(<<$/,Rest/binary>>, RelativeName, [$/|Result], OsType) -> + join1b(Rest, RelativeName, [$/|Result], OsType); +join1b(<<>>, <<>>, Result, OsType) -> + list_to_binary(maybe_remove_dirsep(Result, OsType)); +join1b(<<>>, RelativeName, [$:|Rest], win32) -> + join1b(RelativeName, <<>>, [$:|Rest], win32); +join1b(<<>>, RelativeName, [$/|Result], OsType) -> + join1b(RelativeName, <<>>, [$/|Result], OsType); +join1b(<<>>, RelativeName, Result, OsType) -> + join1b(RelativeName, <<>>, [$/|Result], OsType); +join1b(<>, RelativeName, Result, OsType) when is_integer(Char) -> + join1b(Rest, RelativeName, [Char|Result], OsType). + maybe_remove_dirsep([$/, $:, Letter], win32) -> [Letter, $:, $/]; maybe_remove_dirsep([$/], _) -> @@ -357,7 +399,7 @@ maybe_remove_dirsep(Name, _) -> %% a given base directory, which is is assumed to be normalised %% by a previous call to join/{1,2}. --spec append(string(), file:name()) -> string(). +-spec append(file:filename(), file:name()) -> file:filename(). append(Dir, Name) -> Dir ++ [$/|Name]. @@ -376,19 +418,14 @@ append(Dir, Name) -> -spec pathtype(file:name()) -> 'absolute' | 'relative' | 'volumerelative'. pathtype(Atom) when is_atom(Atom) -> pathtype(atom_to_list(Atom)); -pathtype(Name) when is_list(Name) -> +pathtype(Name) when is_list(Name) or is_binary(Name) -> case os:type() of {unix, _} -> unix_pathtype(Name); - {win32, _} -> win32_pathtype(Name); - vxworks -> case vxworks_first(Name) of - {device, _Rest, _Dev} -> - absolute; - _ -> - relative - end; - {ose,_} -> unix_pathtype(Name) + {win32, _} -> win32_pathtype(Name) end. +unix_pathtype(<<$/,_/binary>>) -> + absolute; unix_pathtype([$/|_]) -> absolute; unix_pathtype([List|Rest]) when is_list(List) -> @@ -404,6 +441,15 @@ win32_pathtype([Atom|Rest]) when is_atom(Atom) -> win32_pathtype(atom_to_list(Atom)++Rest); win32_pathtype([Char, List|Rest]) when is_list(List) -> win32_pathtype([Char|List++Rest]); +win32_pathtype(<<$/, $/, _/binary>>) -> absolute; +win32_pathtype(<<$\\, $/, _/binary>>) -> absolute; +win32_pathtype(<<$/, $\\, _/binary>>) -> absolute; +win32_pathtype(<<$\\, $\\, _/binary>>) -> absolute; +win32_pathtype(<<$/, _/binary>>) -> volumerelative; +win32_pathtype(<<$\\, _/binary>>) -> volumerelative; +win32_pathtype(<<_Letter, $:, $/, _/binary>>) -> absolute; +win32_pathtype(<<_Letter, $:, $\\, _/binary>>) -> absolute; +win32_pathtype(<<_Letter, $:, _/binary>>) -> volumerelative; win32_pathtype([$/, $/|_]) -> absolute; win32_pathtype([$\\, $/|_]) -> absolute; win32_pathtype([$/, $\\|_]) -> absolute; @@ -422,7 +468,7 @@ win32_pathtype(_) -> relative. %% Examples: rootname("/jam.src/kalle") -> "/jam.src/kalle" %% rootname("/jam.src/foo.erl") -> "/jam.src/foo" --spec rootname(file:name()) -> string(). +-spec rootname(file:name()) -> file:filename(). rootname(Name0) -> Name = flatten(Name0), rootname(Name, [], [], major_os_type()). @@ -431,8 +477,6 @@ rootname([$/|Rest], Root, Ext, OsType) -> rootname(Rest, [$/]++Ext++Root, [], OsType); rootname([$\\|Rest], Root, Ext, win32) -> rootname(Rest, [$/]++Ext++Root, [], win32); -rootname([$\\|Rest], Root, Ext, vxworks) -> - rootname(Rest, [$/]++Ext++Root, [], vxworks); rootname([$.|Rest], Root, [], OsType) -> rootname(Rest, Root, ".", OsType); rootname([$.|Rest], Root, Ext, OsType) -> @@ -451,7 +495,7 @@ rootname([], Root, _Ext, _OsType) -> %% Examples: rootname("/jam.src/kalle.jam", ".erl") -> "/jam.src/kalle.jam" %% rootname("/jam.src/foo.erl", ".erl") -> "/jam.src/foo" --spec rootname(file:name(), file:name()) -> string(). +-spec rootname(file:name(), file:name()) -> file:filename(). rootname(Name0, Ext0) -> Name = flatten(Name0), Ext = flatten(Ext0), @@ -471,27 +515,55 @@ rootname2([Char|Rest], Ext, Result) when is_integer(Char) -> %% split("foo/bar") -> ["foo", "bar"] %% split("a:\\msdev\\include") -> ["a:/", "msdev", "include"] --spec split(file:name()) -> [string()]. +-spec split(file:name()) -> [file:filename()]. +split(Name) when is_binary(Name) -> + case os:type() of + {win32, _} -> win32_splitb(Name); + _ -> unix_splitb(Name) + end; + split(Name0) -> Name = flatten(Name0), case os:type() of - {unix, _} -> unix_split(Name); {win32, _} -> win32_split(Name); - vxworks -> vxworks_split(Name); - {ose,_} -> unix_split(Name) + _ -> unix_split(Name) end. -%% If a VxWorks filename starts with '[/\].*[^/\]' '[/\].*:' or '.*:' -%% that part of the filename is considered a device. -%% The rest of the name is interpreted exactly as for win32. -%% XXX - dirty solution to make filename:split([]) return the same thing on -%% VxWorks as on unix and win32. -vxworks_split([]) -> - []; -vxworks_split(L) -> - {_Devicep, Rest, FirstComp} = vxworks_first(L), - split(Rest, [], [lists:reverse(FirstComp)], win32). +unix_splitb(Name) -> + L = binary:split(Name,[<<"/">>],[global]), + LL = case L of + [<<>>|Rest] -> + [<<"/">>|Rest]; + _ -> + L + end, + [ X || X <- LL, X =/= <<>>]. + + +fix_driveletter(Letter0) -> + if + Letter0 >= $A, Letter0 =< $Z -> + Letter0+$a-$A; + true -> + Letter0 + end. +win32_splitb(<>) when (((Slash =:= $\\) orelse (Slash =:= $/)) andalso + ?IS_DRIVELETTER(Letter0)) -> + Letter = fix_driveletter(Letter0), + L = binary:split(Rest,[<<"/">>,<<"\\">>],[global]), + [<> | [ X || X <- L, X =/= <<>> ]]; +win32_splitb(<>) when ?IS_DRIVELETTER(Letter0) -> + Letter = fix_driveletter(Letter0), + L = binary:split(Rest,[<<"/">>,<<"\\">>],[global]), + [<> | [ X || X <- L, X =/= <<>> ]]; +win32_splitb(<>) when ((Slash =:= $\\) orelse (Slash =:= $/)) -> + L = binary:split(Rest,[<<"/">>,<<"\\">>],[global]), + [<<$/>> | [ X || X <- L, X =/= <<>> ]]; +win32_splitb(Name) -> + L = binary:split(Name,[<<"/">>,<<"\\">>],[global]), + [<<$/>> | [ X || X <- L, X =/= <<>> ]]. + unix_split(Name) -> split(Name, [], unix). @@ -502,8 +574,6 @@ win32_split([X, $\\|Rest]) when is_integer(X) -> win32_split([X, $/|Rest]); win32_split([X, Y, $\\|Rest]) when is_integer(X), is_integer(Y) -> win32_split([X, Y, $/|Rest]); -win32_split([$/, $/|Rest]) -> - split(Rest, [], [[$/, $/]]); win32_split([UcLetter, $:|Rest]) when UcLetter >= $A, UcLetter =< $Z -> win32_split([UcLetter+$a-$A, $:|Rest]); win32_split([Letter, $:, $/|Rest]) -> @@ -540,7 +610,7 @@ split([], Comp, Components, OsType) -> %% will be converted to backslashes. On all platforms, the %% name will be normalized as done by join/1. --spec nativename(string()) -> string(). +-spec nativename(file:filename()) -> file:filename(). nativename(Name0) -> Name = join([Name0]), %Normalize. case os:type() of @@ -557,13 +627,12 @@ win32_nativename([]) -> separators() -> case os:type() of - {unix, _} -> {false, false}; {win32, _} -> {$\\, $:}; - vxworks -> {$\\, false}; - {ose,_} -> {false, false} + _ -> {false, false} end. + %% find_src(Module) -- %% find_src(Module, Rules) -- %% @@ -733,45 +802,12 @@ major_os_type() -> OsT -> OsT end. -%% Need to take care of the first pathname component separately -%% due to VxWorks less than good device naming rules. -%% (i.e. this is VxWorks specific ...) -%% The following four all starts with device names -%% elrond:/foo -> elrond: -%% elrond:\\foo.bar -> elrond: -%% /DISK1:foo -> /DISK1: -%% /usr/include -> /usr -%% This one doesn't: -%% foo/bar - -vxworks_first([]) -> - {not_device, [], []}; -vxworks_first([$/|T]) -> - vxworks_first2(device, T, [$/]); -vxworks_first([$\\|T]) -> - vxworks_first2(device, T, [$/]); -vxworks_first([H|T]) when is_list(H) -> - vxworks_first(H++T); -vxworks_first([H|T]) -> - vxworks_first2(not_device, T, [H]). - -vxworks_first2(Devicep, [], FirstComp) -> - {Devicep, [], FirstComp}; -vxworks_first2(Devicep, [$/|T], FirstComp) -> - {Devicep, [$/|T], FirstComp}; -vxworks_first2(Devicep, [$\\|T], FirstComp) -> - {Devicep, [$/|T], FirstComp}; -vxworks_first2(_Devicep, [$:|T], FirstComp)-> - {device, T, [$:|FirstComp]}; -vxworks_first2(Devicep, [H|T], FirstComp) when is_list(H) -> - vxworks_first2(Devicep, H++T, FirstComp); -vxworks_first2(Devicep, [H|T], FirstComp) -> - vxworks_first2(Devicep, T, [H|FirstComp]). - %% flatten(List) %% Flatten a list, also accepting atoms. --spec flatten(file:name()) -> string(). +-spec flatten(file:name()) -> file:filename(). +flatten(Bin) when is_binary(Bin) -> + Bin; flatten(List) -> do_flatten(List, []). @@ -785,3 +821,12 @@ do_flatten([], Tail) -> Tail; do_flatten(Atom, Tail) when is_atom(Atom) -> atom_to_list(Atom) ++ flatten(Tail). + +filename_string_to_binary(List) -> + case unicode:characters_to_binary(List,unicode,file:native_name_encoding()) of + {error,_,_} -> + erlang:error(badarg); + Bin when is_binary(Bin) -> + Bin + end. + -- cgit v1.2.3 From 63eeba2f6829aac2644eaf212ebef9cdf4b59e8d Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Thu, 14 Oct 2010 10:12:57 +0200 Subject: Handle binary file names and conversion of unicode strings --- lib/stdlib/src/c.erl | 20 ++++++++++-- lib/stdlib/src/filename.erl | 79 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 96 insertions(+), 3 deletions(-) (limited to 'lib/stdlib/src') diff --git a/lib/stdlib/src/c.erl b/lib/stdlib/src/c.erl index 6d50a575eb..399b91b92f 100644 --- a/lib/stdlib/src/c.erl +++ b/lib/stdlib/src/c.erl @@ -659,7 +659,7 @@ portformat(Name, Id, Cmd) -> pwd() -> case file:get_cwd() of {ok, Str} -> - ok = io:format("~s\n", [Str]); + ok = io:format("~s\n", [fixup_one_bin(Str)]); {error, _} -> ok = io:format("Cannot determine current directory\n") end. @@ -684,11 +684,27 @@ ls() -> ls(Dir) -> case file:list_dir(Dir) of {ok, Entries} -> - ls_print(sort(Entries)); + ls_print(sort(fixup_bin(Entries))); {error,_E} -> format("Invalid directory\n") end. +fixup_one_bin(X) when is_binary(X) -> + L = binary_to_list(X), + [ if + El > 127 -> + $?; + true -> + El + end || El <- L]; +fixup_one_bin(X) -> + X. +fixup_bin([H|T]) -> + [fixup_one_bin(H) | fixup_bin(T)]; +fixup_bin([]) -> + []. + + ls_print([]) -> ok; ls_print(L) -> Width = min([max(lengths(L, [])), 40]) + 5, diff --git a/lib/stdlib/src/filename.erl b/lib/stdlib/src/filename.erl index 40df54fe54..9ca4b808e1 100644 --- a/lib/stdlib/src/filename.erl +++ b/lib/stdlib/src/filename.erl @@ -228,7 +228,7 @@ basename([$/|[]], Ext, Tail, DrvSep2) -> basename([], Ext, Tail, DrvSep2); basename([$/|Rest], Ext, _Tail, DrvSep2) -> basename(Rest, Ext, [], DrvSep2); -basename([$\\|Rest], Ext, Tail, DirSep2) when is_integer(DirSep2) -> +basename([DirSep2|Rest], Ext, Tail, DirSep2) when is_integer(DirSep2) -> basename([$/|Rest], Ext, Tail, DirSep2); basename([Char|Rest], Ext, Tail, DrvSep2) when is_integer(Char) -> basename(Rest, Ext, [Char|Tail], DrvSep2); @@ -241,6 +241,39 @@ basename([], _Ext, Tail, _DrvSep2) -> %% dirname("kalle.erl") -> "." -spec dirname(file:name()) -> file:filename(). +dirname(Name) when is_binary(Name) -> + {Dsep,Drivesep} = separators(), + SList = case Dsep of + Sep when is_integer(Sep) -> + [ <> ]; + _ -> + [] + end, + {XPart0,Dirs} = case Drivesep of + X when is_integer(X) -> + case Name of + <> when ?IS_DRIVELETTER(DL) -> + {<>,Rest}; + _ -> + {<<>>,Name} + end; + _ -> + {<<>>,Name} + end, + Parts0 = binary:split(Dirs,[<<"/">>|SList],[global]), + %% Fairly short lists of parts, OK to reverse twice... + Parts = case Parts0 of + [] -> []; + _ -> lists:reverse(fstrip(tl(lists:reverse(Parts0)))) + end, + XPart = case {Parts,XPart0} of + {[],<<>>} -> + <<".">>; + _ -> + XPart0 + end, + dirjoin(Parts,XPart,<<"/">>); + dirname(Name0) -> Name = flatten(Name0), dirname(Name, [], [], separators()). @@ -271,6 +304,26 @@ dirname([], [DrvSep,Dl], File, {_,DrvSep}) -> end; dirname([], Dir, _, _) -> lists:reverse(Dir). + +%% Compatibility with lists variant, remove trailing slashes +fstrip([<<>>,X|Y]) -> + fstrip([X|Y]); +fstrip(A) -> + A. + + +dirjoin([<<>>|T],Acc,Sep) -> + dirjoin1(T,<>,Sep); +dirjoin(A,B,C) -> + dirjoin1(A,B,C). + +dirjoin1([],Acc,_) -> + Acc; +dirjoin1([One],Acc,_) -> + <>; +dirjoin1([H|T],Acc,Sep) -> + dirjoin(T,<>,Sep). + %% Given a filename string, returns the file extension, %% including the period. Returns an empty list if there @@ -282,6 +335,30 @@ dirname([], Dir, _, _) -> %% On Windows: fn:dirname("\\usr\\src/kalle.erl") -> "/usr/src" -spec extension(file:name()) -> file:filename(). +extension(Name) when is_binary(Name) -> + {Dsep,_} = separators(), + SList = case Dsep of + Sep when is_integer(Sep) -> + [ <> ]; + _ -> + [] + end, + case binary:matches(Name,[<<".">>]) of + nomatch -> % Bug in binary workaround :( + <<>>; + [] -> + <<>>; + List -> + {Pos,_} = lists:last(List), + <<_:Pos/binary,Part/binary>> = Name, + case binary:match(Part,[<<"/">>|SList]) of + nomatch -> + Part; + _ -> + <<>> + end + end; + extension(Name0) -> Name = flatten(Name0), extension(Name, [], major_os_type()). -- cgit v1.2.3 From 5da29a8c5fe9584e274d2fe9b95dead8334d419a Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Mon, 8 Nov 2010 17:34:41 +0100 Subject: Correct shell utilities to handle unicode and possibly binaries --- lib/stdlib/src/c.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/stdlib/src') diff --git a/lib/stdlib/src/c.erl b/lib/stdlib/src/c.erl index 399b91b92f..d04d8f191f 100644 --- a/lib/stdlib/src/c.erl +++ b/lib/stdlib/src/c.erl @@ -659,7 +659,7 @@ portformat(Name, Id, Cmd) -> pwd() -> case file:get_cwd() of {ok, Str} -> - ok = io:format("~s\n", [fixup_one_bin(Str)]); + ok = io:format("~ts\n", [fixup_one_bin(Str)]); {error, _} -> ok = io:format("Cannot determine current directory\n") end. @@ -714,7 +714,7 @@ ls_print(X, Width, Len) when Width + Len >= 80 -> io:nl(), ls_print(X, Width, 0); ls_print([H|T], Width, Len) -> - io:format("~-*s",[Width,H]), + io:format("~-*ts",[Width,H]), ls_print(T, Width, Len+Width); ls_print([], _, _) -> io:nl(). -- cgit v1.2.3 From b21d33041ef30182cbb8e74c0023dc282d069a26 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Tue, 30 Nov 2010 12:31:33 +0100 Subject: Teach filelib to use re in unicode mode when filenames are not raw --- lib/stdlib/src/filelib.erl | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) (limited to 'lib/stdlib/src') diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl index d5ddf9ed7e..eaaca750ab 100644 --- a/lib/stdlib/src/filelib.erl +++ b/lib/stdlib/src/filelib.erl @@ -166,36 +166,41 @@ do_is_regular(File, Mod) -> %% If is true all sub-directories to are processed do_fold_files(Dir, RegExp, Recursive, Fun, Acc, Mod) -> - {ok, Re1} = re:compile(RegExp), - do_fold_files1(Dir, Re1, Recursive, Fun, Acc, Mod). + {ok, Re1} = re:compile(RegExp,[unicode]), + do_fold_files1(Dir, Re1, RegExp, Recursive, Fun, Acc, Mod). -do_fold_files1(Dir, RegExp, Recursive, Fun, Acc, Mod) -> +do_fold_files1(Dir, RegExp, OrigRE, Recursive, Fun, Acc, Mod) -> case eval_list_dir(Dir, Mod) of - {ok, Files} -> do_fold_files2(Files, Dir, RegExp, Recursive, Fun, Acc, Mod); + {ok, Files} -> do_fold_files2(Files, Dir, RegExp, OrigRE, + Recursive, Fun, Acc, Mod); {error, _} -> Acc end. -do_fold_files2([], _Dir, _RegExp, _Recursive, _Fun, Acc, _Mod) -> +%% OrigRE is not to be compiled as it's for non conforming filenames, +%% i.e. for filenames that does not comply to the current encoding, which should +%% be very rare. We use it only in those cases and do not want to precompile. +do_fold_files2([], _Dir, _RegExp, _OrigRE, _Recursive, _Fun, Acc, _Mod) -> Acc; -do_fold_files2([File|T], Dir, RegExp, Recursive, Fun, Acc0, Mod) -> +do_fold_files2([File|T], Dir, RegExp, OrigRE, Recursive, Fun, Acc0, Mod) -> FullName = filename:join(Dir, File), case do_is_regular(FullName, Mod) of true -> - case re:run(File, RegExp, [{capture,none}]) of + case re:run(File, if is_binary(File) -> OrigRE; true -> RegExp end, + [{capture,none}]) of match -> Acc = Fun(FullName, Acc0), - do_fold_files2(T, Dir, RegExp, Recursive, Fun, Acc, Mod); + do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc, Mod); nomatch -> - do_fold_files2(T, Dir, RegExp, Recursive, Fun, Acc0, Mod) + do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc0, Mod) end; false -> case Recursive andalso do_is_dir(FullName, Mod) of true -> - Acc1 = do_fold_files1(FullName, RegExp, Recursive, + Acc1 = do_fold_files1(FullName, RegExp, OrigRE, Recursive, Fun, Acc0, Mod), - do_fold_files2(T, Dir, RegExp, Recursive, Fun, Acc1, Mod); + do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc1, Mod); false -> - do_fold_files2(T, Dir, RegExp, Recursive, Fun, Acc0, Mod) + do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc0, Mod) end end. -- cgit v1.2.3 From 1ab2f46d91bce11bdc5b7ec65d3b3df46eadb105 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Wed, 1 Dec 2010 17:34:49 +0100 Subject: Make filelib not crash on re codepoints beyond 255 in re when filename is raw --- lib/stdlib/src/filelib.erl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'lib/stdlib/src') diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl index eaaca750ab..04147d40d1 100644 --- a/lib/stdlib/src/filelib.erl +++ b/lib/stdlib/src/filelib.erl @@ -185,11 +185,14 @@ do_fold_files2([File|T], Dir, RegExp, OrigRE, Recursive, Fun, Acc0, Mod) -> FullName = filename:join(Dir, File), case do_is_regular(FullName, Mod) of true -> - case re:run(File, if is_binary(File) -> OrigRE; true -> RegExp end, - [{capture,none}]) of + case (catch re:run(File, if is_binary(File) -> OrigRE; + true -> RegExp end, + [{capture,none}])) of match -> Acc = Fun(FullName, Acc0), do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc, Mod); + {'EXIT',_} -> + do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc0, Mod); nomatch -> do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc0, Mod) end; -- cgit v1.2.3 From 159d79b6657e5bb4f3accb6a0d74b74f06da4ba2 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Fri, 3 Dec 2010 13:16:21 +0100 Subject: Test and correct filelib and filename --- lib/stdlib/src/filelib.erl | 28 ++++++++++++++++++++++------ lib/stdlib/src/filename.erl | 28 +++++++++++++++++++--------- 2 files changed, 41 insertions(+), 15 deletions(-) (limited to 'lib/stdlib/src') diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl index 04147d40d1..c845b61204 100644 --- a/lib/stdlib/src/filelib.erl +++ b/lib/stdlib/src/filelib.erl @@ -47,14 +47,14 @@ wildcard(Pattern) when is_list(Pattern) -> ?HANDLE_ERROR(do_wildcard(Pattern, file)). -spec wildcard(file:name(), file:name() | atom()) -> [file:filename()]. -wildcard(Pattern, Cwd) when is_list(Pattern), is_list(Cwd) -> +wildcard(Pattern, Cwd) when is_list(Pattern), (is_list(Cwd) or is_binary(Cwd)) -> ?HANDLE_ERROR(do_wildcard(Pattern, Cwd, file)); wildcard(Pattern, Mod) when is_list(Pattern), is_atom(Mod) -> ?HANDLE_ERROR(do_wildcard(Pattern, Mod)). -spec wildcard(file:name(), file:name(), atom()) -> [file:filename()]. wildcard(Pattern, Cwd, Mod) - when is_list(Pattern), is_list(Cwd), is_atom(Mod) -> + when is_list(Pattern), (is_list(Cwd) or is_binary(Cwd)), is_atom(Mod) -> ?HANDLE_ERROR(do_wildcard(Pattern, Cwd, Mod)). -spec is_dir(file:name()) -> boolean(). @@ -118,7 +118,7 @@ do_wildcard_comp({compiled_wildcard,{exists,File}}, Mod) -> do_wildcard_comp({compiled_wildcard,[Base|Rest]}, Mod) -> do_wildcard_1([Base], Rest, Mod). -do_wildcard(Pattern, Cwd, Mod) when is_list(Pattern), is_list(Cwd) -> +do_wildcard(Pattern, Cwd, Mod) when is_list(Pattern), (is_list(Cwd) or is_binary(Cwd)) -> do_wildcard_comp(do_compile_wildcard(Pattern), Cwd, Mod). do_wildcard_comp({compiled_wildcard,{exists,File}}, Cwd, Mod) -> @@ -127,9 +127,18 @@ do_wildcard_comp({compiled_wildcard,{exists,File}}, Cwd, Mod) -> _ -> [] end; do_wildcard_comp({compiled_wildcard,[current|Rest]}, Cwd0, Mod) -> - Cwd = filename:join([Cwd0]), %Slash away redundant slashes. - PrefixLen = length(Cwd)+1, - [lists:nthtail(PrefixLen, N) || N <- do_wildcard_1([Cwd], Rest, Mod)]; + {Cwd,PrefixLen} = case filename:join([Cwd0]) of + Bin when is_binary(Bin) -> {Bin,byte_size(Bin)+1}; + Other -> {Other,length(Other)+1} + end, %Slash away redundant slashes. + [ + if + is_binary(N) -> + <<_:PrefixLen/binary,Res/binary>> = N, + Res; + true -> + lists:nthtail(PrefixLen, N) + end || N <- do_wildcard_1([Cwd], Rest, Mod)]; do_wildcard_comp({compiled_wildcard,[Base|Rest]}, _Cwd, Mod) -> do_wildcard_1([Base], Rest, Mod). @@ -276,6 +285,13 @@ do_wildcard_3(Base, [Pattern|Rest], Result, Mod) -> do_wildcard_3(Base, [], Result, _Mod) -> [Base|Result]. +wildcard_4(Pattern, [File|Rest], Base, Result) when is_binary(File) -> + case wildcard_5(Pattern, binary_to_list(File)) of + true -> + wildcard_4(Pattern, Rest, Base, [join(Base, File)|Result]); + false -> + wildcard_4(Pattern, Rest, Base, Result) + end; wildcard_4(Pattern, [File|Rest], Base, Result) -> case wildcard_5(Pattern, File) of true -> diff --git a/lib/stdlib/src/filename.erl b/lib/stdlib/src/filename.erl index 9ca4b808e1..e38b8957f2 100644 --- a/lib/stdlib/src/filename.erl +++ b/lib/stdlib/src/filename.erl @@ -151,10 +151,10 @@ win_basenameb(O) -> basenameb(O,[<<"/">>,<<"\\">>]). basenameb(Bin,Sep) -> Parts = [ X || X <- binary:split(Bin,Sep,[global]), - X =:= <<>> ], + X =/= <<>> ], if Parts =:= [] -> - []; + <<>>; true -> lists:last(Parts) end. @@ -201,17 +201,19 @@ basename(Name, Ext) when is_list(Name), is_binary(Ext) -> basename(filename_string_to_binary(Name),Ext); basename(Name, Ext) when is_binary(Name), is_binary(Ext) -> BName = basename(Name), + LAll = byte_size(Name), LN = byte_size(BName), LE = byte_size(Ext), case LN - LE of Neg when Neg < 0 -> BName; Pos -> - case BName of - <> -> + StartLen = LAll - Pos - LE, + case Name of + <<_:StartLen/binary,Part:Pos/binary,Ext/binary>> -> Part; - Other -> - Other + _Other -> + BName end end; @@ -447,7 +449,7 @@ join1b(<>, RelativeName, [], win32) when is_integer(UcLetter), UcLetter >= $A, UcLetter =< $Z -> join1b(Rest, RelativeName, [$:, UcLetter+$a-$A], win32); join1b(<<$\\,Rest/binary>>, RelativeName, Result, win32) -> - join1b(<<$/,Rest>>, RelativeName, Result, win32); + join1b(<<$/,Rest/binary>>, RelativeName, Result, win32); join1b(<<$/,Rest/binary>>, RelativeName, [$., $/|Result], OsType) -> join1b(Rest, RelativeName, [$/|Result], OsType); join1b(<<$/,Rest/binary>>, RelativeName, [$/|Result], OsType) -> @@ -546,6 +548,8 @@ win32_pathtype(_) -> relative. %% rootname("/jam.src/foo.erl") -> "/jam.src/foo" -spec rootname(file:name()) -> file:filename(). +rootname(Name) when is_binary(Name) -> + list_to_binary(rootname(binary_to_list(Name))); % No need to handle unicode, . is < 128 rootname(Name0) -> Name = flatten(Name0), rootname(Name, [], [], major_os_type()). @@ -573,6 +577,12 @@ rootname([], Root, _Ext, _OsType) -> %% rootname("/jam.src/foo.erl", ".erl") -> "/jam.src/foo" -spec rootname(file:name(), file:name()) -> file:filename(). +rootname(Name, Ext) when is_binary(Name), is_binary(Ext) -> + list_to_binary(rootname(binary_to_list(Name),binary_to_list(Ext))); +rootname(Name, Ext) when is_binary(Name) -> + rootname(Name,filename_string_to_binary(Ext)); +rootname(Name, Ext) when is_binary(Ext) -> + rootname(filename_string_to_binary(Name),Ext); rootname(Name0, Ext0) -> Name = flatten(Name0), Ext = flatten(Ext0), @@ -639,7 +649,7 @@ win32_splitb(<>) when ((Slash =:= $\\) orelse (Slash =:= $/)) [<<$/>> | [ X || X <- L, X =/= <<>> ]]; win32_splitb(Name) -> L = binary:split(Name,[<<"/">>,<<"\\">>],[global]), - [<<$/>> | [ X || X <- L, X =/= <<>> ]]. + [ X || X <- L, X =/= <<>> ]. unix_split(Name) -> @@ -900,7 +910,7 @@ do_flatten(Atom, Tail) when is_atom(Atom) -> atom_to_list(Atom) ++ flatten(Tail). filename_string_to_binary(List) -> - case unicode:characters_to_binary(List,unicode,file:native_name_encoding()) of + case unicode:characters_to_binary(flatten(List),unicode,file:native_name_encoding()) of {error,_,_} -> erlang:error(badarg); Bin when is_binary(Bin) -> -- cgit v1.2.3