From 4cf08709189ea8b7e2ae20f85c390abd04ae48ae Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Wed, 13 Oct 2010 17:08:32 +0200 Subject: Teach filename to accept raw data and add filename enc option to emu --- lib/stdlib/src/filename.erl | 335 +++++++++++++++++++++++++------------------- 1 file changed, 190 insertions(+), 145 deletions(-) (limited to 'lib/stdlib') diff --git a/lib/stdlib/src/filename.erl b/lib/stdlib/src/filename.erl index 01c06e4596..40df54fe54 100644 --- a/lib/stdlib/src/filename.erl +++ b/lib/stdlib/src/filename.erl @@ -41,6 +41,9 @@ -include_lib("kernel/include/file.hrl"). +-define(IS_DRIVELETTER(Letter),(((Letter >= $A) andalso (Letter =< $Z)) orelse + ((Letter >= $a) andalso (Letter =< $z)))). + %% Converts a relative filename to an absolute filename %% or the filename itself if it already is an absolute filename %% Note that no attempt is made to create the most beatiful @@ -57,12 +60,18 @@ %% (for Unix) : absname("/") -> "/" %% (for WIN32): absname("/") -> "D:/" --spec absname(file:name()) -> string(). + +-spec absname(file:name()) -> file:filename(). absname(Name) -> {ok, Cwd} = file:get_cwd(), absname(Name, Cwd). --spec absname(file:name(), string()) -> string(). +-spec absname(file:name(), file:filename()) -> file:filename(). +absname(Name, AbsBase) when is_binary(Name), is_list(AbsBase) -> + absname(Name,filename_string_to_binary(AbsBase)); +absname(Name, AbsBase) when is_list(Name), is_binary(AbsBase) -> + absname(filename_string_to_binary(Name),AbsBase); + absname(Name, AbsBase) -> case pathtype(Name) of relative -> @@ -77,6 +86,20 @@ absname(Name, AbsBase) -> %% Handles volumerelative names (on Windows only). +absname_vr([<<"/">>|Rest1], [Volume|_], _AbsBase) -> + %% Absolute path on current drive. + join([Volume|Rest1]); +absname_vr([<>|Rest1], [<>|_], AbsBase) -> + %% Relative to current directory on current drive. + absname(join(Rest1), AbsBase); +absname_vr([<>|Name], _, _AbsBase) -> + %% Relative to current directory on another drive. + Dcwd = + case file:get_cwd([X, $:]) of + {ok, Dir} -> filename_string_to_binary(Dir); + {error, _} -> <> + end, + absname(join(Name), Dcwd); absname_vr(["/"|Rest1], [Volume|_], _AbsBase) -> %% Absolute path on current drive. join([Volume|Rest1]); @@ -92,41 +115,13 @@ absname_vr([[X, $:]|Name], _, _AbsBase) -> end, absname(join(Name), Dcwd). -%% Joins a relative filename to an absolute base. For VxWorks the -%% resulting name is fixed to minimize the length by collapsing -%% ".." directories. -%% For other systems this is just a join/2, but assumes that +%% Joins a relative filename to an absolute base. +%% This is just a join/2, but assumes that %% AbsBase must be absolute and Name must be relative. --spec absname_join(string(), file:name()) -> string(). +-spec absname_join(file:filename(), file:name()) -> file:filename(). absname_join(AbsBase, Name) -> - case major_os_type() of - vxworks -> - absname_pretty(AbsBase, split(Name), lists:reverse(split(AbsBase))); - _Else -> - join(AbsBase, flatten(Name)) - end. - -%% Handles absolute filenames for VxWorks - these are 'pretty-printed', -%% since a C function call chdir("/erlang/lib/../bin") really sets -%% cwd to '/erlang/lib/../bin' which also works, but the long term -%% effect is potentially not so good ... -%% -%% absname_pretty("../bin", "/erlang/lib") -> "/erlang/bin" -%% absname_pretty("../../../..", "/erlang") -> "/erlang" - -absname_pretty(Abspath, Relpath, []) -> - %% AbsBase _must_ begin with a vxworks device name - {device, _Rest, Dev} = vxworks_first(Abspath), - absname_pretty(Abspath, Relpath, [lists:reverse(Dev)]); -absname_pretty(_Abspath, [], AbsBase) -> - join(lists:reverse(AbsBase)); -absname_pretty(Abspath, [[$.]|Rest], AbsBase) -> - absname_pretty(Abspath, Rest, AbsBase); -absname_pretty(Abspath, [[$.,$.]|Rest], [_|AbsRest]) -> - absname_pretty(Abspath, Rest, AbsRest); -absname_pretty(Abspath, [First|Rest], AbsBase) -> - absname_pretty(Abspath, Rest, [First|AbsBase]). + join(AbsBase, flatten(Name)). %% Returns the part of the filename after the last directory separator, %% or the filename itself if it has no separators. @@ -136,12 +131,36 @@ absname_pretty(Abspath, [First|Rest], AbsBase) -> %% basename("/usr/foo/") -> "foo" (trailing slashes ignored) %% basename("/") -> [] --spec basename(file:name()) -> string(). +-spec basename(file:name()) -> file:filename(). +basename(Name) when is_binary(Name) -> + case os:type() of + {win32,_} -> + win_basenameb(Name); + _ -> + basenameb(Name,[<<"/">>]) + end; + basename(Name0) -> Name = flatten(Name0), {DirSep2, DrvSep} = separators(), basename1(skip_prefix(Name, DrvSep), [], DirSep2). +win_basenameb(<>) when ?IS_DRIVELETTER(Letter) -> + basenameb(Rest,[<<"/">>,<<"\\">>]); +win_basenameb(O) -> + basenameb(O,[<<"/">>,<<"\\">>]). +basenameb(Bin,Sep) -> + Parts = [ X || X <- binary:split(Bin,Sep,[global]), + X =:= <<>> ], + if + Parts =:= [] -> + []; + true -> + lists:last(Parts) + end. + + + basename1([$/|[]], Tail, DirSep2) -> basename1([], Tail, DirSep2); basename1([$/|Rest], _Tail, DirSep2) -> @@ -155,26 +174,11 @@ basename1([Char|Rest], Tail, DirSep2) when is_integer(Char) -> basename1([], Tail, _DirSep2) -> lists:reverse(Tail). -skip_prefix(Name, false) -> % No prefix for unix, but for VxWorks. - case major_os_type() of - vxworks -> - case vxworks_first(Name) of - {device, Rest, _Device} -> - Rest; - {not_device, _Rest, _First} -> - Name - end; - _Else -> - Name - end; -skip_prefix(Name, DrvSep) -> - skip_prefix1(Name, DrvSep). - -skip_prefix1([L, DrvSep|Name], DrvSep) when is_integer(L) -> +skip_prefix(Name, false) -> + Name; +skip_prefix([L, DrvSep|Name], DrvSep) when ?IS_DRIVELETTER(L) -> Name; -skip_prefix1([L], _) when is_integer(L) -> - [L]; -skip_prefix1(Name, _) -> +skip_prefix(Name, _) -> Name. %% Returns the last component of the filename, with the given @@ -190,7 +194,27 @@ skip_prefix1(Name, _) -> %% rootname(basename("xxx.jam")) -> "xxx" %% rootname(basename("xxx.erl")) -> "xxx" --spec basename(file:name(), file:name()) -> string(). +-spec basename(file:name(), file:name()) -> file:filename(). +basename(Name, Ext) when is_binary(Name), is_list(Ext) -> + basename(Name,filename_string_to_binary(Ext)); +basename(Name, Ext) when is_list(Name), is_binary(Ext) -> + basename(filename_string_to_binary(Name),Ext); +basename(Name, Ext) when is_binary(Name), is_binary(Ext) -> + BName = basename(Name), + LN = byte_size(BName), + LE = byte_size(Ext), + case LN - LE of + Neg when Neg < 0 -> + BName; + Pos -> + case BName of + <> -> + Part; + Other -> + Other + end + end; + basename(Name0, Ext0) -> Name = flatten(Name0), Ext = flatten(Ext0), @@ -216,21 +240,10 @@ basename([], _Ext, Tail, _DrvSep2) -> %% Example: dirname("/usr/src/kalle.erl") -> "/usr/src", %% dirname("kalle.erl") -> "." --spec dirname(file:name()) -> string(). +-spec dirname(file:name()) -> file:filename(). dirname(Name0) -> Name = flatten(Name0), - case os:type() of - vxworks -> - {Devicep, Restname, FirstComp} = vxworks_first(Name), - case Devicep of - device -> - dirname(Restname, FirstComp, [], separators()); - _ -> - dirname(Name, [], [], separators()) - end; - _ -> - dirname(Name, [], [], separators()) - end. + dirname(Name, [], [], separators()). dirname([[_|_]=List|Rest], Dir, File, Seps) -> dirname(List++Rest, Dir, File, Seps); @@ -268,7 +281,7 @@ dirname([], Dir, _, _) -> %% %% On Windows: fn:dirname("\\usr\\src/kalle.erl") -> "/usr/src" --spec extension(file:name()) -> string(). +-spec extension(file:name()) -> file:filename(). extension(Name0) -> Name = flatten(Name0), extension(Name, [], major_os_type()). @@ -281,8 +294,6 @@ extension([$/|Rest], _Result, OsType) -> extension(Rest, [], OsType); extension([$\\|Rest], _Result, win32) -> extension(Rest, [], win32); -extension([$\\|Rest], _Result, vxworks) -> - extension(Rest, [], vxworks); extension([Char|Rest], Result, OsType) when is_integer(Char) -> extension(Rest, [Char|Result], OsType); extension([], Result, _OsType) -> @@ -290,23 +301,36 @@ extension([], Result, _OsType) -> %% Joins a list of filenames with directory separators. --spec join([string()]) -> string(). +-spec join([file:filename()]) -> file:filename(). join([Name1, Name2|Rest]) -> join([join(Name1, Name2)|Rest]); join([Name]) when is_list(Name) -> join1(Name, [], [], major_os_type()); +join([Name]) when is_binary(Name) -> + join1b(Name, <<>>, [], major_os_type()); join([Name]) when is_atom(Name) -> join([atom_to_list(Name)]). %% Joins two filenames with directory separators. --spec join(string(), string()) -> string(). +-spec join(file:filename(), file:filename()) -> file:filename(). join(Name1, Name2) when is_list(Name1), is_list(Name2) -> OsType = major_os_type(), case pathtype(Name2) of relative -> join1(Name1, Name2, [], OsType); _Other -> join1(Name2, [], [], OsType) end; +join(Name1, Name2) when is_binary(Name1), is_list(Name2) -> + join(Name1,filename_string_to_binary(Name2)); +join(Name1, Name2) when is_list(Name1), is_binary(Name2) -> + join(filename_string_to_binary(Name1),Name2); +join(Name1, Name2) when is_binary(Name1), is_binary(Name2) -> + OsType = major_os_type(), + case pathtype(Name2) of + relative -> join1b(Name1, Name2, [], OsType); + _Other -> join1b(Name2, <<>>, [], OsType) + end; + join(Name1, Name2) when is_atom(Name1) -> join(atom_to_list(Name1), Name2); join(Name1, Name2) when is_atom(Name2) -> @@ -321,8 +345,6 @@ when is_integer(UcLetter), UcLetter >= $A, UcLetter =< $Z -> join1(Rest, RelativeName, [$:, UcLetter+$a-$A], win32); join1([$\\|Rest], RelativeName, Result, win32) -> join1([$/|Rest], RelativeName, Result, win32); -join1([$\\|Rest], RelativeName, Result, vxworks) -> - join1([$/|Rest], RelativeName, Result, vxworks); join1([$/|Rest], RelativeName, [$., $/|Result], OsType) -> join1(Rest, RelativeName, [$/|Result], OsType); join1([$/|Rest], RelativeName, [$/|Result], OsType) -> @@ -344,6 +366,26 @@ join1([Char|Rest], RelativeName, Result, OsType) when is_integer(Char) -> join1([Atom|Rest], RelativeName, Result, OsType) when is_atom(Atom) -> join1(atom_to_list(Atom)++Rest, RelativeName, Result, OsType). +join1b(<>, RelativeName, [], win32) +when is_integer(UcLetter), UcLetter >= $A, UcLetter =< $Z -> + join1b(Rest, RelativeName, [$:, UcLetter+$a-$A], win32); +join1b(<<$\\,Rest/binary>>, RelativeName, Result, win32) -> + join1b(<<$/,Rest>>, RelativeName, Result, win32); +join1b(<<$/,Rest/binary>>, RelativeName, [$., $/|Result], OsType) -> + join1b(Rest, RelativeName, [$/|Result], OsType); +join1b(<<$/,Rest/binary>>, RelativeName, [$/|Result], OsType) -> + join1b(Rest, RelativeName, [$/|Result], OsType); +join1b(<<>>, <<>>, Result, OsType) -> + list_to_binary(maybe_remove_dirsep(Result, OsType)); +join1b(<<>>, RelativeName, [$:|Rest], win32) -> + join1b(RelativeName, <<>>, [$:|Rest], win32); +join1b(<<>>, RelativeName, [$/|Result], OsType) -> + join1b(RelativeName, <<>>, [$/|Result], OsType); +join1b(<<>>, RelativeName, Result, OsType) -> + join1b(RelativeName, <<>>, [$/|Result], OsType); +join1b(<>, RelativeName, Result, OsType) when is_integer(Char) -> + join1b(Rest, RelativeName, [Char|Result], OsType). + maybe_remove_dirsep([$/, $:, Letter], win32) -> [Letter, $:, $/]; maybe_remove_dirsep([$/], _) -> @@ -357,7 +399,7 @@ maybe_remove_dirsep(Name, _) -> %% a given base directory, which is is assumed to be normalised %% by a previous call to join/{1,2}. --spec append(string(), file:name()) -> string(). +-spec append(file:filename(), file:name()) -> file:filename(). append(Dir, Name) -> Dir ++ [$/|Name]. @@ -376,19 +418,14 @@ append(Dir, Name) -> -spec pathtype(file:name()) -> 'absolute' | 'relative' | 'volumerelative'. pathtype(Atom) when is_atom(Atom) -> pathtype(atom_to_list(Atom)); -pathtype(Name) when is_list(Name) -> +pathtype(Name) when is_list(Name) or is_binary(Name) -> case os:type() of {unix, _} -> unix_pathtype(Name); - {win32, _} -> win32_pathtype(Name); - vxworks -> case vxworks_first(Name) of - {device, _Rest, _Dev} -> - absolute; - _ -> - relative - end; - {ose,_} -> unix_pathtype(Name) + {win32, _} -> win32_pathtype(Name) end. +unix_pathtype(<<$/,_/binary>>) -> + absolute; unix_pathtype([$/|_]) -> absolute; unix_pathtype([List|Rest]) when is_list(List) -> @@ -404,6 +441,15 @@ win32_pathtype([Atom|Rest]) when is_atom(Atom) -> win32_pathtype(atom_to_list(Atom)++Rest); win32_pathtype([Char, List|Rest]) when is_list(List) -> win32_pathtype([Char|List++Rest]); +win32_pathtype(<<$/, $/, _/binary>>) -> absolute; +win32_pathtype(<<$\\, $/, _/binary>>) -> absolute; +win32_pathtype(<<$/, $\\, _/binary>>) -> absolute; +win32_pathtype(<<$\\, $\\, _/binary>>) -> absolute; +win32_pathtype(<<$/, _/binary>>) -> volumerelative; +win32_pathtype(<<$\\, _/binary>>) -> volumerelative; +win32_pathtype(<<_Letter, $:, $/, _/binary>>) -> absolute; +win32_pathtype(<<_Letter, $:, $\\, _/binary>>) -> absolute; +win32_pathtype(<<_Letter, $:, _/binary>>) -> volumerelative; win32_pathtype([$/, $/|_]) -> absolute; win32_pathtype([$\\, $/|_]) -> absolute; win32_pathtype([$/, $\\|_]) -> absolute; @@ -422,7 +468,7 @@ win32_pathtype(_) -> relative. %% Examples: rootname("/jam.src/kalle") -> "/jam.src/kalle" %% rootname("/jam.src/foo.erl") -> "/jam.src/foo" --spec rootname(file:name()) -> string(). +-spec rootname(file:name()) -> file:filename(). rootname(Name0) -> Name = flatten(Name0), rootname(Name, [], [], major_os_type()). @@ -431,8 +477,6 @@ rootname([$/|Rest], Root, Ext, OsType) -> rootname(Rest, [$/]++Ext++Root, [], OsType); rootname([$\\|Rest], Root, Ext, win32) -> rootname(Rest, [$/]++Ext++Root, [], win32); -rootname([$\\|Rest], Root, Ext, vxworks) -> - rootname(Rest, [$/]++Ext++Root, [], vxworks); rootname([$.|Rest], Root, [], OsType) -> rootname(Rest, Root, ".", OsType); rootname([$.|Rest], Root, Ext, OsType) -> @@ -451,7 +495,7 @@ rootname([], Root, _Ext, _OsType) -> %% Examples: rootname("/jam.src/kalle.jam", ".erl") -> "/jam.src/kalle.jam" %% rootname("/jam.src/foo.erl", ".erl") -> "/jam.src/foo" --spec rootname(file:name(), file:name()) -> string(). +-spec rootname(file:name(), file:name()) -> file:filename(). rootname(Name0, Ext0) -> Name = flatten(Name0), Ext = flatten(Ext0), @@ -471,27 +515,55 @@ rootname2([Char|Rest], Ext, Result) when is_integer(Char) -> %% split("foo/bar") -> ["foo", "bar"] %% split("a:\\msdev\\include") -> ["a:/", "msdev", "include"] --spec split(file:name()) -> [string()]. +-spec split(file:name()) -> [file:filename()]. +split(Name) when is_binary(Name) -> + case os:type() of + {win32, _} -> win32_splitb(Name); + _ -> unix_splitb(Name) + end; + split(Name0) -> Name = flatten(Name0), case os:type() of - {unix, _} -> unix_split(Name); {win32, _} -> win32_split(Name); - vxworks -> vxworks_split(Name); - {ose,_} -> unix_split(Name) + _ -> unix_split(Name) end. -%% If a VxWorks filename starts with '[/\].*[^/\]' '[/\].*:' or '.*:' -%% that part of the filename is considered a device. -%% The rest of the name is interpreted exactly as for win32. -%% XXX - dirty solution to make filename:split([]) return the same thing on -%% VxWorks as on unix and win32. -vxworks_split([]) -> - []; -vxworks_split(L) -> - {_Devicep, Rest, FirstComp} = vxworks_first(L), - split(Rest, [], [lists:reverse(FirstComp)], win32). +unix_splitb(Name) -> + L = binary:split(Name,[<<"/">>],[global]), + LL = case L of + [<<>>|Rest] -> + [<<"/">>|Rest]; + _ -> + L + end, + [ X || X <- LL, X =/= <<>>]. + + +fix_driveletter(Letter0) -> + if + Letter0 >= $A, Letter0 =< $Z -> + Letter0+$a-$A; + true -> + Letter0 + end. +win32_splitb(<>) when (((Slash =:= $\\) orelse (Slash =:= $/)) andalso + ?IS_DRIVELETTER(Letter0)) -> + Letter = fix_driveletter(Letter0), + L = binary:split(Rest,[<<"/">>,<<"\\">>],[global]), + [<> | [ X || X <- L, X =/= <<>> ]]; +win32_splitb(<>) when ?IS_DRIVELETTER(Letter0) -> + Letter = fix_driveletter(Letter0), + L = binary:split(Rest,[<<"/">>,<<"\\">>],[global]), + [<> | [ X || X <- L, X =/= <<>> ]]; +win32_splitb(<>) when ((Slash =:= $\\) orelse (Slash =:= $/)) -> + L = binary:split(Rest,[<<"/">>,<<"\\">>],[global]), + [<<$/>> | [ X || X <- L, X =/= <<>> ]]; +win32_splitb(Name) -> + L = binary:split(Name,[<<"/">>,<<"\\">>],[global]), + [<<$/>> | [ X || X <- L, X =/= <<>> ]]. + unix_split(Name) -> split(Name, [], unix). @@ -502,8 +574,6 @@ win32_split([X, $\\|Rest]) when is_integer(X) -> win32_split([X, $/|Rest]); win32_split([X, Y, $\\|Rest]) when is_integer(X), is_integer(Y) -> win32_split([X, Y, $/|Rest]); -win32_split([$/, $/|Rest]) -> - split(Rest, [], [[$/, $/]]); win32_split([UcLetter, $:|Rest]) when UcLetter >= $A, UcLetter =< $Z -> win32_split([UcLetter+$a-$A, $:|Rest]); win32_split([Letter, $:, $/|Rest]) -> @@ -540,7 +610,7 @@ split([], Comp, Components, OsType) -> %% will be converted to backslashes. On all platforms, the %% name will be normalized as done by join/1. --spec nativename(string()) -> string(). +-spec nativename(file:filename()) -> file:filename(). nativename(Name0) -> Name = join([Name0]), %Normalize. case os:type() of @@ -557,13 +627,12 @@ win32_nativename([]) -> separators() -> case os:type() of - {unix, _} -> {false, false}; {win32, _} -> {$\\, $:}; - vxworks -> {$\\, false}; - {ose,_} -> {false, false} + _ -> {false, false} end. + %% find_src(Module) -- %% find_src(Module, Rules) -- %% @@ -733,45 +802,12 @@ major_os_type() -> OsT -> OsT end. -%% Need to take care of the first pathname component separately -%% due to VxWorks less than good device naming rules. -%% (i.e. this is VxWorks specific ...) -%% The following four all starts with device names -%% elrond:/foo -> elrond: -%% elrond:\\foo.bar -> elrond: -%% /DISK1:foo -> /DISK1: -%% /usr/include -> /usr -%% This one doesn't: -%% foo/bar - -vxworks_first([]) -> - {not_device, [], []}; -vxworks_first([$/|T]) -> - vxworks_first2(device, T, [$/]); -vxworks_first([$\\|T]) -> - vxworks_first2(device, T, [$/]); -vxworks_first([H|T]) when is_list(H) -> - vxworks_first(H++T); -vxworks_first([H|T]) -> - vxworks_first2(not_device, T, [H]). - -vxworks_first2(Devicep, [], FirstComp) -> - {Devicep, [], FirstComp}; -vxworks_first2(Devicep, [$/|T], FirstComp) -> - {Devicep, [$/|T], FirstComp}; -vxworks_first2(Devicep, [$\\|T], FirstComp) -> - {Devicep, [$/|T], FirstComp}; -vxworks_first2(_Devicep, [$:|T], FirstComp)-> - {device, T, [$:|FirstComp]}; -vxworks_first2(Devicep, [H|T], FirstComp) when is_list(H) -> - vxworks_first2(Devicep, H++T, FirstComp); -vxworks_first2(Devicep, [H|T], FirstComp) -> - vxworks_first2(Devicep, T, [H|FirstComp]). - %% flatten(List) %% Flatten a list, also accepting atoms. --spec flatten(file:name()) -> string(). +-spec flatten(file:name()) -> file:filename(). +flatten(Bin) when is_binary(Bin) -> + Bin; flatten(List) -> do_flatten(List, []). @@ -785,3 +821,12 @@ do_flatten([], Tail) -> Tail; do_flatten(Atom, Tail) when is_atom(Atom) -> atom_to_list(Atom) ++ flatten(Tail). + +filename_string_to_binary(List) -> + case unicode:characters_to_binary(List,unicode,file:native_name_encoding()) of + {error,_,_} -> + erlang:error(badarg); + Bin when is_binary(Bin) -> + Bin + end. + -- cgit v1.2.3 From 63eeba2f6829aac2644eaf212ebef9cdf4b59e8d Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Thu, 14 Oct 2010 10:12:57 +0200 Subject: Handle binary file names and conversion of unicode strings --- lib/stdlib/src/c.erl | 20 ++++++++++-- lib/stdlib/src/filename.erl | 79 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 96 insertions(+), 3 deletions(-) (limited to 'lib/stdlib') diff --git a/lib/stdlib/src/c.erl b/lib/stdlib/src/c.erl index 6d50a575eb..399b91b92f 100644 --- a/lib/stdlib/src/c.erl +++ b/lib/stdlib/src/c.erl @@ -659,7 +659,7 @@ portformat(Name, Id, Cmd) -> pwd() -> case file:get_cwd() of {ok, Str} -> - ok = io:format("~s\n", [Str]); + ok = io:format("~s\n", [fixup_one_bin(Str)]); {error, _} -> ok = io:format("Cannot determine current directory\n") end. @@ -684,11 +684,27 @@ ls() -> ls(Dir) -> case file:list_dir(Dir) of {ok, Entries} -> - ls_print(sort(Entries)); + ls_print(sort(fixup_bin(Entries))); {error,_E} -> format("Invalid directory\n") end. +fixup_one_bin(X) when is_binary(X) -> + L = binary_to_list(X), + [ if + El > 127 -> + $?; + true -> + El + end || El <- L]; +fixup_one_bin(X) -> + X. +fixup_bin([H|T]) -> + [fixup_one_bin(H) | fixup_bin(T)]; +fixup_bin([]) -> + []. + + ls_print([]) -> ok; ls_print(L) -> Width = min([max(lengths(L, [])), 40]) + 5, diff --git a/lib/stdlib/src/filename.erl b/lib/stdlib/src/filename.erl index 40df54fe54..9ca4b808e1 100644 --- a/lib/stdlib/src/filename.erl +++ b/lib/stdlib/src/filename.erl @@ -228,7 +228,7 @@ basename([$/|[]], Ext, Tail, DrvSep2) -> basename([], Ext, Tail, DrvSep2); basename([$/|Rest], Ext, _Tail, DrvSep2) -> basename(Rest, Ext, [], DrvSep2); -basename([$\\|Rest], Ext, Tail, DirSep2) when is_integer(DirSep2) -> +basename([DirSep2|Rest], Ext, Tail, DirSep2) when is_integer(DirSep2) -> basename([$/|Rest], Ext, Tail, DirSep2); basename([Char|Rest], Ext, Tail, DrvSep2) when is_integer(Char) -> basename(Rest, Ext, [Char|Tail], DrvSep2); @@ -241,6 +241,39 @@ basename([], _Ext, Tail, _DrvSep2) -> %% dirname("kalle.erl") -> "." -spec dirname(file:name()) -> file:filename(). +dirname(Name) when is_binary(Name) -> + {Dsep,Drivesep} = separators(), + SList = case Dsep of + Sep when is_integer(Sep) -> + [ <> ]; + _ -> + [] + end, + {XPart0,Dirs} = case Drivesep of + X when is_integer(X) -> + case Name of + <> when ?IS_DRIVELETTER(DL) -> + {<>,Rest}; + _ -> + {<<>>,Name} + end; + _ -> + {<<>>,Name} + end, + Parts0 = binary:split(Dirs,[<<"/">>|SList],[global]), + %% Fairly short lists of parts, OK to reverse twice... + Parts = case Parts0 of + [] -> []; + _ -> lists:reverse(fstrip(tl(lists:reverse(Parts0)))) + end, + XPart = case {Parts,XPart0} of + {[],<<>>} -> + <<".">>; + _ -> + XPart0 + end, + dirjoin(Parts,XPart,<<"/">>); + dirname(Name0) -> Name = flatten(Name0), dirname(Name, [], [], separators()). @@ -271,6 +304,26 @@ dirname([], [DrvSep,Dl], File, {_,DrvSep}) -> end; dirname([], Dir, _, _) -> lists:reverse(Dir). + +%% Compatibility with lists variant, remove trailing slashes +fstrip([<<>>,X|Y]) -> + fstrip([X|Y]); +fstrip(A) -> + A. + + +dirjoin([<<>>|T],Acc,Sep) -> + dirjoin1(T,<>,Sep); +dirjoin(A,B,C) -> + dirjoin1(A,B,C). + +dirjoin1([],Acc,_) -> + Acc; +dirjoin1([One],Acc,_) -> + <>; +dirjoin1([H|T],Acc,Sep) -> + dirjoin(T,<>,Sep). + %% Given a filename string, returns the file extension, %% including the period. Returns an empty list if there @@ -282,6 +335,30 @@ dirname([], Dir, _, _) -> %% On Windows: fn:dirname("\\usr\\src/kalle.erl") -> "/usr/src" -spec extension(file:name()) -> file:filename(). +extension(Name) when is_binary(Name) -> + {Dsep,_} = separators(), + SList = case Dsep of + Sep when is_integer(Sep) -> + [ <> ]; + _ -> + [] + end, + case binary:matches(Name,[<<".">>]) of + nomatch -> % Bug in binary workaround :( + <<>>; + [] -> + <<>>; + List -> + {Pos,_} = lists:last(List), + <<_:Pos/binary,Part/binary>> = Name, + case binary:match(Part,[<<"/">>|SList]) of + nomatch -> + Part; + _ -> + <<>> + end + end; + extension(Name0) -> Name = flatten(Name0), extension(Name, [], major_os_type()). -- cgit v1.2.3 From 5da29a8c5fe9584e274d2fe9b95dead8334d419a Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Mon, 8 Nov 2010 17:34:41 +0100 Subject: Correct shell utilities to handle unicode and possibly binaries --- lib/stdlib/src/c.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/stdlib') diff --git a/lib/stdlib/src/c.erl b/lib/stdlib/src/c.erl index 399b91b92f..d04d8f191f 100644 --- a/lib/stdlib/src/c.erl +++ b/lib/stdlib/src/c.erl @@ -659,7 +659,7 @@ portformat(Name, Id, Cmd) -> pwd() -> case file:get_cwd() of {ok, Str} -> - ok = io:format("~s\n", [fixup_one_bin(Str)]); + ok = io:format("~ts\n", [fixup_one_bin(Str)]); {error, _} -> ok = io:format("Cannot determine current directory\n") end. @@ -714,7 +714,7 @@ ls_print(X, Width, Len) when Width + Len >= 80 -> io:nl(), ls_print(X, Width, 0); ls_print([H|T], Width, Len) -> - io:format("~-*s",[Width,H]), + io:format("~-*ts",[Width,H]), ls_print(T, Width, Len+Width); ls_print([], _, _) -> io:nl(). -- cgit v1.2.3 From 0c18c4ba3f561ae6d9ff943863bfc62e5e9099e1 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Fri, 26 Nov 2010 14:13:25 +0100 Subject: Corrected testcases broken by unicode filenames Also corrected type-info for bifs --- lib/stdlib/test/binary_module_SUITE.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/stdlib') diff --git a/lib/stdlib/test/binary_module_SUITE.erl b/lib/stdlib/test/binary_module_SUITE.erl index 16ed9a2c26..e4cdcf6125 100644 --- a/lib/stdlib/test/binary_module_SUITE.erl +++ b/lib/stdlib/test/binary_module_SUITE.erl @@ -186,7 +186,7 @@ badargs(Config) when is_list(Config) -> binary:match(<<1,2,3>>, {ac,ets:match_spec_compile([{'_',[],['$_']}])}, [{scope,{0,1}}])), - ?line nomatch = + ?line [] = ?MASK_ERROR(binary:matches(<<1,2,3>>,<<1>>,[{scope,{0,0}}])), ?line badarg = ?MASK_ERROR(binary:matches(<<1,2,3>>,{bm,<<>>},[{scope,{0,1}}])), -- cgit v1.2.3 From b21d33041ef30182cbb8e74c0023dc282d069a26 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Tue, 30 Nov 2010 12:31:33 +0100 Subject: Teach filelib to use re in unicode mode when filenames are not raw --- lib/stdlib/src/filelib.erl | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) (limited to 'lib/stdlib') diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl index d5ddf9ed7e..eaaca750ab 100644 --- a/lib/stdlib/src/filelib.erl +++ b/lib/stdlib/src/filelib.erl @@ -166,36 +166,41 @@ do_is_regular(File, Mod) -> %% If is true all sub-directories to are processed do_fold_files(Dir, RegExp, Recursive, Fun, Acc, Mod) -> - {ok, Re1} = re:compile(RegExp), - do_fold_files1(Dir, Re1, Recursive, Fun, Acc, Mod). + {ok, Re1} = re:compile(RegExp,[unicode]), + do_fold_files1(Dir, Re1, RegExp, Recursive, Fun, Acc, Mod). -do_fold_files1(Dir, RegExp, Recursive, Fun, Acc, Mod) -> +do_fold_files1(Dir, RegExp, OrigRE, Recursive, Fun, Acc, Mod) -> case eval_list_dir(Dir, Mod) of - {ok, Files} -> do_fold_files2(Files, Dir, RegExp, Recursive, Fun, Acc, Mod); + {ok, Files} -> do_fold_files2(Files, Dir, RegExp, OrigRE, + Recursive, Fun, Acc, Mod); {error, _} -> Acc end. -do_fold_files2([], _Dir, _RegExp, _Recursive, _Fun, Acc, _Mod) -> +%% OrigRE is not to be compiled as it's for non conforming filenames, +%% i.e. for filenames that does not comply to the current encoding, which should +%% be very rare. We use it only in those cases and do not want to precompile. +do_fold_files2([], _Dir, _RegExp, _OrigRE, _Recursive, _Fun, Acc, _Mod) -> Acc; -do_fold_files2([File|T], Dir, RegExp, Recursive, Fun, Acc0, Mod) -> +do_fold_files2([File|T], Dir, RegExp, OrigRE, Recursive, Fun, Acc0, Mod) -> FullName = filename:join(Dir, File), case do_is_regular(FullName, Mod) of true -> - case re:run(File, RegExp, [{capture,none}]) of + case re:run(File, if is_binary(File) -> OrigRE; true -> RegExp end, + [{capture,none}]) of match -> Acc = Fun(FullName, Acc0), - do_fold_files2(T, Dir, RegExp, Recursive, Fun, Acc, Mod); + do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc, Mod); nomatch -> - do_fold_files2(T, Dir, RegExp, Recursive, Fun, Acc0, Mod) + do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc0, Mod) end; false -> case Recursive andalso do_is_dir(FullName, Mod) of true -> - Acc1 = do_fold_files1(FullName, RegExp, Recursive, + Acc1 = do_fold_files1(FullName, RegExp, OrigRE, Recursive, Fun, Acc0, Mod), - do_fold_files2(T, Dir, RegExp, Recursive, Fun, Acc1, Mod); + do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc1, Mod); false -> - do_fold_files2(T, Dir, RegExp, Recursive, Fun, Acc0, Mod) + do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc0, Mod) end end. -- cgit v1.2.3 From 1ab2f46d91bce11bdc5b7ec65d3b3df46eadb105 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Wed, 1 Dec 2010 17:34:49 +0100 Subject: Make filelib not crash on re codepoints beyond 255 in re when filename is raw --- lib/stdlib/src/filelib.erl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'lib/stdlib') diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl index eaaca750ab..04147d40d1 100644 --- a/lib/stdlib/src/filelib.erl +++ b/lib/stdlib/src/filelib.erl @@ -185,11 +185,14 @@ do_fold_files2([File|T], Dir, RegExp, OrigRE, Recursive, Fun, Acc0, Mod) -> FullName = filename:join(Dir, File), case do_is_regular(FullName, Mod) of true -> - case re:run(File, if is_binary(File) -> OrigRE; true -> RegExp end, - [{capture,none}]) of + case (catch re:run(File, if is_binary(File) -> OrigRE; + true -> RegExp end, + [{capture,none}])) of match -> Acc = Fun(FullName, Acc0), do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc, Mod); + {'EXIT',_} -> + do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc0, Mod); nomatch -> do_fold_files2(T, Dir, RegExp, OrigRE, Recursive, Fun, Acc0, Mod) end; -- cgit v1.2.3 From 3e6877b06ae395a9d4310ef664d0360867a47f62 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Wed, 1 Dec 2010 17:35:40 +0100 Subject: Add documentation about raw filenames and Unicode file name translation mode --- lib/stdlib/doc/src/filelib.xml | 27 ++++++++++++++++++++++++--- lib/stdlib/doc/src/filename.xml | 15 ++++++++++++--- 2 files changed, 36 insertions(+), 6 deletions(-) (limited to 'lib/stdlib') diff --git a/lib/stdlib/doc/src/filelib.xml b/lib/stdlib/doc/src/filelib.xml index 4ff3b22f32..969aff4fcb 100644 --- a/lib/stdlib/doc/src/filelib.xml +++ b/lib/stdlib/doc/src/filelib.xml @@ -36,14 +36,23 @@

This module contains utilities on a higher level than the file module.

+

The module supports Unicode file names, so that it will match against regular expressions given in Unicode and that it will find and process raw file names (i.e. files named in a way that does not confirm to the expected encoding).

+

If the VM operates in Unicode file naming mode on a machine with transparent file naming, the fun() provided to fold_files/5 needs to be prepared to handle binary file names.

+

For more information about raw file names, see the file module.

DATA TYPES -filename() = string() | atom() | DeepList -dirname() = filename() -DeepList = [char() | atom() | DeepList] +filename() = = string() | atom() | DeepList | RawFilename + DeepList = [char() | atom() | DeepList] + RawFilename = binary() + If VM is in unicode filename mode, string() and char() are allowed to be > 255. + RawFilename is a filename not subject to Unicode translation, meaning that it + can contain characters not conforming to the Unicode encoding expected from the + filesystem (i.e. non-UTF-8 characters although the VM is started in Unicode + filename mode). +dirname() = filename()
@@ -90,6 +99,18 @@ DeepList = [char() | atom() | DeepList] If Recursive is true all sub-directories to Dir are processed. The regular expression matching is done on just the filename without the directory part.

+ +

If Unicode file name translation is in effect and the file + system is completely transparent, file names that cannot be + interpreted as Unicode may be encountered, in which case the + fun() must be prepared to handle raw file names + (i.e. binaries). If the regular expression contains + codepoints beyond 255, it will not match file names that does + not conform to the expected character encoding (i.e. are not + encoded in valid UTF-8).

+ +

For more information about raw file names, see the + file module.

diff --git a/lib/stdlib/doc/src/filename.xml b/lib/stdlib/doc/src/filename.xml index fe6c6f898e..cdee6e4a81 100644 --- a/lib/stdlib/doc/src/filename.xml +++ b/lib/stdlib/doc/src/filename.xml @@ -4,7 +4,7 @@
- 19972009 + 19972010 Ericsson AB. All Rights Reserved. @@ -43,13 +43,22 @@ only, even if the arguments contain back slashes. Use join/1 to normalize a file name by removing redundant directory separators.

+

The module supports raw file names in the way that if a binary is present, or the file name cannot be interpreted according to the return value of + file:native_name_encoding/0, a raw file name will also be returned. For example filename:join/1 provided with a path component being a binary (and also not being possible to interpret under the current native file name encoding) will result in a raw file name being returned (the join operation will have been performed of course). For more information about raw file names, see the file module.

DATA TYPES -name() = string() | atom() | DeepList -DeepList = [char() | atom() | DeepList] +name() = string() | atom() | DeepList | RawFilename + DeepList = [char() | atom() | DeepList] + RawFilename = binary() + If VM is in unicode filename mode, string() and char() are allowed to be > 255. + RawFilename is a filename not subject to Unicode translation, meaning that it + can contain characters not conforming to the Unicode encoding expected from the + filesystem (i.e. non-UTF-8 characters although the VM is started in Unicode + filename mode). +
-- cgit v1.2.3 From 6ba7c90dd6ccd7a6a6c661c51f17c44a124d2182 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Thu, 2 Dec 2010 14:55:03 +0100 Subject: Add section about Unicode file names to stdlib users guide --- lib/stdlib/doc/src/unicode_usage.xml | 44 +++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) (limited to 'lib/stdlib') diff --git a/lib/stdlib/doc/src/unicode_usage.xml b/lib/stdlib/doc/src/unicode_usage.xml index f1b0659ea2..df8e6c6b47 100644 --- a/lib/stdlib/doc/src/unicode_usage.xml +++ b/lib/stdlib/doc/src/unicode_usage.xml @@ -5,7 +5,7 @@
1999 - 2009 + 2010 Ericsson AB. All Rights Reserved. @@ -168,6 +168,48 @@ Eshell V5.7 (abort with ^G) Unicode characters in allowed and disallowed context
+Unicode file names +

Most modern operating systems support Unicode file names in some way or another. There are several different ways to do this and Erlang by default treats the different approaches differently:

+ +Mandatory Unicode file naming + +

Windows and, for most common uses, MacOSX enforces Unicode support for file names. All files created in the filesystem has names that can consistently be interpreted. In MacOSX, all file names are retrieved in UTF-8 encoding, while Windows have selected an approach where each system call handling file names has a special Unicode aware variant, giving much the same effect. There are no file names on these systems that are not Unicode file names, why the default behavior of the Erlang VM is to work in "Unicode file name translation mode", meaning that a file name can be given as a Unicode list and that will be automatically translated to the proper name encoding for the underlying operating and file system.

+

Doing i.e. a file:list_dir/1 on one of these systems may return Unicode lists with codepoints beyond 255, depending on the content of the actual filesystem.

+

As the feature is fairly new, you may still stumble upon non core applications that cannot handle being provided with file names containing characters with codepoints larger than 255, but the core Erlang system should have no problems with Unicode file names.

+
+Transparent file naming + +

Most Unix operating systems have adopted a simpler approach, namely that Unicode file naming is not enforced, but by convention. Those systems usually use UTF-8 encoding for Unicode file names, but do not enforce it. On such a system, a file name containing characters having codepoints between 128 and 255 may be named either as plain ISO-latin-1 or using UTF-8 encoding. As no consistency is enforced, the Erlang VM can do no consistent translation of all file names. If the VM would automatically select encoding based on heuristics, one could get unexpected behavior on these systems, therefore file names not being encoded in UTF-8 are returned as "raw file names" if Unicode file naming support is turned on.

+

A raw file name is not a list, but a binary. Many non core applications still does not handle file names given as binaries, why such raw names are avoided by default. This means that systems having implemented Unicode file naming through transparent file systems and an UTF-8 convention, does not by default have Unicode file naming turned on. Explicitly turning Unicode file name handling on for these types of systems is considered experimental.

+
+
+

The Unicode file naming support was introduced with OTP release R14B01. A VM operating in Unicode file mode can work with files having names in any language or character set (as long as it's supported by the underlying OS and file system). The Unicode character list is used to denote file or directory names and if the file system content is listed, you will also be able to get Unicode lists as return value. The support lies in the kernel and stdlib modules, why most applications (that does not explicitly require the file names to be in the ISO-latin-1 range) will benefit from the Unicode support without change.

+ +

On Operating systems with mandatory Unicode file names, this means that you more easily conform to the file names of other (non Erlang) applications, and you can also process file names that, at least on Windows, where completely inaccessible (due to having names that could not be represented in ISO-latin-1). Also you will avoid creating incomprehensible file names on MacOSX as the vfs layer of the OS will accept all your file names as UTF-8 and will not rewrite them.

+ +

For most systems, turning on Unicode file name translation is no problem even if it uses transparent file naming. Very few systems have mixed file name encodings. A consistent UTF-8 named system will work perfectly in Unicode file name mode. It is still however considered experimental in R14B01. Unicode file name translation is turned on with the +fnu switch to the erl program. If the VM is started in Unicode file name translation mode, file:native_name_encoding/0 will return the atom utf8.

+ +

In Unicode file name mode, file names given to the BIF open_port/2 with the option {spawn_executable,...} are also interpreted as Unicode. So is the parameter list given in the argv option available when using spawn_executable. The UTF-8 translation of arguments can be avoided using binaries, see the discussion about raw file names below.

+ +

It is worth noting that the file encoding options given when opening a file has nothing to do with the file name encoding convention. You can very well open files containing UTF-8 but having file names in ISO-latin-1 or vice versa.

+ +Erlang drivers and NIF shared objects still can not be named with names containing codepoints beyond 127. This is a known limitation to be removed in a future release. Erlang modules however can, but it is definitely not a good idea and is still considered experimental. + +
+Notes about raw file names and automatic file name conversion +

Raw file names is introduced together with Unicode file name support in erts-5.8.2 (OTP R14B01). The reason the "raw file names" is introduced in the system is to be able to consistently represent file names given in different encodings on the same system. Having the VM automatically translate a file name that is not in UTF-8 when to a list of Unicode characters might seem practical, but this would open up for both duplicate file names and other inconsistent behavior. Consider a directory containing a file named "björn" in ISO-latin-1, while the Erlang VM is operating in Unicode file name mode (and therefore expecting UTF-8 file naming). The ISO-latin-1 name is not valid UTF-8 and one could be tempted to think that automatic conversion in for example file:list_dir/1 is a good idea. But what would happen if we later tried to open the file and has the name as a Unicode list (magically converted from the ISO-latin-1 file name)? The VM will convert the file name given to UTF-8, as this is the encoding expected. Effectively this means trying to open the file named <<"björn"/utf8>>. This file does not exist, and even if it existed it would not be the same file as the one that was listed. We could even create two files named "björn", one named in the UTF-8 encoding and one not. If file:list_dir/1 would automatically convert the ISO-latin-1 file name to a list, we would get two identical file names as the result. To avoid this, we need to differentiate between file names being properly encoded according to the Unicode file naming convention (i.e. UTF-8) and file names being invalid under the encoding. This is done by representing invalid encoding as "raw" file names, i.e. as binaries.

+

The core system of Erlang (kernel and stdlib) accept raw file names except for loadable drivers and executables invoked using open_port({spawn, ...} ...). open_port({spawn_executable, ...} ...) however does accept them. As mentioned earlier, the arguments given in the option list to open_port({spawn_executable, ...} ...) undergo the same conversion as the file names, meaning that the executable will be provided with arguments in UTF-8 as well. This translation is avoided consistently with how the file names are treated, by giving the argument as a binary.

+

To force Unicode file name translation mode on systems where this is not the default is considered experimental in OTP R14B01 due to the raw file names possibly being a new experience to the programmer and that the non core applications of OTP are not tested for compliance with raw file names yet. Unicode file name translation is expected to be default in future releases.

+

If working with raw file names, one can still conform to the encoding convention of the Erlang VM by using the file:native_name_encoding/0 function, which returns either the atom latin1 or the atom utf8 depending on the file name translation mode. On Linux, an VM started without explicitly stating the file name translation mode will default to latin1 as the native file name encoding, why file names on the disk encoded as UTF-8 will be returned as a list of the names interpreted as ISO-latin-1. The "UTF-8 list" is not a practical type for displaying or operating on in Erlang, but it is backward compatible and usable in all functions requiring a file name. On Windows and MacOSX, the default behavior is that of file name translation, why the file:native_name_encoding/0 by default returns utf8 on those systems (the fact that Windows actually does not use UTF-8 on the file system level can safely be ignored by the Erlang programmer). The default behavior can be changed using the +fnu or +fnl options to the VM, see the erl command manual page.

+

Even if you are operating without Unicode file naming translation automatically done by the VM, you can access and create files with names in UTF-8 encoding by using raw file names encoded as UTF-8. Enforcing the UTF-8 encoding regardless of the mode the Erlang VM is started in might, in some circumstances be a good idea, as the convention of using UTF-8 file names is spreading.

+
+
+Notes about MacOSX +

MacOSXs vfs layer enforces UTF-8 file names in a quite aggressive way. Older versions did this by simply refusing to create non UTF-8 conforming file names, while newer versions replace offending bytes with the sequence "%HH", where HH is the original character in hexadecimal notation. As Unicode translation is enabled by default on MacOSX, the only way to come up against this is to either start the VM with the +fnl flag or to use a raw file name in latin1 encoding. In that case, the file can not be opened with the same name as the one used to create this. The problem is by design in newer versions of MacOSX.

+

MacOSX also reorganizes the names of files so that the representation of accents etc is denormalized, i.e. the character ö is represented as the codepoints [111,776], where 111 is the character o and 776 is a special accent character. This type of denormalized Unicode is otherwise very seldom used and Erlang normalizes those file names on retrieval, so that denormalized file names is not passed up to the Erlang application. In Erlang the file name "björn" is retrieved as [98,106,246,114,110], not as [98,106,117,776,114,110], even though the file system might think differently.

+
+
+
Unicode-aware modules

Most of the modules in Erlang/OTP are of course Unicode-unaware in the sense that they have no notion of Unicode and really shouldn't have. Typically they handle non-textual or byte-oriented data (like gen_tcp etc).

Modules that actually handle textual data (like io_lib, string etc) are sometimes subject to conversion or extension to be able to handle Unicode characters.

-- cgit v1.2.3 From 6ea8348174c62812057dd552d0890b2d9d4a3c16 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Fri, 3 Dec 2010 09:51:00 +0100 Subject: Add documentation to erlang.xml and slight correction to unicode_usage.xml --- lib/stdlib/doc/src/unicode_usage.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/stdlib') diff --git a/lib/stdlib/doc/src/unicode_usage.xml b/lib/stdlib/doc/src/unicode_usage.xml index df8e6c6b47..c02ea3cbcb 100644 --- a/lib/stdlib/doc/src/unicode_usage.xml +++ b/lib/stdlib/doc/src/unicode_usage.xml @@ -189,7 +189,7 @@ Eshell V5.7 (abort with ^G)

For most systems, turning on Unicode file name translation is no problem even if it uses transparent file naming. Very few systems have mixed file name encodings. A consistent UTF-8 named system will work perfectly in Unicode file name mode. It is still however considered experimental in R14B01. Unicode file name translation is turned on with the +fnu switch to the erl program. If the VM is started in Unicode file name translation mode, file:native_name_encoding/0 will return the atom utf8.

-

In Unicode file name mode, file names given to the BIF open_port/2 with the option {spawn_executable,...} are also interpreted as Unicode. So is the parameter list given in the argv option available when using spawn_executable. The UTF-8 translation of arguments can be avoided using binaries, see the discussion about raw file names below.

+

In Unicode file name mode, file names given to the BIF open_port/2 with the option {spawn_executable,...} are also interpreted as Unicode. So is the parameter list given in the args option available when using spawn_executable. The UTF-8 translation of arguments can be avoided using binaries, see the discussion about raw file names below.

It is worth noting that the file encoding options given when opening a file has nothing to do with the file name encoding convention. You can very well open files containing UTF-8 but having file names in ISO-latin-1 or vice versa.

-- cgit v1.2.3 From 159d79b6657e5bb4f3accb6a0d74b74f06da4ba2 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Fri, 3 Dec 2010 13:16:21 +0100 Subject: Test and correct filelib and filename --- lib/stdlib/src/filelib.erl | 28 +++- lib/stdlib/src/filename.erl | 28 ++-- lib/stdlib/test/filelib_SUITE.erl | 7 +- lib/stdlib/test/filename_SUITE.erl | 315 ++++++++++++++++++++++++++++++++++++- 4 files changed, 360 insertions(+), 18 deletions(-) (limited to 'lib/stdlib') diff --git a/lib/stdlib/src/filelib.erl b/lib/stdlib/src/filelib.erl index 04147d40d1..c845b61204 100644 --- a/lib/stdlib/src/filelib.erl +++ b/lib/stdlib/src/filelib.erl @@ -47,14 +47,14 @@ wildcard(Pattern) when is_list(Pattern) -> ?HANDLE_ERROR(do_wildcard(Pattern, file)). -spec wildcard(file:name(), file:name() | atom()) -> [file:filename()]. -wildcard(Pattern, Cwd) when is_list(Pattern), is_list(Cwd) -> +wildcard(Pattern, Cwd) when is_list(Pattern), (is_list(Cwd) or is_binary(Cwd)) -> ?HANDLE_ERROR(do_wildcard(Pattern, Cwd, file)); wildcard(Pattern, Mod) when is_list(Pattern), is_atom(Mod) -> ?HANDLE_ERROR(do_wildcard(Pattern, Mod)). -spec wildcard(file:name(), file:name(), atom()) -> [file:filename()]. wildcard(Pattern, Cwd, Mod) - when is_list(Pattern), is_list(Cwd), is_atom(Mod) -> + when is_list(Pattern), (is_list(Cwd) or is_binary(Cwd)), is_atom(Mod) -> ?HANDLE_ERROR(do_wildcard(Pattern, Cwd, Mod)). -spec is_dir(file:name()) -> boolean(). @@ -118,7 +118,7 @@ do_wildcard_comp({compiled_wildcard,{exists,File}}, Mod) -> do_wildcard_comp({compiled_wildcard,[Base|Rest]}, Mod) -> do_wildcard_1([Base], Rest, Mod). -do_wildcard(Pattern, Cwd, Mod) when is_list(Pattern), is_list(Cwd) -> +do_wildcard(Pattern, Cwd, Mod) when is_list(Pattern), (is_list(Cwd) or is_binary(Cwd)) -> do_wildcard_comp(do_compile_wildcard(Pattern), Cwd, Mod). do_wildcard_comp({compiled_wildcard,{exists,File}}, Cwd, Mod) -> @@ -127,9 +127,18 @@ do_wildcard_comp({compiled_wildcard,{exists,File}}, Cwd, Mod) -> _ -> [] end; do_wildcard_comp({compiled_wildcard,[current|Rest]}, Cwd0, Mod) -> - Cwd = filename:join([Cwd0]), %Slash away redundant slashes. - PrefixLen = length(Cwd)+1, - [lists:nthtail(PrefixLen, N) || N <- do_wildcard_1([Cwd], Rest, Mod)]; + {Cwd,PrefixLen} = case filename:join([Cwd0]) of + Bin when is_binary(Bin) -> {Bin,byte_size(Bin)+1}; + Other -> {Other,length(Other)+1} + end, %Slash away redundant slashes. + [ + if + is_binary(N) -> + <<_:PrefixLen/binary,Res/binary>> = N, + Res; + true -> + lists:nthtail(PrefixLen, N) + end || N <- do_wildcard_1([Cwd], Rest, Mod)]; do_wildcard_comp({compiled_wildcard,[Base|Rest]}, _Cwd, Mod) -> do_wildcard_1([Base], Rest, Mod). @@ -276,6 +285,13 @@ do_wildcard_3(Base, [Pattern|Rest], Result, Mod) -> do_wildcard_3(Base, [], Result, _Mod) -> [Base|Result]. +wildcard_4(Pattern, [File|Rest], Base, Result) when is_binary(File) -> + case wildcard_5(Pattern, binary_to_list(File)) of + true -> + wildcard_4(Pattern, Rest, Base, [join(Base, File)|Result]); + false -> + wildcard_4(Pattern, Rest, Base, Result) + end; wildcard_4(Pattern, [File|Rest], Base, Result) -> case wildcard_5(Pattern, File) of true -> diff --git a/lib/stdlib/src/filename.erl b/lib/stdlib/src/filename.erl index 9ca4b808e1..e38b8957f2 100644 --- a/lib/stdlib/src/filename.erl +++ b/lib/stdlib/src/filename.erl @@ -151,10 +151,10 @@ win_basenameb(O) -> basenameb(O,[<<"/">>,<<"\\">>]). basenameb(Bin,Sep) -> Parts = [ X || X <- binary:split(Bin,Sep,[global]), - X =:= <<>> ], + X =/= <<>> ], if Parts =:= [] -> - []; + <<>>; true -> lists:last(Parts) end. @@ -201,17 +201,19 @@ basename(Name, Ext) when is_list(Name), is_binary(Ext) -> basename(filename_string_to_binary(Name),Ext); basename(Name, Ext) when is_binary(Name), is_binary(Ext) -> BName = basename(Name), + LAll = byte_size(Name), LN = byte_size(BName), LE = byte_size(Ext), case LN - LE of Neg when Neg < 0 -> BName; Pos -> - case BName of - <> -> + StartLen = LAll - Pos - LE, + case Name of + <<_:StartLen/binary,Part:Pos/binary,Ext/binary>> -> Part; - Other -> - Other + _Other -> + BName end end; @@ -447,7 +449,7 @@ join1b(<>, RelativeName, [], win32) when is_integer(UcLetter), UcLetter >= $A, UcLetter =< $Z -> join1b(Rest, RelativeName, [$:, UcLetter+$a-$A], win32); join1b(<<$\\,Rest/binary>>, RelativeName, Result, win32) -> - join1b(<<$/,Rest>>, RelativeName, Result, win32); + join1b(<<$/,Rest/binary>>, RelativeName, Result, win32); join1b(<<$/,Rest/binary>>, RelativeName, [$., $/|Result], OsType) -> join1b(Rest, RelativeName, [$/|Result], OsType); join1b(<<$/,Rest/binary>>, RelativeName, [$/|Result], OsType) -> @@ -546,6 +548,8 @@ win32_pathtype(_) -> relative. %% rootname("/jam.src/foo.erl") -> "/jam.src/foo" -spec rootname(file:name()) -> file:filename(). +rootname(Name) when is_binary(Name) -> + list_to_binary(rootname(binary_to_list(Name))); % No need to handle unicode, . is < 128 rootname(Name0) -> Name = flatten(Name0), rootname(Name, [], [], major_os_type()). @@ -573,6 +577,12 @@ rootname([], Root, _Ext, _OsType) -> %% rootname("/jam.src/foo.erl", ".erl") -> "/jam.src/foo" -spec rootname(file:name(), file:name()) -> file:filename(). +rootname(Name, Ext) when is_binary(Name), is_binary(Ext) -> + list_to_binary(rootname(binary_to_list(Name),binary_to_list(Ext))); +rootname(Name, Ext) when is_binary(Name) -> + rootname(Name,filename_string_to_binary(Ext)); +rootname(Name, Ext) when is_binary(Ext) -> + rootname(filename_string_to_binary(Name),Ext); rootname(Name0, Ext0) -> Name = flatten(Name0), Ext = flatten(Ext0), @@ -639,7 +649,7 @@ win32_splitb(<>) when ((Slash =:= $\\) orelse (Slash =:= $/)) [<<$/>> | [ X || X <- L, X =/= <<>> ]]; win32_splitb(Name) -> L = binary:split(Name,[<<"/">>,<<"\\">>],[global]), - [<<$/>> | [ X || X <- L, X =/= <<>> ]]. + [ X || X <- L, X =/= <<>> ]. unix_split(Name) -> @@ -900,7 +910,7 @@ do_flatten(Atom, Tail) when is_atom(Atom) -> atom_to_list(Atom) ++ flatten(Tail). filename_string_to_binary(List) -> - case unicode:characters_to_binary(List,unicode,file:native_name_encoding()) of + case unicode:characters_to_binary(flatten(List),unicode,file:native_name_encoding()) of {error,_,_} -> erlang:error(badarg); Bin when is_binary(Bin) -> diff --git a/lib/stdlib/test/filelib_SUITE.erl b/lib/stdlib/test/filelib_SUITE.erl index d54741051f..5a279609c6 100644 --- a/lib/stdlib/test/filelib_SUITE.erl +++ b/lib/stdlib/test/filelib_SUITE.erl @@ -53,8 +53,11 @@ wildcard_one(Config) when is_list(Config) -> wildcard_two(Config) when is_list(Config) -> ?line Dir = filename:join(?config(priv_dir, Config), "wildcard_two"), + ?line DirB = unicode:characters_to_binary(Dir, file:native_name_encoding()), ?line ok = file:make_dir(Dir), - ?line do_wildcard_1(Dir, fun(Wc) -> filelib:wildcard(Wc, Dir) end), + ?line do_wildcard_1(Dir, fun(Wc) -> io:format("~p~n",[{Wc,Dir, X = filelib:wildcard(Wc, Dir)}]),X end), + ?line do_wildcard_1(Dir, fun(Wc) -> io:format("~p~n",[{Wc,DirB, X = filelib:wildcard(Wc, DirB)}]), + [unicode:characters_to_list(Y,file:native_name_encoding()) || Y <- X] end), ?line do_wildcard_1(Dir, fun(Wc) -> filelib:wildcard(Wc, Dir++"/") end), case os:type() of {win32,_} -> @@ -253,5 +256,7 @@ ensure_dir_eexist(Config) when is_list(Config) -> %% There already is a file with the name of the directory %% we want to create. ?line NeedFile = filename:join(Name, "file"), + ?line NeedFileB = filename:join(Name, <<"file">>), ?line {error, eexist} = filelib:ensure_dir(NeedFile), + ?line {error, eexist} = filelib:ensure_dir(NeedFileB), ok. diff --git a/lib/stdlib/test/filename_SUITE.erl b/lib/stdlib/test/filename_SUITE.erl index ab6521f37b..dbce93600b 100644 --- a/lib/stdlib/test/filename_SUITE.erl +++ b/lib/stdlib/test/filename_SUITE.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1997-2009. All Rights Reserved. +%% Copyright Ericsson AB 1997-2010. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -22,12 +22,19 @@ basename_1/1, basename_2/1, dirname/1, extension/1, join/1, t_nativename/1]). -export([pathtype/1,rootname/1,split/1,find_src/1]). +-export([absname_bin/1, absname_bin_2/1, + basename_bin_1/1, basename_bin_2/1, + dirname_bin/1, extension_bin/1, join_bin/1]). +-export([pathtype_bin/1,rootname_bin/1,split_bin/1]). -include("test_server.hrl"). all(suite) -> [absname, absname_2, basename_1, basename_2, dirname, extension, - join, pathtype, rootname, split, t_nativename, find_src]. + join, pathtype, rootname, split, t_nativename, find_src, + absname_bin, absname_bin_2, basename_bin_1, basename_bin_2, dirname_bin, + extension_bin, + join_bin, pathtype_bin, rootname_bin, split_bin]. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -457,3 +464,307 @@ find_src(Config) when is_list(Config) -> %% Try to find the source for a preloaded module. ?line {error,{preloaded,init}} = filename:find_src(init), ok. + +%% +%% +%% With binaries +%% +%% + +absname_bin(Config) when is_list(Config) -> + case os:type() of + {win32, _} -> + ?line [Drive|_] = ?config(priv_dir, Config), + ?line Temp = filename:join([Drive|":/"], "temp"), + ?line case file:make_dir(Temp) of + ok -> ok; + {error,eexist} -> ok + end, + ?line {ok,Cwd} = file:get_cwd(), + ?line ok = file:set_cwd(Temp), + ?line <> = filename:absname(<<"foo">>), + ?line <> = filename:absname(<<"../ebin">>), + ?line <> = filename:absname(<<"/erlang">>), + ?line <> = filename:absname(<<"/erlang/src">>), + ?line <> = filename:absname(<<"\\erlang\\src">>), + ?line <> = filename:absname(<>), + ?line <> = + filename:absname(<>), + ?line <> = + filename:absname(<>), + ?line <<"a:/erlang">> = filename:absname(<<"a:erlang">>), + + ?line file:set_cwd(<>), + ?line <> = filename:absname(<<"foo">>), + ?line <> = filename:absname(<<"../ebin">>), + ?line <> = filename:absname(<<"/erlang">>), + ?line <> = filename:absname(<<"/erlang/src">>), + ?line <> = filename:absname(<<"\\erlang\\\\src">>), + ?line <> = filename:absname(<>), + ?line <> = filename:absname(<>), + ?line <<"a:/erlang">> = filename:absname(<<"a:erlang">>), + + ?line file:set_cwd(Cwd), + ok; + {unix, _} -> + ?line ok = file:set_cwd(<<"/usr">>), + ?line <<"/usr/foo">> = filename:absname(<<"foo">>), + ?line <<"/usr/../ebin">> = filename:absname(<<"../ebin">>), + + ?line file:set_cwd(<<"/">>), + ?line <<"/foo">> = filename:absname(<<"foo">>), + ?line <<"/../ebin">> = filename:absname(<<"../ebin">>), + ?line <<"/erlang">> = filename:absname(<<"/erlang">>), + ?line <<"/erlang/src">> = filename:absname(<<"/erlang/src">>), + ?line <<"/erlang/src">> = filename:absname(<<"/erlang///src">>), + ok + end. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +absname_bin_2(Config) when is_list(Config) -> + case os:type() of + {win32, _} -> + ?line [Drive|_] = ?config(priv_dir, Config), + ?line <> = filename:absname(<<"foo">>, <>), + ?line <> = filename:absname(<<"../ebin">>, + <>), + ?line <> = filename:absname(<<"/erlang">>, <>), + ?line <> = filename:absname(<<"/erlang/src">>, + <>), + ?line <> = filename:absname(<<"\\erlang\\src">>, + <>), + ?line <> = filename:absname(<>, + <>), + ?line <> = filename:absname(<>, + <>), + ?line <> = + filename:absname(<>, <>), + ?line <<"a:/erlang">> = filename:absname(<<"a:erlang">>, <>), + + ?line file:set_cwd(<>), + ?line <> = filename:absname(foo, <>), + ?line <> = filename:absname(<<"foo">>, <>), + ?line <> = filename:absname(<<"../ebin">>, <>), + ?line <> = filename:absname(<<"/erlang">>, <>), + ?line <> = filename:absname(<<"/erlang/src">>, + <>), + ?line <> = filename:absname(<<"\\erlang\\\\src">>, + <>), + ?line <> = filename:absname(<>, + <>), + ?line <> = filename:absname(<>, + <>), + ?line <<"a:/erlang">> = filename:absname(<<"a:erlang">>, <>), + + ok; + {unix, _} -> + ?line <<"/usr/foo">> = filename:absname(<<"foo">>, <<"/usr">>), + ?line <<"/usr/../ebin">> = filename:absname(<<"../ebin">>, <<"/usr">>), + + ?line <<"/foo">> = filename:absname(<<"foo">>, <<"/">>), + ?line <<"/../ebin">> = filename:absname(<<"../ebin">>, <<"/">>), + ?line <<"/erlang">> = filename:absname(<<"/erlang">>, <<"/">>), + ?line <<"/erlang/src">> = filename:absname(<<"/erlang/src">>, <<"/">>), + ?line <<"/erlang/src">> = filename:absname(<<"/erlang///src">>, <<"/">>), + ok + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +basename_bin_1(Config) when is_list(Config) -> + ?line Dog = test_server:timetrap(test_server:seconds(10)), + ?line <<".">> = filename:basename(<<".">>), + ?line <<"foo">> = filename:basename(<<"foo">>), + ?line <<"foo">> = filename:basename(<<"/usr/foo">>), + ?line <<"foo.erl">> = filename:basename(<<"A:usr/foo.erl">>), + ?line case os:type() of + {win32, _} -> + ?line <<"foo">> = filename:basename(<<"A:\\usr\\foo">>), + ?line <<"foo">> = filename:basename(<<"A:foo">>); + {unix, _} -> + ?line <<"strange\\but\\true">> = + filename:basename(<<"strange\\but\\true">>) + end, + ?line test_server:timetrap_cancel(Dog), + ok. + +basename_bin_2(Config) when is_list(Config) -> + ?line Dog = test_server:timetrap(test_server:seconds(10)), + ?line <<".">> = filename:basename(<<".">>, <<".erl">>), + ?line <<"foo">> = filename:basename(<<"foo.erl">>, <<".erl">>), + ?line <<"foo.erl">> = filename:basename(<<"/usr/foo.erl">>, <<".hrl">>), + ?line <<"foo.erl">> = filename:basename(<<"/usr.hrl/foo.erl">>, <<".hrl">>), + ?line <<"foo">> = filename:basename(<<"/usr.hrl/foo">>, <<".hrl">>), + ?line <<"foo">> = filename:basename(<<"usr/foo/">>, <<".erl">>), + ?line <<"foo.erl">> = filename:basename(<<"usr/foo.erl/">>, <<".erl">>), + ?line case os:type() of + {win32, _} -> + ?line <<"foo">> = filename:basename(<<"A:foo">>, <<".erl">>), + ?line <<"foo.erl">> = filename:basename(<<"a:\\usr\\foo.erl">>, + <<".hrl">>), + ?line <<"foo.erl">> = filename:basename(<<"c:\\usr.hrl\\foo.erl">>, + <<".hrl">>), + ?line <<"foo">> = filename:basename(<<"A:\\usr\\foo">>, <<".hrl">>); + {unix, _} -> + ?line <<"strange\\but\\true">> = + filename:basename(<<"strange\\but\\true.erl">>, <<".erl">>), + ?line <<"strange\\but\\true">> = + filename:basename(<<"strange\\but\\true">>, <<".erl">>) + end, + ?line test_server:timetrap_cancel(Dog), + ok. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +dirname_bin(Config) when is_list(Config) -> + case os:type() of + {win32,_} -> + ?line <<"A:/usr">> = filename:dirname(<<"A:/usr/foo.erl">>), + ?line <<"A:usr">> = filename:dirname(<<"A:usr/foo.erl">>), + ?line <<"/usr">> = filename:dirname(<<"\\usr\\foo.erl">>), + ?line <<"/">> = filename:dirname(<<"\\usr">>), + ?line <<"A:">> = filename:dirname(<<"A:">>); + vxworks -> + ?line <<"net:/usr">> = filename:dirname(<<"net:/usr/foo.erl">>), + ?line <<"/disk0:/usr">> = filename:dirname(<<"/disk0:/usr/foo.erl">>), + ?line <<"/usr">> = filename:dirname(<<"\\usr\\foo.erl">>), + ?line <<"/usr">> = filename:dirname(<<"\\usr">>), + ?line <<"net:">> = filename:dirname(<<"net:">>); + _ -> true + end, + ?line <<"usr">> = filename:dirname(<<"usr///foo.erl">>), + ?line <<".">> = filename:dirname(<<"foo.erl">>), + ?line <<".">> = filename:dirname(<<".">>), + case os:type() of + vxworks -> + ?line <<"/">> = filename:dirname(<<"/">>), + ?line <<"/usr">> = filename:dirname(<<"/usr">>); + _ -> + ?line <<"/">> = filename:dirname(<<"/">>), + ?line <<"/">> = filename:dirname(<<"/usr">>) + end, + ok. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +extension_bin(Config) when is_list(Config) -> + ?line <<".erl">> = filename:extension(<<"A:/usr/foo.erl">>), + ?line <<".erl">> = filename:extension(<<"A:/usr/foo.nisse.erl">>), + ?line <<".erl">> = filename:extension(<<"A:/usr.bar/foo.nisse.erl">>), + ?line <<"">> = filename:extension(<<"A:/usr.bar/foo">>), + ?line <<"">> = filename:extension(<<"A:/usr/foo">>), + ?line case os:type() of + {win32, _} -> + ?line <<"">> = filename:extension(<<"A:\\usr\\foo">>), + ?line <<".erl">> = + filename:extension(<<"A:/usr.bar/foo.nisse.erl">>), + ?line <<"">> = filename:extension(<<"A:/usr.bar/foo">>), + ok; + vxworks -> + ?line <<"">> = filename:extension(<<"/disk0:\\usr\\foo">>), + ?line <<".erl">> = + filename:extension(<<"net:/usr.bar/foo.nisse.erl">>), + ?line <<"">> = filename:extension(<<"net:/usr.bar/foo">>), + ok; + _ -> ok + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +join_bin(Config) when is_list(Config) -> + ?line <<"/">> = filename:join([<<"/">>]), + ?line <<"/">> = filename:join([<<"//">>]), + ?line <<"usr/foo.erl">> = filename:join(<<"usr">>,<<"foo.erl">>), + ?line <<"/src/foo.erl">> = filename:join(usr, <<"/src/foo.erl">>), + ?line <<"/src/foo.erl">> = filename:join([<<"/src/">>,'foo.erl']), + ?line <<"/src/foo.erl">> = filename:join(<<"usr">>, ["/sr", 'c/foo.erl']), + ?line <<"/src/foo.erl">> = filename:join(<<"usr">>, <<"/src/foo.erl">>), + + %% Make sure that redundant slashes work too. + ?line <<"a/b/c/d/e/f/g">> = filename:join([<<"a//b/c/////d//e/f/g">>]), + ?line <<"a/b/c/d/e/f/g">> = filename:join([<<"a//b/c/">>, <<"d//e/f/g">>]), + ?line <<"a/b/c/d/e/f/g">> = filename:join([<<"a//b/c">>, <<"d//e/f/g">>]), + ?line <<"/d/e/f/g">> = filename:join([<<"a//b/c">>, <<"/d//e/f/g">>]), + ?line <<"/d/e/f/g">> = filename:join([<<"a//b/c">>, <<"//d//e/f/g">>]), + + ?line <<"foo/bar">> = filename:join([$f,$o,$o,$/,[]], <<"bar">>), + + ?line case os:type() of + {win32, _} -> + ?line <<"d:/">> = filename:join([<<"D:/">>]), + ?line <<"d:/">> = filename:join([<<"D:\\">>]), + ?line <<"d:/abc">> = filename:join([<<"D:/">>, <<"abc">>]), + ?line <<"d:abc">> = filename:join([<<"D:">>, <<"abc">>]), + ?line <<"a/b/c/d/e/f/g">> = + filename:join([<<"a//b\\c//\\/\\d/\\e/f\\g">>]), + ?line <<"a:usr/foo.erl">> = + filename:join([<<"A:">>,<<"usr">>,<<"foo.erl">>]), + ?line <<"/usr/foo.erl">> = + filename:join([<<"A:">>,<<"/usr">>,<<"foo.erl">>]), + ?line <<"c:usr">> = filename:join(<<"A:">>,<<"C:usr">>), + ?line <<"a:usr">> = filename:join(<<"A:">>,<<"usr">>), + ?line <<"c:/usr">> = filename:join(<<"A:">>, <<"C:/usr">>), + ?line <<"c:/usr/foo.erl">> = + filename:join([<<"A:">>,<<"C:/usr">>,<<"foo.erl">>]), + ?line <<"c:usr/foo.erl">> = + filename:join([<<"A:">>,<<"C:usr">>,<<"foo.erl">>]), + ?line <<"d:/foo">> = filename:join([$D, $:, $/, []], <<"foo">>), + ok; + {unix, _} -> + ok + end. + +pathtype_bin(Config) when is_list(Config) -> + ?line relative = filename:pathtype(<<"..">>), + ?line relative = filename:pathtype(<<"foo">>), + ?line relative = filename:pathtype(<<"foo/bar">>), + ?line relative = filename:pathtype('foo/bar'), + case os:type() of + {win32, _} -> + ?line volumerelative = filename:pathtype(<<"/usr/local/bin">>), + ?line volumerelative = filename:pathtype(<<"A:usr/local/bin">>), + ok; + {unix, _} -> + ?line absolute = filename:pathtype(<<"/">>), + ?line absolute = filename:pathtype(<<"/usr/local/bin">>), + ok + end. + +rootname_bin(Config) when is_list(Config) -> + ?line <<"/jam.src/kalle">> = filename:rootname(<<"/jam.src/kalle">>), + ?line <<"/jam.src/foo">> = filename:rootname(<<"/jam.src/foo.erl">>), + ?line <<"/jam.src/foo">> = filename:rootname(<<"/jam.src/foo.erl">>, <<".erl">>), + ?line <<"/jam.src/foo.jam">> = filename:rootname(<<"/jam.src/foo.jam">>, <<".erl">>), + ?line <<"/jam.src/foo.jam">> = filename:rootname(["/jam.sr",'c/foo.j',"am"],<<".erl">>), + ?line <<"/jam.src/foo.jam">> = filename:rootname(["/jam.sr",'c/foo.j'|am],<<".erl">>), + ok. + +split_bin(Config) when is_list(Config) -> + case os:type() of + vxworks -> + ?line [<<"/usr">>,<<"local">>,<<"bin">>] = filename:split(<<"/usr/local/bin">>); + _ -> + ?line [<<"/">>,<<"usr">>,<<"local">>,<<"bin">>] = filename:split(<<"/usr/local/bin">>) + end, + ?line [<<"foo">>,<<"bar">>]= filename:split(<<"foo/bar">>), + ?line [<<"foo">>, <<"bar">>, <<"hello">>]= filename:split(<<"foo////bar//hello">>), + case os:type() of + {win32,_} -> + ?line [<<"a:/">>,<<"msdev">>,<<"include">>] = + filename:split(<<"a:/msdev/include">>), + ?line [<<"a:/">>,<<"msdev">>,<<"include">>] = + filename:split(<<"A:/msdev/include">>), + ?line [<<"msdev">>,<<"include">>] = + filename:split(<<"msdev\\include">>), + ?line [<<"a:/">>,<<"msdev">>,<<"include">>] = + filename:split(<<"a:\\msdev\\include">>), + ?line [<<"a:">>,<<"msdev">>,<<"include">>] = + filename:split(<<"a:msdev\\include">>), + ok; + _ -> + ok + end. + -- cgit v1.2.3