From 63eeba2f6829aac2644eaf212ebef9cdf4b59e8d Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Thu, 14 Oct 2010 10:12:57 +0200 Subject: Handle binary file names and conversion of unicode strings --- erts/emulator/beam/bif.tab | 4 +- erts/emulator/beam/erl_unicode.c | 4 +- erts/emulator/drivers/common/efile_drv.c | 18 +++--- erts/preloaded/src/prim_file.erl | 106 ++++++++++++++++++------------- lib/kernel/src/file.erl | 2 + lib/kernel/src/file_io_server.erl | 4 +- lib/stdlib/src/c.erl | 20 +++++- lib/stdlib/src/filename.erl | 79 ++++++++++++++++++++++- 8 files changed, 177 insertions(+), 60 deletions(-) diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index cf251d9016..3de5e63a63 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -797,8 +797,8 @@ bif erlang:nif_error/2 # # Helpers for unicode filenames # -bif file:internal_name2native/1 -bif file:internal_native2name/1 +bif prim_file:internal_name2native/1 +bif prim_file:internal_native2name/1 bif file:native_name_encoding/0 # # Obsolete diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c index 3e7a935cef..6d8e9ccd90 100644 --- a/erts/emulator/beam/erl_unicode.c +++ b/erts/emulator/beam/erl_unicode.c @@ -2105,7 +2105,7 @@ L_Again: /* Restart with sublist, old listend was pushed on stack */ -BIF_RETTYPE file_internal_name2native_1(BIF_ALIST_1) +BIF_RETTYPE prim_file_internal_name2native_1(BIF_ALIST_1) { int encoding = erts_get_native_filename_encoding(); Sint need; @@ -2184,7 +2184,7 @@ BIF_RETTYPE file_internal_name2native_1(BIF_ALIST_1) BIF_RET(bin_term); } -BIF_RETTYPE file_internal_native2name_1(BIF_ALIST_1) +BIF_RETTYPE prim_file_internal_native2name_1(BIF_ALIST_1) { Eterm real_bin; Uint offset; diff --git a/erts/emulator/drivers/common/efile_drv.c b/erts/emulator/drivers/common/efile_drv.c index c450f10f48..5aa5f60d0f 100644 --- a/erts/emulator/drivers/common/efile_drv.c +++ b/erts/emulator/drivers/common/efile_drv.c @@ -67,6 +67,8 @@ #define FILE_RESP_LDATA 6 #define FILE_RESP_N2DATA 7 #define FILE_RESP_EOF 8 +#define FILE_RESP_FNAME 9 +#define FILE_RESP_ALL_DATA 10 /* Options */ @@ -1591,7 +1593,7 @@ static void invoke_readdir(void *data) buf_sz = READDIR_BUFSIZE - 4/* EOB */; } - p[4] = FILE_RESP_OK; + p[4] = FILE_RESP_FNAME; buf_sz -= 4 + 1; str = p + 4 + 1; ASSERT(buf_sz >= MAXPATHLEN + 1); @@ -1911,7 +1913,7 @@ file_async_ready(ErlDrvData e, ErlDrvThreadData data) if (!d->result_ok) reply_error(desc, &d->errInfo); else { - header[0] = FILE_RESP_OK; + header[0] = FILE_RESP_ALL_DATA; TRACE_C('R'); driver_output_binary(desc->port, header, 1, d->c.read_file.binp, @@ -1968,10 +1970,10 @@ file_async_ready(ErlDrvData e, ErlDrvThreadData data) if (!d->result_ok) reply_error(desc, &d->errInfo); else { - resbuf[0] = FILE_RESP_OK; + resbuf[0] = FILE_RESP_FNAME; length = 1+strlen((char*) resbuf+1); TRACE_C('R'); - driver_output2(desc->port, resbuf, length, NULL, 0); + driver_output2(desc->port, resbuf, 1, resbuf+1, length-1); } free_data(data); break; @@ -2031,7 +2033,7 @@ file_async_ready(ErlDrvData e, ErlDrvThreadData data) int sz = get_int32(p); while (sz) { /* 0 == EOB */ p += 4; - driver_output2(desc->port, p, sz, NULL, 0); + driver_output2(desc->port, p, 1, p+1, sz-1); p += sz; sz = get_int32(p); } @@ -2210,12 +2212,12 @@ file_output(ErlDrvData e, char* buf, int count) errInfo.posix_errno = 0; dir_handle = NULL; - resbuf[0] = FILE_RESP_OK; + resbuf[0] = FILE_RESP_FNAME; while (efile_readdir(&errInfo, name, &dir_handle, resbuf+1, RESBUFSIZE)) { - int length = 1 + strlen(resbuf+1); - driver_output2(desc->port, resbuf, length, NULL, 0); + int length = strlen(resbuf+1); + driver_output2(desc->port, resbuf, 1, resbuf+1, length); } if (errInfo.posix_errno != 0) { reply_error(desc, &errInfo); diff --git a/erts/preloaded/src/prim_file.erl b/erts/preloaded/src/prim_file.erl index 7f24889bb2..b15997572c 100644 --- a/erts/preloaded/src/prim_file.erl +++ b/erts/preloaded/src/prim_file.erl @@ -109,6 +109,8 @@ -define(FILE_RESP_LDATA, 6). -define(FILE_RESP_N2DATA, 7). -define(FILE_RESP_EOF, 8). +-define(FILE_RESP_FNAME, 9). +-define(FILE_RESP_ALL_DATA, 10). %% Open modes for the driver's open function. -define(EFILE_MODE_READ, 1). @@ -153,7 +155,7 @@ %% Opens a file using the driver port Port. Returns {error, Reason} %% | {ok, FileDescriptor} open(Port, File, ModeList) when is_port(Port), - is_list(File), + (is_list(File) orelse is_binary(File)), is_list(ModeList) -> case open_mode(ModeList) of {Mode, _Portopts, _Setopts} -> @@ -165,10 +167,11 @@ open(_,_,_) -> {error, badarg}. %% Opens a file. Returns {error, Reason} | {ok, FileDescriptor}. -open(File, ModeList) when is_list(File), is_list(ModeList) -> +open(File, ModeList) when (is_list(File) orelse is_binary(File)), + is_list(ModeList) -> case open_mode(ModeList) of {Mode, Portopts, Setopts} -> - open_int({?FD_DRV, Portopts}, File, Mode, Setopts); + open_int({?FD_DRV, Portopts},File, Mode, Setopts); Reason -> {error, Reason} end; @@ -196,7 +199,7 @@ open_int({Driver, Portopts}, File, Mode, Setopts) -> end; open_int(Port, File, Mode, Setopts) -> M = Mode band ?EFILE_MODE_MASK, - case drv_command(Port, [<>, File, 0]) of + case drv_command(Port, [<>, pathname(File), 0]) of {ok, Number} -> open_int_setopts(Port, Number, Setopts); Error -> @@ -489,7 +492,7 @@ ipread_s32bu_p32bu(#file_descriptor{module = ?MODULE, data = {_, _}}, %% Returns {ok, Contents} | {error, Reason} -read_file(File) -> +read_file(File) when (is_list(File) orelse is_binary(File)) -> case drv_open(?FD_DRV, [binary]) of {ok, Port} -> Result = read_file(Port, File), @@ -497,11 +500,14 @@ read_file(File) -> Result; {error, _} = Error -> Error - end. + end; +read_file(_) -> + {error, badarg}. %% Takes a Port opened with open/1. -read_file(Port, File) when is_port(Port) -> - Cmd = [?FILE_READ_FILE | File], +read_file(Port, File) when is_port(Port), + (is_list(File) orelse is_binary(File))-> + Cmd = [?FILE_READ_FILE | pathname(File)], case drv_command(Port, Cmd) of {error, enomem} -> %% It could possibly help to do a @@ -512,12 +518,14 @@ read_file(Port, File) when is_port(Port) -> drv_command(Port, Cmd); Result -> Result - end. + end; +read_file(_,_) -> + {error, badarg}. %% Returns {error, Reason} | ok. -write_file(File, Bin) -> +write_file(File, Bin) when (is_list(File) orelse is_binary(File)) -> case open(File, [binary, write]) of {ok, Handle} -> Result = write(Handle, Bin), @@ -525,8 +533,10 @@ write_file(File, Bin) -> Result; Error -> Error - end. - + end; +write_file(_, _) -> + {error, badarg}. + %%%----------------------------------------------------------------- @@ -539,7 +549,7 @@ write_file(File, Bin) -> %% Returns {ok, Port}, the Port should be used as first argument in all %% the following functions. Returns {error, Reason} upon failure. start() -> - try erlang:open_port({spawn, atom_to_list(?DRV)}, []) of + try erlang:open_port({spawn, atom_to_list(?DRV)}, [binary]) of Port -> {ok, Port} catch @@ -596,7 +606,7 @@ get_cwd(_, _) -> {error, badarg}. get_cwd_int(Drive) -> - get_cwd_int({?DRV, []}, Drive). + get_cwd_int({?DRV, [binary]}, Drive). get_cwd_int(Port, Drive) -> drv_command(Port, <>). @@ -606,7 +616,7 @@ get_cwd_int(Port, Drive) -> %% set_cwd/{1,2} set_cwd(Dir) -> - set_cwd_int({?DRV, []}, Dir). + set_cwd_int({?DRV, [binary]}, Dir). set_cwd(Port, Dir) when is_port(Port) -> set_cwd_int(Port, Dir). @@ -632,89 +642,88 @@ set_cwd_int(Port, Dir0) -> end), %% Dir is now either a string or an EXIT tuple. %% An EXIT tuple will fail in the following catch. - drv_command(Port, [?FILE_CHDIR, Dir, 0]). + drv_command(Port, [?FILE_CHDIR, pathname(Dir), 0]). %% delete/{1,2} delete(File) -> - delete_int({?DRV, []}, File). + delete_int({?DRV, [binary]}, File). delete(Port, File) when is_port(Port) -> delete_int(Port, File). delete_int(Port, File) -> - drv_command(Port, [?FILE_DELETE, File, 0]). + drv_command(Port, [?FILE_DELETE, pathname(File), 0]). %% rename/{2,3} rename(From, To) -> - rename_int({?DRV, []}, From, To). + rename_int({?DRV, [binary]}, From, To). rename(Port, From, To) when is_port(Port) -> rename_int(Port, From, To). rename_int(Port, From, To) -> - drv_command(Port, [?FILE_RENAME, From, 0, To, 0]). + drv_command(Port, [?FILE_RENAME, pathname(From), 0, pathname(To), 0]). %% make_dir/{1,2} make_dir(Dir) -> - make_dir_int({?DRV, []}, Dir). + make_dir_int({?DRV, [binary]}, Dir). make_dir(Port, Dir) when is_port(Port) -> make_dir_int(Port, Dir). make_dir_int(Port, Dir) -> - drv_command(Port, [?FILE_MKDIR, Dir, 0]). + drv_command(Port, [?FILE_MKDIR, pathname(Dir), 0]). %% del_dir/{1,2} del_dir(Dir) -> - del_dir_int({?DRV, []}, Dir). + del_dir_int({?DRV, [binary]}, Dir). del_dir(Port, Dir) when is_port(Port) -> del_dir_int(Port, Dir). del_dir_int(Port, Dir) -> - drv_command(Port, [?FILE_RMDIR, Dir, 0]). + drv_command(Port, [?FILE_RMDIR, pathname(Dir), 0]). %% read_file_info/{1,2} read_file_info(File) -> - read_file_info_int({?DRV, []}, File). + read_file_info_int({?DRV, [binary]}, File). read_file_info(Port, File) when is_port(Port) -> read_file_info_int(Port, File). read_file_info_int(Port, File) -> - drv_command(Port, [?FILE_FSTAT, File, 0]). + drv_command(Port, [?FILE_FSTAT, pathname(File), 0]). %% altname/{1,2} altname(File) -> - altname_int({?DRV, []}, File). + altname_int({?DRV, [binary]}, File). altname(Port, File) when is_port(Port) -> altname_int(Port, File). altname_int(Port, File) -> - drv_command(Port, [?FILE_ALTNAME, File, 0]). - + drv_command(Port, [?FILE_ALTNAME, pathname(File), 0]). %% write_file_info/{2,3} write_file_info(File, Info) -> - write_file_info_int({?DRV, []}, File, Info). + write_file_info_int({?DRV, [binary]}, File, Info). write_file_info(Port, File, Info) when is_port(Port) -> write_file_info_int(Port, File, Info). @@ -740,72 +749,72 @@ write_file_info_int(Port, date_to_bytes(Atime), date_to_bytes(Mtime), date_to_bytes(Ctime), - File, 0]). + pathname(File), 0]). %% make_link/{2,3} make_link(Old, New) -> - make_link_int({?DRV, []}, Old, New). + make_link_int({?DRV, [binary]}, Old, New). make_link(Port, Old, New) when is_port(Port) -> make_link_int(Port, Old, New). make_link_int(Port, Old, New) -> - drv_command(Port, [?FILE_LINK, Old, 0, New, 0]). + drv_command(Port, [?FILE_LINK, pathname(Old), 0, pathname(New), 0]). %% make_symlink/{2,3} make_symlink(Old, New) -> - make_symlink_int({?DRV, []}, Old, New). + make_symlink_int({?DRV, [binary]}, Old, New). make_symlink(Port, Old, New) when is_port(Port) -> make_symlink_int(Port, Old, New). make_symlink_int(Port, Old, New) -> - drv_command(Port, [?FILE_SYMLINK, Old, 0, New, 0]). + drv_command(Port, [?FILE_SYMLINK, pathname(Old), 0, pathname(New), 0]). %% read_link/{2,3} read_link(Link) -> - read_link_int({?DRV, []}, Link). + read_link_int({?DRV, [binary]}, Link). read_link(Port, Link) when is_port(Port) -> read_link_int(Port, Link). read_link_int(Port, Link) -> - drv_command(Port, [?FILE_READLINK, Link, 0]). + drv_command(Port, [?FILE_READLINK, pathname(Link), 0]). %% read_link_info/{2,3} read_link_info(Link) -> - read_link_info_int({?DRV, []}, Link). + read_link_info_int({?DRV, [binary]}, Link). read_link_info(Port, Link) when is_port(Port) -> read_link_info_int(Port, Link). read_link_info_int(Port, Link) -> - drv_command(Port, [?FILE_LSTAT, Link, 0]). + drv_command(Port, [?FILE_LSTAT, pathname(Link), 0]). %% list_dir/{1,2} list_dir(Dir) -> - list_dir_int({?DRV, []}, Dir). + list_dir_int({?DRV, [binary]}, Dir). list_dir(Port, Dir) when is_port(Port) -> list_dir_int(Port, Dir). list_dir_int(Port, Dir) -> - drv_command(Port, [?FILE_READDIR, Dir, 0], []). + drv_command(Port, [?FILE_READDIR, pathname(Dir), 0], []). @@ -1026,8 +1035,6 @@ lseek_position(_) -> translate_response(?FILE_RESP_OK, []) -> ok; -translate_response(?FILE_RESP_OK, Data) -> - {ok, Data}; translate_response(?FILE_RESP_ERROR, List) when is_list(List) -> {error, list_to_atom(List)}; translate_response(?FILE_RESP_NUMBER, List) -> @@ -1074,6 +1081,16 @@ translate_response(?FILE_RESP_N2DATA = X, L0) when is_list(L0) -> end; translate_response(?FILE_RESP_EOF, []) -> eof; +translate_response(?FILE_RESP_FNAME, []) -> + ok; +translate_response(?FILE_RESP_FNAME, Data) when is_binary(Data) -> + {ok, prim_file:internal_native2name(Data)}; +translate_response(?FILE_RESP_FNAME, Data) -> + {ok, Data}; + +translate_response(?FILE_RESP_ALL_DATA, Data) -> + {ok, Data}; + translate_response(X, Data) -> {error, {bad_response_from_port, [X | Data]}}. @@ -1209,3 +1226,6 @@ lists_split([Hd | Tl], N, Rev) -> reverse(X) -> lists:reverse(X, []). reverse(L, T) -> lists:reverse(L, T). + +pathname(File) -> + prim_file:internal_name2native(File). diff --git a/lib/kernel/src/file.erl b/lib/kernel/src/file.erl index 89432869e9..579b9132c6 100644 --- a/lib/kernel/src/file.erl +++ b/lib/kernel/src/file.erl @@ -1033,6 +1033,8 @@ path_open_first([], _Name, _Mode, LastError) -> %% Generates a flat file name from a deep list of atoms and %% characters (integers). +file_name(N) when is_binary(N) -> + N; file_name(N) -> try file_name_1(N) diff --git a/lib/kernel/src/file_io_server.erl b/lib/kernel/src/file_io_server.erl index 39dc32bb79..14da9c1a55 100644 --- a/lib/kernel/src/file_io_server.erl +++ b/lib/kernel/src/file_io_server.erl @@ -44,11 +44,11 @@ format_error(ErrorId) -> erl_posix_msg:message(ErrorId). start(Owner, FileName, ModeList) - when is_pid(Owner), is_list(FileName), is_list(ModeList) -> + when is_pid(Owner), (is_list(FileName) orelse is_binary(FileName)), is_list(ModeList) -> do_start(spawn, Owner, FileName, ModeList). start_link(Owner, FileName, ModeList) - when is_pid(Owner), is_list(FileName), is_list(ModeList) -> + when is_pid(Owner), (is_list(FileName) orelse is_binary(FileName)), is_list(ModeList) -> do_start(spawn_link, Owner, FileName, ModeList). %%%----------------------------------------------------------------- diff --git a/lib/stdlib/src/c.erl b/lib/stdlib/src/c.erl index 6d50a575eb..399b91b92f 100644 --- a/lib/stdlib/src/c.erl +++ b/lib/stdlib/src/c.erl @@ -659,7 +659,7 @@ portformat(Name, Id, Cmd) -> pwd() -> case file:get_cwd() of {ok, Str} -> - ok = io:format("~s\n", [Str]); + ok = io:format("~s\n", [fixup_one_bin(Str)]); {error, _} -> ok = io:format("Cannot determine current directory\n") end. @@ -684,11 +684,27 @@ ls() -> ls(Dir) -> case file:list_dir(Dir) of {ok, Entries} -> - ls_print(sort(Entries)); + ls_print(sort(fixup_bin(Entries))); {error,_E} -> format("Invalid directory\n") end. +fixup_one_bin(X) when is_binary(X) -> + L = binary_to_list(X), + [ if + El > 127 -> + $?; + true -> + El + end || El <- L]; +fixup_one_bin(X) -> + X. +fixup_bin([H|T]) -> + [fixup_one_bin(H) | fixup_bin(T)]; +fixup_bin([]) -> + []. + + ls_print([]) -> ok; ls_print(L) -> Width = min([max(lengths(L, [])), 40]) + 5, diff --git a/lib/stdlib/src/filename.erl b/lib/stdlib/src/filename.erl index 40df54fe54..9ca4b808e1 100644 --- a/lib/stdlib/src/filename.erl +++ b/lib/stdlib/src/filename.erl @@ -228,7 +228,7 @@ basename([$/|[]], Ext, Tail, DrvSep2) -> basename([], Ext, Tail, DrvSep2); basename([$/|Rest], Ext, _Tail, DrvSep2) -> basename(Rest, Ext, [], DrvSep2); -basename([$\\|Rest], Ext, Tail, DirSep2) when is_integer(DirSep2) -> +basename([DirSep2|Rest], Ext, Tail, DirSep2) when is_integer(DirSep2) -> basename([$/|Rest], Ext, Tail, DirSep2); basename([Char|Rest], Ext, Tail, DrvSep2) when is_integer(Char) -> basename(Rest, Ext, [Char|Tail], DrvSep2); @@ -241,6 +241,39 @@ basename([], _Ext, Tail, _DrvSep2) -> %% dirname("kalle.erl") -> "." -spec dirname(file:name()) -> file:filename(). +dirname(Name) when is_binary(Name) -> + {Dsep,Drivesep} = separators(), + SList = case Dsep of + Sep when is_integer(Sep) -> + [ <> ]; + _ -> + [] + end, + {XPart0,Dirs} = case Drivesep of + X when is_integer(X) -> + case Name of + <> when ?IS_DRIVELETTER(DL) -> + {<>,Rest}; + _ -> + {<<>>,Name} + end; + _ -> + {<<>>,Name} + end, + Parts0 = binary:split(Dirs,[<<"/">>|SList],[global]), + %% Fairly short lists of parts, OK to reverse twice... + Parts = case Parts0 of + [] -> []; + _ -> lists:reverse(fstrip(tl(lists:reverse(Parts0)))) + end, + XPart = case {Parts,XPart0} of + {[],<<>>} -> + <<".">>; + _ -> + XPart0 + end, + dirjoin(Parts,XPart,<<"/">>); + dirname(Name0) -> Name = flatten(Name0), dirname(Name, [], [], separators()). @@ -271,6 +304,26 @@ dirname([], [DrvSep,Dl], File, {_,DrvSep}) -> end; dirname([], Dir, _, _) -> lists:reverse(Dir). + +%% Compatibility with lists variant, remove trailing slashes +fstrip([<<>>,X|Y]) -> + fstrip([X|Y]); +fstrip(A) -> + A. + + +dirjoin([<<>>|T],Acc,Sep) -> + dirjoin1(T,<>,Sep); +dirjoin(A,B,C) -> + dirjoin1(A,B,C). + +dirjoin1([],Acc,_) -> + Acc; +dirjoin1([One],Acc,_) -> + <>; +dirjoin1([H|T],Acc,Sep) -> + dirjoin(T,<>,Sep). + %% Given a filename string, returns the file extension, %% including the period. Returns an empty list if there @@ -282,6 +335,30 @@ dirname([], Dir, _, _) -> %% On Windows: fn:dirname("\\usr\\src/kalle.erl") -> "/usr/src" -spec extension(file:name()) -> file:filename(). +extension(Name) when is_binary(Name) -> + {Dsep,_} = separators(), + SList = case Dsep of + Sep when is_integer(Sep) -> + [ <> ]; + _ -> + [] + end, + case binary:matches(Name,[<<".">>]) of + nomatch -> % Bug in binary workaround :( + <<>>; + [] -> + <<>>; + List -> + {Pos,_} = lists:last(List), + <<_:Pos/binary,Part/binary>> = Name, + case binary:match(Part,[<<"/">>|SList]) of + nomatch -> + Part; + _ -> + <<>> + end + end; + extension(Name0) -> Name = flatten(Name0), extension(Name, [], major_os_type()). -- cgit v1.2.3