From b00651f9701f6d352b270af3700abce0e65aa5b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 5 Feb 2013 09:13:41 +0100 Subject: prim_file: Always open non-file ports in binary mode Ports for operations that did not directly operate on a file (such as listing the files in a directory) were always opened in binary mode, but there was still code that supported such ports opened in non-binary mode. Since we are about to update the code reading directories, and we don't want to bother with supporting non-binary ports, make sure that we force the use of binary mode. --- erts/preloaded/src/erl_prim_loader.erl | 4 ++-- erts/preloaded/src/prim_file.erl | 34 +++++----------------------------- 2 files changed, 7 insertions(+), 31 deletions(-) (limited to 'erts/preloaded') diff --git a/erts/preloaded/src/erl_prim_loader.erl b/erts/preloaded/src/erl_prim_loader.erl index d36fdeba3f..c2fac8c0fc 100644 --- a/erts/preloaded/src/erl_prim_loader.erl +++ b/erts/preloaded/src/erl_prim_loader.erl @@ -149,7 +149,7 @@ start_it("inet", Id, Pid, Hosts) -> start_it("efile", Id, Pid, _Hosts) -> process_flag(trap_exit, true), - {ok, Port} = prim_file:open([binary]), + {ok, Port} = prim_file:start(), init_ack(Pid), MultiGet = case erlang:system_info(thread_pool_size) of 0 -> false; @@ -434,7 +434,7 @@ efile_multi_get_file_from_port2(_MFs, 0, _Max, State, _Paths, _Fun, _Ref, Ret) - efile_par_get_file(Ref, State, {Mod,File} = MF, Paths, Pid, Fun) -> %% One port for each file read in "parallel": - case prim_file:open([binary]) of + case prim_file:start() of {ok, Port} -> Port0 = State#state.data, State1 = State#state{data = Port}, diff --git a/erts/preloaded/src/prim_file.erl b/erts/preloaded/src/prim_file.erl index 50adf9c89d..71ec97eb89 100644 --- a/erts/preloaded/src/prim_file.erl +++ b/erts/preloaded/src/prim_file.erl @@ -210,12 +210,7 @@ open(_, _) -> %% Opens a port that can be used for open/3 or read_file/2. %% Returns {ok, Port} | {error, Reason}. 
open(Portopts) when is_list(Portopts) -> - case drv_open(?FD_DRV, Portopts) of - {error, _} = Error -> - Error; - Other -> - Other - end; + drv_open(?FD_DRV, [binary|Portopts]); open(_) -> {error, badarg}. @@ -607,13 +602,7 @@ sendfile(#file_descriptor{module = ?MODULE, data = {Port, _}}, %% Returns {ok, Port}, the Port should be used as first argument in all %% the following functions. Returns {error, Reason} upon failure. start() -> - try erlang:open_port({spawn, ?DRV}, [binary]) of - Port -> - {ok, Port} - catch - error:Reason -> - {error, Reason} - end. + drv_open(?DRV, [binary]). stop(Port) when is_port(Port) -> try erlang:port_close(Port) of @@ -923,7 +912,7 @@ list_dir_int(Port, Dir) -> %% Returns {ok, Port} when successful. drv_open(Driver, Portopts) -> - try erlang:open_port({spawn, Driver}, Portopts) of + try erlang:open_port({spawn_driver, Driver}, Portopts) of Port -> {ok, Port} catch @@ -1205,18 +1194,12 @@ translate_response(?FILE_RESP_N2DATA = X, L0) when is_list(L0) -> end; translate_response(?FILE_RESP_EOF, []) -> eof; -translate_response(?FILE_RESP_FNAME, []) -> - ok; translate_response(?FILE_RESP_FNAME, Data) when is_binary(Data) -> {ok, prim_file:internal_native2name(Data)}; -translate_response(?FILE_RESP_FNAME, Data) -> - {ok, Data}; translate_response(?FILE_RESP_LFNAME, []) -> ok; translate_response(?FILE_RESP_LFNAME, Data) when is_binary(Data) -> {append, transform_lfname(Data)}; -translate_response(?FILE_RESP_LFNAME, Data) -> - {append, transform_lfname(Data)}; translate_response(?FILE_RESP_ALL_DATA, Data) -> {ok, Data}; translate_response(X, Data) -> @@ -1332,15 +1315,8 @@ transform_ldata(0, List, [Size | Sizes], R) -> {Front, Rear} = lists_split(List, Size), transform_ldata(0, Rear, Sizes, [Front | R]). 
-transform_lfname(<<>>) -> []; -transform_lfname(<>) -> - [ prim_file:internal_native2name(Name) | transform_lfname(Names)]; -transform_lfname([]) -> []; -transform_lfname([L1,L2|Names]) -> - L = (L1 bsl 8) bor L2, - {Name, Rest} = lists_split(Names, L), - [Name | transform_lfname(Rest)]. - +transform_lfname(Names) -> + [prim_file:internal_native2name(Name) || <> <= Names]. lists_split(List, 0) when is_list(List) -> {[], List}; -- cgit v1.2.3 From 43093a22099f6b0ec33970163a40f42a6b70b978 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 5 Feb 2013 10:10:45 +0100 Subject: prim_file: Refactor handling of responses Currently, the format of the return value from drv_command/3 is determined solely by the efile driver's response. In a future commit, we will need to produce different return values that also depend on which function in prim_file was called; thus, we will need some way to pass down some sort of state to drv_get_response/2. As a preparation for that, allow the third argument of drv_command/3 to be a fun. That also allows us to remove the convoluted special case handling of the list_dir operation. --- erts/preloaded/src/prim_file.erl | 55 +++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 26 deletions(-) (limited to 'erts/preloaded') diff --git a/erts/preloaded/src/prim_file.erl b/erts/preloaded/src/prim_file.erl index 71ec97eb89..e1d42a4b9a 100644 --- a/erts/preloaded/src/prim_file.erl +++ b/erts/preloaded/src/prim_file.erl @@ -899,9 +899,30 @@ list_dir(Port, Dir) when is_port(Port) -> list_dir_int(Port, Dir). list_dir_int(Port, Dir) -> - drv_command(Port, [?FILE_READDIR, pathname(Dir)], []). - + drv_command(Port, [?FILE_READDIR, pathname(Dir)], + fun(P) -> + case list_dir_response(P, []) of + {ok, RawNames} -> + {ok, list_dir_convert(RawNames)}; + Error -> + Error + end + end). 
+ +list_dir_response(Port, Acc0) -> + case drv_get_response(Port) of + {lfname, []} -> + {ok, Acc0}; + {lfname, Names} -> + Acc = [Name || <> <= Names] ++ Acc0, + list_dir_response(Port, Acc); + Error -> + Error + end. +list_dir_convert([Name|Names]) -> + [prim_file:internal_native2name(Name)|list_dir_convert(Names)]; +list_dir_convert([]) -> []. %%%----------------------------------------------------------------- %%% Functions to communicate with the driver @@ -1017,19 +1038,10 @@ drv_command_nt(Port, Command, R) when is_port(Port) -> %% Receives the response from a driver port. %% Returns: {ok, ListOrBinary}|{error, Reason} -drv_get_response(Port, R) when is_list(R) -> - case drv_get_response(Port) of - ok -> - {ok, R}; - {ok, Name} -> - drv_get_response(Port, [Name|R]); - {append, Names} -> - drv_get_response(Port, append(Names, R)); - Error -> - Error - end; -drv_get_response(Port, _) -> - drv_get_response(Port). +drv_get_response(Port, undefined) -> + drv_get_response(Port); +drv_get_response(Port, Fun) when is_function(Fun, 1) -> + Fun(Port). drv_get_response(Port) -> erlang:bump_reductions(100), @@ -1049,10 +1061,6 @@ drv_get_response(Port) -> %%%----------------------------------------------------------------- %%% Utility functions. -append([I | Is], R) when is_list(R) -> append(Is, [I | R]); -append([], R) -> R. - - %% Converts a list of mode atoms into a mode word for the driver. 
%% Returns {Mode, Portopts, Setopts} where Portopts is a list of %% options for erlang:open_port/2 and Setopts is a list of @@ -1196,10 +1204,8 @@ translate_response(?FILE_RESP_EOF, []) -> eof; translate_response(?FILE_RESP_FNAME, Data) when is_binary(Data) -> {ok, prim_file:internal_native2name(Data)}; -translate_response(?FILE_RESP_LFNAME, []) -> - ok; -translate_response(?FILE_RESP_LFNAME, Data) when is_binary(Data) -> - {append, transform_lfname(Data)}; +translate_response(?FILE_RESP_LFNAME, Data) -> + {lfname, Data}; translate_response(?FILE_RESP_ALL_DATA, Data) -> {ok, Data}; translate_response(X, Data) -> @@ -1315,9 +1321,6 @@ transform_ldata(0, List, [Size | Sizes], R) -> {Front, Rear} = lists_split(List, Size), transform_ldata(0, Rear, Sizes, [Front | R]). -transform_lfname(Names) -> - [prim_file:internal_native2name(Name) || <> <= Names]. - lists_split(List, 0) when is_list(List) -> {[], List}; lists_split(List, N) when is_list(List), is_integer(N), N < 0 -> -- cgit v1.2.3 From 066c26ec53012ccea106a4f27b85ddbdb58bb2bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 5 Feb 2013 12:24:12 +0100 Subject: prim_file: Refactor functions that return filenames --- erts/preloaded/src/prim_file.erl | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'erts/preloaded') diff --git a/erts/preloaded/src/prim_file.erl b/erts/preloaded/src/prim_file.erl index e1d42a4b9a..af1a6127c9 100644 --- a/erts/preloaded/src/prim_file.erl +++ b/erts/preloaded/src/prim_file.erl @@ -656,7 +656,8 @@ get_cwd_int(Drive) -> get_cwd_int({?DRV, [binary]}, Drive). get_cwd_int(Port, Drive) -> - drv_command(Port, <>). + drv_command(Port, <>, + fun handle_fname_response/1). @@ -764,7 +765,8 @@ altname(Port, File) when is_port(Port) -> altname_int(Port, File). altname_int(Port, File) -> - drv_command(Port, [?FILE_ALTNAME, pathname(File)]). + drv_command(Port, [?FILE_ALTNAME, pathname(File)], + fun handle_fname_response/1). 
%% write_file_info/{2,3,4} @@ -857,7 +859,8 @@ read_link(Port, Link) when is_port(Port) -> read_link_int(Port, Link). read_link_int(Port, Link) -> - drv_command(Port, [?FILE_READLINK, pathname(Link)]). + drv_command(Port, [?FILE_READLINK, pathname(Link)], + fun handle_fname_response/1). @@ -927,7 +930,13 @@ list_dir_convert([]) -> []. %%%----------------------------------------------------------------- %%% Functions to communicate with the driver - +handle_fname_response(Port) -> + case drv_get_response(Port) of + {fname, Name} -> + {ok, prim_file:internal_native2name(Name)}; + Error -> + Error + end. %% Opens a driver port and converts any problems into {error, emfile}. %% Returns {ok, Port} when successful. @@ -1202,8 +1211,8 @@ translate_response(?FILE_RESP_N2DATA = X, L0) when is_list(L0) -> end; translate_response(?FILE_RESP_EOF, []) -> eof; -translate_response(?FILE_RESP_FNAME, Data) when is_binary(Data) -> - {ok, prim_file:internal_native2name(Data)}; +translate_response(?FILE_RESP_FNAME, Data) -> + {fname, Data}; translate_response(?FILE_RESP_LFNAME, Data) -> {lfname, Data}; translate_response(?FILE_RESP_ALL_DATA, Data) -> -- cgit v1.2.3 From aa15249fe5d8819e511ca0f09eae1d1207903e53 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Tue, 29 Jan 2013 17:28:20 +0100 Subject: Make prim_file skip invalid filenames in unicode mode The fix affects list_dir and read_link. Raw filenames are now never produced, just consumed even if +fnu or +fna is used on Linux etc. This also adds the options to get error return or error handler warning messages with +fn{u|a}{i|w|e} as an option to erl. This is still not documented and there needs to be other versions of read_dir and read_link to facilitate reading of all types of filenames and links. A check that we will not change to an invalid directory is also needed. 
--- erts/preloaded/ebin/erl_prim_loader.beam | Bin 54120 -> 54360 bytes erts/preloaded/ebin/prim_file.beam | Bin 41128 -> 42004 bytes erts/preloaded/src/erl_prim_loader.erl | 12 +++++++++++- erts/preloaded/src/prim_file.erl | 32 ++++++++++++++++++++++++++++--- 4 files changed, 40 insertions(+), 4 deletions(-) (limited to 'erts/preloaded') diff --git a/erts/preloaded/ebin/erl_prim_loader.beam b/erts/preloaded/ebin/erl_prim_loader.beam index f8c2df3a5a..4a3af265c1 100644 Binary files a/erts/preloaded/ebin/erl_prim_loader.beam and b/erts/preloaded/ebin/erl_prim_loader.beam differ diff --git a/erts/preloaded/ebin/prim_file.beam b/erts/preloaded/ebin/prim_file.beam index ca93edbe25..b1b54ca050 100644 Binary files a/erts/preloaded/ebin/prim_file.beam and b/erts/preloaded/ebin/prim_file.beam differ diff --git a/erts/preloaded/src/erl_prim_loader.erl b/erts/preloaded/src/erl_prim_loader.erl index c2fac8c0fc..7490954f2d 100644 --- a/erts/preloaded/src/erl_prim_loader.erl +++ b/erts/preloaded/src/erl_prim_loader.erl @@ -150,7 +150,17 @@ start_it("inet", Id, Pid, Hosts) -> start_it("efile", Id, Pid, _Hosts) -> process_flag(trap_exit, true), {ok, Port} = prim_file:start(), - init_ack(Pid), + %% Check that we started in a valid directory. + case prim_file:get_cwd(Port) of + {error, _} -> + %% At this point in the startup, we have no error_logger at all. + Report = "Invalid current directory or invalid filename " + "mode: loader cannot read current directory\n", + erlang:display(Report), + exit({error, invalid_current_directory}); + _ -> + init_ack(Pid) + end, MultiGet = case erlang:system_info(thread_pool_size) of 0 -> false; _ -> true diff --git a/erts/preloaded/src/prim_file.erl b/erts/preloaded/src/prim_file.erl index af1a6127c9..27e7640b7e 100644 --- a/erts/preloaded/src/prim_file.erl +++ b/erts/preloaded/src/prim_file.erl @@ -155,13 +155,15 @@ internal_normalize_utf8/1]). -type prim_file_name() :: string() | unicode:unicode_binary(). 
+-type prim_file_name_error() :: 'error' | 'ignore' | 'warning'. -spec internal_name2native(prim_file_name()) -> binary(). internal_name2native(_) -> erlang:nif_error(undefined). --spec internal_native2name(binary()) -> prim_file_name(). +-spec internal_native2name(binary()) -> + prim_file_name() | {'error',prim_file_name_error()}. internal_native2name(_) -> erlang:nif_error(undefined). @@ -924,7 +926,21 @@ list_dir_response(Port, Acc0) -> end. list_dir_convert([Name|Names]) -> - [prim_file:internal_native2name(Name)|list_dir_convert(Names)]; + %% If the filename cannot be converted, return error or ignore + %% with optional error logger warning, depending on +fn{u|a}{i|e|w} + %% emulator switches. + case prim_file:internal_native2name(Name) of + {error, warning} -> + error_logger:warning_msg("Non-unicode filename ~p ignored\n", + [Name]), + list_dir_convert(Names); + {error, ignore} -> + list_dir_convert(Names); + {error, error} -> + {error, {no_translation, Name}}; + Converted when is_list(Converted) -> + [Converted|list_dir_convert(Names)] + end; list_dir_convert([]) -> []. %%%----------------------------------------------------------------- @@ -933,7 +949,17 @@ list_dir_convert([]) -> []. handle_fname_response(Port) -> case drv_get_response(Port) of {fname, Name} -> - {ok, prim_file:internal_native2name(Name)}; + case prim_file:internal_native2name(Name) of + {error, warning} -> + error_logger:warning_msg("Non-unicode filename ~p " + "ignored when reading link\n", + [Name]), + {error, einval}; + {error, _} -> + {error, einval}; + Converted when is_list(Converted) -> + {ok, Converted} + end; Error -> Error end. 
-- cgit v1.2.3 From a8a8d27a461e82af3f88774e07906a920f95e63e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 11 Feb 2013 12:48:08 +0100 Subject: Teach prim_file:set_cwd() to avoid entering non-translatable directories We have decided that we don't want to deal with the complications of prim_file:get_cwd() returning a binary when the current directory name cannot be translated losslessly to a list (i.e. when the run-time system was started with +fnu and the current directory name contains bytes that are not part of a valid UTF-8 sequence). Therefore, if prim_file:set_cwd() is given a binary as the pathname, we will need to check the binary to make sure it can be translated to a list. We will introduce a new BIF, called prim_file:is_translatable/1, which will check both the filename encoding mode and, if it is one of the Unicode modes, the binary as well. We don't need to do anything special if prim_file:set_cwd() is passed a list. --- erts/preloaded/src/prim_file.erl | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'erts/preloaded') diff --git a/erts/preloaded/src/prim_file.erl b/erts/preloaded/src/prim_file.erl index 27e7640b7e..305abb8b0a 100644 --- a/erts/preloaded/src/prim_file.erl +++ b/erts/preloaded/src/prim_file.erl @@ -152,7 +152,8 @@ -export([internal_name2native/1, internal_native2name/1, - internal_normalize_utf8/1]). + internal_normalize_utf8/1, + is_translatable/1]). -type prim_file_name() :: string() | unicode:unicode_binary(). -type prim_file_name_error() :: 'error' | 'ignore' | 'warning'. @@ -173,6 +174,11 @@ internal_native2name(_) -> internal_normalize_utf8(_) -> erlang:nif_error(undefined). +-spec is_translatable(prim_file_name()) -> boolean(). + +is_translatable(_) -> + erlang:nif_error(undefined). + %%% End of BIFs %%%----------------------------------------------------------------- @@ -671,10 +677,17 @@ set_cwd(Dir) -> set_cwd(Port, Dir) when is_port(Port) -> set_cwd_int(Port, Dir). 
-set_cwd_int(Port, Dir) -> - %% Dir is now either a string or an EXIT tuple. - %% An EXIT tuple will fail in the following catch. - drv_command(Port, [?FILE_CHDIR, pathname(Dir)]). +set_cwd_int(Port, Dir) when is_binary(Dir) -> + case prim_file:is_translatable(Dir) of + false -> + {error, no_translation}; + true -> + drv_command(Port, [?FILE_CHDIR, pathname(Dir)]) + end; +set_cwd_int(Port, Dir) when is_list(Dir) -> + drv_command(Port, [?FILE_CHDIR, pathname(Dir)]); +set_cwd_int(_, _) -> + {error, badarg}. -- cgit v1.2.3 From 6d516de001dde82c02fe050db8e3aab47914fa90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 5 Feb 2013 14:02:25 +0100 Subject: prim_file: Add list_dir_all() and read_link_all() --- erts/preloaded/ebin/prim_file.beam | Bin 42004 -> 44092 bytes erts/preloaded/src/prim_file.erl | 57 +++++++++++++++++++++++++++++++++++-- 2 files changed, 55 insertions(+), 2 deletions(-) (limited to 'erts/preloaded') diff --git a/erts/preloaded/ebin/prim_file.beam b/erts/preloaded/ebin/prim_file.beam index b1b54ca050..f7b3aac376 100644 Binary files a/erts/preloaded/ebin/prim_file.beam and b/erts/preloaded/ebin/prim_file.beam differ diff --git a/erts/preloaded/src/prim_file.erl b/erts/preloaded/src/prim_file.erl index 305abb8b0a..bf8879c2a0 100644 --- a/erts/preloaded/src/prim_file.erl +++ b/erts/preloaded/src/prim_file.erl @@ -50,9 +50,9 @@ write_file_info/2, write_file_info/3, write_file_info/4, make_link/2, make_link/3, make_symlink/2, make_symlink/3, - read_link/1, read_link/2, + read_link/1, read_link/2, read_link_all/1, read_link_all/2, read_link_info/1, read_link_info/2, read_link_info/3, - list_dir/1, list_dir/2]). + list_dir/1, list_dir/2, list_dir_all/1, list_dir_all/2]). %% How to start and stop the ?DRV port. -export([start/0, stop/1]). @@ -877,6 +877,18 @@ read_link_int(Port, Link) -> drv_command(Port, [?FILE_READLINK, pathname(Link)], fun handle_fname_response/1). 
+%% read_link_all/{2,3} + +read_link_all(Link) -> + read_link_all_int({?DRV, [binary]}, Link). + +read_link_all(Port, Link) when is_port(Port) -> + read_link_all_int(Port, Link). + +read_link_all_int(Port, Link) -> + drv_command(Port, [?FILE_READLINK, pathname(Link)], + fun handle_fname_response_all/1). + %% read_link_info/{2,3} @@ -927,6 +939,23 @@ list_dir_int(Port, Dir) -> end end). +list_dir_all(Dir) -> + list_dir_all_int({?DRV, [binary]}, Dir). + +list_dir_all(Port, Dir) when is_port(Port) -> + list_dir_all_int(Port, Dir). + +list_dir_all_int(Port, Dir) -> + drv_command(Port, [?FILE_READDIR, pathname(Dir)], + fun(P) -> + case list_dir_response(P, []) of + {ok, RawNames} -> + {ok, list_dir_convert_all(RawNames)}; + Error -> + Error + end + end). + list_dir_response(Port, Acc0) -> case drv_get_response(Port) of {lfname, []} -> @@ -956,6 +985,17 @@ list_dir_convert([Name|Names]) -> end; list_dir_convert([]) -> []. +list_dir_convert_all([Name|Names]) -> + %% If the filename cannot be converted, retain the filename as + %% a binary. + case prim_file:internal_native2name(Name) of + {error, _} -> + [Name|list_dir_convert(Names)]; + Converted when is_list(Converted) -> + [Converted|list_dir_convert(Names)] + end; +list_dir_convert_all([]) -> []. + %%%----------------------------------------------------------------- %%% Functions to communicate with the driver @@ -977,6 +1017,19 @@ handle_fname_response(Port) -> Error end. +handle_fname_response_all(Port) -> + case drv_get_response(Port) of + {fname, Name} -> + case prim_file:internal_native2name(Name) of + {error, _} -> + {ok, Name}; + Converted when is_list(Converted) -> + {ok, Converted} + end; + Error -> + Error + end. + %% Opens a driver port and converts any problems into {error, emfile}. %% Returns {ok, Port} when successful. -- cgit v1.2.3