From d15bd6b9366ff4eef81ec9c4bcc875dfe694fe98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20H=C3=B6gberg?= Date: Tue, 13 Nov 2018 11:55:48 +0100 Subject: Avoid closing files in gc/monitor callbacks Closing files in these callbacks could block scheduler progress and cause major system instability. We now defer these operations to a dedicated process instead. This process may in turn block forever and prevent further orphaned files from being closed, but it will keep the emulator itself from misbehaving. --- erts/emulator/nifs/common/prim_file_nif.c | 146 +++++++++++++++++++++--------- erts/emulator/nifs/common/prim_file_nif.h | 7 +- erts/emulator/nifs/unix/unix_prim_file.c | 7 +- erts/emulator/nifs/win32/win_prim_file.c | 7 +- erts/preloaded/ebin/prim_file.beam | Bin 27780 -> 28528 bytes erts/preloaded/src/prim_file.erl | 18 +++- 6 files changed, 135 insertions(+), 50 deletions(-) (limited to 'erts') diff --git a/erts/emulator/nifs/common/prim_file_nif.c b/erts/emulator/nifs/common/prim_file_nif.c index fd6aaa61f6..ba36a33458 100644 --- a/erts/emulator/nifs/common/prim_file_nif.c +++ b/erts/emulator/nifs/common/prim_file_nif.c @@ -38,6 +38,9 @@ static void unload(ErlNifEnv *env, void* priv_data); static ErlNifResourceType *efile_resource_type; +static ERL_NIF_TERM am_erts_prim_file; +static ERL_NIF_TERM am_close; + static ERL_NIF_TERM am_ok; static ERL_NIF_TERM am_error; static ERL_NIF_TERM am_continue; @@ -96,11 +99,14 @@ static ERL_NIF_TERM set_cwd_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM arg static ERL_NIF_TERM read_file_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM open_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM close_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); + +/* Internal ops */ +static ERL_NIF_TERM delayed_close_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM get_handle_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM altname_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); -static ERL_NIF_TERM open_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); - /* All file handle operations are passed through a wrapper that handles state * transitions, marking it as busy during the course of the operation, and * closing on completion if the owner died in the middle of an operation. @@ -128,7 +134,11 @@ static ERL_NIF_TERM open_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[] * * CLOSE_PENDING -> * CLOSED (file_handle_wrapper) - */ + * + * Should the owner of a file die, we can't close it immediately as that could + * potentially block a normal scheduler. When entering the CLOSED state from + * owner_death_callback, we will instead send a message to the erts_prim_file + * process that will then close the file through delayed_close_nif. */ typedef ERL_NIF_TERM (*file_op_impl_t)(efile_data_t *d, ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); @@ -142,7 +152,6 @@ static ERL_NIF_TERM file_handle_wrapper(file_op_impl_t operation, ErlNifEnv *env return file_handle_wrapper( name ## _impl , env, argc, argv); \ } -WRAP_FILE_HANDLE_EXPORT(close_nif) WRAP_FILE_HANDLE_EXPORT(read_nif) WRAP_FILE_HANDLE_EXPORT(write_nif) WRAP_FILE_HANDLE_EXPORT(pread_nif) @@ -193,18 +202,27 @@ static ErlNifFunc nif_funcs[] = { /* Internal ops. */ {"get_handle_nif", 1, get_handle_nif}, + {"delayed_close_nif", 1, delayed_close_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, {"altname_nif", 1, altname_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, }; ERL_NIF_INIT(prim_file, nif_funcs, load, NULL, upgrade, unload) +static ErlNifPid erts_prim_file_pid; + static void owner_death_callback(ErlNifEnv* env, void* obj, ErlNifPid* pid, ErlNifMonitor* mon); -static void gc_callback(ErlNifEnv *env, void* data); -static int load(ErlNifEnv *env, void** priv_data, ERL_NIF_TERM load_info) +static int load(ErlNifEnv *env, void** priv_data, ERL_NIF_TERM prim_file_pid) { ErlNifResourceTypeInit callbacks; + if(!enif_get_local_pid(env, prim_file_pid, &erts_prim_file_pid)) { + ASSERT(!"bad pid passed to prim_file_nif"); + } + + am_erts_prim_file = enif_make_atom(env, "erts_prim_file"); + am_close = enif_make_atom(env, "close"); + am_ok = enif_make_atom(env, "ok"); am_error = enif_make_atom(env, "error"); am_continue = enif_make_atom(env, "continue"); @@ -239,7 +257,7 @@ static int load(ErlNifEnv *env, void** priv_data, ERL_NIF_TERM load_info) am_eof = enif_make_atom(env, "eof"); callbacks.down = owner_death_callback; - callbacks.dtor = gc_callback; + callbacks.dtor = NULL; callbacks.stop = NULL; efile_resource_type = enif_open_resource_type_x(env, "efile", &callbacks, @@ -305,8 +323,10 @@ static ERL_NIF_TERM file_handle_wrapper(file_op_impl_t operation, ErlNifEnv *env /* This is the only point where a change from CLOSE_PENDING is * possible, and we're running synchronously, so we can't race with * anything else here. */ + posix_errno_t ignored; + erts_atomic32_set_acqb(&d->state, EFILE_STATE_CLOSED); - efile_close(d); + efile_close(d, &ignored); } } else { /* CLOSE_PENDING should be impossible at this point since it requires @@ -319,6 +339,24 @@ static ERL_NIF_TERM file_handle_wrapper(file_op_impl_t operation, ErlNifEnv *env return result; } +/* This is a special close operation used by the erts_prim_file process for + * cleaning up orphaned files. It differs from the ordinary close_nif in that + * it only works for files that have already entered the CLOSED state. */ +static ERL_NIF_TERM delayed_close_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + posix_errno_t ignored; + efile_data_t *d; + + ASSERT(argc == 1); + if(!get_file_data(env, argv[0], &d)) { + return enif_make_badarg(env); + } + + ASSERT(erts_atomic32_read_acqb(&d->state) == EFILE_STATE_CLOSED); + efile_close(d, &ignored); + + return am_ok; +} + static void owner_death_callback(ErlNifEnv* env, void* obj, ErlNifPid* pid, ErlNifMonitor* mon) { efile_data_t *d = (efile_data_t*)obj; @@ -334,8 +372,24 @@ static void owner_death_callback(ErlNifEnv* env, void* obj, ErlNifPid* pid, ErlN switch(previous_state) { case EFILE_STATE_IDLE: - efile_close(d); - return; + { + /* We cannot close the file here as that could block a normal + * scheduler, so we tell erts_prim_file to do it for us. + * + * This can in turn become a bottleneck (especially in cases + * like NFS failure), but it's less problematic than blocking + * thread progress. */ + ERL_NIF_TERM message, file_ref; + + file_ref = enif_make_resource(env, d); + message = enif_make_tuple2(env, am_close, file_ref); + + if(!enif_send(env, &erts_prim_file_pid, NULL, message)) { + ERTS_INTERNAL_ERROR("Failed to defer prim_file close."); + } + + return; + } case EFILE_STATE_CLOSE_PENDING: case EFILE_STATE_CLOSED: /* We're either already closed or managed to mark ourselves for @@ -352,24 +406,6 @@ static void owner_death_callback(ErlNifEnv* env, void* obj, ErlNifPid* pid, ErlN } } -static void gc_callback(ErlNifEnv *env, void* data) { - efile_data_t *d = (efile_data_t*)data; - - enum efile_state_t previous_state; - - (void)env; - - previous_state = erts_atomic32_cmpxchg_acqb(&d->state, - EFILE_STATE_CLOSED, EFILE_STATE_IDLE); - - ASSERT(previous_state != EFILE_STATE_CLOSE_PENDING && - previous_state != EFILE_STATE_BUSY); - - if(previous_state == EFILE_STATE_IDLE) { - efile_close(d); - } -} - static ERL_NIF_TERM efile_filetype_to_atom(enum efile_filetype_t type) { switch(type) { case EFILE_FILETYPE_DEVICE: return am_device; @@ -454,40 +490,62 @@ static ERL_NIF_TERM open_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[] return posix_error_to_tuple(env, posix_errno); } - result = enif_make_resource(env, d); - enif_release_resource(d); - enif_self(env, &controlling_process); if(enif_monitor_process(env, d, &controlling_process, &d->monitor)) { + /* We need to close the file manually as we haven't registered a + * destructor. */ + posix_errno_t ignored; + + erts_atomic32_set_acqb(&d->state, EFILE_STATE_CLOSED); + efile_close(d, &ignored); + return posix_error_to_tuple(env, EINVAL); } + /* Note that we do not call enif_release_resource at this point. While it's + * normally safe to leave resource management to the GC, efile_close is a + * blocking operation which must not be done in the GC callback, and we + * can't defer it as the resource is gone as soon as it returns. + * + * We instead keep the resource alive until efile_close is called, after + * which it's safe to leave things to the GC. If the controlling process + * were to die before the user had a chance to close their file, the above + * monitor will tell the erts_prim_file process to close it for them. */ + result = enif_make_resource(env, d); + return enif_make_tuple2(env, am_ok, result); } -static ERL_NIF_TERM close_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { +static ERL_NIF_TERM close_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { enum efile_state_t previous_state; + efile_data_t *d; - if(argc != 0) { + ASSERT(argc == 1); + if(!get_file_data(env, argv[0], &d)) { return enif_make_badarg(env); } previous_state = erts_atomic32_cmpxchg_acqb(&d->state, - EFILE_STATE_CLOSED, EFILE_STATE_BUSY); + EFILE_STATE_CLOSED, EFILE_STATE_IDLE); - ASSERT(previous_state == EFILE_STATE_CLOSE_PENDING || - previous_state == EFILE_STATE_BUSY); + if(previous_state == EFILE_STATE_IDLE) { + posix_errno_t error; - if(previous_state == EFILE_STATE_BUSY) { enif_demonitor_process(env, d, &d->monitor); - if(!efile_close(d)) { - return posix_error_to_tuple(env, d->posix_errno); + if(!efile_close(d, &error)) { + return posix_error_to_tuple(env, error); } - } - return am_ok; + return am_ok; + } else { + /* CLOSE_PENDING should be impossible at this point since it requires + * a transition from BUSY; the only valid state here is CLOSED. */ + ASSERT(previous_state == EFILE_STATE_CLOSED); + + return posix_error_to_tuple(env, EINVAL); + } } static ERL_NIF_TERM read_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { @@ -1190,7 +1248,7 @@ static posix_errno_t read_file(efile_data_t *d, size_t size, ErlNifBinary *resul } static ERL_NIF_TERM read_file_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { - posix_errno_t posix_errno; + posix_errno_t posix_errno, ignored; efile_fileinfo_t info = {0}; efile_path_t path; @@ -1211,7 +1269,9 @@ static ERL_NIF_TERM read_file_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM a } posix_errno = read_file(d, info.size, &result); - enif_release_resource(d); + + erts_atomic32_set_acqb(&d->state, EFILE_STATE_CLOSED); + efile_close(d, &ignored); if(posix_errno) { return posix_error_to_tuple(env, posix_errno); diff --git a/erts/emulator/nifs/common/prim_file_nif.h b/erts/emulator/nifs/common/prim_file_nif.h index 099c06c48c..b2e30c59dd 100644 --- a/erts/emulator/nifs/common/prim_file_nif.h +++ b/erts/emulator/nifs/common/prim_file_nif.h @@ -159,8 +159,11 @@ posix_errno_t efile_open(const efile_path_t *path, enum efile_modes_t modes, ErlNifResourceType *nif_type, efile_data_t **d); /** @brief Closes a file. The file must have entered the CLOSED state prior to - * calling this to prevent double close. */ -int efile_close(efile_data_t *d); + * calling this to prevent double close. + * + * Note that the file is completely invalid after this point, so the error code + * is provided in \c error rather than d->posix_errno. */ +int efile_close(efile_data_t *d, posix_errno_t *error); /* **** **** **** **** **** **** **** **** **** **** **** **** **** **** **** */ diff --git a/erts/emulator/nifs/unix/unix_prim_file.c b/erts/emulator/nifs/unix/unix_prim_file.c index dea73db18a..169b193993 100644 --- a/erts/emulator/nifs/unix/unix_prim_file.c +++ b/erts/emulator/nifs/unix/unix_prim_file.c @@ -202,21 +202,24 @@ posix_errno_t efile_open(const efile_path_t *path, enum efile_modes_t modes, return errno; } -int efile_close(efile_data_t *d) { +int efile_close(efile_data_t *d, posix_errno_t *error) { efile_unix_t *u = (efile_unix_t*)d; int fd; + ASSERT(enif_thread_type() == ERL_NIF_THR_DIRTY_IO_SCHEDULER); ASSERT(erts_atomic32_read_nob(&d->state) == EFILE_STATE_CLOSED); ASSERT(u->fd != -1); fd = u->fd; u->fd = -1; + enif_release_resource(d); + /* close(2) either always closes (*BSD, Linux) or leaves the fd in an * undefined state (POSIX 2008, Solaris), so we must not retry on EINTR. */ if(close(fd) < 0) { - u->common.posix_errno = errno; + *error = errno; return 0; } diff --git a/erts/emulator/nifs/win32/win_prim_file.c b/erts/emulator/nifs/win32/win_prim_file.c index 602a282dd1..d0aa70542f 100644 --- a/erts/emulator/nifs/win32/win_prim_file.c +++ b/erts/emulator/nifs/win32/win_prim_file.c @@ -466,18 +466,21 @@ posix_errno_t efile_open(const efile_path_t *path, enum efile_modes_t modes, } } -int efile_close(efile_data_t *d) { +int efile_close(efile_data_t *d, posix_errno_t *error) { efile_win_t *w = (efile_win_t*)d; HANDLE handle; + ASSERT(enif_thread_type() == ERL_NIF_THR_DIRTY_IO_SCHEDULER); ASSERT(erts_atomic32_read_nob(&d->state) == EFILE_STATE_CLOSED); ASSERT(w->handle != INVALID_HANDLE_VALUE); handle = w->handle; w->handle = INVALID_HANDLE_VALUE; + enif_release_resource(d); + if(!CloseHandle(handle)) { - w->common.posix_errno = windows_to_posix_errno(GetLastError()); + *error = windows_to_posix_errno(GetLastError()); return 0; } diff --git a/erts/preloaded/ebin/prim_file.beam b/erts/preloaded/ebin/prim_file.beam index df611f2bb0..d0435a10ef 100644 Binary files a/erts/preloaded/ebin/prim_file.beam and b/erts/preloaded/ebin/prim_file.beam differ diff --git a/erts/preloaded/src/prim_file.erl b/erts/preloaded/src/prim_file.erl index 6d85868183..5fc22bc582 100644 --- a/erts/preloaded/src/prim_file.erl +++ b/erts/preloaded/src/prim_file.erl @@ -83,6 +83,15 @@ internal_normalize_utf8(_) -> is_translatable(_) -> erlang:nif_error(undefined). +%% This is a janitor process used to close files whose controlling process has +%% died. The emulator will be torn down if this is killed. +delayed_close_loop() -> + receive + {close, FRef} when is_reference(FRef) -> delayed_close_nif(FRef); + _ -> ok + end, + delayed_close_loop(). + %% %% Returns {error, Reason} | {ok, BytesCopied} @@ -95,7 +104,12 @@ copy(#file_descriptor{module = ?MODULE} = Source, file:copy_opened(Source, Dest, Length). on_load() -> - ok = erlang:load_nif(atom_to_list(?MODULE), 0). + Pid = spawn(fun() -> + process_flag(trap_exit, true), + delayed_close_loop() + end), + true = register(erts_prim_file, Pid), + ok = erlang:load_nif(atom_to_list(?MODULE), Pid). open(Name, Modes) -> %% The try/catch pattern seen here is used throughout the file to adhere to @@ -482,6 +496,8 @@ truncate_nif(_FileRef) -> erlang:nif_error(undef). get_handle_nif(_FileRef) -> erlang:nif_error(undef). +delayed_close_nif(_FileRef) -> + erlang:nif_error(undef). %% %% Quality-of-life helpers -- cgit v1.2.3