From d15bd6b9366ff4eef81ec9c4bcc875dfe694fe98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20H=C3=B6gberg?= Date: Tue, 13 Nov 2018 11:55:48 +0100 Subject: Avoid closing files in gc/monitor callbacks Closing files in these callbacks could block scheduler progress and cause major system instability. We now defer these operations to a dedicated process instead. This process may in turn block forever and prevent further orphaned files from being closed, but it will keep the emulator itself from misbehaving. --- erts/emulator/nifs/common/prim_file_nif.c | 146 +++++++++++++++++++++--------- erts/emulator/nifs/common/prim_file_nif.h | 7 +- erts/emulator/nifs/unix/unix_prim_file.c | 7 +- erts/emulator/nifs/win32/win_prim_file.c | 7 +- erts/preloaded/ebin/prim_file.beam | Bin 27780 -> 28528 bytes erts/preloaded/src/prim_file.erl | 18 +++- 6 files changed, 135 insertions(+), 50 deletions(-) (limited to 'erts') diff --git a/erts/emulator/nifs/common/prim_file_nif.c b/erts/emulator/nifs/common/prim_file_nif.c index fd6aaa61f6..ba36a33458 100644 --- a/erts/emulator/nifs/common/prim_file_nif.c +++ b/erts/emulator/nifs/common/prim_file_nif.c @@ -38,6 +38,9 @@ static void unload(ErlNifEnv *env, void* priv_data); static ErlNifResourceType *efile_resource_type; +static ERL_NIF_TERM am_erts_prim_file; +static ERL_NIF_TERM am_close; + static ERL_NIF_TERM am_ok; static ERL_NIF_TERM am_error; static ERL_NIF_TERM am_continue; @@ -96,11 +99,14 @@ static ERL_NIF_TERM set_cwd_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM arg static ERL_NIF_TERM read_file_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM open_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM close_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); + +/* Internal ops */ +static ERL_NIF_TERM delayed_close_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM get_handle_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM altname_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); -static ERL_NIF_TERM open_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); - /* All file handle operations are passed through a wrapper that handles state * transitions, marking it as busy during the course of the operation, and * closing on completion if the owner died in the middle of an operation. @@ -128,7 +134,11 @@ static ERL_NIF_TERM open_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[] * * CLOSE_PENDING -> * CLOSED (file_handle_wrapper) - */ + * + * Should the owner of a file die, we can't close it immediately as that could + * potentially block a normal scheduler. When entering the CLOSED state from + * owner_death_callback, we will instead send a message to the erts_prim_file + * process that will then close the file through delayed_close_nif. */ typedef ERL_NIF_TERM (*file_op_impl_t)(efile_data_t *d, ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); @@ -142,7 +152,6 @@ static ERL_NIF_TERM file_handle_wrapper(file_op_impl_t operation, ErlNifEnv *env return file_handle_wrapper( name ## _impl , env, argc, argv); \ } -WRAP_FILE_HANDLE_EXPORT(close_nif) WRAP_FILE_HANDLE_EXPORT(read_nif) WRAP_FILE_HANDLE_EXPORT(write_nif) WRAP_FILE_HANDLE_EXPORT(pread_nif) @@ -193,18 +202,27 @@ static ErlNifFunc nif_funcs[] = { /* Internal ops. */ {"get_handle_nif", 1, get_handle_nif}, + {"delayed_close_nif", 1, delayed_close_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, {"altname_nif", 1, altname_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, }; ERL_NIF_INIT(prim_file, nif_funcs, load, NULL, upgrade, unload) +static ErlNifPid erts_prim_file_pid; + static void owner_death_callback(ErlNifEnv* env, void* obj, ErlNifPid* pid, ErlNifMonitor* mon); -static void gc_callback(ErlNifEnv *env, void* data); -static int load(ErlNifEnv *env, void** priv_data, ERL_NIF_TERM load_info) +static int load(ErlNifEnv *env, void** priv_data, ERL_NIF_TERM prim_file_pid) { ErlNifResourceTypeInit callbacks; + if(!enif_get_local_pid(env, prim_file_pid, &erts_prim_file_pid)) { + ASSERT(!"bad pid passed to prim_file_nif"); + } + + am_erts_prim_file = enif_make_atom(env, "erts_prim_file"); + am_close = enif_make_atom(env, "close"); + am_ok = enif_make_atom(env, "ok"); am_error = enif_make_atom(env, "error"); am_continue = enif_make_atom(env, "continue"); @@ -239,7 +257,7 @@ static int load(ErlNifEnv *env, void** priv_data, ERL_NIF_TERM load_info) am_eof = enif_make_atom(env, "eof"); callbacks.down = owner_death_callback; - callbacks.dtor = gc_callback; + callbacks.dtor = NULL; callbacks.stop = NULL; efile_resource_type = enif_open_resource_type_x(env, "efile", &callbacks, @@ -305,8 +323,10 @@ static ERL_NIF_TERM file_handle_wrapper(file_op_impl_t operation, ErlNifEnv *env /* This is the only point where a change from CLOSE_PENDING is * possible, and we're running synchronously, so we can't race with * anything else here. */ + posix_errno_t ignored; + erts_atomic32_set_acqb(&d->state, EFILE_STATE_CLOSED); - efile_close(d); + efile_close(d, &ignored); } } else { /* CLOSE_PENDING should be impossible at this point since it requires @@ -319,6 +339,24 @@ static ERL_NIF_TERM file_handle_wrapper(file_op_impl_t operation, ErlNifEnv *env return result; } +/* This is a special close operation used by the erts_prim_file process for + * cleaning up orphaned files. It differs from the ordinary close_nif in that + * it only works for files that have already entered the CLOSED state. */ +static ERL_NIF_TERM delayed_close_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + posix_errno_t ignored; + efile_data_t *d; + + ASSERT(argc == 1); + if(!get_file_data(env, argv[0], &d)) { + return enif_make_badarg(env); + } + + ASSERT(erts_atomic32_read_acqb(&d->state) == EFILE_STATE_CLOSED); + efile_close(d, &ignored); + + return am_ok; +} + static void owner_death_callback(ErlNifEnv* env, void* obj, ErlNifPid* pid, ErlNifMonitor* mon) { efile_data_t *d = (efile_data_t*)obj; @@ -334,8 +372,24 @@ static void owner_death_callback(ErlNifEnv* env, void* obj, ErlNifPid* pid, ErlN switch(previous_state) { case EFILE_STATE_IDLE: - efile_close(d); - return; + { + /* We cannot close the file here as that could block a normal + * scheduler, so we tell erts_prim_file to do it for us. + * + * This can in turn become a bottleneck (especially in cases + * like NFS failure), but it's less problematic than blocking + * thread progress. */ + ERL_NIF_TERM message, file_ref; + + file_ref = enif_make_resource(env, d); + message = enif_make_tuple2(env, am_close, file_ref); + + if(!enif_send(env, &erts_prim_file_pid, NULL, message)) { + ERTS_INTERNAL_ERROR("Failed to defer prim_file close."); + } + + return; + } case EFILE_STATE_CLOSE_PENDING: case EFILE_STATE_CLOSED: /* We're either already closed or managed to mark ourselves for @@ -352,24 +406,6 @@ static void owner_death_callback(ErlNifEnv* env, void* obj, ErlNifPid* pid, ErlN } } -static void gc_callback(ErlNifEnv *env, void* data) { - efile_data_t *d = (efile_data_t*)data; - - enum efile_state_t previous_state; - - (void)env; - - previous_state = erts_atomic32_cmpxchg_acqb(&d->state, - EFILE_STATE_CLOSED, EFILE_STATE_IDLE); - - ASSERT(previous_state != EFILE_STATE_CLOSE_PENDING && - previous_state != EFILE_STATE_BUSY); - - if(previous_state == EFILE_STATE_IDLE) { - efile_close(d); - } -} - static ERL_NIF_TERM efile_filetype_to_atom(enum efile_filetype_t type) { switch(type) { case EFILE_FILETYPE_DEVICE: return am_device; @@ -454,40 +490,62 @@ static ERL_NIF_TERM open_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[] return posix_error_to_tuple(env, posix_errno); } - result = enif_make_resource(env, d); - enif_release_resource(d); - enif_self(env, &controlling_process); if(enif_monitor_process(env, d, &controlling_process, &d->monitor)) { + /* We need to close the file manually as we haven't registered a + * destructor. */ + posix_errno_t ignored; + + erts_atomic32_set_acqb(&d->state, EFILE_STATE_CLOSED); + efile_close(d, &ignored); + return posix_error_to_tuple(env, EINVAL); } + /* Note that we do not call enif_release_resource at this point. While it's + * normally safe to leave resource management to the GC, efile_close is a + * blocking operation which must not be done in the GC callback, and we + * can't defer it as the resource is gone as soon as it returns. + * + * We instead keep the resource alive until efile_close is called, after + * which it's safe to leave things to the GC. If the controlling process + * were to die before the user had a chance to close their file, the above + * monitor will tell the erts_prim_file process to close it for them. */ + result = enif_make_resource(env, d); + return enif_make_tuple2(env, am_ok, result); } -static ERL_NIF_TERM close_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { +static ERL_NIF_TERM close_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { enum efile_state_t previous_state; + efile_data_t *d; - if(argc != 0) { + ASSERT(argc == 1); + if(!get_file_data(env, argv[0], &d)) { return enif_make_badarg(env); } previous_state = erts_atomic32_cmpxchg_acqb(&d->state, - EFILE_STATE_CLOSED, EFILE_STATE_BUSY); + EFILE_STATE_CLOSED, EFILE_STATE_IDLE); - ASSERT(previous_state == EFILE_STATE_CLOSE_PENDING || - previous_state == EFILE_STATE_BUSY); + if(previous_state == EFILE_STATE_IDLE) { + posix_errno_t error; - if(previous_state == EFILE_STATE_BUSY) { enif_demonitor_process(env, d, &d->monitor); - if(!efile_close(d)) { - return posix_error_to_tuple(env, d->posix_errno); + if(!efile_close(d, &error)) { + return posix_error_to_tuple(env, error); } - } - return am_ok; + return am_ok; + } else { + /* CLOSE_PENDING should be impossible at this point since it requires + * a transition from BUSY; the only valid state here is CLOSED. */ + ASSERT(previous_state == EFILE_STATE_CLOSED); + + return posix_error_to_tuple(env, EINVAL); + } } static ERL_NIF_TERM read_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { @@ -1190,7 +1248,7 @@ static posix_errno_t read_file(efile_data_t *d, size_t size, ErlNifBinary *resul } static ERL_NIF_TERM read_file_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { - posix_errno_t posix_errno; + posix_errno_t posix_errno, ignored; efile_fileinfo_t info = {0}; efile_path_t path; @@ -1211,7 +1269,9 @@ static ERL_NIF_TERM read_file_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM a } posix_errno = read_file(d, info.size, &result); - enif_release_resource(d); + + erts_atomic32_set_acqb(&d->state, EFILE_STATE_CLOSED); + efile_close(d, &ignored); if(posix_errno) { return posix_error_to_tuple(env, posix_errno); diff --git a/erts/emulator/nifs/common/prim_file_nif.h b/erts/emulator/nifs/common/prim_file_nif.h index 099c06c48c..b2e30c59dd 100644 --- a/erts/emulator/nifs/common/prim_file_nif.h +++ b/erts/emulator/nifs/common/prim_file_nif.h @@ -159,8 +159,11 @@ posix_errno_t efile_open(const efile_path_t *path, enum efile_modes_t modes, ErlNifResourceType *nif_type, efile_data_t **d); /** @brief Closes a file. The file must have entered the CLOSED state prior to - * calling this to prevent double close. */ -int efile_close(efile_data_t *d); + * calling this to prevent double close. + * + * Note that the file is completely invalid after this point, so the error code + * is provided in \c error rather than d->posix_errno. */ +int efile_close(efile_data_t *d, posix_errno_t *error); /* **** **** **** **** **** **** **** **** **** **** **** **** **** **** **** */ diff --git a/erts/emulator/nifs/unix/unix_prim_file.c b/erts/emulator/nifs/unix/unix_prim_file.c index dea73db18a..169b193993 100644 --- a/erts/emulator/nifs/unix/unix_prim_file.c +++ b/erts/emulator/nifs/unix/unix_prim_file.c @@ -202,21 +202,24 @@ posix_errno_t efile_open(const efile_path_t *path, enum efile_modes_t modes, return errno; } -int efile_close(efile_data_t *d) { +int efile_close(efile_data_t *d, posix_errno_t *error) { efile_unix_t *u = (efile_unix_t*)d; int fd; + ASSERT(enif_thread_type() == ERL_NIF_THR_DIRTY_IO_SCHEDULER); ASSERT(erts_atomic32_read_nob(&d->state) == EFILE_STATE_CLOSED); ASSERT(u->fd != -1); fd = u->fd; u->fd = -1; + enif_release_resource(d); + /* close(2) either always closes (*BSD, Linux) or leaves the fd in an * undefined state (POSIX 2008, Solaris), so we must not retry on EINTR. */ if(close(fd) < 0) { - u->common.posix_errno = errno; + *error = errno; return 0; } diff --git a/erts/emulator/nifs/win32/win_prim_file.c b/erts/emulator/nifs/win32/win_prim_file.c index 602a282dd1..d0aa70542f 100644 --- a/erts/emulator/nifs/win32/win_prim_file.c +++ b/erts/emulator/nifs/win32/win_prim_file.c @@ -466,18 +466,21 @@ posix_errno_t efile_open(const efile_path_t *path, enum efile_modes_t modes, } } -int efile_close(efile_data_t *d) { +int efile_close(efile_data_t *d, posix_errno_t *error) { efile_win_t *w = (efile_win_t*)d; HANDLE handle; + ASSERT(enif_thread_type() == ERL_NIF_THR_DIRTY_IO_SCHEDULER); ASSERT(erts_atomic32_read_nob(&d->state) == EFILE_STATE_CLOSED); ASSERT(w->handle != INVALID_HANDLE_VALUE); handle = w->handle; w->handle = INVALID_HANDLE_VALUE; + enif_release_resource(d); + if(!CloseHandle(handle)) { - w->common.posix_errno = windows_to_posix_errno(GetLastError()); + *error = windows_to_posix_errno(GetLastError()); return 0; } diff --git a/erts/preloaded/ebin/prim_file.beam b/erts/preloaded/ebin/prim_file.beam index df611f2bb0..d0435a10ef 100644 Binary files a/erts/preloaded/ebin/prim_file.beam and b/erts/preloaded/ebin/prim_file.beam differ diff --git a/erts/preloaded/src/prim_file.erl b/erts/preloaded/src/prim_file.erl index 6d85868183..5fc22bc582 100644 --- a/erts/preloaded/src/prim_file.erl +++ b/erts/preloaded/src/prim_file.erl @@ -83,6 +83,15 @@ internal_normalize_utf8(_) -> is_translatable(_) -> erlang:nif_error(undefined). +%% This is a janitor process used to close files whose controlling process has +%% died. The emulator will be torn down if this is killed. +delayed_close_loop() -> + receive + {close, FRef} when is_reference(FRef) -> delayed_close_nif(FRef); + _ -> ok + end, + delayed_close_loop(). + %% %% Returns {error, Reason} | {ok, BytesCopied} @@ -95,7 +104,12 @@ copy(#file_descriptor{module = ?MODULE} = Source, file:copy_opened(Source, Dest, Length). on_load() -> - ok = erlang:load_nif(atom_to_list(?MODULE), 0). + Pid = spawn(fun() -> + process_flag(trap_exit, true), + delayed_close_loop() + end), + true = register(erts_prim_file, Pid), + ok = erlang:load_nif(atom_to_list(?MODULE), Pid). open(Name, Modes) -> %% The try/catch pattern seen here is used throughout the file to adhere to @@ -482,6 +496,8 @@ truncate_nif(_FileRef) -> erlang:nif_error(undef). get_handle_nif(_FileRef) -> erlang:nif_error(undef). +delayed_close_nif(_FileRef) -> + erlang:nif_error(undef). %% %% Quality-of-life helpers -- cgit v1.2.3 From 7c902ed60b759c30aabfecde52baa2d12b8885bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20H=C3=B6gberg?= Date: Tue, 13 Nov 2018 19:47:51 +0100 Subject: Fix broken assertion on monitor release We sometimes bump the refc without messing with the table, which means that we sometimes decrement it while in the table, causing the old assertion to fail. The property we want to check is that neither end of the monitor is present in the table when the monitor is deleted. --- erts/emulator/beam/erl_monitor_link.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/erl_monitor_link.h b/erts/emulator/beam/erl_monitor_link.h index 9ff8aa509a..ed7bf7d54a 100644 --- a/erts/emulator/beam/erl_monitor_link.h +++ b/erts/emulator/beam/erl_monitor_link.h @@ -1387,11 +1387,14 @@ ERTS_GLB_INLINE void erts_monitor_release(ErtsMonitor *mon) { ErtsMonitorData *mdp = erts_monitor_to_data(mon); - ERTS_ML_ASSERT(!(mon->flags & ERTS_ML_FLG_IN_TABLE)); ERTS_ML_ASSERT(erts_atomic32_read_nob(&mdp->refc) > 0); - if (erts_atomic32_dec_read_nob(&mdp->refc) == 0) + if (erts_atomic32_dec_read_nob(&mdp->refc) == 0) { + ERTS_ML_ASSERT(!(mdp->origin.flags & ERTS_ML_FLG_IN_TABLE)); + ERTS_ML_ASSERT(!(mdp->target.flags & ERTS_ML_FLG_IN_TABLE)); + erts_monitor_destroy__(mdp); + } } ERTS_GLB_INLINE void @@ -1399,12 +1402,14 @@ erts_monitor_release_both(ErtsMonitorData *mdp) { ERTS_ML_ASSERT((mdp->origin.flags & ERTS_ML_FLGS_SAME) == (mdp->target.flags & ERTS_ML_FLGS_SAME)); - ERTS_ML_ASSERT(!(mdp->origin.flags & ERTS_ML_FLG_IN_TABLE)); - ERTS_ML_ASSERT(!(mdp->target.flags & ERTS_ML_FLG_IN_TABLE)); ERTS_ML_ASSERT(erts_atomic32_read_nob(&mdp->refc) >= 2); - if (erts_atomic32_add_read_nob(&mdp->refc, (erts_aint32_t) -2) == 0) + if (erts_atomic32_add_read_nob(&mdp->refc, (erts_aint32_t) -2) == 0) { + ERTS_ML_ASSERT(!(mdp->origin.flags & ERTS_ML_FLG_IN_TABLE)); + ERTS_ML_ASSERT(!(mdp->target.flags & ERTS_ML_FLG_IN_TABLE)); + erts_monitor_destroy__(mdp); + } } ERTS_GLB_INLINE int -- cgit v1.2.3