diff options
author | John Högberg <[email protected]> | 2017-11-03 11:49:27 +0100 |
---|---|---|
committer | John Högberg <[email protected]> | 2017-11-30 15:44:33 +0100 |
commit | ebbd26eeea4115c946d1254d94acd50f150b4455 (patch) | |
tree | ab2a8ba1023aa0684695dc36ff5567ae9e3f59c4 /erts/emulator | |
parent | 6cb62e44eba1db8d1917ebb0db84298e91582c4e (diff) | |
download | otp-ebbd26eeea4115c946d1254d94acd50f150b4455.tar.gz otp-ebbd26eeea4115c946d1254d94acd50f150b4455.tar.bz2 otp-ebbd26eeea4115c946d1254d94acd50f150b4455.zip |
Reimplement efile_drv as a dirty NIF
This improves the latency of file operations as dirty schedulers
are a bit more eager to run jobs than async threads, and use a
single global queue rather than per-thread queues, eliminating the
risk of a job stalling behind a long-running job on the same thread
while other async threads sit idle.
There's no such thing as a free lunch though; the lowered latency
comes at the cost of increased busy-waiting which may have an
adverse effect on some applications. This behavior can be tweaked
with the +sbwt flag, but unfortunately it affects all types of
schedulers and not just dirty ones. We plan to add type-specific
flags at a later stage.
sendfile has been moved to inet_drv to lessen the effect of a nasty
race; the cooperation between inet_drv and efile has never been
airtight, and the socket dying at the wrong time (regardless of
reason) could result in fd aliasing. Moving it to the inet driver
makes it impossible to trigger this by closing the socket in the
middle of a sendfile operation, while still allowing it to be
aborted -- something that can't be done if it stays in the file
driver.
The race can still occur if the controlling process dies in the short
window between dispatching the sendfile operation and the dup(2)
call in the driver, but it's much less likely to happen now.
A proper fix is in the works.
--
Notable functional differences:
* The use_threads option for file:sendfile/5 no longer has any
effect.
* The file-specific DTrace probes have been removed. The same
effect can be achieved with normal tracing together with the
nif__entry/nif__return probes to track scheduling.
--
OTP-14256
Diffstat (limited to 'erts/emulator')
-rw-r--r-- | erts/emulator/Makefile.in | 34 | ||||
-rw-r--r-- | erts/emulator/beam/erl_lock_check.c | 4 | ||||
-rw-r--r-- | erts/emulator/drivers/common/efile_drv.c | 4295 | ||||
-rw-r--r-- | erts/emulator/drivers/common/erl_efile.h | 176 | ||||
-rw-r--r-- | erts/emulator/drivers/common/gzio.c | 712 | ||||
-rw-r--r-- | erts/emulator/drivers/common/gzio.h | 8 | ||||
-rw-r--r-- | erts/emulator/drivers/common/inet_drv.c | 382 | ||||
-rw-r--r-- | erts/emulator/drivers/unix/unix_efile.c | 1102 | ||||
-rw-r--r-- | erts/emulator/drivers/win32/win_efile.c | 2058 | ||||
-rw-r--r-- | erts/emulator/nifs/common/prim_file_nif.c | 1237 | ||||
-rw-r--r-- | erts/emulator/nifs/common/prim_file_nif.h | 240 | ||||
-rw-r--r-- | erts/emulator/nifs/unix/unix_prim_file.c | 957 | ||||
-rw-r--r-- | erts/emulator/nifs/win32/win_prim_file.c | 1427 |
13 files changed, 4258 insertions, 8374 deletions
diff --git a/erts/emulator/Makefile.in b/erts/emulator/Makefile.in index 9a09a0f6fa..15b65f1c71 100644 --- a/erts/emulator/Makefile.in +++ b/erts/emulator/Makefile.in @@ -634,6 +634,7 @@ GENERATE += $(TTF_DIR)/driver_tab.c PRELOAD_BEAM = $(ERL_TOP)/erts/preloaded/ebin/otp_ring0.beam \ $(ERL_TOP)/erts/preloaded/ebin/erts_code_purger.beam \ $(ERL_TOP)/erts/preloaded/ebin/init.beam \ + $(ERL_TOP)/erts/preloaded/ebin/prim_buffer.beam \ $(ERL_TOP)/erts/preloaded/ebin/prim_eval.beam \ $(ERL_TOP)/erts/preloaded/ebin/prim_inet.beam \ $(ERL_TOP)/erts/preloaded/ebin/prim_file.beam \ @@ -644,8 +645,7 @@ PRELOAD_BEAM = $(ERL_TOP)/erts/preloaded/ebin/otp_ring0.beam \ $(ERL_TOP)/erts/preloaded/ebin/erts_internal.beam \ $(ERL_TOP)/erts/preloaded/ebin/erl_tracer.beam \ $(ERL_TOP)/erts/preloaded/ebin/erts_literal_area_collector.beam \ - $(ERL_TOP)/erts/preloaded/ebin/erts_dirty_process_code_checker.beam \ - $(ERL_TOP)/erts/preloaded/ebin/prim_buffer.beam + $(ERL_TOP)/erts/preloaded/ebin/erts_dirty_process_code_checker.beam ifeq ($(TARGET),win32) # On windows the preloaded objects are in a resource object. 
@@ -786,6 +786,9 @@ $(OBJDIR)/%.o: drivers/$(ERLANG_OSTYPE)/%.c $(OBJDIR)/%.o: nifs/common/%.c $(V_CC) $(CFLAGS) -DLIBSCTP=$(LIBSCTP) $(INCLUDES) -Inifs/common -Inifs/$(ERLANG_OSTYPE) -c $< -o $@ +$(OBJDIR)/%.o: nifs/$(ERLANG_OSTYPE)/%.c + $(V_CC) $(CFLAGS) $(INCLUDES) -Inifs/common -Inifs/$(ERLANG_OSTYPE) -I../etc/$(ERLANG_OSTYPE) -c $< -o $@ + # ---------------------------------------------------------------------- # Specials # @@ -874,18 +877,17 @@ RUN_OBJS += \ LTTNG_OBJS = $(OBJDIR)/erlang_lttng.o NIF_OBJS = \ $(OBJDIR)/erl_tracer_nif.o \ - $(OBJDIR)/zlib_nif.o \ - $(OBJDIR)/prim_buffer_nif.o + $(OBJDIR)/prim_buffer_nif.o \ + $(OBJDIR)/prim_file_nif.o \ + $(OBJDIR)/zlib_nif.o ifeq ($(TARGET),win32) DRV_OBJS = \ $(OBJDIR)/registry_drv.o \ - $(OBJDIR)/efile_drv.o \ $(OBJDIR)/inet_drv.o \ $(OBJDIR)/ram_file_drv.o \ $(OBJDIR)/ttsl_drv.o OS_OBJS = \ - $(OBJDIR)/win_efile.o \ $(OBJDIR)/win_con.o \ $(OBJDIR)/dll_sys.o \ $(OBJDIR)/driver_tab.o \ @@ -894,7 +896,8 @@ OS_OBJS = \ $(OBJDIR)/sys_time.o \ $(OBJDIR)/sys_interrupt.o \ $(OBJDIR)/sys_env.o \ - $(OBJDIR)/dosmap.o + $(OBJDIR)/dosmap.o \ + $(OBJDIR)/win_prim_file.o else OS_OBJS = \ @@ -902,14 +905,13 @@ OS_OBJS = \ $(OBJDIR)/sys_drivers.o \ $(OBJDIR)/sys_uds.o \ $(OBJDIR)/driver_tab.o \ - $(OBJDIR)/unix_efile.o \ + $(OBJDIR)/elib_memmove.o \ $(OBJDIR)/gzio.o \ - $(OBJDIR)/elib_memmove.o + $(OBJDIR)/unix_prim_file.o OS_OBJS += $(OBJDIR)/sys_float.o \ $(OBJDIR)/sys_time.o DRV_OBJS = \ - $(OBJDIR)/efile_drv.o \ $(OBJDIR)/inet_drv.o \ $(OBJDIR)/ram_file_drv.o \ $(OBJDIR)/ttsl_drv.o @@ -1137,6 +1139,7 @@ BEAM_SRC=$(wildcard beam/*.c) DRV_COMMON_SRC=$(wildcard drivers/common/*.c) DRV_OSTYPE_SRC=$(wildcard drivers/$(ERLANG_OSTYPE)/*.c) NIF_COMMON_SRC=$(wildcard nifs/common/*.c) +NIF_OSTYPE_SRC=$(wildcard nifs/$(ERLANG_OSTYPE)/*.c) ALL_SYS_SRC=$(wildcard sys/$(ERLANG_OSTYPE)/*.c) $(wildcard sys/common/*.c) # We use $(shell ls) here instead of wildcard as $(wildcard ) resolved at # loadtime of the makefile and at that time 
these files are not generated yet. @@ -1149,7 +1152,10 @@ ifeq ($(TARGET),win32) #DEP_CC=$(EMU_CC) DEP_CC=$(CC) -DEP_FLAGS=-MM $(subst -O2,,$(CFLAGS)) $(INCLUDES) -I../etc/win32 -Idrivers/common -Idrivers/$(ERLANG_OSTYPE) +DEP_FLAGS=-MM $(subst -O2,,$(CFLAGS)) $(INCLUDES) -I../etc/win32 \ + -Idrivers/common -Idrivers/$(ERLANG_OSTYPE) \ + -Inifs/common -Inifs/$(ERLANG_OSTYPE) + # ifeq (@MIXED_CYGWIN_VC@,yes) # VC++ used for compiling. If __GNUC__ is defined we will include # other headers then when compiling which will result in faulty @@ -1169,7 +1175,9 @@ MG_FLAG=-MG endif DEP_CC=$(CC) -DEP_FLAGS=-MM $(MG_FLAG) $(CFLAGS) $(INCLUDES) -Inifs/common -Idrivers/common -Idrivers/$(ERLANG_OSTYPE) +DEP_FLAGS=-MM $(MG_FLAG) $(CFLAGS) $(INCLUDES) \ + -Idrivers/common -Idrivers/$(ERLANG_OSTYPE) \ + -Inifs/common -Inifs/$(ERLANG_OSTYPE) SYS_SRC=$(ALL_SYS_SRC) endif @@ -1200,6 +1208,8 @@ $(TTF_DIR)/depend.mk: $(TTF_DIR)/GENERATED $(PRELOAD_SRC) | $(SED_DEPEND) >> $(TTF_DIR)/depend.mk $(V_at)$(DEP_CC) $(DEP_FLAGS) $(NIF_COMMON_SRC) \ | $(SED_DEPEND) >> $(TTF_DIR)/depend.mk + $(V_at)$(DEP_CC) $(DEP_FLAGS) -I../etc/$(ERLANG_OSTYPE) $(NIF_OSTYPE_SRC) \ + | $(SED_DEPEND) >> $(TTF_DIR)/depend.mk $(V_at)$(DEP_CC) $(DEP_FLAGS) $(SYS_SRC) \ | $(SED_DEPEND) >> $(TTF_DIR)/depend.mk $(V_at)$(DEP_CC) $(DEP_FLAGS) $(TARGET_SRC) \ diff --git a/erts/emulator/beam/erl_lock_check.c b/erts/emulator/beam/erl_lock_check.c index 88716d478d..64950fc252 100644 --- a/erts/emulator/beam/erl_lock_check.c +++ b/erts/emulator/beam/erl_lock_check.c @@ -109,7 +109,6 @@ static erts_lc_lock_order_t erts_lock_order[] = { { "fun_tab", NULL }, { "environ", NULL }, { "release_literal_areas", NULL }, - { "efile_drv", "address" }, { "drv_ev_state_grow", NULL, }, { "drv_ev_state", "address" }, { "safe_hash", "address" }, @@ -170,9 +169,6 @@ static erts_lc_lock_order_t erts_lock_order[] = { { "save_ops_lock", NULL }, #endif #endif -#ifdef USE_VM_PROBES - { "efile_drv dtrace mutex", NULL }, -#endif { "mtrace_buf", 
NULL }, { "os_monotonic_time", NULL }, { "erts_alloc_hard_debug", NULL }, diff --git a/erts/emulator/drivers/common/efile_drv.c b/erts/emulator/drivers/common/efile_drv.c deleted file mode 100644 index 4e1d2f0d7f..0000000000 --- a/erts/emulator/drivers/common/efile_drv.c +++ /dev/null @@ -1,4295 +0,0 @@ -/* - * %CopyrightBegin% - * - * Copyright Ericsson AB 1996-2017. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * %CopyrightEnd% - */ -/* - * Purpose: Provides file and directory operations. - * - * This file is generic, and does the work of decoding the commands - * and encoding the responses. System-specific functions are found in - * the unix_efile.c and win_efile.c files. 
- */ - -/* Operations */ - -#define FILE_OPEN 1 /* Essential for startup */ -#define FILE_READ 2 -#define FILE_LSEEK 3 -#define FILE_WRITE 4 -#define FILE_FSTAT 5 /* Essential for startup */ -#define FILE_PWD 6 /* Essential for startup */ -#define FILE_READDIR 7 /* Essential for startup */ -#define FILE_CHDIR 8 -#define FILE_FSYNC 9 -#define FILE_MKDIR 10 -#define FILE_DELETE 11 -#define FILE_RENAME 12 -#define FILE_RMDIR 13 -#define FILE_TRUNCATE 14 -#define FILE_READ_FILE 15 /* Essential for startup */ -#define FILE_WRITE_INFO 16 -#define FILE_LSTAT 19 -#define FILE_READLINK 20 -#define FILE_LINK 21 -#define FILE_SYMLINK 22 -#define FILE_CLOSE 23 -#define FILE_PWRITEV 24 -#define FILE_PREADV 25 -#define FILE_SETOPT 26 -#define FILE_IPREAD 27 -#define FILE_ALTNAME 28 -#define FILE_READ_LINE 29 -#define FILE_FDATASYNC 30 -#define FILE_FADVISE 31 -#define FILE_SENDFILE 32 -#define FILE_FALLOCATE 33 -#define FILE_CLOSE_ON_PORT_EXIT 34 -/* Return codes */ - -#define FILE_RESP_OK 0 -#define FILE_RESP_ERROR 1 -#define FILE_RESP_DATA 2 -#define FILE_RESP_NUMBER 3 -#define FILE_RESP_INFO 4 -#define FILE_RESP_NUMERR 5 -#define FILE_RESP_LDATA 6 -#define FILE_RESP_N2DATA 7 -#define FILE_RESP_EOF 8 -#define FILE_RESP_FNAME 9 -#define FILE_RESP_ALL_DATA 10 -#define FILE_RESP_LFNAME 11 - -/* Options */ - -#define FILE_OPT_DELAYED_WRITE 0 -#define FILE_OPT_READ_AHEAD 1 - -/* IPREAD variants */ - -#define IPREAD_S32BU_P32BU 0 - -/* Limits */ - -#define FILE_SEGMENT_READ (256*1024) -#define FILE_SEGMENT_WRITE (256*1024) - -/* Internal */ - -/* Set to 1 to test having read_ahead implicitly for read_line */ -#define ALWAYS_READ_LINE_AHEAD 0 - - -/* Must not be possible to get from malloc()! 
*/ -#define FILE_FD_INVALID ((Sint)(-1)) - -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif - -#include <ctype.h> -#include <sys/types.h> -#include <stdlib.h> - -/* Need (NON)BLOCKING macros for sendfile */ -#ifndef WANT_NONBLOCKING -#define WANT_NONBLOCKING -#endif - -#include "sys.h" - -#include "erl_driver.h" -#include "erl_efile.h" -#include "erl_threads.h" -#include "gzio.h" -#include "dtrace-wrapper.h" - - -static ErlDrvSysInfo sys_info; - -/* For explanation of this var, see comment for same var in erl_async.c */ -static unsigned gcc_optimizer_hack = 0; - -#ifdef USE_VM_PROBES - -#define DTRACE_EFILE_BUFSIZ 128 - -#define DTRACE_INVOKE_SETUP(op) \ - do { DTRACE3(efile_drv_int_entry, d->sched_i1, d->sched_i2, op); } while (0) -#define DTRACE_INVOKE_SETUP_BY_NAME(op) \ - struct t_data *d = (struct t_data *) data ; \ - DTRACE_INVOKE_SETUP(op) -#define DTRACE_INVOKE_RETURN(op) \ - do { DTRACE3(efile_drv_int_return, d->sched_i1, d->sched_i2, \ - op); } while (0) ; gcc_optimizer_hack++ ; - -/* Assign human-friendlier id numbers to scheduler & I/O worker threads */ -int dt_driver_idnum = 0; -int dt_driver_io_worker_base = 5000; -erts_mtx_t dt_driver_mutex; -pthread_key_t dt_driver_key; - -typedef struct { - int thread_num; - Uint64 tag; -} dt_private; - -dt_private *get_dt_private(int); -#else /* USE_VM_PROBES */ -#define DTRACE_INVOKE_SETUP(op) do {} while (0) -#define DTRACE_INVOKE_SETUP_BY_NAME(op) do {} while (0) -#define DTRACE_INVOKE_RETURN(op) do {} while (0) -#endif /* USE_VM_PROBES */ - -/* #define TRACE 1 */ -#ifdef TRACE -# define TRACE_C(c) do { putchar(c); fflush(stdout); } while (0) -# define TRACE_S(s) do { fputs((s), stdout); fflush(stdout); } while (0) -# define TRACE_F(args) do { printf args ;fflush(stdout); } while (0) -#else -# define TRACE_C(c) ((void)(0)) -# define TRACE_S(s) ((void)(0)) -# define TRACE_F(args) ((void)(0)) -#endif - - -#define THRDS_AVAILABLE (sys_info.async_threads > 0) -#ifdef HARDDEBUG /* HARDDEBUG in io.c is expected 
too */ -#define TRACE_DRIVER fprintf(stderr, "Efile: ") -#else -#define TRACE_DRIVER -#endif -#define MUTEX_INIT(m, p) do { IF_THRDS { TRACE_DRIVER; (m = driver_pdl_create(p)); } } while (0) -#define MUTEX_LOCK(m) do { IF_THRDS { TRACE_DRIVER; driver_pdl_lock(m); } } while (0) -#define MUTEX_UNLOCK(m) do { IF_THRDS { TRACE_DRIVER; driver_pdl_unlock(m); } } while (0) -#define IF_THRDS if (THRDS_AVAILABLE) - - -#define SENDFILE_FLGS_USE_THREADS (1 << 0) -/** - * On DARWIN sendfile can deadlock with close if called in - * different threads. So until Apple fixes so that sendfile - * is not buggy we disable usage of the async pool for - * DARWIN. The testcase t_sendfile_crashduring reproduces - * this error when using +A 10 and enabling SENDFILE_FLGS_USE_THREADS. - */ -#if defined(__APPLE__) && defined(__MACH__) -#define USE_THRDS_FOR_SENDFILE(DATA) 0 -#else -#define USE_THRDS_FOR_SENDFILE(DATA) (DATA->flags & SENDFILE_FLGS_USE_THREADS) -#endif /* defined(__APPLE__) && defined(__MACH__) */ - - - -#if 0 -/* Experimental, for forcing all file operations to use the same thread. */ - static unsigned file_fixed_key = 1; -# define KEY(desc) (&file_fixed_key) -#else -# define KEY(desc) (&(desc)->key) -#endif - -#ifndef MAX -# define MAX(x, y) (((x) > (y)) ? (x) : (y)) -#endif - -#ifdef FILENAMES_16BIT -#ifdef USE_VM_PROBES -#error 16bit characters in filenames and dtrace in combination is not supported. 
-#endif -# define FILENAME_BYTELEN(Str) filename_len_16bit(Str) -# define FILENAME_COPY(To,From) filename_cpy_16bit((To),(From)) -# define FILENAME_CHARSIZE 2 - - static int filename_len_16bit(char *str) - { - char *p = str; - while(*p != '\0' || p[1] != '\0') { - p += 2; - } - return (p - str); - } - - static void filename_cpy_16bit(char *to, char *from) - { - while(*from != '\0' || from[1] != '\0') { - *to++ = *from++; - *to++ = *from++; - } - *to++ = *from++; - *to++ = *from++; - } - -#else -# define FILENAME_BYTELEN(Str) strlen(Str) -# define FILENAME_COPY(To,From) strcpy(To,From) -# define FILENAME_CHARSIZE 1 -#endif - -#if (MAXPATHLEN+1)*FILENAME_CHARSIZE+1 > BUFSIZ -# define RESBUFSIZE ((MAXPATHLEN+1)*FILENAME_CHARSIZE+1) -#else -# define RESBUFSIZE BUFSIZ -#endif - -#define READDIR_CHUNKS (5) - - - -#if ALWAYS_READ_LINE_AHEAD -#define DEFAULT_LINEBUF_SIZE 2048 -#else -#define DEFAULT_LINEBUF_SIZE 512 /* Small, it's usually discarded anyway */ -#endif - -typedef unsigned char uchar; - -static ErlDrvData file_start(ErlDrvPort port, char* command); -static int file_init(void); -static void file_stop(ErlDrvData); -static void file_output(ErlDrvData, char* buf, ErlDrvSizeT len); -static ErlDrvSSizeT file_control(ErlDrvData, unsigned int command, - char* buf, ErlDrvSizeT len, - char **rbuf, ErlDrvSizeT rlen); -static void file_timeout(ErlDrvData); -static void file_outputv(ErlDrvData, ErlIOVec*); -static void file_async_ready(ErlDrvData, ErlDrvThreadData); -static void file_flush(ErlDrvData); - -#ifdef HAVE_SENDFILE -static void file_ready_output(ErlDrvData data, ErlDrvEvent event); -static void file_stop_select(ErlDrvEvent event, void* _); -#endif /* HAVE_SENDFILE */ - - -enum e_timer {timer_idle, timer_again, timer_write}; -#ifdef HAVE_SENDFILE -enum e_sendfile {sending, not_sending}; -#define SENDFILE_USE_THREADS (1 << 0) -#endif /* HAVE_SENDFILE */ - -struct t_data; - -typedef struct { - SWord fd; - ErlDrvPort port; - unsigned int key; /* Async queue key */ - 
unsigned flags; /* Original flags from FILE_OPEN. */ - void (*invoke)(void *); - struct t_data *d; - void (*free)(void *); - struct t_data *cq_head; /* Queue of incoming commands */ - struct t_data *cq_tail; /* -""- */ - enum e_timer timer_state; -#ifdef HAVE_SENDFILE - enum e_sendfile sendfile_state; -#endif /* HAVE_SENDFILE */ - size_t read_bufsize; - ErlDrvBinary *read_binp; - size_t read_offset; - size_t read_size; - size_t write_bufsize; - unsigned long write_delay; - int write_error; - Efile_error write_errInfo; - ErlDrvPDL q_mtx; /* Mutex for the driver queue, known by the emulator. Also used for - mutual exclusion when accessing field(s) below. */ - size_t write_buffered; -#ifdef USE_VM_PROBES - int idnum; /* Unique ID # for this driver thread/desc */ - char port_str[DTRACE_TERM_BUF_SIZE]; -#endif -} file_descriptor; - - -static int reply_error(file_descriptor*, Efile_error* errInfo); - -struct erl_drv_entry efile_driver_entry = { - file_init, - file_start, - file_stop, - file_output, - NULL, -#ifdef HAVE_SENDFILE - file_ready_output, -#else - NULL, -#endif /* HAVE_SENDFILE */ - "efile", - NULL, - NULL, - file_control, - file_timeout, - file_outputv, - file_async_ready, - file_flush, - NULL, - NULL, - ERL_DRV_EXTENDED_MARKER, - ERL_DRV_EXTENDED_MAJOR_VERSION, - ERL_DRV_EXTENDED_MINOR_VERSION, - ERL_DRV_FLAG_USE_PORT_LOCKING, - NULL, - NULL, -#ifdef HAVE_SENDFILE - file_stop_select -#else - NULL -#endif /* HAVE_SENDFILE */ -}; - - - -static int thread_short_circuit; - -#define DRIVER_ASYNC(level, desc, f_invoke, data, f_free) \ -if (thread_short_circuit >= (level)) { \ - (*(f_invoke))(data); \ - file_async_ready((ErlDrvData)(desc), (data)); \ -} else { \ - driver_async((desc)->port, KEY(desc), (f_invoke), (data), (f_free)); \ -} - - - -struct t_pbuf_spec { - Sint64 offset; - size_t size; -}; - -struct t_pwritev { - ErlDrvPort port; - ErlDrvPDL q_mtx; - size_t size; - unsigned cnt; - unsigned n; - struct t_pbuf_spec specs[1]; -}; - -struct t_preadv { - 
ErlIOVec eiov; - unsigned n; - unsigned cnt; - size_t size; - Sint64 offsets[1]; -}; - -#define READDIR_BUFSIZE (8*1024)*READDIR_CHUNKS -#if READDIR_BUFSIZE < (1 + (2 + MAXPATHLEN)*FILENAME_CHARSIZE*READDIR_CHUNKS) -# undef READDIR_BUFSIZE -# define READDIR_BUFSIZE (1 + (2 + MAXPATHLEN)*FILENAME_CHARSIZE*READDIR_CHUNKS) -#endif - -struct t_readdir_buf { - struct t_readdir_buf *next; - size_t n; - char buf[READDIR_BUFSIZE]; -}; - -struct t_data -{ - struct t_data *next; - int command; - int level; - void (*invoke)(void *); - void (*free)(void *); - void *data_to_free; /* used by FILE_CLOSE_ON_PORT_EXIT only */ - int again; - int reply; -#ifdef USE_VM_PROBES - int sched_i1; - Uint64 sched_i2; - char sched_utag[DTRACE_EFILE_BUFSIZ+1]; -#endif - int result_ok; - Efile_error errInfo; - int flags; - SWord fd; - int is_fd_unused; - /**/ - Efile_info info; - EFILE_DIR_HANDLE dir_handle; /* Handle to open directory. */ - ErlDrvBinary *bin; - int drive; - size_t n; - /*off_t offset;*/ - /*size_t bytesRead; Bytes read from the file. 
*/ - /**/ - union { - struct { - Sint64 offset; - int origin; - Sint64 location; - } lseek; - struct { - ErlDrvPort port; - ErlDrvPDL q_mtx; - size_t size; - size_t reply_size; - } writev; - struct t_pwritev pwritev; - struct t_preadv preadv; - struct { - ErlDrvBinary *binp; - size_t bin_offset; - size_t bin_size; - size_t size; - } read; - struct { - ErlDrvBinary *binp; /* in - out */ - size_t read_offset; /* in - out */ - size_t read_size; /* in - out */ - size_t nl_pos; /* out */ - short nl_skip; /* out, 0 or 1 */ -#if !ALWAYS_READ_LINE_AHEAD - short read_ahead; /* in, bool */ -#endif - } read_line; - struct { - ErlDrvBinary *binp; - int size; - int offset; - } read_file; - struct { - struct t_readdir_buf *first_buf; - struct t_readdir_buf *last_buf; - } read_dir; - struct { - Sint64 offset; - Sint64 length; - int advise; - } fadvise; -#ifdef HAVE_SENDFILE - struct { - ErlDrvPort port; - ErlDrvPDL q_mtx; - int out_fd; - off_t offset; - Uint64 nbytes; - Uint64 written; - } sendfile; -#endif /* HAVE_SENDFILE */ - struct { - Sint64 offset; - Sint64 length; - } fallocate; - } c; - char b[1]; -}; - -#define EF_ALLOC(S) driver_alloc((S)) -#define EF_REALLOC(P, S) driver_realloc((P), (S)) -#define EF_SAFE_ALLOC(S) ef_safe_alloc((S)) -#define EF_SAFE_REALLOC(P, S) ef_safe_realloc((P), (S)) -#define EF_FREE(P) do { if((P)) driver_free((P)); } while(0) - -static void *ef_safe_alloc(Uint s) -{ - void *p = EF_ALLOC(s); - if (!p) erts_exit(ERTS_ERROR_EXIT, "efile drv: Can't allocate %lu bytes of memory\n", (unsigned long)s); - return p; -} - -/********************************************************************* - * ErlIOVec manipulation functions. 
- */ - -/* char EV_CHAR_P(ErlIOVec *ev, int p, int q) */ -#define EV_CHAR_P(ev, p, q) \ - (((char *)(ev)->iov[q].iov_base) + (p)) - -/* int EV_GET_CHAR(ErlIOVec *ev, char *p, int *pp, int *qp) */ -#define EV_GET_CHAR(ev, p, pp, qp) efile_ev_get_char(ev, p ,pp, qp) -static int -efile_ev_get_char(ErlIOVec *ev, char *p, size_t *pp, size_t *qp) { - if (*pp + 1 <= ev->iov[*qp].iov_len) { - *p = *EV_CHAR_P(ev, *pp, *qp); - if (*pp + 1 < ev->iov[*qp].iov_len) - *pp += 1; - else { - *qp += 1; - *pp = 0; - } - return !0; - } - return 0; -} - -/* Uint32 EV_UINT32(ErlIOVec *ev, int p, int q)*/ -#define EV_UINT32(ev, p, q) \ - ((Uint32) ((unsigned char *)(ev)->iov[q].iov_base)[p]) - -/* int EV_GET_UINT32(ErlIOVec *ev, Uint32 *p, int *pp, int *qp) */ -#define EV_GET_UINT32(ev, p, pp, qp) efile_ev_get_uint32(ev, p, pp, qp) -static int -efile_ev_get_uint32(ErlIOVec *ev, Uint32 *p, size_t *pp, size_t *qp) { - if (*pp + 4 <= ev->iov[*qp].iov_len) { - *p = (EV_UINT32(ev, *pp, *qp) << 24) - | (EV_UINT32(ev, *pp + 1, *qp) << 16) - | (EV_UINT32(ev, *pp + 2, *qp) << 8) - | (EV_UINT32(ev, *pp + 3, *qp)); - if (*pp + 4 < ev->iov[*qp].iov_len) - *pp += 4; - else { - *qp += 1; - *pp = 0; - } - return !0; - } - return 0; -} - -/* Uint64 EV_UINT64(ErlIOVec *ev, int p, int q)*/ -#define EV_UINT64(ev, p, q) \ - ((Uint64) ((unsigned char *)(ev)->iov[q].iov_base)[p]) - -/* int EV_GET_UINT64(ErlIOVec *ev, Uint64 *p, int *pp, int *qp) */ -#define EV_GET_UINT64(ev, p, pp, qp) efile_ev_get_uint64(ev, p, pp, qp) -static int -efile_ev_get_uint64(ErlIOVec *ev, Uint64 *p, size_t *pp, size_t *qp) { - if (*pp + 8 <= ev->iov[*qp].iov_len) { - *p = (EV_UINT64(ev, *pp, *qp) << 56) - | (EV_UINT64(ev, *pp + 1, *qp) << 48) - | (EV_UINT64(ev, *pp + 2, *qp) << 40) - | (EV_UINT64(ev, *pp + 3, *qp) << 32) - | (EV_UINT64(ev, *pp + 4, *qp) << 24) - | (EV_UINT64(ev, *pp + 5, *qp) << 16) - | (EV_UINT64(ev, *pp + 6, *qp) << 8) - | (EV_UINT64(ev, *pp + 7, *qp)); - if (*pp + 8 < ev->iov[*qp].iov_len) - *pp += 8; - else { - 
*qp += 1; - *pp = 0; - } - return !0; - } - return 0; -} - -/* int EV_GET_SINT64(ErlIOVec *ev, Uint64 *p, int *pp, int *qp) */ -#define EV_GET_SINT64(ev, p, pp, qp) efile_ev_get_sint64(ev, p, pp, qp) -static int -efile_ev_get_sint64(ErlIOVec *ev, Sint64 *p, size_t *pp, size_t *qp) { - Uint64 *tmp = (Uint64*)p; - return EV_GET_UINT64(ev, tmp, pp, qp); -} - -#if 0 - -static void ev_clear(ErlIOVec *ev) { - ASSERT(ev); - ev->size = 0; - ev->vsize = 0; - ev->iov = NULL; - ev->binv = NULL; -} - -/* Assumes that ->iov and ->binv were allocated with sys_alloc(). - */ -static void ev_free(ErlIOVec *ev) { - if (! ev) { - return; - } - if (ev->vsize > 0) { - int i; - ASSERT(ev->iov); - ASSERT(ev->binv); - for (i = 0; i < ev->vsize; i++) { - if (ev->binv[i]) { - driver_free_binary(ev->binv[i]); - } - } - EF_FREE(ev->iov); - EF_FREE(ev->binv); - } -} - -/* Copy the contents from source to dest. - * Data in binaries is not copied, just the pointers; - * and refc is incremented. - */ -static ErlIOVec *ev_copy(ErlIOVec *dest, ErlIOVec *source) { - int *ip; - ASSERT(dest); - ASSERT(source); - if (source->vsize == 0) { - /* Empty source */ - ev_clear(dest); - return dest; - } - /* Allocate ->iov and ->binv */ - dest->iov = EF_ALLOC(sizeof(*dest->iov) * source->vsize); - if (! dest->iov) { - return NULL; - } - dest->binv = EF_ALLOC(sizeof(*dest->binv) * source->vsize); - if (! dest->binv) { - EF_FREE(dest->iov); - return NULL; - } - dest->size = source->size; - /* Copy one vector element at the time. - * Use *ip as an alias for dest->vsize to improve readabiliy. - * Keep dest consistent in every iteration by using - * dest->vsize==*ip as loop variable. 
- */ - for (ip = &dest->vsize, *ip = 0; *ip < source->vsize; (*ip)++) { - if (source->iov[*ip].iov_len == 0) { - /* Empty vector element */ - dest->iov[*ip].iov_len = 0; - dest->iov[*ip].iov_base = NULL; - dest->binv[*ip] = NULL; - } else { - /* Non empty vector element */ - if (source->binv[*ip]) { - /* Contents in binary - copy pointers and increment refc */ - dest->iov[*ip] = source->iov[*ip]; - dest->binv[*ip] = source->binv[*ip]; - driver_binary_inc_refc(source->binv[*ip]); - } else { - /* Contents not in binary - allocate new binary and copy data */ - if (! (dest->binv[*ip] = - driver_alloc_binary(source->iov[*ip].iov_len))) { - goto failed; - } - sys_memcpy(dest->binv[*ip]->orig_bytes, - source->iov[*ip].iov_base, - source->iov[*ip].iov_len); - dest->iov[*ip].iov_base = dest->binv[*ip]->orig_bytes; - dest->iov[*ip].iov_len = source->iov[*ip].iov_len; - } - } - } - return dest; - failed: - ev_free(dest); - return NULL; -} - -#endif - - - -/********************************************************************* - * Command queue functions - */ - -static void cq_enq(file_descriptor *desc, struct t_data *d) { - ASSERT(d); - if (desc->cq_head) { - ASSERT(desc->cq_tail); - ASSERT(!desc->cq_tail->next); - desc->cq_tail = desc->cq_tail->next = d; - } else { - ASSERT(desc->cq_tail == NULL); - desc->cq_head = desc->cq_tail = d; - } - d->next = NULL; -} - -static struct t_data *cq_deq(file_descriptor *desc) { - struct t_data *d = desc->cq_head; - ASSERT(d || (!d && !desc->cq_tail)); - if (d) { - ASSERT(!d->next || (d->next && desc->cq_tail != d)); - if ((desc->cq_head = d->next) == NULL) { - ASSERT(desc->cq_tail == d); - desc->cq_tail = NULL; - } - } - return d; -} - - -/********************************************************************* - * Driver entry point -> init - */ -static int -file_init(void) -{ - char buf[21]; /* enough to hold any 64-bit integer */ - size_t bufsz = sizeof(buf); - thread_short_circuit = (erl_drv_getenv("ERL_EFILE_THREAD_SHORT_CIRCUIT", - buf, 
- &bufsz) == 0 - ? atoi(buf) - : 0); - driver_system_info(&sys_info, sizeof(ErlDrvSysInfo)); - - /* run initiation of efile_driver if needed */ - efile_init(); - -#ifdef USE_VM_PROBES - erts_mtx_init(&dt_driver_mutex, "efile_drv dtrace mutex", NIL, - ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_IO); - pthread_key_create(&dt_driver_key, NULL); -#endif /* USE_VM_PROBES */ - - return 0; -} - - -/********************************************************************* - * Driver entry point -> start - */ -static ErlDrvData -file_start(ErlDrvPort port, char* command) - -{ - file_descriptor* desc; - - if ((desc = (file_descriptor*) EF_ALLOC(sizeof(file_descriptor))) - == NULL) { - errno = ENOMEM; - return ERL_DRV_ERROR_ERRNO; - } - desc->fd = FILE_FD_INVALID; - desc->port = port; - desc->key = driver_async_port_key(port); - desc->flags = 0; - desc->invoke = NULL; - desc->d = NULL; - desc->free = NULL; - desc->cq_head = NULL; - desc->cq_tail = NULL; - desc->timer_state = timer_idle; -#ifdef HAVE_SENDFILE - desc->sendfile_state = not_sending; -#endif - desc->read_bufsize = 0; - desc->read_binp = NULL; - desc->read_offset = 0; - desc->read_size = 0; - desc->write_delay = 0L; - desc->write_bufsize = 0; - desc->write_error = 0; - MUTEX_INIT(desc->q_mtx, port); /* Refc is one, referenced by emulator now */ - desc->write_buffered = 0; -#ifdef USE_VM_PROBES - dtrace_drvport_str(port, desc->port_str); - get_dt_private(0); /* throw away return value */ -#endif /* USE_VM_PROBES */ - return (ErlDrvData) desc; -} - -static void do_close(int flags, SWord fd) { - if (flags & EFILE_COMPRESSED) { - erts_gzclose((ErtsGzFile)(fd)); - } else { - efile_closefile((int) fd); - } -} - -static void invoke_close(void *data) -{ - struct t_data *d = (struct t_data *) data; - DTRACE_INVOKE_SETUP(FILE_CLOSE); - d->again = 0; - do_close(d->flags, d->fd); - DTRACE_INVOKE_RETURN(FILE_CLOSE); -} - -static void free_data(void *data) -{ - struct t_data *d = (struct t_data *) data; - - switch 
(d->command) { - case FILE_OPEN: - if (d->is_fd_unused && d->fd != FILE_FD_INVALID) { - /* This is OK to do in scheduler thread because there can be no async op - ongoing for this fd here, as we exited during async open. - Ideally, this close should happen in an async thread too, but that would - require a substantial rewrite, as we are here because of a dead port and - cannot schedule async jobs for that port any more... */ - do_close(d->flags, d->fd); - } - break; - case FILE_CLOSE_ON_PORT_EXIT: - EF_FREE(d->data_to_free); - break; - } - - EF_FREE(data); -} - - -/* - * Sends back an error reply to Erlang. - */ - -static void reply_posix_error(file_descriptor *desc, int posix_errno) { - char response[256]; /* Response buffer. */ - char* s; - char* t; - - /* - * Contents of buffer sent back: - * - * +-----------------------------------------+ - * | FILE_RESP_ERROR | Posix error id string | - * +-----------------------------------------+ - */ - - TRACE_C('E'); - - response[0] = FILE_RESP_ERROR; - for (s = erl_errno_id(posix_errno), t = response+1; *s; s++, t++) - *t = tolower(*s); - driver_output2(desc->port, response, t-response, NULL, 0); -} - -static void reply_Uint_posix_error(file_descriptor *desc, Uint num, - int posix_errno) { - char response[256]; /* Response buffer. 
*/ - char* s; - char* t; - - /* - * Contents of buffer sent back: - * - * +----------------------------------------------------------------------+ - * | FILE_RESP_NUMERR | 64-bit number (big-endian) | Posix error id string | - * +----------------------------------------------------------------------+ - */ - - TRACE_C('N'); - - response[0] = FILE_RESP_NUMERR; -#if SIZEOF_VOID_P == 4 - put_int32(0, response+1); -#else - put_int32(num>>32, response+1); -#endif - put_int32((Uint32)num, response+1+4); - for (s = erl_errno_id(posix_errno), t = response+1+4+4; *s; s++, t++) - *t = tolower(*s); - driver_output2(desc->port, response, t-response, NULL, 0); -} - -#ifdef HAVE_SENDFILE -static void reply_string_error(file_descriptor *desc, char* str) { - char response[256]; /* Response buffer. */ - char* s; - char* t; - - response[0] = FILE_RESP_ERROR; - for (s = str, t = response+1; *s; s++, t++) - *t = tolower(*s); - driver_output2(desc->port, response, t-response, NULL, 0); -} -#endif - -static int reply_error(file_descriptor *desc, - Efile_error *errInfo) /* The error codes. */ -{ - reply_posix_error(desc, errInfo->posix_errno); - return 0; -} - -static int reply_Uint_error(file_descriptor *desc, Uint num, - Efile_error *errInfo) /* The error codes. 
*/ -{ - reply_Uint_posix_error(desc, num, errInfo->posix_errno); - return 0; -} - -static int reply_ok(file_descriptor *desc) { - char c = FILE_RESP_OK; - - driver_output2(desc->port, &c, 1, NULL, 0); - return 0; -} - -static int reply(file_descriptor *desc, int ok, Efile_error *errInfo) { - if (!ok) { - reply_error(desc, errInfo); - } else { - TRACE_C('K'); - reply_ok(desc); - } - return 0; -} - -static int reply_Uint(file_descriptor *desc, Uint result) { - char tmp[1+4+4]; - - /* - * Contents of buffer sent back: - * - * +-----------------------------------------------+ - * | FILE_RESP_NUMBER | 64-bit number (big-endian) | - * +-----------------------------------------------+ - */ - - TRACE_C('R'); - - tmp[0] = FILE_RESP_NUMBER; -#if SIZEOF_VOID_P == 4 - put_int32(0, tmp+1); -#else - put_int32(result>>32, tmp+1); -#endif - put_int32((Uint32)result, tmp+1+4); - driver_output2(desc->port, tmp, sizeof(tmp), NULL, 0); - return 0; -} - -static int reply_Sint64(file_descriptor *desc, Sint64 result) { - char tmp[1+4+4]; - - /* - * Contents of buffer sent back: - * - * +-----------------------------------------------+ - * | FILE_RESP_NUMBER | 64-bit number (big-endian) | - * +-----------------------------------------------+ - */ - - TRACE_C('R'); - - tmp[0] = FILE_RESP_NUMBER; - put_int64(result, tmp+1); - driver_output2(desc->port, tmp, sizeof(tmp), NULL, 0); - return 0; -} - -#if 0 -static void reply_again(file_descriptor *desc) { - char tmp[1]; - tmp[0] = FILE_RESP_AGAIN; - driver_output2(desc->port, tmp, sizeof(tmp), NULL, 0); -} -#endif - -static void reply_ev(file_descriptor *desc, char response, ErlIOVec *ev) { - char tmp[1]; - /* Data arriving at the Erlang process: - * [Response, Binary0, Binary1, .... 
| BinaryN-1] - */ - tmp[0] = response; - driver_outputv(desc->port, tmp, sizeof(tmp), ev, 0); -} - -static void reply_data(file_descriptor *desc, - ErlDrvBinary *binp, size_t offset, size_t len) { - char header[1+4+4]; - /* Data arriving at the Erlang process: - * [?FILE_RESP_DATA, 64-bit length (big-endian) | Data] - */ - header[0] = FILE_RESP_DATA; -#if SIZEOF_SIZE_T == 4 - put_int32(0, header+1); -#else - put_int32(len>>32, header+1); -#endif - put_int32((Uint32)len, header+1+4); - driver_output_binary(desc->port, header, sizeof(header), - binp, offset, len); -} - -static void reply_buf(file_descriptor *desc, char *buf, size_t len) { - char header[1+4+4]; - /* Data arriving at the Erlang process: - * [?FILE_RESP_DATA, 64-bit length (big-endian) | Data] - */ - header[0] = FILE_RESP_DATA; -#if SIZEOF_SIZE_T == 4 - put_int32(0, header+1); -#else - put_int32(len>>32, header+1); -#endif - put_int32((Uint32)len, header+1+4); - driver_output2(desc->port, header, sizeof(header), buf, len); -} - -static int reply_eof(file_descriptor *desc) { - char c = FILE_RESP_EOF; - - driver_output2(desc->port, &c, 1, NULL, 0); - return 0; -} - -static void invoke_name(void *data, int (*f)(Efile_error *, char *)) -{ - struct t_data *d = (struct t_data *) data; - char *name = (char *) d->b; - - d->again = 0; - d->result_ok = (*f)(&d->errInfo, name); -} - -static void invoke_mkdir(void *data) -{ - DTRACE_INVOKE_SETUP_BY_NAME(FILE_MKDIR); - invoke_name(data, efile_mkdir); - DTRACE_INVOKE_RETURN(FILE_MKDIR); -} - -static void invoke_rmdir(void *data) -{ - DTRACE_INVOKE_SETUP_BY_NAME(FILE_RMDIR); - invoke_name(data, efile_rmdir); - DTRACE_INVOKE_RETURN(FILE_RMDIR); -} - -static void invoke_delete_file(void *data) -{ - DTRACE_INVOKE_SETUP_BY_NAME(FILE_DELETE); - invoke_name(data, efile_delete_file); - DTRACE_INVOKE_RETURN(FILE_DELETE); -} - -static void invoke_chdir(void *data) -{ - DTRACE_INVOKE_SETUP_BY_NAME(FILE_CHDIR); - invoke_name(data, efile_chdir); - 
DTRACE_INVOKE_RETURN(FILE_CHDIR); -} - -static void invoke_fdatasync(void *data) -{ - struct t_data *d = (struct t_data *) data; - int fd = (int) d->fd; - DTRACE_INVOKE_SETUP(FILE_FDATASYNC); - - d->again = 0; - d->result_ok = efile_fdatasync(&d->errInfo, fd); - DTRACE_INVOKE_RETURN(FILE_FDATASYNC); -} - -static void invoke_fsync(void *data) -{ - struct t_data *d = (struct t_data *) data; - int fd = (int) d->fd; - DTRACE_INVOKE_SETUP(FILE_FSYNC); - - d->again = 0; - d->result_ok = efile_fsync(&d->errInfo, fd); - DTRACE_INVOKE_RETURN(FILE_FSYNC); -} - -static void invoke_truncate(void *data) -{ - struct t_data *d = (struct t_data *) data; - int fd = (int) d->fd; - DTRACE_INVOKE_SETUP(FILE_TRUNCATE); - - d->again = 0; - d->result_ok = efile_truncate_file(&d->errInfo, &fd, d->flags); - DTRACE_INVOKE_RETURN(FILE_TRUNCATE); -} - -static void invoke_read(void *data) -{ - struct t_data *d = (struct t_data *) data; - int status, segment; - size_t size, read_size; - DTRACE_INVOKE_SETUP(FILE_READ); - - segment = d->again && d->c.read.bin_size >= 2*FILE_SEGMENT_READ; - if (segment) { - size = FILE_SEGMENT_READ; - } else { - size = d->c.read.bin_size; - } - read_size = size; - if (d->flags & EFILE_COMPRESSED) { - read_size = erts_gzread((ErtsGzFile)d->fd, - d->c.read.binp->orig_bytes + d->c.read.bin_offset, - size); - status = (read_size != (size_t) -1); - if (!status) { - d->errInfo.posix_errno = EIO; - } - } else { - status = efile_read(&d->errInfo, d->flags, (int) d->fd, - d->c.read.binp->orig_bytes + d->c.read.bin_offset, - size, - &read_size); - } - if ( (d->result_ok = status)) { - ASSERT(read_size <= size); - d->c.read.bin_offset += read_size; - if (read_size < size || !segment) { - d->c.read.bin_size = 0; - d->again = 0; - } else { - d->c.read.bin_size -= read_size; - } - } else { - d->again = 0; - } - DTRACE_INVOKE_RETURN(FILE_READ); -} - -static void free_read(void *data) -{ - struct t_data *d = (struct t_data *) data; - - driver_free_binary(d->c.read.binp); - 
EF_FREE(d); -} - -static void invoke_read_line(void *data) -{ - struct t_data *d = (struct t_data *) data; - int status; - size_t read_size = 0; - int local_loop = (d->again == 0); - DTRACE_INVOKE_SETUP(FILE_READ_LINE); - - do { - size_t size = (d->c.read_line.binp)->orig_size - - d->c.read_line.read_offset - d->c.read_line.read_size; - if (size == 0) { - /* Need more place */ - ErlDrvSizeT need = (d->c.read_line.read_size >= DEFAULT_LINEBUF_SIZE) ? - d->c.read_line.read_size + DEFAULT_LINEBUF_SIZE : DEFAULT_LINEBUF_SIZE; - ErlDrvBinary *newbin; -#if !ALWAYS_READ_LINE_AHEAD - /* Use read_ahead size if need does not exceed it */ - if (need < (d->c.read_line.binp)->orig_size && - d->c.read_line.read_ahead) - need = (d->c.read_line.binp)->orig_size; -#endif - newbin = driver_alloc_binary(need); - if (newbin == NULL) { - d->result_ok = 0; - d->errInfo.posix_errno = ENOMEM; - d->again = 0; - break; - } - memcpy(newbin->orig_bytes, (d->c.read_line.binp)->orig_bytes + d->c.read_line.read_offset, - d->c.read_line.read_size); - driver_free_binary(d->c.read_line.binp); - d->c.read_line.binp = newbin; - d->c.read_line.read_offset = 0; - size = need - d->c.read_line.read_size; - } - if (d->flags & EFILE_COMPRESSED) { - read_size = erts_gzread((ErtsGzFile)d->fd, - d->c.read_line.binp->orig_bytes + - d->c.read_line.read_offset + d->c.read_line.read_size, - size); - status = (read_size != (size_t) -1); - if (!status) { - d->errInfo.posix_errno = EIO; - } - } else { - status = efile_read(&d->errInfo, d->flags, (int) d->fd, - d->c.read_line.binp->orig_bytes + - d->c.read_line.read_offset + d->c.read_line.read_size, - size, - &read_size); - } - if ( (d->result_ok = status)) { - void *nl_ptr = memchr((d->c.read_line.binp)->orig_bytes + - d->c.read_line.read_offset + d->c.read_line.read_size,'\n',read_size); - ASSERT(read_size <= size); - d->c.read_line.read_size += read_size; - if (nl_ptr != NULL) { - /* If found, we're done */ - d->c.read_line.nl_pos = ((char *) nl_ptr) - - ((char 
*) ((d->c.read_line.binp)->orig_bytes)) + 1; - if (d->c.read_line.nl_pos > 1 && - *(((char *) nl_ptr) - 1) == '\r') { - --d->c.read_line.nl_pos; - *(((char *) nl_ptr) - 1) = '\n'; - d->c.read_line.nl_skip = 1; - } else { - d->c.read_line.nl_skip = 0; - } - d->again = 0; -#if !ALWAYS_READ_LINE_AHEAD - if (!(d->c.read_line.read_ahead)) { - /* Ouch! Undo buffering... */ - size_t too_much = d->c.read_line.read_size - d->c.read_line.nl_skip - - (d->c.read_line.nl_pos - d->c.read_line.read_offset); - d->c.read_line.read_size -= too_much; - ASSERT(d->c.read_line.read_size >= 0); - if (d->flags & EFILE_COMPRESSED) { - Sint64 location = erts_gzseek((ErtsGzFile)d->fd, - -((Sint64) too_much), EFILE_SEEK_CUR); - if (location == -1) { - d->result_ok = 0; - d->errInfo.posix_errno = errno; - } - } else { - Sint64 location; - d->result_ok = efile_seek(&d->errInfo, (int) d->fd, - -((Sint64) too_much), EFILE_SEEK_CUR, - &location); - } - } -#endif - break; - } else if (read_size == 0) { - d->c.read_line.nl_pos = - d->c.read_line.read_offset + d->c.read_line.read_size; - d->c.read_line.nl_skip = 0; - d->again = 0; - break; - } - } else { - d->again = 0; - break; - } - } while (local_loop); - DTRACE_INVOKE_RETURN(FILE_READ_LINE); -} - -static void free_read_line(void *data) -{ - struct t_data *d = (struct t_data *) data; - - driver_free_binary(d->c.read_line.binp); - EF_FREE(d); -} - -void read_file_zero_size(struct t_data* d); -#define ZERO_FILE_CHUNK (64 * 1024) - -/* [ERL-327] Some special files like /proc/... 
have reported size 0 */ -void read_file_zero_size(struct t_data* d) { - size_t total_read_size = 0; - size_t allocated_size = ZERO_FILE_CHUNK; /* allocd in invoke_read_file */ - for (;;) { - size_t read_result; - - /* Read until we hit EOF (read less than FILE_SEGMENT_READ) */ - d->result_ok = efile_read(&d->errInfo, - EFILE_MODE_READ, - (int) d->fd, - (d->c.read_file.binp->orig_bytes + - total_read_size), - ZERO_FILE_CHUNK, - &read_result); - if (!d->result_ok) { - break; - } - - total_read_size += read_result; - d->c.read_file.offset += read_result; - if (read_result < ZERO_FILE_CHUNK) { - break; - } - - /* Grow before the next read call */ - allocated_size = total_read_size + ZERO_FILE_CHUNK; - d->c.read_file.binp = driver_realloc_binary(d->c.read_file.binp, - allocated_size); - } - - /* Finalize the memory usage. Hopefully it was read fully on the first - * go, so the binary allocation overhead becomes: - * alloc ZERO_FILE_CHUNK (64kb) -> realloc real_size */ - if (allocated_size != total_read_size) { - d->c.read_file.binp = driver_realloc_binary(d->c.read_file.binp, - total_read_size); - } - d->again = 0; -} - -static void invoke_read_file(void *data) -{ - struct t_data *d = (struct t_data *) data; - size_t read_size; - int chop; - DTRACE_INVOKE_SETUP(FILE_READ_FILE); - - if (! d->c.read_file.binp) { /* First invocation only */ - int fd; - Sint64 size; - - if (! (d->result_ok = - efile_openfile(&d->errInfo, d->b, - EFILE_MODE_READ, &fd, &size))) { - goto done; - } - d->fd = fd; - d->c.read_file.size = (int) size; - - /* For zero sized files allocate a reasonable chunk to attempt reading - * anyway. Note: This will eat ZERO_FILE_CHUNK bytes for any 0 file - * and free them immediately after (if the file was empty). */ - ERTS_ASSERT(size >= 0); - d->c.read_file.binp = driver_alloc_binary(size != 0 ? 
(size_t)size - : ZERO_FILE_CHUNK); - - if (size < 0 || size != d->c.read_file.size || !d->c.read_file.binp) { - d->result_ok = 0; - d->errInfo.posix_errno = ENOMEM; - goto close; - } - d->c.read_file.offset = 0; - } - /* Invariant: d->c.read_file.size >= d->c.read_file.offset */ - - if (d->c.read_file.size == 0) { - read_file_zero_size(d); - goto close; - } - - read_size = (size_t) (d->c.read_file.size - d->c.read_file.offset); - if (! read_size) goto close; - chop = d->again && read_size >= FILE_SEGMENT_READ*2; - if (chop) read_size = FILE_SEGMENT_READ; - d->result_ok = - efile_read(&d->errInfo, - EFILE_MODE_READ, - (int) d->fd, - d->c.read_file.binp->orig_bytes + d->c.read_file.offset, - read_size, - &read_size); - if (d->result_ok) { - d->c.read_file.offset += read_size; - if (chop) goto chop_done; /* again */ - } - close: - efile_closefile((int) d->fd); - done: - d->again = 0; - chop_done: - DTRACE_INVOKE_RETURN(FILE_READ_FILE); -} - -static void free_read_file(void *data) -{ - struct t_data *d = (struct t_data *) data; - - if (d->c.read_file.binp) driver_free_binary(d->c.read_file.binp); - EF_FREE(d); -} - - - -static void invoke_preadv(void *data) -{ - struct t_data *d = (struct t_data *) data; - struct t_preadv *c = &d->c.preadv; - ErlIOVec *ev = &c->eiov; - size_t bytes_read_so_far = 0; - unsigned char *p = (unsigned char *)ev->iov[0].iov_base + 4+4+8*c->cnt; - DTRACE_INVOKE_SETUP(FILE_PREADV); - - while (c->cnt < c->n) { - size_t read_size = ev->iov[1 + c->cnt].iov_len - c->size; - size_t bytes_read = 0; - int chop = d->again - && bytes_read_so_far + read_size >= 2*FILE_SEGMENT_READ; - if (chop) { - ASSERT(bytes_read_so_far < FILE_SEGMENT_READ); - read_size = FILE_SEGMENT_READ + FILE_SEGMENT_READ/2 - - bytes_read_so_far; - } - if ( (d->result_ok - = efile_pread(&d->errInfo, - (int) d->fd, - c->offsets[c->cnt] + c->size, - ((char *)ev->iov[1 + c->cnt].iov_base) + c->size, - read_size, - &bytes_read))) { - bytes_read_so_far += bytes_read; - if (chop && 
bytes_read == read_size) { - c->size += bytes_read; - goto done; - } - ASSERT(bytes_read <= read_size); - ev->iov[1 + c->cnt].iov_len = bytes_read + c->size; - ev->size += bytes_read + c->size; - put_int64(bytes_read + c->size, p); p += 8; - c->size = 0; - c->cnt++; - if (d->again - && bytes_read_so_far >= FILE_SEGMENT_READ - && c->cnt < c->n) { - goto done; - } - } else { - /* In case of a read error, ev->size will not be correct, - * which does not matter since no read data is returned - * to Erlang. - */ - break; - } - } - d->again = 0; - done: - DTRACE_INVOKE_RETURN(FILE_PREADV); -} - -static void free_preadv(void *data) { - struct t_data *d = data; - int i; - ErlIOVec *ev = &d->c.preadv.eiov; - - for(i = 0; i < ev->vsize; i++) { - driver_free_binary(ev->binv[i]); - } - EF_FREE(d); -} - -static void invoke_ipread(void *data) -{ - struct t_data *d = data; - struct t_preadv *c = &d->c.preadv; - ErlIOVec *ev = &c->eiov; - size_t bytes_read = 0; - char buf[2*sizeof(Uint32)]; - Uint32 offset, size; - DTRACE_INVOKE_SETUP(FILE_IPREAD); - - /* Read indirection header */ - if (! efile_pread(&d->errInfo, (int) d->fd, c->offsets[0], - buf, sizeof(buf), &bytes_read)) { - goto error; - } - if (bytes_read != sizeof(buf)) goto done; /* eof */ - size = get_int32(buf); - offset = get_int32(buf+4); - if (size > c->size) goto done; /* eof */ - c->n = 1; - c->cnt = 0; - c->size = 0; - c->offsets[0] = offset; - if (! (ev->binv[0] = driver_alloc_binary(3*8))) { - d->errInfo.posix_errno = ENOMEM; - goto error; - } - ev->vsize = 1; - ev->iov[0].iov_len = 3*8; - ev->iov[0].iov_base = ev->binv[0]->orig_bytes; - ev->size = ev->iov[0].iov_len; - put_int64(offset, ev->iov[0].iov_base); - put_int64(size, ((char *)ev->iov[0].iov_base) + 2*8); - if (size == 0) { - put_int64(size, ((char *)ev->iov[0].iov_base) + 8); - goto done; - } - if (! 
(ev->binv[1] = driver_alloc_binary(size))) { - d->errInfo.posix_errno = ENOMEM; - goto error; - } - ev->vsize = 2; - ev->iov[1].iov_len = size; - ev->iov[1].iov_base = ev->binv[1]->orig_bytes; - /* Read data block */ - d->invoke = invoke_preadv; - invoke_preadv(data); - DTRACE_INVOKE_RETURN(FILE_IPREAD); - return; - error: - d->result_ok = 0; - d->again = 0; - DTRACE_INVOKE_RETURN(FILE_IPREAD); - return; - done: - d->result_ok = !0; - d->again = 0; - DTRACE_INVOKE_RETURN(FILE_IPREAD); -} - -/* invoke_writev and invoke_pwritev are the only thread functions that - * access non-thread data i.e the port queue and a mutex in the port - * structure that is used to lock the port queue. - * - * The port will normally not be terminated until the port queue is - * empty, but if the port is killed, i.e., exit(Port, kill) is called, - * it will terminate regardless of the port queue state. When the - * port is invalid driver_peekq() returns NULL and set the size to -1, - * and driver_sizeq() returns -1. 
- */ - -static void invoke_writev(void *data) { - struct t_data *d = (struct t_data *) data; - SysIOVec *iov0; - SysIOVec *iov; - int iovlen; - int iovcnt; - size_t size; - size_t p; - int segment; - DTRACE_INVOKE_SETUP(FILE_WRITE); - - segment = d->again && d->c.writev.size >= 2*FILE_SEGMENT_WRITE; - if (segment) { - size = FILE_SEGMENT_WRITE; - } else { - size = d->c.writev.size; - } - - /* Copy the io vector to avoid locking the port que while writing, - * also, both we and efile_writev might/will change the SysIOVec - * when segmenting or due to partial write and we do not want to - * tamper with the actual queue that we get from driver_peekq - */ - MUTEX_LOCK(d->c.writev.q_mtx); /* Lock before accessing the port queue */ - iov0 = driver_peekq(d->c.writev.port, &iovlen); - - /* Calculate iovcnt */ - for (p = 0, iovcnt = 0; - p < size && iovcnt < iovlen; - p += iov0[iovcnt++].iov_len) - ; - iov = EF_SAFE_ALLOC(sizeof(SysIOVec)*iovcnt); - memcpy(iov,iov0,iovcnt*sizeof(SysIOVec)); - MUTEX_UNLOCK(d->c.writev.q_mtx); - /* Let go of lock until we deque from original vector */ - - if (iovlen > 0) { - ASSERT(iov[iovcnt-1].iov_len > p - size); - iov[iovcnt-1].iov_len -= p - size; - if (d->flags & EFILE_COMPRESSED) { - int i, status = 1; - for (i = 0; i < iovcnt; i++) { - if (iov[i].iov_base && iov[i].iov_len > 0) { - /* Just in case, I do not know what gzwrite does - * with errno. - */ - errno = EINVAL; - status = erts_gzwrite((ErtsGzFile)d->fd, - iov[i].iov_base, - iov[i].iov_len) == iov[i].iov_len; - if (! status) { - d->errInfo.posix_errno = - d->errInfo.os_errno = errno; /* XXX Correct? */ - break; - } - } - } - d->result_ok = status; - } else { - d->result_ok = efile_writev(&d->errInfo, - d->flags, (int) d->fd, - iov, iovcnt); - } - } else if (iovlen == 0) { - d->result_ok = 1; - } - else { /* Port has terminated */ - d->result_ok = 0; - d->errInfo.posix_errno = d->errInfo.os_errno = EINVAL; - } - EF_FREE(iov); - - if (! 
d->result_ok) { - d->again = 0; - MUTEX_LOCK(d->c.writev.q_mtx); - driver_deq(d->c.writev.port, d->c.writev.size); - MUTEX_UNLOCK(d->c.writev.q_mtx); - } else { - if (! segment) { - d->again = 0; - } - d->c.writev.size -= size; - TRACE_F(("w%lu", (unsigned long)size)); - MUTEX_LOCK(d->c.writev.q_mtx); - driver_deq(d->c.writev.port, size); - MUTEX_UNLOCK(d->c.writev.q_mtx); - } - - - DTRACE_INVOKE_RETURN(FILE_WRITE); -} - -static void invoke_pwd(void *data) -{ - struct t_data *d = (struct t_data *) data; - DTRACE_INVOKE_SETUP(FILE_PWD); - - d->again = 0; - d->result_ok = efile_getdcwd(&d->errInfo,d->drive, d->b+1, - RESBUFSIZE-1); - DTRACE_INVOKE_RETURN(FILE_PWD); -} - -static void invoke_readlink(void *data) -{ - struct t_data *d = (struct t_data *) data; - char resbuf[RESBUFSIZE]; /* Result buffer. */ - DTRACE_INVOKE_SETUP(FILE_READLINK); - - d->again = 0; - d->result_ok = efile_readlink(&d->errInfo, d->b, resbuf+1, - RESBUFSIZE-1); - if (d->result_ok != 0) - FILENAME_COPY((char *) d->b + 1, resbuf+1); - DTRACE_INVOKE_RETURN(FILE_READLINK); -} - -static void invoke_altname(void *data) -{ - struct t_data *d = (struct t_data *) data; - char resbuf[RESBUFSIZE]; /* Result buffer. 
*/ - DTRACE_INVOKE_SETUP(FILE_ALTNAME); - - d->again = 0; - d->result_ok = efile_altname(&d->errInfo, d->b, resbuf+1, - RESBUFSIZE-1); - if (d->result_ok != 0) - FILENAME_COPY((char *) d->b + 1, resbuf+1); - DTRACE_INVOKE_RETURN(FILE_ALTNAME); -} - -static void invoke_pwritev(void *data) { - struct t_data* const d = (struct t_data *) data; - struct t_pwritev * const c = &d->c.pwritev; - SysIOVec *iov0; - SysIOVec *iov; - int iovlen; - int iovcnt; - size_t p; - int segment; - size_t size, write_size, written; - DTRACE_INVOKE_SETUP(FILE_PWRITEV); - - segment = d->again && c->size >= 2*FILE_SEGMENT_WRITE; - if (segment) { - size = FILE_SEGMENT_WRITE; - } else { - size = c->size; - } - d->result_ok = !0; - p = 0; - /* Lock the queue just for a while, we don't want it locked during write */ - MUTEX_LOCK(c->q_mtx); - iov0 = driver_peekq(c->port, &iovlen); - iov = EF_SAFE_ALLOC(sizeof(SysIOVec)*iovlen); - memcpy(iov,iov0,sizeof(SysIOVec)*iovlen); - MUTEX_UNLOCK(c->q_mtx); - - if (iovlen < 0) - goto error; /* Port terminated */ - for (iovcnt = 0, written = 0; - c->cnt < c->n && iovcnt < iovlen && written < size; - c->cnt++) { - int chop; - write_size = c->specs[c->cnt].size; - if (iov[iovcnt].iov_len - p < write_size) { - goto error; - } - chop = segment && written + write_size >= 2*FILE_SEGMENT_WRITE; - if (chop) { - ASSERT(written < FILE_SEGMENT_WRITE); - write_size = FILE_SEGMENT_WRITE + FILE_SEGMENT_WRITE/2 - - written; - } - d->result_ok = efile_pwrite(&d->errInfo, (int) d->fd, - (char *)(iov[iovcnt].iov_base) + p, - write_size, - c->specs[c->cnt].offset); - if (! 
d->result_ok) { - d->again = 0; - goto deq_error; - } - written += write_size; - c->size -= write_size; - if (chop) { - c->specs[c->cnt].offset += write_size; - c->specs[c->cnt].size -= write_size; - /* Schedule out (d->again != 0) */ - break; - } - /* Move forward in buffer */ - p += write_size; - ASSERT(iov[iovcnt].iov_len >= p); - if (iov[iovcnt].iov_len == p) { - /* Move to next iov[], we trust that it is not a - * zero length vector, and thereby depend on that - * such are not queued. - */ - iovcnt++; p = 0; - } - } - if (! segment) { - if (c->cnt != c->n) { - /* Mismatch between number of - * pos/size specs vs number of queued buffers . - */ - error: - d->errInfo.posix_errno = EINVAL; - d->result_ok = 0; - d->again = 0; - deq_error: - MUTEX_LOCK(c->q_mtx); - driver_deq(c->port, c->size); - MUTEX_UNLOCK(c->q_mtx); - - goto done; - } else { - ASSERT(written == size); - d->again = 0; - } - } else { - ASSERT(written >= FILE_SEGMENT_WRITE); - } - - MUTEX_LOCK(c->q_mtx); - driver_deq(c->port, written); - MUTEX_UNLOCK(c->q_mtx); - done: - EF_FREE(iov); /* Free our copy of the vector, nothing to restore */ - - DTRACE_INVOKE_RETURN(FILE_PWRITEV); -} - -static void invoke_flstat(void *data) -{ - struct t_data *d = (struct t_data *) data; - - DTRACE3(efile_drv_int_entry, d->sched_i1, d->sched_i2, - d->command == FILE_LSTAT ? FILE_LSTAT : FILE_FSTAT); - d->again = 0; - d->result_ok = efile_fileinfo(&d->errInfo, &d->info, - d->b, d->command == FILE_LSTAT); - DTRACE3(efile_drv_int_entry, d->sched_i1, d->sched_i2, - d->command == FILE_LSTAT ? 
FILE_LSTAT : FILE_FSTAT); - gcc_optimizer_hack++; -} - -static void invoke_link(void *data) -{ - struct t_data *d = (struct t_data *) data; - char *name = d->b; - char *new_name; - DTRACE_INVOKE_SETUP(FILE_LINK); - - d->again = 0; - new_name = name+FILENAME_BYTELEN(name)+FILENAME_CHARSIZE; - d->result_ok = efile_link(&d->errInfo, name, new_name); - DTRACE_INVOKE_RETURN(FILE_LINK); -} - -static void invoke_symlink(void *data) -{ - struct t_data *d = (struct t_data *) data; - char *name = d->b; - char *new_name; - DTRACE_INVOKE_SETUP(FILE_SYMLINK); - - d->again = 0; - new_name = name+FILENAME_BYTELEN(name)+FILENAME_CHARSIZE; - d->result_ok = efile_symlink(&d->errInfo, name, new_name); - DTRACE_INVOKE_RETURN(FILE_SYMLINK); -} - -static void invoke_rename(void *data) -{ - struct t_data *d = (struct t_data *) data; - char *name = d->b; - char *new_name; - DTRACE_INVOKE_SETUP(FILE_RENAME); - - d->again = 0; - new_name = name+FILENAME_BYTELEN(name)+FILENAME_CHARSIZE; - d->result_ok = efile_rename(&d->errInfo, name, new_name); - DTRACE_INVOKE_RETURN(FILE_RENAME); -} - -static void invoke_write_info(void *data) -{ - struct t_data *d = (struct t_data *) data; - DTRACE_INVOKE_SETUP(FILE_WRITE_INFO); - - d->again = 0; - d->result_ok = efile_write_info(&d->errInfo, &d->info, d->b); - DTRACE_INVOKE_RETURN(FILE_WRITE_INFO); -} - -static void invoke_lseek(void *data) -{ - struct t_data *d = (struct t_data *) data; - int status; - DTRACE_INVOKE_SETUP(FILE_LSEEK); - - d->again = 0; - if (d->flags & EFILE_COMPRESSED) { - int offset = (int) d->c.lseek.offset; - - if (offset != d->c.lseek.offset) { - d->errInfo.posix_errno = EINVAL; - status = 0; - } else { - d->c.lseek.location = erts_gzseek((ErtsGzFile)d->fd, - offset, d->c.lseek.origin); - if (d->c.lseek.location == -1) { - d->errInfo.posix_errno = errno; - status = 0; - } else { - status = 1; - } - } - } else { - status = efile_seek(&d->errInfo, (int) d->fd, - d->c.lseek.offset, d->c.lseek.origin, - &d->c.lseek.location); - } - 
d->result_ok = status; - DTRACE_INVOKE_RETURN(FILE_LSEEK); -} - -static void invoke_readdir(void *data) -{ - struct t_data *d = (struct t_data *) data; - char *p = NULL; - size_t file_bs; - size_t n = 0, total = 0; - struct t_readdir_buf *b = NULL; - int res = 0; - DTRACE_INVOKE_SETUP(FILE_READDIR); - - d->again = 0; - d->errInfo.posix_errno = 0; - - do { - total = READDIR_BUFSIZE; - n = 1; - b = EF_SAFE_ALLOC(sizeof(struct t_readdir_buf)); - b->next = NULL; - - if (d->c.read_dir.last_buf) { - d->c.read_dir.last_buf->next = b; - } else { - d->c.read_dir.first_buf = b; - } - d->c.read_dir.last_buf = b; - - p = &b->buf[0]; - p[0] = FILE_RESP_LFNAME; - file_bs = READDIR_BUFSIZE - n; - - do { - res = efile_readdir(&d->errInfo, d->b, &d->dir_handle, p + n + 2, &file_bs); - - if (res) { - put_int16((Uint16)file_bs, p + n); - n += 2 + file_bs; - file_bs = READDIR_BUFSIZE - n; - } - } while( res && ((total - n - 2) >= MAXPATHLEN*FILENAME_CHARSIZE)); - - b->n = n; - } while(res); - - d->result_ok = (d->errInfo.posix_errno == 0); - DTRACE_INVOKE_RETURN(FILE_READDIR); -} - -static void invoke_open(void *data) -{ - struct t_data *d = (struct t_data *) data; - int status = 1; /* Status of open call. */ - DTRACE_INVOKE_SETUP(FILE_OPEN); - - d->again = 0; - if ((d->flags & EFILE_COMPRESSED) == 0) { - int fd; - status = efile_openfile(&d->errInfo, d->b, d->flags, &fd, NULL); - d->fd = fd; - } else { - char* mode = NULL; - - if (((d->flags & (EFILE_MODE_READ_WRITE)) == EFILE_MODE_READ_WRITE) || - (d->flags & EFILE_MODE_APPEND)) { - status = 0; - d->errInfo.posix_errno = EINVAL; - } else { - status = efile_may_openfile(&d->errInfo, d->b); - if (status || (d->errInfo.posix_errno != EISDIR)) { - mode = (d->flags & EFILE_MODE_READ) ? 
"rb" : "wb"; - d->fd = (SWord) erts_gzopen(d->b, mode); - if ((ErtsGzFile)d->fd) { - status = 1; - } else { - if (errno == 0) { - errno = ENOMEM; - } - d->errInfo.posix_errno = errno; - status = 0; - } - } - } - } - - d->result_ok = status; - if (!status) { - d->fd = FILE_FD_INVALID; - } - DTRACE_INVOKE_RETURN(FILE_OPEN); -} - -static void invoke_fadvise(void *data) -{ - struct t_data *d = (struct t_data *) data; - int fd = (int) d->fd; - off_t offset = (off_t) d->c.fadvise.offset; - off_t length = (off_t) d->c.fadvise.length; - int advise = (int) d->c.fadvise.advise; - DTRACE_INVOKE_SETUP(FILE_FADVISE); - - d->again = 0; - d->result_ok = efile_fadvise(&d->errInfo, fd, offset, length, advise); - DTRACE_INVOKE_RETURN(FILE_FADVISE); -} - -#ifdef HAVE_SENDFILE -static void invoke_sendfile(void *data) -{ - struct t_data *d = (struct t_data *)data; - int fd = d->fd; - int out_fd = (int)d->c.sendfile.out_fd; - Uint64 nbytes = d->c.sendfile.nbytes; - int result = 0; - d->again = 0; - - result = efile_sendfile(&d->errInfo, fd, out_fd, &d->c.sendfile.offset, &nbytes, NULL); - - d->c.sendfile.written += nbytes; - - if (result == 1 || (result == 0 && USE_THRDS_FOR_SENDFILE(d))) { - d->result_ok = 0; - } else if (result == 0 && (d->errInfo.posix_errno == EAGAIN - || d->errInfo.posix_errno == EINTR)) { - if ((d->c.sendfile.nbytes - nbytes) != 0) { - d->result_ok = 1; - if (d->c.sendfile.nbytes != 0) - d->c.sendfile.nbytes -= nbytes; - } else if (nbytes == 0 && d->c.sendfile.nbytes == 0) { - d->result_ok = 1; - } else - d->result_ok = 0; - } else { - d->result_ok = -1; - } -} - -static void free_sendfile(void *data) { - struct t_data *d = (struct t_data *)data; - if (USE_THRDS_FOR_SENDFILE(d)) { - SET_NONBLOCKING(d->c.sendfile.out_fd); - } else { - MUTEX_LOCK(d->c.sendfile.q_mtx); - driver_deq(d->c.sendfile.port,1); - MUTEX_UNLOCK(d->c.sendfile.q_mtx); - driver_select(d->c.sendfile.port, (ErlDrvEvent)(long)d->c.sendfile.out_fd, - ERL_DRV_USE_NO_CALLBACK|ERL_DRV_WRITE, 0); - } - 
EF_FREE(data); -} - -static void file_ready_output(ErlDrvData data, ErlDrvEvent event) -{ - file_descriptor* fd = (file_descriptor*) data; - - switch (fd->d->command) { - case FILE_SENDFILE: - driver_select(fd->d->c.sendfile.port, event, - (int)ERL_DRV_WRITE,(int) 0); - invoke_sendfile((void *)fd->d); - file_async_ready(data, (ErlDrvThreadData)fd->d); - break; - default: - break; - } -} - -static void file_stop_select(ErlDrvEvent event, void* _) -{ - -} - -static int flush_sendfile(file_descriptor *desc,void *_) { - if (desc->sendfile_state == sending) { - desc->d->result_ok = -1; - desc->d->errInfo.posix_errno = ECONNABORTED; - file_async_ready((ErlDrvData)desc,(ErlDrvThreadData)desc->d); - } - return 1; -} -#endif /* HAVE_SENDFILE */ - - -static void invoke_fallocate(void *data) -{ - struct t_data *d = (struct t_data *) data; - int fd = (int) d->fd; - Sint64 offset = d->c.fallocate.offset; - Sint64 length = d->c.fallocate.length; - - d->again = 0; - d->result_ok = efile_fallocate(&d->errInfo, fd, offset, length); -} - -static void free_readdir(void *data) -{ - struct t_data *d = (struct t_data *) data; - struct t_readdir_buf *b1 = d->c.read_dir.first_buf; - - while (b1) { - struct t_readdir_buf *b2 = b1; - b1 = b1->next; - EF_FREE(b2); - } - EF_FREE(d); -} - - - -static void try_free_read_bin(file_descriptor *desc) { - if ((desc->read_size == 0) - && (desc->read_offset >= desc->read_binp->orig_size)) { - ASSERT(desc->read_offset == desc->read_binp->orig_size); - driver_free_binary(desc->read_binp); - desc->read_binp = NULL; - desc->read_offset = 0; - desc->read_size = 0; - } -} - - - -static int try_again(file_descriptor *desc, struct t_data *d) { - if (! 
d->again) - return 0; - if (desc->timer_state != timer_idle) { - driver_cancel_timer(desc->port); - } - desc->timer_state = timer_again; - desc->invoke = d->invoke; - desc->d = d; - desc->free = d->free; - driver_set_timer(desc->port, 0L); - return !0; -} - - - -static void cq_execute(file_descriptor *desc) { - struct t_data *d; - register void *void_ptr; /* Soft cast variable */ - if (desc->timer_state == timer_again) - return; -#ifdef HAVE_SENDFILE - if (desc->sendfile_state == sending) - return; -#endif - if (! (d = cq_deq(desc))) - return; - TRACE_F(("x%i", (int) d->command)); - d->again = sys_info.async_threads == 0; - DRIVER_ASYNC(d->level, desc, d->invoke, void_ptr=d, d->free); -} - -static struct t_data *async_write(file_descriptor *desc, int *errp, - int reply, Uint32 reply_size -#ifdef USE_VM_PROBES - ,Sint64 *dt_i1, Sint64 *dt_i2, Sint64 *dt_i3 -#endif -) { - struct t_data *d; - if (! (d = EF_ALLOC(sizeof(struct t_data) - 1))) { - if (errp) *errp = ENOMEM; - return NULL; - } - TRACE_F(("w%lu", (unsigned long)desc->write_buffered)); - d->command = FILE_WRITE; - d->fd = desc->fd; - d->flags = desc->flags; - d->c.writev.port = desc->port; - d->c.writev.q_mtx = desc->q_mtx; - d->c.writev.size = desc->write_buffered; -#ifdef USE_VM_PROBES - if (dt_i1 != NULL) { - *dt_i1 = d->fd; - *dt_i2 = d->flags; - *dt_i3 = d->c.writev.size; - } -#endif - d->reply = reply; - d->c.writev.reply_size = reply_size; - d->invoke = invoke_writev; - d->free = free_data; - d->level = 1; - cq_enq(desc, d); - desc->write_buffered = 0; - return d; -} - -static int flush_write(file_descriptor *desc, int *errp -#ifdef USE_VM_PROBES - , dt_private *dt_priv, char *dt_utag -#endif -) { - int result = 0; -#ifdef USE_VM_PROBES - Sint64 dt_i1 = 0, dt_i2 = 0, dt_i3 = 0; -#endif - struct t_data *d = NULL; - - MUTEX_LOCK(desc->q_mtx); - if (desc->write_buffered > 0) { - if ((d = async_write(desc, errp, 0, 0 -#ifdef USE_VM_PROBES - ,&dt_i1, &dt_i2, &dt_i3 -#endif - )) == NULL) { - result = -1; - 
} - } - MUTEX_UNLOCK(desc->q_mtx); -#ifdef USE_VM_PROBES - if (d != NULL) { - d->sched_i1 = dt_priv->thread_num; - d->sched_i2 = dt_priv->tag; - d->sched_utag[0] = '\0'; - if (dt_utag != NULL) { - if (dt_utag[0] == '\0') { - dt_utag = NULL; - } else { - strncpy(d->sched_utag, dt_utag, sizeof(d->sched_utag) - 1); - d->sched_utag[sizeof(d->sched_utag) - 1] = '\0'; - } - } - DTRACE11(efile_drv_entry, dt_priv->thread_num, dt_priv->tag++, - dt_utag, FILE_WRITE, - NULL, NULL, dt_i1, dt_i2, dt_i3, 0, desc->port_str); - } -#endif /* USE_VM_PROBES */ - return result; -} - -static int check_write_error(file_descriptor *desc, int *errp) { - if (desc->write_error) { - if (errp) *errp = desc->write_errInfo.posix_errno; - desc->write_error = 0; - return -1; - } - return 0; -} - -static int flush_write_check_error(file_descriptor *desc, int *errp -#ifdef USE_VM_PROBES - , dt_private *dt_priv, char *dt_utag -#endif - ) { - int r; - if ( (r = flush_write(desc, errp -#ifdef USE_VM_PROBES - , dt_priv, dt_utag -#endif - )) != 0) { - check_write_error(desc, NULL); - return r; - } else { - return check_write_error(desc, errp); - } -} - -static struct t_data *async_lseek(file_descriptor *desc, int *errp, int reply, - Sint64 offset, int origin -#ifdef USE_VM_PROBES - , Sint64 *dt_i1, Sint64 *dt_i2, Sint64 *dt_i3 -#endif - ) { - struct t_data *d; - if (! 
(d = EF_ALLOC(sizeof(struct t_data)))) { - *errp = ENOMEM; - return NULL; - } - d->flags = desc->flags; - d->fd = desc->fd; - d->command = FILE_LSEEK; - d->reply = reply; - d->c.lseek.offset = offset; - d->c.lseek.origin = origin; -#ifdef USE_VM_PROBES - if (dt_i1 != NULL) { - *dt_i1 = d->fd; - *dt_i2 = d->c.lseek.offset; - *dt_i3 = d->c.lseek.origin; - } -#endif - d->invoke = invoke_lseek; - d->free = free_data; - d->level = 1; - cq_enq(desc, d); - return d; -} - -static void flush_read(file_descriptor *desc) { - desc->read_offset = 0; - desc->read_size = 0; - if (desc->read_binp) { - driver_free_binary(desc->read_binp); - desc->read_binp = NULL; - } -} - -static int lseek_flush_read(file_descriptor *desc, int *errp -#ifdef USE_VM_PROBES - ,dt_private *dt_priv, char *dt_utag -#endif - ) { - int r = 0; - size_t read_size = desc->read_size; -#ifdef USE_VM_PROBES - Sint64 dt_i1 = 0, dt_i2 = 0, dt_i3 = 0; -#endif - struct t_data *d; - - flush_read(desc); - if (read_size != 0) { - if ((d = async_lseek(desc, errp, 0, - -((ssize_t)read_size), EFILE_SEEK_CUR -#ifdef USE_VM_PROBES - , &dt_i1, &dt_i2, &dt_i3 -#endif - )) == NULL) { - r = -1; - } else { -#ifdef USE_VM_PROBES - d->sched_i1 = dt_priv->thread_num; - d->sched_i2 = dt_priv->tag; - d->sched_utag[0] = '\0'; - if (dt_utag != NULL) { - if (dt_utag[0] == '\0') { - dt_utag = NULL; - } else { - strncpy(d->sched_utag, dt_utag, sizeof(d->sched_utag) - 1); - d->sched_utag[sizeof(d->sched_utag) - 1] = '\0'; - } - } - DTRACE11(efile_drv_entry, dt_priv->thread_num, dt_priv->tag++, - dt_utag, FILE_LSEEK, - NULL, NULL, dt_i1, dt_i2, dt_i3, 0, desc->port_str); -#endif /* USE_VM_PROBES */ - } - } - return r; -} - - -/********************************************************************* - * Driver entry point -> stop - * The close has to be scheduled on async thread, so that currently active - * async operation does not suddenly have the ground disappearing under their feet... 
- */ -static void -file_stop(ErlDrvData e) -{ - file_descriptor* desc = (file_descriptor*)e; - - TRACE_C('p'); - - IF_THRDS { - flush_read(desc); - if (desc->fd != FILE_FD_INVALID) { - struct t_data *d = EF_SAFE_ALLOC(sizeof(struct t_data)); - d->command = FILE_CLOSE_ON_PORT_EXIT; - d->reply = !0; - d->fd = desc->fd; - d->flags = desc->flags; - d->invoke = invoke_close; - d->free = free_data; - d->level = 2; - d->data_to_free = (void *) desc; - cq_enq(desc, d); - desc->fd = FILE_FD_INVALID; - desc->flags = 0; - cq_execute(desc); - } else { - EF_FREE(desc); - } - } else { - if (desc->fd != FILE_FD_INVALID) { - do_close(desc->flags, desc->fd); - desc->fd = FILE_FD_INVALID; - desc->flags = 0; - } - if (desc->read_binp) { - driver_free_binary(desc->read_binp); - } - EF_FREE(desc); - } -} - -/********************************************************************* - * Driver entry point -> ready_async - */ -static void -file_async_ready(ErlDrvData e, ErlDrvThreadData data) -{ - file_descriptor *desc = (file_descriptor*)e; - struct t_data *d = (struct t_data *) data; - char header[5]; /* result code + count */ - char resbuf[RESBUFSIZE]; /* Result buffer. */ -#ifdef USE_VM_PROBES - int sched_i1 = d->sched_i1, sched_i2 = d->sched_i2, command = d->command, - result_ok = d->result_ok, - posix_errno = d->result_ok ? 0 : d->errInfo.posix_errno; - DTRACE_CHARBUF(sched_utag, DTRACE_EFILE_BUFSIZ+1); - - sched_utag[0] = '\0'; - if (DTRACE_ENABLED(efile_drv_return)) { - strncpy(sched_utag, d->sched_utag, DTRACE_EFILE_BUFSIZ); - sched_utag[DTRACE_EFILE_BUFSIZ] = '\0'; - } -#endif /* USE_VM_PROBES */ - - TRACE_C('r'); - - if (try_again(desc, d)) { - /* DTRACE TODO: what kind of probe makes sense here? 
*/ - return; - } - - switch (d->command) - { - case FILE_READ: - if (!d->result_ok) { - reply_error(desc, &d->errInfo); - } else { - size_t available_bytes = - d->c.read.bin_offset + d->c.read.bin_size - desc->read_offset; - if (available_bytes < d->c.read.size) { - d->c.read.size = available_bytes; - } - TRACE_C('D'); - reply_data(desc, d->c.read.binp, - desc->read_offset, d->c.read.size); - desc->read_offset += d->c.read.size; - desc->read_size = - d->c.read.bin_offset + d->c.read.bin_size - desc->read_offset; - try_free_read_bin(desc); - } - free_read(data); - break; - case FILE_READ_LINE: - /* The read_line structure differs from the read structure. - The data->read_offset and d->c.read_line.read_offset are copies, as are - data->read_size and d->c.read_line.read_size - The read_line function does not kniow in advance how large the binary has to be, - why new allocation (but not reallocation of the old binary, for obvious reasons) - may happen in the worker thread. */ - if (!d->result_ok) { - reply_error(desc, &d->errInfo); - } else { - size_t len = d->c.read_line.nl_pos - d->c.read_line.read_offset; - TRACE_C('L'); - reply_data(desc, d->c.read_line.binp, - d->c.read_line.read_offset, len); - desc->read_offset = d->c.read_line.read_offset + d->c.read_line.nl_skip + len; - desc->read_size = - d->c.read_line.read_size - d->c.read_line.nl_skip - len; - if (desc->read_binp != d->c.read_line.binp) { /* New binary allocated */ - driver_free_binary(desc->read_binp); - desc->read_binp = d->c.read_line.binp; - driver_binary_inc_refc(desc->read_binp); - } -#if !ALWAYS_READ_LINE_AHEAD - ASSERT(desc->read_bufsize > 0 || desc->read_size == 0); - if (desc->read_bufsize == 0) { - desc->read_offset = desc->read_binp->orig_size; /* triggers cleanup */ - } -#endif - try_free_read_bin(desc); - } - free_read_line(data); - break; - case FILE_READ_FILE: - if (!d->result_ok) - reply_error(desc, &d->errInfo); - else { - header[0] = FILE_RESP_ALL_DATA; - TRACE_C('R'); - 
driver_output_binary(desc->port, header, 1, - d->c.read_file.binp, - 0, d->c.read_file.offset); - } - free_read_file(data); - break; - case FILE_WRITE: - if (d->reply) { - if (! d->result_ok) { - reply_error(desc, &d->errInfo); - } else { - reply_Uint(desc, d->c.writev.reply_size); - } - } else { - if (! d->result_ok) { - desc->write_error = !0; - desc->write_errInfo = d->errInfo; - } - } - free_data(data); - break; - case FILE_LSEEK: - if (d->reply) { - if (d->result_ok) - reply_Sint64(desc, d->c.lseek.location); - else - reply_error(desc, &d->errInfo); - } - free_data(data); - break; - case FILE_MKDIR: - case FILE_RMDIR: - case FILE_CHDIR: - case FILE_DELETE: - case FILE_FDATASYNC: - case FILE_FSYNC: - case FILE_TRUNCATE: - case FILE_LINK: - case FILE_SYMLINK: - case FILE_RENAME: - case FILE_WRITE_INFO: - case FILE_FADVISE: - case FILE_FALLOCATE: - reply(desc, d->result_ok, &d->errInfo); - free_data(data); - break; - case FILE_ALTNAME: - case FILE_PWD: - case FILE_READLINK: - { - int length; - char *resbuf = d->b; - - if (!d->result_ok) - reply_error(desc, &d->errInfo); - else { - resbuf[0] = FILE_RESP_FNAME; - length = 1+FILENAME_BYTELEN((char*) resbuf+1); - TRACE_C('R'); - driver_output2(desc->port, resbuf, 1, resbuf+1, length-1); - } - free_data(data); - break; - } - case FILE_OPEN: - if (!d->result_ok) { - reply_error(desc, &d->errInfo); - } else { - ASSERT(d->is_fd_unused); - desc->fd = d->fd; - desc->flags = d->flags; - d->is_fd_unused = 0; - reply_Uint(desc, d->fd); - } - free_data(data); - break; - case FILE_FSTAT: - case FILE_LSTAT: - { - if (d->result_ok) { - resbuf[0] = FILE_RESP_INFO; - - put_int32(d->info.size_high, &resbuf[1 + ( 0 * 4)]); - put_int32(d->info.size_low, &resbuf[1 + ( 1 * 4)]); - put_int32(d->info.type, &resbuf[1 + ( 2 * 4)]); - - /* Note 64 bit indexing in resbuf here */ - put_int64(d->info.accessTime, &resbuf[1 + ( 3 * 4)]); - put_int64(d->info.modifyTime, &resbuf[1 + ( 5 * 4)]); - put_int64(d->info.cTime, &resbuf[1 + ( 7 * 4)]); - - 
put_int32(d->info.mode, &resbuf[1 + ( 9 * 4)]); - put_int32(d->info.links, &resbuf[1 + (10 * 4)]); - put_int32(d->info.major_device, &resbuf[1 + (11 * 4)]); - put_int32(d->info.minor_device, &resbuf[1 + (12 * 4)]); - put_int32(d->info.inode, &resbuf[1 + (13 * 4)]); - put_int32(d->info.uid, &resbuf[1 + (14 * 4)]); - put_int32(d->info.gid, &resbuf[1 + (15 * 4)]); - put_int32(d->info.access, &resbuf[1 + (16 * 4)]); - -#define RESULT_SIZE (1 + (17 * 4)) - TRACE_C('R'); - driver_output2(desc->port, resbuf, RESULT_SIZE, NULL, 0); -#undef RESULT_SIZE - } else - reply_error(desc, &d->errInfo); - } - free_data(data); - break; - case FILE_READDIR: - if (!d->result_ok) { - reply_error(desc, &d->errInfo); - } else { - struct t_readdir_buf *b1 = d->c.read_dir.first_buf; - char op = FILE_RESP_LFNAME; - - TRACE_C('R'); - ASSERT(b1); - - while (b1) { - struct t_readdir_buf *b2 = b1; - char *p = &b1->buf[0]; - driver_output2(desc->port, p, 1, p + 1, b1->n - 1); - b1 = b1->next; - EF_FREE(b2); - } - driver_output2(desc->port, &op, 1, NULL, 0); - - d->c.read_dir.first_buf = NULL; - d->c.read_dir.last_buf = NULL; - } - free_readdir(data); - break; - case FILE_CLOSE: - if (d->reply) { - TRACE_C('K'); - reply_ok(desc); -#ifdef USE_VM_PROBES - result_ok = 1; -#endif - } - free_data(data); - break; - case FILE_PWRITEV: - if (!d->result_ok) { - reply_Uint_error(desc, d->c.pwritev.cnt, &d->errInfo); - } else { - reply_Uint(desc, d->c.pwritev.n); - } - free_data(data); - break; - case FILE_PREADV: - if (!d->result_ok) { - reply_error(desc, &d->errInfo); - } else { - reply_ev(desc, FILE_RESP_LDATA, &d->c.preadv.eiov); - } - free_preadv(data); - break; - case FILE_IPREAD: - if (!d->result_ok) { - reply_error(desc, &d->errInfo); - } else if (!d->c.preadv.eiov.vsize) { - reply_eof(desc); - } else { - reply_ev(desc, FILE_RESP_N2DATA, &d->c.preadv.eiov); - } - free_preadv(data); - break; -#ifdef HAVE_SENDFILE - case FILE_SENDFILE: - if (d->result_ok == -1) { - if (d->errInfo.posix_errno == 
ECONNRESET || - d->errInfo.posix_errno == ENOTCONN || - d->errInfo.posix_errno == EPIPE) - reply_string_error(desc,"closed"); - else - reply_error(desc, &d->errInfo); - desc->sendfile_state = not_sending; - free_sendfile(data); - } else if (d->result_ok == 0) { - reply_Sint64(desc, d->c.sendfile.written); - desc->sendfile_state = not_sending; - free_sendfile(data); - } else if (d->result_ok == 1) { /* If we are using select to send the rest of the data */ - desc->sendfile_state = sending; - desc->d = d; - driver_select(desc->port, (ErlDrvEvent)(long)d->c.sendfile.out_fd, - ERL_DRV_USE|ERL_DRV_WRITE, 1); - } - break; -#endif - case FILE_CLOSE_ON_PORT_EXIT: - /* See file_stop. However this is never invoked after the port is killed. */ - free_data(data); - desc = NULL; - /* This is it for this port, so just send dtrace and return, avoid doing anything to the freed data */ - DTRACE6(efile_drv_return, sched_i1, sched_i2, sched_utag, - command, result_ok, posix_errno); - return; - default: - abort(); - } - DTRACE6(efile_drv_return, sched_i1, sched_i2, sched_utag, - command, result_ok, posix_errno); - if (desc->write_buffered != 0 && desc->timer_state == timer_idle ) { - desc->timer_state = timer_write; - driver_set_timer(desc->port, desc->write_delay); - } - cq_execute(desc); - -} - - -/********************************************************************* - * Driver entry point -> output - */ -static void -file_output(ErlDrvData e, char* buf, ErlDrvSizeT count) -{ - file_descriptor* desc = (file_descriptor*)e; - Efile_error errInfo; /* The error codes for the last operation. */ - Sint fd; /* The file descriptor for this port, if any, - * -1 if none. - */ - char* name; /* Points to the filename in buf. 
*/ - int command; - struct t_data *d = NULL; -#ifdef USE_VM_PROBES - char *dt_utag = NULL; - char *dt_s1 = NULL, *dt_s2 = NULL; - Sint64 dt_i1 = 0; - Sint64 dt_i2 = 0; - Sint64 dt_i3 = 0; - Sint64 dt_i4 = 0; - dt_private *dt_priv = get_dt_private(0); -#endif /* USE_VM_PROBES */ - - TRACE_C('o'); - - fd = desc->fd; - name = buf+1; - command = *(uchar*)buf++; - - switch(command) { - - case FILE_MKDIR: - { - d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + FILENAME_BYTELEN(name) + FILENAME_CHARSIZE); - - FILENAME_COPY(d->b, name); -#ifdef USE_VM_PROBES - dt_s1 = d->b; - dt_utag = name + FILENAME_BYTELEN(name) + FILENAME_CHARSIZE; -#endif - d->command = command; - d->invoke = invoke_mkdir; - d->free = free_data; - d->level = 2; - goto done; - } - case FILE_RMDIR: - { - d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + FILENAME_BYTELEN(name) + FILENAME_CHARSIZE); - - FILENAME_COPY(d->b, name); -#ifdef USE_VM_PROBES - dt_s1 = d->b; - dt_utag = name + FILENAME_BYTELEN(name) + FILENAME_CHARSIZE; -#endif - d->command = command; - d->invoke = invoke_rmdir; - d->free = free_data; - d->level = 2; - goto done; - } - case FILE_DELETE: - { - d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + FILENAME_BYTELEN(name) + FILENAME_CHARSIZE); - - FILENAME_COPY(d->b, name); -#ifdef USE_VM_PROBES - dt_s1 = d->b; - dt_utag = name + FILENAME_BYTELEN(name) + FILENAME_CHARSIZE; -#endif - d->command = command; - d->invoke = invoke_delete_file; - d->free = free_data; - d->level = 2; - goto done; - } - case FILE_RENAME: - { - char* new_name; - int namelen = FILENAME_BYTELEN(name)+FILENAME_CHARSIZE; - new_name = name+namelen; - d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 - + namelen - + FILENAME_BYTELEN(new_name) + FILENAME_CHARSIZE); - - FILENAME_COPY(d->b, name); - FILENAME_COPY(d->b + namelen, new_name); -#ifdef USE_VM_PROBES - dt_s1 = d->b; - dt_s2 = d->b + namelen; - dt_utag = buf + namelen + FILENAME_BYTELEN(new_name) + FILENAME_CHARSIZE; -#endif - d->flags = desc->flags; - d->fd = fd; - d->command = 
command; - d->invoke = invoke_rename; - d->free = free_data; - d->level = 2; - goto done; - } - case FILE_CHDIR: - { - d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + FILENAME_BYTELEN(name) + FILENAME_CHARSIZE); - - FILENAME_COPY(d->b, name); -#ifdef USE_VM_PROBES - dt_s1 = d->b; - dt_utag = name + FILENAME_BYTELEN(name) + FILENAME_CHARSIZE; -#endif - d->command = command; - d->invoke = invoke_chdir; - d->free = free_data; - d->level = 2; - goto done; - } - case FILE_PWD: - { - d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + RESBUFSIZE + 1); - - d->drive = *(uchar*)buf; -#ifdef USE_VM_PROBES - dt_utag = buf + 1; -#endif - d->command = command; - d->invoke = invoke_pwd; - d->free = free_data; - d->level = 2; - goto done; - } - - case FILE_READDIR: - if (sys_info.async_threads > 0) - { - d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + FILENAME_BYTELEN(name) + - FILENAME_CHARSIZE); - - FILENAME_COPY(d->b, name); -#ifdef USE_VM_PROBES - dt_s1 = d->b; - dt_utag = name + FILENAME_BYTELEN(name) + FILENAME_CHARSIZE; -#endif - d->dir_handle = NULL; - d->command = command; - d->invoke = invoke_readdir; - d->free = free_readdir; - d->level = 2; - d->c.read_dir.first_buf = NULL; - d->c.read_dir.last_buf = NULL; - goto done; - } - else - { - size_t resbufsize; - size_t n = 0, total = 0; - int res = 0; - char resbuf[READDIR_BUFSIZE]; - - EFILE_DIR_HANDLE dir_handle; /* Handle to open directory. */ - - total = READDIR_BUFSIZE; - errInfo.posix_errno = 0; - dir_handle = NULL; - resbuf[0] = FILE_RESP_LFNAME; - -#ifdef USE_VM_PROBES - dt_s1 = name; - dt_utag = name + FILENAME_BYTELEN(name) + FILENAME_CHARSIZE; -#endif - /* Fill the buffer with multiple directory listings before sending it to the - * receiving process. READDIR_CHUNKS is minimum number of files sent to the - * receiver. - * Format for each driver_output2: - * ------------------------------------ - * | Type | Len | Filename | ... - * | 1 byte | 2 bytes | Len bytes | ... 
- * ------------------------------------ - */ - - do { - n = 1; - resbufsize = READDIR_BUFSIZE - n; - - do { - res = efile_readdir(&errInfo, name, &dir_handle, resbuf + n + 2, &resbufsize); - - if (res) { - put_int16((Uint16)resbufsize, resbuf + n); - n += 2 + resbufsize; - resbufsize = READDIR_BUFSIZE - n; - } - } while( res && ((total - n - 2) >= MAXPATHLEN*FILENAME_CHARSIZE)); - - if (n > 1) { - driver_output2(desc->port, resbuf, 1, resbuf + 1, n - 1); - } - } while(res); - - if (errInfo.posix_errno != 0) { - reply_error(desc, &errInfo); - return; - } -#ifdef USE_VM_PROBES - if (dt_utag != NULL && dt_utag[0] == '\0') { - dt_utag = NULL; - } - - DTRACE11(efile_drv_entry, dt_priv->thread_num, dt_priv->tag, - dt_utag, command, name, dt_s2, - dt_i1, dt_i2, dt_i3, dt_i4, desc->port_str); - DTRACE6(efile_drv_return, dt_priv->thread_num, dt_priv->tag++, - dt_utag, command, 1, 0); -#endif - TRACE_C('R'); - driver_output2(desc->port, resbuf, 1, NULL, 0); - return; - } - case FILE_OPEN: - { - d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + FILENAME_BYTELEN(buf+4) + - FILENAME_CHARSIZE); - - d->flags = get_int32((uchar*)buf); - name = buf+4; - FILENAME_COPY(d->b, name); -#ifdef USE_VM_PROBES - dt_i1 = d->flags; - dt_s1 = d->b; - dt_utag = name + FILENAME_BYTELEN(d->b) + FILENAME_CHARSIZE; -#endif - d->command = command; - d->invoke = invoke_open; - d->free = free_data; - d->level = 2; - d->is_fd_unused = 1; - goto done; - } - - case FILE_FDATASYNC: - { - d = EF_SAFE_ALLOC(sizeof(struct t_data)); - - d->fd = fd; -#ifdef USE_VM_PROBES - dt_utag = name; - dt_i1 = fd; -#endif - d->command = command; - d->invoke = invoke_fdatasync; - d->free = free_data; - d->level = 2; - goto done; - } - - case FILE_FSYNC: - { - d = EF_SAFE_ALLOC(sizeof(struct t_data)); - - d->fd = fd; -#ifdef USE_VM_PROBES - dt_utag = name; - dt_i1 = fd; -#endif - d->command = command; - d->invoke = invoke_fsync; - d->free = free_data; - d->level = 2; - goto done; - } - - - case FILE_FSTAT: - case FILE_LSTAT: - 
{ - d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + FILENAME_BYTELEN(name) + - FILENAME_CHARSIZE); - - FILENAME_COPY(d->b, name); - d->fd = fd; -#ifdef USE_VM_PROBES - dt_utag = name + FILENAME_BYTELEN(d->b) + FILENAME_CHARSIZE; - if (command == FILE_LSTAT) { - dt_s1 = d->b; - } else { - dt_i1 = fd; - } -#endif - d->command = command; - d->invoke = invoke_flstat; - d->free = free_data; - d->level = 2; - goto done; - } - - case FILE_TRUNCATE: - { - d = EF_SAFE_ALLOC(sizeof(struct t_data)); - - d->flags = desc->flags; - d->fd = fd; -#ifdef USE_VM_PROBES - dt_utag = name; - dt_i1 = fd; - dt_i2 = d->flags; -#endif - d->command = command; - d->invoke = invoke_truncate; - d->free = free_data; - d->level = 2; - goto done; - } - - case FILE_WRITE_INFO: - { - d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 - + FILENAME_BYTELEN(buf + 9*4) + FILENAME_CHARSIZE); - - d->info.mode = get_int32(buf + 0 * 4); - d->info.uid = get_int32(buf + 1 * 4); - d->info.gid = get_int32(buf + 2 * 4); - d->info.accessTime = get_int64(buf + 3 * 4); - d->info.modifyTime = get_int64(buf + 5 * 4); - d->info.cTime = get_int64(buf + 7 * 4); - - FILENAME_COPY(d->b, buf + 9*4); -#ifdef USE_VM_PROBES - dt_i1 = d->info.mode; - dt_i2 = d->info.uid; - dt_i3 = d->info.gid; - dt_s1 = d->b; - dt_utag = buf + 9 * 4 + FILENAME_BYTELEN(d->b) + FILENAME_CHARSIZE; -#endif - d->command = command; - d->invoke = invoke_write_info; - d->free = free_data; - d->level = 2; - goto done; - } - - case FILE_READLINK: - { - d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + - MAX(RESBUFSIZE, (FILENAME_BYTELEN(name) + - FILENAME_CHARSIZE)) + 1); - FILENAME_COPY(d->b, name); -#ifdef USE_VM_PROBES - dt_s1 = d->b; - dt_utag = name + FILENAME_BYTELEN(d->b) + FILENAME_CHARSIZE; -#endif - d->command = command; - d->invoke = invoke_readlink; - d->free = free_data; - d->level = 2; - goto done; - } - - case FILE_ALTNAME: - { - d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 + - MAX(RESBUFSIZE, (FILENAME_BYTELEN(name) + - FILENAME_CHARSIZE)) + 1); - 
FILENAME_COPY(d->b, name); -#ifdef USE_VM_PROBES - dt_s1 = d->b; - dt_utag = name + FILENAME_BYTELEN(d->b) + FILENAME_CHARSIZE; -#endif - d->command = command; - d->invoke = invoke_altname; - d->free = free_data; - d->level = 2; - goto done; - } - - - case FILE_LINK: - { - char* new_name; - int namelen = FILENAME_BYTELEN(name) + FILENAME_CHARSIZE; - - new_name = name+namelen; - d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 - + namelen - + FILENAME_BYTELEN(new_name) + FILENAME_CHARSIZE); - - FILENAME_COPY(d->b, name); - FILENAME_COPY(d->b + namelen, new_name); -#ifdef USE_VM_PROBES - dt_s1 = d->b; - dt_s2 = d->b + namelen; - dt_utag = buf + namelen + FILENAME_BYTELEN(dt_s2) + FILENAME_CHARSIZE; -#endif - d->flags = desc->flags; - d->fd = fd; - d->command = command; - d->invoke = invoke_link; - d->free = free_data; - d->level = 2; - goto done; - } - - case FILE_SYMLINK: - { - char* new_name; - int namelen = FILENAME_BYTELEN(name) + FILENAME_CHARSIZE; - - new_name = name+namelen; - d = EF_SAFE_ALLOC(sizeof(struct t_data) - 1 - + namelen - + FILENAME_BYTELEN(new_name) + FILENAME_CHARSIZE); - - FILENAME_COPY(d->b, name); - FILENAME_COPY(d->b + namelen, new_name); -#ifdef USE_VM_PROBES - dt_s1 = d->b; - dt_s2 = d->b + namelen; - dt_utag = buf + namelen + FILENAME_BYTELEN(dt_s2) + FILENAME_CHARSIZE; -#endif - d->flags = desc->flags; - d->fd = fd; - d->command = command; - d->invoke = invoke_symlink; - d->free = free_data; - d->level = 2; - goto done; - } - - case FILE_FADVISE: - { - d = EF_SAFE_ALLOC(sizeof(struct t_data)); - - d->fd = fd; - d->command = command; - d->invoke = invoke_fadvise; - d->free = free_data; - d->level = 2; - d->c.fadvise.offset = get_int64((uchar*) buf); - d->c.fadvise.length = get_int64(((uchar*) buf) + sizeof(Sint64)); - d->c.fadvise.advise = get_int32(((uchar*) buf) + 2 * sizeof(Sint64)); -#ifdef USE_VM_PROBES - dt_i1 = d->fd; - dt_i2 = d->c.fadvise.offset; - dt_i3 = d->c.fadvise.length; - dt_i4 = d->c.fadvise.advise; - dt_utag = buf + 3 * 
sizeof(Sint64); -#endif - goto done; - } - - case FILE_FALLOCATE: - { - d = EF_SAFE_ALLOC(sizeof(struct t_data)); - - d->fd = fd; - d->command = command; - d->invoke = invoke_fallocate; - d->free = free_data; - d->level = 2; - d->c.fallocate.offset = get_int64((uchar*) buf); - d->c.fallocate.length = get_int64(((uchar*) buf) + sizeof(Sint64)); - goto done; - } - - } - - /* - * Ignore anything else -- let the caller hang. - */ - - return; - - done: - if (d) { -#ifdef USE_VM_PROBES - d->sched_i1 = dt_priv->thread_num; - d->sched_i2 = dt_priv->tag; - d->sched_utag[0] = '\0'; - if (dt_utag != NULL) { - if (dt_utag[0] == '\0') { - dt_utag = NULL; - } else { - strncpy(d->sched_utag, dt_utag, sizeof(d->sched_utag) - 1); - d->sched_utag[sizeof(d->sched_utag) - 1] = '\0'; - } - } - DTRACE11(efile_drv_entry, dt_priv->thread_num, dt_priv->tag++, - dt_utag, command, dt_s1, dt_s2, - dt_i1, dt_i2, dt_i3, dt_i4, desc->port_str); -#endif - cq_enq(desc, d); - } -} - -/********************************************************************* - * Driver entry point -> flush - */ -static void -file_flush(ErlDrvData e) { - file_descriptor *desc = (file_descriptor *)e; -#ifdef DEBUG - int r; -#endif -#ifdef USE_VM_PROBES - dt_private *dt_priv = get_dt_private(dt_driver_io_worker_base); -#endif - - TRACE_C('f'); - -#ifdef HAVE_SENDFILE - flush_sendfile(desc, NULL); -#endif - -#ifdef DEBUG - r = -#endif - flush_write(desc, NULL -#ifdef USE_VM_PROBES - , dt_priv, (desc->d == NULL) ? NULL : desc->d->sched_utag -#endif - ); - /* Only possible reason for bad return value is ENOMEM, and - * there is nobody to tell... - */ -#ifdef DEBUG - ASSERT(r == 0); -#endif - cq_execute(desc); -} - - - -/********************************************************************* - * Driver entry point -> control - * Only debug functionality... 
- */ -static ErlDrvSSizeT -file_control(ErlDrvData e, unsigned int command, - char* buf, ErlDrvSizeT len, char **rbuf, ErlDrvSizeT rlen) { - file_descriptor *desc = (file_descriptor *)e; - switch (command) { - case 'K' : - if (rlen < 4) { - *rbuf = EF_ALLOC(4); - } - (*rbuf)[0] = ((desc->key) >> 24) & 0xFF; - (*rbuf)[1] = ((desc->key) >> 16) & 0xFF; - (*rbuf)[2] = ((desc->key) >> 8) & 0xFF; - (*rbuf)[3] = (desc->key) & 0xFF; - return 4; - default: - return 0; - } -} - -/********************************************************************* - * Driver entry point -> timeout - */ -static void -file_timeout(ErlDrvData e) { - file_descriptor *desc = (file_descriptor *)e; - enum e_timer timer_state = desc->timer_state; -#ifdef USE_VM_PROBES - dt_private *dt_priv = get_dt_private(dt_driver_io_worker_base); -#endif - - TRACE_C('t'); - - desc->timer_state = timer_idle; - switch (timer_state) { - case timer_idle: - ASSERT(0); - break; - case timer_again: - ASSERT(desc->invoke); - ASSERT(desc->free); - driver_async(desc->port, KEY(desc), desc->invoke, desc->d, desc->free); - break; - case timer_write: { -#ifdef DEBUG - int r = -#endif - flush_write(desc, NULL -#ifdef USE_VM_PROBES - , dt_priv, (desc->d == NULL) ? NULL : desc->d->sched_utag -#endif - ); - /* Only possible reason for bad return value is ENOMEM, and - * there is nobody to tell... - */ - ASSERT(r == 0); - cq_execute(desc); - } break; - } /* case */ -} - - - -/********************************************************************* - * Driver entry point -> outputv - */ -static void -file_outputv(ErlDrvData e, ErlIOVec *ev) { - file_descriptor* desc = (file_descriptor*)e; - char command; - size_t p, q; - int err; - struct t_data *d = NULL; -#ifdef USE_VM_PROBES - Sint64 dt_i1 = 0, dt_i2 = 0, dt_i3 = 0; - Sint64 dt_i4 = 0; - char *dt_utag = NULL; - char *dt_s1 = NULL; - dt_private *dt_priv = get_dt_private(dt_driver_io_worker_base); -#endif - - TRACE_C('v'); - - p = 0; q = 1; - if (! 
EV_GET_CHAR(ev, &command, &p, &q)) { - /* Empty command */ - reply_posix_error(desc, EINVAL); - goto done; - } - /* 'command' contains the decoded command number, - * 'p' and 'q' point out the next byte in the command: - * ((char *)ev->iov[q].iov_base) + p; - */ - - TRACE_F(("%i", (int) command)); - - switch (command) { - - case FILE_CLOSE: { -#ifdef USE_VM_PROBES - dt_utag = EV_CHAR_P(ev, p, q); -#endif - flush_read(desc); - if (flush_write_check_error(desc, &err -#ifdef USE_VM_PROBES - , dt_priv, dt_utag -#endif - ) < 0) { - reply_posix_error(desc, err); - goto done; - } - if (desc->fd != FILE_FD_INVALID) { - if (! (d = EF_ALLOC(sizeof(struct t_data)))) { - reply_posix_error(desc, ENOMEM); - } else { - d->command = command; - d->reply = !0; - d->fd = desc->fd; - d->flags = desc->flags; -#ifdef USE_VM_PROBES - dt_i1 = d->fd; - dt_i2 = d->flags; -#endif - d->invoke = invoke_close; - d->free = free_data; - d->level = 2; - cq_enq(desc, d); - desc->fd = FILE_FD_INVALID; - desc->flags = 0; - } - } else { - reply_posix_error(desc, EBADF); - } - } goto done; - - case FILE_READ: { - Uint32 sizeH, sizeL; - size_t size, alloc_size; - - if (!EV_GET_UINT32(ev, &sizeH, &p, &q) - || !EV_GET_UINT32(ev, &sizeL, &p, &q)) { - /* Wrong buffer length to contain the read count */ - reply_posix_error(desc, EINVAL); - goto done; - } -#ifdef USE_VM_PROBES - dt_utag = EV_CHAR_P(ev, p, q); -#endif - if (flush_write_check_error(desc, &err -#ifdef USE_VM_PROBES - , dt_priv, dt_utag -#endif - ) < 0) { - reply_posix_error(desc, err); - goto done; - } -#if ALWAYS_READ_LINE_AHEAD - if (desc->read_bufsize == 0 && desc->read_binp != NULL && desc->read_size > 0) { - /* We have allocated a buffer for line mode but should not really have a - read-ahead buffer... 
*/ - if (lseek_flush_read(desc, &err -#ifdef USE_VM_PROBES - , dt_priv, dt_utag -#endif - ) < 0) { - reply_posix_error(desc, err); - goto done; - } - } -#endif -#if SIZEOF_SIZE_T == 4 - if (sizeH != 0) { - reply_posix_error(desc, EINVAL); - goto done; - } - size = sizeL; -#else - size = ((size_t)sizeH << 32) | sizeL; -#endif - if ((desc->fd == FILE_FD_INVALID) - || (! (desc->flags & EFILE_MODE_READ)) ) { - reply_posix_error(desc, EBADF); - goto done; - } - if (size == 0) { - reply_buf(desc, &command, 0); - goto done; - } - if (desc->read_size >= size) { - /* We already have all data */ - TRACE_C('D'); - reply_data(desc, desc->read_binp, desc->read_offset, size); - desc->read_offset += size; - desc->read_size -= size; - try_free_read_bin(desc); - goto done; - } - /* We may have some of the data - */ - /* Justification for the following strange formula: - * If the read request is for such a large block as more than - * half the buffer size it may lead to a lot of unnecessary copying, - * since the tail of the old buffer is copied to the head of the - * new, and if the tail is almost half the buffer it is a lot - * to copy. Therefore allocate the exact amount needed in - * this case, giving no lingering tail. */ - alloc_size = - size > (desc->read_bufsize>>1) ? - size : desc->read_bufsize; - if (! desc->read_binp) { - /* Need to allocate a new binary for the result */ - if (! (desc->read_binp = driver_alloc_binary(alloc_size))) { - reply_posix_error(desc, ENOMEM); - goto done; - } - } else { - /* We already have a buffer */ - if (desc->read_binp->orig_size - desc->read_offset < size) { - /* Need to allocate a new binary for the result */ - ErlDrvBinary *binp; - if (! 
(binp = driver_alloc_binary(alloc_size))) { - reply_posix_error(desc, ENOMEM); - goto done; - } - /* Move data we already have to the new binary */ - sys_memcpy(binp->orig_bytes, - desc->read_binp->orig_bytes + desc->read_offset, - desc->read_size); - driver_free_binary(desc->read_binp); - desc->read_offset = 0; - desc->read_binp = binp; - } - } - if (! (d = EF_ALLOC(sizeof(struct t_data)))) { - reply_posix_error(desc, ENOMEM); - goto done; - } - d->command = command; - d->reply = !0; - d->fd = desc->fd; - d->flags = desc->flags; - d->c.read.binp = desc->read_binp; - d->c.read.bin_offset = desc->read_offset + desc->read_size; - d->c.read.bin_size = desc->read_binp->orig_size - d->c.read.bin_offset; - d->c.read.size = size; -#ifdef USE_VM_PROBES - dt_i1 = d->fd; - dt_i2 = d->flags; - dt_i3 = d->c.read.size; -#endif - driver_binary_inc_refc(d->c.read.binp); - d->invoke = invoke_read; - d->free = free_read; - d->level = 1; - cq_enq(desc, d); - } goto done; /* case FILE_READ: */ - - case FILE_READ_LINE: { - /* - * Icky little creature... We do mostly as ordinary file read, but with a few differences. - * 1) We have to scan for proper newline sequence if there is a buffer already, we cannot know - * in advance if the buffer contains a whole line without scanning. - * 2) We do not know how large the buffer needs to be in advance. We give a default buffer, - * but the worker may need to allocate a new one. Freeing the old and rereferencing a newly - * allocated binary + dealing with offsets and lengts are done in file_async ready - * for this OP. 
- */ -#ifdef USE_VM_PROBES - dt_utag = EV_CHAR_P(ev, p, q); -#endif - if (flush_write_check_error(desc, &err -#ifdef USE_VM_PROBES - , dt_priv, dt_utag -#endif - ) < 0) { - reply_posix_error(desc, err); - goto done; - } - if (ev->size != 1 -#ifdef USE_VM_PROBES - + FILENAME_BYTELEN(dt_utag) + FILENAME_CHARSIZE -#endif - ) { - /* Wrong command length */ - reply_posix_error(desc, EINVAL); - goto done; - } - if ((desc->fd == FILE_FD_INVALID) - || (! (desc->flags & EFILE_MODE_READ)) ) { - reply_posix_error(desc, EBADF); - goto done; - } - if (desc->read_size > 0) { - /* look for '\n' in what we'we already got */ - void *nl_ptr = memchr(desc->read_binp->orig_bytes + desc->read_offset,'\n',desc->read_size); - if (nl_ptr != NULL) { - /* If found, we're done */ - int skip = 0; - size_t size = ((char *) nl_ptr) - - ((char *) (desc->read_binp->orig_bytes + desc->read_offset)) + 1; - if (size > 1 && - *(((char *) nl_ptr) - 1) == '\r') { - *(((char *) nl_ptr) - 1) = '\n'; - skip = 1; - --size; - } - reply_data(desc, desc->read_binp, desc->read_offset, size); - desc->read_offset += (size + skip); - desc->read_size -= (size + skip); - try_free_read_bin(desc); - goto done; - } - } - /* Now, it's up to the thread to work out the need for more buffers and such, it's - no use doing it in this thread as we do not have the information required anyway. - Even a NULL buffer could be handled by the thread, but code is simplified by us - allocating it */ - if (! desc->read_binp) { - int alloc_size = (desc->read_bufsize > DEFAULT_LINEBUF_SIZE) ? desc->read_bufsize : - DEFAULT_LINEBUF_SIZE; - /* Allocate a new binary for the result */ - if (! (desc->read_binp = driver_alloc_binary(alloc_size))) { - reply_posix_error(desc, ENOMEM); - goto done; - } - } - if (! 
(d = EF_ALLOC(sizeof(struct t_data)))) { - reply_posix_error(desc, ENOMEM); - goto done; - } - - d->command = command; - d->reply = !0; - d->fd = desc->fd; - d->flags = desc->flags; - d->c.read_line.binp = desc->read_binp; - d->c.read_line.read_offset = desc->read_offset; - d->c.read_line.read_size = desc->read_size; -#ifdef USE_VM_PROBES - dt_i1 = d->fd; - dt_i2 = d->flags; - dt_i3 = d->c.read_line.read_offset; -#endif -#if !ALWAYS_READ_LINE_AHEAD - d->c.read_line.read_ahead = (desc->read_bufsize > 0); -#ifdef USE_VM_PROBES - dt_i4 = d->c.read_line.read_ahead; -#endif -#endif - driver_binary_inc_refc(d->c.read.binp); - d->invoke = invoke_read_line; - d->free = free_read_line; - d->level = 1; - cq_enq(desc, d); - } goto done; - case FILE_WRITE: { /* Dtrace: The dtrace user tag is not last in message, - but follows the message tag directly. - This is handled specially in prim_file.erl */ - ErlDrvSizeT skip = 1; - ErlDrvSizeT size = ev->size - skip; - -#ifdef USE_VM_PROBES - dt_utag = EV_CHAR_P(ev, p, q); - skip += FILENAME_BYTELEN(dt_utag) + FILENAME_CHARSIZE; - size = ev->size - skip; -#endif - if (lseek_flush_read(desc, &err -#ifdef USE_VM_PROBES - , dt_priv, dt_utag -#endif - ) < 0) { - reply_posix_error(desc, err); - goto done; - } - if (! 
(desc->flags & EFILE_MODE_WRITE)) { - reply_posix_error(desc, EBADF); - goto done; - } - if (size == 0) { - reply_Uint(desc, size); - goto done; - } - MUTEX_LOCK(desc->q_mtx); - if (driver_enqv(desc->port, ev, skip)) { - MUTEX_UNLOCK(desc->q_mtx); - reply_posix_error(desc, ENOMEM); - goto done; - } - desc->write_buffered += size; - if (desc->write_buffered < desc->write_bufsize) { - MUTEX_UNLOCK(desc->q_mtx); - reply_Uint(desc, size); - if (desc->timer_state == timer_idle) { - desc->timer_state = timer_write; - driver_set_timer(desc->port, desc->write_delay); - } - } else { - if ((d = async_write(desc, &err, !0, size -#ifdef USE_VM_PROBES - , &dt_i1, &dt_i2, &dt_i3 -#endif - )) == NULL) { - MUTEX_UNLOCK(desc->q_mtx); - reply_posix_error(desc, err); - goto done; - } else { - MUTEX_UNLOCK(desc->q_mtx); - } - } - } goto done; /* case FILE_WRITE */ - - case FILE_PWRITEV: { /* Dtrace: The dtrace user tag is not last in message, - but follows the message tag directly. - This is handled specially in prim_file.erl */ - Uint32 i, j, n; - size_t total; -#ifdef USE_VM_PROBES - char dt_tmp; - int dt_utag_bytes = 1; - - dt_utag = EV_CHAR_P(ev, p, q); - /* This will work for UTF-8, but not for UTF-16 - extra reminder here */ -#ifdef FILENAMES_16BIT -#error 16bit characters in filenames and dtrace in combination is not supported. 
-#endif - while (EV_GET_CHAR(ev, &dt_tmp, &p, &q) && dt_tmp != '\0') { - dt_utag_bytes++; - } -#endif - if (ev->size < 1+4 -#ifdef USE_VM_PROBES - + dt_utag_bytes -#endif - || !EV_GET_UINT32(ev, &n, &p, &q)) { - /* Buffer too short to contain even the number of pos/size specs */ - reply_Uint_posix_error(desc, 0, EINVAL); - goto done; - } - if (lseek_flush_read(desc, &err -#ifdef USE_VM_PROBES - , dt_priv, dt_utag -#endif - ) < 0) { - reply_Uint_posix_error(desc, 0, err); - goto done; - } - if (flush_write_check_error(desc, &err -#ifdef USE_VM_PROBES - , dt_priv, dt_utag -#endif - ) < 0) { - reply_Uint_posix_error(desc, 0, err); - goto done; - } - if (n == 0) { - /* Trivial case - nothing to write */ - if (ev->size != 1+4) { - reply_posix_error(desc, err); - } else { - reply_Uint(desc, 0); - } - goto done; - } - if (ev->size < 1+4+8*(2*n) -#ifdef USE_VM_PROBES - + dt_utag_bytes -#endif - ) { - /* Buffer too short to contain even the pos/size specs */ - reply_Uint_posix_error(desc, 0, EINVAL); - goto done; - } - d = EF_ALLOC(sizeof(struct t_data) - + (n * sizeof(struct t_pbuf_spec))); - if (! d) { - reply_Uint_posix_error(desc, 0, ENOMEM); - goto done; - } - d->command = command; - d->reply = !0; - d->fd = desc->fd; - d->flags = desc->flags; -#ifdef USE_VM_PROBES - dt_i1 = d->fd; - dt_i2 = d->flags; -#endif - d->c.pwritev.port = desc->port; - d->c.pwritev.q_mtx = desc->q_mtx; - d->c.pwritev.n = n; - d->c.pwritev.cnt = 0; - total = 0; - j = 0; - /* Create pos/size specs in the thread data structure - * for all non-zero size binaries. Calculate total size. 
- */ - for(i = 0; i < n; i++) { - Uint32 sizeH, sizeL; - size_t size; - if ( !EV_GET_SINT64(ev, &d->c.pwritev.specs[i].offset, &p, &q) - || !EV_GET_UINT32(ev, &sizeH, &p, &q) - || !EV_GET_UINT32(ev, &sizeL, &p, &q)) { - /* Misalignment in buffer */ - reply_Uint_posix_error(desc, 0, EINVAL); - EF_FREE(d); - goto done; - } -#if SIZEOF_SIZE_T == 4 - if (sizeH != 0) { - reply_Uint_posix_error(desc, 0, EINVAL); - EF_FREE(d); - goto done; - } - size = sizeL; -#else - size = ((size_t)sizeH<<32) | sizeL; -#endif - if (size > 0) { - total += size; - d->c.pwritev.specs[j].size = size; - j++; - } - } - d->c.pwritev.size = total; -#ifdef USE_VM_PROBES - dt_i3 = d->c.pwritev.size; -#endif - if (j == 0) { - /* Trivial case - nothing to write */ - EF_FREE(d); - reply_Uint(desc, 0); - } else { - ErlDrvSizeT skip = 1 + 4 + 8 * (2*n) -#ifdef USE_VM_PROBES - + dt_utag_bytes -#endif - ; - if (skip + total != ev->size) { - /* Actual amount of data does not match - * total of all pos/size specs - */ - EF_FREE(d); - reply_Uint_posix_error(desc, 0, EINVAL); - } else { - /* Enqueue the data */ - MUTEX_LOCK(desc->q_mtx); - driver_enqv(desc->port, ev, skip); - MUTEX_UNLOCK(desc->q_mtx); - /* Execute the command */ - d->invoke = invoke_pwritev; - d->free = free_data; - d->level = 1; - cq_enq(desc, d); - } - } - } goto done; /* case FILE_PWRITEV: */ - - case FILE_PREADV: { /* Dtrace: The dtrace user tag is not last in message, - but follows the message tag directly. - This is handled specially in prim_file.erl */ - register void * void_ptr; - Uint32 i, n; - ErlIOVec *res_ev; -#ifdef USE_VM_PROBES - char dt_tmp; - int dt_utag_bytes = 1; - /* This will work for UTF-8, but not for UTF-16 - extra reminder here */ -#ifdef FILENAMES_16BIT -#error 16bit characters in filenames and dtrace in combination is not supported. 
-#endif - dt_utag = EV_CHAR_P(ev, p, q); - while (EV_GET_CHAR(ev, &dt_tmp, &p, &q) && dt_tmp != '\0') { - dt_utag_bytes++; - } -#endif - if (lseek_flush_read(desc, &err -#ifdef USE_VM_PROBES - , dt_priv, dt_utag -#endif - ) < 0) { - reply_posix_error(desc, err); - goto done; - } - if (flush_write_check_error(desc, &err -#ifdef USE_VM_PROBES - , dt_priv, dt_utag -#endif - ) < 0) { - reply_posix_error(desc, err); - goto done; - } - if (ev->size < 1+8 -#ifdef USE_VM_PROBES - + dt_utag_bytes -#endif - || !EV_GET_UINT32(ev, &n, &p, &q) - || !EV_GET_UINT32(ev, &n, &p, &q)) { - /* Buffer too short to contain even the number of pos/size specs */ - reply_posix_error(desc, EINVAL); - goto done; - } - if (ev->size < 1+8+8*(2*n) -#ifdef USE_VM_PROBES - + dt_utag_bytes -#endif - ) { - /* Buffer wrong length to contain the pos/size specs */ - reply_posix_error(desc, EINVAL); - goto done; - } - /* Create the thread data structure with the contained ErlIOVec - * and corresponding binaries for the response - */ - d = EF_ALLOC(sizeof(*d) - + (n * sizeof(*d->c.preadv.offsets)) - + ((1+n) * (sizeof(*res_ev->iov) - + sizeof(*res_ev->binv)))); - if (! 
d) { - reply_posix_error(desc, ENOMEM); - goto done; - } - d->command = command; - d->reply = !0; - d->fd = desc->fd; - d->flags = desc->flags; -#ifdef USE_VM_PROBES - dt_i1 = d->fd; - dt_i2 = d->flags; -#endif - d->c.preadv.n = n; - d->c.preadv.cnt = 0; - d->c.preadv.size = 0; - res_ev = &d->c.preadv.eiov; - /* XXX possible alignment problems here for weird machines */ - res_ev->vsize = 1+d->c.preadv.n; - res_ev->iov = void_ptr = &d->c.preadv.offsets[d->c.preadv.n]; - res_ev->binv = void_ptr = &res_ev->iov[res_ev->vsize]; - /* Read in the pos/size specs and allocate binaries for the results */ - for (i = 1; i < 1+n; i++) { - Uint32 sizeH, sizeL; - size_t size; - if ( !EV_GET_SINT64(ev, &d->c.preadv.offsets[i-1], &p, &q) - || !EV_GET_UINT32(ev, &sizeH, &p, &q) - || !EV_GET_UINT32(ev, &sizeL, &p, &q)) { - reply_posix_error(desc, EINVAL); - break; - } -#if SIZEOF_SIZE_T == 4 - if (sizeH != 0) { - reply_posix_error(desc, EINVAL); - break; - } - size = sizeL; -#else - size = ((size_t)sizeH<<32) | sizeL; -#endif -#ifdef USE_VM_PROBES - dt_i3 += size; -#endif - if (! (res_ev->binv[i] = driver_alloc_binary(size))) { - reply_posix_error(desc, ENOMEM); - break; - } else { - res_ev->iov[i].iov_len = size; - res_ev->iov[i].iov_base = res_ev->binv[i]->orig_bytes; - } - } - if (i < 1+n) { - for (i--; i > 0; i--) { - driver_free_binary(res_ev->binv[i]); - } - EF_FREE(d); - goto done; - } - /* Allocate the header binary (index 0) */ - res_ev->binv[0] = driver_alloc_binary(4+4+8*n); - if (! 
res_ev->binv[0]) { - reply_posix_error(desc, ENOMEM); - for (i = 1; i < 1+n; i++) { - driver_free_binary(res_ev->binv[i]); - } - EF_FREE(d); - goto done; - } - res_ev->iov[0].iov_len = 4+4+8*n; - res_ev->iov[0].iov_base = res_ev->binv[0]->orig_bytes; - /* Fill in the number of buffers in the header */ - put_int32(0, res_ev->iov[0].iov_base); - put_int32(n, (char *)(res_ev->iov[0].iov_base) + 4); - /**/ - res_ev->size = res_ev->iov[0].iov_len; - if (n == 0) { - /* Trivial case - nothing to read */ - reply_ev(desc, FILE_RESP_LDATA, res_ev); - free_preadv(d); - goto done; - } else { - d->invoke = invoke_preadv; - d->free = free_preadv; - d->level = 1; - cq_enq(desc, d); - } - } goto done; /* case FILE_PREADV: */ - - case FILE_LSEEK: { - Sint64 offset; /* Offset for seek */ - Uint32 origin; /* Origin of seek. */ - - if (ev->size < 1+8+4 - || !EV_GET_SINT64(ev, &offset, &p, &q) - || !EV_GET_UINT32(ev, &origin, &p, &q)) { - /* Wrong length of buffer to contain offset and origin */ - reply_posix_error(desc, EINVAL); - goto done; - } -#ifdef USE_VM_PROBES - dt_utag = EV_CHAR_P(ev, p, q); -#endif - if (lseek_flush_read(desc, &err -#ifdef USE_VM_PROBES - , dt_priv, dt_utag -#endif - ) < 0) { - reply_posix_error(desc, err); - goto done; - } - if (flush_write_check_error(desc, &err -#ifdef USE_VM_PROBES - , dt_priv, dt_utag -#endif - ) < 0) { - reply_posix_error(desc, err); - goto done; - } - if ((d = async_lseek(desc, &err, !0, offset, origin -#ifdef USE_VM_PROBES - , &dt_i1, &dt_i2, &dt_i3 -#endif - )) == NULL) { - reply_posix_error(desc, err); - goto done; - } - } goto done; - - case FILE_READ_FILE: { - char *filename; - if (ev->size < 1+1) { - /* Buffer contains empty name */ - reply_posix_error(desc, ENOENT); - goto done; - } -#ifndef USE_VM_PROBES - /* In the dtrace case, the iov has an extra element, the dtrace utag - we will need - another test to see that - the filename is in a single buffer: */ - if (ev->size-1 != ev->iov[q].iov_len-p) { - /* Name not in one single 
buffer */ - reply_posix_error(desc, EINVAL); - goto done; - } -#else - if (((byte *)ev->iov[q].iov_base)[ev->iov[q].iov_len-1] != '\0') { - /* Name not in one single buffer */ - reply_posix_error(desc, EINVAL); - goto done; - } -#endif - filename = EV_CHAR_P(ev, p, q); - d = EF_ALLOC(sizeof(struct t_data) -1 + FILENAME_BYTELEN(filename) + FILENAME_CHARSIZE); - if (! d) { - reply_posix_error(desc, ENOMEM); - goto done; - } - d->command = command; - d->reply = !0; - /* Copy name */ - FILENAME_COPY(d->b, filename); -#ifdef USE_VM_PROBES - { - char dt_tmp; - - /* This will work for UTF-8, but not for UTF-16 - extra reminder here */ -#ifdef FILENAMES_16BIT -#error 16bit characters in filenames and dtrace in combination is not supported. -#endif - while (EV_GET_CHAR(ev, &dt_tmp, &p, &q) && dt_tmp != '\0') - ; - dt_s1 = d->b; - dt_utag = EV_CHAR_P(ev, p, q); - } -#endif - d->c.read_file.binp = NULL; - d->invoke = invoke_read_file; - d->free = free_read_file; - d->level = 2; - cq_enq(desc, d); - } goto done; - - case FILE_IPREAD: { - /* This operation cheets by using invoke_preadv() and free_preadv() - * plus its own invoke_ipread. Therefore the result format is - * a bit awkward - the header binary contains one extra 64 bit - * field that invoke_preadv() fortunately ignores, - * and the first 64 bit field does not contain the number of - * data binaries which invoke_preadv() also ignores. - */ - register void * void_ptr; - char mode; - Sint64 hdr_offset; - Uint32 max_size; - ErlIOVec *res_ev; - int vsize; - if (! 
EV_GET_CHAR(ev, &mode, &p, &q)) { - /* Empty command */ - reply_posix_error(desc, EINVAL); - goto done; - } - if (mode != IPREAD_S32BU_P32BU) { - reply_posix_error(desc, EINVAL); - goto done; - } - if (ev->size < 1+1+8+4 - || !EV_GET_SINT64(ev, &hdr_offset, &p, &q) - || !EV_GET_UINT32(ev, &max_size, &p, &q)) { - /* Buffer too short to contain - * the header offset and max size spec */ - reply_posix_error(desc, EINVAL); - goto done; - } -#ifdef USE_VM_PROBES - dt_utag = EV_CHAR_P(ev, p, q); -#endif - if (lseek_flush_read(desc, &err -#ifdef USE_VM_PROBES - , dt_priv, dt_utag -#endif - ) < 0) { - reply_posix_error(desc, err); - goto done; - } - if (flush_write_check_error(desc, &err -#ifdef USE_VM_PROBES - , dt_priv, dt_utag -#endif - ) < 0) { - reply_posix_error(desc, err); - goto done; - } - /* Create the thread data structure with the contained ErlIOVec - * and corresponding binaries for the response - */ - vsize = 2; - d = EF_ALLOC(sizeof(*d) + - vsize*(sizeof(*res_ev->iov) + sizeof(*res_ev->binv))); - if (! 
d) { - reply_posix_error(desc, ENOMEM); - goto done; - } - d->command = command; - d->reply = !0; - d->fd = desc->fd; - d->flags = desc->flags; - d->c.preadv.offsets[0] = hdr_offset; - d->c.preadv.size = max_size; -#ifdef USE_VM_PROBES - dt_i1 = d->fd; - dt_i2 = d->flags; - dt_i3 = d->c.preadv.offsets[0]; - dt_i4 = d->c.preadv.size; -#endif - res_ev = &d->c.preadv.eiov; - /* XXX possible alignment problems here for weird machines */ - res_ev->iov = void_ptr = d + 1; - res_ev->binv = void_ptr = res_ev->iov + vsize; - res_ev->size = 0; - res_ev->vsize = 0; - d->invoke = invoke_ipread; - d->free = free_preadv; - d->level = 1; - cq_enq(desc, d); - } goto done; /* case FILE_IPREAD: */ - - case FILE_SETOPT: { - char opt; - - if (ev->size < 1+1 - || !EV_GET_CHAR(ev, &opt, &p, &q)) { - /* Buffer too short to contain even the option type */ - reply_posix_error(desc, EINVAL); - goto done; - } -#ifdef USE_VM_PROBES - dt_i1 = opt; - dt_utag = EV_CHAR_P(ev, p, q); -#endif - switch (opt) { - case FILE_OPT_DELAYED_WRITE: { - Uint32 sizeH, sizeL, delayH, delayL; - if (ev->size != 1+1+4*sizeof(Uint32) -#ifdef USE_VM_PROBES - + FILENAME_BYTELEN(dt_utag) + FILENAME_CHARSIZE -#endif - || !EV_GET_UINT32(ev, &sizeH, &p, &q) - || !EV_GET_UINT32(ev, &sizeL, &p, &q) - || !EV_GET_UINT32(ev, &delayH, &p, &q) - || !EV_GET_UINT32(ev, &delayL, &p, &q)) { - /* Buffer has wrong length to contain the option values */ - reply_posix_error(desc, EINVAL); - goto done; - } -#if SIZEOF_SIZE_T == 4 - if (sizeH != 0) { - reply_posix_error(desc, EINVAL); - goto done; - } - desc->write_bufsize = sizeL; -#else - desc->write_bufsize = ((size_t)sizeH << 32) | sizeL; -#endif -#if SIZEOF_LONG == 4 - if (delayH != 0) { - reply_posix_error(desc, EINVAL); - goto done; - } - desc->write_delay = delayL; -#else - desc->write_delay = ((unsigned long)delayH << 32) | delayL; -#endif -#ifdef USE_VM_PROBES - dt_i2 = desc->write_delay; -#endif - TRACE_C('K'); - reply_ok(desc); - } goto done; - case FILE_OPT_READ_AHEAD: { - 
Uint32 sizeH, sizeL; - if (ev->size != 1+1+2*sizeof(Uint32) -#ifdef USE_VM_PROBES - + FILENAME_BYTELEN(dt_utag)+FILENAME_CHARSIZE -#endif - || !EV_GET_UINT32(ev, &sizeH, &p, &q) - || !EV_GET_UINT32(ev, &sizeL, &p, &q)) { - /* Buffer has wrong length to contain the option values */ - reply_posix_error(desc, EINVAL); - goto done; - } -#if SIZEOF_SIZE_T == 4 - if (sizeH != 0) { - reply_posix_error(desc, EINVAL); - goto done; - } - desc->read_bufsize = sizeL; -#else - desc->read_bufsize = ((size_t)sizeH << 32) | sizeL; -#endif -#ifdef USE_VM_PROBES - dt_i2 = desc->read_bufsize; -#endif - TRACE_C('K'); - reply_ok(desc); - } goto done; - default: - reply_posix_error(desc, EINVAL); - goto done; - } /* case FILE_OPT_DELAYED_WRITE: */ - } ASSERT(0); goto done; /* case FILE_SETOPT: */ - - case FILE_SENDFILE: { - -#ifdef HAVE_SENDFILE - struct t_data *d; - Uint32 out_fd, offsetH, offsetL, hd_len, tl_len; - Uint64 nbytes; - char flags; - - if (ev->size < 1 + 7 * sizeof(Uint32) + sizeof(char) - || !EV_GET_UINT32(ev, &out_fd, &p, &q) - || !EV_GET_CHAR(ev, &flags, &p, &q) - || !EV_GET_UINT32(ev, &offsetH, &p, &q) - || !EV_GET_UINT32(ev, &offsetL, &p, &q) - || !EV_GET_UINT64(ev, &nbytes, &p, &q) - || !EV_GET_UINT32(ev, &hd_len, &p, &q) - || !EV_GET_UINT32(ev, &tl_len, &p, &q)) { - /* Buffer has wrong length to contain all the needed values */ - reply_posix_error(desc, EINVAL); - goto done; - } - - if (hd_len != 0 || tl_len != 0) { - /* We do not allow header, trailers */ - reply_posix_error(desc, EINVAL); - goto done; - } - - - if (flags & SENDFILE_FLGS_USE_THREADS && !THRDS_AVAILABLE) { - /* We do not allow use_threads flag on a system where - no threads are available. 
*/ - reply_posix_error(desc, EINVAL); - goto done; - } - - d = EF_SAFE_ALLOC(sizeof(struct t_data)); - d->fd = desc->fd; - d->command = command; - d->invoke = invoke_sendfile; - d->free = free_sendfile; - d->flags = flags; - d->level = 2; - - d->c.sendfile.out_fd = (int) out_fd; - d->c.sendfile.written = 0; - d->c.sendfile.port = desc->port; - d->c.sendfile.q_mtx = desc->q_mtx; - - #if SIZEOF_OFF_T == 4 - if (offsetH != 0) { - reply_posix_error(desc, EINVAL); - goto done; - } - d->c.sendfile.offset = (off_t) offsetL; - #else - d->c.sendfile.offset = ((off_t) offsetH << 32) | offsetL; - #endif - - d->c.sendfile.nbytes = nbytes; - - if (USE_THRDS_FOR_SENDFILE(d)) { - SET_BLOCKING(d->c.sendfile.out_fd); - } else { - /** - * Write a place holder to queue in order to force file_flush - * to be called before the driver is closed. - */ - char tmp[1] = ""; - MUTEX_LOCK(d->c.sendfile.q_mtx); - if (driver_enq(d->c.sendfile.port, tmp, 1)) { - MUTEX_UNLOCK(d->c.sendfile.q_mtx); - reply_posix_error(desc, ENOMEM); - goto done; - } - MUTEX_UNLOCK(d->c.sendfile.q_mtx); - } - - cq_enq(desc, d); -#else - reply_posix_error(desc, ENOTSUP); -#endif - goto done; - } /* case FILE_SENDFILE: */ - - } /* switch(command) */ - - if (lseek_flush_read(desc, &err -#ifdef USE_VM_PROBES - , dt_priv, dt_utag -#endif - ) < 0) { - reply_posix_error(desc, err); - goto done; - } - if (flush_write_check_error(desc, &err -#ifdef USE_VM_PROBES - , dt_priv, dt_utag -#endif - ) < 0) { - reply_posix_error(desc, err); - goto done; - } else { - /* Flatten buffer and send it to file_output(desc, buf, len) */ - int len = ev->size; - char *buf = EF_ALLOC(len); - if (! buf) { - reply_posix_error(desc, ENOMEM); - goto done; - } - driver_vec_to_buf(ev, buf, len); - file_output((ErlDrvData) desc, buf, len); - EF_FREE(buf); - goto done; - } - - done: - if (d != NULL) { -#ifdef USE_VM_PROBES - /* - * If d == NULL, then either: - * 1). There was an error of some sort, or - * 2). 
The command given to us is actually implemented - * by file_output() instead. - * - * Case #1 is probably a TODO item, perhaps? - * Case #2 we definitely don't want to activate a probe. - */ - d->sched_i1 = dt_priv->thread_num; - d->sched_i2 = dt_priv->tag; - d->sched_utag[0] = '\0'; - if (dt_utag != NULL) { - if (dt_utag[0] == '\0') { - dt_utag = NULL; - } else { - strncpy(d->sched_utag, dt_utag, sizeof(d->sched_utag) - 1); - d->sched_utag[sizeof(d->sched_utag) - 1] = '\0'; - } - } - DTRACE11(efile_drv_entry, dt_priv->thread_num, dt_priv->tag++, - dt_utag, command, dt_s1, NULL, dt_i1, dt_i2, dt_i3, dt_i4, - desc->port_str); -#endif - } - cq_execute(desc); -} - -#ifdef USE_VM_PROBES -dt_private * -get_dt_private(int base) -{ - dt_private *dt_priv = (dt_private *) pthread_getspecific(dt_driver_key); - - if (dt_priv == NULL) { - dt_priv = EF_SAFE_ALLOC(sizeof(dt_private)); - erts_mtx_lock(&dt_driver_mutex); - dt_priv->thread_num = (base + dt_driver_idnum++); - erts_mtx_unlock(&dt_driver_mutex); - dt_priv->tag = 0; - pthread_setspecific(dt_driver_key, dt_priv); - } - return dt_priv; -} -#endif /* USE_VM_PROBES */ diff --git a/erts/emulator/drivers/common/erl_efile.h b/erts/emulator/drivers/common/erl_efile.h deleted file mode 100644 index b7f063b4f2..0000000000 --- a/erts/emulator/drivers/common/erl_efile.h +++ /dev/null @@ -1,176 +0,0 @@ -/* - * %CopyrightBegin% - * - * Copyright Ericsson AB 1997-2016. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - * - * %CopyrightEnd% - */ -/* - * Defines the interfaces between the generic efile driver and its - * operating-system dependent helpers. - */ - -#include "sys.h" -#include "erl_driver.h" - -/* - * Open modes for efile_openfile(). - */ -#define EFILE_MODE_READ 1 -#define EFILE_MODE_WRITE 2 /* Implies truncating file when used alone. */ -#define EFILE_MODE_READ_WRITE 3 -#define EFILE_MODE_APPEND 4 -#define EFILE_COMPRESSED 8 -#define EFILE_MODE_EXCL 16 -#define EFILE_NO_TRUNCATE 32 /* Special for reopening on VxWorks */ -#define EFILE_MODE_SYNC 64 - -/* - * Seek modes for efile_seek(). - */ -#define EFILE_SEEK_SET 0 -#define EFILE_SEEK_CUR 1 -#define EFILE_SEEK_END 2 - -/* - * File types returned by efile_fileinfo(). - */ -#define FT_DEVICE 1 -#define FT_DIRECTORY 2 -#define FT_REGULAR 3 -#define FT_SYMLINK 4 -#define FT_OTHER 5 - -/* - * Access attributes returned by efile_fileinfo() (the bits can be ORed - * together). - */ -#define FA_NONE 0 -#define FA_WRITE 1 -#define FA_READ 2 - -/* Some OS'es (i.e. Windows) has filenames in wide charaqcters. That requires special handling */ -/* Note that we do *not* honor alignment in the communication to the OS specific driver, */ -/* which is not a problem on x86, but might be on other platforms. The OS specific efile */ -/* implementation is expected to align if needed */ -#ifdef __WIN32__ -#define FILENAMES_16BIT 1 -#endif - -/* We use sendfilev if it exist on solaris */ -#if !defined(HAVE_SENDFILE) && defined(HAVE_SENDFILEV) -#define HAVE_SENDFILE -#endif - -/* - * An handle to an open directory. To be cast to the correct type - * in the system-dependent directory functions. - */ - -typedef struct _Efile_Dir_Handle* EFILE_DIR_HANDLE; - -/* - * Error information from the last call. - */ -typedef struct _Efile_error { - int posix_errno; /* Posix error number, as in <errno.h>. 
*/ - int os_errno; /* Os-dependent error number (not used). */ -} Efile_error; - -/* - * Describes what is returned by file:file_info/1. - */ - -typedef struct _Efile_info { - Uint32 size_low; /* Size of file, lower 32 bits.. */ - Uint32 size_high; /* Size of file, higher 32 bits. */ - Uint32 type; /* Type of file -- one of FT_*. */ - Uint32 access; /* Access to file -- one of FA_*. */ - Uint32 mode; /* Access permissions -- bit field. */ - Uint32 links; /* Number of links to file. */ - Uint32 major_device; /* Major device or file system. */ - Uint32 minor_device; /* Minor device (for devices). */ - Uint32 inode; /* Inode number. */ - Uint32 uid; /* User id of owner. */ - Uint32 gid; /* Group id of owner. */ - Sint64 accessTime; /* Last time the file was accessed. */ - Sint64 modifyTime; /* Last time the file was modified. */ - Sint64 cTime; /* Creation time (Windows) or last - * inode change (Unix). - */ -} Efile_info; - - -#ifdef HAVE_SENDFILE -/* - * Describes the structure of headers/trailers for sendfile - */ -struct t_sendfile_hdtl { - SysIOVec *headers; - int hdr_cnt; - SysIOVec *trailers; - int trl_cnt; -}; -#endif /* HAVE_SENDFILE */ - -/* - * Functions. 
- */ -int efile_init(void); -int efile_mkdir(Efile_error* errInfo, char* name); -int efile_rmdir(Efile_error* errInfo, char* name); -int efile_delete_file(Efile_error* errInfo, char* name); -int efile_rename(Efile_error* errInfo, char* src, char* dst); -int efile_chdir(Efile_error* errInfo, char* name); -int efile_getdcwd(Efile_error* errInfo, int drive, - char* buffer, size_t size); -int efile_readdir(Efile_error* errInfo, char* name, - EFILE_DIR_HANDLE* dir_handle, - char* buffer, size_t *size); -int efile_openfile(Efile_error* errInfo, char* name, int flags, - int* pfd, Sint64* pSize); -void efile_closefile(int fd); -int efile_fdatasync(Efile_error* errInfo, int fd); -int efile_fsync(Efile_error* errInfo, int fd); -int efile_fileinfo(Efile_error* errInfo, Efile_info* pInfo, - char *name, int info_for_link); -int efile_write_info(Efile_error* errInfo, Efile_info* pInfo, char *name); -int efile_write(Efile_error* errInfo, int flags, int fd, - char* buf, size_t count); -int efile_writev(Efile_error* errInfo, int flags, int fd, - SysIOVec* iov, int iovcnt); -int efile_read(Efile_error* errInfo, int flags, int fd, - char* buf, size_t count, size_t* pBytesRead); -int efile_seek(Efile_error* errInfo, int fd, - Sint64 offset, int origin, Sint64* new_location); -int efile_truncate_file(Efile_error* errInfo, int *fd, int flags); -int efile_pwrite(Efile_error* errInfo, int fd, - char* buf, size_t count, Sint64 offset); -int efile_pread(Efile_error* errInfo, int fd, - Sint64 offset, char* buf, size_t count, size_t* pBytesRead); -int efile_readlink(Efile_error* errInfo, char *name, - char* buffer, size_t size); -int efile_altname(Efile_error* errInfo, char *name, - char* buffer, size_t size); -int efile_link(Efile_error* errInfo, char* old, char* new); -int efile_symlink(Efile_error* errInfo, char* old, char* new); -int efile_may_openfile(Efile_error* errInfo, char *name); -int efile_fadvise(Efile_error* errInfo, int fd, Sint64 offset, Sint64 length, - int advise); -#ifdef 
HAVE_SENDFILE -int efile_sendfile(Efile_error* errInfo, int in_fd, int out_fd, - off_t *offset, Uint64 *nbytes, struct t_sendfile_hdtl *hdtl); -#endif /* HAVE_SENDFILE */ -int efile_fallocate(Efile_error* errInfo, int fd, Sint64 offset, Sint64 length); diff --git a/erts/emulator/drivers/common/gzio.c b/erts/emulator/drivers/common/gzio.c index f60c781894..86c3b07cea 100644 --- a/erts/emulator/drivers/common/gzio.c +++ b/erts/emulator/drivers/common/gzio.c @@ -19,726 +19,16 @@ #include <unistd.h> #endif #include <ctype.h> + #include "erl_driver.h" -#include "erl_efile.h" #include "sys.h" -#ifdef __WIN32__ -#ifndef HAVE_CONFLICTING_FREAD_DECLARATION -#define HAVE_CONFLICTING_FREAD_DECLARATION -#endif -#define FILENAMES_16BIT 1 -#endif - -#ifdef STDC -# define zstrerror(errnum) strerror(errnum) -#else -# define zstrerror(errnum) "" -#endif - #include "gzio_zutil.h" #include "erl_zlib.h" #include "gzio.h" -/********struct internal_state {int dummy;}; / * for buggy compilers */ - -#define Z_BUFSIZE 4096 - -#define ALLOC(size) driver_alloc(size) -#define TRYFREE(p) {if (p) driver_free(p);} - static int gz_magic[2] = {0x1f, 0x8b}; /* gzip magic header */ -/* gzip flag byte */ -#define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */ -#define HEAD_CRC 0x02 /* bit 1 set: header CRC present */ -#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */ -#define ORIG_NAME 0x08 /* bit 3 set: original file name present */ -#define COMMENT 0x10 /* bit 4 set: file comment present */ -#define RESERVED 0xE0 /* bits 5..7: reserved */ - -typedef struct gz_stream { - z_stream stream; - int z_err; /* error code for last stream operation */ - int z_eof; /* set if end of input file */ -#ifdef UNIX - int file; /* .gz file descriptor */ -#else - FILE *file; /* .gz file */ -#endif - Byte *inbuf; /* input buffer */ - Byte *outbuf; /* output buffer */ - uLong crc; /* crc32 of uncompressed data */ - char *msg; /* error message */ - char *path; /* path name for debugging only */ - 
int transparent; /* 1 if input file is not a .gz file */ - char mode; /* 'w' or 'r' */ - int position; /* Position (for seek) */ - int (*destroy)(struct gz_stream*); /* Function to destroy - * this structure. */ -} gz_stream; - -local ErtsGzFile gz_open (const char *path, const char *mode); -local int get_byte (gz_stream *s); -local void check_header (gz_stream *s); -local int destroy (gz_stream *s); -local uLong getLong (gz_stream *s); - -#ifdef UNIX -/* - * In Solaris 8 and earlier, fopen() and its friends cannot handle - * file descriptors larger than 255. Therefore, we use read()/write() - * on all Unix systems. - */ -# define ERTS_GZWRITE(File, Buf, Count) write((File), (Buf), (Count)) -# define ERTS_GZREAD(File, Buf, Count) read((File), (Buf), (Count)) -#else -/* - * On all other operating systems, using fopen(), fread()/fwrite(), since - * there is not guaranteed to exist any read()/write() (not part of - * ANSI/ISO-C). - */ -# define ERTS_GZWRITE(File, Buf, Count) fwrite((Buf), 1, (Count), (File)) -# define ERTS_GZREAD(File, Buf, Count) fread((Buf), 1, (Count), (File)) -#endif - -/* - * Ripped from efile_drv.c - */ - -#ifdef FILENAMES_16BIT -# define FILENAME_BYTELEN(Str) filename_len_16bit(Str) -# define FILENAME_COPY(To,From) filename_cpy_16bit((To),(From)) -# define FILENAME_CHARSIZE 2 - - static int filename_len_16bit(const char *str) - { - const char *p = str; - while(*p != '\0' || p[1] != '\0') { - p += 2; - } - return (p - str); - } - - static void filename_cpy_16bit(char *to, const char *from) - { - while(*from != '\0' || from[1] != '\0') { - *to++ = *from++; - *to++ = *from++; - } - *to++ = *from++; - *to++ = *from++; - } - -#else -# define FILENAME_BYTELEN(Str) strlen(Str) -# define FILENAME_COPY(To,From) strcpy(To,From) -# define FILENAME_CHARSIZE 1 -#endif - -/* =========================================================================== - Opens a gzip (.gz) file for reading or writing. The mode parameter - is as in fopen ("rb" or "wb"). 
The file is given either by file descriptor - or path name (if fd == -1). - gz_open return NULL if the file could not be opened or if there was - insufficient memory to allocate the (de)compression state; errno - can be checked to distinguish the two cases (if errno is zero, the - zlib error is Z_MEM_ERROR). -*/ -local ErtsGzFile gz_open (path, mode) - const char *path; - const char *mode; -{ - int err; - int level = Z_DEFAULT_COMPRESSION; /* compression level */ - char *p = (char*)mode; - gz_stream *s; - char fmode[80]; /* copy of mode, without the compression level */ - char *m = fmode; - - if (!path || !mode) return Z_NULL; - - s = (gz_stream *)ALLOC(sizeof(gz_stream)); - if (!s) return Z_NULL; - - erl_zlib_alloc_init(&s->stream); - s->stream.next_in = s->inbuf = Z_NULL; - s->stream.next_out = s->outbuf = Z_NULL; - s->stream.avail_in = s->stream.avail_out = 0; -#ifdef UNIX - s->file = -1; -#else - s->file = NULL; -#endif - s->z_err = Z_OK; - s->z_eof = 0; - s->crc = crc32(0L, Z_NULL, 0); - s->msg = NULL; - s->transparent = 0; - s->position = 0; - s->destroy = destroy; - - s->path = (char*)ALLOC(FILENAME_BYTELEN(path)+FILENAME_CHARSIZE); - if (s->path == NULL) { - return s->destroy(s), (ErtsGzFile)Z_NULL; - } - FILENAME_COPY(s->path, path); /* do this early for debugging */ - - s->mode = '\0'; - do { - if (*p == 'r') - s->mode = 'r'; - if (*p == 'w' || *p == 'a') - s->mode = 'w'; - if (isdigit((int)*p)) { - level = *p - '0'; - } else { - *m++ = *p; /* Copy the mode */ - } - } while (*p++ && m < fmode + sizeof(fmode) - 1); - *m = '\0'; - if (s->mode == '\0') - return s->destroy(s), (ErtsGzFile)Z_NULL; - - if (s->mode == 'w') { - err = deflateInit2(&(s->stream), level, - Z_DEFLATED, MAX_WBITS+16, DEF_MEM_LEVEL, 0); - /* windowBits is passed < 0 to suppress zlib header */ - - s->stream.next_out = s->outbuf = (Byte*)ALLOC(Z_BUFSIZE); - - if (err != Z_OK || s->outbuf == Z_NULL) { - return s->destroy(s), (ErtsGzFile)Z_NULL; - } - } else { - /* - * It is tempting to use 
the built-in support in zlib - * for handling GZIP headers, but unfortunately it - * cannot handle multiple GZIP headers (which occur when - * several GZIP files have been concatenated). - */ - - err = inflateInit2(&(s->stream), -MAX_WBITS); - s->stream.next_in = s->inbuf = (Byte*)ALLOC(Z_BUFSIZE); - - if (err != Z_OK || s->inbuf == Z_NULL) { - return s->destroy(s), (ErtsGzFile)Z_NULL; - } - } - s->stream.avail_out = Z_BUFSIZE; - - errno = 0; -#if defined(FILENAMES_16BIT) - { - FILE* efile_wfopen(const WCHAR* name, const WCHAR* mode); - WCHAR wfmode[80]; - int i = 0; - int j; - for(j = 0; fmode[j] != '\0'; ++j) { - wfmode[i++] = (WCHAR) fmode[j]; - } - wfmode[i++] = L'\0'; - s->file = efile_wfopen((WCHAR *)path, wfmode); - if (s->file == NULL) { - return s->destroy(s), (ErtsGzFile)Z_NULL; - } - } -#elif defined(UNIX) - if (s->mode == 'r') { - s->file = open(path, O_RDONLY); - } else { - s->file = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0666); - } - if (s->file == -1) { - return s->destroy(s), (ErtsGzFile)Z_NULL; - } -#else - s->file = fopen(path, fmode); - if (s->file == NULL) { - return s->destroy(s), (ErtsGzFile)Z_NULL; - } -#endif - if (s->mode == 'r') { - check_header(s); /* skip the .gz header */ - } - return (ErtsGzFile)s; -} - -/* =========================================================================== - Rewind a gzfile back to the beginning. 
-*/ - -local int gz_rewind (gz_stream *s) -{ - TRYFREE(s->msg); - -#ifdef UNIX - lseek(s->file, 0L, SEEK_SET); -#else - fseek(s->file, 0L, SEEK_SET); -#endif - inflateReset(&(s->stream)); - s->stream.next_in = Z_NULL; - s->stream.next_out = Z_NULL; - s->stream.avail_in = s->stream.avail_out = 0; - s->z_err = Z_OK; - s->z_eof = 0; - s->crc = crc32(0L, Z_NULL, 0); - s->msg = NULL; - s->position = 0; - s->stream.next_in = s->inbuf; - - s->stream.avail_out = Z_BUFSIZE; - - check_header(s); /* skip the .gz header */ - return 1; -} - -/* =========================================================================== - Opens a gzip (.gz) file for reading or writing. -*/ -ErtsGzFile erts_gzopen (path, mode) - const char *path; - const char *mode; -{ - return gz_open (path, mode); -} - - -/* =========================================================================== - Read a byte from a gz_stream; update next_in and avail_in. Return EOF - for end of file. - IN assertion: the stream s has been successfully opened for reading. -*/ -local int get_byte(s) - gz_stream *s; -{ - if (s->z_eof) return EOF; - if (s->stream.avail_in == 0) { -#ifdef UNIX - ssize_t res; - errno = 0; - res = ERTS_GZREAD(s->file, s->inbuf, Z_BUFSIZE); - if (res == 0) { - s->stream.avail_in = 0; - s->z_eof = 1; - return EOF; - } else if (res < 0) { - s->stream.avail_in = 0; - s->z_eof = 1; - s->z_err = Z_ERRNO; - return EOF; - } else { - s->stream.avail_in = (uInt) res; - } -#else - errno = 0; - s->stream.avail_in = ERTS_GZREAD(s->file, s->inbuf, Z_BUFSIZE); - if (s->stream.avail_in == 0) { - s->z_eof = 1; - if (s->file && ferror(s->file)) - s->z_err = Z_ERRNO; - return EOF; - } -#endif - s->stream.next_in = s->inbuf; - } - s->stream.avail_in--; - return *(s->stream.next_in)++; -} - -/* =========================================================================== - Check the gzip header of a gz_stream opened for reading. 
Set the stream - mode to transparent if the gzip magic header is not present; set s->err - to Z_DATA_ERROR if the magic header is present but the rest of the header - is incorrect. - IN assertion: the stream s has already been created sucessfully; - s->stream.avail_in is zero for the first time, but may be non-zero - for concatenated .gz files. -*/ -local void check_header(s) - gz_stream *s; -{ - int method; /* method byte */ - int flags; /* flags byte */ - uInt len; - int c; - - /* Check the gzip magic header */ - for (len = 0; len < 2; len++) { - c = get_byte(s); - if (c != gz_magic[len]) { - if (len != 0) s->stream.avail_in++, s->stream.next_in--; - if (c != EOF) { - s->stream.avail_in++, s->stream.next_in--; - s->transparent = 1; - } - s->z_err = s->stream.avail_in != 0 ? Z_OK : Z_STREAM_END; - return; - } - } - method = get_byte(s); - flags = get_byte(s); - if (method != Z_DEFLATED || (flags & RESERVED) != 0) { - s->z_err = Z_DATA_ERROR; - return; - } - - /* Discard time, xflags and OS code: */ - for (len = 0; len < 6; len++) (void)get_byte(s); - - if ((flags & EXTRA_FIELD) != 0) { /* skip the extra field */ - len = (uInt)get_byte(s); - len += ((uInt)get_byte(s))<<8; - /* len is garbage if EOF but the loop below will quit anyway */ - while (len-- != 0 && get_byte(s) != EOF) ; - } - if ((flags & ORIG_NAME) != 0) { /* skip the original file name */ - while ((c = get_byte(s)) != 0 && c != EOF) ; - } - if ((flags & COMMENT) != 0) { /* skip the .gz file comment */ - while ((c = get_byte(s)) != 0 && c != EOF) ; - } - if ((flags & HEAD_CRC) != 0) { /* skip the header crc */ - for (len = 0; len < 2; len++) (void)get_byte(s); - } - s->z_err = s->z_eof ? Z_DATA_ERROR : Z_OK; -} - - /* =========================================================================== - * Cleanup then free the given gz_stream. Return a zlib error code. - Try freeing in the reverse order of allocations. 
- */ -local int destroy (s) - gz_stream *s; -{ - int err = Z_OK; - - if (!s) return Z_STREAM_ERROR; - - TRYFREE(s->msg); - - if (s->stream.state != NULL) { - if (s->mode == 'w') { - err = deflateEnd(&(s->stream)); - } else if (s->mode == 'r') { - err = inflateEnd(&(s->stream)); - } - } -#ifdef UNIX - if (s->file != -1 && close(s->file)) { - err = Z_ERRNO; - } -#else - if (s->file != NULL && fclose(s->file)) { - err = Z_ERRNO; - } -#endif - if (s->z_err < 0) err = s->z_err; - - TRYFREE(s->inbuf); - TRYFREE(s->outbuf); - TRYFREE(s->path); - TRYFREE(s); - return err; -} - -/* =========================================================================== - Reads the given number of uncompressed bytes from the compressed file. - gzread returns the number of bytes actually read (0 for end of file). -*/ -int -erts_gzread(ErtsGzFile file, voidp buf, unsigned len) -{ - gz_stream *s = (gz_stream*)file; - Bytef *start = buf; /* starting point for crc computation */ - Byte *next_out; /* == stream.next_out but not forced far (for MSDOS) */ - - if (s == NULL || s->mode != 'r') return Z_STREAM_ERROR; - - if (s->z_err == Z_DATA_ERROR || s->z_err == Z_ERRNO) return -1; - if (s->z_err == Z_STREAM_END) return 0; /* EOF */ - - s->stream.next_out = next_out = buf; - s->stream.avail_out = len; - - while (s->stream.avail_out != 0) { - - if (s->transparent) { - /* Copy first the lookahead bytes: */ - uInt n = s->stream.avail_in; - if (n > s->stream.avail_out) n = s->stream.avail_out; - if (n > 0) { - zmemcpy(s->stream.next_out, s->stream.next_in, n); - next_out += n; - s->stream.next_out = next_out; - s->stream.next_in += n; - s->stream.avail_out -= n; - s->stream.avail_in -= n; - } - if (s->stream.avail_out > 0) { - s->stream.avail_out -= ERTS_GZREAD(s->file, next_out, - s->stream.avail_out); - } - len -= s->stream.avail_out; - s->stream.total_in += (uLong)len; - s->stream.total_out += (uLong)len; - if (len == 0) s->z_eof = 1; - s->position += (int)len; - return (int)len; - } - if 
(s->stream.avail_in == 0 && !s->z_eof) { -#ifdef UNIX - ssize_t res; - errno = 0; - res = ERTS_GZREAD(s->file, s->inbuf, Z_BUFSIZE); - if (res == 0) { - s->stream.avail_in = 0; - s->z_eof = 1; - return EOF; - } else if (res < 0) { - s->stream.avail_in = 0; - s->z_eof = 1; - s->z_err = Z_ERRNO; - return EOF; - } else { - s->stream.avail_in = (uInt) res; - } -#else - errno = 0; - s->stream.avail_in = ERTS_GZREAD(s->file, s->inbuf, Z_BUFSIZE); - if (s->stream.avail_in == 0) { - s->z_eof = 1; - if (s->file && ferror(s->file)) { - s->z_err = Z_ERRNO; - break; - } - } -#endif - s->stream.next_in = s->inbuf; - } - s->z_err = inflate(&(s->stream), Z_NO_FLUSH); - - if (s->z_err == Z_STREAM_END) { - /* Check CRC and original size */ - s->crc = crc32(s->crc, start, (uInt)(s->stream.next_out - start)); - start = s->stream.next_out; - - if (getLong(s) != s->crc) { - s->z_err = Z_DATA_ERROR; - } else { - (void)getLong(s); - /* The uncompressed length returned by above getlong() may - * be different from s->stream.total_out) in case of - * concatenated .gz files. Check for such files: - */ - check_header(s); - if (s->z_err == Z_OK) { - uLong total_in = s->stream.total_in; - uLong total_out = s->stream.total_out; - - inflateReset(&(s->stream)); - s->stream.total_in = total_in; - s->stream.total_out = total_out; - s->crc = crc32(0L, Z_NULL, 0); - } - } - } - if (s->z_err != Z_OK || s->z_eof) break; - } - s->crc = crc32(s->crc, start, (uInt)(s->stream.next_out - start)); - - s->position += (int)(len - s->stream.avail_out); - - return (int)(len - s->stream.avail_out); -} - -/* =========================================================================== - Writes the given number of uncompressed bytes into the compressed file. - gzwrite returns the number of bytes actually written (0 in case of error). 
-*/ -int -erts_gzwrite(ErtsGzFile file, voidp buf, unsigned len) -{ - gz_stream *s = (gz_stream*)file; - - if (s == NULL || s->mode != 'w') return Z_STREAM_ERROR; - - s->stream.next_in = buf; - s->stream.avail_in = len; - - while (s->stream.avail_in != 0) { - - if (s->stream.avail_out == 0) { - - s->stream.next_out = s->outbuf; - if (ERTS_GZWRITE(s->file, s->outbuf, Z_BUFSIZE) != Z_BUFSIZE) { - s->z_err = Z_ERRNO; - break; - } - s->stream.avail_out = Z_BUFSIZE; - } - s->z_err = deflate(&(s->stream), Z_NO_FLUSH); - if (s->z_err != Z_OK) break; - } - s->position += (int)(len - s->stream.avail_in); - return (int)(len - s->stream.avail_in); -} - -/* - * For use by Erlang file driver. - * - * XXX Limitations: - * - SEEK_END is not allowed (length of file is not known). - * - When writing, only forward seek is supported. - */ - -int -erts_gzseek(ErtsGzFile file, int offset, int whence) -{ - int pos; - gz_stream* s = (gz_stream *) file; - - switch (whence) { - case EFILE_SEEK_SET: whence = SEEK_SET; break; - case EFILE_SEEK_CUR: whence = SEEK_CUR; break; - case EFILE_SEEK_END: whence = SEEK_END; break; - default: - errno = EINVAL; - return -1; - } - - if (s == NULL) { - errno = EINVAL; - return -1; - } - if (s->z_err == Z_DATA_ERROR || s->z_err == Z_ERRNO) { - errno = EIO; - return -1; - } - - switch (whence) { - case SEEK_SET: pos = offset; break; - case SEEK_CUR: pos = s->position+offset; break; - case SEEK_END: - default: - errno = EINVAL; return -1; - } - - if (pos == s->position) { - return pos; - } - - if (pos < s->position) { - if (s->mode == 'w') { - errno = EINVAL; - return -1; - } - gz_rewind(s); - } - - while (s->position < pos) { - char buf[512]; - int n; - int save_pos = s->position; - - n = pos - s->position; - if (n > sizeof(buf)) - n = sizeof(buf); - - if (s->mode == 'r') { - erts_gzread(file, buf, n); - } else { - memset(buf, '\0', n); - erts_gzwrite(file, buf, n); - } - if (save_pos == s->position) break; - } - - return s->position; -} - -/* 
=========================================================================== - Flushes all pending output into the compressed file. The parameter - flush is as in the deflate() function. - gzflush should be called only when strictly necessary because it can - degrade compression. -*/ -int -erts_gzflush(ErtsGzFile file, int flush) -{ - uInt len; - int done = 0; - gz_stream *s = (gz_stream*)file; - - if (s == NULL || s->mode != 'w') return Z_STREAM_ERROR; - - s->stream.avail_in = 0; /* should be zero already anyway */ - - for (;;) { - len = Z_BUFSIZE - s->stream.avail_out; - - if (len != 0) { - if ((uInt)ERTS_GZWRITE(s->file, s->outbuf, len) != len) { - s->z_err = Z_ERRNO; - return Z_ERRNO; - } - s->stream.next_out = s->outbuf; - s->stream.avail_out = Z_BUFSIZE; - } - if (done) break; - s->z_err = deflate(&(s->stream), flush); - - /* deflate has finished flushing only when it hasn't used up - * all the available space in the output buffer: - */ - done = (s->stream.avail_out != 0 || s->z_err == Z_STREAM_END); - - if (s->z_err != Z_OK && s->z_err != Z_STREAM_END) break; - } -#ifndef UNIX - fflush(s->file); -#endif - return s->z_err == Z_STREAM_END ? Z_OK : s->z_err; -} - -/* =========================================================================== - Reads a long in LSB order from the given gz_stream. Sets -*/ -local uLong getLong (s) - gz_stream *s; -{ - uLong x = (uLong)get_byte(s); - int c; - - x += ((uLong)get_byte(s))<<8; - x += ((uLong)get_byte(s))<<16; - c = get_byte(s); - if (c == EOF) s->z_err = Z_DATA_ERROR; - x += ((uLong)c)<<24; - return x; -} - -/* =========================================================================== - Flushes all pending output if necessary, closes the compressed file - and deallocates all the (de)compression state. 
-*/ -int -erts_gzclose(ErtsGzFile file) -{ - int err; - gz_stream *s = (gz_stream*)file; - - if (s == NULL) return Z_STREAM_ERROR; - - if (s->mode == 'w') { - err = erts_gzflush (file, Z_FINISH); - if (err != Z_OK) return s->destroy(s); - } - return s->destroy(s); -} - - /* =========================================================================== Uncompresses the buffer given and returns a pointer to a binary. If the buffer was not compressed with gzip, the buffer contents diff --git a/erts/emulator/drivers/common/gzio.h b/erts/emulator/drivers/common/gzio.h index ee0ebe7bd8..e331b5208b 100644 --- a/erts/emulator/drivers/common/gzio.h +++ b/erts/emulator/drivers/common/gzio.h @@ -20,13 +20,5 @@ #include "zlib.h" -typedef struct erts_gzFile* ErtsGzFile; - -ErtsGzFile erts_gzopen (const char *path, const char *mode); -int erts_gzread(ErtsGzFile file, voidp buf, unsigned len); -int erts_gzwrite(ErtsGzFile file, voidp buf, unsigned len); -int erts_gzseek(ErtsGzFile, int, int); -int erts_gzflush(ErtsGzFile file, int flush); -int erts_gzclose(ErtsGzFile file); ErlDrvBinary* erts_gzinflate_buffer(char*, uLong); ErlDrvBinary* erts_gzdeflate_buffer(char*, uLong); diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c index 95d61fcc5d..4294fb4f46 100644 --- a/erts/emulator/drivers/common/inet_drv.c +++ b/erts/emulator/drivers/common/inet_drv.c @@ -63,6 +63,20 @@ #include <sys/un.h> #endif +#ifdef HAVE_SENDFILE +#if defined(__linux__) || (defined(__sun) && defined(__SVR4)) + #include <sys/sendfile.h> +#elif defined(__FreeBSD__) || defined(__DragonFly__) + /* Need to define __BSD_VISIBLE in order to expose prototype of sendfile */ + #define __BSD_VISIBLE 1 + #include <sys/socket.h> +#endif +#endif + +#if defined(__APPLE__) && defined(__MACH__) && !defined(__DARWIN__) + #define __DARWIN__ 1 +#endif + /* All platforms fail on malloc errors. 
*/ #define FATAL_MALLOC @@ -701,6 +715,7 @@ static size_t my_strnlen(const char *s, size_t maxlen) #define TCP_REQ_RECV 42 #define TCP_REQ_UNRECV 43 #define TCP_REQ_SHUTDOWN 44 +#define TCP_REQ_SENDFILE 45 /* UDP and SCTP requests */ #define PACKET_REQ_RECV 60 /* Common for UDP and SCTP */ /* #define SCTP_REQ_LISTEN 61 MERGED Different from TCP; not for UDP */ @@ -723,6 +738,7 @@ static size_t my_strnlen(const char *s, size_t maxlen) #define TCP_ADDF_DELAYED_ECONNRESET 128 /* An ECONNRESET error occurred on send or shutdown */ #define TCP_ADDF_SHUTDOWN_WR_DONE 256 /* A shutdown(sock, SHUT_WR) or SHUT_RDWR was made */ #define TCP_ADDF_LINGER_ZERO 512 /* Discard driver queue on port close */ +#define TCP_ADDF_SENDFILE 1024 /* Send from an fd instead of the driver queue */ /* *_REQ_* replies */ #define INET_REP_ERROR 0 @@ -1235,6 +1251,21 @@ typedef struct { inet_async_multi_op *multi_first;/* NULL == no multi-accept-queue, op is in ordinary queue */ inet_async_multi_op *multi_last; MultiTimerData *mtd; /* Timer structures for multiple accept */ +#ifdef HAVE_SENDFILE + struct { + ErlDrvSizeT ioq_skip; /* The number of bytes in the queue at the time + * sendfile was issued, which must be sent + * before issuing the sendfile call itself. */ + int dup_file_fd; /* The file handle to send from; this is + * duplicated when sendfile is issued to + * reduce (but not eliminate) the impact of a + * nasty race, so we have to remember to close + * it. 
*/ + Uint64 bytes_sent; + Uint64 offset; + Uint64 length; + } sendfile; +#endif } tcp_descriptor; /* send function */ @@ -1245,6 +1276,8 @@ static int tcp_deliver(tcp_descriptor* desc, int len); static int tcp_shutdown_error(tcp_descriptor* desc, int err); +static int tcp_inet_sendfile(tcp_descriptor* desc); + static int tcp_inet_output(tcp_descriptor* desc, HANDLE event); static int tcp_inet_input(tcp_descriptor* desc, HANDLE event); @@ -1329,6 +1362,9 @@ static ErlDrvTermData am_ipv6_v6only; static ErlDrvTermData am_netns; static ErlDrvTermData am_bind_to_device; #endif +#ifdef HAVE_SENDFILE +static ErlDrvTermData am_sendfile; +#endif static char str_eafnosupport[] = "eafnosupport"; static char str_einval[] = "einval"; @@ -3875,6 +3911,10 @@ static int inet_init() INIT_ATOM(https); INIT_ATOM(scheme); +#ifdef HAVE_SENDFILE + INIT_ATOM(sendfile); +#endif + /* add TCP, UDP and SCTP drivers */ add_driver_entry(&tcp_inet_driver_entry); #ifdef HAVE_UDP @@ -9270,6 +9310,13 @@ static void tcp_inet_stop(ErlDrvData e) * will be freed through tcp_inet_stop later on. 
*/ static void tcp_desc_close(tcp_descriptor* desc) { +#ifdef HAVE_SENDFILE + if(desc->tcp_add_flags & TCP_ADDF_SENDFILE) { + desc->tcp_add_flags &= ~TCP_ADDF_SENDFILE; + close(desc->sendfile.dup_file_fd); + } +#endif + tcp_clear_input(desc); tcp_clear_output(desc); @@ -9608,6 +9655,60 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); } } + + case TCP_REQ_SENDFILE: { +#ifdef HAVE_SENDFILE + const ErlDrvSizeT required_len = + sizeof(desc->sendfile.dup_file_fd) + + sizeof(Uint64) * 2; + + int raw_file_fd; + + DEBUGF(("tcp_inet_ctl(%ld): SENDFILE\r\n", (long)desc->inet.port)); + + if (len != required_len) { + return ctl_error(EINVAL, rbuf, rsize); + } else if (!IS_CONNECTED(INETP(desc))) { + return ctl_error(ENOTCONN, rbuf, rsize); + } + + sys_memcpy(&raw_file_fd, buf, sizeof(raw_file_fd)); + buf += sizeof(raw_file_fd); + + desc->sendfile.dup_file_fd = dup(raw_file_fd); + + if(desc->sendfile.dup_file_fd == -1) { + return ctl_error(errno, rbuf, rsize); + } + + desc->sendfile.offset = get_int64(buf); + buf += sizeof(Uint64); + + desc->sendfile.length = get_int64(buf); + buf += sizeof(Uint64); + + ASSERT(desc->sendfile.offset >= 0); + ASSERT(desc->sendfile.length >= 0); + + desc->sendfile.ioq_skip = driver_sizeq(desc->inet.port); + desc->sendfile.bytes_sent = 0; + + desc->inet.caller = driver_caller(desc->inet.port); + desc->tcp_add_flags |= TCP_ADDF_SENDFILE; + + /* See if we can finish sending without selecting & rescheduling. 
*/ + tcp_inet_sendfile(desc); + + if(desc->sendfile.length > 0) { + sock_select(INETP(desc), FD_WRITE, 1); + } + + return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); +#else + return ctl_error(ENOTSUP, rbuf, rsize); +#endif + } + default: DEBUGF(("tcp_inet_ctl(%ld): %u\r\n", (long)desc->inet.port, cmd)); return inet_ctl(INETP(desc), cmd, buf, len, rbuf, rsize); @@ -9747,12 +9848,27 @@ static void tcp_inet_commandv(ErlDrvData e, ErlIOVec* ev) static void tcp_inet_flush(ErlDrvData e) { tcp_descriptor* desc = (tcp_descriptor*)e; - if (!(desc->inet.event_mask & FD_WRITE)) { - /* Discard send queue to avoid hanging port (OTP-7615) */ - tcp_clear_output(desc); + int discard_output; + + /* Discard send queue to avoid hanging port (OTP-7615) */ + discard_output = !(desc->inet.event_mask & FD_WRITE); + + discard_output |= desc->tcp_add_flags & TCP_ADDF_LINGER_ZERO; + +#ifdef HAVE_SENDFILE + /* The old file driver aborted when it was stopped during sendfile, so + * we'll clear the flag and discard all output. 
*/ + if(desc->tcp_add_flags & TCP_ADDF_SENDFILE) { + desc->tcp_add_flags &= ~TCP_ADDF_SENDFILE; + close(desc->sendfile.dup_file_fd); + + discard_output = 1; + } +#endif + + if (discard_output) { + tcp_clear_output(desc); } - if (desc->tcp_add_flags & TCP_ADDF_LINGER_ZERO) - tcp_clear_output(desc); } static void tcp_inet_process_exit(ErlDrvData e, ErlDrvMonitor *monitorp) @@ -10647,7 +10763,9 @@ static int tcp_sendv(tcp_descriptor* desc, ErlIOVec* ev) ev->size += h_len; } - if ((sz = driver_sizeq(ix)) > 0) { + sz = driver_sizeq(ix); + + if ((desc->tcp_add_flags & TCP_ADDF_SENDFILE) || sz > 0) { driver_enqv(ix, ev, 0); if (sz+ev->size >= desc->high) { DEBUGF(("tcp_sendv(%ld): s=%d, sender forced busy\r\n", @@ -10741,8 +10859,9 @@ static int tcp_send(tcp_descriptor* desc, char* ptr, ErlDrvSizeT len) inet_output_count(INETP(desc), len+h_len); + sz = driver_sizeq(ix); - if ((sz = driver_sizeq(ix)) > 0) { + if ((desc->tcp_add_flags & TCP_ADDF_SENDFILE) || sz > 0) { if (h_len > 0) driver_enq(ix, buf, h_len); driver_enq(ix, ptr, len); @@ -10832,6 +10951,246 @@ static void tcp_inet_drv_input(ErlDrvData data, ErlDrvEvent event) (void)tcp_inet_input((tcp_descriptor*)data, (HANDLE)event); } +#ifdef HAVE_SENDFILE +static int tcp_sendfile_completed(tcp_descriptor* desc) { + ErlDrvTermData spec[LOAD_PORT_CNT + LOAD_TUPLE_CNT * 2 + + LOAD_ATOM_CNT * 2 + LOAD_UINT_CNT * 2]; + Uint32 sent_low, sent_high; + int i; + + desc->tcp_add_flags &= ~TCP_ADDF_SENDFILE; + close(desc->sendfile.dup_file_fd); + + /* While we flushed the output queue prior to sending the file, we've + * deferred clearing busy status until now as there's no point in doing so + * while we still have a file to send. + * + * The watermark is checked since more data may have been added while we + * were sending the file. 
*/ + + if (driver_sizeq(desc->inet.port) <= desc->low) { + if (IS_BUSY(INETP(desc))) { + desc->inet.caller = desc->inet.busy_caller; + desc->inet.state &= ~INET_F_BUSY; + + set_busy_port(desc->inet.port, 0); + + /* if we have a timer then cancel and send ok to client */ + if (desc->busy_on_send) { + driver_cancel_timer(desc->inet.port); + desc->busy_on_send = 0; + } + + inet_reply_ok(INETP(desc)); + } + } + + if (driver_sizeq(desc->inet.port) == 0) { + sock_select(INETP(desc), FD_WRITE, 0); + send_empty_out_q_msgs(INETP(desc)); + + if (desc->tcp_add_flags & TCP_ADDF_PENDING_SHUTDOWN) { + tcp_shutdown_async(desc); + } + } + + sent_low = ((Uint64)desc->sendfile.bytes_sent >> 0) & 0xFFFFFFFF; + sent_high = ((Uint64)desc->sendfile.bytes_sent >> 32) & 0xFFFFFFFF; + + i = LOAD_ATOM(spec, 0, am_sendfile); + i = LOAD_PORT(spec, i, desc->inet.dport); + i = LOAD_ATOM(spec, i, am_ok); + i = LOAD_UINT(spec, i, sent_low); + i = LOAD_UINT(spec, i, sent_high); + i = LOAD_TUPLE(spec, i, 3); + i = LOAD_TUPLE(spec, i, 3); + + ASSERT(i == sizeof(spec)/sizeof(*spec)); + + return erl_drv_output_term(desc->inet.dport, spec, i); +} + +static int tcp_sendfile_aborted(tcp_descriptor* desc, int socket_error) { + ErlDrvTermData spec[LOAD_PORT_CNT + LOAD_TUPLE_CNT * 2 + LOAD_ATOM_CNT * 3]; + int i; + + /* We don't clean up sendfile state here, as that's done in tcp_desc_close + * following normal error handling. All we do here is report the failure. 
*/ + + i = LOAD_ATOM(spec, 0, am_sendfile); + i = LOAD_PORT(spec, i, desc->inet.dport); + i = LOAD_ATOM(spec, i, am_error); + + switch (socket_error) { + case ECONNRESET: + case ENOTCONN: + case EPIPE: + i = LOAD_ATOM(spec, i, am_closed); + break; + default: + i = LOAD_ATOM(spec, i, error_atom(socket_error)); + } + + i = LOAD_TUPLE(spec, i, 2); + i = LOAD_TUPLE(spec, i, 3); + + ASSERT(i == sizeof(spec)/sizeof(*spec)); + + return erl_drv_output_term(desc->inet.dport, spec, i); +} + +static int tcp_inet_sendfile(tcp_descriptor* desc) { + ErlDrvPort ix = desc->inet.port; + int result = 0; + ssize_t n; + + DEBUGF(("tcp_inet_sendfile(%ld) {s=%d\r\n", (long)ix, desc->inet.s)); + + /* If there was any data in the queue by the time sendfile was issued, + * we'll need to skip it first. Note that we don't clear busy status until + * we're finished sending the file. */ + while (desc->sendfile.ioq_skip > 0) { + ssize_t bytes_to_send; + SysIOVec* iov; + int vsize; + + ASSERT(driver_sizeq(ix) >= desc->sendfile.ioq_skip); + + if ((iov = driver_peekq(ix, &vsize)) == NULL) { + ERTS_INTERNAL_ERROR("ioq empty when sendfile.ioq_skip > 0"); + } + + bytes_to_send = MIN(desc->sendfile.ioq_skip, iov[0].iov_len); + n = sock_send(desc->inet.s, iov[0].iov_base, bytes_to_send, 0); + + if (!IS_SOCKET_ERROR(n)) { + desc->sendfile.ioq_skip -= n; + driver_deq(ix, n); + } else if (sock_errno() == ERRNO_BLOCK) { +#ifdef __WIN32__ + desc->inet.send_would_block = 1; +#endif + goto done; + } else if (sock_errno() != EINTR) { + goto socket_error; + } + } + + while (desc->sendfile.length > 0) { + /* For some reason the maximum ssize_t cannot be used as the max size. 
+ * 1GB seems to work on all platforms */ + const Sint64 SENDFILE_CHUNK_SIZE = ((1UL << 30) - 1); + + ssize_t bytes_to_send = MIN(SENDFILE_CHUNK_SIZE, desc->sendfile.length); + off_t offset = desc->sendfile.offset; + +#if defined(__linux__) + n = sendfile(desc->inet.s, desc->sendfile.dup_file_fd, &offset, + bytes_to_send); +#elif defined(__FreeBSD__) || defined(__DragonFly__) || defined(__DARWIN__) + { + off_t bytes_sent; + int error; + + #if defined(__DARWIN__) + bytes_sent = bytes_to_send; + + error = sendfile(desc->sendfile.dup_file_fd, desc->inet.s, offset, + &bytes_sent, NULL, 0); + n = bytes_sent; + #else + error = sendfile(desc->sendfile.dup_file_fd, desc->inet.s, offset, + bytes_to_send, NULL, &bytes_sent, 0); + n = bytes_sent; + #endif + + if(error < 0) { + /* EAGAIN/EINTR report partial success by setting bytes_sent, + * so we have to skip error handling if nonzero, and skip EOF + * handling if zero, as it's possible that we didn't manage to + * send anything at all before being interrupted by a + * signal. */ + if((errno != EAGAIN && errno != EINTR) || bytes_sent == 0) { + n = -1; + } + } + } +#elif defined(__sun) && defined(__SVR4) && defined(HAVE_SENDFILEV) + { + sendfilevec_t sfvec[1]; + size_t bytes_sent; + ssize_t error; + + sfvec[0].sfv_fd = desc->sendfile.dup_file_fd; + sfvec[0].sfv_len = bytes_to_send; + sfvec[0].sfv_off = offset; + sfvec[0].sfv_flag = 0; + + error = sendfilev(desc->inet.s, sfvec, 1, &bytes_sent); + n = bytes_sent; + + if(error < 0) { + if(errno == EINVAL) { + /* On some solaris versions (I've seen it on SunOS 5.10), + * using a sfv_len larger than the filesize will result in + * a (-1 && errno == EINVAL). We translate this to a + * successful send of the data.*/ + } else { + /* EAGAIN/EINTR behavior is identical to *BSD. */ + if((errno != EAGAIN && errno != EINTR) || bytes_sent == 0) { + n = -1; + } + } + } + } +#else + #error "Unsupported sendfile syscall; update configure test." 
+#endif + + if (n > 0) { + desc->sendfile.bytes_sent += n; + desc->sendfile.offset += n; + desc->sendfile.length -= n; + } else if (n == 0) { + /* EOF. */ + desc->sendfile.length = 0; + break; + } else if (IS_SOCKET_ERROR(n) && sock_errno() != EINTR) { + if (sock_errno() != ERRNO_BLOCK) { + goto socket_error; + } + +#ifdef __WIN32__ + desc->inet.send_would_block = 1; +#endif + break; + } + } + + if (desc->sendfile.length == 0) { + tcp_sendfile_completed(desc); + } + + goto done; + +socket_error: { + int socket_errno = sock_errno(); + + DEBUGF(("tcp_inet_sendfile(%ld): send errno = %d (errno %d)\r\n", + (long)desc->inet.port, socket_errno, errno)); + + result = tcp_send_error(desc, socket_errno); + tcp_sendfile_aborted(desc, socket_errno); + + goto done; + } + +done: + DEBUGF(("tcp_inet_sendfile(%ld) }\r\n", (long)desc->inet.port)); + return result; +} +#endif /* HAVE_SENDFILE */ + /* socket ready for ouput: ** 1. INET_STATE_CONNECTING => non block connect ? ** 2. INET_STATE_CONNECTED => write output @@ -10892,7 +11251,14 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event) async_ok(INETP(desc)); } else if (IS_CONNECTED(INETP(desc))) { - for (;;) { + +#ifdef HAVE_SENDFILE + if(desc->tcp_add_flags & TCP_ADDF_SENDFILE) { + return tcp_inet_sendfile(desc); + } +#endif + + for (;;) { int vsize; ssize_t n; SysIOVec* iov; diff --git a/erts/emulator/drivers/unix/unix_efile.c b/erts/emulator/drivers/unix/unix_efile.c deleted file mode 100644 index 33e4d75ef7..0000000000 --- a/erts/emulator/drivers/unix/unix_efile.c +++ /dev/null @@ -1,1102 +0,0 @@ -/* - * %CopyrightBegin% - * - * Copyright Ericsson AB 1997-2017. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * %CopyrightEnd% - */ -/* - * Purpose: Provides file and directory operations for Unix. - */ -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif -#if defined(HAVE_POSIX_FALLOCATE) && !defined(__sun) && !defined(__sun__) -#define _XOPEN_SOURCE 600 -#endif -#if !defined(_GNU_SOURCE) && defined(HAVE_LINUX_FALLOC_H) -#define _GNU_SOURCE -#endif -#include "sys.h" -#include "erl_driver.h" -#include "erl_efile.h" -#include <utime.h> -#ifdef HAVE_UNISTD_H -#include <unistd.h> -#endif -#ifdef HAVE_SYS_UIO_H -#include <sys/types.h> -#include <sys/uio.h> -#if defined(HAVE_SENDFILE) && (defined(__FreeBSD__) || defined(__DragonFly__)) -/* Need to define __BSD_VISIBLE in order to expose prototype of sendfile */ -#define __BSD_VISIBLE 1 -#include <sys/socket.h> -#endif -#endif -#if defined(HAVE_SENDFILE) && (defined(__linux__) || (defined(__sun) && defined(__SVR4))) -#include <sys/sendfile.h> -#endif - -#if defined(__APPLE__) && defined(__MACH__) && !defined(__DARWIN__) -#define __DARWIN__ 1 -#endif - -#if defined(__DARWIN__) || defined(HAVE_LINUX_FALLOC_H) || defined(HAVE_POSIX_FALLOCATE) -#include <fcntl.h> -#endif - -#ifdef HAVE_LINUX_FALLOC_H -#include <linux/falloc.h> -#endif - -#ifdef SUNOS4 -# define getcwd(buf, size) getwd(buf) -#endif - -/* Find a definition of MAXIOV, that is used in the code later. */ -#if defined IOV_MAX -#define MAXIOV IOV_MAX -#elif defined UIO_MAXIOV -#define MAXIOV UIO_MAXIOV -#else -#define MAXIOV 16 -#endif - - -/* - * Macros for testing file types. 
- */ - -#define ISDIR(st) (S_ISDIR((st).st_mode)) -#define ISREG(st) (S_ISREG((st).st_mode)) -#define ISDEV(st) (S_ISCHR((st).st_mode) || S_ISBLK((st).st_mode)) -#define ISLNK(st) (S_ISLNK((st).st_mode)) -#ifdef NO_UMASK -#define FILE_MODE 0644 -#define DIR_MODE 0755 -#else -#define FILE_MODE 0666 -#define DIR_MODE 0777 -#endif - -#define IS_DOT_OR_DOTDOT(s) \ - (s[0] == '.' && (s[1] == '\0' || (s[1] == '.' && s[2] == '\0'))) - -static int check_error(int result, Efile_error* errInfo); - -static int -check_error(int result, Efile_error *errInfo) -{ - if (result < 0) { - errInfo->posix_errno = errInfo->os_errno = errno; - return 0; - } - return 1; -} - -int -efile_init() { - return 1; -} - -int -efile_mkdir(Efile_error* errInfo, /* Where to return error codes. */ - char* name) /* Name of directory to create. */ -{ -#ifdef NO_MKDIR_MODE - return check_error(mkdir(name), errInfo); -#else - return check_error(mkdir(name, DIR_MODE), errInfo); -#endif -} - -int -efile_rmdir(Efile_error* errInfo, /* Where to return error codes. */ - char* name) /* Name of directory to delete. */ -{ - if (rmdir(name) == 0) { - return 1; - } - if (errno == ENOTEMPTY) { - errno = EEXIST; - } - if (errno == EEXIST) { - int saved_errno = errno; - struct stat file_stat; - struct stat cwd_stat; - - /* - * The error code might be wrong if this is the current directory. - */ - - if (stat(name, &file_stat) == 0 && stat(".", &cwd_stat) == 0 && - file_stat.st_ino == cwd_stat.st_ino && - file_stat.st_dev == cwd_stat.st_dev) { - saved_errno = EINVAL; - } - errno = saved_errno; - } - return check_error(-1, errInfo); -} - -int -efile_delete_file(Efile_error* errInfo, /* Where to return error codes. */ - char* name) /* Name of file to delete. */ -{ - if (unlink(name) == 0) { - return 1; - } - if (errno == EISDIR) { /* Linux sets the wrong error code. 
*/ - errno = EPERM; - } - return check_error(-1, errInfo); -} - -/* - *--------------------------------------------------------------------------- - * - * Changes the name of an existing file or directory, from src to dst. - * If src and dst refer to the same file or directory, does nothing - * and returns success. Otherwise if dst already exists, it will be - * deleted and replaced by src subject to the following conditions: - * If src is a directory, dst may be an empty directory. - * If src is a file, dst may be a file. - * In any other situation where dst already exists, the rename will - * fail. - * - * Results: - * If the directory was successfully created, returns 1. - * Otherwise the return value is 0 and errno is set to - * indicate the error. Some possible values for errno are: - * - * EACCES: src or dst parent directory can't be read and/or written. - * EEXIST: dst is a non-empty directory. - * EINVAL: src is a root directory or dst is a subdirectory of src. - * EISDIR: dst is a directory, but src is not. - * ENOENT: src doesn't exist, or src or dst is "". - * ENOTDIR: src is a directory, but dst is not. - * EXDEV: src and dst are on different filesystems. - * - * Side effects: - * The implementation of rename may allow cross-filesystem renames, - * but the caller should be prepared to emulate it with copy and - * delete if errno is EXDEV. - * - *--------------------------------------------------------------------------- - */ - -int -efile_rename(Efile_error* errInfo, /* Where to return error codes. */ - char* src, /* Original name. */ - char* dst) /* New name. */ -{ - if (rename(src, dst) == 0) { - return 1; - } - if (errno == ENOTEMPTY) { - errno = EEXIST; - } -#if defined (sparc) - /* - * SunOS 4.1.4 reports overwriting a non-empty directory with a - * directory as EINVAL instead of EEXIST (first rule out the correct - * EINVAL result code for moving a directory into itself). 
Must be - * conditionally compiled because realpath() is only defined on SunOS. - */ - - if (errno == EINVAL) { - char srcPath[MAXPATHLEN], dstPath[MAXPATHLEN]; - DIR *dirPtr; - struct dirent *dirEntPtr; - -#ifdef PURIFY - memset(srcPath, '\0', sizeof(srcPath)); - memset(dstPath, '\0', sizeof(dstPath)); -#endif - - if ((realpath(src, srcPath) != NULL) - && (realpath(dst, dstPath) != NULL) - && (strncmp(srcPath, dstPath, strlen(srcPath)) != 0)) { - dirPtr = opendir(dst); - if (dirPtr != NULL) { - while ((dirEntPtr = readdir(dirPtr)) != NULL) { - if ((strcmp(dirEntPtr->d_name, ".") != 0) && - (strcmp(dirEntPtr->d_name, "..") != 0)) { - errno = EEXIST; - closedir(dirPtr); - return check_error(-1, errInfo); - } - } - closedir(dirPtr); - } - } - errno = EINVAL; - } -#endif /* sparc */ - - if (strcmp(src, "/") == 0) { - /* - * Alpha reports renaming / as EBUSY and Linux reports it as EACCES, - * instead of EINVAL. - */ - - errno = EINVAL; - } - - /* - * DEC Alpha OSF1 V3.0 returns EACCES when attempting to move a - * file across filesystems and the parent directory of that file is - * not writable. Most other systems return EXDEV. Does nothing to - * correct this behavior. - */ - - return check_error(-1, errInfo); -} - -int -efile_chdir(Efile_error* errInfo, /* Where to return error codes. */ - char* name) /* Name of directory to make current. */ -{ - return check_error(chdir(name), errInfo); -} - - -int -efile_getdcwd(Efile_error* errInfo, /* Where to return error codes. */ - int drive, /* 0 - current, 1 - A, 2 - B etc. */ - char* buffer, /* Where to return the current - directory. */ - size_t size) /* Size of buffer. */ -{ - if (drive == 0) { - if (getcwd(buffer, size) == NULL) - return check_error(-1, errInfo); - -#ifdef SIMSPARCSOLARIS - /* We get "host:" prepended to the dirname - remove!. 
*/ - { - int i = 0; - int j = 0; - while ((buffer[i] != ':') && (buffer[i] != '\0')) i++; - if (buffer[i] == ':') { - i++; - while ((buffer[j++] = buffer[i++]) != '\0'); - } - } -#endif - return 1; - } - - /* - * Drives other than 0 is not supported on Unix. - */ - - errno = ENOTSUP; - return check_error(-1, errInfo); -} - -int -efile_readdir(Efile_error* errInfo, /* Where to return error codes. */ - char* name, /* Name of directory to open. */ - EFILE_DIR_HANDLE* p_dir_handle, /* Pointer to directory - handle of - open directory.*/ - char* buffer, /* Pointer to buffer for - one filename. */ - size_t *size) /* in-out Size of buffer, length - of name. */ -{ - DIR *dp; /* Pointer to directory structure. */ - struct dirent* dirp; /* Pointer to directory entry. */ - - /* - * If this is the first call, we must open the directory. - */ - - if (*p_dir_handle == NULL) { - dp = opendir(name); - if (dp == NULL) - return check_error(-1, errInfo); - *p_dir_handle = (EFILE_DIR_HANDLE) dp; - } - - /* - * Retrieve the name of the next file using the directory handle. - */ - - dp = *((DIR **)((void *)p_dir_handle)); - for (;;) { - dirp = readdir(dp); - if (dirp == NULL) { - closedir(dp); - return 0; - } - if (IS_DOT_OR_DOTDOT(dirp->d_name)) - continue; - buffer[0] = '\0'; - strncat(buffer, dirp->d_name, (*size)-1); - *size = strlen(dirp->d_name); - return 1; - } -} - -int -efile_openfile(Efile_error* errInfo, /* Where to return error codes. */ - char* name, /* Name of directory to open. */ - int flags, /* Flags to user for opening. */ - int* pfd, /* Where to store the file - descriptor. */ - Sint64 *pSize) /* Where to store the size of the - file. */ -{ - struct stat statbuf; - int fd; - int mode; /* Open mode. 
*/ - - switch (flags & (EFILE_MODE_READ|EFILE_MODE_WRITE)) { - case EFILE_MODE_READ: - mode = O_RDONLY; - break; - case EFILE_MODE_WRITE: - if (flags & EFILE_NO_TRUNCATE) - mode = O_WRONLY | O_CREAT; - else - mode = O_WRONLY | O_CREAT | O_TRUNC; - break; - case EFILE_MODE_READ_WRITE: - mode = O_RDWR | O_CREAT; - break; - default: - errno = EINVAL; - return check_error(-1, errInfo); - } - - if (flags & EFILE_MODE_APPEND) { - mode &= ~O_TRUNC; - mode |= O_APPEND; - } - if (flags & EFILE_MODE_EXCL) { - mode |= O_EXCL; - } - if (flags & EFILE_MODE_SYNC) { -#ifdef O_SYNC - mode |= O_SYNC; -#else - errno = ENOTSUP; - return check_error(-1, errInfo); -#endif - } - -#ifdef HAVE_FSTAT - while (((fd = open(name, mode, FILE_MODE)) < 0) && (errno == EINTR)); - if (!check_error(fd, errInfo)) return 0; -#endif - - if ( -#ifdef HAVE_FSTAT - fstat(fd, &statbuf) < 0 -#else - stat(name, &statbuf) < 0 -#endif - ) { - /* statbuf is undefined: if the caller depends on it, - i.e. invoke_read_file(), fail the call immediately */ - if (pSize && flags == EFILE_MODE_READ) { - check_error(-1, errInfo); -#ifdef HAVE_FSTAT - efile_closefile(fd); -#endif - return 0; - } - } - else if (! ISREG(statbuf)) { - struct stat nullstatbuf; - /* - * For UNIX only, here is some ugly code to allow - * /dev/null to be opened as a file. 
- */ - if ( (stat("/dev/null", &nullstatbuf) < 0) - || (statbuf.st_ino != nullstatbuf.st_ino) - || (statbuf.st_dev != nullstatbuf.st_dev) ) { -#ifdef HAVE_FSTAT - efile_closefile(fd); -#endif - errno = EISDIR; - return check_error(-1, errInfo); - } - } - -#ifndef HAVE_FSTAT - while (((fd = open(name, mode, FILE_MODE)) < 0) && (errno == EINTR)); - if (!check_error(fd, errInfo)) return 0; -#endif - - *pfd = fd; - if (pSize) *pSize = statbuf.st_size; - return 1; -} - -int -efile_may_openfile(Efile_error* errInfo, char *name) { - struct stat statbuf; /* Information about the file */ - int result; - - result = stat(name, &statbuf); - if (!check_error(result, errInfo)) - return 0; - if (!ISREG(statbuf)) { - errno = EISDIR; - return check_error(-1, errInfo); - } - return 1; -} - -void -efile_closefile(int fd) -{ - close(fd); -} - -int -efile_fdatasync(Efile_error *errInfo, /* Where to return error codes. */ - int fd) /* File descriptor for file to sync data. */ -{ -#if defined(HAVE_FDATASYNC) && !defined(__DARWIN__) - return check_error(fdatasync(fd), errInfo); -#else - return efile_fsync(errInfo, fd); -#endif -} - -int -efile_fsync(Efile_error *errInfo, /* Where to return error codes. */ - int fd) /* File descriptor for file to sync. */ -{ -#ifdef NO_FSYNC - undefined fsync /* XXX: Really? 
*/ -#else -#if defined(__DARWIN__) && defined(F_FULLFSYNC) - return check_error(fcntl(fd, F_FULLFSYNC), errInfo); -#else - return check_error(fsync(fd), errInfo); -#endif /* __DARWIN__ */ -#endif /* NO_FSYNC */ -} - -int -efile_fileinfo(Efile_error* errInfo, Efile_info* pInfo, - char* name, int info_for_link) -{ - struct stat statbuf; /* Information about the file */ - int result; - - if (info_for_link) { - result = lstat(name, &statbuf); - } else { - result = stat(name, &statbuf); - } - if (!check_error(result, errInfo)) { - return 0; - } - -#if SIZEOF_OFF_T == 4 - pInfo->size_high = 0; -#else - pInfo->size_high = (Uint32)(statbuf.st_size >> 32); -#endif - pInfo->size_low = (Uint32)statbuf.st_size; - -#ifdef NO_ACCESS - /* Just look at read/write access for owner. */ - - pInfo->access = ((statbuf.st_mode >> 6) & 07) >> 1; - -#else - pInfo->access = FA_NONE; - if (access(name, R_OK) == 0) - pInfo->access |= FA_READ; - if (access(name, W_OK) == 0) - pInfo->access |= FA_WRITE; - -#endif - - if (ISDEV(statbuf)) - pInfo->type = FT_DEVICE; - else if (ISDIR(statbuf)) - pInfo->type = FT_DIRECTORY; - else if (ISREG(statbuf)) - pInfo->type = FT_REGULAR; - else if (ISLNK(statbuf)) - pInfo->type = FT_SYMLINK; - else - pInfo->type = FT_OTHER; - - pInfo->accessTime = (Sint64)statbuf.st_atime; - pInfo->modifyTime = (Sint64)statbuf.st_mtime; - pInfo->cTime = (Sint64)statbuf.st_ctime; - - pInfo->mode = statbuf.st_mode; - pInfo->links = statbuf.st_nlink; - pInfo->major_device = statbuf.st_dev; - pInfo->minor_device = statbuf.st_rdev; - pInfo->inode = statbuf.st_ino; - pInfo->uid = statbuf.st_uid; - pInfo->gid = statbuf.st_gid; - - return 1; -} - -int -efile_write_info(Efile_error *errInfo, Efile_info *pInfo, char *name) -{ - struct utimbuf tval; - - /* - * On some systems chown will always fail for a non-root user unless - * POSIX_CHOWN_RESTRICTED is not set. Others will succeed as long as - * you don't try to chown a file to someone besides youself. 
- */ - - if (chown(name, pInfo->uid, pInfo->gid) && errno != EPERM) { - return check_error(-1, errInfo); - } - - if (pInfo->mode != -1) { - mode_t newMode = pInfo->mode & (S_ISUID | S_ISGID | - S_IRWXU | S_IRWXG | S_IRWXO); - if (chmod(name, newMode)) { - newMode &= ~(S_ISUID | S_ISGID); - if (chmod(name, newMode)) { - return check_error(-1, errInfo); - } - } - } - - tval.actime = (time_t)pInfo->accessTime; - tval.modtime = (time_t)pInfo->modifyTime; - - return check_error(utime(name, &tval), errInfo); -} - - -int -efile_write(Efile_error* errInfo, /* Where to return error codes. */ - int flags, /* Flags given when file was - opened. */ - int fd, /* File descriptor to write to. */ - char* buf, /* Buffer to write. */ - size_t count) /* Number of bytes to write. */ -{ - ssize_t written; /* Bytes written in last operation. */ - - while (count > 0) { - if ((written = write(fd, buf, count)) < 0) { - if (errno != EINTR) - return check_error(-1, errInfo); - else - written = 0; - } - ASSERT(written <= count); - buf += written; - count -= written; - } - return 1; -} - -int -efile_writev(Efile_error* errInfo, /* Where to return error codes */ - int flags, /* Flags given when file was - * opened */ - int fd, /* File descriptor to write to */ - SysIOVec* iov, /* Vector of buffer structs. - * The structs may be changed i.e. - * due to incomplete writes */ - int iovcnt) /* Number of structs in vector */ -{ - int cnt = 0; /* Buffers so far written */ - - ASSERT(iovcnt >= 0); - - while (cnt < iovcnt) { - if ((! 
iov[cnt].iov_base) || (iov[cnt].iov_len <= 0)) { - /* Empty buffer - skip */ - cnt++; - } else { /* Non-empty buffer */ - ssize_t w; /* Bytes written in this call */ -#ifdef HAVE_WRITEV - int b = iovcnt - cnt; /* Buffers to write */ - /* Use as many buffers as MAXIOV allows */ - if (b > MAXIOV) - b = MAXIOV; - if (b > 1) { - do { - w = writev(fd, &iov[cnt], b); - } while (w < 0 && errno == EINTR); - if (w < 0 && errno == EINVAL) { - goto single_write; - } - } else - single_write: - /* Degenerated io vector - use regular write */ -#endif - { - do { - size_t iov_len = iov[cnt].iov_len; - size_t limit = 1024*1024*1024; /* 1GB */ - if (iov_len > limit) { - iov_len = limit; - } - w = write(fd, iov[cnt].iov_base, iov_len); - } while (w < 0 && errno == EINTR); - ASSERT(w <= iov[cnt].iov_len || - (w == -1 && errno != EINTR)); - } - if (w < 0) return check_error(-1, errInfo); - /* Move forward to next buffer to write */ - for (; cnt < iovcnt && w > 0; cnt++) { - if (iov[cnt].iov_base && iov[cnt].iov_len > 0) { - if (w < iov[cnt].iov_len) { - /* Adjust the buffer for next write */ - iov[cnt].iov_len -= w; - iov[cnt].iov_base = ((char *)iov[cnt].iov_base) + w; - w = 0; - break; - } else { - w -= iov[cnt].iov_len; - } - } - } - ASSERT(w == 0); - } /* else Non-empty buffer */ - } /* while (cnt< iovcnt) */ - return 1; -} - -int -efile_read(Efile_error* errInfo, /* Where to return error codes. */ - int flags, /* Flags given when file was opened. */ - int fd, /* File descriptor to read from. */ - char* buf, /* Buffer to read into. */ - size_t count, /* Number of bytes to read. */ - size_t *pBytesRead) /* Where to return number of - bytes read. */ -{ - ssize_t n; - - for (;;) { - if ((n = read(fd, buf, count)) >= 0) - break; - else if (errno != EINTR) - return check_error(-1, errInfo); - } - *pBytesRead = (size_t) n; - return 1; -} - - -/* pread() and pwrite() */ -/* Some unix systems, notably Solaris has these syscalls */ -/* It is especially nice for i.e. 
the dets module to have support */ -/* for this, even if the underlying OS dosn't support it, it is */ -/* reasonably easy to work around by first calling seek, and then */ -/* calling read(). */ -/* This later strategy however changes the file pointer, which pread() */ -/* does not do. We choose to ignore this and say that the location */ -/* of the file pointer is undefined after a call to any of the p functions*/ - - -int -efile_pread(Efile_error* errInfo, /* Where to return error codes. */ - int fd, /* File descriptor to read from. */ - Sint64 offset, /* Offset in bytes from BOF. */ - char* buf, /* Buffer to read into. */ - size_t count, /* Number of bytes to read. */ - size_t *pBytesRead) /* Where to return - number of bytes read. */ -{ -#if defined(HAVE_PREAD) && defined(HAVE_PWRITE) - ssize_t n; - off_t off = (off_t) offset; - if (off != offset) { - errno = EINVAL; - return check_error(-1, errInfo); - } - for (;;) { - if ((n = pread(fd, buf, count, offset)) >= 0) - break; - else if (errno != EINTR) - return check_error(-1, errInfo); - } - *pBytesRead = (size_t) n; - return 1; -#else - { - int res = efile_seek(errInfo, fd, offset, EFILE_SEEK_SET, NULL); - if (res) { - return efile_read(errInfo, 0, fd, buf, count, pBytesRead); - } else { - return res; - } - } -#endif -} - - - -int -efile_pwrite(Efile_error* errInfo, /* Where to return error codes. */ - int fd, /* File descriptor to write to. */ - char* buf, /* Buffer to write. */ - size_t count, /* Number of bytes to write. */ - Sint64 offset) /* where to write it */ -{ -#if defined(HAVE_PREAD) && defined(HAVE_PWRITE) - ssize_t written; /* Bytes written in last operation. 
*/ - off_t off = (off_t) offset; - if (off != offset) { - errno = EINVAL; - return check_error(-1, errInfo); - } - - while (count > 0) { - if ((written = pwrite(fd, buf, count, offset)) < 0) { - if (errno != EINTR) - return check_error(-1, errInfo); - else - written = 0; - } - ASSERT(written <= count); - buf += written; - count -= written; - offset += written; - } - return 1; -#else /* For unix systems that don't support pread() and pwrite() */ - { - int res = efile_seek(errInfo, fd, offset, EFILE_SEEK_SET, NULL); - - if (res) { - return efile_write(errInfo, 0, fd, buf, count); - } else { - return res; - } - } -#endif -} - - -int -efile_seek(Efile_error* errInfo, /* Where to return error codes. */ - int fd, /* File descriptor to do the seek on. */ - Sint64 offset, /* Offset in bytes from the given - origin. */ - int origin, /* Origin of seek (SEEK_SET, SEEK_CUR, - SEEK_END). */ - Sint64 *new_location) /* Resulting new location in file. */ -{ - off_t off, result; - - switch (origin) { - case EFILE_SEEK_SET: origin = SEEK_SET; break; - case EFILE_SEEK_CUR: origin = SEEK_CUR; break; - case EFILE_SEEK_END: origin = SEEK_END; break; - default: - errno = EINVAL; - return check_error(-1, errInfo); - } - off = (off_t) offset; - if (off != offset) { - errno = EINVAL; - return check_error(-1, errInfo); - } - - errno = 0; - result = lseek(fd, off, origin); - - /* - * Note that the man page for lseek (on SunOs 5) says: - * - * "if fildes is a remote file descriptor and offset is - * negative, lseek() returns the file pointer even if it is - * negative." - */ - - if (result < 0 && errno == 0) - errno = EINVAL; - if (result < 0) - return check_error(-1, errInfo); - if (new_location) { - *new_location = result; - } - return 1; -} - - -int -efile_truncate_file(Efile_error* errInfo, int *fd, int flags) -{ -#ifndef NO_FTRUNCATE - off_t offset; - - return check_error((offset = lseek(*fd, 0, 1)) >= 0 && - ftruncate(*fd, offset) == 0 ? 
1 : -1, - errInfo); -#else - return 1; -#endif -} - -int -efile_readlink(Efile_error* errInfo, char* name, char* buffer, size_t size) -{ - int len; - ASSERT(size > 0); - len = readlink(name, buffer, size-1); - if (len == -1) { - return check_error(-1, errInfo); - } - buffer[len] = '\0'; - return 1; -} - -int -efile_altname(Efile_error* errInfo, char* name, char* buffer, size_t size) -{ - errno = ENOTSUP; - return check_error(-1, errInfo); -} - -int -efile_link(Efile_error* errInfo, char* old, char* new) -{ - return check_error(link(old, new), errInfo); -} - -int -efile_symlink(Efile_error* errInfo, char* old, char* new) -{ - return check_error(symlink(old, new), errInfo); -} - -int -efile_fadvise(Efile_error* errInfo, int fd, Sint64 offset, - Sint64 length, int advise) -{ -#ifdef HAVE_POSIX_FADVISE - return check_error(posix_fadvise(fd, offset, length, advise), errInfo); -#else - return check_error(0, errInfo); -#endif -} - -#ifdef HAVE_SENDFILE -/* For some reason the maximum size_t cannot be used as the max size - 3GB seems to work on all platforms */ -#define SENDFILE_CHUNK_SIZE ((1UL << 30) -1) - -/* - * sendfile: The implementation of the sendfile system call varies - * a lot on different *nix platforms so to make the api similar in all - * we have to emulate some things in linux and play with variables on - * bsd/darwin. - * - * All of the calls will split a command which tries to send more than - * SENDFILE_CHUNK_SIZE of data at once. - * - * On platforms where *nbytes of 0 does not mean the entire file, this is - * simulated. - * - * It could be possible to implement header/trailer in sendfile. Though - * you would have to emulate it in linux and on BSD/Darwin some complex - * calculations have to be made when using a non blocking socket to figure - * out how much of the header/file/trailer was sent in each command. - * - * The semantics of the API is this: - * Return value: 1 if all data was sent and the function does not need to - * be called again. 
0 if an error occures OR if there is more data which - * has to be sent (EAGAIN or EINTR will be set appropriately) - * - * The amount of data written in a call is returned through nbytes. - * - */ - -int -efile_sendfile(Efile_error* errInfo, int in_fd, int out_fd, - off_t *offset, Uint64 *nbytes, struct t_sendfile_hdtl* hdtl) -{ - Uint64 written = 0; -#if defined(__linux__) - ssize_t retval; - do { - /* check if *nbytes is 0 or greater than chunk size */ - if (*nbytes == 0 || *nbytes > SENDFILE_CHUNK_SIZE) - retval = sendfile(out_fd, in_fd, offset, SENDFILE_CHUNK_SIZE); - else - retval = sendfile(out_fd, in_fd, offset, *nbytes); - if (retval > 0) { - written += retval; - *nbytes -= retval; - } - } while (retval == SENDFILE_CHUNK_SIZE); - if (written != 0) { - /* -1 is not returned by the linux API so we have to simulate it */ - retval = -1; - errno = EAGAIN; - } -#elif defined(__sun) && defined(__SVR4) && defined(HAVE_SENDFILEV) - ssize_t retval; - size_t len; - sendfilevec_t fdrec; - fdrec.sfv_fd = in_fd; - fdrec.sfv_flag = 0; - do { - fdrec.sfv_off = *offset; - len = 0; - /* check if *nbytes is 0 or greater than chunk size */ - if (*nbytes == 0 || *nbytes > SENDFILE_CHUNK_SIZE) - fdrec.sfv_len = SENDFILE_CHUNK_SIZE; - else - fdrec.sfv_len = *nbytes; - - retval = sendfilev(out_fd, &fdrec, 1, &len); - - if (retval == -1 && errno == EINVAL) { - /* On some solaris versions (I've seen it on SunOS 5.10), - using a sfv_len larger then a filesize will result in - a -1 && errno == EINVAL return. 
We translate this so - a successful send of the data.*/ - retval = len; - } - - if (retval != -1 || errno == EAGAIN || errno == EINTR) { - *offset += len; - *nbytes -= len; - written += len; - } - } while (len == SENDFILE_CHUNK_SIZE); -#elif defined(__DARWIN__) - int retval; - off_t len; - do { - /* check if *nbytes is 0 or greater than chunk size */ - if(*nbytes > SENDFILE_CHUNK_SIZE) - len = SENDFILE_CHUNK_SIZE; - else - len = *nbytes; - retval = sendfile(in_fd, out_fd, *offset, &len, NULL, 0); - if (retval != -1 || errno == EAGAIN || errno == EINTR) { - *offset += len; - *nbytes -= len; - written += len; - } - } while (len == SENDFILE_CHUNK_SIZE); -#elif defined(__FreeBSD__) || defined(__DragonFly__) - off_t len; - int retval; - do { - if (*nbytes > SENDFILE_CHUNK_SIZE) - retval = sendfile(in_fd, out_fd, *offset, SENDFILE_CHUNK_SIZE, - NULL, &len, 0); - else - retval = sendfile(in_fd, out_fd, *offset, *nbytes, NULL, &len, 0); - if (retval != -1 || errno == EAGAIN || errno == EINTR) { - *offset += len; - *nbytes -= len; - written += len; - } - } while(len == SENDFILE_CHUNK_SIZE); -#endif - *nbytes = written; - return check_error(retval, errInfo); -} -#endif /* HAVE_SENDFILE */ - -#ifdef HAVE_POSIX_FALLOCATE -static int -call_posix_fallocate(int fd, Sint64 offset, Sint64 length) -{ - int ret; - - /* - * On Linux and Solaris for example, posix_fallocate() returns - * a positive error number on error and it does not set errno. - * On FreeBSD however (9.0 at least), it returns -1 on error - * and it sets errno. - */ - do { - ret = posix_fallocate(fd, (off_t) offset, (off_t) length); - if (ret > 0) { - errno = ret; - ret = -1; - } - } while (ret != 0 && errno == EINTR); - - return ret; -} -#endif /* HAVE_POSIX_FALLOCATE */ - -int -efile_fallocate(Efile_error* errInfo, int fd, Sint64 offset, Sint64 length) -{ -#if defined HAVE_FALLOCATE - /* Linux specific, more efficient than posix_fallocate. 
*/ - int ret; - - do { - ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, (off_t) offset, (off_t) length); - } while (ret != 0 && errno == EINTR); - -#if defined HAVE_POSIX_FALLOCATE - /* Fallback to posix_fallocate if available. */ - if (ret != 0) { - ret = call_posix_fallocate(fd, offset, length); - } -#endif - - return check_error(ret, errInfo); -#elif defined F_PREALLOCATE - /* Mac OS X specific, equivalent to posix_fallocate. */ - int ret; - fstore_t fs; - - memset(&fs, 0, sizeof(fs)); - fs.fst_flags = F_ALLOCATECONTIG; - fs.fst_posmode = F_VOLPOSMODE; - fs.fst_offset = (off_t) offset; - fs.fst_length = (off_t) length; - - ret = fcntl(fd, F_PREALLOCATE, &fs); - - if (-1 == ret) { - fs.fst_flags = F_ALLOCATEALL; - ret = fcntl(fd, F_PREALLOCATE, &fs); - -#if defined HAVE_POSIX_FALLOCATE - /* Fallback to posix_fallocate if available. */ - if (-1 == ret) { - ret = call_posix_fallocate(fd, offset, length); - } -#endif - } - - return check_error(ret, errInfo); -#elif defined HAVE_POSIX_FALLOCATE - /* Other Unixes, use posix_fallocate if available. */ - return check_error(call_posix_fallocate(fd, offset, length), errInfo); -#else - errno = ENOTSUP; - return check_error(-1, errInfo); -#endif -} diff --git a/erts/emulator/drivers/win32/win_efile.c b/erts/emulator/drivers/win32/win_efile.c deleted file mode 100644 index 2d366b5833..0000000000 --- a/erts/emulator/drivers/win32/win_efile.c +++ /dev/null @@ -1,2058 +0,0 @@ -/* - * %CopyrightBegin% - * - * Copyright Ericsson AB 1997-2016. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - * - * %CopyrightEnd% - */ -/* - * Purpose: Provides file and directory operations for Windows. - */ - -#include <windows.h> -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif -#include "sys.h" -#include <ctype.h> -#include <wchar.h> -#include "erl_efile.h" - -#define DBG_TRACE_MASK 0 -/* 1 = file name ops - * 2 = file descr ops - * 4 = errors - * 8 = path name conversion - */ -#if !DBG_TRACE_MASK -# define DBG_TRACE(M,S) -# define DBG_TRACE1(M,FMT,A) -# define DBG_TRACE2(M,FMT,A,B) -#else -# define DBG_TRACE(M,S) do { if ((M)&DBG_TRACE_MASK) fwprintf(stderr, L"DBG_TRACE %d: %s\r\n", __LINE__, (WCHAR*)(S)); }while(0) -# define DBG_TRACE1(M,FMT,A) do { if ((M)&DBG_TRACE_MASK) fwprintf(stderr, L"DBG_TRACE %d: " L##FMT L"\r\n", __LINE__, (A)); }while(0) -# define DBG_TRACE2(M,FMT,A,B) do { if ((M)&DBG_TRACE_MASK) fwprintf(stderr, L"DBG_TRACE %d: " L##FMT L"\r\n", __LINE__, (A), (B)); }while(0) -#endif - -/* - * Microsoft-specific function to map a WIN32 error code to a Posix errno. - */ - -#define ISSLASH(a) ((a) == L'\\' || (a) == L'/') -#define ISDIR(st) (((st).st_mode&S_IFMT) == S_IFDIR) -#define ISREG(st) (((st).st_mode&S_IFMT) == S_IFREG) - -#define IS_DOT_OR_DOTDOT(s) \ - ((s)[0] == L'.' && ((s)[1] == L'\0' || ((s)[1] == L'.' 
&& (s)[2] == L'\0'))) - -#define FILE_SHARE_FLAGS (FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE) - -#ifndef INVALID_FILE_ATTRIBUTES -#define INVALID_FILE_ATTRIBUTES ((DWORD) 0xFFFFFFFF) -#endif - -#define TICKS_PER_SECOND (10000000ULL) -#define EPOCH_DIFFERENCE (11644473600LL) - -#define FILETIME_TO_EPOCH(epoch, ft) \ - do { \ - ULARGE_INTEGER ull; \ - ull.LowPart = (ft).dwLowDateTime; \ - ull.HighPart = (ft).dwHighDateTime; \ - (epoch) = ((ull.QuadPart / TICKS_PER_SECOND) - EPOCH_DIFFERENCE); \ - } while(0) - -#define EPOCH_TO_FILETIME(ft, epoch) \ - do { \ - ULARGE_INTEGER ull; \ - ull.QuadPart = (((epoch) + EPOCH_DIFFERENCE) * TICKS_PER_SECOND); \ - (ft).dwLowDateTime = ull.LowPart; \ - (ft).dwHighDateTime = ull.HighPart; \ - } while(0) - - -static int check_error(int result, Efile_error* errInfo); -static int set_error(Efile_error* errInfo); -static int set_os_errno(Efile_error* errInfo, DWORD os_errno); -static int is_root_unc_name(const WCHAR *path); -static int extract_root(WCHAR *name); -static unsigned short dos_to_posix_mode(int attr, const WCHAR *name); - - -struct wpath_tmp_buffer { - struct wpath_tmp_buffer* next; - WCHAR buffer[1]; -}; - -typedef struct { - Efile_error* errInfo; - struct wpath_tmp_buffer* buf_list; -}Efile_call_state; - -static void call_state_init(Efile_call_state* state, Efile_error* errInfo) -{ - state->errInfo = errInfo; - state->buf_list = NULL; -} -static WCHAR* wpath_tmp_alloc(Efile_call_state* state, size_t len) -{ - size_t sz = offsetof(struct wpath_tmp_buffer, buffer) - + (len+1)*sizeof(WCHAR); - struct wpath_tmp_buffer* p = driver_alloc(sz); - p->next = state->buf_list; - state->buf_list = p; - return p->buffer; -} -static void call_state_free(Efile_call_state* state) -{ - while(state->buf_list) { - struct wpath_tmp_buffer* next = state->buf_list->next; - driver_free(state->buf_list); - state->buf_list = next; - } -} -static WCHAR* get_cwd_wpath_tmp(Efile_call_state* state) -{ - WCHAR dummy; - DWORD size = 
GetCurrentDirectoryW(0, &dummy); - WCHAR* ret = NULL; - - if (size) { - ret = wpath_tmp_alloc(state, size); - if (!GetCurrentDirectoryW(size, ret)) { - ret = NULL; - } - } - return ret; -} -static WCHAR* get_full_wpath_tmp(Efile_call_state* state, - const WCHAR* file, - WCHAR** file_part, - DWORD extra) -{ - WCHAR dummy; - DWORD size = GetFullPathNameW(file, 0, &dummy, NULL); - WCHAR* ret = NULL; - - if (size) { - int ok; - ret = wpath_tmp_alloc(state, size + extra); - if (file_part) { - ok = (GetFullPathNameW(file, size, ret, file_part) != 0); - } - else { - ok = (_wfullpath(ret, file, size) != NULL); - } - if (!ok) { - ret = NULL; - } - } - return ret; -} - -static void ensure_wpath_max(Efile_call_state* state, WCHAR** pathp, size_t max); -static int do_rmdir(Efile_call_state*, char* name); -static int do_rename(Efile_call_state*, char* src, char* dst); -static int do_readdir(Efile_call_state*, char* name, EFILE_DIR_HANDLE*, char* buffer, size_t *size); -static int do_fileinfo(Efile_call_state*, Efile_info*, char* orig_name, int info_for_link); -static char* do_readlink(Efile_call_state*, char* name, char* buffer, size_t size); -static int do_altname(Efile_call_state*, char* orig_name, char* buffer, size_t size); - - -static int errno_map(DWORD last_error) { - - switch (last_error) { - case ERROR_SUCCESS: - return 0; - case ERROR_INVALID_FUNCTION: - case ERROR_INVALID_DATA: - case ERROR_INVALID_PARAMETER: - case ERROR_INVALID_TARGET_HANDLE: - case ERROR_INVALID_CATEGORY: - case ERROR_NEGATIVE_SEEK: - return EINVAL; - case ERROR_DIR_NOT_EMPTY: - return EEXIST; - case ERROR_BAD_FORMAT: - return ENOEXEC; - case ERROR_PATH_NOT_FOUND: - case ERROR_FILE_NOT_FOUND: - case ERROR_NO_MORE_FILES: - return ENOENT; - case ERROR_TOO_MANY_OPEN_FILES: - return EMFILE; - case ERROR_ACCESS_DENIED: - case ERROR_INVALID_ACCESS: - case ERROR_CURRENT_DIRECTORY: - case ERROR_SHARING_VIOLATION: - case ERROR_LOCK_VIOLATION: - case ERROR_INVALID_PASSWORD: - case ERROR_DRIVE_LOCKED: - 
return EACCES; - case ERROR_INVALID_HANDLE: - return EBADF; - case ERROR_NOT_ENOUGH_MEMORY: - case ERROR_OUTOFMEMORY: - case ERROR_OUT_OF_STRUCTURES: - return ENOMEM; - case ERROR_INVALID_DRIVE: - case ERROR_BAD_UNIT: - case ERROR_NOT_READY: - case ERROR_REM_NOT_LIST: - case ERROR_DUP_NAME: - case ERROR_BAD_NETPATH: - case ERROR_NETWORK_BUSY: - case ERROR_DEV_NOT_EXIST: - case ERROR_BAD_NET_NAME: - return ENXIO; - case ERROR_NOT_SAME_DEVICE: - return EXDEV; - case ERROR_WRITE_PROTECT: - return EROFS; - case ERROR_BAD_LENGTH: - case ERROR_BUFFER_OVERFLOW: - return E2BIG; - case ERROR_SEEK: - case ERROR_SECTOR_NOT_FOUND: - return ESPIPE; - case ERROR_NOT_DOS_DISK: - return ENODEV; - case ERROR_GEN_FAILURE: - return ENODEV; - case ERROR_SHARING_BUFFER_EXCEEDED: - case ERROR_NO_MORE_SEARCH_HANDLES: - return EMFILE; - case ERROR_HANDLE_EOF: - case ERROR_BROKEN_PIPE: - return EPIPE; - case ERROR_HANDLE_DISK_FULL: - case ERROR_DISK_FULL: - return ENOSPC; - case ERROR_NOT_SUPPORTED: - return ENOTSUP; - case ERROR_FILE_EXISTS: - case ERROR_ALREADY_EXISTS: - case ERROR_CANNOT_MAKE: - return EEXIST; - case ERROR_ALREADY_ASSIGNED: - return EBUSY; - case ERROR_NO_PROC_SLOTS: - return EAGAIN; - case ERROR_CANT_RESOLVE_FILENAME: - return EMLINK; - case ERROR_PRIVILEGE_NOT_HELD: - return EPERM; - case ERROR_ARENA_TRASHED: - case ERROR_INVALID_BLOCK: - case ERROR_BAD_ENVIRONMENT: - case ERROR_BAD_COMMAND: - case ERROR_CRC: - case ERROR_OUT_OF_PAPER: - case ERROR_READ_FAULT: - case ERROR_WRITE_FAULT: - case ERROR_WRONG_DISK: - case ERROR_NET_WRITE_FAULT: - return EIO; - default: /* not to do with files I expect. 
*/ - return EIO; - } -} - -static int -check_error(int result, Efile_error* errInfo) -{ - if (result < 0) { - errInfo->posix_errno = errno; - errInfo->os_errno = GetLastError(); - DBG_TRACE2(4, "ERROR os_error=%d errno=%d @@@@@@@@@@@@@@@@@@@@@@@@@@@@", - errInfo->os_errno, errInfo->posix_errno); - return 0; - } - return 1; -} - -static void -save_last_error(Efile_error* errInfo) -{ - errInfo->posix_errno = errno; - errInfo->os_errno = GetLastError(); - DBG_TRACE2(4, "ERROR os_error=%d errno=%d $$$$$$$$$$$$$$$$$$$$$$$$$$$$$", - errInfo->os_errno, errInfo->posix_errno); -} - - -/* - * Fills the provided error information structure with information - * with the error code given by GetLastError() and its corresponding - * Posix error number. - * - * Returns 0. - */ - -static int -set_error(Efile_error* errInfo) -{ - set_os_errno(errInfo, GetLastError()); - return 0; -} - - -static int -set_os_errno(Efile_error* errInfo, DWORD os_errno) -{ - errInfo->os_errno = os_errno; - errInfo->posix_errno = errno_map(os_errno); - DBG_TRACE2(4, "ERROR os_error=%d errno=%d ############################", - errInfo->os_errno, errInfo->posix_errno); - return 0; -} - -int -efile_init() { - return 1; -} - -/* - * A writev with Unix semantics, but with Windows arguments - */ -static int -win_writev(Efile_error* errInfo, - HANDLE fd, /* handle to file */ - FILE_SEGMENT_ELEMENT iov[], /* array of buffer pointers */ - DWORD *size) /* number of bytes to write */ -{ - OVERLAPPED ov; - ov.Offset = 0L; - ov.OffsetHigh = 0L; - ov.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL); - if (ov.hEvent == NULL) - return set_error(errInfo); - if (! write_file_gather(fd, iov, *size, NULL, &ov)) - return set_error(errInfo); - if (WaitForSingleObject(ov.hEvent, INFINITE) != WAIT_OBJECT_0) - return set_error(errInfo); - if (! GetOverlappedResult(fd, &ov, size, FALSE)) - return set_error(errInfo); - return 1; -} - - -/* Check '*pathp' and convert it if needed to something that windows will accept. 
- * Typically use UNC path with \\?\ prefix if absolute path is longer than 260. - */ -static void ensure_wpath(Efile_call_state* state, WCHAR** pathp) -{ - ensure_wpath_max(state, pathp, MAX_PATH); -} - -static void ensure_wpath_max(Efile_call_state* state, WCHAR** pathp, size_t max) -{ - WCHAR* path = *pathp; - WCHAR* p; - size_t len = wcslen(path); - int unc_fixup = 0; - - if (path[0] == 0) { - DBG_TRACE(8, L"Let empty path pass through"); - return; - } - - DBG_TRACE1(8,"IN: %s", path); - - if (path[1] == L':' && ISSLASH(path[2])) { /* absolute path */ - if (len >= max) { - WCHAR *src, *dst; - - *pathp = wpath_tmp_alloc(state, 4+len+1); - dst = *pathp; - wcscpy(dst, L"\\\\?\\"); - for (src=path,dst+=4; *src; src++) { - if (*src == L'/') { - if (dst[-1] != L'\\') { - *dst++ = L'\\'; - } - /*else ignore redundant slashes */ - } - else - *dst++ = *src; - } - *dst = 0; - unc_fixup = 1; - } - } - else if (!(ISSLASH(path[0]) && ISSLASH(path[1]))) { /* relative path */ - DWORD cwdLen = GetCurrentDirectoryW(0, NULL); - DWORD absLen = cwdLen + 1 + len; - if (absLen >= max) { - WCHAR *fullPath = wpath_tmp_alloc(state, 4+4+absLen); - DWORD fullLen; - - fullLen = GetFullPathNameW(path, 4 + absLen, fullPath+4, NULL); - if (fullLen >= 4+absLen) { - *pathp = path; - DBG_TRACE2(8,"ensure_wpath FAILED absLen=%u %s", (int)absLen, path); - return; - } - /* GetFullPathNameW can return paths longer than MAX_PATH without the \\?\ prefix. - * At least seen on Windows 7. Go figure... - */ - if (fullLen >= max && wcsncmp(fullPath+4, L"\\\\?\\", 4) != 0) { - wcsncpy(fullPath, L"\\\\?\\", 4); - *pathp = fullPath; - } - else { - *pathp = fullPath + 4; - } - } - } - - if (unc_fixup) { - WCHAR* endp; - - p = *pathp; - len = wcslen(p); - endp = p + len; - if (len > 4) { - p += 4; - while (*p) { - if (p[0] == L'\\' && p[1] == L'.') { - if (p[2] == L'\\' || !p[2]) { /* single dot */ - wmemmove(p, p+2, (&endp[1] - &p[2])); - endp -= 2; - } - else if (p[2] == L'.' 
&& (p[3] == L'\\' || !p[3])) { /* double dot */ - WCHAR* r; - for (r=p-1; *r == L'\\'; --r) - /*skip redundant slashes*/; - for (; *r != L'\\'; --r) - /*find start of prev directory*/; - if (r < *pathp + 6) - break; - wmemmove(r, p+3, (&endp[1] - &p[3])); - p = r; - } - else p += 3; - } - else ++p; - } - } - } - DBG_TRACE1(8,"OUT: %s", *pathp); -} - -int -efile_mkdir(Efile_error* errInfo, /* Where to return error codes. */ - char* name) /* Name of directory to create. */ -{ - Efile_call_state state; - WCHAR* wname = (WCHAR*)name; - int ret; - - DBG_TRACE(1, name); - call_state_init(&state, errInfo); - ensure_wpath_max(&state, &wname, 248); /* Yes, 248 limit for normal paths */ - - ret = (int) CreateDirectoryW(wname, NULL); - if (!ret) - set_error(errInfo); - - call_state_free(&state); - return ret; -} - -int -efile_rmdir(Efile_error* errInfo, /* Where to return error codes. */ - char* name) /* Name of directory to delete. */ -{ - Efile_call_state state; - int ret; - - DBG_TRACE(1, name); - call_state_init(&state, errInfo); - ret = do_rmdir(&state, name); - call_state_free(&state); - return ret; -} - -static int do_rmdir(Efile_call_state* state, char* name) -{ - OSVERSIONINFO os; - DWORD attr; - WCHAR *wname = (WCHAR *) name; - WCHAR *buffer = NULL; - - ensure_wpath(state, &wname); - - if (RemoveDirectoryW(wname) != FALSE) { - return 1; - } - errno = errno_map(GetLastError()); - if (errno == EACCES) { - attr = GetFileAttributesW(wname); - if (attr != (DWORD) -1) { - if ((attr & FILE_ATTRIBUTE_DIRECTORY) == 0) { - /* - * Windows 95 reports calling RemoveDirectory on a file as an - * EACCES, not an ENOTDIR. - */ - - errno = ENOTDIR; - goto end; - } - - /* - * Windows 95 reports removing a non-empty directory as - * an EACCES, not an EEXIST. If the directory is not empty, - * change errno so caller knows what's going on. 
- */ - - os.dwOSVersionInfoSize = sizeof(os); - GetVersionEx(&os); - if (os.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) { - HANDLE handle; - WIN32_FIND_DATAW data; - int len = wcslen(wname); - - buffer = wpath_tmp_alloc(state, len + 4); - wcscpy(buffer, wname); - if (buffer[0] && buffer[len-1] != L'\\' && buffer[len-1] != L'/') { - wcscat(buffer, L"\\"); - } - wcscat(buffer, L"*.*"); - handle = FindFirstFileW(buffer, &data); - if (handle != INVALID_HANDLE_VALUE) { - while (1) { - if ((wcscmp(data.cFileName, L".") != 0) - && (wcscmp(data.cFileName, L"..") != 0)) { - /* - * Found something in this directory. - */ - - errno = EEXIST; - break; - } - if (FindNextFileW(handle, &data) == FALSE) { - break; - } - } - FindClose(handle); - } - } - } - } - - if (errno == ENOTEMPTY) { - /* - * Posix allows both EEXIST or ENOTEMPTY, but we'll always - * return EEXIST to allow easy matching in Erlang code. - */ - - errno = EEXIST; - } - - end: - save_last_error(state->errInfo); - return 0; -} - -int -efile_delete_file(Efile_error* errInfo, /* Where to return error codes. */ - char* name) /* Name of file to delete. */ -{ - Efile_call_state state; - int ret; - DBG_TRACE(1, name); - call_state_init(&state, errInfo); - ret = do_delete_file(&state, name); - call_state_free(&state); - return ret; -} - -static int do_delete_file(Efile_call_state* state, char* name) -{ - DWORD attr; - WCHAR *wname = (WCHAR *) name; - - ensure_wpath(state, &wname); - - if (DeleteFileW(wname) != FALSE) { - return 1; - } - - errno = errno_map(GetLastError()); - if (errno == EACCES) { - attr = GetFileAttributesW(wname); - if (attr != (DWORD) -1) { - if (attr & FILE_ATTRIBUTE_DIRECTORY) { - /* - * Windows NT reports removing a directory as EACCES instead - * of EPERM. 
- */ - - errno = EPERM; - } - } - } else if (errno == ENOENT) { - attr = GetFileAttributesW(wname); - if (attr != (DWORD) -1) { - if (attr & FILE_ATTRIBUTE_DIRECTORY) { - /* - * Windows 95 reports removing a directory as ENOENT instead - * of EPERM. - */ - - errno = EPERM; - } - } - } else if (errno == EINVAL) { - /* - * Windows NT reports removing a char device as EINVAL instead of - * EACCES. - */ - - errno = EACCES; - } - - return check_error(-1, state->errInfo); -} - -/* - *--------------------------------------------------------------------------- - * - * Changes the name of an existing file or directory, from src to dst. - * If src and dst refer to the same file or directory, does nothing - * and returns success. Otherwise if dst already exists, it will be - * deleted and replaced by src subject to the following conditions: - * If src is a directory, dst may be an empty directory. - * If src is a file, dst may be a file. - * In any other situation where dst already exists, the rename will - * fail. - * - * Some possible error codes: - * - * EACCES: src or dst parent directory can't be read and/or written. - * EEXIST: dst is a non-empty directory. - * EINVAL: src is a root directory or dst is a subdirectory of src. - * EISDIR: dst is a directory, but src is not. - * ENOENT: src doesn't exist, or src or dst is "". - * ENOTDIR: src is a directory, but dst is not. - * EXDEV: src and dst are on different filesystems. - * - * Side effects: - * The implementation of rename may allow cross-filesystem renames, - * but the caller should be prepared to emulate it with copy and - * delete if errno is EXDEV. 
- * - *--------------------------------------------------------------------------- - */ - -int -efile_rename(Efile_error* errInfo, char* src, char* dst) -{ - Efile_call_state state; - int ret; - DBG_TRACE(1, src); - call_state_init(&state, errInfo); - ret = do_rename(&state, src, dst); - call_state_free(&state); - return ret; -} - -static int -do_rename(Efile_call_state* state, - char* src, /* Original name. */ - char* dst) /* New name. */ -{ - DWORD srcAttr, dstAttr; - WCHAR *wsrc = (WCHAR *) src; - WCHAR *wdst = (WCHAR *) dst; - - ensure_wpath(state, &wsrc); - ensure_wpath(state, &wdst); - - if (MoveFileW(wsrc, wdst) != FALSE) { - return 1; - } - - errno = errno_map(GetLastError()); - srcAttr = GetFileAttributesW(wsrc); - dstAttr = GetFileAttributesW(wdst); - if (srcAttr == (DWORD) -1) { - srcAttr = 0; - } - if (dstAttr == (DWORD) -1) { - dstAttr = 0; - } - - if (errno == EBADF) { - errno = EACCES; - return check_error(-1, state->errInfo); - } - if (errno == EACCES) { - decode: - if (srcAttr & FILE_ATTRIBUTE_DIRECTORY) { - WCHAR *srcPath, *dstPath; - WCHAR *srcRest, *dstRest; - int size; - - srcPath = get_full_wpath_tmp(state, wsrc, &srcRest, 0); - if (!srcPath) { - save_last_error(state->errInfo); - return 0; - } - - dstPath = get_full_wpath_tmp(state, wdst, &dstRest, 0); - if (!dstPath) { - save_last_error(state->errInfo); - return 0; - } - - if (srcRest == NULL) { - srcRest = srcPath + wcslen(srcPath); - } - if (_wcsnicmp(srcPath, dstPath, srcRest - srcPath) == 0) { - /* - * Trying to move a directory into itself. - */ - - errno = EINVAL; - } - if (extract_root(srcPath)) { - /* - * Attempt to move a root directory. Never allowed. - */ - errno = EINVAL; - } - - (void) extract_root(dstPath); - if (dstPath[0] == L'\0') { - /* - * The filename was invalid. (Don't know why, - * but play it safe.) - */ - errno = EINVAL; - } - if (_wcsicmp(srcPath, dstPath) != 0) { - /* - * If src is a directory and dst filesystem != src - * filesystem, errno should be EXDEV. 
It is very - * important to get this behavior, so that the caller - * can respond to a cross filesystem rename by - * simulating it with copy and delete. The MoveFile - * system call already handles the case of moving a - * *file* between filesystems. - */ - - errno = EXDEV; - } - } - - /* - * Other types of access failure is that dst is a read-only - * filesystem, that an open file referred to src or dest, or that - * src or dest specified the current working directory on the - * current filesystem. EACCES is returned for those cases. - */ - - } else if (errno == EEXIST) { - /* - * Reports EEXIST any time the target already exists. If it makes - * sense, remove the old file and try renaming again. - */ - - if (srcAttr & FILE_ATTRIBUTE_DIRECTORY) { - if (dstAttr & FILE_ATTRIBUTE_DIRECTORY) { - /* - * Overwrite empty dst directory with src directory. The - * following call will remove an empty directory. If it - * fails, it's because it wasn't empty. - */ - - if (RemoveDirectoryW(wdst)) { - /* - * Now that that empty directory is gone, we can try - * renaming again. If that fails, we'll put this empty - * directory back, for completeness. - */ - - if (MoveFileW(wsrc, wdst) != FALSE) { - return 1; - } - - /* - * Some new error has occurred. Don't know what it - * could be, but report this one. - */ - - errno = errno_map(GetLastError()); - CreateDirectoryW(wdst, NULL); - SetFileAttributesW(wdst, dstAttr); - if (errno == EACCES) { - /* - * Decode the EACCES to a more meaningful error. - */ - - goto decode; - } - } - } else { /* (dstAttr & FILE_ATTRIBUTE_DIRECTORY) == 0 */ - errno = ENOTDIR; - } - } else { /* (srcAttr & FILE_ATTRIBUTE_DIRECTORY) == 0 */ - if (dstAttr & FILE_ATTRIBUTE_DIRECTORY) { - errno = EISDIR; - } else { - /* - * Overwrite existing file by: - * - * 1. Rename existing file to temp name. - * 2. Rename old file to new name. - * 3. If success, delete temp file. If failure, - * put temp file back to old name. 
- */ - - WCHAR *tempName; - int result; - WCHAR *rest; - - tempName = get_full_wpath_tmp(state, wdst, &rest, 14); - if (!tempName || !rest) { - save_last_error(state->errInfo); - return 0; - } - - *rest = L'\0'; - result = -1; - if (GetTempFileNameW(tempName, L"erlr", 0, tempName) != 0) { - /* - * Strictly speaking, need the following DeleteFile and - * MoveFile to be joined as an atomic operation so no - * other app comes along in the meantime and creates the - * same temp file. - */ - - DeleteFileW(tempName); - if (MoveFileW(wdst, tempName) != FALSE) { - if (MoveFileW(wsrc, wdst) != FALSE) { - SetFileAttributesW(tempName, FILE_ATTRIBUTE_NORMAL); - DeleteFileW(tempName); - return 1; - } else { - DeleteFileW(wdst); - MoveFileW(tempName, wdst); - } - } - - /* - * Can't backup dst file or move src file. Return that - * error. Could happen if an open file refers to dst. - */ - - errno = errno_map(GetLastError()); - if (errno == EACCES) { - /* - * Decode the EACCES to a more meaningful error. - */ - goto decode; - } - } - return result; - } - } - } - return check_error(-1, state->errInfo); -} - -int -efile_chdir(Efile_error* errInfo, /* Where to return error codes. */ - char* name) /* Name of directory to make current. */ -{
- /* We don't even try to handle long paths here
- * as current working directory is always limited to MAX_PATH
- * even if we use UNC paths and SetCurrentDirectoryW()
- */
- int success = check_error(_wchdir((WCHAR *) name), errInfo);
- if (!success && errInfo->posix_errno == EINVAL)
- /* POSIXification of errno */
- errInfo->posix_errno = ENOENT; - return success; -} - -int -efile_getdcwd(Efile_error* errInfo, /* Where to return error codes. */ - int drive, /* 0 - current, 1 - A, 2 - B etc. */ - char* buffer, /* Where to return the current directory. */ - size_t size) /* Size of buffer. */ -{ - WCHAR *wbuffer = (WCHAR *) buffer; - size_t wbuffer_size = size / 2; - DBG_TRACE(1, L"#getdcwd#"); - if (_wgetdcwd(drive, wbuffer, wbuffer_size) == NULL) { - return check_error(-1, errInfo); - } - DBG_TRACE1(8, "getdcwd OS=%s", wbuffer); - if (wcsncmp(wbuffer, L"\\\\?\\", 4) == 0) { - wmemmove(wbuffer, wbuffer+4, wcslen(wbuffer+4)+1); - } - for ( ; *wbuffer; wbuffer++) - if (*wbuffer == L'\\') - *wbuffer = L'/'; - DBG_TRACE1(8, "getdcwd ERLANG=%s", (WCHAR*)buffer); - return 1; -} - -int -efile_readdir(Efile_error* errInfo, char* name, EFILE_DIR_HANDLE* dir_handle, - char* buffer, size_t *size) -{ - Efile_call_state state; - int ret; - DBG_TRACE(dir_handle?2:1, name); - call_state_init(&state, errInfo); - ret = do_readdir(&state, name, dir_handle, buffer, size); - call_state_free(&state); - return ret; -} - -static int do_readdir(Efile_call_state* state, - char* name, /* Name of directory to list */ - EFILE_DIR_HANDLE* dir_handle, /* Handle of opened directory or NULL */ - char* buffer, /* Buffer to put one filename in */ - size_t *size) /* in-out size of buffer/size of filename excluding zero - termination in bytes*/ -{ - HANDLE dir; /* Handle to directory. */ - WIN32_FIND_DATAW findData; /* Data found by FindFirstFile() or FindNext(). */ - /* Alignment is not honored, this works on x86 because of alignment fixup by processor. - Not perfect, but faster than alinging by hand (really) */ - WCHAR *wbuffer = (WCHAR *) buffer; - - /* - * First time we must setup everything. 
- */ - - if (*dir_handle == NULL) { - WCHAR *wname = (WCHAR *) name; - WCHAR* wildcard; - int length; - WCHAR* s; - - ensure_wpath_max(state, &wname, MAX_PATH-2); - length = wcslen(wname); - - wildcard = wpath_tmp_alloc(state, length+3); - - wcscpy(wildcard, wname); - s = wildcard+length-1; - if (*s != L'/' && *s != L'\\') - *++s = L'\\'; - *++s = L'*'; - *++s = L'\0'; - DEBUGF(("Reading %ws\n", wildcard)); - dir = FindFirstFileW(wildcard, &findData); - if (dir == INVALID_HANDLE_VALUE) { - set_error(state->errInfo); - return 0; - } - *dir_handle = (EFILE_DIR_HANDLE) dir; - - if (!IS_DOT_OR_DOTDOT(findData.cFileName)) { - wcscpy(wbuffer, findData.cFileName); - *size = wcslen(wbuffer)*2; - return 1; - } - } - - /* - * Retrieve the name of the next file using the directory handle. - */ - - dir = (HANDLE) *dir_handle; - - for (;;) { - if (FindNextFileW(dir, &findData)) { - if (IS_DOT_OR_DOTDOT(findData.cFileName)) - continue; - wcscpy(wbuffer, findData.cFileName); - *size = wcslen(wbuffer)*2; - return 1; - } - - if (GetLastError() == ERROR_NO_MORE_FILES) { - state->errInfo->posix_errno = state->errInfo->os_errno = 0; - } - else { - set_error(state->errInfo); - } - FindClose(dir); - return 0; - } -} - -int -efile_openfile(Efile_error* errInfo, char* name, int flags, int* pfd, Sint64* pSize) -{ - Efile_call_state state; - int ret; - DBG_TRACE1(1, "openfile(%s)", name); - call_state_init(&state, errInfo); - ret = do_openfile(&state, name, flags, pfd, pSize); - call_state_free(&state); - return ret; -} - -static -int do_openfile(Efile_call_state* state, /* Where to return error codes. */ - char* name, /* Name of directory to open. */ - int flags, /* Flags to use for opening. */ - int* pfd, /* Where to store the file descriptor. */ - Sint64* pSize) /* Where to store the size of the file. */ -{ - Efile_error* errInfo = state->errInfo; - BY_HANDLE_FILE_INFORMATION fileInfo; /* File information from a handle. */ - HANDLE fd; /* Handle to open file. 
*/ - DWORD access; /* Access mode: GENERIC_READ, GENERIC_WRITE. */ - DWORD crFlags; - DWORD flagsAndAttrs = FILE_ATTRIBUTE_NORMAL; - WCHAR *wname = (WCHAR *) name; - - switch (flags & (EFILE_MODE_READ|EFILE_MODE_WRITE)) { - case EFILE_MODE_READ: - access = GENERIC_READ; - crFlags = OPEN_EXISTING; - break; - case EFILE_MODE_WRITE: - access = GENERIC_WRITE; - crFlags = CREATE_ALWAYS; - break; - case EFILE_MODE_READ_WRITE: - access = GENERIC_READ|GENERIC_WRITE; - crFlags = OPEN_ALWAYS; - break; - default: - errno = EINVAL; - check_error(-1, errInfo); - return 0; - } - - if (flags & EFILE_MODE_SYNC) { - flagsAndAttrs = FILE_FLAG_WRITE_THROUGH; - } - - if (flags & EFILE_MODE_APPEND) { - crFlags = OPEN_ALWAYS; - } - if (flags & EFILE_MODE_EXCL) { - crFlags = CREATE_NEW; - } - ensure_wpath(state, &wname); - fd = CreateFileW(wname, access, - FILE_SHARE_FLAGS, - NULL, crFlags, flagsAndAttrs, NULL); - - /* - * Check for errors. - */ - - if (fd == INVALID_HANDLE_VALUE) { - DWORD attr; - - set_error(errInfo); - - /* - * If the error is EACESS, the reason could be that we tried to - * open a directory. In that case, we'll change the error code - * to EISDIR. - */ - if (errInfo->posix_errno && - (attr = GetFileAttributesW(wname)) != INVALID_FILE_ATTRIBUTES && - (attr & FILE_ATTRIBUTE_DIRECTORY)) { - errInfo->posix_errno = EISDIR; - } - return 0; - } - - /* - * Get and return the length of the open file. 
- */ - - if (!GetFileInformationByHandle(fd, &fileInfo)) - return set_error(errInfo); - *pfd = (int) fd; - if (pSize) { - *pSize = (Sint64) - (((Uint64)fileInfo.nFileSizeHigh << 32) | - (Uint64)fileInfo.nFileSizeLow); - } - return 1; -} - -int -efile_may_openfile(Efile_error* errInfo, char *name) -{ - Efile_call_state state; - WCHAR *wname = (WCHAR *) name; - DWORD attr; - int ret; - - DBG_TRACE(1, name); - call_state_init(&state, errInfo); - ensure_wpath(&state, &wname); - if ((attr = GetFileAttributesW(wname)) == INVALID_FILE_ATTRIBUTES) { - errno = ENOENT; - ret = check_error(-1, errInfo); - } - else if (attr & FILE_ATTRIBUTE_DIRECTORY) { - errno = EISDIR; - ret = check_error(-1, errInfo); - } - else ret = 1; - - call_state_free(&state); - return ret; -} - -void -efile_closefile(fd) -int fd; /* File descriptor for file to close. */ -{ - DBG_TRACE(2, L""); - CloseHandle((HANDLE) fd); -} - -FILE* efile_wfopen(const WCHAR* name, const WCHAR* mode) -{ - Efile_call_state state; - Efile_error dummy; - FILE* f; - call_state_init(&state, &dummy); - ensure_wpath(&state, (WCHAR**)&name); - f = _wfopen(name, mode); - call_state_free(&state); - return f; -} - -int -efile_fdatasync(errInfo, fd) -Efile_error* errInfo; /* Where to return error codes. */ -int fd; /* File descriptor for file to sync. */ -{ - DBG_TRACE(2, L""); - /* Not available in Windows, just call regular fsync */ - return efile_fsync(errInfo, fd); -} - -int -efile_fsync(errInfo, fd) -Efile_error* errInfo; /* Where to return error codes. */ -int fd; /* File descriptor for file to sync. 
*/ -{ - DBG_TRACE(2, L""); - if (!FlushFileBuffers((HANDLE) fd)) { - return check_error(-1, errInfo); - } - return 1; -} - -int -efile_fileinfo(Efile_error* errInfo, Efile_info* pInfo, - char* orig_name, int info_for_link) -{ - Efile_call_state state; - int ret; - DBG_TRACE(1, L""); - call_state_init(&state, errInfo); - ret = do_fileinfo(&state, pInfo, orig_name, info_for_link); - call_state_free(&state); - return ret; -} - -static int -do_fileinfo(Efile_call_state* state, Efile_info* pInfo, - char* orig_name, int info_for_link) -{ - Efile_error* errInfo = state->errInfo; - HANDLE findhandle; /* Handle returned by FindFirstFile(). */ - WIN32_FIND_DATAW findbuf; /* Data return by FindFirstFile(). */ - WCHAR* name = NULL; - WCHAR* win_path; - int name_len; - int drive; /* Drive for filename (1 = A:, 2 = B: etc). */ - WCHAR *worig_name = (WCHAR *) orig_name; - - ensure_wpath(state, &worig_name); - /* Don't allow wildcards to be interpreted by system */ - - - /* - * Move the name to a buffer and make sure to remove a trailing - * slash, because it causes FindFirstFile() to fail on Win95. - */ - - name_len = wcslen(worig_name); - - name = wpath_tmp_alloc(state, name_len+1); - wcscpy(name, worig_name); - if (name_len > 2 && ISSLASH(name[name_len-1]) && - name[name_len-2] != L':') { - name[name_len-1] = L'\0'; - } - - win_path = name; - if (wcsncmp(name, L"\\\\?\\", 4) == 0) { - win_path += 4; - } - - if (wcspbrk(win_path, L"?*")) { - enoent: - errInfo->posix_errno = ENOENT; - errInfo->os_errno = ERROR_FILE_NOT_FOUND; - return 0; - } - - /* Try to get disk from name. If none, get current disk. */ - - if (win_path[1] != L':') { - WCHAR* cwd_path = get_cwd_wpath_tmp(state); - drive = 0; - if (cwd_path[1] == L':') { - drive = towlower(cwd_path[0]) - L'a' + 1; - } - } else if (*win_path && win_path[2] == L'\0') { - /* - * X: and nothing more is an error. 
- */ - errInfo->posix_errno = ENOENT; - errInfo->os_errno = ERROR_FILE_NOT_FOUND; - return 0; - } else { - drive = towlower(*win_path) - L'a' + 1; - } - - findhandle = FindFirstFileW(name, &findbuf); - if (findhandle == INVALID_HANDLE_VALUE) { - WCHAR* path = NULL; - - if (!(wcspbrk(name, L"./\\") && - (path = get_full_wpath_tmp(state, name, NULL, 0)) && - /* root dir. ('C:\') or UNC root dir. ('\\server\share\') */ - ((wcslen(path) == 3) || is_root_unc_name(path)) && - (GetDriveTypeW(path) > 1) ) ) { - - errInfo->posix_errno = ENOENT; - errInfo->os_errno = ERROR_FILE_NOT_FOUND; - return 0; - } - - /* - * Root directories (such as C:\ or \\server\share\ are fabricated. - */ - - findbuf.dwFileAttributes = FILE_ATTRIBUTE_DIRECTORY; - findbuf.nFileSizeHigh = 0; - findbuf.nFileSizeLow = 0; - findbuf.cFileName[0] = L'\0'; - - pInfo->links = 1; - pInfo->cTime = pInfo->accessTime = pInfo->modifyTime = 0; - } else { - SYSTEMTIME SystemTime; - FILETIME LocalFTime; - - /*first check if we are a symlink */ - if (!info_for_link && (findbuf.dwFileAttributes & - FILE_ATTRIBUTE_REPARSE_POINT)){ - /* - * given that we know this is a symlink, - we should be able to find its target */ - WCHAR* target_name = (WCHAR*) do_readlink(state, (char *) name, NULL, 0); - if (target_name) { - FindClose(findhandle); - return do_fileinfo(state, pInfo, - (char *) target_name, info_for_link); - } - } - - /* number of links: */ - { - HANDLE handle; /* Handle returned by CreateFile() */ - BY_HANDLE_FILE_INFORMATION fileInfo; /* from CreateFile() */ - - /* We initialise nNumberOfLinks as GetFileInformationByHandle - does not always initialise this field */ - fileInfo.nNumberOfLinks = 1; - if (handle = CreateFileW(name, GENERIC_READ, FILE_SHARE_FLAGS, NULL, - OPEN_EXISTING, 0, NULL)) { - GetFileInformationByHandle(handle, &fileInfo); - pInfo->links = fileInfo.nNumberOfLinks; - CloseHandle(handle); - } else { - pInfo->links = 1; - } - } - - FILETIME_TO_EPOCH(pInfo->modifyTime, findbuf.ftLastWriteTime); 
- - if (findbuf.ftLastAccessTime.dwLowDateTime == 0 && - findbuf.ftLastAccessTime.dwHighDateTime == 0) { - pInfo->accessTime = pInfo->modifyTime; - } else { - FILETIME_TO_EPOCH(pInfo->accessTime, findbuf.ftLastAccessTime); - } - - if (findbuf.ftCreationTime.dwLowDateTime == 0 && - findbuf.ftCreationTime.dwHighDateTime == 0) { - pInfo->cTime = pInfo->modifyTime; - } else { - FILETIME_TO_EPOCH(pInfo->cTime ,findbuf.ftCreationTime); - } - FindClose(findhandle); - } - - pInfo->size_low = findbuf.nFileSizeLow; - pInfo->size_high = findbuf.nFileSizeHigh; - - if (info_for_link && (findbuf.dwFileAttributes & - FILE_ATTRIBUTE_REPARSE_POINT)) - pInfo->type = FT_SYMLINK; - else if (findbuf.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) - pInfo->type = FT_DIRECTORY; - else - pInfo->type = FT_REGULAR; - - if (findbuf.dwFileAttributes & FILE_ATTRIBUTE_READONLY) - pInfo->access = FA_READ; - else - pInfo->access = FA_READ|FA_WRITE; - - pInfo->mode = dos_to_posix_mode(findbuf.dwFileAttributes, name); - pInfo->major_device = drive; - pInfo->minor_device = 0; - pInfo->inode = 0; - pInfo->uid = 0; - pInfo->gid = 0; - - return 1; -} - -int -efile_write_info(Efile_error* errInfo, - Efile_info* pInfo, - char* name) -{ - Efile_call_state state; - int ret; - call_state_init(&state, errInfo); - ret = do_write_info(&state, pInfo, name); - call_state_free(&state); - return ret; -} - -static int -do_write_info(Efile_call_state* state, - Efile_info* pInfo, - char* name) -{ - Efile_error* errInfo = state->errInfo; - SYSTEMTIME timebuf; - FILETIME ModifyFileTime; - FILETIME AccessFileTime; - FILETIME CreationFileTime; - HANDLE fd; - DWORD attr; - DWORD tempAttr; - WCHAR *wname = (WCHAR *) name; - - DBG_TRACE(1, name); - - ensure_wpath(state, &wname); - - /* - * Get the attributes for the file. 
- */ - - tempAttr = attr = GetFileAttributesW(wname); - if (attr == 0xffffffff) { - return set_error(errInfo); - } - if (pInfo->mode != -1) { - if (pInfo->mode & _S_IWRITE) { - /* clear read only bit */ - attr &= ~FILE_ATTRIBUTE_READONLY; - } else { - /* set read only bit */ - attr |= FILE_ATTRIBUTE_READONLY; - } - } - - /* - * Construct all file times. - */ - - EPOCH_TO_FILETIME(ModifyFileTime, pInfo->modifyTime); - EPOCH_TO_FILETIME(AccessFileTime, pInfo->accessTime); - EPOCH_TO_FILETIME(CreationFileTime, pInfo->cTime); - - /* - * If necessary, set the file times. - */ - - /* - * If the has read only access, we must temporarily turn on - * write access (this is necessary for native filesystems, - * but not for NFS filesystems). - */ - - if (tempAttr & FILE_ATTRIBUTE_READONLY) { - tempAttr &= ~FILE_ATTRIBUTE_READONLY; - if (!SetFileAttributesW(wname, tempAttr)) { - return set_error(errInfo); - } - } - - fd = CreateFileW(wname, GENERIC_READ|GENERIC_WRITE, - FILE_SHARE_FLAGS, - NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); - if (fd != INVALID_HANDLE_VALUE) { - BOOL result = SetFileTime(fd, &CreationFileTime, &AccessFileTime, &ModifyFileTime); - if (!result) { - return set_error(errInfo); - } - CloseHandle(fd); - } - - /* - * If the file doesn't have the correct attributes, set them now. - * (It could have been done before setting the file times, above). - */ - - if (tempAttr != attr) { - if (!SetFileAttributesW(wname, attr)) { - return set_error(errInfo); - } - } - return 1; -} - - -int -efile_pwrite(errInfo, fd, buf, count, offset) -Efile_error* errInfo; /* Where to return error codes. */ -int fd; /* File descriptor to write to. */ -char* buf; /* Buffer to write. */ -size_t count; /* Number of bytes to write. 
*/ -Sint64 offset; /* where to write it */ -{ - int res; - DBG_TRACE(2, L""); - res = efile_seek(errInfo, fd, offset, EFILE_SEEK_SET, NULL); - if (res) { - return efile_write(errInfo, EFILE_MODE_WRITE, fd, buf, count); - } else { - return res; - } -} - -/* position and read/write as a single atomic op */ -int -efile_pread(errInfo, fd, offset, buf, count, pBytesRead) -Efile_error* errInfo; /* Where to return error codes. */ -int fd; /* File descriptor to read from. */ -Sint64 offset; /* Offset in bytes from BOF. */ -char* buf; /* Buffer to read into. */ -size_t count; /* Number of bytes to read. */ -size_t* pBytesRead; /* Where to return number of bytes read. */ -{ - int res; - DBG_TRACE(2, L""); - res = efile_seek(errInfo, fd, offset, EFILE_SEEK_SET, NULL); - if (res) { - return efile_read(errInfo, EFILE_MODE_READ, fd, buf, count, pBytesRead); - } else { - return res; - } -} - - - -int -efile_write(errInfo, flags, fd, buf, count) -Efile_error* errInfo; /* Where to return error codes. */ -int flags; /* Flags given when file was opened. */ -int fd; /* File descriptor to write to. */ -char* buf; /* Buffer to write. */ -size_t count; /* Number of bytes to write. */ -{ - DWORD written; /* Bytes written in last operation. */ - OVERLAPPED overlapped; - OVERLAPPED* pOverlapped = NULL; - - DBG_TRACE(2, L""); - if (flags & EFILE_MODE_APPEND) { - memset(&overlapped, 0, sizeof(overlapped)); - overlapped.Offset = 0xffffffff; - overlapped.OffsetHigh = 0xffffffff; - pOverlapped = &overlapped; - } - while (count > 0) { - if (!WriteFile((HANDLE) fd, buf, count, &written, pOverlapped)) - return set_error(errInfo); - buf += written; - count -= written; - } - return 1; -} - -int -efile_writev(Efile_error* errInfo, /* Where to return error codes */ - int flags, /* Flags given when file was - * opened */ - int fd, /* File descriptor to write to */ - SysIOVec* iov, /* Vector of buffer structs. 
- * The structs are unchanged - * after the call */ - int iovcnt) /* Number of structs in vector */ -{ - int cnt; /* Buffers so far written */ - OVERLAPPED overlapped; - OVERLAPPED* pOverlapped = NULL; - - DBG_TRACE(2, L""); - ASSERT(iovcnt >= 0); - - if (flags & EFILE_MODE_APPEND) { - memset(&overlapped, 0, sizeof(overlapped)); - overlapped.Offset = 0xffffffff; - overlapped.OffsetHigh = 0xffffffff; - pOverlapped = &overlapped; - } - for (cnt = 0; cnt < iovcnt; cnt++) { - if (iov[cnt].iov_base && iov[cnt].iov_len > 0) { - /* Non-empty buffer */ - int p; /* Position in buffer */ - int w = iov[cnt].iov_len;/* Bytes written in this call */ - for (p = 0; p < iov[cnt].iov_len; p += w) { - if (!WriteFile((HANDLE) fd, - iov[cnt].iov_base + p, - iov[cnt].iov_len - p, - &w, - pOverlapped)) - return set_error(errInfo); - } - } - } - return 1; -} - -int -efile_read(errInfo, flags, fd, buf, count, pBytesRead) -Efile_error* errInfo; /* Where to return error codes. */ -int flags; /* Flags given when file was opened. */ -int fd; /* File descriptor to read from. */ -char* buf; /* Buffer to read into. */ -size_t count; /* Number of bytes to read. */ -size_t* pBytesRead; /* Where to return number of bytes read. */ -{ - DWORD nbytes = 0; - - DBG_TRACE(2, L""); - if (!ReadFile((HANDLE) fd, buf, count, &nbytes, NULL)) - return set_error(errInfo); - - *pBytesRead = nbytes; - return 1; -} - -int -efile_seek(errInfo, fd, offset, origin, new_location) -Efile_error* errInfo; /* Where to return error codes. */ -int fd; /* File descriptor to do the seek on. */ -Sint64 offset; /* Offset in bytes from the given origin. */ -int origin; /* Origin of seek (SEEK_SET, SEEK_CUR, - * SEEK_END). - */ -Sint64* new_location; /* Resulting new location in file. 
*/ -{ - LARGE_INTEGER off, new_loc; - - DBG_TRACE(2, L""); - switch (origin) { - case EFILE_SEEK_SET: origin = FILE_BEGIN; break; - case EFILE_SEEK_CUR: origin = FILE_CURRENT; break; - case EFILE_SEEK_END: origin = FILE_END; break; - default: - errno = EINVAL; - check_error(-1, errInfo); - break; - } - - off.QuadPart = offset; - if (! SetFilePointerEx((HANDLE) fd, off, - new_location ? &new_loc : NULL, origin)) { - return set_error(errInfo); - } - if (new_location) { - *new_location = new_loc.QuadPart; - DEBUGF(("efile_seek(offset=%ld, origin=%d) -> %ld\n", - (long) offset, origin, (long) *new_location)); - } else { - DEBUGF(("efile_seek(offset=%ld, origin=%d)\n", (long) offset, origin)); - } - return 1; -} - -int -efile_truncate_file(errInfo, fd, flags) -Efile_error* errInfo; /* Where to return error codes. */ -int *fd; /* File descriptor for file to truncate. */ -int flags; -{ - DBG_TRACE(2, L""); - if (!SetEndOfFile((HANDLE) (*fd))) - return set_error(errInfo); - return 1; -} - - -/* - * is_root_unc_name - returns TRUE if the argument is a UNC name specifying - * a root share. That is, if it is of the form \\server\share\. - * This routine will also return true if the argument is of the - * form \\server\share (no trailing slash) but Win32 currently - * does not like that form. - * - * Forward slashes ('/') may be used instead of backslashes ('\'). - */ - -static int -is_root_unc_name(const WCHAR *path) -{ - /* - * If a root UNC name, path will start with 2 (but not 3) slashes - */ - - if ((wcslen(path) >= 5) /* minimum string is "//x/y" */ - && ISSLASH(path[0]) && ISSLASH(path[1])) - { - const WCHAR *p = path + 2; - - /* - * find the slash between the server name and share name - */ - while ( * ++ p ) - if ( ISSLASH(*p) ) - break ; - - if ( *p && p[1] ) - { - /* - * is there a further slash? 
- */ - while ( * ++ p ) - if ( ISSLASH(*p) ) - break ; - - /* - * just final slash (or no final slash) - */ - if ( !*p || !p[1]) - return 1; - } - } - - return 0 ; -} - -/* - * Extracts the root part of an absolute filename (by modifying the string - * pointed to by the name argument). The name can start - * with either a driver letter (for example, C:\), or a UNC name - * (for example, \\guinness\bjorn). - * - * If the name is invalid, the buffer will be modified to point to - * an empty string. - * - * Returns: 1 if the name consists of just the root part, 0 if - * the name was longer. - */ - -static int -extract_root(WCHAR* name) -{ - int len = wcslen(name); - - if (iswalpha(name[0]) && name[1] == L':' && ISSLASH(name[2])) { - WCHAR c = name[3]; - name[3] = L'\0'; - return c == L'\0'; - } else if (len < 5 || !ISSLASH(name[0]) || !ISSLASH(name[1])) { - goto error; - } else { /* Try to find the end of the UNC name. */ - WCHAR* p; - WCHAR c; - - /* - * Find the slash between the server name and share name. - */ - - for (p = name + 2; *p; p++) - if (ISSLASH(*p)) - break; - if (*p == L'\0') - goto error; - - /* - * Find the slash after the share name. - */ - - for (p++; *p; p++) - if (ISSLASH(*p)) - break; - c = *p; - *p = L'\0'; - return c == L'\0' || p[1] == L'\0'; - } - - error: - *name = L'\0'; - return 1; -} - -static unsigned short -dos_to_posix_mode(int attr, const WCHAR *name) -{ - register unsigned short uxmode; - unsigned dosmode; - register const WCHAR *p; - - dosmode = attr & 0xff; - if ((p = name)[1] == L':') - p += 2; - - /* check to see if this is a directory - note we must make a special - * check for the root, which DOS thinks is not a directory - */ - - uxmode = (unsigned short) - (((ISSLASH(*p) && !p[1]) || (dosmode & FILE_ATTRIBUTE_DIRECTORY) || - *p == L'\0') ? _S_IFDIR|_S_IEXEC : _S_IFREG); - - /* If attribute byte does not have read-only bit, it is read-write */ - - uxmode |= (dosmode & FILE_ATTRIBUTE_READONLY) ? 
- _S_IREAD : (_S_IREAD|_S_IWRITE); - - /* see if file appears to be executable - check extension of name */ - - if (p = wcsrchr(name, L'.')) { - if (!_wcsicmp(p, L".exe") || - !_wcsicmp(p, L".cmd") || - !_wcsicmp(p, L".bat") || - !_wcsicmp(p, L".com")) - uxmode |= _S_IEXEC; - } - - /* propagate user read/write/execute bits to group/other fields */ - - uxmode |= (uxmode & 0700) >> 3; - uxmode |= (uxmode & 0700) >> 6; - - return uxmode; -} - - -int -efile_readlink(Efile_error* errInfo, char* name, char* buffer, size_t size) -{ - Efile_call_state state; - int ret; - DBG_TRACE(1, name); - call_state_init(&state, errInfo); - ret = !!do_readlink(&state, name, buffer, size); - call_state_free(&state); - return ret; -} - -/* If buffer==0, return buffer allocated by wpath_tmp_allocate -*/ -static char* -do_readlink(Efile_call_state* state, char* name, char* buffer, size_t size) -{ - /* - * load dll and see if we have CreateSymbolicLink at runtime: - * (Vista only) - */ - HINSTANCE hModule = NULL; - WCHAR *wname = (WCHAR *) name; - WCHAR *wbuffer = (WCHAR *) buffer; - DWORD wsize = size / sizeof(WCHAR); - char* ret = NULL; - - if ((hModule = LoadLibrary("kernel32.dll")) != NULL) { - typedef DWORD (WINAPI * GETFINALPATHNAMEBYHANDLEPTR)( - HANDLE hFile, - LPCWSTR lpFilePath, - DWORD cchFilePath, - DWORD dwFlags); - - GETFINALPATHNAMEBYHANDLEPTR pGetFinalPathNameByHandle = - (GETFINALPATHNAMEBYHANDLEPTR)GetProcAddress(hModule, "GetFinalPathNameByHandleW"); - - if (pGetFinalPathNameByHandle != NULL) { - DWORD fileAttributes; - ensure_wpath(state, &wname); - /* first check if file is a symlink; {error, einval} otherwise */ - fileAttributes = GetFileAttributesW(wname); - if ((fileAttributes & FILE_ATTRIBUTE_REPARSE_POINT)) { - DWORD success = 0; - HANDLE h = CreateFileW(wname, GENERIC_READ, FILE_SHARE_FLAGS, NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL); - int len; - if(h != INVALID_HANDLE_VALUE) { - if (!wbuffer) { /* dynamic allocation */ - WCHAR dummy; - wsize = 
pGetFinalPathNameByHandle(h, &dummy, 0, 0); - if (wsize) { - wbuffer = wpath_tmp_alloc(state, wsize); - } - } - if (wbuffer - && (success = pGetFinalPathNameByHandle(h, wbuffer, wsize, 0)) - && success < wsize) { - WCHAR* wp; - - /* GetFinalPathNameByHandle prepends path with "\\?\": */ - len = wcslen(wbuffer); - wmemmove(wbuffer,wbuffer+4,len-3); - if (len - 4 >= 2 && wbuffer[1] == L':' && wbuffer[0] >= L'A' && - wbuffer[0] <= L'Z') { - wbuffer[0] = wbuffer[0] + L'a' - L'A'; - } - - for (wp=wbuffer ; *wp; wp++) - if (*wp == L'\\') - *wp = L'/'; - } - CloseHandle(h); - } - if (success) { - ret = (char*) wbuffer; - } else { - set_error(state->errInfo); - } - } else { - errno = EINVAL; - save_last_error(state->errInfo); - } - goto done; - } - } - errno = ENOTSUP; - save_last_error(state->errInfo); - -done: - if (hModule) - FreeLibrary(hModule); - return ret; -} - - -int -efile_altname(Efile_error* errInfo, char* orig_name, char* buffer, size_t size) -{ - Efile_call_state state; - int ret; - DBG_TRACE(1, orig_name); - call_state_init(&state, errInfo); - ret = do_altname(&state, orig_name, buffer, size); - call_state_free(&state); - return ret; -} - -static int -do_altname(Efile_call_state* state, char* orig_name, char* buffer, size_t size) -{ - WIN32_FIND_DATAW wfd; - HANDLE fh; - WCHAR* name; - int name_len; - WCHAR* full_path = NULL; - WCHAR *worig_name = (WCHAR *) orig_name; - WCHAR *wbuffer = (WCHAR *) buffer; - int drive; /* Drive for filename (1 = A:, 2 = B: etc). */ - - /* Don't allow wildcards to be interpreted by system */ - - if (wcspbrk(worig_name, L"?*")) { - enoent: - state->errInfo->posix_errno = ENOENT; - state->errInfo->os_errno = ERROR_FILE_NOT_FOUND; - return 0; - } - - /* - * Move the name to a buffer and make sure to remove a trailing - * slash, because it causes FindFirstFile() to fail on Win95. 
- */ - ensure_wpath(state, &worig_name); - name_len = wcslen(worig_name); - - name = wpath_tmp_alloc(state, name_len + 1); - wcscpy(name, worig_name); - if (name_len > 2 && ISSLASH(name[name_len-1]) && - name[name_len-2] != L':') { - name[name_len-1] = L'\0'; - } - - /* Try to get disk from name. If none, get current disk. */ - - if (name[1] != L':') { - WCHAR* cwd_path = get_cwd_wpath_tmp(state); - drive = 0; - if (cwd_path[1] == L':') { - drive = towlower(cwd_path[0]) - L'a' + 1; - } - } else if (*name && name[2] == L'\0') { - /* - * X: and nothing more is an error. - */ - goto enoent; - } else { - drive = towlower(*name) - L'a' + 1; - } - fh = FindFirstFileW(name,&wfd); - if (fh == INVALID_HANDLE_VALUE) { - DWORD fff_error = GetLastError(); - if (!(wcspbrk(name, L"./\\") && - (full_path = get_full_wpath_tmp(state, name, NULL, 0)) && - /* root dir. ('C:\') or UNC root dir. ('\\server\share\') */ - ((wcslen(full_path) == 3) || is_root_unc_name(full_path)) && - (GetDriveTypeW(full_path) > 1) ) ) { - - set_os_errno(state->errInfo, fff_error); - return 0; - } - /* - * Root directories (such as C:\ or \\server\share\ are fabricated. 
- */ - wcscpy(wbuffer,name); - return 1; - } - - wcscpy(wbuffer,wfd.cAlternateFileName); - if (!*wbuffer) { - wcscpy(wbuffer,wfd.cFileName); - } - FindClose(fh); - return 1; -} - - -int -efile_link(Efile_error* errInfo, char* old, char* new) -{ - Efile_call_state state; - WCHAR *wold = (WCHAR *) old; - WCHAR *wnew = (WCHAR *) new; - int ret; - DBG_TRACE(1, old); - call_state_init(&state, errInfo); - ensure_wpath(&state, &wold); - ensure_wpath(&state, &wnew); - if(!CreateHardLinkW(wnew, wold, NULL)) { - ret = set_error(errInfo); - } - else ret =1; - call_state_free(&state); - return ret; -} - -int -efile_symlink(Efile_error* errInfo, char* old, char* new) -{ - Efile_call_state state; - int ret; - DBG_TRACE2(1, "symlink(%s <- %s)", old, new); - call_state_init(&state, errInfo); - ret = do_symlink(&state, old, new); - call_state_free(&state); - return ret; -} - -static int -do_symlink(Efile_call_state* state, char* old, char* new) -{ - /* - * Load dll and see if we have CreateSymbolicLink at runtime: - * (Vista only) - */ - HINSTANCE hModule = NULL; - WCHAR *wold = (WCHAR *) old; - WCHAR *wnew = (WCHAR *) new; - - DBG_TRACE(1, old); - if ((hModule = LoadLibrary("kernel32.dll")) != NULL) { - typedef BOOLEAN (WINAPI * CREATESYMBOLICLINKFUNCPTR) ( - LPCWSTR lpSymlinkFileName, - LPCWSTR lpTargetFileName, - DWORD dwFlags); - - CREATESYMBOLICLINKFUNCPTR pCreateSymbolicLink = - (CREATESYMBOLICLINKFUNCPTR) GetProcAddress(hModule, - "CreateSymbolicLinkW"); - /* A for MBCS, W for UNICODE... char* above implies 'W'! */ - if (pCreateSymbolicLink != NULL) { - ensure_wpath(state, &wold); - ensure_wpath(state, &wnew); - { - DWORD attr = GetFileAttributesW(wold); - int flag = (attr != INVALID_FILE_ATTRIBUTES && - attr & FILE_ATTRIBUTE_DIRECTORY) ? 
1 : 0; - /* SYMBOLIC_LINK_FLAG_DIRECTORY = 1 */ - BOOLEAN success = pCreateSymbolicLink(wnew, wold, flag); - FreeLibrary(hModule); - - if (success) { - return 1; - } else { - return set_error(state->errInfo); - } - } - } else - FreeLibrary(hModule); - } - errno = ENOTSUP; - return check_error(-1, state->errInfo); -} - -int -efile_fadvise(Efile_error* errInfo, int fd, Sint64 offset, - Sint64 length, int advise) -{ - DBG_TRACE(2, L""); - /* posix_fadvise is not available on Windows, do nothing */ - errno = ERROR_SUCCESS; - return check_error(0, errInfo); -} - -int -efile_fallocate(Efile_error* errInfo, int fd, Sint64 offset, Sint64 length) -{ - DBG_TRACE(2, L""); - /* No file preallocation method available in Windows. */ - errno = errno_map(ERROR_NOT_SUPPORTED); - SetLastError(ERROR_NOT_SUPPORTED); - - return check_error(-1, errInfo); -} diff --git a/erts/emulator/nifs/common/prim_file_nif.c b/erts/emulator/nifs/common/prim_file_nif.c new file mode 100644 index 0000000000..6874f41d75 --- /dev/null +++ b/erts/emulator/nifs/common/prim_file_nif.c @@ -0,0 +1,1237 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson 2017. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * %CopyrightEnd% + */ + +#define STATIC_ERLANG_NIF 1 + +#include "erl_nif.h" +#include "config.h" +#include "sys.h" + +#ifdef VALGRIND +# include <valgrind/memcheck.h> +#endif + +#include "erl_driver.h" +#include "prim_file_nif.h" + +/* NIF interface declarations */ +static int load(ErlNifEnv *env, void** priv_data, ERL_NIF_TERM load_info); +static int upgrade(ErlNifEnv *env, void** priv_data, void** old_priv_data, ERL_NIF_TERM load_info); +static void unload(ErlNifEnv *env, void* priv_data); + +static ErlNifResourceType *efile_resource_type; + +static ERL_NIF_TERM am_ok; +static ERL_NIF_TERM am_error; +static ERL_NIF_TERM am_continue; + +static ERL_NIF_TERM am_file_info; + +/* File modes */ +static ERL_NIF_TERM am_read; +static ERL_NIF_TERM am_write; +static ERL_NIF_TERM am_exclusive; +static ERL_NIF_TERM am_append; +static ERL_NIF_TERM am_sync; +static ERL_NIF_TERM am_skip_type_check; + +/* enum efile_access_t; read and write are defined above.*/ +static ERL_NIF_TERM am_read_write; +static ERL_NIF_TERM am_none; + +/* enum efile_advise_t */ +static ERL_NIF_TERM am_normal; +static ERL_NIF_TERM am_random; +static ERL_NIF_TERM am_sequential; +static ERL_NIF_TERM am_will_need; +static ERL_NIF_TERM am_dont_need; +static ERL_NIF_TERM am_no_reuse; + +/* enum efile_filetype_t */ +static ERL_NIF_TERM am_device; +static ERL_NIF_TERM am_directory; +static ERL_NIF_TERM am_regular; +static ERL_NIF_TERM am_symlink; +static ERL_NIF_TERM am_other; + +/* enum efile_seek_t, 'eof' marker. 
*/ +static ERL_NIF_TERM am_bof; +static ERL_NIF_TERM am_cur; +static ERL_NIF_TERM am_eof; + +static ERL_NIF_TERM read_info_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM set_permissions_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM set_owner_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM set_time_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); + +static ERL_NIF_TERM read_link_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM list_dir_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); + +static ERL_NIF_TERM make_hard_link_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM make_soft_link_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM rename_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM make_dir_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM del_file_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM del_dir_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM get_device_cwd_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM get_cwd_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM set_cwd_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); + +static ERL_NIF_TERM read_file_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); + +static ERL_NIF_TERM get_handle_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM altname_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); + +static ERL_NIF_TERM open_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); + +/* All file handle operations are passed through a wrapper that handles state + * transitions, marking it as busy during the course of the operation, and + * closing on completion if the owner died in 
the middle of an operation. + * + * This is pretty ugly but required as there's no way to tell when it's safe to + * asynchronously close a file; the event could have fired just before landing + * in a system call which will fail with EBADF at best or alias a newly opened + * fd at worst. + * + * The old driver got away with enqueueing the close operation on the same + * async queue as all of its other operations, but since dirty schedulers use a + * single global queue there's no natural way to schedule an asynchronous close + * "behind" other operations. + * + * The states may transition as follows: + * + * IDLE -> + * BUSY (file_handle_wrapper) | + * CLOSED (owner_death_callback) + * + * BUSY -> + * IDLE (file_handle_wrapper) + * CLOSED (close_nif_impl) + * CLOSE_PENDING (owner_death_callback) + * + * CLOSE_PENDING -> + * CLOSED (file_handle_wrapper) + */ + +typedef ERL_NIF_TERM (*file_op_impl_t)(efile_data_t *d, ErlNifEnv *env, + int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM file_handle_wrapper(file_op_impl_t operation, ErlNifEnv *env, + int argc, const ERL_NIF_TERM argv[]); + +#define WRAP_FILE_HANDLE_EXPORT(name) \ + static ERL_NIF_TERM name ## _impl (efile_data_t *d, ErlNifEnv *env, \ + int argc, const ERL_NIF_TERM argv[]);\ + static ERL_NIF_TERM name(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { \ + return file_handle_wrapper( name ## _impl , env, argc, argv); \ + } + +WRAP_FILE_HANDLE_EXPORT(close_nif) +WRAP_FILE_HANDLE_EXPORT(read_nif) +WRAP_FILE_HANDLE_EXPORT(write_nif) +WRAP_FILE_HANDLE_EXPORT(pread_nif) +WRAP_FILE_HANDLE_EXPORT(pwrite_nif) +WRAP_FILE_HANDLE_EXPORT(seek_nif) +WRAP_FILE_HANDLE_EXPORT(sync_nif) +WRAP_FILE_HANDLE_EXPORT(truncate_nif) +WRAP_FILE_HANDLE_EXPORT(allocate_nif) +WRAP_FILE_HANDLE_EXPORT(advise_nif) +WRAP_FILE_HANDLE_EXPORT(get_handle_nif) +WRAP_FILE_HANDLE_EXPORT(ipread_s32bu_p32bu_nif) + +static ErlNifFunc nif_funcs[] = { + /* File handle ops */ + {"open_nif", 2, open_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, + 
{"close_nif", 1, close_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, + {"read_nif", 2, read_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, + {"write_nif", 2, write_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, + {"pread_nif", 3, pread_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, + {"pwrite_nif", 3, pwrite_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, + {"seek_nif", 3, seek_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, + {"sync_nif", 2, sync_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, + {"truncate_nif", 1, truncate_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, + {"allocate_nif", 3, allocate_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, + {"advise_nif", 4, advise_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, + + /* Filesystem ops */ + {"make_hard_link_nif", 2, make_hard_link_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, + {"make_soft_link_nif", 2, make_soft_link_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, + {"rename_nif", 2, rename_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, + {"read_info_nif", 2, read_info_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, + {"set_permissions_nif", 2, set_permissions_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, + {"set_owner_nif", 3, set_owner_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, + {"set_time_nif", 4, set_time_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, + {"read_link_nif", 1, read_link_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, + {"list_dir_nif", 1, list_dir_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, + {"make_dir_nif", 1, make_dir_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, + {"del_file_nif", 1, del_file_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, + {"del_dir_nif", 1, del_dir_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, + {"get_device_cwd_nif", 1, get_device_cwd_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, + {"set_cwd_nif", 1, set_cwd_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, + {"get_cwd_nif", 0, get_cwd_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, + + /* These operations are equivalent to chained calls of other operations, + * but have been moved down to avoid excessive rescheduling. */ + {"ipread_s32bu_p32bu_nif", 3, ipread_s32bu_p32bu_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, + {"read_file_nif", 1, read_file_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, + + /* Internal ops. 
*/ + {"get_handle_nif", 1, get_handle_nif}, + {"altname_nif", 1, altname_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, +}; + +ERL_NIF_INIT(prim_file, nif_funcs, load, NULL, upgrade, unload) + +static void owner_death_callback(ErlNifEnv* env, void* obj, ErlNifPid* pid, ErlNifMonitor* mon); +static void gc_callback(ErlNifEnv *env, void* data); + +static int load(ErlNifEnv *env, void** priv_data, ERL_NIF_TERM load_info) +{ + ErlNifResourceTypeInit callbacks; + + am_ok = enif_make_atom(env, "ok"); + am_error = enif_make_atom(env, "error"); + am_continue = enif_make_atom(env, "continue"); + + am_read = enif_make_atom(env, "read"); + am_write = enif_make_atom(env, "write"); + am_exclusive = enif_make_atom(env, "exclusive"); + am_append = enif_make_atom(env, "append"); + am_sync = enif_make_atom(env, "sync"); + am_skip_type_check = enif_make_atom(env, "skip_type_check"); + + am_read_write = enif_make_atom(env, "read_write"); + am_none = enif_make_atom(env, "none"); + + am_normal = enif_make_atom(env, "normal"); + am_random = enif_make_atom(env, "random"); + am_sequential = enif_make_atom(env, "sequential"); + am_will_need = enif_make_atom(env, "will_need"); + am_dont_need = enif_make_atom(env, "dont_need"); + am_no_reuse = enif_make_atom(env, "no_reuse"); + + am_device = enif_make_atom(env, "device"); + am_directory = enif_make_atom(env, "directory"); + am_regular = enif_make_atom(env, "regular"); + am_symlink = enif_make_atom(env, "symlink"); + am_other = enif_make_atom(env, "other"); + + am_file_info = enif_make_atom(env, "file_info"); + + am_bof = enif_make_atom(env, "bof"); + am_cur = enif_make_atom(env, "cur"); + am_eof = enif_make_atom(env, "eof"); + + callbacks.down = owner_death_callback; + callbacks.dtor = gc_callback; + callbacks.stop = NULL; + + efile_resource_type = enif_open_resource_type_x(env, "efile", &callbacks, + ERL_NIF_RT_CREATE, NULL); + + *priv_data = NULL; + + return 0; +} + +static void unload(ErlNifEnv *env, void* priv_data) +{ + +} + +static int 
upgrade(ErlNifEnv *env, void** priv_data, void** old_priv_data, ERL_NIF_TERM load_info) +{ + if(*old_priv_data != NULL) { + return -1; /* Don't know how to do that */ + } + if(*priv_data != NULL) { + return -1; /* Don't know how to do that */ + } + if(load(env, priv_data, load_info)) { + return -1; + } + return 0; +} + +static ERL_NIF_TERM posix_error_to_tuple(ErlNifEnv *env, posix_errno_t posix_errno) { + ERL_NIF_TERM error = enif_make_atom(env, erl_errno_id(posix_errno)); + return enif_make_tuple2(env, am_error, error); +} + +static int get_file_data(ErlNifEnv *env, ERL_NIF_TERM opaque, efile_data_t **d) { + return enif_get_resource(env, opaque, efile_resource_type, (void **)d); +} + +static ERL_NIF_TERM file_handle_wrapper(file_op_impl_t operation, ErlNifEnv *env, + int argc, const ERL_NIF_TERM argv[]) { + + efile_data_t *d; + + enum efile_state_t previous_state; + ERL_NIF_TERM result; + + if(argc < 1 || !get_file_data(env, argv[0], &d)) { + return enif_make_badarg(env); + } + + previous_state = erts_atomic32_cmpxchg_acqb(&d->state, + EFILE_STATE_BUSY, EFILE_STATE_IDLE); + + if(previous_state == EFILE_STATE_IDLE) { + result = operation(d, env, argc - 1, &argv[1]); + + previous_state = erts_atomic32_cmpxchg_relb(&d->state, + EFILE_STATE_IDLE, EFILE_STATE_BUSY); + + ASSERT(previous_state != EFILE_STATE_IDLE); + + if(previous_state == EFILE_STATE_CLOSE_PENDING) { + /* This is the only point where a change from CLOSE_PENDING is + * possible, and we're running synchronously, so we can't race with + * anything else here. */ + erts_atomic32_set_acqb(&d->state, EFILE_STATE_CLOSED); + efile_close(d); + } + } else { + /* CLOSE_PENDING should be impossible at this point since it requires + * a transition from BUSY; the only valid state here is CLOSED. 
*/ + ASSERT(previous_state == EFILE_STATE_CLOSED); + + result = posix_error_to_tuple(env, EINVAL); + } + + return result; +} + +static void owner_death_callback(ErlNifEnv* env, void* obj, ErlNifPid* pid, ErlNifMonitor* mon) { + efile_data_t *d = (efile_data_t*)obj; + + (void)env; + (void)pid; + (void)mon; + + for(;;) { + enum efile_state_t previous_state; + + previous_state = erts_atomic32_cmpxchg_acqb(&d->state, + EFILE_STATE_CLOSED, EFILE_STATE_IDLE); + + switch(previous_state) { + case EFILE_STATE_IDLE: + efile_close(d); + return; + case EFILE_STATE_CLOSE_PENDING: + case EFILE_STATE_CLOSED: + /* We're either already closed or managed to mark ourselves for + * closure in the previous iteration. */ + return; + case EFILE_STATE_BUSY: + /* Schedule ourselves to be closed once the current operation + * finishes, retrying the [IDLE -> CLOSED] transition in case we + * narrowly passed the [BUSY -> IDLE] one. */ + erts_atomic32_cmpxchg_nob(&d->state, + EFILE_STATE_CLOSE_PENDING, EFILE_STATE_BUSY); + break; + } + } +} + +static void gc_callback(ErlNifEnv *env, void* data) { + efile_data_t *d = (efile_data_t*)data; + + enum efile_state_t previous_state; + + (void)env; + + previous_state = erts_atomic32_cmpxchg_acqb(&d->state, + EFILE_STATE_CLOSED, EFILE_STATE_IDLE); + + ASSERT(previous_state != EFILE_STATE_CLOSE_PENDING && + previous_state != EFILE_STATE_BUSY); + + if(previous_state == EFILE_STATE_IDLE) { + efile_close(d); + } +} + +static ERL_NIF_TERM efile_filetype_to_atom(enum efile_filetype_t type) { + switch(type) { + case EFILE_FILETYPE_DEVICE: return am_device; + case EFILE_FILETYPE_DIRECTORY: return am_directory; + case EFILE_FILETYPE_REGULAR: return am_regular; + case EFILE_FILETYPE_SYMLINK: return am_symlink; + case EFILE_FILETYPE_OTHER: return am_other; + } + + return am_other; +} + +static ERL_NIF_TERM efile_access_to_atom(enum efile_access_t type) { + if(type & EFILE_ACCESS_READ && !(type & EFILE_ACCESS_WRITE)) { + return am_read; + } else if(type & 
EFILE_ACCESS_WRITE && !(type & EFILE_ACCESS_READ)) { + return am_write; + } else if(type & EFILE_ACCESS_READ_WRITE) { + return am_read_write; + } + + return am_none; +} + +static enum efile_modes_t efile_translate_modelist(ErlNifEnv *env, ERL_NIF_TERM list) { + enum efile_modes_t modes; + ERL_NIF_TERM head, tail; + + modes = 0; + + while(enif_get_list_cell(env, list, &head, &tail)) { + if(enif_is_identical(head, am_read)) { + modes |= EFILE_MODE_READ; + } else if(enif_is_identical(head, am_write)) { + modes |= EFILE_MODE_WRITE; + } else if(enif_is_identical(head, am_exclusive)) { + modes |= EFILE_MODE_EXCLUSIVE; + } else if(enif_is_identical(head, am_append)) { + modes |= EFILE_MODE_APPEND; + } else if(enif_is_identical(head, am_sync)) { + modes |= EFILE_MODE_SYNC; + } else if(enif_is_identical(head, am_skip_type_check)) { + modes |= EFILE_MODE_SKIP_TYPE_CHECK; + } else { + /* Modes like 'raw', 'ram', 'delayed_writes' etc are handled + * further up the chain. */ + } + + list = tail; + } + + if(modes & (EFILE_MODE_APPEND | EFILE_MODE_EXCLUSIVE)) { + /* 'append' and 'exclusive' are documented as "open for writing." */ + modes |= EFILE_MODE_WRITE; + } else if(!(modes & EFILE_MODE_READ_WRITE)) { + /* Defaulting to read if !(W|R) is undocumented, but specifically + * tested against in file_SUITE. 
*/ + modes |= EFILE_MODE_READ; + } + + return modes; +} + +static ERL_NIF_TERM open_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + posix_errno_t posix_errno; + efile_data_t *d; + + ErlNifPid controlling_process; + enum efile_modes_t modes; + ERL_NIF_TERM result; + efile_path_t path; + + if(argc != 2 || !enif_is_list(env, argv[1])) { + return enif_make_badarg(env); + } + + modes = efile_translate_modelist(env, argv[1]); + + if((posix_errno = efile_marshal_path(env, argv[0], &path))) { + return posix_error_to_tuple(env, posix_errno); + } else if((posix_errno = efile_open(&path, modes, efile_resource_type, &d))) { + return posix_error_to_tuple(env, posix_errno); + } + + result = enif_make_resource(env, d); + enif_release_resource(d); + + enif_self(env, &controlling_process); + + if(enif_monitor_process(env, d, &controlling_process, &d->monitor)) { + return posix_error_to_tuple(env, EINVAL); + } + + return enif_make_tuple2(env, am_ok, result); +} + +static ERL_NIF_TERM close_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + enum efile_state_t previous_state; + + if(argc != 0) { + return enif_make_badarg(env); + } + + previous_state = erts_atomic32_cmpxchg_acqb(&d->state, + EFILE_STATE_CLOSED, EFILE_STATE_BUSY); + + ASSERT(previous_state == EFILE_STATE_CLOSE_PENDING || + previous_state == EFILE_STATE_BUSY); + + if(previous_state == EFILE_STATE_BUSY) { + enif_demonitor_process(env, d, &d->monitor); + + if(!efile_close(d)) { + return posix_error_to_tuple(env, d->posix_errno); + } + } + + return am_ok; +} + +static ERL_NIF_TERM read_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + Sint64 bytes_read, block_size; + SysIOVec read_vec[1]; + ErlNifBinary result; + + if(argc != 1 || !enif_is_number(env, argv[0])) { + return enif_make_badarg(env); + } + + if(!enif_get_int64(env, argv[0], &block_size) || block_size < 0) { + return posix_error_to_tuple(env, EINVAL); + } + + 
if(!enif_alloc_binary(block_size, &result)) { + return posix_error_to_tuple(env, ENOMEM); + } + + read_vec[0].iov_base = result.data; + read_vec[0].iov_len = result.size; + + bytes_read = efile_readv(d, read_vec, 1); + ASSERT(bytes_read <= block_size); + + if(bytes_read < 0) { + return posix_error_to_tuple(env, d->posix_errno); + } else if(bytes_read == 0) { + enif_release_binary(&result); + return am_eof; + } + + if(bytes_read < block_size && !enif_realloc_binary(&result, bytes_read)) { + ERTS_INTERNAL_ERROR("Failed to shrink read result."); + } + + return enif_make_tuple2(env, am_ok, enif_make_binary(env, &result)); +} + +static ERL_NIF_TERM write_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + ErlNifIOVec vec, *input = &vec; + Sint64 bytes_written; + ERL_NIF_TERM tail; + + if(argc != 1 || !enif_inspect_iovec(env, 64, argv[0], &tail, &input)) { + return enif_make_badarg(env); + } + + bytes_written = efile_writev(d, input->iov, input->iovcnt); + + if(bytes_written < 0) { + return posix_error_to_tuple(env, d->posix_errno); + } + + if(!enif_is_empty_list(env, tail)) { + ASSERT(bytes_written > 0); + return enif_make_tuple2(env, am_continue, tail); + } + + return am_ok; +} + +static ERL_NIF_TERM pread_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + Sint64 bytes_read, block_size, offset; + SysIOVec read_vec[1]; + ErlNifBinary result; + + if(argc != 2 || !enif_is_number(env, argv[0]) + || !enif_is_number(env, argv[1])) { + return enif_make_badarg(env); + } + + if(!enif_get_int64(env, argv[0], &offset) || + !enif_get_int64(env, argv[1], &block_size) || + (offset < 0 || block_size < 0)) { + return posix_error_to_tuple(env, EINVAL); + } + + if(!enif_alloc_binary(block_size, &result)) { + return posix_error_to_tuple(env, ENOMEM); + } + + read_vec[0].iov_base = result.data; + read_vec[0].iov_len = result.size; + + bytes_read = efile_preadv(d, offset, read_vec, 1); + + if(bytes_read < 0) { + return 
posix_error_to_tuple(env, d->posix_errno); + } else if(bytes_read == 0) { + enif_release_binary(&result); + return am_eof; + } + + if(bytes_read < block_size && !enif_realloc_binary(&result, bytes_read)) { + ERTS_INTERNAL_ERROR("Failed to shrink pread result."); + } + + return enif_make_tuple2(env, am_ok, enif_make_binary(env, &result)); +} + +static ERL_NIF_TERM pwrite_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + ErlNifIOVec vec, *input = &vec; + Sint64 bytes_written, offset; + ERL_NIF_TERM tail; + + if(argc != 2 || !enif_is_number(env, argv[0]) + || !enif_inspect_iovec(env, 64, argv[1], &tail, &input)) { + return enif_make_badarg(env); + } + + if(!enif_get_int64(env, argv[0], &offset) || offset < 0) { + return posix_error_to_tuple(env, EINVAL); + } + + bytes_written = efile_pwritev(d, offset, input->iov, input->iovcnt); + + if(bytes_written < 0) { + return posix_error_to_tuple(env, d->posix_errno); + } + + if(!enif_is_empty_list(env, tail)) { + ASSERT(bytes_written > 0); + return enif_make_tuple3(env, am_continue, + enif_make_int64(env, bytes_written), tail); + } + + return am_ok; +} + +static ERL_NIF_TERM seek_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + Sint64 new_position, offset; + enum efile_seek_t seek; + + if(argc != 2 || !enif_get_int64(env, argv[1], &offset)) { + return enif_make_badarg(env); + } + + if(enif_is_identical(argv[0], am_bof)) { + seek = EFILE_SEEK_BOF; + } else if(enif_is_identical(argv[0], am_cur)) { + seek = EFILE_SEEK_CUR; + } else if(enif_is_identical(argv[0], am_eof)) { + seek = EFILE_SEEK_EOF; + } else { + return enif_make_badarg(env); + } + + if(!efile_seek(d, seek, offset, &new_position)) { + return posix_error_to_tuple(env, d->posix_errno); + } + + return enif_make_tuple2(env, am_ok, enif_make_uint64(env, new_position)); +} + +static ERL_NIF_TERM sync_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + int data_only; + + 
if(argc != 1 || !enif_get_int(env, argv[0], &data_only)) { + return enif_make_badarg(env); + } + + if(!efile_sync(d, data_only)) { + return posix_error_to_tuple(env, d->posix_errno); + } + + return am_ok; +} + +static ERL_NIF_TERM truncate_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + if(argc != 0) { + return enif_make_badarg(env); + } + + if(!efile_truncate(d)) { + return posix_error_to_tuple(env, d->posix_errno); + } + + return am_ok; +} + +static ERL_NIF_TERM allocate_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + Sint64 offset, length; + + if(argc != 2 || !enif_is_number(env, argv[0]) + || !enif_is_number(env, argv[1])) { + return enif_make_badarg(env); + } + + if(!enif_get_int64(env, argv[0], &offset) || + !enif_get_int64(env, argv[1], &length) || + (offset < 0 || length < 0)) { + return posix_error_to_tuple(env, EINVAL); + } + + if(!efile_allocate(d, offset, length)) { + return posix_error_to_tuple(env, d->posix_errno); + } + + return am_ok; +} + +static ERL_NIF_TERM advise_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + enum efile_advise_t advise; + Sint64 offset, length; + + if(argc != 3 || !enif_is_number(env, argv[0]) + || !enif_is_number(env, argv[1])) { + return enif_make_badarg(env); + } + + if(!enif_get_int64(env, argv[0], &offset) || + !enif_get_int64(env, argv[1], &length) || + (offset < 0 || length < 0)) { + return posix_error_to_tuple(env, EINVAL); + } + + if(enif_is_identical(argv[2], am_normal)) { + advise = EFILE_ADVISE_NORMAL; + } else if(enif_is_identical(argv[2], am_random)) { + advise = EFILE_ADVISE_RANDOM; + } else if(enif_is_identical(argv[2], am_sequential)) { + advise = EFILE_ADVISE_SEQUENTIAL; + } else if(enif_is_identical(argv[2], am_will_need)) { + advise = EFILE_ADVISE_WILL_NEED; + } else if(enif_is_identical(argv[2], am_dont_need)) { + advise = EFILE_ADVISE_DONT_NEED; + } else if(enif_is_identical(argv[2], am_no_reuse)) { + 
advise = EFILE_ADVISE_NO_REUSE; + } else { + /* The tests check for EINVAL instead of badarg. Sigh. */ + return posix_error_to_tuple(env, EINVAL); + } + + if(!efile_advise(d, offset, length, advise)) { + return posix_error_to_tuple(env, d->posix_errno); + } + + return am_ok; +} + +/* This undocumented function reads a pointer and then reads the data block + * described by said pointer. It was reverse-engineered from the old + * implementation so while all tests pass it may not be entirely correct. Our + * current understanding is as follows: + * + * Pointer layout: + * + * <<Size:1/integer-unit:32, Offset:1/integer-unit:32>> + * + * Where Offset is the -absolute- address to the data block. + * + * *) If we fail to read the pointer block in its entirety, we return eof. + * *) If the provided max_payload_size is larger than Size, we return eof. + * *) If we fail to read any data whatsoever at Offset, we return + * {ok, {Size, Offset, eof}} + * *) Otherwise, we return {ok, {Size, Offset, Data}}. Note that the size + * of Data may be smaller than Size if we encounter EOF before we could + * read the entire block. + * + * On errors we'll return {error, posix()} regardless of whether they + * happened before or after reading the pointer block. 
*/ +static ERL_NIF_TERM ipread_s32bu_p32bu_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + Sint64 payload_offset, payload_size; + + SysIOVec read_vec[1]; + Sint64 bytes_read; + + ErlNifBinary payload; + + if(argc != 2 || !enif_is_number(env, argv[0]) + || !enif_is_number(env, argv[1])) { + return enif_make_badarg(env); + } + + { + Sint64 max_payload_size, pointer_offset; + unsigned char pointer_block[8]; + + if(!enif_get_int64(env, argv[0], &pointer_offset) || + !enif_get_int64(env, argv[1], &max_payload_size) || + (pointer_offset < 0 || max_payload_size >= 1u << 31)) { + return posix_error_to_tuple(env, EINVAL); + } + + read_vec[0].iov_base = pointer_block; + read_vec[0].iov_len = sizeof(pointer_block); + + bytes_read = efile_preadv(d, pointer_offset, read_vec, 1); + + if(bytes_read < 0) { + return posix_error_to_tuple(env, d->posix_errno); + } else if(bytes_read < sizeof(pointer_block)) { + return am_eof; + } + + payload_size = (Uint32)get_int32(&pointer_block[0]); + payload_offset = (Uint32)get_int32(&pointer_block[4]); + + if(payload_size > max_payload_size) { + return am_eof; + } + } + + if(!enif_alloc_binary(payload_size, &payload)) { + return posix_error_to_tuple(env, ENOMEM); + } + + read_vec[0].iov_base = payload.data; + read_vec[0].iov_len = payload.size; + + bytes_read = efile_preadv(d, payload_offset, read_vec, 1); + + if(bytes_read < 0) { + return posix_error_to_tuple(env, d->posix_errno); + } else if(bytes_read == 0) { + enif_release_binary(&payload); + + return enif_make_tuple2(env, am_ok, + enif_make_tuple3(env, + enif_make_uint(env, payload_size), + enif_make_uint(env, payload_offset), + am_eof)); + } + + if(bytes_read < payload.size && !enif_realloc_binary(&payload, bytes_read)) { + ERTS_INTERNAL_ERROR("Failed to shrink ipread payload."); + } + + return enif_make_tuple2(env, am_ok, + enif_make_tuple3(env, + enif_make_uint(env, payload_size), + enif_make_uint(env, payload_offset), + enif_make_binary(env, 
&payload))); +} + +static ERL_NIF_TERM get_handle_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + if(argc != 0) { + return enif_make_badarg(env); + } + + return efile_get_handle(env, d); +} + +static ERL_NIF_TERM read_info_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + posix_errno_t posix_errno; + + efile_fileinfo_t info = {0}; + efile_path_t path; + int follow_links; + + if(argc != 2 || !enif_get_int(env, argv[1], &follow_links)) { + return enif_make_badarg(env); + } + + if((posix_errno = efile_marshal_path(env, argv[0], &path))) { + return posix_error_to_tuple(env, posix_errno); + } else if((posix_errno = efile_read_info(&path, follow_links, &info))) { + return posix_error_to_tuple(env, posix_errno); + } + + /* #file_info as declared in file.hrl */ + return enif_make_tuple(env, 14, + am_file_info, + enif_make_uint64(env, info.size), + efile_filetype_to_atom(info.type), + efile_access_to_atom(info.access), + enif_make_int64(env, MAX(EFILE_MIN_FILETIME, info.a_time)), + enif_make_int64(env, MAX(EFILE_MIN_FILETIME, info.m_time)), + enif_make_int64(env, MAX(EFILE_MIN_FILETIME, info.c_time)), + enif_make_uint(env, info.mode), + enif_make_uint(env, info.links), + enif_make_uint(env, info.major_device), + enif_make_uint(env, info.minor_device), + enif_make_uint(env, info.inode), + enif_make_uint(env, info.uid), + enif_make_uint(env, info.gid) + ); +} + +static ERL_NIF_TERM set_permissions_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + posix_errno_t posix_errno; + + efile_path_t path; + Uint32 permissions; + + if(argc != 2 || !enif_get_uint(env, argv[1], &permissions)) { + return enif_make_badarg(env); + } + + if((posix_errno = efile_marshal_path(env, argv[0], &path))) { + return posix_error_to_tuple(env, posix_errno); + } else if((posix_errno = efile_set_permissions(&path, permissions))) { + return posix_error_to_tuple(env, posix_errno); + } + + return am_ok; +} + +static ERL_NIF_TERM set_owner_nif(ErlNifEnv 
*env, int argc, const ERL_NIF_TERM argv[]) { + posix_errno_t posix_errno; + + efile_path_t path; + Uint32 uid, gid; + + if(argc != 3 || !enif_get_uint(env, argv[1], &uid) + || !enif_get_uint(env, argv[2], &gid)) { + return enif_make_badarg(env); + } + + if((posix_errno = efile_marshal_path(env, argv[0], &path))) { + return posix_error_to_tuple(env, posix_errno); + } else if((posix_errno = efile_set_owner(&path, uid, gid))) { + return posix_error_to_tuple(env, posix_errno); + } + + return am_ok; +} + +static ERL_NIF_TERM set_time_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + posix_errno_t posix_errno; + + Sint64 accessed, modified, created; + efile_path_t path; + + if(argc != 4 || !enif_get_int64(env, argv[1], &accessed) + || !enif_get_int64(env, argv[2], &modified) + || !enif_get_int64(env, argv[3], &created)) { + return enif_make_badarg(env); + } + + if((posix_errno = efile_marshal_path(env, argv[0], &path))) { + return posix_error_to_tuple(env, posix_errno); + } else if((posix_errno = efile_set_time(&path, accessed, modified, created))) { + return posix_error_to_tuple(env, posix_errno); + } + + return am_ok; +} + +static ERL_NIF_TERM read_link_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + posix_errno_t posix_errno; + + efile_path_t path; + ERL_NIF_TERM result; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if((posix_errno = efile_marshal_path(env, argv[0], &path))) { + return posix_error_to_tuple(env, posix_errno); + } else if((posix_errno = efile_read_link(env, &path, &result))) { + return posix_error_to_tuple(env, posix_errno); + } + + return enif_make_tuple2(env, am_ok, result); +} + +static ERL_NIF_TERM list_dir_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + posix_errno_t posix_errno; + + efile_path_t path; + ERL_NIF_TERM result; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if((posix_errno = efile_marshal_path(env, argv[0], &path))) { + return posix_error_to_tuple(env, posix_errno); + } 
else if((posix_errno = efile_list_dir(env, &path, &result))) { + return posix_error_to_tuple(env, posix_errno); + } + + return enif_make_tuple2(env, am_ok, result); +} + +static ERL_NIF_TERM rename_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + posix_errno_t posix_errno; + + efile_path_t existing_path, new_path; + + if(argc != 2) { + return enif_make_badarg(env); + } + + if((posix_errno = efile_marshal_path(env, argv[0], &existing_path))) { + return posix_error_to_tuple(env, posix_errno); + } else if((posix_errno = efile_marshal_path(env, argv[1], &new_path))) { + return posix_error_to_tuple(env, posix_errno); + } else if((posix_errno = efile_rename(&existing_path, &new_path))) { + return posix_error_to_tuple(env, posix_errno); + } + + return am_ok; +} + +static ERL_NIF_TERM make_hard_link_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + posix_errno_t posix_errno; + + efile_path_t existing_path, new_path; + + if(argc != 2) { + return enif_make_badarg(env); + } + + if((posix_errno = efile_marshal_path(env, argv[0], &existing_path))) { + return posix_error_to_tuple(env, posix_errno); + } else if((posix_errno = efile_marshal_path(env, argv[1], &new_path))) { + return posix_error_to_tuple(env, posix_errno); + } else if((posix_errno = efile_make_hard_link(&existing_path, &new_path))) { + return posix_error_to_tuple(env, posix_errno); + } + + return am_ok; +} + +static ERL_NIF_TERM make_soft_link_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + posix_errno_t posix_errno; + + efile_path_t existing_path, new_path; + + if(argc != 2) { + return enif_make_badarg(env); + } + + if((posix_errno = efile_marshal_path(env, argv[0], &existing_path))) { + return posix_error_to_tuple(env, posix_errno); + } else if((posix_errno = efile_marshal_path(env, argv[1], &new_path))) { + return posix_error_to_tuple(env, posix_errno); + } else if((posix_errno = efile_make_soft_link(&existing_path, &new_path))) { + return posix_error_to_tuple(env, 
posix_errno); + } + + return am_ok; +} + +static ERL_NIF_TERM make_dir_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + posix_errno_t posix_errno; + + efile_path_t path; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if((posix_errno = efile_marshal_path(env, argv[0], &path))) { + return posix_error_to_tuple(env, posix_errno); + } else if((posix_errno = efile_make_dir(&path))) { + return posix_error_to_tuple(env, posix_errno); + } + + return am_ok; +} + +static ERL_NIF_TERM del_file_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + posix_errno_t posix_errno; + + efile_path_t path; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if((posix_errno = efile_marshal_path(env, argv[0], &path))) { + return posix_error_to_tuple(env, posix_errno); + } else if((posix_errno = efile_del_file(&path))) { + return posix_error_to_tuple(env, posix_errno); + } + + return am_ok; +} + +static ERL_NIF_TERM del_dir_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + posix_errno_t posix_errno; + + efile_path_t path; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if((posix_errno = efile_marshal_path(env, argv[0], &path))) { + return posix_error_to_tuple(env, posix_errno); + } else if((posix_errno = efile_del_dir(&path))) { + return posix_error_to_tuple(env, posix_errno); + } + + return am_ok; +} + +static ERL_NIF_TERM get_device_cwd_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + posix_errno_t posix_errno; + + ERL_NIF_TERM result; + int device_index; + + if(argc != 1 || !enif_get_int(env, argv[0], &device_index)) { + return enif_make_badarg(env); + } + + if((posix_errno = efile_get_device_cwd(env, device_index, &result))) { + return posix_error_to_tuple(env, posix_errno); + } + + return enif_make_tuple2(env, am_ok, result); +} + +static ERL_NIF_TERM get_cwd_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + posix_errno_t posix_errno; + ERL_NIF_TERM result; + + if(argc != 0) { + return 
enif_make_badarg(env); + } + + if((posix_errno = efile_get_cwd(env, &result))) { + return posix_error_to_tuple(env, posix_errno); + } + + return enif_make_tuple2(env, am_ok, result); +} + +static ERL_NIF_TERM set_cwd_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + posix_errno_t posix_errno; + + efile_path_t path; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if((posix_errno = efile_marshal_path(env, argv[0], &path))) { + return posix_error_to_tuple(env, posix_errno); + } else if((posix_errno = efile_set_cwd(&path))) { + return posix_error_to_tuple(env, posix_errno); + } + + return am_ok; +} + +/** @brief Reads an entire file into \c result, stopping after \c size bytes or + * EOF. It will read until EOF if size is 0. */ +static posix_errno_t read_file(efile_data_t *d, size_t size, ErlNifBinary *result) { + size_t initial_buffer_size; + ssize_t bytes_read; + + if(size == 0) { + initial_buffer_size = 16 << 10; + } else { + initial_buffer_size = size; + } + + if(!enif_alloc_binary(initial_buffer_size, result)) { + return ENOMEM; + } + + bytes_read = 0; + + for(;;) { + ssize_t block_bytes_read; + SysIOVec read_vec[1]; + + read_vec[0].iov_base = result->data + bytes_read; + read_vec[0].iov_len = result->size - bytes_read; + + block_bytes_read = efile_readv(d, read_vec, 1); + + if(block_bytes_read < 0) { + enif_release_binary(result); + return d->posix_errno; + } + + bytes_read += block_bytes_read; + + if(block_bytes_read < (result->size - bytes_read)) { + /* EOF */ + break; + } else if(bytes_read == size) { + break; + } + + if(!enif_realloc_binary(result, bytes_read * 2)) { + enif_release_binary(result); + return ENOMEM; + } + } + + /* The file may have shrunk since we queried its size, so we have to do + * this even when the size is known. 
*/ + if(bytes_read < result->size && !enif_realloc_binary(result, bytes_read)) { + ERTS_INTERNAL_ERROR("Failed to shrink read_file result."); + } + + return 0; +} + +static ERL_NIF_TERM read_file_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + posix_errno_t posix_errno; + + efile_fileinfo_t info = {0}; + efile_path_t path; + efile_data_t *d; + + ErlNifBinary result; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if((posix_errno = efile_marshal_path(env, argv[0], &path))) { + return posix_error_to_tuple(env, posix_errno); + } else if((posix_errno = efile_read_info(&path, 1, &info))) { + return posix_error_to_tuple(env, posix_errno); + } else if((posix_errno = efile_open(&path, EFILE_MODE_READ, efile_resource_type, &d))) { + return posix_error_to_tuple(env, posix_errno); + } + + posix_errno = read_file(d, info.size, &result); + enif_release_resource(d); + + if(posix_errno) { + return posix_error_to_tuple(env, posix_errno); + } + + return enif_make_tuple2(env, am_ok, enif_make_binary(env, &result)); +} + +static ERL_NIF_TERM altname_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + posix_errno_t posix_errno; + + efile_path_t path; + ERL_NIF_TERM result; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if((posix_errno = efile_marshal_path(env, argv[0], &path))) { + return posix_error_to_tuple(env, posix_errno); + } else if((posix_errno = efile_altname(env, &path, &result))) { + return posix_error_to_tuple(env, posix_errno); + } + + return enif_make_tuple2(env, am_ok, result); +} diff --git a/erts/emulator/nifs/common/prim_file_nif.h b/erts/emulator/nifs/common/prim_file_nif.h new file mode 100644 index 0000000000..cc9bc8f5c3 --- /dev/null +++ b/erts/emulator/nifs/common/prim_file_nif.h @@ -0,0 +1,240 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson 2017. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * %CopyrightEnd% + */ + +typedef int posix_errno_t; + +enum efile_modes_t { + EFILE_MODE_READ = (1 << 0), + EFILE_MODE_WRITE = (1 << 1), /* Implies truncating file when used alone. */ + EFILE_MODE_APPEND = (1 << 2), + EFILE_MODE_EXCLUSIVE = (1 << 3), + EFILE_MODE_SYNC = (1 << 4), + + EFILE_MODE_SKIP_TYPE_CHECK = (1 << 5), /* Special for device files on Unix. */ + EFILE_MODE_NO_TRUNCATE = (1 << 6), /* Special for reopening on VxWorks. */ + + EFILE_MODE_READ_WRITE = EFILE_MODE_READ | EFILE_MODE_WRITE +}; + +enum efile_access_t { + EFILE_ACCESS_NONE = 0, + EFILE_ACCESS_READ = 1, + EFILE_ACCESS_WRITE = 2, + EFILE_ACCESS_READ_WRITE = EFILE_ACCESS_READ | EFILE_ACCESS_WRITE +}; + +enum efile_seek_t { + EFILE_SEEK_BOF, + EFILE_SEEK_CUR, + EFILE_SEEK_EOF +}; + +enum efile_filetype_t { + EFILE_FILETYPE_DEVICE, + EFILE_FILETYPE_DIRECTORY, + EFILE_FILETYPE_REGULAR, + EFILE_FILETYPE_SYMLINK, + EFILE_FILETYPE_OTHER +}; + +enum efile_advise_t { + EFILE_ADVISE_NORMAL, + EFILE_ADVISE_RANDOM, + EFILE_ADVISE_SEQUENTIAL, + EFILE_ADVISE_WILL_NEED, + EFILE_ADVISE_DONT_NEED, + EFILE_ADVISE_NO_REUSE +}; + +enum efile_state_t { + EFILE_STATE_IDLE = 0, + EFILE_STATE_BUSY = 1, + EFILE_STATE_CLOSE_PENDING = 2, + EFILE_STATE_CLOSED = 3 +}; + +typedef struct { + Sint64 size; /* Size of file */ + Uint32 type; /* Type of file -- one of EFILE_FILETYPE_*. */ + Uint32 access; /* Access to file -- one of EFILE_ACCESS_*. */ + Uint32 mode; /* Access permissions -- bit field. */ + Uint32 links; /* Number of links to file. */ + Uint32 major_device; /* Major device or file system. 
*/ + Uint32 minor_device; /* Minor device (for devices). */ + Uint32 inode; /* Inode number. */ + Uint32 uid; /* User id of owner. */ + Uint32 gid; /* Group id of owner. */ + Sint64 a_time; /* Last time the file was accessed. */ + Sint64 m_time; /* Last time the file was modified. */ + Sint64 c_time; /* Windows: creation time, Unix: last inode + * change. */ +} efile_fileinfo_t; + +/* The smallest value that can be converted freely between universal, local, + * and POSIX time, as required by read_file_info/2. Corresponds to + * {{1902,1,1},{0,0,0}} */ +#define EFILE_MIN_FILETIME -2145916800 + +/* Initializes an efile_data_t; must be used in efile_open on success. */ +#define EFILE_INIT_RESOURCE(__d, __modes) do { \ + erts_atomic32_init_acqb(&(__d)->state, EFILE_STATE_IDLE); \ + (__d)->posix_errno = 0; \ + (__d)->modes = __modes; \ + } while(0) + +typedef struct { + erts_atomic32_t state; + + posix_errno_t posix_errno; + enum efile_modes_t modes; + + ErlNifMonitor monitor; +} efile_data_t; + +typedef ErlNifBinary efile_path_t; + +/* @brief Translates the given "raw name" into the format expected by the APIs + * used by the underlying implementation. The result is transient and does not + * need to be released. + * + * This may change the structure of the path and its results should never be + * passed on to the user. Refer to the OS-specific implementation for details. + * + * @param path The term to translate; it must have been encoded with + * prim_file:internal_native2name for compatibility reasons. */ +posix_errno_t efile_marshal_path(ErlNifEnv *env, ERL_NIF_TERM path, efile_path_t *result); + +/* @brief Returns the underlying handle as an implementation-defined term. + * + * This is an internal function intended to support tests and tricky + * operations like sendfile(2). */ +ERL_NIF_TERM efile_get_handle(ErlNifEnv *env, efile_data_t *d); + +/* @brief Read until EOF or the given iovec has been filled. 
+ * + * @return -1 on failure, or the number of bytes read on success. The return + * value will be 0 if no bytes could be read before EOF or the end of the + * iovec. */ +Sint64 efile_readv(efile_data_t *d, SysIOVec *iov, int iovlen); + +/* @brief Write the entirety of the given iovec. + * + * @return -1 on failure, or the number of bytes written on success. "Partial" + * failures will be reported with -1 and not the number of bytes we managed to + * write to disk before the failure. */ +Sint64 efile_writev(efile_data_t *d, SysIOVec *iov, int iovlen); + +/* @brief As \c efile_readv, but starting from a file offset. */ +Sint64 efile_preadv(efile_data_t *d, Sint64 offset, SysIOVec *iov, int iovlen); + +/* @brief As \c efile_writev, but starting from a file offset. */ +Sint64 efile_pwritev(efile_data_t *d, Sint64 offset, SysIOVec *iov, int iovlen); + +int efile_seek(efile_data_t *d, enum efile_seek_t seek, Sint64 offset, Sint64 *new_position); + +int efile_sync(efile_data_t *d, int data_only); + +int efile_advise(efile_data_t *d, Sint64 offset, Sint64 length, enum efile_advise_t advise); +int efile_allocate(efile_data_t *d, Sint64 offset, Sint64 length); +int efile_truncate(efile_data_t *d); + +posix_errno_t efile_open(const efile_path_t *path, enum efile_modes_t modes, + ErlNifResourceType *nif_type, efile_data_t **d); + +/** @brief Closes a file. The file must have entered the CLOSED state prior to + * calling this to prevent double close. */ +int efile_close(efile_data_t *d); + +/* **** **** **** **** **** **** **** **** **** **** **** **** **** **** **** */ + +posix_errno_t efile_read_info(const efile_path_t *path, int follow_link, efile_fileinfo_t *result); + +/** @brief Sets the file times to the given values. Refer to efile_fileinfo_t + * for a description of each. 
*/ +posix_errno_t efile_set_time(const efile_path_t *path, Sint64 a_time, Sint64 m_time, Sint64 c_time); + +/** @brief On Unix, this sets the file permissions according to the docs for + * file:write_file_info/2. On Windows it uses the "owner write permission" flag + * to toggle whether the file is read-only or not. */ +posix_errno_t efile_set_permissions(const efile_path_t *path, Uint32 permissions); + +/** @brief On Unix, this will set the owner/group to the given values. It will + * do nothing on other platforms. */ +posix_errno_t efile_set_owner(const efile_path_t *path, Uint32 owner, Uint32 group); + +/** @brief Resolves the final path of the given link. */ +posix_errno_t efile_read_link(ErlNifEnv *env, const efile_path_t *path, ERL_NIF_TERM *result); + +/** @brief Lists the contents of the given directory. + * @param result [out] A list of all the directory/file names contained in the + * given directory. */ +posix_errno_t efile_list_dir(ErlNifEnv *env, const efile_path_t *path, ERL_NIF_TERM *result); + +/** @brief Changes the name of an existing file or directory, from old_path + * to new_path. + * + * If old_path and new_path refer to the same file or directory, it does + * nothing and returns success. Otherwise if new_path already exists, it will + * be deleted and replaced by old_path subject to the following conditions: + * + * If old_path is a directory, new_path may be an empty directory. + * If old_path is a file, new_path may be a file. + * + * Neither of these are guaranteed to be atomic. In any other situation where + * new_path already exists, the rename will fail. + * + * Some possible error codes: + * + * - EACCES: Either path or one of their parent directories can't be read + * and/or written. + * - EEXIST: new_path is a non-empty directory. + * - EINVAL: old_path is a root directory or new_path is a subdirectory + * of old_path. + * - EISDIR: new_path is a directory, but old_path is not. + * - ENOTDIR: old_path is a directory, but new_path is not. 
+ * - ENOENT: old_path doesn't exist, or either path is "". + * - EXDEV: The paths are on different filesystems. + * + * The implementation of rename may allow cross-filesystem renames, + * but the caller should be prepared to emulate it with copy and + * delete if errno is EXDEV. */ +posix_errno_t efile_rename(const efile_path_t *old_path, const efile_path_t *new_path); + +posix_errno_t efile_make_hard_link(const efile_path_t *existing_path, const efile_path_t *new_path); +posix_errno_t efile_make_soft_link(const efile_path_t *existing_path, const efile_path_t *new_path); +posix_errno_t efile_make_dir(const efile_path_t *path); + +posix_errno_t efile_del_file(const efile_path_t *path); +posix_errno_t efile_del_dir(const efile_path_t *path); + +posix_errno_t efile_get_cwd(ErlNifEnv *env, ERL_NIF_TERM *result); +posix_errno_t efile_set_cwd(const efile_path_t *path); + +/** @brief A Windows-specific function for returning the working directory of a + * given device. + * + * @param device_index The drive index; 1 for A, 2 for B, etc. + * @param result [out] The working directory of the given device + */ +posix_errno_t efile_get_device_cwd(ErlNifEnv *env, int device_index, ERL_NIF_TERM *result); + +/** @brief A Windows-specific function for returning the 8.3-name of a given + * file or directory. */ +posix_errno_t efile_altname(ErlNifEnv *env, const efile_path_t *path, ERL_NIF_TERM *result); diff --git a/erts/emulator/nifs/unix/unix_prim_file.c b/erts/emulator/nifs/unix/unix_prim_file.c new file mode 100644 index 0000000000..57c8ef62e1 --- /dev/null +++ b/erts/emulator/nifs/unix/unix_prim_file.c @@ -0,0 +1,957 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson 2017. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * %CopyrightEnd% + */ + +#include "erl_nif.h" +#include "config.h" +#include "sys.h" + +#ifdef VALGRIND +# include <valgrind/memcheck.h> +#endif + +#include "prim_file_nif.h" + +#if defined(__APPLE__) && defined(__MACH__) && !defined(__DARWIN__) +#define __DARWIN__ 1 +#endif + +#if defined(__DARWIN__) || defined(HAVE_LINUX_FALLOC_H) || defined(HAVE_POSIX_FALLOCATE) +#include <fcntl.h> +#endif + +#ifdef HAVE_LINUX_FALLOC_H +#include <linux/falloc.h> +#endif + +#include <utime.h> + +/* Macros for testing file types. */ +#ifdef NO_UMASK +#define FILE_MODE 0644 +#define DIR_MODE 0755 +#else +#define FILE_MODE 0666 +#define DIR_MODE 0777 +#endif + +/* Old platforms might not have IOV_MAX defined. */ +#if !defined(IOV_MAX) && defined(UIO_MAXIOV) +#define IOV_MAX UIO_MAXIOV +#elif !defined(IOV_MAX) +#define IOV_MAX 16 +#endif + +typedef struct { + efile_data_t common; + int fd; +} efile_unix_t; + +static int has_invalid_null_termination(const ErlNifBinary *path) { + const char *null_pos, *end_pos; + + null_pos = memchr(path->data, '\0', path->size); + end_pos = (const char*)&path->data[path->size] - 1; + + if(null_pos == NULL) { + return 1; + } + + /* prim_file:internal_name2native sometimes feeds us data that is "doubly" + * NUL-terminated, so we'll accept any number of trailing NULs so long as + * they aren't interrupted by anything else. 
*/ + while(null_pos < end_pos && (*null_pos) == '\0') { + null_pos++; + } + + return null_pos != end_pos; +} + +posix_errno_t efile_marshal_path(ErlNifEnv *env, ERL_NIF_TERM path, efile_path_t *result) { + if(!enif_inspect_binary(env, path, result)) { + return EINVAL; + } + + if(has_invalid_null_termination(result)) { + return EINVAL; + } + + return 0; +} + +ERL_NIF_TERM efile_get_handle(ErlNifEnv *env, efile_data_t *d) { + efile_unix_t *u = (efile_unix_t*)d; + + ERL_NIF_TERM result; + unsigned char *bits; + + bits = enif_make_new_binary(env, sizeof(u->fd), &result); + memcpy(bits, &u->fd, sizeof(u->fd)); + + return result; +} + +static int open_file_type_check(const efile_path_t *path, int fd) { + struct stat file_info; + int error; + +#ifndef HAVE_FSTAT + error = stat((const char*)path->data, &file_info); + (void)fd; +#else + error = fstat(fd, &file_info); + (void)path; +#endif + + if(error < 0) { + /* If we failed to stat assume success and let the next call handle the + * error. The old driver checked whether the file was to be used + * immediately in a read within the call, but the new implementation + * never does that. */ + return 1; + } else { + /* The old driver tolerated opening /dev/null despite the "no devices" + * limitation. It provided no explanation for this but we still need + * to match the behavior. We're checking through stat(2) instead of + * comparing the name to account for links. 
*/ + struct stat null_device_info; + int is_dev_null; + + is_dev_null = (stat("/dev/null", &null_device_info) == 0); + is_dev_null &= (file_info.st_ino == null_device_info.st_ino); + is_dev_null &= (file_info.st_dev == null_device_info.st_dev); + + if(is_dev_null) { + return 1; + } + } + + if(!S_ISREG(file_info.st_mode)) { + return 0; + } + + return 1; +} + +posix_errno_t efile_open(const efile_path_t *path, enum efile_modes_t modes, + ErlNifResourceType *nif_type, efile_data_t **d) { + + int flags, fd; + + flags = 0; + + if(modes & EFILE_MODE_READ && !(modes & EFILE_MODE_WRITE)) { + flags |= O_RDONLY; + } else if(modes & EFILE_MODE_WRITE && !(modes & EFILE_MODE_READ)) { + if(!(modes & EFILE_MODE_NO_TRUNCATE)) { + flags |= O_TRUNC; + } + + flags |= O_WRONLY | O_CREAT; + } else if(modes & EFILE_MODE_READ_WRITE) { + flags |= O_RDWR | O_CREAT; + } else { + return EINVAL; + } + + if(modes & EFILE_MODE_APPEND) { + flags &= ~O_TRUNC; + flags |= O_APPEND; + } + + if(modes & EFILE_MODE_EXCLUSIVE) { + flags |= O_EXCL; + } + + if(modes & EFILE_MODE_SYNC) { +#ifndef O_SYNC + return ENOTSUP; +#else + flags |= O_SYNC; +#endif + } + + do { + fd = open((const char*)path->data, flags, FILE_MODE); + } while(fd == -1 && errno == EINTR); + + if(fd != -1) { + efile_unix_t *u; + + if(!(modes & EFILE_MODE_SKIP_TYPE_CHECK) && !open_file_type_check(path, fd)) { + close(fd); + + /* This is blatantly incorrect, but we're documented as returning + * this for everything that isn't a file. 
*/ + return EISDIR; + } + + u = (efile_unix_t*)enif_alloc_resource(nif_type, sizeof(efile_unix_t)); + u->fd = fd; + + EFILE_INIT_RESOURCE(&u->common, modes); + (*d) = &u->common; + + return 0; + } + + (*d) = NULL; + return errno; +} + +int efile_close(efile_data_t *d) { + efile_unix_t *u = (efile_unix_t*)d; + int fd; + + ASSERT(erts_atomic32_read_nob(&d->state) == EFILE_STATE_CLOSED); + ASSERT(u->fd != -1); + + fd = u->fd; + u->fd = -1; + + /* close(2) either always closes (*BSD, Linux) or leaves the fd in an + * undefined state (POSIX 2008, Solaris), so we must not retry on EINTR. */ + + if(close(fd) < 0) { + u->common.posix_errno = errno; + return 0; + } + + return 1; +} + +static void shift_iov(SysIOVec **iov, int *iovlen, ssize_t shift) { + SysIOVec *head_vec = (*iov); + + ASSERT(shift >= 0); + + while(shift > 0) { + ASSERT(head_vec < &(*iov)[*iovlen]); + + if(shift < head_vec->iov_len) { + head_vec->iov_base = (char*)head_vec->iov_base + shift; + head_vec->iov_len -= shift; + break; + } else { + shift -= head_vec->iov_len; + head_vec++; + } + } + + (*iovlen) -= head_vec - (*iov); + (*iov) = head_vec; +} + +Sint64 efile_readv(efile_data_t *d, SysIOVec *iov, int iovlen) { + efile_unix_t *u = (efile_unix_t*)d; + + Sint64 bytes_read; + ssize_t result; + + bytes_read = 0; + + do { + int use_fallback = 0; + + if(iovlen < 1) { + result = 0; + break; + } + + /* writev(2) implies readv(2) */ +#ifdef HAVE_WRITEV + result = readv(u->fd, iov, MIN(IOV_MAX, iovlen)); + + /* Fall back to using read(2) if readv(2) reports that the combined + * size of iov is greater than SSIZE_T_MAX. 
*/ + use_fallback = (result < 0 && errno == EINVAL); +#else + use_fallback = 1; +#endif + + if(use_fallback) { + result = read(u->fd, iov->iov_base, iov->iov_len); + } + + if(result > 0) { + shift_iov(&iov, &iovlen, result); + bytes_read += result; + } + } while(result > 0 || (result < 0 && errno == EINTR)); + + u->common.posix_errno = errno; + + if(result == 0 && bytes_read > 0) { + return bytes_read; + } + + return result; +} + +Sint64 efile_writev(efile_data_t *d, SysIOVec *iov, int iovlen) { + efile_unix_t *u = (efile_unix_t*)d; + + Sint64 bytes_written; + ssize_t result; + + bytes_written = 0; + + do { + int use_fallback = 0; + + if(iovlen < 1) { + result = 0; + break; + } + +#ifdef HAVE_WRITEV + result = writev(u->fd, iov, MIN(IOV_MAX, iovlen)); + + /* Fall back to using write(2) if writev(2) reports that the combined + * size of iov is greater than SSIZE_T_MAX. */ + use_fallback = (result < 0 && errno == EINVAL); +#else + use_fallback = 1; +#endif + + if(use_fallback) { + result = write(u->fd, iov->iov_base, iov->iov_len); + } + + if(result > 0) { + shift_iov(&iov, &iovlen, result); + bytes_written += result; + } + } while(result > 0 || (result < 0 && errno == EINTR)); + + u->common.posix_errno = errno; + + if(result == 0 && bytes_written > 0) { + return bytes_written; + } + + return result; +} + +Sint64 efile_preadv(efile_data_t *d, Sint64 offset, SysIOVec *iov, int iovlen) { + efile_unix_t *u = (efile_unix_t*)d; + + Uint64 bytes_read; + Sint64 result; + +#if !defined(HAVE_PREADV) && !defined(HAVE_PREAD) + /* This function is documented as leaving the file position undefined, but + * the old driver always reset it so there's probably code in the wild that + * relies on this behavior. 
*/ + off_t original_position = lseek(u->fd, 0, SEEK_CUR); + + if(original_position < 0 || lseek(u->fd, offset, SEEK_SET) < 0) { + u->common.posix_errno = errno; + return -1; + } +#endif + + bytes_read = 0; + + do { + if(iovlen < 1) { + result = 0; + break; + } + +#if defined(HAVE_PREADV) + result = preadv(u->fd, iov, MIN(IOV_MAX, iovlen), offset); +#elif defined(HAVE_PREAD) + result = pread(u->fd, iov->iov_base, iov->iov_len, offset); +#else + result = read(u->fd, iov->iov_base, iov->iov_len); +#endif + + if(result > 0) { + shift_iov(&iov, &iovlen, result); + bytes_read += result; + offset += result; + } + } while(result > 0 || (result < 0 && errno == EINTR)); + + u->common.posix_errno = errno; + +#if !defined(HAVE_PREADV) && !defined(HAVE_PREAD) + if(result >= 0) { + if(lseek(u->fd, original_position, SEEK_SET) < 0) { + u->common.posix_errno = errno; + return -1; + } + } +#endif + + if(result == 0 && bytes_read > 0) { + return bytes_read; + } + + return result; +} + +Sint64 efile_pwritev(efile_data_t *d, Sint64 offset, SysIOVec *iov, int iovlen) { + efile_unix_t *u = (efile_unix_t*)d; + + Sint64 bytes_written; + ssize_t result; + +#if !defined(HAVE_PWRITEV) && !defined(HAVE_PWRITE) + off_t original_position = lseek(u->fd, 0, SEEK_CUR); + + if(original_position < 0 || lseek(u->fd, offset, SEEK_SET) < 0) { + u->common.posix_errno = errno; + return -1; + } +#endif + + bytes_written = 0; + + do { + if(iovlen < 1) { + result = 0; + break; + } + +#if defined(HAVE_PWRITEV) + result = pwritev(u->fd, iov, MIN(IOV_MAX, iovlen), offset); +#elif defined(HAVE_PWRITE) + result = pwrite(u->fd, iov->iov_base, iov->iov_len, offset); +#else + result = write(u->fd, iov->iov_base, iov->iov_len); +#endif + + if(result > 0) { + shift_iov(&iov, &iovlen, result); + bytes_written += result; + offset += result; + } + } while(result > 0 || (result < 0 && errno == EINTR)); + + u->common.posix_errno = errno; + +#if !defined(HAVE_PWRITEV) && !defined(HAVE_PWRITE) + if(result >= 0) { + 
if(lseek(u->fd, original_position, SEEK_SET) < 0) { + u->common.posix_errno = errno; + return -1; + } + } +#endif + + if(result == 0 && bytes_written > 0) { + return bytes_written; + } + + return result; +} + +int efile_seek(efile_data_t *d, enum efile_seek_t seek, Sint64 offset, Sint64 *new_position) { + efile_unix_t *u = (efile_unix_t*)d; + off_t result; + int whence; + + switch(seek) { + case EFILE_SEEK_BOF: whence = SEEK_SET; break; + case EFILE_SEEK_CUR: whence = SEEK_CUR; break; + case EFILE_SEEK_EOF: whence = SEEK_END; break; + default: ERTS_INTERNAL_ERROR("Invalid seek parameter"); + } + + result = lseek(u->fd, offset, whence); + + /* + * The man page for lseek (on SunOs 5) says: + * + * "if fildes is a remote file descriptor and offset is negative, lseek() + * returns the file pointer even if it is negative." + */ + if(result < 0 && errno == 0) { + errno = EINVAL; + } + + if(result < 0) { + u->common.posix_errno = errno; + return 0; + } + + (*new_position) = result; + + return 1; +} + +int efile_sync(efile_data_t *d, int data_only) { + efile_unix_t *u = (efile_unix_t*)d; + +#if defined(HAVE_FDATASYNC) && !defined(__DARWIN__) + if(data_only) { + if(fdatasync(u->fd) < 0) { + u->common.posix_errno = errno; + return 0; + } + + return 1; + } +#endif + +#if defined(__DARWIN__) && defined(F_FULLFSYNC) + if(fcntl(u->fd, F_FULLFSYNC) < 0) { +#else + if(fsync(u->fd) < 0) { +#endif + u->common.posix_errno = errno; + return 0; + } + + return 1; +} + +int efile_advise(efile_data_t *d, Sint64 offset, Sint64 length, enum efile_advise_t advise) { + efile_unix_t *u = (efile_unix_t*)d; +#ifdef HAVE_POSIX_FADVISE + int p_advise; + + switch(advise) { + case EFILE_ADVISE_NORMAL: p_advise = POSIX_FADV_NORMAL; break; + case EFILE_ADVISE_RANDOM: p_advise = POSIX_FADV_RANDOM; break; + case EFILE_ADVISE_SEQUENTIAL: p_advise = POSIX_FADV_SEQUENTIAL; break; + case EFILE_ADVISE_WILL_NEED: p_advise = POSIX_FADV_WILLNEED; break; + case EFILE_ADVISE_DONT_NEED: p_advise = 
POSIX_FADV_DONTNEED; break; + case EFILE_ADVISE_NO_REUSE: p_advise = POSIX_FADV_NOREUSE; break; + default: + u->common.posix_errno = EINVAL; + return 0; + } + + if(posix_fadvise(u->fd, offset, length, p_advise) < 0) { + u->common.posix_errno = errno; + return 0; + } + + return 1; +#else + /* We'll pretend to support this syscall as it's only a recommendation even + * on systems that do support it. */ + return 1; +#endif +} + +int efile_allocate(efile_data_t *d, Sint64 offset, Sint64 length) { + efile_unix_t *u = (efile_unix_t*)d; + int ret = -1; + + /* We prefer OS-specific methods, but fall back to posix_fallocate on + * failure. It's unclear whether this has any practical benefit on + * modern systems, but the old driver did it. */ + +#if defined(HAVE_FALLOCATE) + /* Linux-specific */ + do { + ret = fallocate(u->fd, FALLOC_FL_KEEP_SIZE, offset, length); + } while(ret < 0 && errno == EINTR); +#elif defined(F_PREALLOCATE) + /* Mac-specific */ + fstore_t fs = {}; + + fs.fst_flags = F_ALLOCATECONTIG; + fs.fst_posmode = F_VOLPOSMODE; + fs.fst_offset = offset; + fs.fst_length = length; + + ret = fcntl(u->fd, F_PREALLOCATE, &fs); + if(ret < 0) { + fs.fst_flags = F_ALLOCATEALL; + ret = fcntl(u->fd, F_PREALLOCATE, &fs); + } +#elif !defined(HAVE_POSIX_FALLOCATE) + u->common.posix_errno = ENOTSUP; + return 0; +#endif + +#ifdef HAVE_POSIX_FALLOCATE + if(ret < 0) { + do { + ret = posix_fallocate(u->fd, offset, length); + + /* On Linux and Solaris for example, posix_fallocate() returns a + * positive error number on error and it does not set errno. On + * FreeBSD however (9.0 at least), it returns -1 on error and it + * sets errno. 
*/ + if (ret > 0) { + errno = ret; + ret = -1; + } + } while(ret < 0 && errno == EINTR); + } +#endif + + if(ret < 0) { + u->common.posix_errno = errno; + return 0; + } + + return 1; +} + +int efile_truncate(efile_data_t *d) { + efile_unix_t *u = (efile_unix_t*)d; + off_t offset; + + offset = lseek(u->fd, 0, SEEK_CUR); + + if(offset < 0) { + u->common.posix_errno = errno; + return 0; + } + + if(ftruncate(u->fd, offset) < 0) { + u->common.posix_errno = errno; + return 0; + } + + return 1; +} + +posix_errno_t efile_read_info(const efile_path_t *path, int follow_links, efile_fileinfo_t *result) { + struct stat data; + + if(follow_links) { + if(stat((const char*)path->data, &data) < 0) { + return errno; + } + } else { + if(lstat((const char*)path->data, &data) < 0) { + return errno; + } + } + + if(S_ISCHR(data.st_mode) || S_ISBLK(data.st_mode)) { + result->type = EFILE_FILETYPE_DEVICE; + } else if(S_ISDIR(data.st_mode)) { + result->type = EFILE_FILETYPE_DIRECTORY; + } else if(S_ISREG(data.st_mode)) { + result->type = EFILE_FILETYPE_REGULAR; + } else if(S_ISLNK(data.st_mode)) { + result->type = EFILE_FILETYPE_SYMLINK; + } else { + result->type = EFILE_FILETYPE_OTHER; + } + + result->a_time = (Sint64)data.st_atime; + result->m_time = (Sint64)data.st_mtime; + result->c_time = (Sint64)data.st_ctime; + result->size = data.st_size; + + result->major_device = data.st_dev; + result->minor_device = data.st_rdev; + result->links = data.st_nlink; + result->inode = data.st_ino; + result->mode = data.st_mode; + result->uid = data.st_uid; + result->gid = data.st_gid; + +#ifndef NO_ACCESS + result->access = EFILE_ACCESS_NONE; + + if(access((const char*)path->data, R_OK) == 0) { + result->access |= EFILE_ACCESS_READ; + } + if(access((const char*)path->data, W_OK) == 0) { + result->access |= EFILE_ACCESS_WRITE; + } +#else + /* Just look at read/write access for owner. 
*/ + result->access = ((data.st_mode >> 6) & 07) >> 1; +#endif + + return 0; +} + +posix_errno_t efile_set_permissions(const efile_path_t *path, Uint32 permissions) { + const mode_t MUTABLE_MODES = (S_ISUID | S_ISGID | S_IRWXU | S_IRWXG | S_IRWXO); + mode_t new_modes = permissions & MUTABLE_MODES; + + if(chmod((const char*)path->data, new_modes) < 0) { + new_modes &= ~(S_ISUID | S_ISGID); + + if (chmod((const char*)path->data, new_modes) < 0) { + return errno; + } + } + + return 0; +} + +posix_errno_t efile_set_owner(const efile_path_t *path, Uint32 owner, Uint32 group) { + if(chown((const char*)path->data, owner, group) < 0) { + return errno; + } + + return 0; +} + +posix_errno_t efile_set_time(const efile_path_t *path, Sint64 a_time, Sint64 m_time, Sint64 c_time) { + struct utimbuf tval; + + tval.actime = (time_t)a_time; + tval.modtime = (time_t)m_time; + + (void)c_time; + + if(utime((const char*)path->data, &tval) < 0) { + return errno; + } + + return 0; +} + +posix_errno_t efile_read_link(ErlNifEnv *env, const efile_path_t *path, ERL_NIF_TERM *result) { + ErlNifBinary result_bin; + + if(!enif_alloc_binary(256, &result_bin)) { + return ENOMEM; + } + + for(;;) { + ssize_t bytes_copied; + + bytes_copied = readlink((const char*)path->data, (char*)result_bin.data, + result_bin.size); + + if(bytes_copied <= 0) { + posix_errno_t saved_errno = errno; + enif_release_binary(&result_bin); + return saved_errno; + } else if(bytes_copied < result_bin.size) { + if(!enif_realloc_binary(&result_bin, bytes_copied)) { + enif_release_binary(&result_bin); + return ENOMEM; + } + + (*result) = enif_make_binary(env, &result_bin); + + return 0; + } + + /* The result didn't fit into the buffer, so we'll try again with a + * larger one. 
*/ + + if(!enif_realloc_binary(&result_bin, result_bin.size * 2)) { + enif_release_binary(&result_bin); + return ENOMEM; + } + } +} + +static int is_ignored_name(int name_length, const char *name) { + if(name_length == 1 && name[0] == '.') { + return 1; + } else if(name_length == 2 && memcmp(name, "..", 2) == 0) { + return 1; + } + + return 0; +} + +posix_errno_t efile_list_dir(ErlNifEnv *env, const efile_path_t *path, ERL_NIF_TERM *result) { + ERL_NIF_TERM list_head; + + struct dirent *dir_entry; + DIR *dir_stream; + + dir_stream = opendir((const char*)path->data); + if(dir_stream == NULL) { + posix_errno_t saved_errno = errno; + *result = enif_make_list(env, 0); + return saved_errno; + } + + list_head = enif_make_list(env, 0); + dir_entry = readdir(dir_stream); + + while(dir_entry != NULL) { + int name_length = strlen(dir_entry->d_name); + + if(!is_ignored_name(name_length, dir_entry->d_name)) { + unsigned char *name_bytes; + ERL_NIF_TERM name_term; + + name_bytes = enif_make_new_binary(env, name_length, &name_term); + sys_memcpy(name_bytes, dir_entry->d_name, name_length); + + list_head = enif_make_list_cell(env, name_term, list_head); + } + + dir_entry = readdir(dir_stream); + } + + (*result) = list_head; + closedir(dir_stream); + + return 0; +} + +posix_errno_t efile_rename(const efile_path_t *old_path, const efile_path_t *new_path) { + if(rename((const char*)old_path->data, (const char*)new_path->data) < 0) { + if(errno == ENOTEMPTY) { + return EEXIST; + } + + if(strcmp((const char*)old_path->data, "/") == 0) { + /* Alpha reports renaming / as EBUSY and Linux reports it as EACCES + * instead of EINVAL.*/ + return EINVAL; + } + + return errno; + } + + return 0; +} + +posix_errno_t efile_make_hard_link(const efile_path_t *existing_path, const efile_path_t *new_path) { + if(link((const char*)existing_path->data, (const char*)new_path->data) < 0) { + return errno; + } + + return 0; +} + +posix_errno_t efile_make_soft_link(const efile_path_t *existing_path, const 
efile_path_t *new_path) { + if(symlink((const char*)existing_path->data, (const char*)new_path->data) < 0) { + return errno; + } + + return 0; +} + +posix_errno_t efile_make_dir(const efile_path_t *path) { +#ifdef NO_MKDIR_MODE + if(mkdir((const char*)path->data) < 0) { +#else + if(mkdir((const char*)path->data, DIR_MODE) < 0) { +#endif + return errno; + } + + return 0; +} + +posix_errno_t efile_del_file(const efile_path_t *path) { + if(unlink((const char*)path->data) < 0) { + /* Linux sets the wrong error code. */ + if(errno == EISDIR) { + return EPERM; + } + + return errno; + } + + return 0; +} + +posix_errno_t efile_del_dir(const efile_path_t *path) { + if(rmdir((const char*)path->data) < 0) { + posix_errno_t saved_errno = errno; + + if(saved_errno == ENOTEMPTY) { + saved_errno = EEXIST; + } + + /* The error code might be wrong if we're trying to delete the current + * directory. */ + if(saved_errno == EEXIST) { + struct stat path_stat, cwd_stat; + int has_stat; + + has_stat = (stat((const char*)path->data, &path_stat) == 0); + has_stat &= (stat(".", &cwd_stat) == 0); + + if(has_stat && path_stat.st_ino == cwd_stat.st_ino) { + if(path_stat.st_dev == cwd_stat.st_dev) { + return EINVAL; + } + } + } + + return saved_errno; + } + + return 0; +} + +posix_errno_t efile_set_cwd(const efile_path_t *path) { + if(chdir((const char*)path->data) < 0) { + return errno; + } + + return 0; +} + +posix_errno_t efile_get_device_cwd(ErlNifEnv *env, int device_index, ERL_NIF_TERM *result) { + (void)device_index; + (void)result; + (void)env; + + return ENOTSUP; +} + +posix_errno_t efile_get_cwd(ErlNifEnv *env, ERL_NIF_TERM *result) { + ErlNifBinary result_bin; + size_t bytes_copied; + + if(!enif_alloc_binary(256, &result_bin)) { + return ENOMEM; + } + + while(getcwd((char*)result_bin.data, result_bin.size) == NULL) { + posix_errno_t saved_errno = errno; + + if(saved_errno != ERANGE) { + enif_release_binary(&result_bin); + return saved_errno; + } else { + 
if(!enif_realloc_binary(&result_bin, result_bin.size * 2)) { + enif_release_binary(&result_bin); + return ENOMEM; + } + } + } + + /* getcwd(2) guarantees null-termination. */ + bytes_copied = strlen((const char*)result_bin.data); + + if(!enif_realloc_binary(&result_bin, bytes_copied)) { + enif_release_binary(&result_bin); + return ENOMEM; + } + + (*result) = enif_make_binary(env, &result_bin); + + return 0; +} + +posix_errno_t efile_altname(ErlNifEnv *env, const efile_path_t *path, ERL_NIF_TERM *result) { + (void)path; + (void)result; + + return ENOTSUP; +} diff --git a/erts/emulator/nifs/win32/win_prim_file.c b/erts/emulator/nifs/win32/win_prim_file.c new file mode 100644 index 0000000000..9f993f1d24 --- /dev/null +++ b/erts/emulator/nifs/win32/win_prim_file.c @@ -0,0 +1,1427 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson 2017. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * %CopyrightEnd% + */ + +#include "erl_nif.h" +#include "config.h" +#include "sys.h" + +#include "prim_file_nif.h" + +#include <windows.h> +#include <strsafe.h> +#include <wchar.h> + +#define IS_SLASH(a) ((a) == L'\\' || (a) == L'/') + +#define FILE_SHARE_FLAGS (FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE) + +#define LP_PREFIX L"\\\\?\\" +#define LP_PREFIX_SIZE (sizeof(LP_PREFIX) - sizeof(WCHAR)) +#define LP_PREFIX_LENGTH (LP_PREFIX_SIZE / sizeof(WCHAR)) + +#define PATH_LENGTH(path) (path->size / sizeof(WCHAR) - 1) + +#define ASSERT_PATH_FORMAT(path) \ + do { \ + ASSERT(PATH_LENGTH(path) >= 4 && \ + !memcmp(path->data, LP_PREFIX, LP_PREFIX_SIZE)); \ + ASSERT(PATH_LENGTH(path) == wcslen((WCHAR*)path->data)); \ + } while(0) + +#define TICKS_PER_SECOND (10000000ULL) +#define EPOCH_DIFFERENCE (11644473600LL) + +#define FILETIME_TO_EPOCH(epoch, ft) \ + do { \ + ULARGE_INTEGER ull; \ + ull.LowPart = (ft).dwLowDateTime; \ + ull.HighPart = (ft).dwHighDateTime; \ + (epoch) = ((ull.QuadPart / TICKS_PER_SECOND) - EPOCH_DIFFERENCE); \ + } while(0) + +#define EPOCH_TO_FILETIME(ft, epoch) \ + do { \ + ULARGE_INTEGER ull; \ + ull.QuadPart = (((epoch) + EPOCH_DIFFERENCE) * TICKS_PER_SECOND); \ + (ft).dwLowDateTime = ull.LowPart; \ + (ft).dwHighDateTime = ull.HighPart; \ + } while(0) + +typedef struct { + efile_data_t common; + HANDLE handle; +} efile_win_t; + +static int windows_to_posix_errno(DWORD last_error); + +static int has_invalid_null_termination(const ErlNifBinary *path) { + const WCHAR *null_pos, *end_pos; + + null_pos = wmemchr((const WCHAR*)path->data, L'\0', path->size); + end_pos = (const WCHAR*)&path->data[path->size] - 1; + + if(null_pos == NULL) { + return 1; + } + + /* prim_file:internal_name2native sometimes feeds us data that is "doubly" + * NUL-terminated, so we'll accept any number of trailing NULs so long as + * they aren't interrupted by anything else. 
*/ + while(null_pos < end_pos && (*null_pos) == L'\0') { + null_pos++; + } + + return null_pos != end_pos; +} + +static posix_errno_t get_full_path(ErlNifEnv *env, WCHAR *input, efile_path_t *result) { + DWORD maximum_length, actual_length; + int add_long_prefix; + + maximum_length = GetFullPathNameW(input, 0, NULL, NULL); + add_long_prefix = 0; + + if(maximum_length == 0) { + /* POSIX doesn't have the concept of a "path error" in the same way + * Windows does, so we'll return ENOENT since that's what most POSIX + * APIs would return if they were fed such garbage. */ + return ENOENT; + } + + maximum_length += LP_PREFIX_LENGTH; + + if(!enif_alloc_binary(maximum_length * sizeof(WCHAR), result)) { + return ENOMEM; + } + + actual_length = GetFullPathNameW(input, maximum_length, (WCHAR*)result->data, NULL); + + if(actual_length < maximum_length) { + int has_long_path_prefix; + WCHAR *path_start; + + /* Make sure we have a long-path prefix; GetFullPathNameW only adds one + * if the path is relative. */ + has_long_path_prefix = actual_length >= LP_PREFIX_LENGTH && + !sys_memcmp(result->data, LP_PREFIX, LP_PREFIX_SIZE); + + if(!has_long_path_prefix) { + sys_memmove(result->data + LP_PREFIX_SIZE, result->data, + (actual_length + 1) * sizeof(WCHAR)); + sys_memcpy(result->data, LP_PREFIX, LP_PREFIX_SIZE); + actual_length += LP_PREFIX_LENGTH; + } + + path_start = (WCHAR*)result->data; + + /* We're removing trailing slashes since quite a few APIs refuse to + * work with them, and none require them. We only check the last + * character since GetFullPathNameW folds slashes together. 
*/ + if(IS_SLASH(path_start[actual_length - 1])) { + if(path_start[actual_length - 2] != L':') { + path_start[actual_length - 1] = L'\0'; + actual_length--; + } + } + + if(!enif_realloc_binary(result, (actual_length + 1) * sizeof(WCHAR))) { + enif_release_binary(result); + return ENOMEM; + } + + enif_make_binary(env, result); + return 0; + } + + /* We may end up here if the current directory changes to something longer + * between/during GetFullPathName. There's nothing sensible we can do about + * this. */ + + enif_release_binary(result); + + return EINVAL; +} + +posix_errno_t efile_marshal_path(ErlNifEnv *env, ERL_NIF_TERM path, efile_path_t *result) { + ErlNifBinary raw_path; + + if(!enif_inspect_binary(env, path, &raw_path)) { + return EINVAL; + } else if(raw_path.size % sizeof(WCHAR)) { + return EINVAL; + } + + if(has_invalid_null_termination(&raw_path)) { + return EINVAL; + } + + return get_full_path(env, (WCHAR*)raw_path.data, result); +} + +ERL_NIF_TERM efile_get_handle(ErlNifEnv *env, efile_data_t *d) { + efile_win_t *w = (efile_win_t*)d; + + ERL_NIF_TERM result; + unsigned char *bits; + + bits = enif_make_new_binary(env, sizeof(w->handle), &result); + memcpy(bits, &w->handle, sizeof(w->handle)); + + return result; +} + +/** @brief Converts a native path to the preferred form in "erlang space," + * without path-prefixes, forward-slashes, or NUL terminators. */ +static int normalize_path_result(ErlNifBinary *path) { + WCHAR *path_iterator, *path_start, *path_end; + int length; + + path_start = (WCHAR*)path->data; + length = wcslen(path_start); + + ASSERT(length < path->size / sizeof(WCHAR)); + + /* Get rid of the long-path prefix, if present. 
*/ + if(length >= LP_PREFIX_LENGTH) { + if(!sys_memcmp(path_start, LP_PREFIX, LP_PREFIX_SIZE)) { + length -= LP_PREFIX_LENGTH; + + sys_memmove(path_start, &path_start[LP_PREFIX_LENGTH], + length * sizeof(WCHAR)); + } + } + + path_end = &path_start[length]; + path_iterator = path_start; + + /* Convert drive letters to lowercase, if present. */ + if(length >= 2 && path_start[1] == L':') { + WCHAR drive_letter = path_start[0]; + + if(drive_letter >= L'A' && drive_letter <= L'Z') { + path_start[0] = drive_letter - L'A' + L'a'; + } + } + + while(path_iterator < path_end) { + if(*path_iterator == L'\\') { + *path_iterator = L'/'; + } + + path_iterator++; + } + + /* Truncate the result to its actual length; we don't want to include the + * NUL terminator. */ + return enif_realloc_binary(path, length * sizeof(WCHAR)); +} + +/* @brief Checks whether all the given attributes are set on the object at the + * given path. Note that it assumes false on errors. */ +static int has_file_attributes(const efile_path_t *path, DWORD mask) { + DWORD attributes = GetFileAttributesW((WCHAR*)path->data); + + if(attributes == INVALID_FILE_ATTRIBUTES) { + return 0; + } + + return !!((attributes & mask) == mask); +} + +static int is_ignored_name(int name_length, const WCHAR *name) { + if(name_length == 1 && name[0] == L'.') { + return 1; + } else if(name_length == 2 && !sys_memcmp(name, L"..", 2 * sizeof(WCHAR))) { + return 1; + } + + return 0; +} + +static int get_drive_number(const efile_path_t *path) { + const WCHAR *path_start; + int length; + + ASSERT_PATH_FORMAT(path); + + path_start = (WCHAR*)path->data + LP_PREFIX_LENGTH; + length = PATH_LENGTH(path) - LP_PREFIX_LENGTH; + + if(length >= 2 && path_start[1] == L':') { + WCHAR drive_letter = path_start[0]; + + if(drive_letter >= L'A' && drive_letter <= L'Z') { + return drive_letter - L'A' + 1; + } else if(drive_letter >= L'a' && drive_letter <= L'z') { + return drive_letter - L'a' + 1; + } + } + + return -1; +} + +/* @brief Checks 
whether two *paths* are on the same mount point; they don't + * have to refer to existing or accessible files/directories. */ +static int has_same_mount_point(const efile_path_t *path_a, const efile_path_t *path_b) { + WCHAR *mount_a, *mount_b; + int result = 0; + + mount_a = enif_alloc(path_a->size); + mount_b = enif_alloc(path_b->size); + + if(mount_a != NULL && mount_b != NULL) { + int length_a, length_b; + + length_a = PATH_LENGTH(path_a); + length_b = PATH_LENGTH(path_b); + + if(GetVolumePathNameW((WCHAR*)path_a->data, mount_a, length_a)) { + ASSERT(wcslen(mount_a) <= length_a); + + if(GetVolumePathNameW((WCHAR*)path_b->data, mount_b, length_b)) { + ASSERT(wcslen(mount_b) <= length_b); + + result = !_wcsicmp(mount_a, mount_b); + } + } + } + + if(mount_b != NULL) { + enif_free(mount_b); + } + + if(mount_a != NULL) { + enif_free(mount_a); + } + + return result; +} + +/* Mirrors the PathIsRootW function of the shell API, but doesn't choke on + * paths longer than MAX_PATH. */ +static int is_path_root(const efile_path_t *path) { + const WCHAR *path_start, *path_end; + int length; + + ASSERT_PATH_FORMAT(path); + + path_start = (WCHAR*)path->data + LP_PREFIX_LENGTH; + length = PATH_LENGTH(path) - LP_PREFIX_LENGTH; + + path_end = &path_start[length]; + + if(length == 1) { + /* A single \ refers to the root of the current working directory. */ + return IS_SLASH(path_start[0]); + } else if(length == 3 && iswalpha(path_start[0]) && path_start[1] == L':') { + /* Drive letter. */ + return IS_SLASH(path_start[2]); + } else if(length >= 4) { + /* Check whether we're a UNC root, eg. \\server, \\server\share */ + const WCHAR *path_iterator; + + if(!IS_SLASH(path_start[0]) || !IS_SLASH(path_start[1])) { + return 0; + } + + path_iterator = path_start + 2; + + /* Slide to the slash between the server and share names, if present. 
*/ + while(path_iterator < path_end && !IS_SLASH(*path_iterator)) { + path_iterator++; + } + + /* Slide past the end of the string, stopping at the first slash we + * encounter. */ + do { + path_iterator++; + } while(path_iterator < path_end && !IS_SLASH(*path_iterator)); + + /* If we're past the end of the string and it didnt't end with a slash, + * then we're a root path. */ + return path_iterator >= path_end && !IS_SLASH(path_start[length - 1]); + } + + return 0; +} + +posix_errno_t efile_open(const efile_path_t *path, enum efile_modes_t modes, + ErlNifResourceType *nif_type, efile_data_t **d) { + + DWORD attributes, access_flags, open_mode; + HANDLE handle; + + ASSERT_PATH_FORMAT(path); + + access_flags = 0; + open_mode = 0; + + if(modes & EFILE_MODE_READ && !(modes & EFILE_MODE_WRITE)) { + access_flags = GENERIC_READ; + open_mode = OPEN_EXISTING; + } else if(modes & EFILE_MODE_WRITE && !(modes & EFILE_MODE_READ)) { + access_flags = GENERIC_WRITE; + open_mode = CREATE_ALWAYS; + } else if(modes & EFILE_MODE_READ_WRITE) { + access_flags = GENERIC_READ | GENERIC_WRITE; + open_mode = OPEN_ALWAYS; + } else { + return EINVAL; + } + + if(modes & EFILE_MODE_APPEND) { + access_flags |= FILE_APPEND_DATA; + open_mode = OPEN_ALWAYS; + } + + if(modes & EFILE_MODE_EXCLUSIVE) { + open_mode = CREATE_NEW; + } + + if(modes & EFILE_MODE_SYNC) { + attributes = FILE_FLAG_WRITE_THROUGH; + } else { + attributes = FILE_ATTRIBUTE_NORMAL; + } + + handle = CreateFileW((WCHAR*)path->data, access_flags, + FILE_SHARE_FLAGS, NULL, open_mode, attributes, NULL); + + if(handle != INVALID_HANDLE_VALUE) { + efile_win_t *w; + + w = (efile_win_t*)enif_alloc_resource(nif_type, sizeof(efile_win_t)); + w->handle = handle; + + EFILE_INIT_RESOURCE(&w->common, modes); + (*d) = &w->common; + + return 0; + } else { + DWORD last_error = GetLastError(); + + /* Rewrite all failures on directories to EISDIR to match the old + * driver. 
*/ + if(has_file_attributes(path, FILE_ATTRIBUTE_DIRECTORY)) { + return EISDIR; + } + + return windows_to_posix_errno(last_error); + } +} + +int efile_close(efile_data_t *d) { + efile_win_t *w = (efile_win_t*)d; + HANDLE handle; + + ASSERT(erts_atomic32_read_nob(&d->state) == EFILE_STATE_CLOSED); + ASSERT(w->handle != INVALID_HANDLE_VALUE); + + handle = w->handle; + w->handle = INVALID_HANDLE_VALUE; + + if(!CloseHandle(handle)) { + w->common.posix_errno = windows_to_posix_errno(GetLastError()); + return 0; + } + + return 1; +} + +static void shift_overlapped(OVERLAPPED *overlapped, DWORD shift) { + LARGE_INTEGER offset; + + ASSERT(shift >= 0); + + offset.HighPart = overlapped->OffsetHigh; + offset.LowPart = overlapped->Offset; + + /* ~(Uint64)0 is a magic value ("append to end of file") which needs to be + * preserved. Other positions resulting in overflow would have errored out + * just prior to this point. */ + if(offset.QuadPart != ERTS_UINT64_MAX) { + offset.QuadPart += shift; + } + + /* All unused fields must be zeroed for the next call. 
*/ + sys_memset(overlapped, 0, sizeof(*overlapped)); + overlapped->OffsetHigh = offset.HighPart; + overlapped->Offset = offset.LowPart; +} + +static void shift_iov(SysIOVec **iov, int *iovlen, DWORD shift) { + SysIOVec *head_vec = (*iov); + + ASSERT(shift >= 0); + + while(shift > 0) { + ASSERT(head_vec < &(*iov)[*iovlen]); + + if(shift < head_vec->iov_len) { + head_vec->iov_base = (char*)head_vec->iov_base + shift; + head_vec->iov_len -= shift; + break; + } else { + shift -= head_vec->iov_len; + head_vec++; + } + } + + (*iovlen) -= head_vec - (*iov); + (*iov) = head_vec; +} + +typedef BOOL (WINAPI *io_op_t)(HANDLE, LPVOID, DWORD, LPDWORD, LPOVERLAPPED); + +static Sint64 internal_sync_io(efile_win_t *w, io_op_t operation, + SysIOVec *iov, int iovlen, OVERLAPPED *overlapped) { + + Sint64 bytes_processed = 0; + + for(;;) { + DWORD block_bytes_processed, last_error; + BOOL succeeded; + + if(iovlen < 1) { + return bytes_processed; + } + + succeeded = operation(w->handle, iov->iov_base, iov->iov_len, + &block_bytes_processed, overlapped); + last_error = GetLastError(); + + if(!succeeded && (last_error != ERROR_HANDLE_EOF)) { + w->common.posix_errno = windows_to_posix_errno(last_error); + return -1; + } else if(block_bytes_processed == 0) { + /* EOF */ + return bytes_processed; + } + + if(overlapped != NULL) { + shift_overlapped(overlapped, block_bytes_processed); + } + + shift_iov(&iov, &iovlen, block_bytes_processed); + + bytes_processed += block_bytes_processed; + } +} + +Sint64 efile_readv(efile_data_t *d, SysIOVec *iov, int iovlen) { + efile_win_t *w = (efile_win_t*)d; + + return internal_sync_io(w, ReadFile, iov, iovlen, NULL); +} + +Sint64 efile_writev(efile_data_t *d, SysIOVec *iov, int iovlen) { + efile_win_t *w = (efile_win_t*)d; + + OVERLAPPED __overlapped, *overlapped; + Uint64 bytes_written; + + if(w->common.modes & EFILE_MODE_APPEND) { + overlapped = &__overlapped; + + sys_memset(overlapped, 0, sizeof(*overlapped)); + overlapped->OffsetHigh = 0xFFFFFFFF; + 
overlapped->Offset = 0xFFFFFFFF; + } else { + overlapped = NULL; + } + + return internal_sync_io(w, WriteFile, iov, iovlen, overlapped); +} + +Sint64 efile_preadv(efile_data_t *d, Sint64 offset, SysIOVec *iov, int iovlen) { + efile_win_t *w = (efile_win_t*)d; + + OVERLAPPED overlapped; + + sys_memset(&overlapped, 0, sizeof(overlapped)); + overlapped.OffsetHigh = (offset >> 32) & 0xFFFFFFFF; + overlapped.Offset = offset & 0xFFFFFFFF; + + return internal_sync_io(w, ReadFile, iov, iovlen, &overlapped); +} + +Sint64 efile_pwritev(efile_data_t *d, Sint64 offset, SysIOVec *iov, int iovlen) { + efile_win_t *w = (efile_win_t*)d; + + OVERLAPPED overlapped; + + sys_memset(&overlapped, 0, sizeof(overlapped)); + overlapped.OffsetHigh = (offset >> 32) & 0xFFFFFFFF; + overlapped.Offset = offset & 0xFFFFFFFF; + + return internal_sync_io(w, WriteFile, iov, iovlen, &overlapped); +} + +int efile_seek(efile_data_t *d, enum efile_seek_t seek, Sint64 offset, Sint64 *new_position) { + efile_win_t *w = (efile_win_t*)d; + + LARGE_INTEGER large_offset, large_new_position; + DWORD whence; + + switch(seek) { + case EFILE_SEEK_BOF: whence = FILE_BEGIN; break; + case EFILE_SEEK_CUR: whence = FILE_CURRENT; break; + case EFILE_SEEK_EOF: whence = FILE_END; break; + default: ERTS_INTERNAL_ERROR("Invalid seek parameter"); + } + + large_offset.QuadPart = offset; + + if(!SetFilePointerEx(w->handle, large_offset, &large_new_position, whence)) { + w->common.posix_errno = windows_to_posix_errno(GetLastError()); + return 0; + } + + (*new_position) = large_new_position.QuadPart; + + return 1; +} + +int efile_sync(efile_data_t *d, int data_only) { + efile_win_t *w = (efile_win_t*)d; + + /* Windows doesn't support data-only syncing. 
*/ + (void)data_only; + + if(!FlushFileBuffers(w->handle)) { + w->common.posix_errno = windows_to_posix_errno(GetLastError()); + return 0; + } + + return 1; +} + +int efile_advise(efile_data_t *d, Sint64 offset, Sint64 length, enum efile_advise_t advise) { + /* Windows doesn't support this, but we'll pretend it does since the call + * is only a recommendation even on systems that do support it. */ + + (void)d; + (void)offset; + (void)length; + (void)advise; + + return 1; +} + +int efile_allocate(efile_data_t *d, Sint64 offset, Sint64 length) { + efile_win_t *w = (efile_win_t*)d; + + (void)d; + (void)offset; + (void)length; + + w->common.posix_errno = ENOTSUP; + + return 0; +} + +int efile_truncate(efile_data_t *d) { + efile_win_t *w = (efile_win_t*)d; + + if(!SetEndOfFile(w->handle)) { + w->common.posix_errno = windows_to_posix_errno(GetLastError()); + return 0; + } + + return 1; +} + +static int is_executable_file(const efile_path_t *path) { + /* We're using the file extension in order to be quirks-compliant with the + * old driver, which never bothered to check the actual permissions. We + * could easily do so now (cf. GetNamedSecurityInfo) but the execute + * permission is only relevant for files that are started with the default + * loader, and batch files run just fine with read permission alone. 
*/ + + int length = PATH_LENGTH(path); + + if(length >= 4) { + const WCHAR *last_four = &((WCHAR*)path->data)[length - 4]; + + if (!_wcsicmp(last_four, L".exe") || + !_wcsicmp(last_four, L".cmd") || + !_wcsicmp(last_four, L".bat") || + !_wcsicmp(last_four, L".com")) { + return 1; + } + } + + return 0; +} + +posix_errno_t efile_read_info(const efile_path_t *path, int follow_links, efile_fileinfo_t *result) { + BY_HANDLE_FILE_INFORMATION native_file_info; + DWORD attributes; + + sys_memset(&native_file_info, 0, sizeof(native_file_info)); + + attributes = GetFileAttributesW((WCHAR*)path->data); + + if(attributes == INVALID_FILE_ATTRIBUTES) { + DWORD last_error = GetLastError(); + + /* Querying a network share root fails with ERROR_BAD_NETPATH, so we'll + * fake it as a directory just like local roots. */ + if(!is_path_root(path) || last_error != ERROR_BAD_NETPATH) { + return windows_to_posix_errno(last_error); + } + + attributes = FILE_ATTRIBUTE_DIRECTORY; + } else if(is_path_root(path)) { + /* Local (or mounted) roots can be queried with GetFileAttributesW but + * lack support for GetFileInformationByHandle, so we'll skip that + * part. */ + } else { + HANDLE handle; + + if(follow_links && (attributes & FILE_ATTRIBUTE_REPARSE_POINT)) { + posix_errno_t posix_errno; + efile_path_t resolved_path; + + posix_errno = internal_read_link(path, &resolved_path); + + if(posix_errno != 0) { + return posix_errno; + } + + return efile_read_info(&resolved_path, 0, result); + } + + handle = CreateFileW((const WCHAR*)path->data, GENERIC_READ, + FILE_SHARE_FLAGS, NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, + NULL); + + /* The old driver never cared whether this succeeded. 
*/ + if(handle != INVALID_HANDLE_VALUE) { + GetFileInformationByHandle(handle, &native_file_info); + CloseHandle(handle); + } + + FILETIME_TO_EPOCH(result->m_time, native_file_info.ftLastWriteTime); + FILETIME_TO_EPOCH(result->a_time, native_file_info.ftLastAccessTime); + FILETIME_TO_EPOCH(result->c_time, native_file_info.ftCreationTime); + + if(result->m_time == -EPOCH_DIFFERENCE) { + /* Default to 1970 just like the old driver. */ + result->m_time = 0; + } + + if(result->a_time == -EPOCH_DIFFERENCE) { + result->a_time = result->m_time; + } + + if(result->c_time == -EPOCH_DIFFERENCE) { + result->c_time = result->m_time; + } + } + + if(attributes & FILE_ATTRIBUTE_REPARSE_POINT) { + result->type = EFILE_FILETYPE_SYMLINK; + /* This should be _S_IFLNK, but the old driver always set + * non-directories to _S_IFREG. */ + result->mode |= _S_IFREG; + } else if(attributes & FILE_ATTRIBUTE_DIRECTORY) { + result->type = EFILE_FILETYPE_DIRECTORY; + result->mode |= _S_IFDIR | _S_IEXEC; + } else { + if(is_executable_file(path)) { + result->mode |= _S_IEXEC; + } + + result->type = EFILE_FILETYPE_REGULAR; + result->mode |= _S_IFREG; + } + + if(!(attributes & FILE_ATTRIBUTE_READONLY)) { + result->access = EFILE_ACCESS_READ | EFILE_ACCESS_WRITE; + result->mode |= _S_IREAD | _S_IWRITE; + } else { + result->access = EFILE_ACCESS_READ; + result->mode |= _S_IREAD; + } + + /* Propagate user mode-bits to group/other fields */ + result->mode |= (result->mode & 0700) >> 3; + result->mode |= (result->mode & 0700) >> 6; + + result->size = + ((Uint64)native_file_info.nFileSizeHigh << 32ull) | + (Uint64)native_file_info.nFileSizeLow; + + result->links = MAX(1, native_file_info.nNumberOfLinks); + + result->major_device = get_drive_number(path); + result->minor_device = 0; + result->inode = 0; + result->uid = 0; + result->gid = 0; + + return 0; +} + +posix_errno_t efile_set_permissions(const efile_path_t *path, Uint32 permissions) { + DWORD attributes = GetFileAttributesW((WCHAR*)path->data); + 
+ if(attributes == INVALID_FILE_ATTRIBUTES) { + return windows_to_posix_errno(GetLastError()); + } + + if(permissions & _S_IWRITE) { + attributes &= ~FILE_ATTRIBUTE_READONLY; + } else { + attributes |= FILE_ATTRIBUTE_READONLY; + } + + if(SetFileAttributesW((WCHAR*)path->data, attributes)) { + return 0; + } + + return windows_to_posix_errno(GetLastError()); +} + +posix_errno_t efile_set_owner(const efile_path_t *path, Uint32 owner, Uint32 group) { + (void)path; + (void)owner; + (void)group; + + return 0; +} + +posix_errno_t efile_set_time(const efile_path_t *path, Sint64 a_time, Sint64 m_time, Sint64 c_time) { + FILETIME accessed, modified, created; + DWORD last_error, attributes; + HANDLE handle; + + attributes = GetFileAttributesW((WCHAR*)path->data); + + if(attributes == INVALID_FILE_ATTRIBUTES) { + return windows_to_posix_errno(GetLastError()); + } + + /* If the file is read-only, we have to make it temporarily writable while + * setting new metadata. */ + if(attributes & FILE_ATTRIBUTE_READONLY) { + DWORD without_readonly = attributes & ~FILE_ATTRIBUTE_READONLY; + + if(!SetFileAttributesW((WCHAR*)path->data, without_readonly)) { + return windows_to_posix_errno(GetLastError()); + } + } + + EPOCH_TO_FILETIME(modified, m_time); + EPOCH_TO_FILETIME(accessed, a_time); + EPOCH_TO_FILETIME(created, c_time); + + handle = CreateFileW((WCHAR*)path->data, GENERIC_READ | GENERIC_WRITE, + FILE_SHARE_FLAGS, NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL); + last_error = GetLastError(); + + if(handle != INVALID_HANDLE_VALUE) { + if(SetFileTime(handle, &created, &accessed, &modified)) { + last_error = ERROR_SUCCESS; + } else { + last_error = GetLastError(); + } + + CloseHandle(handle); + } + + if(attributes & FILE_ATTRIBUTE_READONLY) { + SetFileAttributesW((WCHAR*)path->data, attributes); + } + + return windows_to_posix_errno(last_error); +} + +static posix_errno_t internal_read_link(const efile_path_t *path, efile_path_t *result) { + DWORD required_length, 
actual_length; + HANDLE link_handle; + DWORD last_error; + + link_handle = CreateFileW((WCHAR*)path->data, GENERIC_READ, + FILE_SHARE_FLAGS, NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL); + last_error = GetLastError(); + + if(link_handle == INVALID_HANDLE_VALUE) { + return windows_to_posix_errno(last_error); + } + + required_length = GetFinalPathNameByHandleW(link_handle, NULL, 0, 0); + last_error = GetLastError(); + + if(required_length <= 0) { + CloseHandle(link_handle); + return windows_to_posix_errno(last_error); + } + + /* Unlike many other path functions (eg. GetFullPathNameW), this one + * includes the NUL terminator in its required length. */ + if(!enif_alloc_binary(required_length * sizeof(WCHAR), result)) { + CloseHandle(link_handle); + return ENOMEM; + } + + actual_length = GetFinalPathNameByHandleW(link_handle, + (WCHAR*)result->data, required_length, 0); + last_error = GetLastError(); + + CloseHandle(link_handle); + + if(actual_length == 0 || actual_length >= required_length) { + enif_release_binary(result); + return windows_to_posix_errno(last_error); + } + + /* GetFinalPathNameByHandle always prepends with "\\?\" and NUL-terminates, + * so we never have to touch-up the resulting path. 
*/ + + ASSERT_PATH_FORMAT(result); + + return 0; +} + +posix_errno_t efile_read_link(ErlNifEnv *env, const efile_path_t *path, ERL_NIF_TERM *result) { + posix_errno_t posix_errno; + ErlNifBinary result_bin; + DWORD attributes; + + ASSERT_PATH_FORMAT(path); + + attributes = GetFileAttributesW((WCHAR*)path->data); + + if(attributes == INVALID_FILE_ATTRIBUTES) { + return windows_to_posix_errno(GetLastError()); + } else if(!(attributes & FILE_ATTRIBUTE_REPARSE_POINT)) { + return EINVAL; + } + + posix_errno = internal_read_link(path, &result_bin); + + if(posix_errno == 0) { + if(!normalize_path_result(&result_bin)) { + enif_release_binary(&result_bin); + return ENOMEM; + } + + (*result) = enif_make_binary(env, &result_bin); + } + + return posix_errno; +} + +posix_errno_t efile_list_dir(ErlNifEnv *env, const efile_path_t *path, ERL_NIF_TERM *result) { + ERL_NIF_TERM list_head; + WIN32_FIND_DATAW data; + HANDLE search_handle; + WCHAR *search_path; + DWORD last_error; + + ASSERT_PATH_FORMAT(path); + + search_path = enif_alloc(path->size + 2 * sizeof(WCHAR)); + + if(search_path == NULL) { + return ENOMEM; + } + + sys_memcpy(search_path, path->data, path->size); + search_path[PATH_LENGTH(path) + 0] = L'\\'; + search_path[PATH_LENGTH(path) + 1] = L'*'; + search_path[PATH_LENGTH(path) + 2] = L'\0'; + + search_handle = FindFirstFileW(search_path, &data); + last_error = GetLastError(); + + enif_free(search_path); + + if(search_handle == INVALID_HANDLE_VALUE) { + return windows_to_posix_errno(last_error); + } + + list_head = enif_make_list(env, 0); + + do { + int name_length = wcslen(data.cFileName); + + if(!is_ignored_name(name_length, data.cFileName)) { + unsigned char *name_bytes; + ERL_NIF_TERM name_term; + size_t name_size; + + name_size = name_length * sizeof(WCHAR); + + name_bytes = enif_make_new_binary(env, name_size, &name_term); + sys_memcpy(name_bytes, data.cFileName, name_size); + + list_head = enif_make_list_cell(env, name_term, list_head); + } + } 
while(FindNextFileW(search_handle, &data)); + + FindClose(search_handle); + (*result) = list_head; + + return 0; +} + +posix_errno_t efile_rename(const efile_path_t *old_path, const efile_path_t *new_path) { + BOOL old_is_directory, new_is_directory; + DWORD move_flags, last_error; + + ASSERT_PATH_FORMAT(old_path); + ASSERT_PATH_FORMAT(new_path); + + move_flags = MOVEFILE_COPY_ALLOWED | MOVEFILE_WRITE_THROUGH; + + if(MoveFileExW((WCHAR*)old_path->data, (WCHAR*)new_path->data, move_flags)) { + return 0; + } + + last_error = GetLastError(); + + old_is_directory = has_file_attributes(old_path, FILE_ATTRIBUTE_DIRECTORY); + new_is_directory = has_file_attributes(new_path, FILE_ATTRIBUTE_DIRECTORY); + + switch(last_error) { + case ERROR_SHARING_VIOLATION: + case ERROR_ACCESS_DENIED: + if(old_is_directory) { + BOOL moved_into_itself; + + moved_into_itself = (old_path->size <= new_path->size) && + !_wcsnicmp((WCHAR*)old_path->data, (WCHAR*)new_path->data, + PATH_LENGTH(old_path)); + + if(moved_into_itself) { + return EINVAL; + } else if(is_path_root(old_path)) { + return EINVAL; + } + + /* Renaming a directory across volumes needs to be rewritten as + * EXDEV so that the caller can respond by simulating it with + * copy/delete operations. + * + * Files are handled through MOVEFILE_COPY_ALLOWED. */ + if(!has_same_mount_point(old_path, new_path)) { + return EXDEV; + } + } + break; + case ERROR_PATH_NOT_FOUND: + case ERROR_FILE_NOT_FOUND: + return ENOENT; + case ERROR_ALREADY_EXISTS: + case ERROR_FILE_EXISTS: + if(old_is_directory && !new_is_directory) { + return ENOTDIR; + } else if(!old_is_directory && new_is_directory) { + return EISDIR; + } else if(old_is_directory && new_is_directory) { + /* This will fail if the destination isn't empty. 
*/ + if(RemoveDirectoryW((WCHAR*)new_path->data)) { + return efile_rename(old_path, new_path); + } + + return EEXIST; + } else if(!old_is_directory && !new_is_directory) { + /* This is pretty iffy; the public documentation says that the + * operation may EACCES on some systems when either file is open, + * which gives us room to use MOVEFILE_REPLACE_EXISTING and be done + * with it, but the old implementation simulated Unix semantics and + * there's a lot of code that relies on that. + * + * The simulation renames the destination to a scratch name to get + * around the fact that it's impossible to open (and by extension + * rename) a file that's been deleted while open. It has a few + * drawbacks though; + * + * 1) It's not atomic as there's a small window where there's no + * file at all on the destination path. + * 2) It will confuse applications that subscribe to folder + * changes. + * 3) It will fail if we lack general permission to write in the + * same folder. */ + + WCHAR *swap_path = enif_alloc(new_path->size + sizeof(WCHAR) * 64); + + if(swap_path == NULL) { + return ENOMEM; + } else { + static LONGLONG unique_counter = 0; + WCHAR *swap_path_end; + + /* We swap in the same folder as the destination to be + * reasonably sure that it's on the same volume. Note that + * we're avoiding GetTempFileNameW as it will fail on long + * paths. 
*/ + + sys_memcpy(swap_path, (WCHAR*)new_path->data, new_path->size); + swap_path_end = swap_path + PATH_LENGTH(new_path); + + while(!IS_SLASH(*swap_path_end)) { + ASSERT(swap_path_end > swap_path); + swap_path_end--; + } + + StringCchPrintfW(&swap_path_end[1], 64, L"erl-%lx-%llx.tmp", + GetCurrentProcessId(), unique_counter); + InterlockedIncrement64(&unique_counter); + } + + if(MoveFileExW((WCHAR*)new_path->data, swap_path, MOVEFILE_REPLACE_EXISTING)) { + if(MoveFileExW((WCHAR*)old_path->data, (WCHAR*)new_path->data, move_flags)) { + last_error = ERROR_SUCCESS; + DeleteFileW(swap_path); + } else { + last_error = GetLastError(); + MoveFileW(swap_path, (WCHAR*)new_path->data); + } + } else { + last_error = GetLastError(); + DeleteFileW(swap_path); + } + + enif_free(swap_path); + + return windows_to_posix_errno(last_error); + } + + return EEXIST; + } + + return windows_to_posix_errno(last_error); +} + +posix_errno_t efile_make_hard_link(const efile_path_t *existing_path, const efile_path_t *new_path) { + ASSERT_PATH_FORMAT(existing_path); + ASSERT_PATH_FORMAT(new_path); + + if(!CreateHardLinkW((WCHAR*)new_path->data, (WCHAR*)existing_path->data, NULL)) { + return windows_to_posix_errno(GetLastError()); + } + + return 0; +} + +posix_errno_t efile_make_soft_link(const efile_path_t *existing_path, const efile_path_t *new_path) { + DWORD link_flags; + + ASSERT_PATH_FORMAT(existing_path); + ASSERT_PATH_FORMAT(new_path); + + if(has_file_attributes(existing_path, FILE_ATTRIBUTE_DIRECTORY)) { + link_flags = SYMBOLIC_LINK_FLAG_DIRECTORY; + } else { + link_flags = 0; + } + + if(!CreateSymbolicLinkW((WCHAR*)new_path->data, (WCHAR*)existing_path->data, link_flags)) { + return windows_to_posix_errno(GetLastError()); + } + + return 0; +} + +posix_errno_t efile_make_dir(const efile_path_t *path) { + ASSERT_PATH_FORMAT(path); + + if(!CreateDirectoryW((WCHAR*)path->data, NULL)) { + return windows_to_posix_errno(GetLastError()); + } + + return 0; +} + +posix_errno_t 
efile_del_file(const efile_path_t *path) { + ASSERT_PATH_FORMAT(path); + + if(!DeleteFileW((WCHAR*)path->data)) { + DWORD last_error = GetLastError(); + + switch(last_error) { + case ERROR_INVALID_NAME: + /* Attempted to delete a device or similar. */ + return EACCES; + case ERROR_ACCESS_DENIED: + /* Windows NT reports removing a directory as EACCES instead of + * EPERM. */ + if(has_file_attributes(path, FILE_ATTRIBUTE_DIRECTORY)) { + return EPERM; + } + break; + } + + return windows_to_posix_errno(last_error); + } + + return 0; +} + +posix_errno_t efile_del_dir(const efile_path_t *path) { + ASSERT_PATH_FORMAT(path); + + if(!RemoveDirectoryW((WCHAR*)path->data)) { + DWORD last_error = GetLastError(); + + if(last_error == ERROR_DIRECTORY) { + return ENOTDIR; + } + + return windows_to_posix_errno(last_error); + } + + return 0; +} + +posix_errno_t efile_set_cwd(const efile_path_t *path) { + const WCHAR *path_start; + + ASSERT_PATH_FORMAT(path); + + /* We have to use _wchdir since that's the only function that updates the + * per-drive working directory, but it naively assumes that all paths + * starting with \\ are UNC paths, so we have to skip the \\?\-prefix. */ + path_start = (WCHAR*)path->data + LP_PREFIX_LENGTH; + + if(_wchdir(path_start)) { + return windows_to_posix_errno(GetLastError()); + } + + return 0; +} + +static int is_valid_drive(int device_index) { + WCHAR drive_path[4] = {L'?', L':', L'\\', L'\0'}; + + if(device_index == 0) { + /* Default drive; always valid. 
*/ + return 1; + } else if(device_index > (L'Z' - L'A' + 1)) { + return 0; + } + + drive_path[0] = device_index + L'A' - 1; + + switch(GetDriveTypeW(drive_path)) { + case DRIVE_NO_ROOT_DIR: + case DRIVE_UNKNOWN: + return 0; + } + + return 1; +} + +posix_errno_t efile_get_device_cwd(ErlNifEnv *env, int device_index, ERL_NIF_TERM *result) { + ErlNifBinary result_bin; + + /* _wgetdcwd might crash the entire emulator on debug builds since the CRT + * invalid parameter handler asserts if passed a non-existent drive (Or + * simply one that has been unmounted), so we check it ourselves to avoid + * that. */ + if(!is_valid_drive(device_index)) { + return EACCES; + } + + if(!enif_alloc_binary(MAX_PATH * sizeof(WCHAR), &result_bin)) { + return ENOMEM; + } + + if(_wgetdcwd(device_index, (WCHAR*)result_bin.data, MAX_PATH) == NULL) { + enif_release_binary(&result_bin); + return EACCES; + } + + if(!normalize_path_result(&result_bin)) { + enif_release_binary(&result_bin); + return ENOMEM; + } + + (*result) = enif_make_binary(env, &result_bin); + + return 0; +} + +posix_errno_t efile_get_cwd(ErlNifEnv *env, ERL_NIF_TERM *result) { + return efile_get_device_cwd(env, 0, result); +} + +posix_errno_t efile_altname(ErlNifEnv *env, const efile_path_t *path, ERL_NIF_TERM *result) { + ErlNifBinary result_bin; + + ASSERT_PATH_FORMAT(path); + + if(is_path_root(path)) { + /* Root paths can't be queried so we'll just return them as they are. */ + if(!enif_alloc_binary(path->size, &result_bin)) { + return ENOMEM; + } + + sys_memcpy(result_bin.data, path->data, path->size); + } else { + WIN32_FIND_DATAW data; + HANDLE handle; + + WCHAR *name_buffer; + int name_length; + + /* Reject path wildcards. 
*/ + if(wcspbrk(&((const WCHAR*)path->data)[4], L"?*")) { + return ENOENT; + } + + handle = FindFirstFileW((const WCHAR*)path->data, &data); + + if(handle == INVALID_HANDLE_VALUE) { + return windows_to_posix_errno(GetLastError()); + } + + FindClose(handle); + + name_length = wcslen(data.cAlternateFileName); + + if(name_length > 0) { + name_buffer = data.cAlternateFileName; + } else { + name_length = wcslen(data.cFileName); + name_buffer = data.cFileName; + } + + /* Include NUL-terminator; it will be removed after normalization. */ + name_length += 1; + + if(!enif_alloc_binary(name_length * sizeof(WCHAR), &result_bin)) { + return ENOMEM; + } + + sys_memcpy(result_bin.data, name_buffer, name_length * sizeof(WCHAR)); + } + + if(!normalize_path_result(&result_bin)) { + enif_release_binary(&result_bin); + return ENOMEM; + } + + (*result) = enif_make_binary(env, &result_bin); + + return 0; +} + +static int windows_to_posix_errno(DWORD last_error) { + switch(last_error) { + case ERROR_SUCCESS: + return 0; + case ERROR_INVALID_FUNCTION: + case ERROR_INVALID_DATA: + case ERROR_INVALID_PARAMETER: + case ERROR_INVALID_TARGET_HANDLE: + case ERROR_INVALID_CATEGORY: + case ERROR_NEGATIVE_SEEK: + return EINVAL; + case ERROR_DIR_NOT_EMPTY: + return EEXIST; + case ERROR_BAD_FORMAT: + return ENOEXEC; + case ERROR_PATH_NOT_FOUND: + case ERROR_FILE_NOT_FOUND: + case ERROR_NO_MORE_FILES: + return ENOENT; + case ERROR_TOO_MANY_OPEN_FILES: + return EMFILE; + case ERROR_ACCESS_DENIED: + case ERROR_INVALID_ACCESS: + case ERROR_CURRENT_DIRECTORY: + case ERROR_SHARING_VIOLATION: + case ERROR_LOCK_VIOLATION: + case ERROR_INVALID_PASSWORD: + case ERROR_DRIVE_LOCKED: + return EACCES; + case ERROR_INVALID_HANDLE: + return EBADF; + case ERROR_NOT_ENOUGH_MEMORY: + case ERROR_OUTOFMEMORY: + case ERROR_OUT_OF_STRUCTURES: + return ENOMEM; + case ERROR_INVALID_DRIVE: + case ERROR_BAD_UNIT: + case ERROR_NOT_READY: + case ERROR_REM_NOT_LIST: + case ERROR_DUP_NAME: + case ERROR_BAD_NETPATH: + case 
ERROR_NETWORK_BUSY: + case ERROR_DEV_NOT_EXIST: + case ERROR_BAD_NET_NAME: + return ENXIO; + case ERROR_NOT_SAME_DEVICE: + return EXDEV; + case ERROR_WRITE_PROTECT: + return EROFS; + case ERROR_BAD_LENGTH: + case ERROR_BUFFER_OVERFLOW: + return E2BIG; + case ERROR_SEEK: + case ERROR_SECTOR_NOT_FOUND: + return ESPIPE; + case ERROR_NOT_DOS_DISK: + return ENODEV; + case ERROR_GEN_FAILURE: + return ENODEV; + case ERROR_SHARING_BUFFER_EXCEEDED: + case ERROR_NO_MORE_SEARCH_HANDLES: + return EMFILE; + case ERROR_HANDLE_EOF: + case ERROR_BROKEN_PIPE: + return EPIPE; + case ERROR_HANDLE_DISK_FULL: + case ERROR_DISK_FULL: + return ENOSPC; + case ERROR_NOT_SUPPORTED: + return ENOTSUP; + case ERROR_FILE_EXISTS: + case ERROR_ALREADY_EXISTS: + case ERROR_CANNOT_MAKE: + return EEXIST; + case ERROR_ALREADY_ASSIGNED: + return EBUSY; + case ERROR_NO_PROC_SLOTS: + return EAGAIN; + case ERROR_CANT_RESOLVE_FILENAME: + return EMLINK; + case ERROR_PRIVILEGE_NOT_HELD: + return EPERM; + case ERROR_ARENA_TRASHED: + case ERROR_INVALID_BLOCK: + case ERROR_BAD_ENVIRONMENT: + case ERROR_BAD_COMMAND: + case ERROR_CRC: + case ERROR_OUT_OF_PAPER: + case ERROR_READ_FAULT: + case ERROR_WRITE_FAULT: + case ERROR_WRONG_DISK: + case ERROR_NET_WRITE_FAULT: + return EIO; + default: /* not to do with files I expect. */ + return EIO; + } +} |