aboutsummaryrefslogtreecommitdiffstats
path: root/erts
diff options
context:
space:
mode:
authorTuncer Ayaz <[email protected]>2011-01-13 12:36:14 +0100
committerLukas Larsson <[email protected]>2011-11-29 14:30:35 +0100
commit195e1f19b06095f39a4fb0da46dfab2ec5b10e9a (patch)
tree8f4a8c587b72c3a20d18b4cb6ccd7d1d52eb6286 /erts
parent7292c3d9f5285592aa4de996f6f106cd365d7895 (diff)
downloadotp-195e1f19b06095f39a4fb0da46dfab2ec5b10e9a.tar.gz
otp-195e1f19b06095f39a4fb0da46dfab2ec5b10e9a.tar.bz2
otp-195e1f19b06095f39a4fb0da46dfab2ec5b10e9a.zip
Implement file:sendfile
Allow Erlang code to use sendfile() where available by wrapping it as file:sendfile/4 and file:sendfile/2. sendfile(2) - Linux man page: "sendfile() copies data between one file descriptor and another. Because this copying is done within the kernel, sendfile() is more efficient than the combination of read(2) and write(2), which would require transferring data to and from user space."
Diffstat (limited to 'erts')
-rw-r--r--erts/configure.in16
-rw-r--r--erts/emulator/drivers/common/efile_drv.c234
-rw-r--r--erts/emulator/drivers/common/erl_efile.h2
-rw-r--r--erts/emulator/drivers/unix/unix_efile.c44
-rw-r--r--erts/emulator/drivers/win32/win_efile.c24
-rw-r--r--erts/preloaded/src/prim_file.erl19
6 files changed, 336 insertions, 3 deletions
diff --git a/erts/configure.in b/erts/configure.in
index e3eb6034e6..548e4cc9d5 100644
--- a/erts/configure.in
+++ b/erts/configure.in
@@ -1690,6 +1690,22 @@ dnl fdatasync requires linking against -lrt on SunOS <= 5.10.
dnl OpenSolaris 2009.06 is SunOS 5.11 and does not require -lrt.
AC_SEARCH_LIBS(fdatasync, [rt])
+
+dnl sendfile syscall
+dnl Probe for sendfile per host OS. HAVE_SENDFILE is what the unix
+dnl efile implementation tests to choose between the real call and
+dnl the ENOTSUP stub.
+case $host_os in
+ linux*|freebsd*|dragonfly*|darwin*)
+dnl sendfile is expected in libc here; AC_CHECK_FUNCS defines
+dnl HAVE_SENDFILE when it links.
+ AC_CHECK_FUNCS([sendfile])
+ ;;
+ solaris*)
+dnl Also try libsendfile; HAVE_SENDFILE is defined by hand on success.
+ AC_SEARCH_LIBS(sendfile, sendfile, AC_DEFINE(HAVE_SENDFILE, 1))
+ ;;
+ win32)
+dnl No function check here, only the extra library is added.
+dnl NOTE(review): presumably for TransmitFile, which the win32 stub
+dnl does not call yet - confirm.
+ LIBS="$LIBS -lmswsock"
+ ;;
+ *)
+dnl Any other host: no probe, HAVE_SENDFILE stays undefined.
+ ;;
+esac
+
dnl ----------------------------------------------------------------------
dnl Checks for library functions.
dnl ----------------------------------------------------------------------
diff --git a/erts/emulator/drivers/common/efile_drv.c b/erts/emulator/drivers/common/efile_drv.c
index 901d98c09d..509c4fe48c 100644
--- a/erts/emulator/drivers/common/efile_drv.c
+++ b/erts/emulator/drivers/common/efile_drv.c
@@ -55,6 +55,7 @@
#define FILE_READ_LINE 29
#define FILE_FDATASYNC 30
#define FILE_FADVISE 31
+#define FILE_SENDFILE 32
/* Return codes */
@@ -217,6 +218,7 @@ typedef unsigned char uchar;
static ErlDrvData file_start(ErlDrvPort port, char* command);
static int file_init(void);
static void file_stop(ErlDrvData);
+static void file_ready_output(ErlDrvData data, ErlDrvEvent event);
static void file_output(ErlDrvData, char* buf, int len);
static int file_control(ErlDrvData, unsigned int command,
char* buf, int len, char **rbuf, int rlen);
@@ -224,6 +226,7 @@ static void file_timeout(ErlDrvData);
static void file_outputv(ErlDrvData, ErlIOVec*);
static void file_async_ready(ErlDrvData, ErlDrvThreadData);
static void file_flush(ErlDrvData);
+static void file_stop_select(ErlDrvEvent event, void* _);
@@ -253,6 +256,18 @@ typedef struct {
ErlDrvPDL q_mtx; /* Mutex for the driver queue, known by the emulator. Also used for
mutual exclusion when accessing field(s) below. */
size_t write_buffered;
+ ErlDrvTermData caller; /* recipient of sync reply */
+ /* sendfile call state to retry/resume on event */
+ int command; /* same as d->command. for sendfile. TODO: this seems wrong */
+ struct {
+ int eagain;
+ int out_fd;
+ /* TODO: Use Sint64 instead? What about 32-bit off_t linux */
+ off_t offset;
+ size_t count;
+ size_t chunksize;
+ ErlDrvSInt64 written;
+ } sendfile;
} file_descriptor;
@@ -264,7 +279,7 @@ struct erl_drv_entry efile_driver_entry = {
file_stop,
file_output,
NULL,
- NULL,
+ file_ready_output,
"efile",
NULL,
NULL,
@@ -279,7 +294,9 @@ struct erl_drv_entry efile_driver_entry = {
ERL_DRV_EXTENDED_MAJOR_VERSION,
ERL_DRV_EXTENDED_MINOR_VERSION,
ERL_DRV_FLAG_USE_PORT_LOCKING,
- NULL
+ NULL,
+ NULL,
+ file_stop_select
};
@@ -613,6 +630,111 @@ static struct t_data *cq_deq(file_descriptor *desc) {
}
+/*********************************************************************
+ * Reply term helpers (atoms and driver term spec builders)
+ */
+
+/* Atoms used in the {efile_reply, ...} messages; set up by INIT_ATOM()
+ * calls in file_init(). */
+static ErlDrvTermData am_ok;
+static ErlDrvTermData am_error;
+static ErlDrvTermData am_efile_reply;
+
+/* INIT_ATOM(foo) assigns am_foo = driver_mk_atom("foo"). */
+#define INIT_ATOM(NAME) am_ ## NAME = driver_mk_atom(#NAME)
+
+/*
+ * Every LOAD_<KIND>(vec, i, x) macro below stores a (tag, value) pair in
+ * vec[i] and vec[i+1] and evaluates to the next free index, i.e.
+ * i + LOAD_<KIND>_CNT. The *_CNT constants are used to size the spec
+ * array up front. Note that `i` is expanded more than once, so it must
+ * not have side effects.
+ */
+
+/* Atom element: value is an atom term. */
+#define LOAD_ATOM_CNT 2
+#define LOAD_ATOM(vec, i, atom) \
+ (((vec)[(i)] = ERL_DRV_ATOM), \
+ ((vec)[(i)+1] = (atom)), \
+ ((i)+LOAD_ATOM_CNT))
+
+/* Integer element: value is cast to ErlDrvTermData. */
+#define LOAD_INT_CNT 2
+#define LOAD_INT(vec, i, val) \
+ (((vec)[(i)] = ERL_DRV_INT), \
+ ((vec)[(i)+1] = (ErlDrvTermData)(val)), \
+ ((i)+LOAD_INT_CNT))
+
+/* 64-bit integer element: ef_send_ok_int64() passes a pointer to an
+ * ErlDrvSInt64 as `val`, not the integer itself. */
+#define LOAD_INT64_CNT 2
+#define LOAD_INT64(vec, i, val) \
+ (((vec)[(i)] = ERL_DRV_INT64), \
+ ((vec)[(i)+1] = (ErlDrvTermData)(val)), \
+ ((i)+LOAD_INT64_CNT))
+
+/* Port element: value is a port term (see driver_mk_port() at the
+ * call sites). */
+#define LOAD_PORT_CNT 2
+#define LOAD_PORT(vec, i, port) \
+ (((vec)[(i)] = ERL_DRV_PORT), \
+ ((vec)[(i)+1] = (port)), \
+ ((i)+LOAD_PORT_CNT))
+
+/* Pid element: value is a pid term. */
+#define LOAD_PID_CNT 2
+#define LOAD_PID(vec, i, pid) \
+ (((vec)[(i)] = ERL_DRV_PID), \
+ ((vec)[(i)+1] = (pid)), \
+ ((i)+LOAD_PID_CNT))
+
+/* Tuple constructor: `size` is the arity of the tuple being built. */
+#define LOAD_TUPLE_CNT 2
+#define LOAD_TUPLE(vec, i, size) \
+ (((vec)[(i)] = ERL_DRV_TUPLE), \
+ ((vec)[(i)+1] = (size)), \
+ ((i)+LOAD_TUPLE_CNT))
+
+/* send:
+** {efile_reply, Pid, Port, {ok, int64()}}
+*/
+
+/*
+ * Deliver {efile_reply, Caller, Port, {ok, N}} to `caller`.
+ *
+ * desc   - file descriptor state; only desc->port is read.
+ * caller - pid term of the process that receives the message.
+ * n      - points to the 64-bit value to send; the term spec stores the
+ *          pointer, so it must stay valid until driver_send_term() returns.
+ *
+ * Returns whatever driver_send_term() returns.
+ */
+static int ef_send_ok_int64(file_descriptor *desc, ErlDrvTermData caller,
+                            ErlDrvSInt64 *n)
+{
+    /* two atoms, one pid, one port, one int64, two tuple constructors */
+    ErlDrvTermData term[2*LOAD_TUPLE_CNT + LOAD_INT64_CNT + LOAD_PORT_CNT
+                        + LOAD_PID_CNT + 2*LOAD_ATOM_CNT];
+    ErlDrvTermData port_term = driver_mk_port(desc->port);
+    int pos;
+
+    pos = LOAD_ATOM(term, 0, am_efile_reply);
+    pos = LOAD_PID(term, pos, caller);
+    pos = LOAD_PORT(term, pos, port_term);
+    /* inner tuple: {ok, N} */
+    pos = LOAD_ATOM(term, pos, am_ok);
+    pos = LOAD_INT64(term, pos, n);
+    pos = LOAD_TUPLE(term, pos, 2);
+    /* outer tuple: {efile_reply, Caller, Port, Inner} */
+    pos = LOAD_TUPLE(term, pos, 4);
+    ASSERT(pos == sizeof(term)/sizeof(*term));
+
+    return driver_send_term(desc->port, caller, term, pos);
+}
+
+/*
+ * Build the atom for a POSIX error code: the erl_errno_id() name of
+ * `err`, converted to lower case.
+ *
+ * The copy is bounded by the local buffer, so an unexpectedly long id is
+ * truncated rather than overrunning the stack.
+ */
+static ErlDrvTermData error_atom(int err)
+{
+    char errstr[256];
+    const char *id = erl_errno_id(err);
+    size_t n = 0;
+
+    while (id[n] != '\0' && n < sizeof(errstr) - 1) {
+        /* tolower() is only defined for unsigned char values and EOF */
+        errstr[n] = (char) tolower((unsigned char) id[n]);
+        n++;
+    }
+    errstr[n] = '\0';
+    return driver_mk_atom(errstr);
+}
+
+/* send:
+** {efile_reply, Pid, Port, {error, posix_error()}}
+*/
+
+/*
+ * Deliver {efile_reply, Caller, Port, {error, Reason}} to `caller`, where
+ * Reason is the atom error_atom() produces for the POSIX code `e`.
+ *
+ * Resets desc->caller to 0 before sending; the message itself goes to the
+ * `caller` argument. Returns whatever driver_send_term() returns.
+ */
+static int ef_send_posix_error(file_descriptor *desc, ErlDrvTermData caller,
+                               int e)
+{
+    /* three atoms, one pid, one port, two tuple constructors */
+    ErlDrvTermData term[2*LOAD_TUPLE_CNT + LOAD_PORT_CNT + LOAD_PID_CNT
+                        + 3*LOAD_ATOM_CNT];
+    /* TODO: safe? set of error codes should be limited and safe */
+    ErlDrvTermData reason = error_atom(e);
+    ErlDrvTermData port_term = driver_mk_port(desc->port);
+    int pos;
+
+    pos = LOAD_ATOM(term, 0, am_efile_reply);
+    pos = LOAD_PID(term, pos, caller);
+    pos = LOAD_PORT(term, pos, port_term);
+    /* inner tuple: {error, Reason} */
+    pos = LOAD_ATOM(term, pos, am_error);
+    pos = LOAD_ATOM(term, pos, reason);
+    pos = LOAD_TUPLE(term, pos, 2);
+    /* outer tuple: {efile_reply, Caller, Port, Inner} */
+    pos = LOAD_TUPLE(term, pos, 4);
+    ASSERT(pos == sizeof(term)/sizeof(*term));
+
+    desc->caller = 0;
+    return driver_send_term(desc->port, caller, term, pos);
+}
/*********************************************************************
* Driver entry point -> init
@@ -628,6 +750,11 @@ file_init(void)
? atoi(buf)
: 0);
driver_system_info(&sys_info, sizeof(ErlDrvSysInfo));
+
+ INIT_ATOM(ok);
+ INIT_ATOM(error);
+ INIT_ATOM(efile_reply);
+
return 0;
}
@@ -1694,6 +1821,74 @@ static void invoke_fadvise(void *data)
d->result_ok = efile_fadvise(&d->errInfo, fd, offset, length, advise);
}
+
+
+static void do_sendfile(file_descriptor *desc);
+/*
+ * Output-ready callback installed in efile_driver_entry.
+ *
+ * Only a pending FILE_SENDFILE is acted upon: the write selection on the
+ * destination descriptor is switched off and the transfer is resumed via
+ * do_sendfile(), which selects again itself if more data remains. For any
+ * other command the event is ignored. `event` is not inspected.
+ */
+static void file_ready_output(ErlDrvData data, ErlDrvEvent event)
+{
+    file_descriptor *desc = (file_descriptor *) data;
+
+    if (desc->command != FILE_SENDFILE) {
+        return;
+    }
+
+    driver_select(desc->port, (ErlDrvEvent)desc->sendfile.out_fd,
+                  ERL_DRV_WRITE, 0);
+    do_sendfile(desc);
+}
+
+/*
+ * Callback installed as the last member of efile_driver_entry.
+ * Currently a no-op: neither `event` nor the second argument is used and
+ * nothing is closed or released here.
+ * NOTE(review): do_sendfile() selects with ERL_DRV_USE, so this is
+ * presumably where the destination descriptor would be released if the
+ * driver owned it - confirm against the erl_driver stop_select contract.
+ */
+static void file_stop_select(ErlDrvEvent event, void* _)
+{
+ /* TODO: close socket? */
+}
+
+/*
+ * `invoke` function for a FILE_SENDFILE request (assigned in file_output).
+ * It performs no I/O; it only clears the `again` flag of the t_data. The
+ * transfer itself is carried out by do_sendfile().
+ */
+static void invoke_sendfile(void *data)
+{
+    struct t_data *req = (struct t_data *) data;
+
+    req->again = 0;
+}
+
+/*
+ * Send the next chunk of the pending sendfile request described by
+ * d->sendfile and decide what happens next.
+ *
+ * At most min(count, chunksize) bytes are requested from efile_sendfile().
+ * Afterwards:
+ *   - hard error: {error, Reason} is sent to d->caller and the request
+ *     ends;
+ *   - EAGAIN/EINTR: the bytes that did get out are accounted for and the
+ *     driver waits for out_fd to become writable again;
+ *   - success with bytes remaining: same wait, the next chunk is sent from
+ *     file_ready_output();
+ *   - success with nothing remaining: {ok, Written} is sent to d->caller.
+ *
+ * NOTE(review): the Linux/Solaris branch of efile_sendfile() reports every
+ * short write, including a 0-byte one, as EAGAIN. If the request extends
+ * past the end of the input file this looks like it would retry forever -
+ * confirm and handle end-of-file explicitly.
+ */
+static void do_sendfile(file_descriptor *d)
+{
+    off_t offset = d->sendfile.offset;
+    size_t chunksize = d->sendfile.count < d->sendfile.chunksize
+        ? d->sendfile.count : d->sendfile.chunksize;
+    Efile_error errInfo;
+    int retry;
+
+    if (efile_sendfile(&errInfo, d->fd, d->sendfile.out_fd,
+                       &offset, &chunksize)) {
+        retry = 0;
+    } else if (errInfo.posix_errno == EAGAIN
+               || errInfo.posix_errno == EINTR) {
+        retry = 1;
+        d->sendfile.eagain++;
+    } else {
+        ef_send_posix_error(d, d->caller, errInfo.posix_errno);
+        return;
+    }
+
+    /* chunksize now holds the number of bytes actually sent; it may be 0
+     * when the call has to be retried. */
+    d->sendfile.offset += chunksize;
+    d->sendfile.written += chunksize;
+    d->sendfile.count -= chunksize;
+
+    if (retry || d->sendfile.count > 0) {
+        /* Continue from file_ready_output() once out_fd is writable. */
+        driver_select(d->port, (ErlDrvEvent)d->sendfile.out_fd,
+                      ERL_DRV_USE|ERL_DRV_WRITE, 1);
+    } else {
+        ef_send_ok_int64(d, d->caller, &d->sendfile.written);
+    }
+}
+
static void free_readdir(void *data)
{
struct t_data *d = (struct t_data *) data;
@@ -2105,6 +2300,13 @@ file_async_ready(ErlDrvData e, ErlDrvThreadData data)
}
free_preadv(data);
break;
+ case FILE_SENDFILE:
+ /* Return 'ok' and let prim_file:sendfile wait for message */
+ reply_ok(desc);
+ driver_select(desc->port, (ErlDrvEvent)desc->sendfile.out_fd,
+ ERL_DRV_USE|ERL_DRV_WRITE, 1);
+ free_data(data);
+ break;
default:
abort();
}
@@ -2452,6 +2654,34 @@ file_output(ErlDrvData e, char* buf, int count)
goto done;
}
+ case FILE_SENDFILE:
+ {
+ d = EF_SAFE_ALLOC(sizeof(struct t_data));
+ d->fd = fd;
+ d->command = command;
+ d->invoke = invoke_sendfile;
+ d->free = free_data;
+ d->level = 2;
+ desc->sendfile.out_fd = get_int32((uchar*) buf);
+ /* TODO: are off_t and size_t 64bit on all platforms?
+ off_t is 32bit on win32 msvc. maybe configurable in msvc.
+ Maybe use '#if SIZEOF_SIZE_T == 4'? */
+ desc->sendfile.offset = get_int64(((uchar*) buf)
+ + sizeof(Sint32));
+ desc->sendfile.count = get_int64(((uchar*) buf)
+ + sizeof(Sint32)
+ + sizeof(Sint64));
+ desc->sendfile.chunksize = get_int64(((uchar*) buf)
+ + sizeof(Sint32)
+ + 2*sizeof(Sint64));
+ desc->sendfile.written = 0;
+ desc->sendfile.eagain = 0;
+ /* TODO: shouldn't d->command be enough? */
+ desc->command = command;
+ desc->caller = driver_caller(desc->port);
+ goto done;
+ }
+
}
/*
diff --git a/erts/emulator/drivers/common/erl_efile.h b/erts/emulator/drivers/common/erl_efile.h
index 3097ded3f1..3c6c2ec2db 100644
--- a/erts/emulator/drivers/common/erl_efile.h
+++ b/erts/emulator/drivers/common/erl_efile.h
@@ -162,3 +162,5 @@ int efile_symlink(Efile_error* errInfo, char* old, char* new);
int efile_may_openfile(Efile_error* errInfo, char *name);
int efile_fadvise(Efile_error* errInfo, int fd, Sint64 offset, Sint64 length,
int advise);
+/*
+ * Send up to *count bytes of in_fd, starting at *offset, to out_fd.
+ * The result is the value of check_error(); do_sendfile() treats non-zero
+ * as success and reads errInfo->posix_errno otherwise. The unix
+ * implementation writes the number of bytes actually sent back to *count;
+ * the win32 implementation currently always fails with ENOTSUP.
+ */
+int efile_sendfile(Efile_error* errInfo, int in_fd, int out_fd, off_t *offset,
+ size_t *count);
diff --git a/erts/emulator/drivers/unix/unix_efile.c b/erts/emulator/drivers/unix/unix_efile.c
index 4b3934657c..5b001b3819 100644
--- a/erts/emulator/drivers/unix/unix_efile.c
+++ b/erts/emulator/drivers/unix/unix_efile.c
@@ -33,6 +33,9 @@
#include <sys/types.h>
#include <sys/uio.h>
#endif
+#if defined(__linux__) || (defined(__sun) && defined(__SVR4))
+#include <sys/sendfile.h>
+#endif
#if defined(__APPLE__) && defined(__MACH__) && !defined(__DARWIN__)
#define DARWIN 1
@@ -1464,3 +1467,44 @@ efile_fadvise(Efile_error* errInfo, int fd, Sint64 offset,
return check_error(0, errInfo);
#endif
}
+
+#ifdef HAVE_SENDFILE
+/*
+ * Send up to *count bytes of in_fd, starting at *offset, to out_fd using
+ * the platform's sendfile().
+ *
+ * On return *count holds the number of bytes that were sent (0 when the
+ * call failed with EINTR/EAGAIN on Linux/Solaris). The result is the value
+ * of check_error(), which also fills in errInfo.
+ *
+ * Linux/Solaris: a short write is reported as a failure with errno set to
+ * EAGAIN so that the caller retries with the remainder; *offset is
+ * advanced by the system call itself.
+ * NOTE(review): a 0-byte result (input file exhausted) is reported as
+ * EAGAIN as well, which the caller cannot tell apart from "socket
+ * busy" - confirm whether end-of-file needs its own result.
+ *
+ * Darwin/FreeBSD/DragonFly: *offset is passed by value and is not updated
+ * here.
+ */
+int
+efile_sendfile(Efile_error* errInfo, int in_fd, int out_fd,
+               off_t *offset, size_t *count)
+{
+#if defined(__linux__) || (defined(__sun) && defined(__SVR4))
+    ssize_t retval = sendfile(out_fd, in_fd, offset, *count);
+    if (retval >= 0) {
+        if ((size_t) retval != *count) {
+            /* Short write: report what was sent and ask for a retry. */
+            *count = (size_t) retval;
+            retval = -1;
+            errno = EAGAIN;
+        }
+        /* Otherwise the whole chunk went out and *count is already right. */
+    } else if (retval == -1 && (errno == EINTR || errno == EAGAIN)) {
+        *count = 0;
+    }
+    return check_error(retval == -1 ? -1 : 0, errInfo);
+#elif defined(DARWIN)
+    /* len is in/out: bytes to send on entry, bytes sent on return. */
+    off_t len = *count;
+    int retval = sendfile(in_fd, out_fd, *offset, &len, NULL, 0);
+    *count = len;
+    return check_error(retval, errInfo);
+#elif defined(__FreeBSD__) || defined(__DragonFly__)
+    /* len is out only: bytes sent on return. */
+    off_t len = 0;
+    int retval = sendfile(in_fd, out_fd, *offset, *count, NULL, &len, 0);
+    *count = len;
+    return check_error(retval, errInfo);
+#else
+    /* HAVE_SENDFILE is set but the calling convention is unknown for this
+     * platform; fail cleanly instead of falling off the end of a non-void
+     * function. */
+    errno = ENOTSUP;
+    return check_error(-1, errInfo);
+#endif
+}
+#else /* no sendfile() */
+/* Stub for platforms without sendfile(): always fails with ENOTSUP. */
+int
+efile_sendfile(Efile_error* errInfo, int in_fd, int out_fd,
+               off_t *offset, size_t *count)
+{
+    errno = ENOTSUP;
+    return check_error(-1, errInfo);
+}
+#endif
diff --git a/erts/emulator/drivers/win32/win_efile.c b/erts/emulator/drivers/win32/win_efile.c
index 931bb196f1..0f41a09bf6 100644
--- a/erts/emulator/drivers/win32/win_efile.c
+++ b/erts/emulator/drivers/win32/win_efile.c
@@ -1581,3 +1581,27 @@ efile_fadvise(Efile_error* errInfo, int fd, Sint64 offset,
errno = ERROR_SUCCESS;
return check_error(0, errInfo);
}
+
+/*
+ * Windows placeholder for efile_sendfile(): none of the arguments except
+ * errInfo is used and the call always fails with errno set to ENOTSUP.
+ * The commented-out code below is an unfinished sketch of a
+ * TransmitFile-based version, kept for reference.
+ */
+int
+efile_sendfile(Efile_error* errInfo, int in_fd, int out_fd,
+ off_t *offset, size_t *count)
+{
+ /* TODO: write proper Windows TransmitFile based implementation */
+ /* use overlapped I/O and driver_select on the structure? */
+ /* int res = efile_seek(errInfo, in_fd, *offset, EFILE_SEEK_SET, NULL); */
+ /* if (res) { */
+ /* /\* TODO: could in_fd be shared and require protecting/locking */
+ /* efile_seek/SetFilePointerEx? *\/ */
+ /* if (TransmitFile((SOCKET) out_fd, (HANDLE) in_fd, *count, */
+ /* 0, NULL, NULL, 0)) { */
+ /* return check_error(0, errInfo); */
+ /* } else { */
+ /* /\* TODO: correct error handling? *\/ */
+ /* return set_error(errInfo); */
+ /* } */
+ /* } else { */
+ /* return res; */
+ /* } */
+ /* Until the above is finished, report "not supported". */
+ errno = ENOTSUP;
+ return check_error(-1, errInfo);
+}
diff --git a/erts/preloaded/src/prim_file.erl b/erts/preloaded/src/prim_file.erl
index 30b7a5246a..f3f977a30b 100644
--- a/erts/preloaded/src/prim_file.erl
+++ b/erts/preloaded/src/prim_file.erl
@@ -26,7 +26,8 @@
%% Generic file contents operations
-export([open/2, close/1, datasync/1, sync/1, advise/4, position/2, truncate/1,
- write/2, pwrite/2, pwrite/3, read/2, read_line/1, pread/2, pread/3, copy/3]).
+ write/2, pwrite/2, pwrite/3, read/2, read_line/1, pread/2, pread/3,
+ copy/3, sendfile/5]).
%% Specialized file operations
-export([open/1, open/3]).
@@ -98,6 +99,7 @@
-define(FILE_READ_LINE, 29).
-define(FILE_FDATASYNC, 30).
-define(FILE_ADVISE, 31).
+-define(FILE_SENDFILE, 32).
%% Driver responses
-define(FILE_RESP_OK, 0).
@@ -539,6 +541,21 @@ write_file(_, _) ->
{error, badarg}.
+%% Returns {error, Reason} | {ok, BytesCopied}
+%%
+%% Asks the driver to send Bytes bytes of the file, starting at Offset,
+%% to the descriptor DestFD in pieces of at most ChunkSize bytes, then
+%% waits for the {efile_reply, Self, Port, Result} message the driver
+%% sends when the transfer has finished or failed.
+%%
+%% Only that reply (or the exit of Port, if the caller traps exits) is
+%% taken out of the mailbox; unrelated messages are left in place.
+sendfile(#file_descriptor{module = ?MODULE, data = {Port, _}},
+         DestFD, Offset, Bytes, ChunkSize) ->
+    Cmd = <<?FILE_SENDFILE, DestFD:32, Offset:64, Bytes:64, ChunkSize:64>>,
+    case drv_command(Port, Cmd) of
+        ok ->
+            Self = self(),
+            %% Should we use a ref()?
+            receive
+                {efile_reply, Self, Port, {ok, _Written}=OKRes} ->
+                    OKRes;
+                {efile_reply, Self, Port, {error, _PosixError}=Error} ->
+                    Error;
+                {'EXIT', Port, _Reason} ->
+                    %% Without this a caller that traps exits would wait
+                    %% forever if the port dies mid-transfer.
+                    {error, port_died}
+            end;
+        {error, _Reason} = DrvError ->
+            DrvError
+    end.
%%%-----------------------------------------------------------------
%%% Functions operating on files without handle to the file. ?DRV.