diff options
Diffstat (limited to 'erts/emulator')
-rw-r--r-- | erts/emulator/drivers/common/efile_drv.c | 202 | ||||
-rw-r--r-- | erts/emulator/drivers/common/erl_efile.h | 17 | ||||
-rw-r--r-- | erts/emulator/drivers/common/inet_drv.c | 67 | ||||
-rw-r--r-- | erts/emulator/drivers/unix/unix_efile.c | 77 |
4 files changed, 350 insertions, 13 deletions
diff --git a/erts/emulator/drivers/common/efile_drv.c b/erts/emulator/drivers/common/efile_drv.c index 901d98c09d..5c52b99348 100644 --- a/erts/emulator/drivers/common/efile_drv.c +++ b/erts/emulator/drivers/common/efile_drv.c @@ -55,6 +55,7 @@ #define FILE_READ_LINE 29 #define FILE_FDATASYNC 30 #define FILE_FADVISE 31 +#define FILE_SENDFILE 32 /* Return codes */ @@ -98,7 +99,13 @@ # include "config.h" #endif #include <stdlib.h> + +// Need (NON)BLOCKING macros for sendfile +#ifndef WANT_NONBLOCKING +#define WANT_NONBLOCKING +#endif #include "sys.h" + #include "erl_driver.h" #include "erl_efile.h" #include "erl_threads.h" @@ -225,9 +232,16 @@ static void file_outputv(ErlDrvData, ErlIOVec*); static void file_async_ready(ErlDrvData, ErlDrvThreadData); static void file_flush(ErlDrvData); +#ifdef HAVE_SENDFILE +static void file_ready_output(ErlDrvData data, ErlDrvEvent event); +static void file_stop_select(ErlDrvEvent event, void* _); +#endif /* HAVE_SENDFILE */ enum e_timer {timer_idle, timer_again, timer_write}; +#ifdef HAVE_SENDFILE +enum e_sendfile {sending, not_sending}; +#endif /* HAVE_SENDFILE */ struct t_data; @@ -242,6 +256,9 @@ typedef struct { struct t_data *cq_head; /* Queue of incoming commands */ struct t_data *cq_tail; /* -""- */ enum e_timer timer_state; +#ifdef HAVE_SENDFILE + enum e_sendfile sendfile_state; +#endif /* HAVE_SENDFILE */ size_t read_bufsize; ErlDrvBinary *read_binp; size_t read_offset; @@ -264,7 +281,11 @@ struct erl_drv_entry efile_driver_entry = { file_stop, file_output, NULL, +#ifdef HAVE_SENDFILE + file_ready_output, +#else NULL, +#endif /* HAVE_SENDFILE */ "efile", NULL, NULL, @@ -279,7 +300,13 @@ struct erl_drv_entry efile_driver_entry = { ERL_DRV_EXTENDED_MAJOR_VERSION, ERL_DRV_EXTENDED_MINOR_VERSION, ERL_DRV_FLAG_USE_PORT_LOCKING, + NULL, + NULL, +#ifdef HAVE_SENDFILE + file_stop_select +#else NULL +#endif /* HAVE_SENDFILE */ }; @@ -398,6 +425,14 @@ struct t_data Sint64 length; int advise; } fadvise; +#ifdef HAVE_SENDFILE + struct { + int out_fd; + off_t offset; + Uint64 nbytes; + Uint64 written; + } sendfile; +#endif /* HAVE_SENDFILE */ } c; char b[1]; }; @@ -485,7 +520,6 @@ static void *ef_safe_realloc(void *op, Uint s) : 0) - #if 0 static void ev_clear(ErlIOVec *ev) { @@ -613,7 +647,6 @@ static struct t_data *cq_deq(file_descriptor *desc) { } - /********************************************************************* * Driver entry point -> init */ @@ -628,6 +661,7 @@ file_init(void) ? atoi(buf) : 0); driver_system_info(&sys_info, sizeof(ErlDrvSysInfo)); + return 0; } @@ -655,6 +689,9 @@ file_start(ErlDrvPort port, char* command) desc->cq_head = NULL; desc->cq_tail = NULL; desc->timer_state = timer_idle; +#ifdef HAVE_SENDFILE + desc->sendfile_state = not_sending; +#endif desc->read_bufsize = 0; desc->read_binp = NULL; desc->read_offset = 0; @@ -893,8 +930,6 @@ static int reply_eof(file_descriptor *desc) { driver_output2(desc->port, &c, 1, NULL, 0); return 0; } - - static void invoke_name(void *data, int (*f)(Efile_error *, char *)) { @@ -1694,6 +1729,66 @@ static void invoke_fadvise(void *data) d->result_ok = efile_fadvise(&d->errInfo, fd, offset, length, advise); } +#ifdef HAVE_SENDFILE +static void invoke_sendfile(void *data) +{ + struct t_data *d = (struct t_data *)data; + int fd = d->fd; + int out_fd = (int)d->c.sendfile.out_fd; + Uint64 nbytes = d->c.sendfile.nbytes; + int result = 0; + d->again = 0; + + result = efile_sendfile(&d->errInfo, fd, out_fd, &d->c.sendfile.offset, &nbytes, NULL); + + d->c.sendfile.written += nbytes; + + if (result == 1) { + if (sys_info.async_threads != 0) { + d->result_ok = 0; + } else if (d->c.sendfile.nbytes == 0 && nbytes != 0) { + d->result_ok = 1; + } else if ((d->c.sendfile.nbytes - nbytes) != 0) { + d->result_ok = 1; + d->c.sendfile.nbytes -= nbytes; + } else { + d->result_ok = 0; + } + } else if (result == 0 && (d->errInfo.posix_errno == EAGAIN + || d->errInfo.posix_errno == EINTR)) { + d->result_ok = 1; + } else { + d->result_ok = -1; + } +} + +static void free_sendfile(void *data) { + EF_FREE(data); +} + +static void file_ready_output(ErlDrvData data, ErlDrvEvent event) +{ + file_descriptor* fd = (file_descriptor*) data; + + switch (fd->d->command) { + case FILE_SENDFILE: + driver_select(fd->port, event, + (int)ERL_DRV_WRITE,(int) 0); + invoke_sendfile((void *)fd->d); + file_async_ready(data, (ErlDrvThreadData)fd->d); + break; + default: + break; + } +} + +static void file_stop_select(ErlDrvEvent event, void* _) +{ + +} +#endif /* HAVE_SENDFILE */ + + static void free_readdir(void *data) { struct t_data *d = (struct t_data *) data; @@ -1755,6 +1850,10 @@ static void cq_execute(file_descriptor *desc) { register void *void_ptr; /* Soft cast variable */ if (desc->timer_state == timer_again) return; +#ifdef HAVE_SENDFILE + if (desc->sendfile_state == sending) + return; +#endif if (! (d = cq_deq(desc))) return; TRACE_F(("x%i", (int) d->command)); @@ -2105,6 +2204,37 @@ file_async_ready(ErlDrvData e, ErlDrvThreadData data) } free_preadv(data); break; +#ifdef HAVE_SENDFILE + case FILE_SENDFILE: + if (d->result_ok == -1) { + desc->sendfile_state = not_sending; + reply_error(desc, &d->errInfo); + if (sys_info.async_threads != 0) { + SET_NONBLOCKING(d->c.sendfile.out_fd); + free_sendfile(data); + } else { + driver_select(desc->port, (ErlDrvEvent)(long)d->c.sendfile.out_fd, + ERL_DRV_USE, 0); + free_sendfile(data); + } + } else if (d->result_ok == 0) { + desc->sendfile_state = not_sending; + reply_Sint64(desc, d->c.sendfile.written); + if (sys_info.async_threads != 0) { + SET_NONBLOCKING(d->c.sendfile.out_fd); + free_sendfile(data); + } else { + driver_select(desc->port, (ErlDrvEvent)(long)d->c.sendfile.out_fd, ERL_DRV_USE, 0); + free_sendfile(data); + } + } else if (d->result_ok == 1) { // If we are using select to send the rest of the data + desc->sendfile_state = sending; + desc->d = d; + driver_select(desc->port, (ErlDrvEvent)(long)d->c.sendfile.out_fd, + ERL_DRV_USE|ERL_DRV_WRITE, 1); + } + break; +#endif default: abort(); } @@ -3245,9 +3375,69 @@ file_outputv(ErlDrvData e, ErlIOVec *ev) { goto done; } /* case FILE_OPT_DELAYED_WRITE: */ } ASSERT(0); goto done; /* case FILE_SETOPT: */ - + + case FILE_SENDFILE: { + +#ifdef HAVE_SENDFILE + struct t_data *d; + Uint32 out_fd, offsetH, offsetL, hd_len, tl_len; + Uint64 nbytes; + char flags; + + if (ev->size < 1 + 7 * sizeof(Uint32) + sizeof(char) + || !EV_GET_UINT32(ev, &out_fd, &p, &q) + || !EV_GET_CHAR(ev, &flags, &p, &q) + || !EV_GET_UINT32(ev, &offsetH, &p, &q) + || !EV_GET_UINT32(ev, &offsetL, &p, &q) + || !EV_GET_UINT64(ev, &nbytes, &p, &q) + || !EV_GET_UINT32(ev, &hd_len, &p, &q) + || !EV_GET_UINT32(ev, &tl_len, &p, &q)) { + /* Buffer has wrong length to contain all the needed values */ + reply_posix_error(desc, EINVAL); + goto done; + } + + if (hd_len != 0 || tl_len != 0 || flags != 0) { + // We do not allow header, trailers and/or flags right now + reply_posix_error(desc, EINVAL); + goto done; + } + + d = EF_SAFE_ALLOC(sizeof(struct t_data)); + d->fd = desc->fd; + d->command = command; + d->invoke = invoke_sendfile; + d->free = NULL; + d->level = 2; + + d->c.sendfile.out_fd = (int) out_fd; + d->c.sendfile.written = 0; + + #if SIZEOF_OFF_T == 4 + if (offsetH != 0) { + reply_posix_error(desc, EINVAL); + goto done; + } + d->c.sendfile.offset = (off_t) offsetL; + #else + d->c.sendfile.offset = ((off_t) offsetH << 32) | offsetL; + #endif + + d->c.sendfile.nbytes = nbytes; + + if (sys_info.async_threads != 0) { + SET_BLOCKING(d->c.sendfile.out_fd); + } + + cq_enq(desc, d); +#else + reply_posix_error(desc, ENOTSUP); +#endif + goto done; + } /* case FILE_SENDFILE: */ + } /* switch(command) */ - + if (lseek_flush_read(desc, &err) < 0) { reply_posix_error(desc, err); goto done; diff --git a/erts/emulator/drivers/common/erl_efile.h b/erts/emulator/drivers/common/erl_efile.h index 3097ded3f1..349ab0e17b 100644 --- a/erts/emulator/drivers/common/erl_efile.h +++ b/erts/emulator/drivers/common/erl_efile.h @@ -118,6 +118,19 @@ typedef struct _Efile_info { */ } Efile_info; + +#ifdef HAVE_SENDFILE +/* + * Described the structure of header/trailers for sendfile + */ +struct t_sendfile_hdtl { + SysIOVec *headers; + int hdr_cnt; + SysIOVec *trailers; + int trl_cnt; +}; +#endif /* HAVE_SENDFILE */ + /* * Functions. */ @@ -162,3 +175,7 @@ int efile_symlink(Efile_error* errInfo, char* old, char* new); int efile_may_openfile(Efile_error* errInfo, char *name); int efile_fadvise(Efile_error* errInfo, int fd, Sint64 offset, Sint64 length, int advise); +#ifdef HAVE_SENDFILE +int efile_sendfile(Efile_error* errInfo, int in_fd, int out_fd, + off_t *offset, Uint64 *nbytes, struct t_sendfile_hdtl *hdtl); +#endif /* HAVE_SENDFILE */ diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c index db052523a8..7d952b0c71 100644 --- a/erts/emulator/drivers/common/inet_drv.c +++ b/erts/emulator/drivers/common/inet_drv.c @@ -445,6 +445,7 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) driver_select(port, e, mode | (on?ERL_DRV_USE:0), on) #define sock_select(d, flags, onoff) do { \ + ASSERT(!onoff || !(d)->is_ignored); \ (d)->event_mask = (onoff) ? \ ((d)->event_mask | (flags)) : \ ((d)->event_mask & ~(flags)); \ @@ -538,6 +539,8 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) #define INET_REQ_GETIFADDRS 25 #define INET_REQ_ACCEPT 26 #define INET_REQ_LISTEN 27 +#define INET_REQ_IGNOREFD 28 + /* TCP requests */ /* #define TCP_REQ_ACCEPT 40 MOVED */ /* #define TCP_REQ_LISTEN 41 MERGED */ @@ -725,6 +728,11 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) /* Max interface name */ #define INET_IFNAMSIZ 16 +/* INET Ignore states */ +#define INET_IGNORE_NONE 0 +#define INET_IGNORE_READ 1 +#define INET_IGNORE_WRITE 1 << 1 + /* Max length of Erlang Term Buffer (for outputting structured terms): */ #ifdef HAVE_SCTP #define PACKET_ERL_DRV_TERM_DATA_LEN 512 @@ -864,6 +872,9 @@ typedef struct { double send_avg; /* average packet size sent */ subs_list empty_out_q_subs; /* Empty out queue subscribers */ + int is_ignored; /* if a fd is ignored by from the inet_drv, + this should be set to true when the fd is used + outside of inet_drv. */ } inet_descriptor; @@ -7344,6 +7355,8 @@ static ErlDrvData inet_start(ErlDrvPort port, int size, int protocol) sys_memzero((char *)&desc->remote,sizeof(desc->remote)); + desc->is_ignored = 0; + return (ErlDrvData)desc; } @@ -7626,6 +7639,33 @@ static int inet_ctl(inet_descriptor* desc, int cmd, char* buf, int len, return ctl_reply(INET_REP_OK, tbuf, 2, rbuf, rsize); } + case INET_REQ_IGNOREFD: { + DEBUGF(("inet_ctl(%ld): IGNOREFD, IGNORED = %d\r\n", + (long)desc->port,(int)*buf)); + + /* + * FD can only be ignored for connected TCP connections for now, + * possible to add UDP and SCTP support if needed. + */ + if (!IS_CONNECTED(desc)) + return ctl_error(ENOTCONN, rbuf, rsize); + + if (!desc->stype == SOCK_STREAM) + return ctl_error(EINVAL, rbuf, rsize); + + if (*buf == 1 && !desc->is_ignored) { + desc->is_ignored = INET_IGNORE_READ; + sock_select(desc, (FD_READ|FD_WRITE|FD_CLOSE|ERL_DRV_USE_NO_CALLBACK), 0); + } else if (*buf == 0 && desc->is_ignored) { + int flags = (FD_READ|FD_CLOSE|((desc->is_ignored & INET_IGNORE_WRITE)?FD_WRITE:0)); + desc->is_ignored = INET_IGNORE_NONE; + sock_select(desc, flags, 1); + } else + return ctl_error(EINVAL, rbuf, rsize); + + return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); + } + #ifndef VXWORKS case INET_REQ_GETSERVBYNAME: { /* L1 Name-String L2 Proto-String */ @@ -8001,6 +8041,7 @@ static int tcp_inet_ctl(ErlDrvData e, unsigned int cmd, char* buf, int len, char** rbuf, int rsize) { tcp_descriptor* desc = (tcp_descriptor*)e; + switch(cmd) { case INET_REQ_OPEN: { /* open socket and return internal index */ int domain; @@ -8266,13 +8307,14 @@ static int tcp_inet_ctl(ErlDrvData e, unsigned int cmd, char* buf, int len, if (enq_async(INETP(desc), tbuf, TCP_REQ_RECV) < 0) return ctl_error(EALREADY, rbuf, rsize); - if (tcp_recv(desc, n) == 0) { + if (INETP(desc)->is_ignored || tcp_recv(desc, n) == 0) { if (timeout == 0) async_error_am(INETP(desc), am_timeout); else { if (timeout != INET_INFINITY) - driver_set_timer(desc->inet.port, timeout); - sock_select(INETP(desc),(FD_READ|FD_CLOSE),1); + driver_set_timer(desc->inet.port, timeout); + if (!INETP(desc)->is_ignored) + sock_select(INETP(desc),(FD_READ|FD_CLOSE),1); } } return ctl_reply(INET_REP_OK, tbuf, 2, rbuf, rsize); @@ -9012,6 +9054,7 @@ static int tcp_inet_input(tcp_descriptor* desc, HANDLE event) #ifdef DEBUG long port = (long) desc->inet.port; /* Used after driver_exit() */ #endif + ASSERT(!INETP(desc)->is_ignored); DEBUGF(("tcp_inet_input(%ld) {s=%d\r\n", port, desc->inet.s)); if (desc->inet.state == INET_STATE_ACCEPTING) { SOCKET s; @@ -9273,7 +9316,11 @@ static int tcp_sendv(tcp_descriptor* desc, ErlIOVec* ev) DEBUGF(("tcp_sendv(%ld): s=%d, about to send %d,%d bytes\r\n", (long)desc->inet.port, desc->inet.s, h_len, len)); - if (desc->tcp_add_flags & TCP_ADDF_DELAY_SEND) { + + if (INETP(desc)->is_ignored) { + INETP(desc)->is_ignored |= INET_IGNORE_WRITE; + n = 0; + } else if (desc->tcp_add_flags & TCP_ADDF_DELAY_SEND) { n = 0; } else if (IS_SOCKET_ERROR(sock_sendv(desc->inet.s, ev->iov, vsize, &n, 0))) { @@ -9301,7 +9348,8 @@ static int tcp_sendv(tcp_descriptor* desc, ErlIOVec* ev) DEBUGF(("tcp_sendv(%ld): s=%d, Send failed, queuing\r\n", (long)desc->inet.port, desc->inet.s)); driver_enqv(ix, ev, n); - sock_select(INETP(desc),(FD_WRITE|FD_CLOSE), 1); + if (!INETP(desc)->is_ignored) + sock_select(INETP(desc),(FD_WRITE|FD_CLOSE), 1); } return 0; } @@ -9366,7 +9414,10 @@ static int tcp_send(tcp_descriptor* desc, char* ptr, int len) DEBUGF(("tcp_send(%ld): s=%d, about to send %d,%d bytes\r\n", (long)desc->inet.port, desc->inet.s, h_len, len)); - if (desc->tcp_add_flags & TCP_ADDF_DELAY_SEND) { + if (INETP(desc)->is_ignored) { + INETP(desc)->is_ignored |= INET_IGNORE_WRITE; + n = 0; + } else if (desc->tcp_add_flags & TCP_ADDF_DELAY_SEND) { sock_send(desc->inet.s, buf, 0, 0); n = 0; } else if (IS_SOCKET_ERROR(sock_sendv(desc->inet.s,iov,2,&n,0))) { @@ -9397,7 +9448,8 @@ static int tcp_send(tcp_descriptor* desc, char* ptr, int len) n -= h_len; driver_enq(ix, ptr+n, len-n); } - sock_select(INETP(desc),(FD_WRITE|FD_CLOSE), 1); + if (!INETP(desc)->is_ignored) + sock_select(INETP(desc),(FD_WRITE|FD_CLOSE), 1); } return 0; } @@ -9421,6 +9473,7 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event) int ret = 0; ErlDrvPort ix = desc->inet.port; + ASSERT(!INETP(desc)->is_ignored); DEBUGF(("tcp_inet_output(%ld) {s=%d\r\n", (long)desc->inet.port, desc->inet.s)); if (desc->inet.state == INET_STATE_CONNECTING) { diff --git a/erts/emulator/drivers/unix/unix_efile.c b/erts/emulator/drivers/unix/unix_efile.c index 4b3934657c..72911641d3 100644 --- a/erts/emulator/drivers/unix/unix_efile.c +++ b/erts/emulator/drivers/unix/unix_efile.c @@ -33,6 +33,9 @@ #include <sys/types.h> #include <sys/uio.h> #endif +#if defined(HAVE_SENDFILE) && (defined(__linux__) || (defined(__sun) && defined(__SVR4))) +#include <sys/sendfile.h> +#endif #if defined(__APPLE__) && defined(__MACH__) && !defined(__DARWIN__) #define DARWIN 1 @@ -1464,3 +1467,77 @@ efile_fadvise(Efile_error* errInfo, int fd, Sint64 offset, return check_error(0, errInfo); #endif } + +#ifdef HAVE_SENDFILE +#define SENDFILE_CHUNK_SIZE ((1 << 30) -1) + +/* + * sendfile: The implementation of the sendfile system call varies + * a lot on different *nix platforms so to make the api similar in all + * we have to emulate some things in linux and play with variables on + * bsd/darwin. + * + * It could be possible to implement header/trailer in sendfile, though + * you would have to emulate it in linux and on BSD/Darwin some complex + * calculations have to be made when using a non blocking socket to figure + * out how much of the header/file/trailer was sent in each command. + */ + +int +efile_sendfile(Efile_error* errInfo, int in_fd, int out_fd, + off_t *offset, Uint64 *nbytes, struct t_sendfile_hdtl* hdtl) +{ + Uint64 written = 0; +#if defined(__linux__) || (defined(__sun) && defined(__SVR4)) + ssize_t retval; + do { + // check if *nbytes is 0 or greater than the largest size_t + if (*nbytes == 0 || *nbytes > SENDFILE_CHUNK_SIZE) + retval = sendfile(out_fd, in_fd, offset, SENDFILE_CHUNK_SIZE); + else + retval = sendfile(out_fd, in_fd, offset, *nbytes); + if (retval > 0) { + written += retval; + *nbytes -= retval; + } + } while (retval != -1 && retval == SENDFILE_CHUNK_SIZE); + *nbytes = written; + return check_error(retval == -1 ? -1 : 0, errInfo); +#elif defined(DARWIN) + int retval; + off_t len; + do { + // check if *nbytes is 0 or greater than the largest off_t + if(*nbytes > SENDFILE_CHUNK_SIZE) + len = SENDFILE_CHUNK_SIZE; + else + len = *nbytes; + retval = sendfile(in_fd, out_fd, *offset, &len, NULL, 0); + if (retval != -1 || errno == EAGAIN || errno == EINTR) { + *offset += len; + *nbytes -= len; + written += len; + } + } while (len == SENDFILE_CHUNK_SIZE); + *nbytes = written; + return check_error(retval, errInfo); +#elif defined(__FreeBSD__) || defined(__DragonFly__) + off_t len; + int retval; + do { + if (*nbytes > SENDFILE_CHUNK_SIZE) + retval = sendfile(in_fd, out_fd, *offset, SENDFILE_CHUNK_SIZE, + NULL, &len, 0); + else + retval = sendfile(in_fd, out_fd, *offset, *nbytes, NULL, &len, 0); + if (retval != -1 || errno == EAGAIN || errno == EINTR) { + *offset += len; + *nbytes -= len; + written += len; + } + } while(len == SENDFILE_CHUNK_SIZE); + *nbytes = written; + return check_error(retval, errInfo); +#endif +} +#endif /* HAVE_SENDFILE */ |