aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/drivers/common/inet_drv.c
diff options
context:
space:
mode:
Diffstat (limited to 'erts/emulator/drivers/common/inet_drv.c')
-rw-r--r--erts/emulator/drivers/common/inet_drv.c1724
1 files changed, 1394 insertions, 330 deletions
diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c
index 554c48059f..b71ce0389d 100644
--- a/erts/emulator/drivers/common/inet_drv.c
+++ b/erts/emulator/drivers/common/inet_drv.c
@@ -1,7 +1,7 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 1997-2017. All Rights Reserved.
+ * Copyright Ericsson AB 1997-2018. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -38,6 +38,7 @@
#include <ctype.h>
#include <sys/types.h>
#include <errno.h>
+#include <stdint.h>
#define IDENTITY(c) c
#define STRINGIFY_1(b) IDENTITY(#b)
@@ -63,6 +64,20 @@
#include <sys/un.h>
#endif
+#ifdef HAVE_SENDFILE
+#if defined(__linux__) || (defined(__sun) && defined(__SVR4))
+ #include <sys/sendfile.h>
+#elif defined(__FreeBSD__) || defined(__DragonFly__)
+ /* Need to define __BSD_VISIBLE in order to expose prototype of sendfile */
+ #define __BSD_VISIBLE 1
+ #include <sys/socket.h>
+#endif
+#endif
+
+#if defined(__APPLE__) && defined(__MACH__) && !defined(__DARWIN__)
+ #define __DARWIN__ 1
+#endif
+
/* All platforms fail on malloc errors. */
#define FATAL_MALLOC
@@ -559,7 +574,7 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n)
driver_select(port, e, mode | (on?ERL_DRV_USE:0), on)
#define sock_select(d, flags, onoff) do { \
- ASSERT(!(d)->is_ignored); \
+ ASSERT(!INET_IGNORED(d)); \
(d)->event_mask = (onoff) ? \
((d)->event_mask | (flags)) : \
((d)->event_mask & ~(flags)); \
@@ -610,6 +625,30 @@ static size_t my_strnlen(const char *s, size_t maxlen)
# define VALGRIND_MAKE_MEM_DEFINED(ptr,size)
#endif
+#ifndef __WIN32__
+/* Calculate CMSG_NXTHDR without having a struct msghdr*.
+ * CMSG_LEN only caters for alignment for start of data.
+ * To get how much to advance we need to use CMSG_SPACE
+ * on the payload length. To get the payload length we
+ * take the calculated cmsg->cmsg_len and subtract the
+ * header length. To get the header length we use
+ * the pointer difference from the cmsg start pointer
+ * to the CMSG_DATA(cmsg) pointer.
+ *
+ * Some platforms (seen on ppc Linux 2.6.29-3.ydl61.3)
+ * may return 0 as the cmsg_len if the cmsg is to be ignored.
+ */
+#define LEN_CMSG_DATA(cmsg) \
+ ((cmsg)->cmsg_len < sizeof (struct cmsghdr) ? 0 : \
+ (cmsg)->cmsg_len - ((char*)CMSG_DATA(cmsg) - (char*)(cmsg)))
+#define NXT_CMSG_HDR(cmsg) \
+ ((struct cmsghdr*)(((char*)(cmsg)) + CMSG_SPACE(LEN_CMSG_DATA(cmsg))))
+#endif
+
+#if !defined(IPV6_PKTOPTIONS) && defined(IPV6_2292PKTOPTIONS)
+#define IPV6_PKTOPTIONS IPV6_2292PKTOPTIONS
+#endif
+
/*
Magic errno value used locally for return of {error, system_limit}
- the emulator definition of SYSTEM_LIMIT is not available here.
@@ -701,6 +740,7 @@ static size_t my_strnlen(const char *s, size_t maxlen)
#define TCP_REQ_RECV 42
#define TCP_REQ_UNRECV 43
#define TCP_REQ_SHUTDOWN 44
+#define TCP_REQ_SENDFILE 45
/* UDP and SCTP requests */
#define PACKET_REQ_RECV 60 /* Common for UDP and SCTP */
/* #define SCTP_REQ_LISTEN 61 MERGED Different from TCP; not for UDP */
@@ -723,6 +763,7 @@ static size_t my_strnlen(const char *s, size_t maxlen)
#define TCP_ADDF_DELAYED_ECONNRESET 128 /* An ECONNRESET error occurred on send or shutdown */
#define TCP_ADDF_SHUTDOWN_WR_DONE 256 /* A shutdown(sock, SHUT_WR) or SHUT_RDWR was made */
#define TCP_ADDF_LINGER_ZERO 512 /* Discard driver queue on port close */
+#define TCP_ADDF_SENDFILE 1024 /* Send from an fd instead of the driver queue */
/* *_REQ_* replies */
#define INET_REP_ERROR 0
@@ -771,6 +812,12 @@ static size_t my_strnlen(const char *s, size_t maxlen)
#define INET_LOPT_LINE_DELIM 40 /* Line delimiting char */
#define INET_OPT_TCLASS 41 /* IPv6 transport class */
#define INET_OPT_BIND_TO_DEVICE 42 /* get/set network device the socket is bound to */
+#define INET_OPT_RECVTOS 43 /* IP_RECVTOS ancillary data */
+#define INET_OPT_RECVTCLASS 44 /* IPV6_RECVTCLASS ancillary data */
+#define INET_OPT_PKTOPTIONS 45 /* IP(V6)_PKTOPTIONS get ancillary data */
+#define INET_OPT_TTL 46 /* IP_TTL */
+#define INET_OPT_RECVTTL 47 /* IP_RECVTTL ancillary data */
+#define TCP_OPT_NOPUSH 48 /* super-Nagle, aka TCP_CORK */
/* SCTP options: a separate range, from 100: */
#define SCTP_OPT_RTOINFO 100
#define SCTP_OPT_ASSOCINFO 101
@@ -846,6 +893,20 @@ static size_t my_strnlen(const char *s, size_t maxlen)
#define SCTP_FLAG_SACDELAY_ENABLE (32 /* am_sackdelay_enable */)
#define SCTP_FLAG_SACDELAY_DISABLE (64 /* am_sackdelay_disable */)
+/* Flags for recv_cmsgflags */
+#define INET_CMSG_RECVTOS (1 << 0) /* am_recvtos, am_tos */
+#define INET_CMSG_RECVTCLASS (1 << 1) /* am_recvtclass, am_tclass */
+#define INET_CMSG_RECVTTL (1 << 2) /* am_recvttl, am_ttl */
+
+/* Inet flags */
+#define INET_FLG_BUFFER_SET (1 << 0) /* am_buffer has been set by user */
+#define INET_FLG_IS_IGNORED (1 << 1) /* If a fd is ignored by the inet_drv.
+ This flag should be set to true when
+ the fd is used outside of inet_drv. */
+#define INET_FLG_IS_IGNORED_RD (1 << 2)
+#define INET_FLG_IS_IGNORED_WR (1 << 3)
+#define INET_FLG_IS_IGNORED_PASS (1 << 4)
+
/*
** End of interface constants.
**--------------------------------------------------------------------------*/
@@ -891,18 +952,31 @@ static size_t my_strnlen(const char *s, size_t maxlen)
#define INET_IFNAMSIZ 16
/* INET Ignore states */
-#define INET_IGNORE_NONE 0
-#define INET_IGNORE_READ (1 << 0)
-#define INET_IGNORE_WRITE (1 << 1)
-#define INET_IGNORE_PASSIVE (1 << 2)
+#define INET_IGNORE_CLEAR(desc) ((desc)->flags & ~(INET_IGNORE_READ|INET_IGNORE_WRITE|INET_IGNORE_PASSIVE))
+#define INET_IGNORED(desc) ((desc)->flags & INET_FLG_IS_IGNORED)
+#define INET_IGNORE_READ (INET_FLG_IS_IGNORED|INET_FLG_IS_IGNORED_RD)
+#define INET_IGNORE_WRITE (INET_FLG_IS_IGNORED|INET_FLG_IS_IGNORED_WR)
+#define INET_IGNORE_PASSIVE (INET_FLG_IS_IGNORED|INET_FLG_IS_IGNORED_PASS)
/* Max length of Erlang Term Buffer (for outputting structured terms): */
#ifdef HAVE_SCTP
#define PACKET_ERL_DRV_TERM_DATA_LEN 512
#else
+#ifndef __WIN32__
+/* Assume we have recvmsg() and might need room for ancillary data */
+#define PACKET_ERL_DRV_TERM_DATA_LEN 64
+#else
#define PACKET_ERL_DRV_TERM_DATA_LEN 32
#endif
+#endif
+typedef struct _tcp_descriptor tcp_descriptor;
+
+#if defined(TCP_CORK)
+#define INET_TCP_NOPUSH TCP_CORK
+#elif defined(TCP_NOPUSH) && !defined(__DARWIN__)
+#define INET_TCP_NOPUSH TCP_NOPUSH
+#endif
#define BIN_REALLOC_MARGIN(x) ((x)/4) /* 25% */
@@ -952,16 +1026,19 @@ typedef struct _multi_timer_data {
struct _multi_timer_data *prev;
} MultiTimerData;
-static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port,
- ErlDrvTermData caller, unsigned timeout,
- void (*timeout_fun)(ErlDrvData drv_data,
- ErlDrvTermData caller));
-static void fire_multi_timers(MultiTimerData **first, ErlDrvPort port,
+static MultiTimerData *add_multi_timer(tcp_descriptor *desc, ErlDrvPort port,
+ ErlDrvTermData caller, unsigned timeout,
+ void (*timeout_fun)(ErlDrvData drv_data,
+ ErlDrvTermData caller));
+static void fire_multi_timers(tcp_descriptor *desc, ErlDrvPort port,
ErlDrvData data);
-static void remove_multi_timer(MultiTimerData **first, ErlDrvPort port, MultiTimerData *p);
+static void remove_multi_timer(tcp_descriptor *desc, ErlDrvPort port, MultiTimerData *p);
+static void cancel_multi_timer(tcp_descriptor *desc, ErlDrvPort port,
+ void (*timeout_fun)(ErlDrvData drv_data,
+ ErlDrvTermData caller));
static void tcp_inet_multi_timeout(ErlDrvData e, ErlDrvTermData caller);
-static void clean_multi_timers(MultiTimerData **first, ErlDrvPort port);
+static void clean_multi_timers(tcp_descriptor *desc, ErlDrvPort port);
typedef struct {
int id; /* id used to identify reply */
@@ -1018,6 +1095,7 @@ typedef struct {
inet_async_op* oph; /* queue head or NULL */
inet_async_op* opt; /* queue tail or NULL */
inet_async_op op_queue[INET_MAX_ASYNC]; /* call queue */
+ int op_ref; /* queue reference generator */
int active; /* 0 = passive, 1 = active, 2 = active once */
Sint16 active_count; /* counter for {active,N} */
@@ -1060,13 +1138,12 @@ typedef struct {
double send_avg; /* average packet size sent */
subs_list empty_out_q_subs; /* Empty out queue subscribers */
- int is_ignored; /* if a fd is ignored by the inet_drv.
- This flag should be set to true when
- the fd is used outside of inet_drv. */
+ int flags;
#ifdef HAVE_SETNS
char *netns; /* Socket network namespace name
as full file path */
#endif
+ int recv_cmsgflags; /* Which ancillary data to expect */
} inet_descriptor;
@@ -1136,7 +1213,6 @@ static int packet_inet_init(void);
static void packet_inet_stop(ErlDrvData);
static void packet_inet_command(ErlDrvData, char*, ErlDrvSizeT);
static void packet_inet_drv_input(ErlDrvData data, ErlDrvEvent event);
-static void packet_inet_drv_output(ErlDrvData data, ErlDrvEvent event);
static ErlDrvData udp_inet_start(ErlDrvPort, char* command);
#ifdef HAVE_SCTP
static ErlDrvData sctp_inet_start(ErlDrvPort, char* command);
@@ -1161,7 +1237,7 @@ static struct erl_drv_entry udp_inet_driver_entry =
NULL,
#else
packet_inet_drv_input,
- packet_inet_drv_output,
+ NULL,
#endif
"udp_inet",
NULL,
@@ -1196,7 +1272,7 @@ static struct erl_drv_entry sctp_inet_driver_entry =
NULL,
#else
packet_inet_drv_input,
- packet_inet_drv_output,
+ NULL,
#endif
"sctp_inet",
NULL,
@@ -1219,7 +1295,7 @@ static struct erl_drv_entry sctp_inet_driver_entry =
};
#endif
-typedef struct {
+struct _tcp_descriptor {
inet_descriptor inet; /* common data structure (DON'T MOVE) */
int high; /* high watermark */
int low; /* low watermark */
@@ -1235,8 +1311,24 @@ typedef struct {
int http_state; /* 0 = response|request 1=headers fields */
inet_async_multi_op *multi_first;/* NULL == no multi-accept-queue, op is in ordinary queue */
inet_async_multi_op *multi_last;
- MultiTimerData *mtd; /* Timer structures for multiple accept */
-} tcp_descriptor;
+ MultiTimerData *mtd; /* Timer structures for multiple accept */
+ MultiTimerData *mtd_cache; /* A cache for timer allocations */
+#ifdef HAVE_SENDFILE
+ struct {
+ ErlDrvSizeT ioq_skip; /* The number of bytes in the queue at the time
+ * sendfile was issued, which must be sent
+ * before issuing the sendfile call itself. */
+ int dup_file_fd; /* The file handle to send from; this is
+ * duplicated when sendfile is issued to
+ * reduce (but not eliminate) the impact of a
+ * nasty race, so we have to remember to close
+ * it. */
+ Uint64 bytes_sent;
+ Uint64 offset;
+ Uint64 length;
+ } sendfile;
+#endif
+};
/* send function */
static int tcp_send(tcp_descriptor* desc, char* ptr, ErlDrvSizeT len);
@@ -1246,6 +1338,11 @@ static int tcp_deliver(tcp_descriptor* desc, int len);
static int tcp_shutdown_error(tcp_descriptor* desc, int err);
+#ifdef HAVE_SENDFILE
+static int tcp_inet_sendfile(tcp_descriptor* desc);
+static int tcp_sendfile_aborted(tcp_descriptor* desc, int socket_error);
+#endif
+
static int tcp_inet_output(tcp_descriptor* desc, HANDLE event);
static int tcp_inet_input(tcp_descriptor* desc, HANDLE event);
@@ -1262,14 +1359,12 @@ typedef struct {
static int packet_inet_input(udp_descriptor* udesc, HANDLE event);
-static int packet_inet_output(udp_descriptor* udesc, HANDLE event);
#endif
/* convert descriptor pointer to inet_descriptor pointer */
#define INETP(d) (&(d)->inet)
-static int async_ref = 0; /* async reference id generator */
-#define NEW_ASYNC_ID() ((async_ref++) & 0xffff)
+#define NEW_ASYNC_ID(desc) ((desc)->op_ref++ & 0xffff)
/* check for transition from active to passive */
#define INET_CHECK_ACTIVE_TO_PASSIVE(inet) \
@@ -1305,6 +1400,11 @@ static ErlDrvTermData am_udp_error;
#ifdef HAVE_SYS_UN_H
static ErlDrvTermData am_local;
#endif
+#ifndef __WIN32__
+static ErlDrvTermData am_tos;
+static ErlDrvTermData am_tclass;
+static ErlDrvTermData am_ttl;
+#endif
#ifdef HAVE_SCTP
static ErlDrvTermData am_sctp;
static ErlDrvTermData am_sctp_passive;
@@ -1325,12 +1425,16 @@ static ErlDrvTermData am_sndbuf;
static ErlDrvTermData am_reuseaddr;
static ErlDrvTermData am_dontroute;
static ErlDrvTermData am_priority;
-static ErlDrvTermData am_tos;
-static ErlDrvTermData am_tclass;
+static ErlDrvTermData am_recvtos;
+static ErlDrvTermData am_recvtclass;
+static ErlDrvTermData am_recvttl;
static ErlDrvTermData am_ipv6_v6only;
static ErlDrvTermData am_netns;
static ErlDrvTermData am_bind_to_device;
#endif
+#ifdef HAVE_SENDFILE
+static ErlDrvTermData am_sendfile;
+#endif
static char str_eafnosupport[] = "eafnosupport";
static char str_einval[] = "einval";
@@ -1514,8 +1618,10 @@ static void *realloc_wrapper(void *current, ErlDrvSizeT size){
# define ASSOC_ID_LEN 4
# define LOAD_ASSOC_ID LOAD_UINT
# define LOAD_ASSOC_ID_CNT LOAD_UINT_CNT
-# define SCTP_ANC_BUFF_SIZE INET_DEF_BUFFER/2 /* XXX: not very good... */
+#else
+# define IS_SCTP(desc) 0
#endif
+# define ANC_BUFF_SIZE INET_DEF_BUFFER/2 /* XXX: not very good... */
#ifdef HAVE_UDP
static int load_address(ErlDrvTermData* spec, int i, char* buf)
@@ -1751,6 +1857,7 @@ static void release_buffer(ErlDrvBinary* buf)
#ifdef HAVE_UDP
static ErlDrvBinary* realloc_buffer(ErlDrvBinary* buf, ErlDrvSizeT newsz)
{
+ DEBUGF(("realloc_buffer: %ld -> %ld\r\n", (buf==NULL) ? 0 : buf->orig_size, newsz));
return driver_realloc_binary(buf, newsz);
}
#endif
@@ -1921,7 +2028,7 @@ static void enq_multi_op(tcp_descriptor *desc, char *buf, int req,
ErlDrvTermData caller, MultiTimerData *timeout,
ErlDrvMonitor *monitorp)
{
- int id = NEW_ASYNC_ID();
+ int id = NEW_ASYNC_ID(INETP(desc));
enq_old_multi_op(desc,id,req,caller,timeout,monitorp);
if (buf != NULL)
put_int16(id, buf);
@@ -1990,7 +2097,7 @@ static int remove_multi_op(tcp_descriptor *desc, int *id_p, int *req_p,
static int enq_async_w_tmo(inet_descriptor* desc, char* buf, int req, unsigned timeout,
ErlDrvMonitor *monitorp)
{
- int id = NEW_ASYNC_ID();
+ int id = NEW_ASYNC_ID(desc);
inet_async_op* opp;
if ((opp = desc->oph) == NULL) /* queue empty */
@@ -2715,6 +2822,66 @@ static int inet_async_data(inet_descriptor* desc, const char* buf, int len)
}
}
+#ifndef __WIN32__
+static int load_cmsg_int(ErlDrvTermData *spec, int i,
+ struct cmsghdr *cmsg) {
+ union u {
+ byte uint8;
+ Uint16 uint16;
+ Uint32 uint32;
+ Uint64 uint64;
+ } *p;
+ p = (union u*) CMSG_DATA(cmsg);
+ switch (LEN_CMSG_DATA(cmsg) * CHAR_BIT) {
+ case 8:
+ return LOAD_INT(spec, i, p->uint8);
+ case 16:
+ return LOAD_INT(spec, i, p->uint16);
+
+ case 32:
+ return LOAD_INT(spec, i, p->uint32);
+
+ case 64:
+ return LOAD_INT(spec, i, p->uint64);
+ }
+ return LOAD_INT(spec, i, 0);
+}
+
+static int parse_ancillary_data_item(ErlDrvTermData *spec, int i,
+ struct cmsghdr *cmsg, int *n) {
+#define LOAD_CMSG_INT(proto, type, am) \
+ if (cmsg->cmsg_level == (proto) && \
+ cmsg->cmsg_type == (type)) { \
+ i = LOAD_ATOM(spec, i, (am)); \
+ i = load_cmsg_int(spec, i, cmsg); \
+ i = LOAD_TUPLE(spec, i, 2); \
+ (*n)++; \
+ return i; \
+ }
+#if defined(IPPROTO_IP) && defined(IP_TOS)
+ LOAD_CMSG_INT(IPPROTO_IP, IP_TOS, am_tos);
+#endif
+#if defined(IPPROTO_IPV6) && defined(IPV6_TCLASS)
+ LOAD_CMSG_INT(IPPROTO_IPV6, IPV6_TCLASS, am_tclass);
+#endif
+#if defined(IPPROTO_IP) && defined(IP_TTL)
+ LOAD_CMSG_INT(IPPROTO_IP, IP_TTL, am_ttl);
+#endif
+ /* BSD uses the RECV* names in CMSG fields */
+#if defined(IPPROTO_IP) && defined(IP_RECVTOS)
+ LOAD_CMSG_INT(IPPROTO_IP, IP_RECVTOS, am_tos);
+#endif
+#if defined(IPPROTO_IPV6) && defined(IPV6_RECVTCLASS)
+ LOAD_CMSG_INT(IPPROTO_IPV6, IPV6_RECVTCLASS, am_tclass);
+#endif
+#if defined(IPPROTO_IP) && defined(IP_RECVTTL)
+ LOAD_CMSG_INT(IPPROTO_IP, IP_RECVTTL, am_ttl);
+#endif
+#undef LOAD_CMSG_INT
+ return i;
+}
+#endif /* #ifndef __WIN32__ */
+
#ifdef HAVE_SCTP
/*
** SCTP-related atoms:
@@ -2846,11 +3013,18 @@ static int sctp_parse_ancillary_data
for (cmsg = frst_msg; cmsg != NULL; cmsg = CMSG_NXTHDR(mptr,cmsg))
{
struct sctp_sndrcvinfo * sri;
-
+#ifndef __WIN32
+ int old_s;
+
+ /* Parse ancillary data common to UDP */
+ old_s = s;
+ i = parse_ancillary_data_item(spec, i, cmsg, &s);
+ if (s > old_s) continue;
/* Skip other possible ancillary data, e.g. from IPv6: */
if (cmsg->cmsg_level != IPPROTO_SCTP ||
cmsg->cmsg_type != SCTP_SNDRCV)
continue;
+#endif
if (((char*)cmsg + cmsg->cmsg_len) - (char*)frst_msg >
mptr->msg_controllen)
@@ -3190,6 +3364,23 @@ static int sctp_parse_async_event
}
#endif /* HAVE_SCTP */
+#ifndef __WIN32__
+static int udp_parse_ancillary_data(ErlDrvTermData *spec, int i,
+ struct msghdr *mptr) {
+ struct cmsghdr *cmsg;
+ int n;
+
+ n = 0;
+ for (cmsg = CMSG_FIRSTHDR(mptr);
+ cmsg != NULL;
+ cmsg = CMSG_NXTHDR(mptr, cmsg)) {
+ i = parse_ancillary_data_item(spec, i, cmsg, &n);
+ }
+ i = LOAD_NIL(spec, i);
+ return LOAD_LIST(spec, i, n+1);
+}
+#endif /* ifndef __WIN32__ */
+
/*
** passive mode reply:
** for UDP:
@@ -3212,7 +3403,7 @@ static int sctp_parse_async_event
static int
inet_async_binary_data
(inet_descriptor* desc, unsigned int phsz,
- ErlDrvBinary * bin, int offs, int len, void * extra)
+ ErlDrvBinary * bin, int offs, int len, void *mp)
{
unsigned int hsz = desc->hsz + phsz;
ErlDrvTermData spec [PACKET_ERL_DRV_TERM_DATA_LEN];
@@ -3245,9 +3436,10 @@ inet_async_binary_data
if (IS_SCTP(desc))
{ /* For SCTP we always have desc->hsz==0 (i.e., no application-level
headers are used), so hsz==phsz (see above): */
- struct msghdr* mptr;
int sz;
-
+ struct msghdr *mptr;
+
+ mptr = mp;
ASSERT (hsz == phsz && hsz != 0);
sz = len - hsz; /* Size of the msg data proper, w/o the addr */
@@ -3255,7 +3447,6 @@ inet_async_binary_data
i = LOAD_STRING(spec, i, bin->orig_bytes+offs, hsz);
/* Put in the list (possibly empty) of Ancillary Data: */
- mptr = (struct msghdr *) extra;
i = sctp_parse_ancillary_data (spec, i, mptr);
/* Then: Data or Event (Notification)? */
@@ -3284,20 +3475,32 @@ inet_async_binary_data
}
else
#endif /* HAVE_SCTP */
- /* Generic case. Both Addr and Data (or a single list of them together) are
- returned: */
+ {
+ /* Generic case. Both Addr and Data
+ * (or a single list of them together) are returned: */
- if ((desc->mode == INET_MODE_LIST) || (hsz > len)) {
- /* INET_MODE_LIST => [H1,H2,...Hn] */
- i = LOAD_STRING(spec, i, bin->orig_bytes+offs, len);
- }
- else {
- /* INET_MODE_BINARY => [H1,H2,...HSz | Binary] or [Binary]: */
- int sz = len - hsz;
- i = LOAD_BINARY(spec, i, bin, offs+hsz, sz);
- if (hsz > 0)
- i = LOAD_STRING_CONS(spec, i, bin->orig_bytes+offs, hsz);
+ if ((desc->mode == INET_MODE_LIST) || (hsz > len)) {
+ /* INET_MODE_LIST => [H1,H2,...Hn] */
+ i = LOAD_STRING(spec, i, bin->orig_bytes+offs, len);
+ }
+ else {
+ /* INET_MODE_BINARY => [H1,H2,...HSz | Binary] or [Binary]: */
+ int sz = len - hsz;
+ i = LOAD_BINARY(spec, i, bin, offs+hsz, sz);
+ if (hsz > 0)
+ i = LOAD_STRING_CONS(spec, i, bin->orig_bytes+offs, hsz);
+ }
+
+#ifndef __WIN32__
+ if (mp) {
+ /* We got ancillary data from an UDP recvmsg.
+ * Insert an additional tuple level {[F|AddrData],AncData} */
+ i = udp_parse_ancillary_data(spec, i, (struct msghdr*)mp);
+ i = LOAD_TUPLE(spec, i, 2);
+ }
+#endif
}
+
/* Close up the {ok, ...} or {error, ...} tuple: */
i = LOAD_TUPLE(spec, i, 2);
@@ -3429,8 +3632,9 @@ static int tcp_error_message(tcp_descriptor* desc, int err)
** [AddrLen, H2,...,HSz] are msg headers for UDP AF_UNIX only
** Data : List() | Binary()
*/
-static int packet_binary_message
- (inet_descriptor* desc, ErlDrvBinary* bin, int offs, int len, void* extra)
+static int packet_binary_message(inet_descriptor* desc,
+ ErlDrvBinary* bin, int offs, int len,
+ void *mp)
{
unsigned int hsz = desc->hsz;
ErlDrvTermData spec [PACKET_ERL_DRV_TERM_DATA_LEN];
@@ -3455,8 +3659,14 @@ static int packet_binary_message
# ifdef HAVE_SCTP
if (!IS_SCTP(desc))
- {
# endif
+ {
+#ifndef __WIN32__
+ if (mp) i = udp_parse_ancillary_data(spec, i, (struct msghdr*)mp);
+#endif
+ /* We got ancillary data from an UDP recvmsg.
+ * Insert an additional tuple level {AncData,[F|AddrData]}
+ */
if ((desc->mode == INET_MODE_LIST) || (hsz > len))
/* INET_MODE_LIST, or only headers => [H1,H2,...Hn] */
i = LOAD_STRING(spec, i, bin->orig_bytes+offs, len);
@@ -3468,16 +3678,24 @@ static int packet_binary_message
if (hsz > 0)
i = LOAD_STRING_CONS(spec, i, bin->orig_bytes+offs, hsz);
}
-# ifdef HAVE_SCTP
+ /* Close up the outer 5-or-6-tuple */
+#ifndef __WIN32__
+ if (mp) i = LOAD_TUPLE(spec, i, 6);
+ else
+#endif
+ i = LOAD_TUPLE(spec, i, 5);
}
+# ifdef HAVE_SCTP
else
- { /* For SCTP we always have desc->hsz==0 (i.e., no application-level
+ {
+ struct msghdr *mptr;
+
+ mptr = mp;
+ /* For SCTP we always have desc->hsz==0 (i.e., no application-level
headers are used): */
- struct msghdr* mptr;
ASSERT(hsz == 0);
/* Put in the list (possibly empty) of Ancillary Data: */
- mptr = (struct msghdr *) extra;
i = sctp_parse_ancillary_data (spec, i, mptr);
/* Then: Data or Event (Notification)? */
@@ -3503,11 +3721,11 @@ static int packet_binary_message
/* Close up the {[AncilData], Event_OR_Data} tuple: */
i = LOAD_TUPLE (spec, i, 2);
+ /* Close up the outer 5-tuple: */
+ i = LOAD_TUPLE(spec, i, 5);
}
# endif /* HAVE_SCTP */
- /* Close up the outer 5-tuple: */
- i = LOAD_TUPLE(spec, i, 5);
ASSERT(i <= PACKET_ERL_DRV_TERM_DATA_LEN);
return erl_drv_output_term(desc->dport, spec, i);
}
@@ -3641,19 +3859,19 @@ tcp_reply_binary_data(tcp_descriptor* desc, ErlDrvBinary* bin, int offs, int len
static int
packet_reply_binary_data(inet_descriptor* desc, unsigned int hsz,
ErlDrvBinary * bin, int offs, int len,
- void * extra)
+ void *mp)
{
int code;
if (desc->active == INET_PASSIVE)
/* "inet" is actually for both UDP and SCTP, as well as TCP! */
- return inet_async_binary_data(desc, hsz, bin, offs, len, extra);
+ return inet_async_binary_data(desc, hsz, bin, offs, len, mp);
else
{ /* INET_ACTIVE or INET_ONCE: */
if (desc->deliver == INET_DELIVER_PORT)
code = inet_port_binary_data(desc, bin, offs, len);
else
- code = packet_binary_message(desc, bin, offs, len, extra);
+ code = packet_binary_message(desc, bin, offs, len, mp);
if (code < 0)
return code;
INET_CHECK_ACTIVE_TO_PASSIVE(desc);
@@ -3720,8 +3938,9 @@ static void inet_init_sctp(void) {
INIT_ATOM(reuseaddr);
INIT_ATOM(dontroute);
INIT_ATOM(priority);
- INIT_ATOM(tos);
- INIT_ATOM(tclass);
+ INIT_ATOM(recvtos);
+ INIT_ATOM(recvtclass);
+ INIT_ATOM(recvttl);
INIT_ATOM(ipv6_v6only);
INIT_ATOM(netns);
INIT_ATOM(bind_to_device);
@@ -3864,6 +4083,11 @@ static int inet_init()
#endif
INIT_ATOM(empty_out_q);
INIT_ATOM(ssl_tls);
+#ifndef __WIN32__
+ INIT_ATOM(tos);
+ INIT_ATOM(tclass);
+ INIT_ATOM(ttl);
+#endif
INIT_ATOM(http_eoh);
INIT_ATOM(http_header);
@@ -3877,6 +4101,10 @@ static int inet_init()
INIT_ATOM(https);
INIT_ATOM(scheme);
+#ifdef HAVE_SENDFILE
+ INIT_ATOM(sendfile);
+#endif
+
/* add TCP, UDP and SCTP drivers */
add_driver_entry(&tcp_inet_driver_entry);
#ifdef HAVE_UDP
@@ -4331,7 +4559,7 @@ static void desc_close(inet_descriptor* desc)
* We should close the fd here, but the other driver might still
* be selecting on it.
*/
- if (!desc->is_ignored)
+ if (!INET_IGNORED(desc))
driver_select(desc->port,(ErlDrvEvent)(long)desc->event,
ERL_DRV_USE, 0);
else
@@ -4978,6 +5206,71 @@ static int hwaddr_libdlpi_lookup(const char *ifnm,
}
#endif
+#ifdef HAVE_GETIFADDRS
+/* Returns 0 for success and errno() for failure */
+static int call_getifaddrs(inet_descriptor* desc_p, struct ifaddrs **ifa_pp)
+{
+ int result, save_errno;
+#ifdef HAVE_SETNS
+ int current_ns;
+
+ current_ns = 0;
+ if (desc_p->netns != NULL) {
+ int new_ns;
+ /* Temporarily change network namespace for this thread
+ * over the getifaddrs() call
+ */
+ current_ns = open("/proc/self/ns/net", O_RDONLY);
+ if (current_ns == INVALID_SOCKET)
+ return sock_errno();
+ new_ns = open(desc_p->netns, O_RDONLY);
+ if (new_ns == INVALID_SOCKET) {
+ save_errno = sock_errno();
+ while (close(current_ns) == INVALID_SOCKET &&
+ sock_errno() == EINTR);
+ return save_errno;
+ }
+ if (setns(new_ns, CLONE_NEWNET) != 0) {
+ save_errno = sock_errno();
+ while (close(new_ns) == INVALID_SOCKET &&
+ sock_errno() == EINTR);
+ while (close(current_ns) == INVALID_SOCKET &&
+ sock_errno() == EINTR);
+ return save_errno;
+ }
+ else {
+ while (close(new_ns) == INVALID_SOCKET &&
+ sock_errno() == EINTR);
+ }
+ }
+#endif
+ save_errno = 0;
+ result = getifaddrs(ifa_pp);
+ if (result < 0)
+ save_errno = sock_errno();
+#ifdef HAVE_SETNS
+ if (desc_p->netns != NULL) {
+ /* Restore network namespace */
+ if (setns(current_ns, CLONE_NEWNET) != 0) {
+ /* XXX Failed to restore network namespace.
+ * What to do? Tidy up and return an error...
+ * Note that the thread now might still be in the set namespace.
+ * Can this even happen? Should the emulator be aborted?
+ */
+ if (result >= 0) {
+ /* We got a result but have to waste it */
+ save_errno = sock_errno();
+ freeifaddrs(*ifa_pp);
+ }
+ }
+ while (close(current_ns) == INVALID_SOCKET &&
+ sock_errno() == EINTR);
+ }
+#endif
+ return save_errno;
+}
+#endif /* #ifdef HAVE_GETIFADDRS */
+
/* FIXME: temporary hack */
#ifndef IFHWADDRLEN
#define IFHWADDRLEN 6
@@ -5055,8 +5348,8 @@ static ErlDrvSSizeT inet_ctl_ifget(inet_descriptor* desc,
struct sockaddr_dl *sdlp;
int found = 0;
- if (getifaddrs(&ifa) == -1)
- goto error;
+ if (call_getifaddrs(desc, &ifa) != 0)
+ goto error;
for (ifp = ifa; ifp; ifp = ifp->ifa_next) {
if ((ifp->ifa_addr->sa_family == AF_LINK) &&
@@ -5078,8 +5371,8 @@ static ErlDrvSSizeT inet_ctl_ifget(inet_descriptor* desc,
sys_memcpy(sptr,
sdlp->sdl_data + sdlp->sdl_nlen,
sdlp->sdl_alen);
- freeifaddrs(ifa);
sptr += sdlp->sdl_alen;
+ freeifaddrs(ifa);
#endif
break;
}
@@ -5774,6 +6067,7 @@ static ErlDrvSSizeT inet_ctl_getifaddrs(inet_descriptor* desc_p,
ErlDrvSizeT buf_size;
char *buf_p;
char *buf_alloc_p;
+ int save_errno;
buf_size = GETIFADDRS_BUFSZ;
buf_alloc_p = ALLOC(GETIFADDRS_BUFSZ);
@@ -5808,9 +6102,9 @@ static ErlDrvSSizeT inet_ctl_getifaddrs(inet_descriptor* desc_p,
} \
} while (0)
- if (getifaddrs(&ifa_p) < 0) {
- return ctl_error(sock_errno(), rbuf_pp, rsize);
- }
+ if ((save_errno = call_getifaddrs(desc_p, &ifa_p)) != 0)
+ return ctl_error(save_errno, rbuf_pp, rsize);
+
ifa_free_p = ifa_p;
*buf_p++ = INET_REP_OK;
for (; ifa_p; ifa_p = ifa_p->ifa_next) {
@@ -5892,7 +6186,8 @@ static ErlDrvSSizeT inet_ctl_getifaddrs(inet_descriptor* desc_p,
but ditto for the other worked and that was actually the requested
option, failure was still reported to erlang. */
-#if defined(IP_TOS) && defined(SOL_IP) && defined(SO_PRIORITY)
+#if defined(IP_TOS) && defined(IPPROTO_IP) \
+ && defined(SO_PRIORITY) && !defined(__WIN32__)
static int setopt_prio_tos_trick
(int fd, int proto, int type, char* arg_ptr, int arg_sz, int propagate)
{
@@ -5914,14 +6209,14 @@ static int setopt_prio_tos_trick
res_prio = sock_getopt(fd, SOL_SOCKET, SO_PRIORITY,
(char *) &tmp_ival_prio, &tmp_arg_sz_prio);
- res_tos = sock_getopt(fd, SOL_IP, IP_TOS,
+ res_tos = sock_getopt(fd, IPPROTO_IP, IP_TOS,
(char *) &tmp_ival_tos, &tmp_arg_sz_tos);
res = sock_setopt(fd, proto, type, arg_ptr, arg_sz);
if (res == 0) {
if (type != SO_PRIORITY) {
if (type != IP_TOS && res_tos == 0) {
res_tos = sock_setopt(fd,
- SOL_IP,
+ IPPROTO_IP,
IP_TOS,
(char *) &tmp_ival_tos,
tmp_arg_sz_tos);
@@ -5965,7 +6260,7 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
int proto;
int opt;
struct linger li_val;
-#ifdef HAVE_MULTICAST_SUPPORT
+#if defined(HAVE_MULTICAST_SUPPORT) && defined(IPPROTO_IP)
struct ip_mreq mreq_val;
#endif
int ival;
@@ -5988,6 +6283,8 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
/* XXX { int i; for(i=0;i<len;++i) fprintf(stderr,"0x%02X, ", (unsigned) ptr[i]); fprintf(stderr,"\r\n");} */
while(len >= 5) {
+ int recv_cmsgflags;
+
opt = *ptr++;
ival = get_int32(ptr);
ptr += 4;
@@ -5996,6 +6293,7 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
arg_sz = sizeof(ival);
proto = SOL_SOCKET;
propagate = 0;
+ recv_cmsgflags = desc->recv_cmsgflags;
switch(opt) {
case INET_LOPT_HEADER:
@@ -6022,6 +6320,7 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
(long)desc->port, desc->s, ival));
if (ival < INET_MIN_BUFFER) ival = INET_MIN_BUFFER;
desc->bufsz = ival;
+ desc->flags |= INET_FLG_BUFFER_SET;
continue;
case INET_LOPT_ACTIVE:
@@ -6217,6 +6516,16 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
case INET_OPT_RCVBUF: type = SO_RCVBUF;
DEBUGF(("inet_set_opts(%ld): s=%d, SO_RCVBUF=%d\r\n",
(long)desc->port, desc->s, ival));
+ if (!(desc->flags & INET_FLG_BUFFER_SET)) {
+ /* make sure we have desc->bufsz >= SO_RCVBUF */
+ if (ival > (1 << 16) && desc->stype == SOCK_DGRAM && !IS_SCTP(desc))
+ /* For UDP we don't want to automatically
+ set the buffer size to be larger than
+ the theoretical max MTU */
+ desc->bufsz = 1 << 16;
+ else if (ival > desc->bufsz)
+ desc->bufsz = ival;
+ }
break;
case INET_OPT_LINGER: type = SO_LINGER;
if (len < 4)
@@ -6246,28 +6555,80 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
(long)desc->port, desc->s, ival));
break;
#else
+ /* inet_fill_opts always returns a value for this option,
+ * so we need to ignore it if not implemented */
continue;
#endif
case INET_OPT_TOS:
-#if defined(IP_TOS) && defined(SOL_IP)
- proto = SOL_IP;
+#if defined(IP_TOS) && defined(IPPROTO_IP)
+ proto = IPPROTO_IP;
type = IP_TOS;
propagate = 1;
DEBUGF(("inet_set_opts(%ld): s=%d, IP_TOS=%d\r\n",
(long)desc->port, desc->s, ival));
break;
#else
+ /* inet_fill_opts always returns a value for this option,
+ * so we need to ignore it if not implemented. */
continue;
#endif
-#if defined(IPV6_TCLASS) && defined(SOL_IPV6)
+#if defined(IPV6_TCLASS) && defined(IPPROTO_IPV6)
case INET_OPT_TCLASS:
- proto = SOL_IPV6;
+ proto = IPPROTO_IPV6;
type = IPV6_TCLASS;
propagate = 1;
DEBUGF(("inet_set_opts(%ld): s=%d, IPV6_TCLASS=%d\r\n",
(long)desc->port, desc->s, ival));
break;
#endif
+#if defined(IP_TTL) && defined(IPPROTO_IP)
+ case INET_OPT_TTL:
+ proto = IPPROTO_IP;
+ type = IP_TTL;
+ propagate = 1;
+ DEBUGF(("inet_set_opts(%ld): s=%d, IP_TTL=%d\r\n",
+ (long)desc->port, desc->s, ival));
+ break;
+#endif
+#if defined(IP_RECVTOS) && defined(IPPROTO_IP)
+ case INET_OPT_RECVTOS:
+ proto = IPPROTO_IP;
+ type = IP_RECVTOS;
+ propagate = 1;
+ recv_cmsgflags =
+ ival ?
+ (desc->recv_cmsgflags | INET_CMSG_RECVTOS) :
+ (desc->recv_cmsgflags & ~INET_CMSG_RECVTOS);
+ DEBUGF(("inet_set_opts(%ld): s=%d, IP_RECVTOS=%d\r\n",
+ (long)desc->port, desc->s, ival));
+ break;
+#endif
+#if defined(IPV6_RECVTCLASS) && defined(IPPROTO_IPV6)
+ case INET_OPT_RECVTCLASS:
+ proto = IPPROTO_IPV6;
+ type = IPV6_RECVTCLASS;
+ propagate = 1;
+ recv_cmsgflags =
+ ival ?
+ (desc->recv_cmsgflags | INET_CMSG_RECVTCLASS) :
+ (desc->recv_cmsgflags & ~INET_CMSG_RECVTCLASS);
+ DEBUGF(("inet_set_opts(%ld): s=%d, IPV6_RECVTCLASS=%d\r\n",
+ (long)desc->port, desc->s, ival));
+ break;
+#endif
+#if defined(IP_RECVTTL) && defined(IPPROTO_IP)
+ case INET_OPT_RECVTTL:
+ proto = IPPROTO_IP;
+ type = IP_RECVTTL;
+ propagate = 1;
+ recv_cmsgflags =
+ ival ?
+ (desc->recv_cmsgflags | INET_CMSG_RECVTTL) :
+ (desc->recv_cmsgflags & ~INET_CMSG_RECVTTL);
+ DEBUGF(("inet_set_opts(%ld): s=%d, IP_RECVTTL=%d\r\n",
+ (long)desc->port, desc->s, ival));
+ break;
+#endif
case TCP_OPT_NODELAY:
proto = IPPROTO_TCP;
@@ -6276,7 +6637,20 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
(long)desc->port, desc->s, ival));
break;
-#ifdef HAVE_MULTICAST_SUPPORT
+ case TCP_OPT_NOPUSH:
+#if defined(INET_TCP_NOPUSH)
+ proto = IPPROTO_TCP;
+ type = INET_TCP_NOPUSH;
+ DEBUGF(("inet_set_opts(%ld): s=%d, t=%d TCP_NOPUSH=%d\r\n",
+ (long)desc->port, desc->s, type, ival));
+ break;
+#else
+ /* inet_fill_opts always returns a value for this option,
+ * so we need to ignore it if not implemented, just in case */
+ continue;
+#endif
+
+#if defined(HAVE_MULTICAST_SUPPORT) && defined(IPPROTO_IP)
case UDP_OPT_MULTICAST_TTL:
proto = IPPROTO_IP;
@@ -6322,10 +6696,10 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
arg_sz = sizeof(mreq_val);
break;
-#endif /* HAVE_MULTICAST_SUPPORT */
+#endif /* defined(HAVE_MULTICAST_SUPPORT) && defined(IPPROTO_IP) */
case INET_OPT_IPV6_V6ONLY:
-#if HAVE_DECL_IPV6_V6ONLY
+#if HAVE_DECL_IPV6_V6ONLY && defined(IPPROTO_IPV6)
proto = IPPROTO_IPV6;
type = IPV6_V6ONLY;
propagate = 1;
@@ -6385,28 +6759,29 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
default:
return -1;
}
-#if defined(IP_TOS) && defined(SOL_IP) && defined(SO_PRIORITY)
+#if defined(IP_TOS) && defined(IPPROTO_IP) \
+ && defined(SO_PRIORITY) && !defined(__WIN32__)
res = setopt_prio_tos_trick (desc->s, proto, type, arg_ptr, arg_sz, propagate);
#else
res = sock_setopt (desc->s, proto, type, arg_ptr, arg_sz);
#endif
+ if (res == 0) desc->recv_cmsgflags = recv_cmsgflags;
if (propagate && res != 0) {
return -1;
}
DEBUGF(("inet_set_opts(%ld): s=%d returned %d\r\n",
(long)desc->port, desc->s, res));
- if (type == SO_RCVBUF) {
- /* make sure we have desc->bufsz >= SO_RCVBUF */
- if (ival > desc->bufsz)
- desc->bufsz = ival;
- }
}
if ( ((desc->stype == SOCK_STREAM) && IS_CONNECTED(desc)) ||
((desc->stype == SOCK_DGRAM) && IS_OPEN(desc))) {
- if (desc->active != old_active)
+ if (desc->active != old_active) {
+ /* Need to cancel the read_packet timer if we go from active to passive. */
+ if (desc->active == INET_PASSIVE && desc->stype == SOCK_DGRAM)
+ driver_cancel_timer(desc->port);
sock_select(desc, (FD_READ|FD_CLOSE), (desc->active>0));
+ }
/* XXX: UDP sockets could also trigger immediate read here NIY */
if ((desc->stype==SOCK_STREAM) && desc->active) {
@@ -6526,10 +6901,14 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len)
while (curr < ptr + len)
{
+ int recv_cmsgflags;
/* Get the Erlang-encoded option type -- always 1 byte: */
- int eopt = *curr;
+ int eopt;
+
+ eopt = *curr;
curr++;
+ recv_cmsgflags = desc->recv_cmsgflags;
/* Get the option value. XXX: The condition (curr < ptr + len)
does not preclude us from reading from beyond the buffer end,
if the Erlang part of the driver specifies its input wrongly!
@@ -6544,6 +6923,7 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len)
if (desc->bufsz < INET_MIN_BUFFER)
desc->bufsz = INET_MIN_BUFFER;
+ desc->flags |= INET_FLG_BUFFER_SET;
res = 0; /* This does not affect the kernel buffer size */
continue;
@@ -6665,6 +7045,7 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len)
smaller than the kernel one: */
if (desc->bufsz <= arg.ival)
desc->bufsz = arg.ival;
+ desc->flags |= INET_FLG_BUFFER_SET;
break;
}
case INET_OPT_SNDBUF:
@@ -6675,10 +7056,6 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len)
arg_ptr = (char*) (&arg.ival);
arg_sz = sizeof ( arg.ival);
- /* Adjust the size of the user-level recv buffer, so it's not
- smaller than the kernel one: */
- if (desc->bufsz <= arg.ival)
- desc->bufsz = arg.ival;
break;
}
case INET_OPT_REUSEADDR:
@@ -6710,28 +7087,32 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len)
break;
}
# else
- continue; /* Option not supported -- ignore it */
+ /* inet_fill_opts always returns a value for this option,
+ * so we need to ignore it if not implemented, just in case */
+ continue;
# endif
case INET_OPT_TOS:
-# if defined(IP_TOS) && defined(SOL_IP)
+# if defined(IP_TOS) && defined(IPPROTO_IP)
{
arg.ival= get_int32 (curr); curr += 4;
- proto = SOL_IP;
+ proto = IPPROTO_IP;
type = IP_TOS;
arg_ptr = (char*) (&arg.ival);
arg_sz = sizeof ( arg.ival);
break;
}
# else
- continue; /* Option not supported -- ignore it */
+ /* inet_fill_opts always returns a value for this option,
+ * so we need to ignore it if not implemented, just in case */
+ continue;
# endif
-# if defined(IPV6_TCLASS) && defined(SOL_IPV6)
+# if defined(IPV6_TCLASS) && defined(IPPROTO_IPV6)
case INET_OPT_TCLASS:
{
arg.ival= get_int32 (curr); curr += 4;
- proto = SOL_IPV6;
+ proto = IPPROTO_IPV6;
type = IPV6_TCLASS;
arg_ptr = (char*) (&arg.ival);
arg_sz = sizeof ( arg.ival);
@@ -6739,9 +7120,69 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len)
}
# endif
+# if defined(IP_TTL) && defined(IPPROTO_IP)
+ case INET_OPT_TTL:
+ {
+ arg.ival= get_int32 (curr); curr += 4;
+ proto = IPPROTO_IP;
+ type = IP_TTL;
+ arg_ptr = (char*) (&arg.ival);
+ arg_sz = sizeof ( arg.ival);
+ break;
+ }
+# endif
+
+# if defined(IP_RECVTOS) && defined(IPPROTO_IP)
+ case INET_OPT_RECVTOS:
+ {
+ arg.ival= get_int32 (curr); curr += 4;
+ proto = IPPROTO_IP;
+ type = IP_RECVTOS;
+ arg_ptr = (char*) (&arg.ival);
+ arg_sz = sizeof ( arg.ival);
+ recv_cmsgflags =
+ arg.ival ?
+ (desc->recv_cmsgflags | INET_CMSG_RECVTOS) :
+ (desc->recv_cmsgflags & ~INET_CMSG_RECVTOS);
+ break;
+ }
+# endif
+
+# if defined(IPV6_RECVTCLASS) && defined(IPPROTO_IPV6)
+ case INET_OPT_RECVTCLASS:
+ {
+ arg.ival= get_int32 (curr); curr += 4;
+ proto = IPPROTO_IPV6;
+ type = IPV6_RECVTCLASS;
+ arg_ptr = (char*) (&arg.ival);
+ arg_sz = sizeof ( arg.ival);
+ recv_cmsgflags =
+ arg.ival ?
+ (desc->recv_cmsgflags | INET_CMSG_RECVTCLASS) :
+ (desc->recv_cmsgflags & ~INET_CMSG_RECVTCLASS);
+ break;
+ }
+# endif
+
+# if defined(IP_RECVTTL) && defined(IPPROTO_IP)
+ case INET_OPT_RECVTTL:
+ {
+ arg.ival= get_int32 (curr); curr += 4;
+ proto = IPPROTO_IP;
+ type = IP_RECVTTL;
+ arg_ptr = (char*) (&arg.ival);
+ arg_sz = sizeof ( arg.ival);
+ recv_cmsgflags =
+ arg.ival ?
+ (desc->recv_cmsgflags | INET_CMSG_RECVTTL) :
+ (desc->recv_cmsgflags & ~INET_CMSG_RECVTTL);
+ break;
+ }
+# endif
+
case INET_OPT_IPV6_V6ONLY:
-# if HAVE_DECL_IPV6_V6ONLY
+# if HAVE_DECL_IPV6_V6ONLY && defined(IPPROTO_IPV6)
{
arg.ival= get_int32 (curr); curr += 4;
proto = IPPROTO_IPV6;
@@ -6991,13 +7432,15 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len)
*/
return -1;
}
-#if defined(IP_TOS) && defined(SOL_IP) && defined(SO_PRIORITY)
+#if defined(IP_TOS) && defined(IPPROTO_IP) \
+ && defined(SO_PRIORITY) && !defined(__WIN32__)
res = setopt_prio_tos_trick (desc->s, proto, type, arg_ptr, arg_sz, 1);
#else
res = sock_setopt (desc->s, proto, type, arg_ptr, arg_sz);
#endif
/* The return values of "sock_setopt" can only be 0 or -1: */
ASSERT(res == 0 || res == -1);
+ if (res == 0) desc->recv_cmsgflags = recv_cmsgflags;
if (res == -1)
{ /* Got an error, DO NOT continue with other options. However, on
Solaris 10, we DO allow SO_SNDBUF and SO_RCVBUF to fail, assu-
@@ -7018,6 +7461,35 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len)
}
#endif /* HAVE_SCTP */
+#ifndef __WIN32__
+static void put_cmsg_int32(struct cmsghdr *cmsg, char *ptr) {
+ union u {
+ byte uint8;
+ Uint16 uint16;
+ Uint32 uint32;
+ Uint64 uint64;
+ } *p;
+ p = (union u*) CMSG_DATA(cmsg);
+ switch (LEN_CMSG_DATA(cmsg) * CHAR_BIT) {
+ case 8:
+ put_int32((Uint32) p->uint8, ptr);
+ break;
+ case 16:
+ put_int32((Uint32) p->uint16, ptr);
+ break;
+ case 32:
+ put_int32(p->uint32, ptr);
+ break;
+ case 64:
+ put_int32((Uint32) p->uint64, ptr);
+ break;
+ default:
+ put_int32(0, ptr);
+ }
+ return;
+}
+#endif
+
/* load all option values into the buf and reply
** return total length of reply filled into ptr
** ptr should point to a buffer with 9*len +1 to be safe!!
@@ -7252,8 +7724,8 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc,
continue;
#endif
case INET_OPT_TOS:
-#if defined(IP_TOS) && defined(SOL_IP)
- proto = SOL_IP;
+#if defined(IP_TOS) && defined(IPPROTO_IP)
+ proto = IPPROTO_IP;
type = IP_TOS;
break;
#else
@@ -7262,14 +7734,50 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc,
continue;
#endif
case INET_OPT_TCLASS:
-#if defined(IPV6_TCLASS) && defined(SOL_IPV6)
- proto = SOL_IPV6;
+#if defined(IPV6_TCLASS) && defined(IPPROTO_IPV6)
+ proto = IPPROTO_IPV6;
type = IPV6_TCLASS;
break;
#else
TRUNCATE_TO(0,ptr);
continue;
#endif
+ case INET_OPT_TTL:
+#if defined(IP_TTL) && defined(IPPROTO_IP)
+ proto = IPPROTO_IP;
+ type = IP_TTL;
+ break;
+#else
+ TRUNCATE_TO(0,ptr);
+ continue;
+#endif
+ case INET_OPT_RECVTOS:
+#if defined(IP_RECVTOS) && defined(IPPROTO_IP)
+ proto = IPPROTO_IP;
+ type = IP_RECVTOS;
+ break;
+#else
+ TRUNCATE_TO(0,ptr);
+ continue;
+#endif
+ case INET_OPT_RECVTCLASS:
+#if defined(IPV6_RECVTCLASS) && defined(IPPROTO_IPV6)
+ proto = IPPROTO_IPV6;
+ type = IPV6_RECVTCLASS;
+ break;
+#else
+ TRUNCATE_TO(0,ptr);
+ continue;
+#endif
+ case INET_OPT_RECVTTL:
+#if defined(IP_RECVTTL) && defined(IPPROTO_IP)
+ proto = IPPROTO_IP;
+ type = IP_RECVTTL;
+ break;
+#else
+ TRUNCATE_TO(0,ptr);
+ continue;
+#endif
case INET_OPT_REUSEADDR:
type = SO_REUSEADDR;
break;
@@ -7295,8 +7803,18 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc,
proto = IPPROTO_TCP;
type = TCP_NODELAY;
break;
+ case TCP_OPT_NOPUSH:
+#if defined(INET_TCP_NOPUSH)
+ proto = IPPROTO_TCP;
+ type = INET_TCP_NOPUSH;
+ break;
+#else
+ *ptr++ = opt;
+ put_int32(0, ptr);
+ continue;
+#endif
-#ifdef HAVE_MULTICAST_SUPPORT
+#if defined(HAVE_MULTICAST_SUPPORT) && defined(IPPROTO_IP)
case UDP_OPT_MULTICAST_TTL:
proto = IPPROTO_IP;
type = IP_MULTICAST_TTL;
@@ -7315,10 +7833,10 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc,
arg_ptr = (char*) &li_val;
type = SO_LINGER;
break;
-#endif /* HAVE_MULTICAST_SUPPORT */
+#endif /* defined(HAVE_MULTICAST_SUPPORT) && defined(IPPROTO_IP) */
case INET_OPT_IPV6_V6ONLY:
-#if HAVE_DECL_IPV6_V6ONLY
+#if HAVE_DECL_IPV6_V6ONLY && defined(IPPROTO_IPV6)
proto = IPPROTO_IPV6;
type = IPV6_V6ONLY;
break;
@@ -7396,6 +7914,93 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc,
continue;
#endif
+#ifndef __WIN32__
+ /* Winsock does not have struct cmsghdr */
+ case INET_OPT_PKTOPTIONS: {
+ struct cmsghdr *cmsg, *cmsg_top;
+ SOCKLEN_T cmsg_sz;
+ union {
+ /* Ensure alignment */
+ struct cmsghdr hdr;
+ /* Room for (IP_TOS | IPV6_TCLASS) + IP_TTL */
+ char buf[2*CMSG_SPACE(sizeof(int))];
+ } cmsgbuf;
+ /* Select between IPv4 or IPv6 PKTOPTIONS
+ * depending on the socket protocol family
+ */
+ switch (desc->sfamily) {
+#if defined(IPPROTO_IP) && defined(IP_PKTOPTIONS)
+ case AF_INET: {
+ proto = IPPROTO_IP;
+ type = IP_PKTOPTIONS;
+ }
+ break;
+#endif
+#if defined(IPPROTO_IPV6) && defined(IPV6_PKTOPTIONS) && defined(AF_INET6)
+ case AF_INET6: {
+ proto = IPPROTO_IPV6;
+ type = IPV6_PKTOPTIONS;
+ }
+ break;
+#endif
+ default: {
+ RETURN_ERROR();
+ }
+ } /* switch */
+ TRUNCATE_TO(0, ptr);
+ /* Fetch a cmsg buffer from the socket */
+ cmsg_sz = sizeof(cmsgbuf.buf);
+ if (IS_SOCKET_ERROR(sock_getopt(desc->s, proto, type,
+ cmsgbuf.buf, &cmsg_sz))) {
+ continue;
+ }
+ /* Reply with Opt/8, Length/32, [COpt/8, Value/32]*
+ * i.e opt, total length and then all returned
+ * cmsg options and values
+ */
+ PLACE_FOR(1+4, ptr);
+ *ptr++ = opt;
+ arg_ptr = ptr; /* Where to put total length */
+ arg_sz = 0; /* Total length */
+ for (cmsg_top = (struct cmsghdr*)(cmsgbuf.buf + cmsg_sz),
+ cmsg = (struct cmsghdr*)cmsgbuf.buf;
+ cmsg < cmsg_top;
+ cmsg = NXT_CMSG_HDR(cmsg)) {
+#define PUT_CMSG_INT32(CMSG_LEVEL, CMSG_TYPE, OPT) \
+ if ((cmsg->cmsg_level == CMSG_LEVEL) && \
+ (cmsg->cmsg_type == CMSG_TYPE)) { \
+ PLACE_FOR(1+4, ptr); \
+ *ptr++ = OPT; \
+ put_cmsg_int32(cmsg, ptr); \
+ arg_sz += 1+4; \
+ continue; \
+ }
+#if defined(IPPROTO_IP) && defined(IP_TOS)
+ PUT_CMSG_INT32(IPPROTO_IP, IP_TOS, INET_OPT_TOS);
+#endif
+#if defined(IPPROTO_IPV6) && defined(IPV6_TCLASS)
+ PUT_CMSG_INT32(IPPROTO_IPV6, IPV6_TCLASS, INET_OPT_TCLASS);
+#endif
+#if defined(IPPROTO_IP) && defined(IP_TTL)
+ PUT_CMSG_INT32(IPPROTO_IP, IP_TTL, INET_OPT_TTL);
+#endif
+ /* BSD uses the RECV* names in CMSG fields */
+#if defined(IPPROTO_IP) && defined(IP_RECVTOS)
+ PUT_CMSG_INT32(IPPROTO_IP, IP_RECVTOS, INET_OPT_TOS);
+#endif
+#if defined(IPPROTO_IPV6) && defined(IPV6_RECVTCLASS)
+ PUT_CMSG_INT32(IPPROTO_IPV6, IPV6_RECVTCLASS, INET_OPT_TCLASS);
+#endif
+#if defined(IPPROTO_IP) && defined(IP_RECVTTL)
+ PUT_CMSG_INT32(IPPROTO_IP, IP_RECVTTL, INET_OPT_TTL);
+#endif
+#undef PUT_CMSG_INT32
+ }
+ put_int32(arg_sz, arg_ptr); /* Put total length */
+ continue;
+ }
+#endif /* #ifdef __WIN32__ */
+
default:
RETURN_ERROR();
}
@@ -7455,6 +8060,7 @@ static int load_paddrinfo (ErlDrvTermData * spec, int i,
return i;
}
+
/*
** "sctp_fill_opts": Returns {ok, Results}, or an error:
*/
@@ -7704,6 +8310,7 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc,
case INET_OPT_PRIORITY :
case INET_OPT_TOS :
case INET_OPT_TCLASS :
+ case INET_OPT_TTL :
case INET_OPT_IPV6_V6ONLY:
case SCTP_OPT_AUTOCLOSE:
case SCTP_OPT_MAXSEG :
@@ -7711,6 +8318,9 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc,
case SCTP_OPT_NODELAY :
case SCTP_OPT_DISABLE_FRAGMENTS:
case SCTP_OPT_I_WANT_MAPPED_V4_ADDR:
+ case INET_OPT_RECVTOS :
+ case INET_OPT_RECVTCLASS :
+ case INET_OPT_RECVTTL :
{
int res = 0;
unsigned int sz = sizeof(res);
@@ -7766,8 +8376,8 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc,
}
case INET_OPT_TOS:
{
-# if defined(IP_TOS) && defined(SOL_IP)
- proto = SOL_IP;
+# if defined(IP_TOS) && defined(IPPROTO_IP)
+ proto = IPPROTO_IP;
type = IP_TOS;
is_int = 1;
tag = am_tos;
@@ -7779,8 +8389,8 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc,
}
case INET_OPT_TCLASS:
{
-# if defined(IPV6_TCLASS) && defined(SOL_IPV6)
- proto = SOL_IPV6;
+# if defined(IPV6_TCLASS) && defined(IPPROTO_IPV6)
+ proto = IPPROTO_IPV6;
type = IPV6_TCLASS;
is_int = 1;
tag = am_tclass;
@@ -7790,8 +8400,60 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc,
continue;
# endif
}
+ case INET_OPT_TTL:
+ {
+# if defined(IP_TTL) && defined(IPPROTO_IP)
+ proto = IPPROTO_IP;
+ type = IP_TTL;
+ is_int = 1;
+ tag = am_ttl;
+ break;
+# else
+ /* Not supported -- ignore */
+ continue;
+# endif
+ }
+ case INET_OPT_RECVTOS:
+ {
+# if defined(IP_RECVTOS) && defined(IPPROTO_IP)
+ proto = IPPROTO_IP;
+ type = IP_RECVTOS;
+ is_int = 0;
+ tag = am_recvtos;
+ break;
+# else
+ /* Not supported -- ignore */
+ continue;
+# endif
+ }
+ case INET_OPT_RECVTCLASS:
+ {
+# if defined(IPV6_RECVTCLASS) && defined(IPPROTO_IPV6)
+ proto = IPPROTO_IPV6;
+ type = IPV6_RECVTCLASS;
+ is_int = 0;
+ tag = am_recvtclass;
+ break;
+# else
+ /* Not supported -- ignore */
+ continue;
+# endif
+ }
+ case INET_OPT_RECVTTL:
+ {
+# if defined(IP_RECVTTL) && defined(IPPROTO_IP)
+ proto = IPPROTO_IP;
+ type = IP_RECVTTL;
+ is_int = 0;
+ tag = am_recvttl;
+ break;
+# else
+ /* Not supported -- ignore */
+ continue;
+# endif
+ }
case INET_OPT_IPV6_V6ONLY:
-# if HAVE_DECL_IPV6_V6ONLY
+# if HAVE_DECL_IPV6_V6ONLY && defined(IPPROTO_IPV6)
{
proto = IPPROTO_IPV6;
type = IPV6_V6ONLY;
@@ -8423,6 +9085,7 @@ static ErlDrvData inet_start(ErlDrvPort port, int size, int protocol)
desc->delimiter = '\n'; /* line delimiting char */
desc->oph = NULL;
desc->opt = NULL;
+ desc->op_ref = 0;
desc->peer_ptr = NULL;
desc->name_ptr = NULL;
@@ -8449,12 +9112,14 @@ static ErlDrvData inet_start(ErlDrvPort port, int size, int protocol)
sys_memzero((char *)&desc->remote,sizeof(desc->remote));
- desc->is_ignored = 0;
+ desc->flags = 0;
#ifdef HAVE_SETNS
desc->netns = NULL;
#endif
+ desc->recv_cmsgflags = 0;
+
return (ErlDrvData)desc;
}
@@ -8849,19 +9514,19 @@ static ErlDrvSSizeT inet_ctl(inet_descriptor* desc, int cmd, char* buf,
if (desc->stype != SOCK_STREAM)
return ctl_error(EINVAL, rbuf, rsize);
- if (*buf == 1 && !desc->is_ignored) {
+ if (*buf == 1 && !INET_IGNORED(desc)) {
sock_select(desc, (FD_READ|FD_WRITE|FD_CLOSE|ERL_DRV_USE_NO_CALLBACK), 0);
if (desc->active)
- desc->is_ignored = INET_IGNORE_READ;
+ desc->flags |= INET_IGNORE_READ;
else
- desc->is_ignored = INET_IGNORE_PASSIVE;
- } else if (*buf == 0 && desc->is_ignored) {
+ desc->flags |= INET_IGNORE_PASSIVE;
+ } else if (*buf == 0 && INET_IGNORED(desc)) {
int flags = FD_CLOSE;
- if (desc->is_ignored & INET_IGNORE_READ)
+ if (desc->flags & INET_IGNORE_READ)
flags |= FD_READ;
- if (desc->is_ignored & INET_IGNORE_WRITE)
+ if (desc->flags & INET_IGNORE_WRITE)
flags |= FD_WRITE;
- desc->is_ignored = INET_IGNORE_NONE;
+ desc->flags = INET_IGNORE_CLEAR(desc);
if (flags != FD_CLOSE)
sock_select(desc, flags, 1);
} else
@@ -9131,6 +9796,7 @@ static ErlDrvData prep_tcp_inet_start(ErlDrvPort port, char* args)
desc->tcp_add_flags = 0;
desc->http_state = 0;
desc->mtd = NULL;
+ desc->mtd_cache = NULL;
desc->multi_first = desc->multi_last = NULL;
DEBUGF(("tcp_inet_start(%ld) }\r\n", (long)port));
return (ErlDrvData) desc;
@@ -9234,15 +9900,14 @@ static void tcp_close_check(tcp_descriptor* desc)
driver_demonitor_process(desc->inet.port, &monitor);
send_async_error(desc->inet.dport, id, caller, am_closed);
}
- clean_multi_timers(&(desc->mtd), desc->inet.port);
}
-
else if (desc->inet.state == INET_STATE_CONNECTING) {
async_error_am(INETP(desc), am_closed);
}
else if (desc->inet.state == INET_STATE_CONNECTED) {
async_error_am_all(INETP(desc), am_closed);
}
+ clean_multi_timers(desc, desc->inet.port);
}
/*
@@ -9272,12 +9937,28 @@ static void tcp_inet_stop(ErlDrvData e)
* will be freed through tcp_inet_stop later on. */
static void tcp_desc_close(tcp_descriptor* desc)
{
+#ifdef HAVE_SENDFILE
+ if(desc->tcp_add_flags & TCP_ADDF_SENDFILE) {
+ desc->tcp_add_flags &= ~TCP_ADDF_SENDFILE;
+ close(desc->sendfile.dup_file_fd);
+ }
+#endif
+
tcp_clear_input(desc);
tcp_clear_output(desc);
erl_inet_close(INETP(desc));
}
+static void tcp_inet_recv_timeout(ErlDrvData e, ErlDrvTermData dummy)
+{
+ tcp_descriptor* desc = (tcp_descriptor*)e;
+ ASSERT(!desc->inet.active);
+ sock_select(INETP(desc),(FD_READ|FD_CLOSE),0);
+ desc->i_remain = 0;
+ async_error_am(INETP(desc), am_timeout);
+}
+
/* TCP requests from Erlang */
static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
char* buf, ErlDrvSizeT len,
@@ -9285,6 +9966,7 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
{
tcp_descriptor* desc = (tcp_descriptor*)e;
+ cmd -= ERTS_INET_DRV_CONTROL_MAGIC_NUMBER;
switch(cmd) {
case INET_REQ_OPEN: { /* open socket and return internal index */
int domain;
@@ -9448,12 +10130,12 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
if (time_left <= 0) {
time_left = 1;
}
- omtd = add_multi_timer(&(desc->mtd), desc->inet.port, ocaller,
+ omtd = add_multi_timer(desc, desc->inet.port, ocaller,
time_left, &tcp_inet_multi_timeout);
}
enq_old_multi_op(desc, oid, oreq, ocaller, omtd, &omonitor);
if (timeout != INET_INFINITY) {
- mtd = add_multi_timer(&(desc->mtd), desc->inet.port, caller,
+ mtd = add_multi_timer(desc, desc->inet.port, caller,
timeout, &tcp_inet_multi_timeout);
}
enq_multi_op(desc, tbuf, INET_REQ_ACCEPT, caller, mtd, &monitor);
@@ -9468,7 +10150,7 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
return ctl_xerror("noproc", rbuf, rsize);
}
if (timeout != INET_INFINITY) {
- mtd = add_multi_timer(&(desc->mtd), desc->inet.port, caller,
+ mtd = add_multi_timer(desc, desc->inet.port, caller,
timeout, &tcp_inet_multi_timeout);
}
enq_multi_op(desc, tbuf, INET_REQ_ACCEPT, caller, mtd, &monitor);
@@ -9560,16 +10242,17 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
if (enq_async(INETP(desc), tbuf, TCP_REQ_RECV) < 0)
return ctl_error(EALREADY, rbuf, rsize);
- if (INETP(desc)->is_ignored || tcp_recv(desc, n) == 0) {
+ if (INET_IGNORED(INETP(desc)) || tcp_recv(desc, n) == 0) {
if (timeout == 0)
async_error_am(INETP(desc), am_timeout);
else {
if (timeout != INET_INFINITY)
- driver_set_timer(desc->inet.port, timeout);
- if (!INETP(desc)->is_ignored)
+ add_multi_timer(desc, INETP(desc)->port, 0,
+ timeout, &tcp_inet_recv_timeout);
+ if (!INET_IGNORED(INETP(desc)))
sock_select(INETP(desc),(FD_READ|FD_CLOSE),1);
else
- INETP(desc)->is_ignored |= INET_IGNORE_READ;
+ INETP(desc)->flags |= INET_IGNORE_READ;
}
}
return ctl_reply(INET_REP_OK, tbuf, 2, rbuf, rsize);
@@ -9610,6 +10293,59 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
}
}
+
+ case TCP_REQ_SENDFILE: {
+#ifdef HAVE_SENDFILE
+ const ErlDrvSizeT required_len =
+ sizeof(desc->sendfile.dup_file_fd) +
+ sizeof(Uint64) * 2;
+
+ int raw_file_fd;
+
+ DEBUGF(("tcp_inet_ctl(%ld): SENDFILE\r\n", (long)desc->inet.port));
+
+ if (len != required_len) {
+ return ctl_error(EINVAL, rbuf, rsize);
+ } else if (!IS_CONNECTED(INETP(desc))) {
+ return ctl_error(ENOTCONN, rbuf, rsize);
+ }
+
+ sys_memcpy(&raw_file_fd, buf, sizeof(raw_file_fd));
+ buf += sizeof(raw_file_fd);
+
+ desc->sendfile.dup_file_fd = dup(raw_file_fd);
+
+ if(desc->sendfile.dup_file_fd == -1) {
+ return ctl_error(errno, rbuf, rsize);
+ }
+
+ desc->sendfile.offset = get_int64(buf);
+ buf += sizeof(Uint64);
+
+ desc->sendfile.length = get_int64(buf);
+ buf += sizeof(Uint64);
+
+ ASSERT(desc->sendfile.offset >= 0);
+ ASSERT(desc->sendfile.length >= 0);
+
+ desc->sendfile.ioq_skip = driver_sizeq(desc->inet.port);
+ desc->sendfile.bytes_sent = 0;
+
+ desc->inet.caller = driver_caller(desc->inet.port);
+ desc->tcp_add_flags |= TCP_ADDF_SENDFILE;
+
+ /* See if we can finish sending without selecting & rescheduling. */
+ if (tcp_inet_sendfile(desc) == 0) {
+ if(desc->sendfile.length > 0) {
+ sock_select(INETP(desc), FD_WRITE, 1);
+ }
+ }
+ return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
+#else
+ return ctl_error(ENOTSUP, rbuf, rsize);
+#endif
+ }
+
default:
DEBUGF(("tcp_inet_ctl(%ld): %u\r\n", (long)desc->inet.port, cmd));
return inet_ctl(INETP(desc), cmd, buf, len, rbuf, rsize);
@@ -9617,12 +10353,27 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
}
+static void tcp_inet_send_timeout(ErlDrvData e, ErlDrvTermData dummy)
+{
+ tcp_descriptor* desc = (tcp_descriptor*)e;
+ ASSERT(IS_BUSY(INETP(desc)));
+ ASSERT(desc->busy_on_send);
+ desc->inet.caller = desc->inet.busy_caller;
+ desc->inet.state &= ~INET_F_BUSY;
+ desc->busy_on_send = 0;
+ set_busy_port(desc->inet.port, 0);
+ inet_reply_error_am(INETP(desc), am_timeout);
+ if (desc->send_timeout_close) {
+ tcp_desc_close(desc);
+ }
+}
+
/*
** tcp_inet_timeout:
** called when timer expire:
** TCP socket may be:
**
-** a) receiving -- deselect
+** a) receiving -- send timeout
** b) connecting -- close socket
** c) accepting -- reset listener
**
@@ -9636,26 +10387,9 @@ static void tcp_inet_timeout(ErlDrvData e)
DEBUGF(("tcp_inet_timeout(%ld) {s=%d\r\n",
(long)desc->inet.port, desc->inet.s));
if ((state & INET_F_MULTI_CLIENT)) { /* Multi-client always means multi-timers */
- fire_multi_timers(&(desc->mtd), desc->inet.port, e);
+ fire_multi_timers(desc, desc->inet.port, e);
} else if ((state & INET_STATE_CONNECTED) == INET_STATE_CONNECTED) {
- if (desc->busy_on_send) {
- ASSERT(IS_BUSY(INETP(desc)));
- desc->inet.caller = desc->inet.busy_caller;
- desc->inet.state &= ~INET_F_BUSY;
- desc->busy_on_send = 0;
- set_busy_port(desc->inet.port, 0);
- inet_reply_error_am(INETP(desc), am_timeout);
- if (desc->send_timeout_close) {
- tcp_desc_close(desc);
- }
- }
- else {
- /* assume recv timeout */
- ASSERT(!desc->inet.active);
- sock_select(INETP(desc),(FD_READ|FD_CLOSE),0);
- desc->i_remain = 0;
- async_error_am(INETP(desc), am_timeout);
- }
+ fire_multi_timers(desc, desc->inet.port, e);
}
else if ((state & INET_STATE_CONNECTING) == INET_STATE_CONNECTING) {
/* assume connect timeout */
@@ -9749,12 +10483,27 @@ static void tcp_inet_commandv(ErlDrvData e, ErlIOVec* ev)
static void tcp_inet_flush(ErlDrvData e)
{
tcp_descriptor* desc = (tcp_descriptor*)e;
- if (!(desc->inet.event_mask & FD_WRITE)) {
- /* Discard send queue to avoid hanging port (OTP-7615) */
- tcp_clear_output(desc);
+ int discard_output;
+
+ /* Discard send queue to avoid hanging port (OTP-7615) */
+ discard_output = !(desc->inet.event_mask & FD_WRITE);
+
+ discard_output |= desc->tcp_add_flags & TCP_ADDF_LINGER_ZERO;
+
+#ifdef HAVE_SENDFILE
+ /* The old file driver aborted when it was stopped during sendfile, so
+ * we'll clear the flag and discard all output. */
+ if(desc->tcp_add_flags & TCP_ADDF_SENDFILE) {
+ desc->tcp_add_flags &= ~TCP_ADDF_SENDFILE;
+ close(desc->sendfile.dup_file_fd);
+
+ discard_output = 1;
+ }
+#endif
+
+ if (discard_output) {
+ tcp_clear_output(desc);
}
- if (desc->tcp_add_flags & TCP_ADDF_LINGER_ZERO)
- tcp_clear_output(desc);
}
static void tcp_inet_process_exit(ErlDrvData e, ErlDrvMonitor *monitorp)
@@ -9770,7 +10519,7 @@ static void tcp_inet_process_exit(ErlDrvData e, ErlDrvMonitor *monitorp)
return;
}
if (timeout != NULL) {
- remove_multi_timer(&(desc->mtd), desc->inet.port, timeout);
+ remove_multi_timer(desc, desc->inet.port, timeout);
}
if (desc->multi_first == NULL) {
sock_select(INETP(desc),FD_ACCEPT,0);
@@ -9801,6 +10550,7 @@ static int tcp_recv_closed(tcp_descriptor* desc)
#ifdef DEBUG
long port = (long) desc->inet.port; /* Used after driver_exit() */
#endif
+ int blocking_send = 0;
DEBUGF(("tcp_recv_closed(%ld): s=%d, in %s, line %d\r\n",
port, desc->inet.s, __FILE__, __LINE__));
if (IS_BUSY(INETP(desc))) {
@@ -9808,7 +10558,7 @@ static int tcp_recv_closed(tcp_descriptor* desc)
desc->inet.caller = desc->inet.busy_caller;
tcp_clear_output(desc);
if (desc->busy_on_send) {
- driver_cancel_timer(desc->inet.port);
+ cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout);
desc->busy_on_send = 0;
DEBUGF(("tcp_recv_closed(%ld): busy on send\r\n", port));
}
@@ -9816,10 +10566,25 @@ static int tcp_recv_closed(tcp_descriptor* desc)
set_busy_port(desc->inet.port, 0);
inet_reply_error_am(INETP(desc), am_closed);
DEBUGF(("tcp_recv_closed(%ld): busy reply 'closed'\r\n", port));
+ blocking_send = 1;
}
+#ifdef HAVE_SENDFILE
+ if (desc->tcp_add_flags & TCP_ADDF_SENDFILE) {
+ tcp_sendfile_aborted(desc, ENOTCONN);
+ blocking_send = 1;
+ }
+#endif
+ if (!blocking_send) {
+ /* No blocking send op to reply to right now.
+ * If next op is a send, make sure it returns {error,closed}
+ * rather than {error,enotconn}.
+ */
+ desc->tcp_add_flags |= TCP_ADDF_DELAYED_CLOSE_SEND;
+ }
+
if (!desc->inet.active) {
- /* We must cancel any timer here ! */
- driver_cancel_timer(desc->inet.port);
+ /* We must cancel any timer here ! */
+ clean_multi_timers(desc, INETP(desc)->port);
/* passive mode do not terminate port ! */
tcp_clear_input(desc);
if (desc->inet.exitf) {
@@ -9854,16 +10619,21 @@ static int tcp_recv_error(tcp_descriptor* desc, int err)
desc->inet.caller = desc->inet.busy_caller;
tcp_clear_output(desc);
if (desc->busy_on_send) {
- driver_cancel_timer(desc->inet.port);
+ cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout);
desc->busy_on_send = 0;
}
desc->inet.state &= ~INET_F_BUSY;
set_busy_port(desc->inet.port, 0);
inet_reply_error_am(INETP(desc), am_closed);
}
+#ifdef HAVE_SENDFILE
+ if (desc->tcp_add_flags & TCP_ADDF_SENDFILE) {
+ tcp_sendfile_aborted(desc, err);
+ }
+#endif
if (!desc->inet.active) {
/* We must cancel any timer here ! */
- driver_cancel_timer(desc->inet.port);
+ clean_multi_timers(desc, INETP(desc)->port);
tcp_clear_input(desc);
if (desc->inet.exitf) {
tcp_desc_close(desc);
@@ -9968,13 +10738,13 @@ static int tcp_deliver(tcp_descriptor* desc, int len)
if (len == 0) {
/* empty buffer or waiting for more input */
if ((desc->i_buf == NULL) || (desc->i_remain > 0))
- return count;
+ return 0;
if ((n = tcp_remain(desc, &len)) != 0) {
if (n < 0) /* packet error */
return n;
if (len > 0) /* more data pending */
desc->i_remain = len;
- return count;
+ return 0;
}
}
@@ -10026,9 +10796,7 @@ static int tcp_deliver(tcp_descriptor* desc, int len)
len = 0;
if (!desc->inet.active) {
- if (!desc->busy_on_send) {
- driver_cancel_timer(desc->inet.port);
- }
+ cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_recv_timeout);
sock_select(INETP(desc),(FD_READ|FD_CLOSE),0);
if (desc->i_buf != NULL)
tcp_restart_input(desc);
@@ -10054,7 +10822,7 @@ static int tcp_recv(tcp_descriptor* desc, int request_len)
int len;
int nread;
- if (desc->i_buf == NULL) { /* allocte a read buffer */
+ if (desc->i_buf == NULL) { /* allocate a read buffer */
int sz = (request_len > 0) ? request_len : desc->inet.bufsz;
if ((desc->i_buf = alloc_buffer(sz)) == NULL)
@@ -10127,10 +10895,11 @@ static int tcp_recv(tcp_descriptor* desc, int request_len)
return tcp_deliver(desc, desc->i_ptr - desc->i_ptr_start);
}
else {
- if ((nread = tcp_remain(desc, &len)) < 0)
+ nread = tcp_remain(desc, &len);
+ if (nread < 0)
return tcp_recv_error(desc, EMSGSIZE);
else if (nread == 0)
- return tcp_deliver(desc, len);
+ return tcp_deliver(desc, len);
else if (len > 0)
desc->i_remain = len; /* set remain */
}
@@ -10350,7 +11119,7 @@ static int tcp_inet_input(tcp_descriptor* desc, HANDLE event)
#ifdef DEBUG
long port = (long) desc->inet.port; /* Used after driver_exit() */
#endif
- ASSERT(!INETP(desc)->is_ignored);
+ ASSERT(!INET_IGNORED(INETP(desc)));
DEBUGF(("tcp_inet_input(%ld) {s=%d\r\n", port, desc->inet.s));
/* XXX fprintf(stderr,"tcp_inet_input(%ld) {s=%d}\r\n",(long) desc->inet.port, desc->inet.s); */
if (desc->inet.state == INET_STATE_ACCEPTING) {
@@ -10449,7 +11218,7 @@ static int tcp_inet_input(tcp_descriptor* desc, HANDLE event)
}
if (timeout != NULL) {
- remove_multi_timer(&(desc->mtd), desc->inet.port, timeout);
+ remove_multi_timer(desc, desc->inet.port, timeout);
}
driver_demonitor_process(desc->inet.port, &monitor);
@@ -10508,8 +11277,8 @@ static int tcp_send_or_shutdown_error(tcp_descriptor* desc, int err)
if (IS_BUSY(INETP(desc))) {
desc->inet.caller = desc->inet.busy_caller;
if (desc->busy_on_send) {
- driver_cancel_timer(desc->inet.port);
- desc->busy_on_send = 0;
+ cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout);
+ desc->busy_on_send = 0;
}
desc->inet.state &= ~INET_F_BUSY;
set_busy_port(desc->inet.port, 0);
@@ -10524,27 +11293,31 @@ static int tcp_send_or_shutdown_error(tcp_descriptor* desc, int err)
DEBUGF(("driver_failure_eof(%ld) in %s, line %d\r\n",
(long)desc->inet.port, __FILE__, __LINE__));
if (desc->inet.active) {
+ ErlDrvTermData err_atom;
if (show_econnreset) {
tcp_error_message(desc, err);
- tcp_closed_message(desc);
- inet_reply_error(INETP(desc), err);
+ err_atom = error_atom(err);
} else {
- tcp_closed_message(desc);
- inet_reply_error_am(INETP(desc), am_closed);
+ err_atom = am_closed;
}
+ tcp_closed_message(desc);
+ if (!(desc->tcp_add_flags & TCP_ADDF_SENDFILE))
+ inet_reply_error_am(INETP(desc), err_atom);
+
if (desc->inet.exitf)
driver_exit(desc->inet.port, 0);
else
tcp_desc_close(desc);
} else {
tcp_close_check(desc);
- tcp_desc_close(desc);
if (desc->inet.caller) {
- if (show_econnreset)
- inet_reply_error(INETP(desc), err);
- else
- inet_reply_error_am(INETP(desc), am_closed);
+ if (!(desc->tcp_add_flags & TCP_ADDF_SENDFILE)) {
+ if (show_econnreset)
+ inet_reply_error(INETP(desc), err);
+ else
+ inet_reply_error_am(INETP(desc), am_closed);
+ }
}
else {
/* No blocking send op to reply to right now.
@@ -10553,6 +11326,7 @@ static int tcp_send_or_shutdown_error(tcp_descriptor* desc, int err)
*/
desc->tcp_add_flags |= TCP_ADDF_DELAYED_CLOSE_SEND;
}
+ tcp_desc_close(desc);
/*
* Make sure that the next receive operation gets an {error,closed}
@@ -10609,6 +11383,12 @@ static int tcp_shutdown_error(tcp_descriptor* desc, int err)
return tcp_send_or_shutdown_error(desc, err);
}
+static void tcp_inet_delay_send(ErlDrvData data, ErlDrvTermData dummy)
+{
+ tcp_descriptor *desc = (tcp_descriptor*)data;
+ (void)tcp_inet_output(desc, INETP(desc)->s);
+}
+
/*
** Send non-blocking vector data
*/
@@ -10649,7 +11429,9 @@ static int tcp_sendv(tcp_descriptor* desc, ErlIOVec* ev)
ev->size += h_len;
}
- if ((sz = driver_sizeq(ix)) > 0) {
+ sz = driver_sizeq(ix);
+
+ if ((desc->tcp_add_flags & TCP_ADDF_SENDFILE) || sz > 0) {
driver_enqv(ix, ev, 0);
if (sz+ev->size >= desc->high) {
DEBUGF(("tcp_sendv(%ld): s=%d, sender forced busy\r\n",
@@ -10659,7 +11441,9 @@ static int tcp_sendv(tcp_descriptor* desc, ErlIOVec* ev)
set_busy_port(desc->inet.port, 1);
if (desc->send_timeout != INET_INFINITY) {
desc->busy_on_send = 1;
- driver_set_timer(desc->inet.port, desc->send_timeout);
+ add_multi_timer(desc, INETP(desc)->port,
+ 0 /* arg */, desc->send_timeout /* timeout */,
+ &tcp_inet_send_timeout);
}
return 1;
}
@@ -10670,11 +11454,14 @@ static int tcp_sendv(tcp_descriptor* desc, ErlIOVec* ev)
DEBUGF(("tcp_sendv(%ld): s=%d, about to send "LLU","LLU" bytes\r\n",
(long)desc->inet.port, desc->inet.s, (llu_t)h_len, (llu_t)len));
- if (INETP(desc)->is_ignored) {
- INETP(desc)->is_ignored |= INET_IGNORE_WRITE;
+ if (INET_IGNORED(INETP(desc))) {
+ INETP(desc)->flags |= INET_IGNORE_WRITE;
n = 0;
} else if (desc->tcp_add_flags & TCP_ADDF_DELAY_SEND) {
- n = 0;
+ driver_enqv(ix, ev, 0);
+ add_multi_timer(desc, INETP(desc)->port, 0,
+ 0, &tcp_inet_delay_send);
+ return 0;
} else if (IS_SOCKET_ERROR(sock_sendv(desc->inet.s, ev->iov,
vsize, &n, 0))) {
if ((sock_errno() != ERRNO_BLOCK) && (sock_errno() != EINTR)) {
@@ -10703,7 +11490,7 @@ static int tcp_sendv(tcp_descriptor* desc, ErlIOVec* ev)
DEBUGF(("tcp_sendv(%ld): s=%d, Send failed, queuing\r\n",
(long)desc->inet.port, desc->inet.s));
driver_enqv(ix, ev, n);
- if (!INETP(desc)->is_ignored)
+ if (!INET_IGNORED(INETP(desc)))
sock_select(INETP(desc),(FD_WRITE|FD_CLOSE), 1);
}
return 0;
@@ -10743,8 +11530,9 @@ static int tcp_send(tcp_descriptor* desc, char* ptr, ErlDrvSizeT len)
inet_output_count(INETP(desc), len+h_len);
+ sz = driver_sizeq(ix);
- if ((sz = driver_sizeq(ix)) > 0) {
+ if ((desc->tcp_add_flags & TCP_ADDF_SENDFILE) || sz > 0) {
if (h_len > 0)
driver_enq(ix, buf, h_len);
driver_enq(ix, ptr, len);
@@ -10756,7 +11544,9 @@ static int tcp_send(tcp_descriptor* desc, char* ptr, ErlDrvSizeT len)
set_busy_port(desc->inet.port, 1);
if (desc->send_timeout != INET_INFINITY) {
desc->busy_on_send = 1;
- driver_set_timer(desc->inet.port, desc->send_timeout);
+ add_multi_timer(desc, INETP(desc)->port,
+ 0 /* arg */, desc->send_timeout /* timeout */,
+ &tcp_inet_send_timeout);
}
return 1;
}
@@ -10769,8 +11559,8 @@ static int tcp_send(tcp_descriptor* desc, char* ptr, ErlDrvSizeT len)
DEBUGF(("tcp_send(%ld): s=%d, about to send "LLU","LLU" bytes\r\n",
(long)desc->inet.port, desc->inet.s, (llu_t)h_len, (llu_t)len));
- if (INETP(desc)->is_ignored) {
- INETP(desc)->is_ignored |= INET_IGNORE_WRITE;
+ if (INET_IGNORED(INETP(desc))) {
+ INETP(desc)->flags |= INET_IGNORE_WRITE;
n = 0;
} else if (desc->tcp_add_flags & TCP_ADDF_DELAY_SEND) {
sock_send(desc->inet.s, buf, 0, 0);
@@ -10803,7 +11593,7 @@ static int tcp_send(tcp_descriptor* desc, char* ptr, ErlDrvSizeT len)
n -= h_len;
driver_enq(ix, ptr+n, len-n);
}
- if (!INETP(desc)->is_ignored)
+ if (!INET_IGNORED(INETP(desc)))
sock_select(INETP(desc),(FD_WRITE|FD_CLOSE), 1);
}
return 0;
@@ -10834,6 +11624,247 @@ static void tcp_inet_drv_input(ErlDrvData data, ErlDrvEvent event)
(void)tcp_inet_input((tcp_descriptor*)data, (HANDLE)event);
}
+#ifdef HAVE_SENDFILE
+static int tcp_sendfile_completed(tcp_descriptor* desc) {
+ ErlDrvTermData spec[LOAD_PORT_CNT + LOAD_TUPLE_CNT * 2 +
+ LOAD_ATOM_CNT * 2 + LOAD_UINT_CNT * 2];
+ Uint32 sent_low, sent_high;
+ int i;
+
+ desc->tcp_add_flags &= ~TCP_ADDF_SENDFILE;
+ close(desc->sendfile.dup_file_fd);
+
+ /* While we flushed the output queue prior to sending the file, we've
+ * deferred clearing busy status until now as there's no point in doing so
+ * while we still have a file to send.
+ *
+ * The watermark is checked since more data may have been added while we
+ * were sending the file. */
+
+ if (driver_sizeq(desc->inet.port) <= desc->low) {
+ if (IS_BUSY(INETP(desc))) {
+ desc->inet.caller = desc->inet.busy_caller;
+ desc->inet.state &= ~INET_F_BUSY;
+
+ set_busy_port(desc->inet.port, 0);
+
+ /* if we have a timer then cancel and send ok to client */
+ if (desc->busy_on_send) {
+ cancel_multi_timer(desc, INETP(desc)->port,
+ &tcp_inet_send_timeout);
+ desc->busy_on_send = 0;
+ }
+
+ inet_reply_ok(INETP(desc));
+ }
+ }
+
+ if (driver_sizeq(desc->inet.port) == 0) {
+ sock_select(INETP(desc), FD_WRITE, 0);
+ send_empty_out_q_msgs(INETP(desc));
+
+ if (desc->tcp_add_flags & TCP_ADDF_PENDING_SHUTDOWN) {
+ tcp_shutdown_async(desc);
+ }
+ }
+
+ sent_low = ((Uint64)desc->sendfile.bytes_sent >> 0) & 0xFFFFFFFF;
+ sent_high = ((Uint64)desc->sendfile.bytes_sent >> 32) & 0xFFFFFFFF;
+
+ i = LOAD_ATOM(spec, 0, am_sendfile);
+ i = LOAD_PORT(spec, i, desc->inet.dport);
+ i = LOAD_ATOM(spec, i, am_ok);
+ i = LOAD_UINT(spec, i, sent_low);
+ i = LOAD_UINT(spec, i, sent_high);
+ i = LOAD_TUPLE(spec, i, 3);
+ i = LOAD_TUPLE(spec, i, 3);
+
+ ASSERT(i == sizeof(spec)/sizeof(*spec));
+
+ return erl_drv_output_term(desc->inet.dport, spec, i);
+}
+
+static int tcp_sendfile_aborted(tcp_descriptor* desc, int socket_error) {
+ ErlDrvTermData spec[LOAD_PORT_CNT + LOAD_TUPLE_CNT * 2 + LOAD_ATOM_CNT * 3];
+ int i;
+
+ /* We don't clean up sendfile state here, as that's done in tcp_desc_close
+ * following normal error handling. All we do here is report the failure. */
+
+ i = LOAD_ATOM(spec, 0, am_sendfile);
+ i = LOAD_PORT(spec, i, desc->inet.dport);
+ i = LOAD_ATOM(spec, i, am_error);
+
+ switch (socket_error) {
+ case ECONNRESET:
+ case ENOTCONN:
+ case EPIPE:
+ i = LOAD_ATOM(spec, i, am_closed);
+ break;
+ default:
+ i = LOAD_ATOM(spec, i, error_atom(socket_error));
+ }
+
+ i = LOAD_TUPLE(spec, i, 2);
+ i = LOAD_TUPLE(spec, i, 3);
+
+ ASSERT(i == sizeof(spec)/sizeof(*spec));
+
+ return erl_drv_output_term(desc->inet.dport, spec, i);
+}
+
+static int tcp_inet_sendfile(tcp_descriptor* desc) {
+ ErlDrvPort ix = desc->inet.port;
+ int result = 0;
+ ssize_t n;
+
+ DEBUGF(("tcp_inet_sendfile(%ld) {s=%d\r\n", (long)ix, desc->inet.s));
+
+ /* If there was any data in the queue by the time sendfile was issued,
+ * we'll need to skip it first. Note that we don't clear busy status until
+ * we're finished sending the file. */
+ while (desc->sendfile.ioq_skip > 0) {
+ ssize_t bytes_to_send;
+ SysIOVec* iov;
+ int vsize;
+
+ ASSERT(driver_sizeq(ix) >= desc->sendfile.ioq_skip);
+
+ if ((iov = driver_peekq(ix, &vsize)) == NULL) {
+ ERTS_INTERNAL_ERROR("ioq empty when sendfile.ioq_skip > 0");
+ }
+
+ bytes_to_send = MIN(desc->sendfile.ioq_skip, iov[0].iov_len);
+ n = sock_send(desc->inet.s, iov[0].iov_base, bytes_to_send, 0);
+
+ if (!IS_SOCKET_ERROR(n)) {
+ desc->sendfile.ioq_skip -= n;
+ driver_deq(ix, n);
+ } else if (sock_errno() == ERRNO_BLOCK) {
+#ifdef __WIN32__
+ desc->inet.send_would_block = 1;
+#endif
+ goto done;
+ } else if (sock_errno() != EINTR) {
+ goto socket_error;
+ }
+ }
+
+ while (desc->sendfile.length > 0) {
+ /* For some reason the maximum ssize_t cannot be used as the max size.
+ * 1GB seems to work on all platforms */
+ const Sint64 SENDFILE_CHUNK_SIZE = ((1UL << 30) - 1);
+
+ ssize_t bytes_to_send = MIN(SENDFILE_CHUNK_SIZE, desc->sendfile.length);
+ off_t offset = desc->sendfile.offset;
+
+#if defined(__linux__)
+ n = sendfile(desc->inet.s, desc->sendfile.dup_file_fd, &offset,
+ bytes_to_send);
+#elif defined(__FreeBSD__) || defined(__DragonFly__) || defined(__DARWIN__)
+ {
+ off_t bytes_sent;
+ int error;
+
+ #if defined(__DARWIN__)
+ bytes_sent = bytes_to_send;
+
+ error = sendfile(desc->sendfile.dup_file_fd, desc->inet.s, offset,
+ &bytes_sent, NULL, 0);
+ n = bytes_sent;
+ #else
+ error = sendfile(desc->sendfile.dup_file_fd, desc->inet.s, offset,
+ bytes_to_send, NULL, &bytes_sent, 0);
+ n = bytes_sent;
+ #endif
+
+ if(error < 0) {
+ /* EAGAIN/EINTR report partial success by setting bytes_sent,
+ * so we have to skip error handling if nonzero, and skip EOF
+ * handling if zero, as it's possible that we didn't manage to
+ * send anything at all before being interrupted by a
+ * signal. */
+ if((errno != EAGAIN && errno != EINTR) || bytes_sent == 0) {
+ n = -1;
+ }
+ }
+ }
+#elif defined(__sun) && defined(__SVR4) && defined(HAVE_SENDFILEV)
+ {
+ sendfilevec_t sfvec[1];
+ size_t bytes_sent;
+ ssize_t error;
+
+ sfvec[0].sfv_fd = desc->sendfile.dup_file_fd;
+ sfvec[0].sfv_len = bytes_to_send;
+ sfvec[0].sfv_off = offset;
+ sfvec[0].sfv_flag = 0;
+
+ error = sendfilev(desc->inet.s, sfvec, 1, &bytes_sent);
+ n = bytes_sent;
+
+ if(error < 0) {
+ if(errno == EINVAL) {
+ /* On some solaris versions (I've seen it on SunOS 5.10),
+ * using a sfv_len larger than the filesize will result in
+ * a (-1 && errno == EINVAL). We translate this to a
+ * successful send of the data.*/
+ } else {
+ /* EAGAIN/EINTR behavior is identical to *BSD. */
+ if((errno != EAGAIN && errno != EINTR) || bytes_sent == 0) {
+ n = -1;
+ }
+ }
+ }
+ }
+#else
+ #error "Unsupported sendfile syscall; update configure test."
+#endif
+
+ if (n > 0) {
+ desc->sendfile.bytes_sent += n;
+ desc->sendfile.offset += n;
+ desc->sendfile.length -= n;
+ } else if (n == 0) {
+ /* EOF. */
+ desc->sendfile.length = 0;
+ break;
+ } else if (IS_SOCKET_ERROR(n) && sock_errno() != EINTR) {
+ if (sock_errno() != ERRNO_BLOCK) {
+ goto socket_error;
+ }
+
+#ifdef __WIN32__
+ desc->inet.send_would_block = 1;
+#endif
+ break;
+ }
+ }
+
+ if (desc->sendfile.length == 0) {
+ tcp_sendfile_completed(desc);
+ }
+
+ goto done;
+
+socket_error: {
+ int socket_errno = sock_errno();
+
+ DEBUGF(("tcp_inet_sendfile(%ld): send errno = %d (errno %d)\r\n",
+ (long)desc->inet.port, socket_errno, errno));
+
+ tcp_sendfile_aborted(desc, socket_errno);
+ result = tcp_send_error(desc, socket_errno);
+
+ goto done;
+ }
+
+done:
+ DEBUGF(("tcp_inet_sendfile(%ld) }\r\n", (long)desc->inet.port));
+ return result;
+}
+#endif /* HAVE_SENDFILE */
+
/* socket ready for ouput:
** 1. INET_STATE_CONNECTING => non block connect ?
** 2. INET_STATE_CONNECTED => write output
@@ -10843,7 +11874,7 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event)
int ret = 0;
ErlDrvPort ix = desc->inet.port;
- ASSERT(!INETP(desc)->is_ignored);
+ ASSERT(!INET_IGNORED(INETP(desc)));
DEBUGF(("tcp_inet_output(%ld) {s=%d\r\n",
(long)desc->inet.port, desc->inet.s));
if (desc->inet.state == INET_STATE_CONNECTING) {
@@ -10894,7 +11925,14 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event)
async_ok(INETP(desc));
}
else if (IS_CONNECTED(INETP(desc))) {
- for (;;) {
+
+#ifdef HAVE_SENDFILE
+ if(desc->tcp_add_flags & TCP_ADDF_SENDFILE) {
+ return tcp_inet_sendfile(desc);
+ }
+#endif
+
+ for (;;) {
int vsize;
ssize_t n;
SysIOVec* iov;
@@ -10920,6 +11958,12 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event)
#ifdef __WIN32__
desc->inet.send_would_block = 1;
#endif
+ /* If DELAY_SEND is set ready_output may have
+ been called without doing select so we do
+ a select in order to get into the correct
+ state */
+ if (desc->tcp_add_flags & TCP_ADDF_DELAY_SEND)
+ sock_select(INETP(desc), FD_WRITE, 1);
goto done;
} else if (n == 0) { /* Workaround for redhat/CentOS 6.3 returning
0 when sending packets with
@@ -10945,7 +11989,7 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event)
set_busy_port(desc->inet.port, 0);
/* if we have a timer then cancel and send ok to client */
if (desc->busy_on_send) {
- driver_cancel_timer(desc->inet.port);
+ cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout);
desc->busy_on_send = 0;
}
inet_reply_ok(INETP(desc));
@@ -11152,6 +12196,7 @@ static ErlDrvSSizeT packet_inet_ctl(ErlDrvData e, unsigned int cmd, char* buf,
int type = SOCK_DGRAM;
int af = AF_INET;
+ cmd -= ERTS_INET_DRV_CONTROL_MAGIC_NUMBER;
switch(cmd) {
case INET_REQ_OPEN: /* open socket and return internal index */
DEBUGF(("packet_inet_ctl(%ld): OPEN\r\n", (long)desc->port));
@@ -11300,24 +12345,20 @@ static ErlDrvSSizeT packet_inet_ctl(ErlDrvData e, unsigned int cmd, char* buf,
(desc->sfamily, &remote, &buf, &len)) != NULL)
return ctl_xerror(xerror, rbuf, rsize);
- sock_select(desc, FD_CONNECT, 1);
code = sock_connect(desc->s, &remote.sa, len);
if (IS_SOCKET_ERROR(code) && (sock_errno() == EINPROGRESS)) {
/* XXX: Unix only -- WinSock would have a different cond! */
- desc->state = INET_STATE_CONNECTING;
if (timeout != INET_INFINITY)
driver_set_timer(desc->port, timeout);
enq_async(desc, tbuf, INET_REQ_CONNECT);
+ async_ok(desc);
}
else if (code == 0) { /* OK we are connected */
- sock_select(desc, FD_CONNECT, 0);
- desc->state = INET_STATE_CONNECTED;
enq_async(desc, tbuf, INET_REQ_CONNECT);
async_ok(desc);
}
else {
- sock_select(desc, FD_CONNECT, 0);
return ctl_error(sock_errno(), rbuf, rsize);
}
return ctl_reply(INET_REP_OK, tbuf, 2, rbuf, rsize);
@@ -11508,9 +12549,12 @@ static void packet_inet_timeout(ErlDrvData e)
{
udp_descriptor * udesc = (udp_descriptor*) e;
inet_descriptor * desc = INETP(udesc);
- if (!(desc->active))
+ if (!(desc->active)) {
sock_select(desc, FD_READ, 0);
- async_error_am (desc, am_timeout);
+ async_error_am (desc, am_timeout);
+ } else {
+ (void)packet_inet_input(udesc, desc->s);
+ }
}
@@ -11545,10 +12589,10 @@ static void packet_inet_command(ErlDrvData e, char* buf, ErlDrvSizeT len)
if (IS_SCTP(desc))
{
ErlDrvSizeT data_len;
- struct iovec iov[1]; /* For real data */
- struct msghdr mhdr; /* Message wrapper */
- struct sctp_sndrcvinfo *sri; /* The actual ancilary data */
- union { /* For ancilary data */
+ struct iovec iov[1]; /* For real data */
+ struct msghdr mhdr; /* Message wrapper */
+ struct sctp_sndrcvinfo *sri; /* The actual ancillary data */
+ union { /* For ancillary data */
struct cmsghdr hdr;
char ancd[CMSG_SPACE(sizeof(*sri))];
} cmsg;
@@ -11558,12 +12602,12 @@ static void packet_inet_command(ErlDrvData e, char* buf, ErlDrvSizeT len)
return;
}
- /* The ancilary data */
+ /* The ancillary data */
sri = (struct sctp_sndrcvinfo *) (CMSG_DATA(&cmsg.hdr));
/* Get the "sndrcvinfo" from the buffer, advancing the "ptr": */
ptr = sctp_get_sendparams(sri, ptr);
- /* The ancilary data wrapper */
+ /* The ancillary data wrapper */
cmsg.hdr.cmsg_level = IPPROTO_SCTP;
cmsg.hdr.cmsg_type = SCTP_SNDRCV;
cmsg.hdr.cmsg_len = CMSG_LEN(sizeof(*sri));
@@ -11578,7 +12622,7 @@ static void packet_inet_command(ErlDrvData e, char* buf, ErlDrvSizeT len)
iov[0].iov_base = ptr; /* The real data */
mhdr.msg_iov = iov;
mhdr.msg_iovlen = 1;
- mhdr.msg_control = cmsg.ancd; /* For ancilary data */
+ mhdr.msg_control = cmsg.ancd; /* For ancillary data */
mhdr.msg_controllen = cmsg.hdr.cmsg_len;
VALGRIND_MAKE_MEM_DEFINED(mhdr.msg_control, mhdr.msg_controllen); /*suppress "uninitialised bytes"*/
mhdr.msg_flags = 0; /* Not used with "sendmsg" */
@@ -11662,10 +12706,12 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event)
char abuf[sizeof(inet_address)]; /* buffer address; enough??? */
int packet_count = udesc->read_packets;
int count = 0; /* number of packets delivered to owner */
-#ifdef HAVE_SCTP
+#ifndef __WIN32__
struct msghdr mhdr; /* Top-level msg structure */
struct iovec iov[1]; /* Data or Notification Event */
- char ancd[SCTP_ANC_BUFF_SIZE]; /* Ancillary Data */
+ char ancd[ANC_BUFF_SIZE]; /* Ancillary Data */
+#endif
+#ifdef HAVE_SCTP
int short_recv = 0;
#endif
@@ -11675,15 +12721,11 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event)
sys_memzero((char *) &other, sizeof(other));
/* udesc->i_buf is only kept between SCTP fragments */
- if (udesc->i_buf == NULL) {
- udesc->i_bufsz = desc->bufsz + len;
- if ((udesc->i_buf = alloc_buffer(udesc->i_bufsz)) == NULL)
- return packet_error(udesc, ENOMEM);
- /* pointer to message start */
- udesc->i_ptr = udesc->i_buf->orig_bytes + len;
- } else {
- ErlDrvBinary* tmp;
+#ifdef HAVE_SCTP
+ if (udesc->i_buf != NULL) {
+ ErlDrvBinary* tmp;
int bufsz;
+ ASSERT(IS_SCTP(desc));
bufsz = desc->bufsz + (udesc->i_ptr - udesc->i_buf->orig_bytes);
if ((tmp = realloc_buffer(udesc->i_buf, bufsz)) == NULL) {
release_buffer(udesc->i_buf);
@@ -11695,6 +12737,15 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event)
udesc->i_buf = tmp;
udesc->i_bufsz = bufsz;
}
+ } else
+#endif
+ {
+ ASSERT(udesc->i_buf == NULL);
+ udesc->i_bufsz = desc->bufsz + len;
+ if ((udesc->i_buf = alloc_buffer(udesc->i_bufsz)) == NULL)
+ return packet_error(udesc, ENOMEM);
+ /* pointer to message start */
+ udesc->i_ptr = udesc->i_buf->orig_bytes + len;
}
/* Note: On Windows NT, recvfrom() fails if the socket is connected. */
@@ -11709,7 +12760,7 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event)
mhdr.msg_iov = iov;
mhdr.msg_iovlen = 1;
mhdr.msg_control = ancd;
- mhdr.msg_controllen = SCTP_ANC_BUFF_SIZE;
+ mhdr.msg_controllen = ANC_BUFF_SIZE;
mhdr.msg_flags = 0; /* To be filled by "recvmsg" */
/* Do the actual SCTP receive: */
@@ -11724,6 +12775,24 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event)
other = desc->remote;
goto check_result;
}
+#ifndef __WIN32__
+ /* recvmsg() does not exist in the Winsock API */
+ if (desc->recv_cmsgflags) {
+ /* Use recvmsg() */
+ iov->iov_base = udesc->i_ptr;
+ iov->iov_len = desc->bufsz;
+ mhdr.msg_name = &other;
+ mhdr.msg_namelen = len;
+ mhdr.msg_iov = iov;
+ mhdr.msg_iovlen = 1;
+ mhdr.msg_control = ancd;
+ mhdr.msg_controllen = ANC_BUFF_SIZE;
+ mhdr.msg_flags = 0;
+ n = sock_recvmsg(desc->s, &mhdr, 0);
+ len = mhdr.msg_namelen;
+ goto check_result;
+ }
+#endif
n = sock_recvfrom(desc->s, udesc->i_ptr, desc->bufsz,
0, &other.sa, &len);
check_result:
@@ -11736,7 +12805,7 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event)
udesc->i_buf = NULL;
if (!desc->active) {
async_error(desc, err);
- driver_cancel_timer(desc->port);
+ driver_cancel_timer(desc->port);
sock_select(desc,FD_READ,0);
}
else {
@@ -11753,6 +12822,14 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event)
) {
sock_select(desc,FD_READ,1);
}
+#ifdef HAVE_SCTP
+ if (!short_recv) {
+#endif
+ release_buffer(udesc->i_buf);
+ udesc->i_buf = NULL;
+#ifdef HAVE_SCTP
+ }
+#endif
return count; /* strange, not ready */
}
@@ -11768,7 +12845,7 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event)
{
/* message received */
int code;
- void * extra = NULL;
+ void *mp;
char * ptr;
int nsz;
@@ -11799,21 +12876,25 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event)
udesc->i_ptr = NULL; /* not used from here */
}
}
+ mp = NULL;
#ifdef HAVE_SCTP
- if (IS_SCTP(desc)) extra = &mhdr;
+ if (IS_SCTP(desc)) mp = &mhdr;
+#endif
+#ifndef __WIN32__
+ if (desc->recv_cmsgflags) mp = &mhdr;
#endif
/* Actual parsing and return of the data received, occur here: */
code = packet_reply_binary_data(desc, len, udesc->i_buf,
(sizeof(other) - len),
nsz,
- extra);
+ mp);
free_buffer(udesc->i_buf);
udesc->i_buf = NULL;
if (code < 0)
return count;
count++;
if (!desc->active) {
- driver_cancel_timer(desc->port); /* possibly cancel */
+ driver_cancel_timer(desc->port);
sock_select(desc,FD_READ,0);
return count; /* passive mode (read one packet only) */
}
@@ -11829,80 +12910,18 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event)
sock_select(desc, FD_READ, 1);
}
#endif
- return count;
-}
-
-static void packet_inet_drv_output(ErlDrvData e, ErlDrvEvent event)
-{
- (void) packet_inet_output((udp_descriptor*)e, (HANDLE)event);
-}
-/* UDP/SCTP socket ready for output:
-** This is a Back-End for Non-Block SCTP Connect (INET_STATE_CONNECTING)
-*/
-static int packet_inet_output(udp_descriptor* udesc, HANDLE event)
-{
- inet_descriptor* desc = INETP(udesc);
- int ret = 0;
- ErlDrvPort ix = desc->port;
+ /* We set a timer on the port to trigger now.
+ This emulates a "yield" operation as that is
+ what we want to do here. We do *NOT* do a deselect
+ as that is expensive, instead we check if the
+ socket it still active when the timeout triggers
+ and if it is not, then we just ignore the timeout */
+ driver_set_timer(desc->port, 0);
- DEBUGF(("packet_inet_output(%ld) {s=%d\r\n",
- (long)desc->port, desc->s));
-
- if (desc->state == INET_STATE_CONNECTING) {
- sock_select(desc, FD_CONNECT, 0);
-
- driver_cancel_timer(ix); /* posssibly cancel a timer */
-#ifndef __WIN32__
- /*
- * XXX This is strange. This *should* work on Windows NT too,
- * but doesn't. An bug in Winsock 2.0 for Windows NT?
- *
- * See "Unix Netwok Programming", W.R.Stevens, p 412 for a
- * discussion about Unix portability and non blocking connect.
- */
-
-#ifndef SO_ERROR
- {
- int sz = sizeof(desc->remote);
- int code = sock_peer(desc->s,
- (struct sockaddr*) &desc->remote, &sz);
-
- if (IS_SOCKET_ERROR(code)) {
- desc->state = INET_STATE_OPEN; /* restore state */
- ret = async_error(desc, sock_errno());
- goto done;
- }
- }
-#else
- {
- int error = 0; /* Has to be initiated, we check it */
- unsigned int sz = sizeof(error); /* even if we get -1 */
- int code = sock_getopt(desc->s, SOL_SOCKET, SO_ERROR,
- (void *)&error, &sz);
-
- if ((code < 0) || error) {
- desc->state = INET_STATE_OPEN; /* restore state */
- ret = async_error(desc, error);
- goto done;
- }
- }
-#endif /* SO_ERROR */
-#endif /* !__WIN32__ */
-
- desc->state = INET_STATE_CONNECTED;
- async_ok(desc);
- }
- else {
- sock_select(desc,FD_CONNECT,0);
-
- DEBUGF(("packet_inet_output(%ld): bad state: %04x\r\n",
- (long)desc->port, desc->state));
- }
- done:
- DEBUGF(("packet_inet_output(%ld) }\r\n", (long)desc->port));
- return ret;
+ return count;
}
+
#endif
/*---------------------------------------------------------------------------*/
@@ -11963,55 +12982,71 @@ make_noninheritable_handle(SOCKET s)
* Multi-timers
*/
-static void fire_multi_timers(MultiTimerData **first, ErlDrvPort port,
+static void fire_multi_timers(tcp_descriptor *desc, ErlDrvPort port,
ErlDrvData data)
{
- ErlDrvTime next_timeout;
- if (!*first) {
- ASSERT(0);
- return;
+ ErlDrvTime next_timeout = 0;
+ if (!desc->mtd) {
+ ASSERT(0);
+ return;
}
#ifdef DEBUG
{
ErlDrvTime chk = erl_drv_monotonic_time(ERL_DRV_MSEC);
- ASSERT(chk >= (*first)->when);
+ ASSERT(chk >= desc->mtd->when);
}
#endif
do {
- MultiTimerData *save = *first;
- *first = save->next;
- (*(save->timeout_function))(data,save->caller);
- FREE(save);
- if (*first == NULL) {
+ MultiTimerData save = *desc->mtd;
+
+ /* We first remove the timer so that the timeout_functions has
+ can call clean_multi_timers without breaking anything */
+ if (desc->mtd_cache == NULL) {
+ desc->mtd_cache = desc->mtd;
+ } else {
+ FREE(desc->mtd);
+ }
+
+ desc->mtd = save.next;
+ if (desc->mtd != NULL)
+ desc->mtd->prev = NULL;
+
+ (*(save.timeout_function))(data,save.caller);
+
+ if (desc->mtd == NULL)
return;
- }
- (*first)->prev = NULL;
- next_timeout = (*first)->when - erl_drv_monotonic_time(ERL_DRV_MSEC);
+
+ next_timeout = desc->mtd->when - erl_drv_monotonic_time(ERL_DRV_MSEC);
} while (next_timeout <= 0);
+
driver_set_timer(port, (unsigned long) next_timeout);
}
-static void clean_multi_timers(MultiTimerData **first, ErlDrvPort port)
+static void clean_multi_timers(tcp_descriptor *desc, ErlDrvPort port)
{
- MultiTimerData *p;
- if (*first) {
+ if (desc->mtd) {
driver_cancel_timer(port);
}
- while (*first) {
- p = *first;
- *first = p->next;
- FREE(p);
+ while (desc->mtd) {
+ MultiTimerData *p = desc->mtd;
+ desc->mtd = p->next;
+ FREE(p);
+ }
+ desc->mtd = NULL;
+ if (desc->mtd_cache) {
+ FREE(desc->mtd_cache);
+ desc->mtd_cache = NULL;
}
}
-static void remove_multi_timer(MultiTimerData **first, ErlDrvPort port, MultiTimerData *p)
+static void remove_multi_timer(tcp_descriptor *desc, ErlDrvPort port, MultiTimerData *p)
{
if (p->prev != NULL) {
p->prev->next = p->next;
} else {
driver_cancel_timer(port);
- *first = p->next;
- if (*first) {
- ErlDrvTime ntmo = (*first)->when - erl_drv_monotonic_time(ERL_DRV_MSEC);
+ desc->mtd = p->next;
+ if (desc->mtd) {
+ ErlDrvTime ntmo = desc->mtd->when - erl_drv_monotonic_time(ERL_DRV_MSEC);
if (ntmo < 0)
ntmo = 0;
driver_set_timer(port, (unsigned long) ntmo);
@@ -12020,36 +13055,67 @@ static void remove_multi_timer(MultiTimerData **first, ErlDrvPort port, MultiTim
if (p->next != NULL) {
p->next->prev = p->prev;
}
- FREE(p);
+ if (desc->mtd_cache == NULL)
+ desc->mtd_cache = p;
+ else
+ FREE(p);
+}
+
+/* Cancel a timer based on the timeout_fun */
+static void cancel_multi_timer(tcp_descriptor *desc, ErlDrvPort port,
+ void (*timeout_fun)(ErlDrvData drv_data,
+ ErlDrvTermData caller))
+{
+ MultiTimerData *timer = desc->mtd;
+ while(timer && timer->timeout_function != timeout_fun) {
+ timer = timer->next;
+ }
+ if (timer) {
+ remove_multi_timer(desc, port, timer);
+ }
}
-static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port,
+static MultiTimerData *add_multi_timer(tcp_descriptor *desc, ErlDrvPort port,
ErlDrvTermData caller, unsigned timeout,
void (*timeout_fun)(ErlDrvData drv_data,
ErlDrvTermData caller))
{
MultiTimerData *mtd, *p, *s;
- mtd = ALLOC(sizeof(MultiTimerData));
- mtd->when = erl_drv_monotonic_time(ERL_DRV_MSEC) + ((ErlDrvTime) timeout) + 1;
+
+ /* Use cached timer if available */
+ if (desc->mtd_cache != NULL) {
+ mtd = desc->mtd_cache;
+ desc->mtd_cache = NULL;
+ } else
+ mtd = ALLOC(sizeof(MultiTimerData));
+
+ if (timeout)
+ mtd->when = erl_drv_monotonic_time(ERL_DRV_MSEC) + ((ErlDrvTime) timeout);
+ else
+ mtd->when = INT64_MIN; /* Don't have to get the time for 0 msec timeouts */
+
mtd->timeout_function = timeout_fun;
mtd->caller = caller;
mtd->next = mtd->prev = NULL;
- for(p = *first,s = NULL; p != NULL; s = p, p = p->next) {
+
+ /* Find correct slot in timer linked list */
+ for(p = desc->mtd,s = NULL; p != NULL; s = p, p = p->next) {
if (p->when >= mtd->when) {
break;
}
}
+ /* Insert in linked list */
if (!p) {
if (!s) {
- *first = mtd;
+ desc->mtd = mtd;
} else {
s->next = mtd;
mtd->prev = s;
}
} else {
if (!s) {
- *first = mtd;
+ desc->mtd = mtd;
} else {
s->next = mtd;
mtd->prev = s;
@@ -12057,10 +13123,8 @@ static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port,
mtd->next = p;
p->prev = mtd;
}
+ /* Possibly set new timer */
if (!s) {
- if (mtd->next) {
- driver_cancel_timer(port);
- }
driver_set_timer(port,timeout);
}
return mtd;