diff options
Diffstat (limited to 'erts/emulator/drivers/common/inet_drv.c')
-rw-r--r-- | erts/emulator/drivers/common/inet_drv.c | 1965 |
1 files changed, 968 insertions, 997 deletions
diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c index 09bada457d..554c48059f 100644 --- a/erts/emulator/drivers/common/inet_drv.c +++ b/erts/emulator/drivers/common/inet_drv.c @@ -1,18 +1,19 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 1997-2013. All Rights Reserved. + * Copyright Ericsson AB 1997-2017. All Rights Reserved. * - * The contents of this file are subject to the Erlang Public License, - * Version 1.1, (the "License"); you may not use this file except in - * compliance with the License. You should have received a copy of the - * Erlang Public License along with this software. If not, it can be - * retrieved online at http://www.erlang.org/. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * Software distributed under the License is distributed on an "AS IS" - * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See - * the License for the specific language governing rights and limitations - * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. * * %CopyrightEnd% */ @@ -33,6 +34,7 @@ #include <stdio.h> #include <stdlib.h> +#include <stddef.h> #include <ctype.h> #include <sys/types.h> #include <errno.h> @@ -57,6 +59,9 @@ #ifdef HAVE_NETPACKET_PACKET_H #include <netpacket/packet.h> #endif +#ifdef HAVE_SYS_UN_H +#include <sys/un.h> +#endif /* All platforms fail on malloc errors. */ #define FATAL_MALLOC @@ -93,10 +98,6 @@ typedef unsigned long long llu_t; #define INT16_MAX (32767) #endif -#ifdef __OSE__ -#include "inet.h" -#endif - #ifdef __WIN32__ #define STRNCASECMP strncasecmp @@ -268,14 +269,13 @@ static BOOL (WINAPI *fpSetHandleInformation)(HANDLE,DWORD,DWORD); #define sock_htonl(x) htonl((x)) #define sock_send(s,buf,len,flag) send((s),(buf),(len),(flag)) #define sock_sendv(s, vec, size, np, flag) \ - WSASend((s),(WSABUF*)(vec),\ - (size),(np),(flag),NULL,NULL) + WSASend((s),(WSABUF*)(vec),(size),(np),(flag),NULL,NULL) #define sock_recv(s,buf,len,flag) recv((s),(buf),(len),(flag)) #define sock_recvfrom(s,buf,blen,flag,addr,alen) \ - recvfrom((s),(buf),(blen),(flag),(addr),(alen)) + recvfrom((s),(buf),(blen),(flag),(addr),(alen)) #define sock_sendto(s,buf,blen,flag,addr,alen) \ - sendto((s),(buf),(blen),(flag),(addr),(alen)) + sendto((s),(buf),(blen),(flag),(addr),(alen)) #define sock_hostname(buf, len) gethostname((buf), (len)) #define sock_getservbyname(name,proto) getservbyname((name),(proto)) @@ -294,135 +294,11 @@ static BOOL (WINAPI *fpSetHandleInformation)(HANDLE,DWORD,DWORD); static unsigned long zero_value = 0; static unsigned long one_value = 1; -#elif defined (__OSE__) - -/* - * Some notes about how inet (currently only tcp) works on OSE. - * The driver uses OSE signals to communicate with the one_inet - * process. Because of the difference in how signals and file descriptors - * work the whole select/deselect mechanic is very different. - * In ose when a sock_select is done a function is called. That function - * notes the changes that the driver want to do, but does not act on it. - * later when the function returns the new desired state is compared - * to the previous state and the apprioriate actions are taken. The action - * is usually to either request more data from the stack or stop requesting - * data. - * - * One thing to note is that the driver never does select/deselect. It always - * listens for the signals. Flow of data is regulated by sending or not sending - * signals to the ose inet process. - * - * The interesting functions to look at are: - * * inet_driver_select : called when sock_select is called - * * tcp_inet_ose_dispatch_signal : checks state changes and sends new signals - * * tcp_inet_drv_output_ose : ready output callback, reads signals and calls - * dispatch_signal - * * tcp_inet_drv_input_ose : ready input callback. - */ - -#include "efs.h" -#include "sys/socket.h" -#include "sys/uio.h" -#include "sfk/sys/sfk_uio.h" -#include "netinet/in.h" -#include "netinet/tcp.h" -#include "netdb.h" -#include "ose_spi/socket.sig" - - -static ssize_t writev_fallback(int fd, const struct iovec *iov, int iovcnt, int max_sz); - -#define INVALID_SOCKET -1 -#define INVALID_EVENT -1 -#define SOCKET_ERROR -1 - -#define SOCKET int -#define HANDLE int -#define FD_READ ERL_DRV_READ -#define FD_WRITE ERL_DRV_WRITE -#define FD_CLOSE 0 -#define FD_CONNECT (1<<4) -#define FD_ACCEPT (1<<5) -#define SOCK_FD_ERROR (1<<6) - -#define sock_connect(s, addr, len) connect((s), (addr), (len)) -#define sock_listen(s, b) listen((s), (b)) -#define sock_bind(s, addr, len) bind((s), (addr), (len)) -#define sock_getopt(s,t,n,v,l) getsockopt((s),(t),(n),(v),(l)) -#define sock_setopt(s,t,n,v,l) setsockopt((s),(t),(n),(v),(l)) -#define sock_name(s, addr, len) getsockname((s), (addr), (len)) -#define sock_peer(s, addr, len) getpeername((s), (addr), (len)) -#define sock_ntohs(x) ntohs((x)) -#define sock_ntohl(x) ntohl((x)) -#define sock_htons(x) htons((x)) -#define sock_htonl(x) htonl((x)) - -#define sock_accept(s, addr, len) accept((s), (addr), (len)) -#define sock_send(s,buf,len,flag) inet_send((s),(buf),(len),(flag)) -#define sock_sendto(s,buf,blen,flag,addr,alen) \ - sendto((s),(buf),(blen),(flag),(addr),(alen)) -#define sock_sendv(s, vec, size, np, flag) \ - (*(np) = writev_fallback((s), (struct iovec*)(vec), (size), (*(np)))) -#define sock_sendmsg(s,msghdr,flag) sendmsg((s),(msghdr),(flag)) - -#define sock_open(af, type, proto) socket((af), (type), (proto)) -#define sock_close(s) close((s)) -#define sock_dup(s) dup((s)) -#define sock_shutdown(s, how) shutdown((s), (how)) - -#define sock_hostname(buf, len) gethostname((buf), (len)) -#define sock_getservbyname(name,proto) getservbyname((name), (proto)) -#define sock_getservbyport(port,proto) getservbyport((port), (proto)) - -#define sock_recv(s,buf,len,flag) recv((s),(buf),(len),(flag)) -#define sock_recvfrom(s,buf,blen,flag,addr,alen) \ - recvfrom((s),(buf),(blen),(flag),(addr),(alen)) -#define sock_recvmsg(s,msghdr,flag) recvmsg((s),(msghdr),(flag)) - -#define sock_errno() errno -#define sock_create_event(d) ((d)->s) /* return file descriptor */ -#define sock_close_event(e) /* do nothing */ - -#ifndef WANT_NONBLOCKING -#define WANT_NONBLOCKING -#endif -#include "sys.h" - -typedef unsigned long u_long; -#define IN_CLASSA(a) ((((in_addr_t)(a)) & 0x80000000) == 0) -#define IN_CLASSA_NET 0xff000000 -#define IN_CLASSA_NSHIFT 24 -#define IN_CLASSA_HOST (0xffffffff & ~IN_CLASSA_NET) -#define IN_CLASSA_MAX 128 - -#define IN_CLASSB(a) ((((in_addr_t)(a)) & 0xc0000000) == 0x80000000) -#define IN_CLASSB_NET 0xffff0000 -#define IN_CLASSB_NSHIFT 16 -#define IN_CLASSB_HOST (0xffffffff & ~IN_CLASSB_NET) -#define IN_CLASSB_MAX 65536 - -#define IN_CLASSC(a) ((((in_addr_t)(a)) & 0xe0000000) == 0xc0000000) -#define IN_CLASSC_NET 0xffffff00 -#define IN_CLASSC_NSHIFT 8 -#define IN_CLASSC_HOST (0xffffffff & ~IN_CLASSC_NET) - -#define IN_CLASSD(a) ((((in_addr_t)(a)) & 0xf0000000) == 0xe0000000) -#define IN_MULTICAST(a) IN_CLASSD(a) - -#define IN_EXPERIMENTAL(a) ((((in_addr_t)(a)) & 0xe0000000) == 0xe0000000) -#define IN_BADCLASS(a) ((((in_addr_t)(a)) & 0xf0000000) == 0xf0000000) - -#define sock_select(d, flags, onoff) do { \ - ASSERT(!(d)->is_ignored); \ - (d)->event_mask = (onoff) ? \ - ((d)->event_mask | (flags)) : \ - ((d)->event_mask & ~(flags)); \ - DEBUGF(("(%s / %d) sock_select(%ld): flags=%02X, onoff=%d, event_mask=%02lX, s=%d\r\n", \ - __FILE__, __LINE__, (long) (d)->port, (flags), (onoff), (unsigned long) (d)->event_mask, (d)->s)); \ - inet_driver_select((d), (flags), (onoff)); \ - } while(0) +#define TCP_SHUT_WR SD_SEND +#define TCP_SHUT_RD SD_RECEIVE +#define TCP_SHUT_RDWR SD_BOTH -#else /* !__OSE__ && !__WIN32__ */ +#else /* !__WIN32__ */ #include <sys/time.h> #ifdef NETDB_H_NEEDS_IN_H @@ -692,13 +568,16 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) inet_driver_select((d)->port, (ErlDrvEvent)(long)(d)->event, (flags), (onoff)); \ } while(0) +#define TCP_SHUT_WR SHUT_WR +#define TCP_SHUT_RD SHUT_RD +#define TCP_SHUT_RDWR SHUT_RDWR -#endif /* !__WIN32__ && !__OSE__ */ +#endif /* !__WIN32__ */ #ifdef HAVE_SOCKLEN_T # define SOCKLEN_T socklen_t #else -# define SOCKLEN_T int +# define SOCKLEN_T size_t #endif #include "packet_parser.h" @@ -712,6 +591,18 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) (((unsigned char*) (s))[1] << 8) | \ (((unsigned char*) (s))[0])) +#if defined(HAVE_SYS_UN_H) || defined(SO_BINDTODEVICE) + +/* strnlen doesn't exist everywhere */ +static size_t my_strnlen(const char *s, size_t maxlen) +{ + size_t i = 0; + while (i < maxlen && s[i] != '\0') + i++; + return i; +} + +#endif #ifdef VALGRIND # include <valgrind/memcheck.h> @@ -732,10 +623,13 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) */ /* general address encode/decode tag */ +#define INET_AF_UNSPEC 0 #define INET_AF_INET 1 #define INET_AF_INET6 2 #define INET_AF_ANY 3 /* INADDR_ANY or IN6ADDR_ANY_INIT */ #define INET_AF_LOOPBACK 4 /* INADDR_LOOPBACK or IN6ADDR_LOOPBACK_INIT */ +#define INET_AF_LOCAL 5 +#define INET_AF_UNDEFINED 6 /* Unknown */ /* open and INET_REQ_GETTYPE enumeration */ #define INET_TYPE_STREAM 1 @@ -758,7 +652,7 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) /* INET_REQ_GETSTATUS enumeration */ #define INET_F_OPEN 0x0001 -#define INET_F_BOUND 0x0002 +/* INET_F_BOUND removed - renumber when there comes a bigger rewrite */ #define INET_F_ACTIVE 0x0004 #define INET_F_LISTEN 0x0008 #define INET_F_CON 0x0010 @@ -821,6 +715,14 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) #define TCP_ADDF_CLOSE_SENT 2 /* Close sent (active mode only) */ #define TCP_ADDF_DELAYED_CLOSE_RECV 4 /* If receive fails, report {error,closed} (passive mode) */ #define TCP_ADDF_DELAYED_CLOSE_SEND 8 /* If send fails, report {error,closed} (passive mode) */ +#define TCP_ADDF_PENDING_SHUT_WR 16 /* Call shutdown(sock, SHUT_WR) when queue empties */ +#define TCP_ADDF_PENDING_SHUT_RDWR 32 /* Call shutdown(sock, SHUT_RDWR) when queue empties */ +#define TCP_ADDF_PENDING_SHUTDOWN \ + (TCP_ADDF_PENDING_SHUT_WR | TCP_ADDF_PENDING_SHUT_RDWR) +#define TCP_ADDF_SHOW_ECONNRESET 64 /* Tell user about incoming RST */ +#define TCP_ADDF_DELAYED_ECONNRESET 128 /* An ECONNRESET error occurred on send or shutdown */ +#define TCP_ADDF_SHUTDOWN_WR_DONE 256 /* A shutdown(sock, SHUT_WR) or SHUT_RDWR was made */ +#define TCP_ADDF_LINGER_ZERO 512 /* Discard driver queue on port close */ /* *_REQ_* replies */ #define INET_REP_ERROR 0 @@ -865,6 +767,10 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) #define INET_LOPT_MSGQ_HIWTRMRK 36 /* set local msgq high watermark */ #define INET_LOPT_MSGQ_LOWTRMRK 37 /* set local msgq low watermark */ #define INET_LOPT_NETNS 38 /* Network namespace pathname */ +#define INET_LOPT_TCP_SHOW_ECONNRESET 39 /* tell user about incoming RST */ +#define INET_LOPT_LINE_DELIM 40 /* Line delimiting char */ +#define INET_OPT_TCLASS 41 /* IPv6 transport class */ +#define INET_OPT_BIND_TO_DEVICE 42 /* get/set network device the socket is bound to */ /* SCTP options: a separate range, from 100: */ #define SCTP_OPT_RTOINFO 100 #define SCTP_OPT_ASSOCINFO 101 @@ -946,19 +852,15 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) #define INET_STATE_CLOSED (0) #define INET_STATE_OPEN (INET_F_OPEN) -#define INET_STATE_BOUND (INET_STATE_OPEN | INET_F_BOUND) -#define INET_STATE_CONNECTED (INET_STATE_BOUND | INET_F_ACTIVE) -#define INET_STATE_LISTENING (INET_STATE_BOUND | INET_F_LISTEN) -#define INET_STATE_CONNECTING (INET_STATE_BOUND | INET_F_CON) +#define INET_STATE_CONNECTED (INET_STATE_OPEN | INET_F_ACTIVE) +#define INET_STATE_LISTENING (INET_STATE_OPEN | INET_F_LISTEN) +#define INET_STATE_CONNECTING (INET_STATE_OPEN | INET_F_CON) #define INET_STATE_ACCEPTING (INET_STATE_LISTENING | INET_F_ACC) #define INET_STATE_MULTI_ACCEPTING (INET_STATE_ACCEPTING | INET_F_MULTI_CLIENT) #define IS_OPEN(d) \ (((d)->state & INET_F_OPEN) == INET_F_OPEN) -#define IS_BOUND(d) \ - (((d)->state & INET_F_BOUND) == INET_F_BOUND) - #define IS_CONNECTED(d) \ (((d)->state & INET_STATE_CONNECTED) == INET_STATE_CONNECTED) @@ -1011,23 +913,39 @@ typedef union { #ifdef HAVE_IN6 struct sockaddr_in6 sai6; #endif +#ifdef HAVE_SYS_UN_H + struct sockaddr_un sal; +#endif } inet_address; -/* for AF_INET & AF_INET6 */ -#define inet_address_port(x) ((x)->sai.sin_port) +#define inet_address_port(x) \ + ((((x)->sai.sin_family == AF_INET) || \ + ((x)->sai.sin_family == AF_INET6)) ? \ + ((x)->sai.sin_port) : -1) + +#ifdef HAVE_SYS_UN_H +#define localaddrlen(data) \ + ((((unsigned char*)(data))[0] == INET_AF_LOCAL) ? \ + (1 + 1 + ((unsigned char*)(data))[1]) : 1) +#else +#define localaddrlen(data) (1) +#endif #if defined(HAVE_IN6) && defined(AF_INET6) -#define addrlen(family) \ - ((family == AF_INET) ? sizeof(struct in_addr) : \ - ((family == AF_INET6) ? sizeof(struct in6_addr) : 0)) +#define addrlen(data) \ + ((((unsigned char*)(data))[0] == INET_AF_INET) ? \ + (1 + 2 + 4) : \ + ((((unsigned char*)(data))[0] == INET_AF_INET6) ? \ + (1 + 2 + 16) : localaddrlen(data))) #else -#define addrlen(family) \ - ((family == AF_INET) ? sizeof(struct in_addr) : 0) +#define addrlen(data) \ + ((((unsigned char*)(data))[0] == INET_AF_INET) ? \ + (1 + 2 + 4) : localaddrlen(data)) #endif typedef struct _multi_timer_data { - ErlDrvNowData when; + ErlDrvTime when; ErlDrvTermData caller; void (*timeout_function)(ErlDrvData drv_data, ErlDrvTermData caller); struct _multi_timer_data *next; @@ -1114,8 +1032,10 @@ typedef struct { inet_address peer_addr; /* fake peer address */ inet_address name_addr; /* fake local address */ - inet_address* peer_ptr; /* fake peername or NULL */ - inet_address* name_ptr; /* fake sockname or NULL */ + inet_address* peer_ptr; /* fake peername or NULL */ + inet_address* name_ptr; /* fake sockname or NULL */ + SOCKLEN_T peer_addr_len; /* fake peername size */ + SOCKLEN_T name_addr_len; /* fake sockname size */ int bufsz; /* minimum buffer constraint */ unsigned int hsz; /* the list header size, -1 is large !!! */ @@ -1134,6 +1054,7 @@ typedef struct { #else Uint32 send_oct[2]; /* number of octets sent, 64 bits */ #endif + char delimiter; /* Line delimiting character (def: '\n') */ unsigned long send_cnt; /* number of packets sent */ unsigned long send_max; /* maximum packet send */ double send_avg; /* average packet size sent */ @@ -1146,13 +1067,6 @@ typedef struct { char *netns; /* Socket network namespace name as full file path */ #endif -#ifdef __OSE__ - int select_state; /* state to keep track of whether we - should trigger another read/write - request at end of ready_input/output */ - ErlDrvEvent events[6]; -#endif - } inet_descriptor; @@ -1168,86 +1082,20 @@ static void tcp_inet_stop(ErlDrvData); static void tcp_inet_command(ErlDrvData, char*, ErlDrvSizeT); static void tcp_inet_commandv(ErlDrvData, ErlIOVec*); static void tcp_inet_flush(ErlDrvData drv_data); -#ifndef __OSE__ static void tcp_inet_drv_input(ErlDrvData, ErlDrvEvent); static void tcp_inet_drv_output(ErlDrvData data, ErlDrvEvent event); -#endif static ErlDrvData tcp_inet_start(ErlDrvPort, char* command); static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData, unsigned int, char*, ErlDrvSizeT, char**, ErlDrvSizeT); static void tcp_inet_timeout(ErlDrvData); static void tcp_inet_process_exit(ErlDrvData, ErlDrvMonitor *); static void inet_stop_select(ErlDrvEvent, void*); +static void inet_emergency_close(ErlDrvData); #ifdef __WIN32__ static void tcp_inet_event(ErlDrvData, ErlDrvEvent); static void find_dynamic_functions(void); #endif -#ifdef __OSE__ -/* The structure of the signal used for requesting asynchronous - * notification from the stack. Under normal circumstances the network stack - * shouldn't overwrite the value set in the fd field by the sender - * of the request */ -struct OseAsyncSig { - struct FmEvent event; - int fd; -}; - -union SIGNAL { - SIGSELECT signo; - struct OseAsyncSig async; -}; - -static ErlDrvSSizeT tcp_inet_ctl_ose(ErlDrvData e, unsigned int cmd, - char* buf, ErlDrvSizeT len, - char** rbuf, ErlDrvSizeT rsize); -static void tcp_inet_commandv_ose(ErlDrvData e, ErlIOVec* ev); -static void tcp_inet_drv_output_ose(ErlDrvData data, ErlDrvEvent event); -static void tcp_inet_drv_input_ose(ErlDrvData data, ErlDrvEvent event); -static ErlDrvOseEventId inet_resolve_signal(union SIGNAL *sig); - -#ifdef INET_DRV_DEBUG - -static char *read_req = "SO_EVENT_READ_REQUEST"; -static char *read_rep = "SO_EVENT_READ_REPLY"; -static char *write_req = "SO_EVENT_WRITE_REQUEST"; -static char *write_rep = "SO_EVENT_WRITE_REPLY"; -static char *eof_req = "SO_EVENT_EOF_REQUEST"; -static char *eof_rep = "SO_EVENT_EOF_REPLY"; -static char *accept_req = "SO_EVENT_ACCEPT_REQUEST"; -static char *accept_rep = "SO_EVENT_ACCEPT_REPLY"; -static char *connect_req = "SO_EVENT_CONNECT_REQUEST"; -static char *connect_rep = "SO_EVENT_CONNECT_REPLY"; -static char *error_req = "SO_EVENT_ERROR_REQUEST"; -static char *error_rep = "SO_EVENT_ERROR_REPLY"; -static char signo_tmp[32]; - -static char *signo_to_string(SIGSELECT signo) { - switch (signo) { - case SO_EVENT_READ_REQUEST: { return read_req; } - case SO_EVENT_READ_REPLY: { return read_rep; } - case SO_EVENT_WRITE_REQUEST: { return write_req; } - case SO_EVENT_WRITE_REPLY: { return write_rep; } - case SO_EVENT_EOF_REQUEST: { return eof_req; } - case SO_EVENT_EOF_REPLY: { return eof_rep; } - case SO_EVENT_ACCEPT_REQUEST: { return accept_req; } - case SO_EVENT_ACCEPT_REPLY: { return accept_rep; } - case SO_EVENT_CONNECT_REQUEST: { return connect_req; } - case SO_EVENT_CONNECT_REPLY: { return connect_rep; } - case SO_EVENT_ERROR_REQUEST: { return error_req; } - case SO_EVENT_ERROR_REPLY: { return error_rep; } - } - - snprintf(signo_tmp,32,"0x%x",signo); - - return signo_tmp; -} - -#endif - -#endif /* __OSE__ */ - - static struct erl_drv_entry tcp_inet_driver_entry = { tcp_inet_init, /* inet_init will add this driver !! */ @@ -1257,9 +1105,6 @@ static struct erl_drv_entry tcp_inet_driver_entry = #ifdef __WIN32__ tcp_inet_event, NULL, -#elif defined(__OSE__) - tcp_inet_drv_input_ose, /*ready_input*/ - tcp_inet_drv_output_ose, /*ready_output*/ #else tcp_inet_drv_input, tcp_inet_drv_output, @@ -1267,17 +1112,9 @@ static struct erl_drv_entry tcp_inet_driver_entry = "tcp_inet", NULL, NULL, -#ifdef __OSE__ - tcp_inet_ctl_ose, -#else tcp_inet_ctl, -#endif tcp_inet_timeout, -#ifdef __OSE__ - tcp_inet_commandv_ose, -#else tcp_inet_commandv, -#endif NULL, tcp_inet_flush, NULL, @@ -1288,7 +1125,8 @@ static struct erl_drv_entry tcp_inet_driver_entry = ERL_DRV_FLAG_USE_PORT_LOCKING|ERL_DRV_FLAG_SOFT_BUSY, NULL, tcp_inet_process_exit, - inet_stop_select + inet_stop_select, + inet_emergency_close }; @@ -1341,7 +1179,8 @@ static struct erl_drv_entry udp_inet_driver_entry = ERL_DRV_FLAG_USE_PORT_LOCKING, NULL, NULL, - inet_stop_select + inet_stop_select, + inet_emergency_close }; #endif @@ -1375,7 +1214,8 @@ static struct erl_drv_entry sctp_inet_driver_entry = ERL_DRV_FLAG_USE_PORT_LOCKING, NULL, NULL, /* process_exit */ - inet_stop_select + inet_stop_select, + inet_emergency_close }; #endif @@ -1404,9 +1244,13 @@ static int tcp_sendv(tcp_descriptor* desc, ErlIOVec* ev); static int tcp_recv(tcp_descriptor* desc, int request_len); static int tcp_deliver(tcp_descriptor* desc, int len); +static int tcp_shutdown_error(tcp_descriptor* desc, int err); + static int tcp_inet_output(tcp_descriptor* desc, HANDLE event); static int tcp_inet_input(tcp_descriptor* desc, HANDLE event); +static void tcp_desc_close(tcp_descriptor*); + #ifdef HAVE_UDP typedef struct { inet_descriptor inet; /* common data structure (DON'T MOVE) */ @@ -1421,17 +1265,9 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event); static int packet_inet_output(udp_descriptor* udesc, HANDLE event); #endif -/* convert descriptor poiner to inet_descriptor pointer */ +/* convert descriptor pointer to inet_descriptor pointer */ #define INETP(d) (&(d)->inet) -#ifdef __OSE__ -static void inet_driver_select(inet_descriptor* desc, - int flags, int onoff); -static void tcp_inet_ose_dispatch_signals(tcp_descriptor *desc, - int prev_select_state, - union SIGNAL *sig); -#endif - static int async_ref = 0; /* async reference id generator */ #define NEW_ASYNC_ID() ((async_ref++) & 0xffff) @@ -1447,6 +1283,8 @@ static int async_ref = 0; /* async reference id generator */ } while (0) static ErlDrvTermData am_ok; +static ErlDrvTermData am_undefined; +static ErlDrvTermData am_unspec; static ErlDrvTermData am_tcp; static ErlDrvTermData am_error; static ErlDrvTermData am_einval; @@ -1464,6 +1302,9 @@ static ErlDrvTermData am_udp; static ErlDrvTermData am_udp_passive; static ErlDrvTermData am_udp_error; #endif +#ifdef HAVE_SYS_UN_H +static ErlDrvTermData am_local; +#endif #ifdef HAVE_SCTP static ErlDrvTermData am_sctp; static ErlDrvTermData am_sctp_passive; @@ -1485,11 +1326,16 @@ static ErlDrvTermData am_reuseaddr; static ErlDrvTermData am_dontroute; static ErlDrvTermData am_priority; static ErlDrvTermData am_tos; +static ErlDrvTermData am_tclass; static ErlDrvTermData am_ipv6_v6only; static ErlDrvTermData am_netns; +static ErlDrvTermData am_bind_to_device; #endif -/* speical errors for bad ports and sequences */ +static char str_eafnosupport[] = "eafnosupport"; +static char str_einval[] = "einval"; + +/* special errors for bad ports and sequences */ #define EXBADPORT "exbadport" #define EXBADSEQ "exbadseq" @@ -1546,7 +1392,7 @@ static const struct in6_addr in6addr_loopback = #endif /* HAVE_IN6 */ /* XXX: is this a driver interface function ??? */ -void erl_exit(int n, char*, ...); +void erts_exit(int n, char*, ...); /* * Malloc wrapper, @@ -1559,7 +1405,7 @@ void erl_exit(int n, char*, ...); static void *alloc_wrapper(ErlDrvSizeT size){ void *ret = driver_alloc(size); if(ret == NULL) - erl_exit(1,"Out of virtual memory in malloc (%s)", __FILE__); + erts_exit(ERTS_ERROR_EXIT,"Out of virtual memory in malloc (%s)", __FILE__); return ret; } #define ALLOC(X) alloc_wrapper(X) @@ -1567,7 +1413,7 @@ static void *alloc_wrapper(ErlDrvSizeT size){ static void *realloc_wrapper(void *current, ErlDrvSizeT size){ void *ret = driver_realloc(current,size); if(ret == NULL) - erl_exit(1,"Out of virtual memory in realloc (%s)", __FILE__); + erts_exit(ERTS_ERROR_EXIT,"Out of virtual memory in realloc (%s)", __FILE__); return ret; } #define REALLOC(X,Y) realloc_wrapper(X,Y) @@ -1672,40 +1518,67 @@ static void *realloc_wrapper(void *current, ErlDrvSizeT size){ #endif #ifdef HAVE_UDP -static int load_ip_port(ErlDrvTermData* spec, int i, char* buf) -{ - spec[i++] = ERL_DRV_INT; - spec[i++] = (ErlDrvTermData) get_int16(buf); - return i; -} - -static int load_ip_address(ErlDrvTermData* spec, int i, int family, char* buf) +static int load_address(ErlDrvTermData* spec, int i, char* buf) { int n; - if (family == AF_INET) { - for (n = 0; n < 4; n++) { + switch (*buf++) { /* Family */ + case INET_AF_INET: { + for (n = 2; n < 2+4; n++) { spec[i++] = ERL_DRV_INT; spec[i++] = (ErlDrvTermData) ((unsigned char)buf[n]); } spec[i++] = ERL_DRV_TUPLE; spec[i++] = 4; + spec[i++] = ERL_DRV_INT; + spec[i++] = (ErlDrvTermData) get_int16(buf); + break; } #if defined(HAVE_IN6) && defined(AF_INET6) - else if (family == AF_INET6) { - for (n = 0; n < 16; n += 2) { + case INET_AF_INET6: { + for (n = 2; n < 2+16; n += 2) { spec[i++] = ERL_DRV_INT; spec[i++] = (ErlDrvTermData) get_int16(buf+n); } spec[i++] = ERL_DRV_TUPLE; spec[i++] = 8; + spec[i++] = ERL_DRV_INT; + spec[i++] = (ErlDrvTermData) get_int16(buf); + break; } #endif - else { +#ifdef HAVE_SYS_UN_H + case INET_AF_LOCAL: { + int len = *(unsigned char*)buf++; + i = LOAD_ATOM(spec, i, am_local); + i = LOAD_BUF2BINARY(spec, i, buf, len); spec[i++] = ERL_DRV_TUPLE; + spec[i++] = 2; + spec[i++] = ERL_DRV_INT; spec[i++] = 0; + break; + } +#endif + case INET_AF_UNSPEC: { + i = LOAD_ATOM(spec, i, am_unspec); + i = LOAD_BUF2BINARY(spec, i, buf, 0); + spec[i++] = ERL_DRV_TUPLE; + spec[i++] = 2; + spec[i++] = ERL_DRV_INT; + spec[i++] = 0; + break; + } + default: { /* INET_AF_UNDEFINED */ + i = LOAD_ATOM(spec, i, am_undefined); + i = LOAD_BUF2BINARY(spec, i, buf, 0); + spec[i++] = ERL_DRV_TUPLE; + spec[i++] = 2; + spec[i++] = ERL_DRV_INT; + spec[i++] = 0; + break; + } } return i; -} + } #endif @@ -1713,10 +1586,13 @@ static int load_ip_address(ErlDrvTermData* spec, int i, int family, char* buf) /* For SCTP, we often need to return {IP, Port} tuples: */ static int inet_get_address(char* dst, inet_address* src, unsigned int* len); -#define LOAD_IP_AND_PORT_CNT \ +/* Max of {{int()*8},int()} | {{int()*4},int()} | + * {{'local',binary()},int()} + */ +#define LOAD_INET_GET_ADDRESS_CNT \ (8*LOAD_INT_CNT + LOAD_TUPLE_CNT + LOAD_INT_CNT + LOAD_TUPLE_CNT) -static int load_ip_and_port +static int load_inet_get_address (ErlDrvTermData* spec, int i, inet_descriptor* desc, struct sockaddr_storage* addr) { @@ -1734,8 +1610,7 @@ static int load_ip_and_port /* NB: the following functions are safe to use, as they create tuples of copied Ints on the "spec", and do not install any String pts -- a ptr to "abuf" would be dangling upon exiting this function: */ - i = load_ip_address(spec, i, desc->sfamily, abuf+3); - i = load_ip_port (spec, i, abuf+1); + i = load_address(spec, i, abuf); /* IP,Port | Family,Addr */ i = LOAD_TUPLE (spec, i, 2); return i; } @@ -1828,7 +1703,7 @@ check_double_release(InetDrvBufStk *bs, ErlDrvBinary* buf) int i; for (i = 0; i < bs->buf.pos; ++i) { if (bs->buf.stk[i] == buf) { - erl_exit(ERTS_ABORT_EXIT, + erts_exit(ERTS_ABORT_EXIT, "Multiple buffer release in inet_drv, this " "is a bug, save the core and send it to " "[email protected]!"); @@ -2324,13 +2199,16 @@ static int inet_reply_ok(inet_descriptor* desc) ErlDrvTermData caller = desc->caller; int i = 0; + desc->caller = 0; + if (is_not_internal_pid(caller)) + return 0; + i = LOAD_ATOM(spec, i, am_inet_reply); i = LOAD_PORT(spec, i, desc->dport); i = LOAD_ATOM(spec, i, am_ok); i = LOAD_TUPLE(spec, i, 3); ASSERT(i == sizeof(spec)/sizeof(*spec)); - desc->caller = 0; return erl_drv_send_term(desc->dport, caller, spec, i); } @@ -2427,7 +2305,6 @@ static ErlDrvTermData am_http_error; static ErlDrvTermData am_abs_path; static ErlDrvTermData am_absoluteURI; static ErlDrvTermData am_star; -static ErlDrvTermData am_undefined; static ErlDrvTermData am_http; static ErlDrvTermData am_https; static ErlDrvTermData am_scheme; @@ -2723,7 +2600,7 @@ int ssl_tls_inetdrv(void* arg, unsigned type, unsigned major, unsigned minor, { tcp_descriptor* desc = (tcp_descriptor*) arg; int i = 0; - ErlDrvTermData spec[28]; + ErlDrvTermData spec[30]; ErlDrvTermData caller = ERL_DRV_NIL; ErlDrvBinary* bin; int ret; @@ -2764,11 +2641,11 @@ int ssl_tls_inetdrv(void* arg, unsigned type, unsigned major, unsigned minor, if (desc->inet.active == INET_PASSIVE) { i = LOAD_TUPLE(spec, i, 2); i = LOAD_TUPLE(spec, i, 4); - ASSERT(i <= 28); + ASSERT(i <= sizeof(spec)/sizeof(*spec)); ret = erl_drv_send_term(desc->inet.dport, caller, spec, i); } else { - ASSERT(i <= 28); + ASSERT(i <= sizeof(spec)/sizeof(*spec)); ret = erl_drv_output_term(desc->inet.dport, spec, i); } done: @@ -2890,6 +2767,9 @@ static ErlDrvTermData am_sctp_rtoinfo, /* Option names */ /* For #sctp_paddrinfo{}: */ am_active, am_inactive, +# if HAVE_DECL_SCTP_UNCONFIRMED + am_unconfirmed, +# endif /* For #sctp_status{}: */ # if HAVE_DECL_SCTP_EMPTY @@ -3115,7 +2995,7 @@ static int sctp_parse_async_event ASSERT(sptr->spc_length <= sz); /* No buffer overrun */ i = LOAD_ATOM (spec, i, am_sctp_paddr_change); - i = load_ip_and_port(spec, i, desc, &sptr->spc_aaddr); + i = load_inet_get_address(spec, i, desc, &sptr->spc_aaddr); switch (sptr->spc_state) { @@ -3542,10 +3422,11 @@ static int tcp_error_message(tcp_descriptor* desc, int err) #ifdef HAVE_UDP /* ** active mode message: -** {udp, S, IP, Port, [H1,...Hsz | Data]} or -** {sctp, S, IP, Port, {[AncilData], Event_or_Data}} +** {udp, S, IP, Port, [H1,...Hsz | Data]} or +** {sctp, S, IP, Port, {[AncilData], Event_or_Data}} ** where ** [H1,...,HSz] are msg headers (without IP/Port, UDP only), +** [AddrLen, H2,...,HSz] are msg headers for UDP AF_UNIX only ** Data : List() | Binary() */ static int packet_binary_message @@ -3555,6 +3436,7 @@ static int packet_binary_message ErlDrvTermData spec [PACKET_ERL_DRV_TERM_DATA_LEN]; int i = 0; int alen; + char* data = bin->orig_bytes+offs; DEBUGF(("packet_binary_message(%ld): len = %d\r\n", (long)desc->port, len)); @@ -3564,13 +3446,12 @@ static int packet_binary_message i = LOAD_ATOM(spec, i, am_udp ); /* UDP only */ # endif i = LOAD_PORT(spec, i, desc->dport); /* S */ - - alen = addrlen(desc->sfamily); - i = load_ip_address(spec, i, desc->sfamily, bin->orig_bytes+offs+3); - i = load_ip_port(spec, i, bin->orig_bytes+offs+1); /* IP, Port */ - - offs += (alen + 3); - len -= (alen + 3); + + alen = addrlen(data); + i = load_address(spec, i, data); /* IP,Port | Family,Addr */ + + offs += alen; + len -= alen; # ifdef HAVE_SCTP if (!IS_SCTP(desc)) @@ -3840,8 +3721,10 @@ static void inet_init_sctp(void) { INIT_ATOM(dontroute); INIT_ATOM(priority); INIT_ATOM(tos); + INIT_ATOM(tclass); INIT_ATOM(ipv6_v6only); INIT_ATOM(netns); + INIT_ATOM(bind_to_device); /* Option names */ INIT_ATOM(sctp_rtoinfo); @@ -3919,7 +3802,10 @@ static void inet_init_sctp(void) { /* For #sctp_paddrinfo{}: */ INIT_ATOM(active); INIT_ATOM(inactive); - +# if HAVE_DECL_SCTP_UNCONFIRMED + INIT_ATOM(unconfirmed); +# endif + /* For #sctp_status{}: */ # if HAVE_DECL_SCTP_EMPTY INIT_ATOM(empty); @@ -3948,12 +3834,14 @@ static int inet_init() if (0 != erl_drv_tsd_key_create("inet_buffer_stack_key", &buffer_stack_key)) goto error; - ASSERT(sizeof(struct in_addr) == 4); + ERTS_CT_ASSERT(sizeof(struct in_addr) == 4); # if defined(HAVE_IN6) && defined(AF_INET6) - ASSERT(sizeof(struct in6_addr) == 16); + ERTS_CT_ASSERT(sizeof(struct in6_addr) == 16); # endif INIT_ATOM(ok); + INIT_ATOM(undefined); + INIT_ATOM(unspec); INIT_ATOM(tcp); #ifdef HAVE_UDP INIT_ATOM(udp); @@ -3971,6 +3859,9 @@ static int inet_init() INIT_ATOM(udp_passive); INIT_ATOM(udp_error); #endif +#ifdef HAVE_SYS_UN_H + INIT_ATOM(local); +#endif INIT_ATOM(empty_out_q); INIT_ATOM(ssl_tls); @@ -3982,7 +3873,6 @@ static int inet_init() INIT_ATOM(abs_path); INIT_ATOM(absoluteURI); am_star = driver_mk_atom("*"); - INIT_ATOM(undefined); INIT_ATOM(http); INIT_ATOM(https); INIT_ATOM(scheme); @@ -3996,7 +3886,7 @@ static int inet_init() #ifdef HAVE_SCTP /* Check the size of SCTP AssocID -- currently both this driver and the Erlang part require 32 bit: */ - ASSERT(sizeof(sctp_assoc_t)==ASSOC_ID_LEN); + ERTS_CT_ASSERT(sizeof(sctp_assoc_t)==ASSOC_ID_LEN); # if defined(HAVE_SCTP_BINDX) p_sctp_bindx = sctp_bindx; # if defined(HAVE_SCTP_PEELOFF) @@ -4078,63 +3968,106 @@ static int inet_init() /* -** Set a inaddr structure: -** src = [P1,P0,X1,X2,.....] +** Set an inaddr structure: +** *src = [P1,P0,X1,X2,.....] ** dst points to a structure large enugh to keep any kind ** of inaddr. ** *len is set to length of src on call ** and is set to actual length of dst on return -** return NULL on error and ptr after port address on success +** return NULL if ok or ptr to errno string for error */ static char* inet_set_address(int family, inet_address* dst, - char* src, ErlDrvSizeT* len) + char* *src, ErlDrvSizeT* len) { short port; - if ((family == AF_INET) && (*len >= 2+4)) { + switch (family) { + case AF_INET: { + if (*len < 2+4) return str_einval; sys_memzero((char*)dst, sizeof(struct sockaddr_in)); - port = get_int16(src); + port = get_int16(*src); #ifndef NO_SA_LEN dst->sai.sin_len = sizeof(struct sockaddr_in); #endif dst->sai.sin_family = family; dst->sai.sin_port = sock_htons(port); - sys_memcpy(&dst->sai.sin_addr, src+2, 4); + sys_memcpy(&dst->sai.sin_addr, (*src)+2, 4); *len = sizeof(struct sockaddr_in); - return src + 2+4; + *src += 2 + 4; + return NULL; } #if defined(HAVE_IN6) && defined(AF_INET6) - else if ((family == AF_INET6) && (*len >= 2+16)) { + case AF_INET6: { + if (*len < 2+16) return str_einval; sys_memzero((char*)dst, sizeof(struct sockaddr_in6)); - port = get_int16(src); + port = get_int16(*src); #ifndef NO_SA_LEN dst->sai6.sin6_len = sizeof(struct sockaddr_in6); #endif dst->sai6.sin6_family = family; dst->sai6.sin6_port = sock_htons(port); dst->sai6.sin6_flowinfo = 0; /* XXX this may be set as well ?? */ - sys_memcpy(&dst->sai6.sin6_addr, src+2, 16); - *len = sizeof(struct sockaddr_in6); - return src + 2+16; + sys_memcpy(&dst->sai6.sin6_addr, (*src)+2, 16); + *len = sizeof(struct sockaddr_in6); + *src += 2 + 16; + return NULL; } #endif - return NULL; +#ifdef HAVE_SYS_UN_H + case AF_UNIX: { + int n; + if (*len == 0) return str_einval; + n = *((unsigned char*)(*src)); /* Length field */ + if (*len < 1+n) return str_einval; + if (n + +#ifdef __linux__ + /* Make sure the address gets zero terminated + * except when the first byte is \0 because then it is + * sort of zero terminated although the zero termination + * comes before the address... + * This fix handles Linux's nonportable + * abstract socket address extension. + */ + ((*len) > 1 && (*src)[1] == '\0' ? 0 : 1) +#else + 1 +#endif + > sizeof(dst->sal.sun_path)) { + return str_einval; + } + sys_memzero((char*)dst, sizeof(struct sockaddr_un)); + dst->sal.sun_family = family; + sys_memcpy(dst->sal.sun_path, (*src)+1, n); + *len = offsetof(struct sockaddr_un, sun_path) + n; +#ifndef NO_SA_LEN + dst->sal.sun_len = *len; +#endif + *src += 1 + n; + return NULL; + } +#endif + } + return str_eafnosupport; } /* ** Set an inaddr structure, address family comes from source data, ** or from argument if source data specifies constant address. ** -** src = [TAG,P1,P0] when TAG = INET_AF_ANY | INET_AF_LOOPBACK -** src = [TAG,P1,P0,X1,X2,...] when TAG = INET_AF_INET | INET_AF_INET6 +** *src = [TAG,P1,P0] +** when TAG = INET_AF_ANY | INET_AF_LOOPBACK +** *src = [TAG,P1,P0,X1,X2,...] +** when TAG = INET_AF_INET | INET_AF_INET6 | INET_AF_LOCAL +** *src = [TAG,Len,...] +** when TAG = INET_AF_LOCAL */ static char *inet_set_faddress(int family, inet_address* dst, - char *src, ErlDrvSizeT* len) { + char* *src, ErlDrvSizeT* len) { int tag; - if (*len < 1) return NULL; + if (*len < 1) return str_einval; (*len) --; - tag = *(src ++); + tag = *((*src) ++); switch (tag) { case INET_AF_INET: family = AF_INET; @@ -4144,12 +4077,18 @@ static char *inet_set_faddress(int family, inet_address* dst, family = AF_INET6; break; # endif +# ifdef HAVE_SYS_UN_H + case INET_AF_LOCAL: { + family = AF_UNIX; + break; + } +# endif case INET_AF_ANY: case INET_AF_LOOPBACK: { int port; - if (*len < 2) return NULL; - port = get_int16(src); + if (*len < 2) return str_einval; + port = get_int16(*src); switch (family) { case AF_INET: { struct in_addr addr; @@ -4161,7 +4100,7 @@ static char *inet_set_faddress(int family, inet_address* dst, addr.s_addr = sock_htonl(INADDR_LOOPBACK); break; default: - return NULL; + return str_einval; } sys_memzero((char*)dst, sizeof(struct sockaddr_in)); #ifndef NO_SA_LEN @@ -4183,7 +4122,7 @@ static char *inet_set_faddress(int family, inet_address* dst, paddr = &in6addr_loopback; break; default: - return NULL; + return str_einval; } sys_memzero((char*)dst, sizeof(struct sockaddr_in6)); #ifndef NO_SA_LEN @@ -4197,27 +4136,28 @@ static char *inet_set_faddress(int family, inet_address* dst, } break; # endif default: - return NULL; + return str_einval; } - return src + 2; + *src += 2; + return NULL; } break; default: - return NULL; + return str_eafnosupport; } return inet_set_address(family, dst, src, len); } - /* Get a inaddr structure ** src = inaddr structure -** *len is the lenght of structure ** dst is filled with [F,P1,P0,X1,....] +** *len is the length of structure ** where F is the family code (coded) ** and *len is the length of dst on return ** (suitable to deliver to erlang) */ static int inet_get_address(char* dst, inet_address* src, unsigned int* len) { + /* Compare the code with inet_address_to_erlang() */ int family; short port; @@ -4240,6 +4180,37 @@ static int inet_get_address(char* dst, inet_address* src, unsigned int* len) return 0; } #endif +#ifdef HAVE_SYS_UN_H + else if (family == AF_UNIX) { + size_t n, m; + if (*len < offsetof(struct sockaddr_un, sun_path)) return -1; + n = *len - offsetof(struct sockaddr_un, sun_path); + if (255 < n) return -1; + m = my_strnlen(src->sal.sun_path, n); +#ifdef __linux__ + /* Assume that the address is a zero terminated string, + * except when the first byte is \0 i.e the string length is 0, + * then use the reported length instead. + * This fix handles Linux's nonportable + * abstract socket address extension. + */ + if (m == 0) m = n; +#endif + dst[0] = INET_AF_LOCAL; + dst[1] = (char) ((unsigned char) m); + sys_memcpy(dst+2, src->sal.sun_path, m); + *len = 1 + 1 + m; + return 0; + } +#endif + else if (family == AF_UNSPEC) { + dst[0] = INET_AF_UNSPEC; + *len = 1; + } + else { + dst[0] = INET_AF_UNDEFINED; + *len = 1; + } return -1; } @@ -4248,7 +4219,9 @@ static int inet_get_address(char* dst, inet_address* src, unsigned int* len) ** according to the size of the current, ** and return the resulting encoded size */ -static int inet_address_to_erlang(char *dst, inet_address **src) { +static int +inet_address_to_erlang(char *dst, inet_address **src, SOCKLEN_T sz) { + /* Compare the code with inet_get_address() */ short port; switch ((*src)->sa.sa_family) { @@ -4273,6 +4246,31 @@ static int inet_address_to_erlang(char *dst, inet_address **src) { (*src) = (inet_address *) (&(*src)->sai6 + 1); return 1 + 2 + 16; #endif +#ifdef HAVE_SYS_UN_H + case AF_UNIX: { + size_t n, m; + if (sz < offsetof(struct sockaddr_un, sun_path)) return -1; + n = sz - offsetof(struct sockaddr_un, sun_path); + if (255 < n) return -1; + m = my_strnlen((*src)->sal.sun_path, n); +#ifdef __linux__ + /* Assume that the address is a zero terminated string, + * except when the first byte is \0 i.e the string length is 0, + * Then use the reported length instead. + * This fix handles Linux's nonportable + * abstract socket address extension. + */ + if (m == 0) m = n; +#endif + if (dst) { + dst[0] = INET_AF_LOCAL; + dst[1] = (char) ((unsigned char) m); + sys_memcpy(dst+2, (*src)->sal.sun_path, m); + } + (*src) = (inet_address *) (&(*src)->sal + 1); + return 1 + 1 + m; + } +#endif default: return -1; } @@ -4281,7 +4279,7 @@ static int inet_address_to_erlang(char *dst, inet_address **src) { /* Encode n encoded addresses from addrs in the result buffer */ static ErlDrvSizeT reply_inet_addrs -(int n, inet_address *addrs, char **rbuf, ErlDrvSizeT rsize) { +(int n, inet_address *addrs, char **rbuf, ErlDrvSizeT rsize, SOCKLEN_T sz) { inet_address *ia; int i, s; ErlDrvSizeT rlen; @@ -4289,11 +4287,19 @@ static ErlDrvSizeT reply_inet_addrs if (IS_SOCKET_ERROR(n)) return ctl_error(sock_errno(), rbuf, rsize); if (n == 0) return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); + /* The sz argument is only used when we have got an actual size + * of addrs[0] from e.g getsockname() and then n == 1 + * so we will loop over 1 element below. Otherwise sz + * would be expected to differ between addresses but that + * can only happen for AF_UNIX and we will only be called with + * n > 1 for SCTP and that will never (?) happen with AF_UNIX + */ + /* Calculate result length */ rlen = 1; ia = addrs; for (i = 0; i < n; i++) { - s = inet_address_to_erlang(NULL, &ia); + s = inet_address_to_erlang(NULL, &ia, sz); if (s < 0) break; rlen += s; } @@ -4304,7 +4310,7 @@ static ErlDrvSizeT reply_inet_addrs rlen = 1; ia = addrs; for (i = 0; i < n; i++) { - s = inet_address_to_erlang((*rbuf)+rlen, &ia); + s = inet_address_to_erlang((*rbuf)+rlen, &ia, sz); if (s < 0) break; rlen += s; } @@ -4321,16 +4327,6 @@ static void desc_close(inet_descriptor* desc) desc->forced_events = 0; desc->send_would_block = 0; #endif -#ifdef __OSE__ - if (desc->events[0]) { - driver_select(desc->port,desc->events[0],FD_READ|FD_WRITE|ERL_DRV_USE,0); - driver_select(desc->port,desc->events[1],FD_READ|FD_WRITE|ERL_DRV_USE,0); - driver_select(desc->port,desc->events[2],FD_READ|FD_WRITE|ERL_DRV_USE,0); - driver_select(desc->port,desc->events[3],FD_READ|FD_WRITE|ERL_DRV_USE,0); - driver_select(desc->port,desc->events[4],FD_READ|FD_WRITE|ERL_DRV_USE,0); - driver_select(desc->port,desc->events[5],FD_READ|FD_WRITE|ERL_DRV_USE,0); - } -#else /* * We should close the fd here, but the other driver might still * be selecting on it. @@ -4340,10 +4336,15 @@ static void desc_close(inet_descriptor* desc) ERL_DRV_USE, 0); else inet_stop_select((ErlDrvEvent)(long)desc->event,NULL); -#endif desc->event = INVALID_EVENT; /* closed by stop_select callback */ desc->s = INVALID_SOCKET; desc->event_mask = 0; + + /* mark as disconnected in case when socket is left lingering due to + * {exit_on_close, false} option in gen_tcp socket creation. Next + * write to socket should produce {error, enotconn} and send a + * message {tcp_error,#Port<>,econnreset} */ + desc->state &= ~INET_STATE_CONNECTED; } } @@ -4372,7 +4373,7 @@ static int erl_inet_close(inet_descriptor* desc) desc_close(desc); desc->state = INET_STATE_CLOSED; } else if (desc->prebound && (desc->s != INVALID_SOCKET)) { - sock_select(desc, FD_READ | FD_WRITE | FD_CLOSE, 0); + sock_select(desc, FD_READ | FD_WRITE | FD_CLOSE | ERL_DRV_USE_NO_CALLBACK, 0); desc->event_mask = 0; #ifdef __WIN32__ desc->forced_events = 0; @@ -4382,69 +4383,11 @@ static int erl_inet_close(inet_descriptor* desc) return 0; } -#ifdef __OSE__ -static void inet_select_init(inet_descriptor* desc) -{ - desc->events[0] = - erl_drv_ose_event_alloc(SO_EVENT_READ_REPLY, - desc->s, - inet_resolve_signal, - NULL); - driver_select(desc->port, desc->events[0], - ERL_DRV_READ|ERL_DRV_USE, 1); - - desc->events[1] = - erl_drv_ose_event_alloc(SO_EVENT_EOF_REPLY, - desc->s, - inet_resolve_signal, - NULL); - driver_select(desc->port, desc->events[1], - ERL_DRV_READ|ERL_DRV_USE, 1); - - desc->events[2] = - erl_drv_ose_event_alloc(SO_EVENT_ACCEPT_REPLY, - desc->s, - inet_resolve_signal, - NULL); - driver_select(desc->port, desc->events[2], - ERL_DRV_READ|ERL_DRV_USE, 1); - - /* trigger tcp_inet_input */ - desc->events[3] = - erl_drv_ose_event_alloc(SO_EVENT_WRITE_REPLY, - desc->s, - inet_resolve_signal, - NULL); - driver_select(desc->port, desc->events[3], - ERL_DRV_WRITE|ERL_DRV_USE, 1); - - desc->events[4] = - erl_drv_ose_event_alloc(SO_EVENT_CONNECT_REPLY, - desc->s, - inet_resolve_signal, - NULL); - driver_select(desc->port, desc->events[4], - ERL_DRV_WRITE|ERL_DRV_USE, 1); - - desc->events[5] = - erl_drv_ose_event_alloc(SO_EVENT_ERROR_REPLY, - desc->s, - inet_resolve_signal, - NULL); - driver_select(desc->port, desc->events[5], - ERL_DRV_WRITE|ERL_DRV_USE, 1); - - /* Issue a select on error event before any other select to be sure we are - prepared to receive error notifications from the stack, even in the - situations when select isn't issued */ - sock_select(desc, SOCK_FD_ERROR, 1); -} -#endif - static ErlDrvSSizeT inet_ctl_open(inet_descriptor* desc, int domain, int type, char** rbuf, ErlDrvSizeT rsize) { int save_errno; + int protocol; #ifdef HAVE_SETNS int current_ns, new_ns; current_ns = new_ns = 0; @@ -4483,7 +4426,11 @@ static ErlDrvSSizeT inet_ctl_open(inet_descriptor* desc, int domain, int type, } } #endif - if ((desc->s = sock_open(domain, type, desc->sprotocol)) == INVALID_SOCKET) + protocol = desc->sprotocol; +#ifdef HAVE_SYS_UN_H + if (domain == AF_UNIX) protocol = 0; +#endif + if ((desc->s = sock_open(domain, type, protocol)) == INVALID_SOCKET) save_errno = sock_errno(); #ifdef HAVE_SETNS if (desc->netns != NULL) { @@ -4523,9 +4470,6 @@ static ErlDrvSSizeT inet_ctl_open(inet_descriptor* desc, int domain, int type, #ifdef __WIN32__ driver_select(desc->port, desc->event, ERL_DRV_READ, 1); #endif -#ifdef __OSE__ - inet_select_init(desc); -#endif desc->state = INET_STATE_OPEN; desc->stype = type; @@ -4536,23 +4480,22 @@ static ErlDrvSSizeT inet_ctl_open(inet_descriptor* desc, int domain, int type, /* as inet_open but pass in an open socket (MUST BE OF RIGHT TYPE) */ static ErlDrvSSizeT inet_ctl_fdopen(inet_descriptor* desc, int domain, int type, - SOCKET s, char** rbuf, ErlDrvSizeT rsize) + SOCKET s, Uint32 bound, + char** rbuf, ErlDrvSizeT rsize) { inet_address name; - unsigned int sz = sizeof(name); + unsigned int sz; - /* check that it is a socket and that the socket is bound */ - if (IS_SOCKET_ERROR(sock_name(s, (struct sockaddr*) &name, &sz))) - return ctl_error(sock_errno(), rbuf, rsize); - if (name.sa.sa_family != domain) - return ctl_error(EINVAL, rbuf, rsize); -#ifdef __OSE__ - /* for fdopen duplicating the sd will allow to uniquely identify - the signal from OSE with erlang port */ - desc->s = sock_dup(s); -#else + if (bound) { + /* check that it is a socket and that the socket is bound */ + sz = sizeof(name); + sys_memzero((char *) &name, sz); + if (IS_SOCKET_ERROR(sock_name(s, (struct sockaddr*) &name, &sz))) + return ctl_error(sock_errno(), rbuf, rsize); + if (name.sa.sa_family != domain) + return ctl_error(EINVAL, rbuf, rsize); + } desc->s = s; -#endif if ((desc->event = sock_create_event(desc)) == INVALID_EVENT) return ctl_error(sock_errno(), rbuf, rsize); @@ -4560,17 +4503,13 @@ static ErlDrvSSizeT inet_ctl_fdopen(inet_descriptor* desc, int domain, int type, #ifdef __WIN32__ driver_select(desc->port, desc->event, ERL_DRV_READ, 1); #endif - desc->state = INET_STATE_BOUND; /* assume bound */ + + desc->state = INET_STATE_OPEN; + if (type == SOCK_STREAM) { /* check if connected */ sz = sizeof(name); if (!IS_SOCKET_ERROR(sock_peer(s, (struct sockaddr*) &name, &sz))) { desc->state = INET_STATE_CONNECTED; -#ifdef __OSE__ - /* since we are dealing with different descriptors (i.e. inet and - socket) the select part should be initialized with the right - values */ - inet_select_init(desc); -#endif } } @@ -4713,6 +4652,36 @@ static char* sockaddr_to_buf(struct sockaddr* addr, char* ptr, char* end) return NULL; } +/* sockaddr_bufsz_need + * Returns the number of bytes needed to store the information + * through sockaddr_to_buf + */ + +static size_t sockaddr_bufsz_need(struct sockaddr* addr) +{ + if (addr->sa_family == AF_INET || addr->sa_family == 0) { + return 1 + sizeof(struct in_addr); + } +#if defined(HAVE_IN6) && defined(AF_INET6) + else if (addr->sa_family == AF_INET6) { + return 1 + sizeof(struct in6_addr); + } +#endif +#if defined(AF_LINK) + if (addr->sa_family == AF_LINK) { + struct sockaddr_dl *sdl_p = (struct sockaddr_dl*) addr; + return 2 + sdl_p->sdl_alen; + } +#endif +#if defined(AF_PACKET) && defined(HAVE_NETPACKET_PACKET_H) + else if(addr->sa_family == AF_PACKET) { + struct sockaddr_ll *sll_p = (struct sockaddr_ll*) addr; + return 2 + sll_p->sll_halen; + } +#endif + return 0; +} + static char* buf_to_sockaddr(char* ptr, char* end, struct sockaddr* addr) { buf_check(ptr,end,1); @@ -5772,7 +5741,7 @@ done: ia_p->Ipv6IfIndex && ia_p->Ipv6IfIndex != index) { - /* Oops, there was an other interface for IPv6. Possible? XXX */ + /* Oops, there was another interface for IPv6. Possible? XXX */ index = ia_p->Ipv6IfIndex; goto index; } @@ -5791,6 +5760,11 @@ done: } #elif defined(HAVE_GETIFADDRS) +#ifdef DEBUG +#define GETIFADDRS_BUFSZ (1) +#else +#define GETIFADDRS_BUFSZ (512) +#endif static ErlDrvSSizeT inet_ctl_getifaddrs(inet_descriptor* desc_p, char **rbuf_pp, ErlDrvSizeT rsize) @@ -5801,15 +5775,15 @@ static ErlDrvSSizeT inet_ctl_getifaddrs(inet_descriptor* desc_p, char *buf_p; char *buf_alloc_p; - buf_size = 512; - buf_alloc_p = ALLOC(buf_size); + buf_size = GETIFADDRS_BUFSZ; + buf_alloc_p = ALLOC(GETIFADDRS_BUFSZ); buf_p = buf_alloc_p; # define BUF_ENSURE(Size) \ do { \ int NEED_, GOT_ = buf_p - buf_alloc_p; \ NEED_ = GOT_ + (Size); \ if (NEED_ > buf_size) { \ - buf_size = NEED_ + 512; \ + buf_size = NEED_ + GETIFADDRS_BUFSZ; \ buf_alloc_p = REALLOC(buf_alloc_p, buf_size); \ buf_p = buf_alloc_p + GOT_; \ } \ @@ -5822,7 +5796,7 @@ static ErlDrvSSizeT inet_ctl_getifaddrs(inet_descriptor* desc_p, while (! (P_ = sockaddr_to_buf((sa), buf_p, \ buf_alloc_p+buf_size))) { \ int GOT_ = buf_p - buf_alloc_p; \ - buf_size += 512; \ + buf_size += GETIFADDRS_BUFSZ; \ buf_alloc_p = REALLOC(buf_alloc_p, buf_size); \ buf_p = buf_alloc_p + GOT_; \ } \ @@ -5879,10 +5853,11 @@ static ErlDrvSSizeT inet_ctl_getifaddrs(inet_descriptor* desc_p, || ifa_p->ifa_addr->sa_family == AF_PACKET #endif ) { - char *bp = buf_p; - BUF_ENSURE(1); - SOCKADDR_TO_BUF(INET_IFOPT_HWADDR, ifa_p->ifa_addr); - if (buf_p - bp < 4) buf_p = bp; /* Empty hwaddr */ + size_t need = sockaddr_bufsz_need(ifa_p->ifa_addr); + if (need > 3) { + BUF_ENSURE(1 + need); + SOCKADDR_TO_BUF(INET_IFOPT_HWADDR, ifa_p->ifa_addr); + } } #endif } @@ -5897,6 +5872,7 @@ static ErlDrvSSizeT inet_ctl_getifaddrs(inet_descriptor* desc_p, return buf_size; # undef BUF_ENSURE } +#undef GETIFADDRS_BUFSZ #else @@ -5995,11 +5971,14 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len) int ival; char* arg_ptr; int arg_sz; +#ifdef SO_BINDTODEVICE + char ifname[IFNAMSIZ]; +#endif enum PacketParseType old_htype = desc->htype; int old_active = desc->active; - int propagate = 0; /* Set to 1 if failure to set this option - should be propagated to erlang (not all - errors can be propagated for BC reasons) */ + int propagate; /* Set to 1 if failure to set this option + should be propagated to erlang (not all + errors can be propagated for BC reasons) */ int res; #ifdef HAVE_SCTP /* SCTP sockets are treated completely separately: */ @@ -6016,6 +5995,7 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len) arg_ptr = (char*) &ival; arg_sz = sizeof(ival); proto = SOL_SOCKET; + propagate = 0; switch(opt) { case INET_LOPT_HEADER: @@ -6067,7 +6047,12 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len) desc->active_count = 0; if ((desc->stype == SOCK_STREAM) && (desc->active != INET_PASSIVE) && (desc->state == INET_STATE_CLOSED)) { - tcp_closed_message((tcp_descriptor *) desc); + tcp_descriptor *tdesc = (tcp_descriptor *) desc; + if (tdesc->tcp_add_flags & TCP_ADDF_DELAYED_ECONNRESET) { + tdesc->tcp_add_flags &= ~TCP_ADDF_DELAYED_ECONNRESET; + tcp_error_message(tdesc, ECONNRESET); + } + tcp_closed_message(tdesc); if (desc->exitf) { driver_exit(desc->port, 0); return 0; /* Give up on this socket, descriptor lost */ @@ -6184,6 +6169,22 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len) continue; #endif + case INET_LOPT_TCP_SHOW_ECONNRESET: + if (desc->sprotocol == IPPROTO_TCP) { + tcp_descriptor* tdesc = (tcp_descriptor*) desc; + if (ival) + tdesc->tcp_add_flags |= TCP_ADDF_SHOW_ECONNRESET; + else + tdesc->tcp_add_flags &= ~TCP_ADDF_SHOW_ECONNRESET; + } + continue; + + case INET_LOPT_LINE_DELIM: + DEBUGF(("inet_set_opts(%ld): s=%d, LINE_DELIM=%d\r\n", + (long)desc->port, desc->s, ival)); + desc->delimiter = (char)ival; + continue; + case INET_OPT_REUSEADDR: #ifdef __WIN32__ continue; /* Bjorn says */ @@ -6228,6 +6229,13 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len) arg_sz = sizeof(li_val); DEBUGF(("inet_set_opts(%ld): s=%d, SO_LINGER=%d,%d", (long)desc->port, desc->s, li_val.l_onoff,li_val.l_linger)); + if (desc->sprotocol == IPPROTO_TCP) { + tcp_descriptor* tdesc = (tcp_descriptor*) desc; + if (li_val.l_onoff && li_val.l_linger == 0) + tdesc->tcp_add_flags |= TCP_ADDF_LINGER_ZERO; + else + tdesc->tcp_add_flags &= ~TCP_ADDF_LINGER_ZERO; + } break; case INET_OPT_PRIORITY: @@ -6251,6 +6259,15 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len) #else continue; #endif +#if defined(IPV6_TCLASS) && defined(SOL_IPV6) + case INET_OPT_TCLASS: + proto = SOL_IPV6; + type = IPV6_TCLASS; + propagate = 1; + DEBUGF(("inet_set_opts(%ld): s=%d, IPV6_TCLASS=%d\r\n", + (long)desc->port, desc->s, ival)); + break; +#endif case TCP_OPT_NODELAY: proto = IPPROTO_TCP; @@ -6342,6 +6359,29 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len) len -= arg_sz; break; +#ifdef SO_BINDTODEVICE + case INET_OPT_BIND_TO_DEVICE: + if (ival < 0) return -1; + if (len < ival) return -1; + if (ival > sizeof(ifname)) { + return -1; + } + memcpy(ifname, ptr, ival); + ifname[ival] = '\0'; + ptr += ival; + len -= ival; + + proto = SOL_SOCKET; + type = SO_BINDTODEVICE; + arg_ptr = (char*)&ifname; + arg_sz = sizeof(ifname); + propagate = 1; /* We do want to know if this fails */ + + DEBUGF(("inet_set_opts(%ld): s=%d, SO_BINDTODEVICE=%s\r\n", + (long)desc->port, desc->s, ifname)); + break; +#endif + default: return -1; } @@ -6474,6 +6514,9 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len) # ifdef SCTP_DELAYED_ACK_TIME struct sctp_assoc_value av; /* Not in SOLARIS10 */ # endif +# ifdef SO_BINDTODEVICE + char ifname[IFNAMSIZ]; +# endif } arg; @@ -6684,6 +6727,19 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len) continue; /* Option not supported -- ignore it */ # endif +# if defined(IPV6_TCLASS) && defined(SOL_IPV6) + case INET_OPT_TCLASS: + { + arg.ival= get_int32 (curr); curr += 4; + proto = SOL_IPV6; + type = IPV6_TCLASS; + arg_ptr = (char*) (&arg.ival); + arg_sz = sizeof ( arg.ival); + break; + } +# endif + + case INET_OPT_IPV6_V6ONLY: # if HAVE_DECL_IPV6_V6ONLY { @@ -6700,6 +6756,23 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len) continue; /* Option not supported -- ignore it */ # endif +#ifdef SO_BINDTODEVICE + case INET_OPT_BIND_TO_DEVICE: + arg_sz = get_int32(curr); curr += 4; + CHKLEN(curr, arg_sz); + if (arg_sz >= sizeof(arg.ifname)) + return -1; + memcpy(arg.ifname, curr, arg_sz); + arg.ifname[arg_sz] = '\0'; + curr += arg_sz; + + proto = SOL_SOCKET; + type = SO_BINDTODEVICE; + arg_ptr = (char*) (&arg.ifname); + arg_sz = sizeof ( arg.ifname); + break; +#endif + case SCTP_OPT_AUTOCLOSE: { arg.ival= get_int32 (curr); curr += 4; @@ -6740,7 +6813,6 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len) case SCTP_OPT_SET_PEER_PRIMARY_ADDR: { ErlDrvSizeT alen; - char *after; CHKLEN(curr, ASSOC_ID_LEN); /* XXX: These 2 opts have isomorphic value data structures, @@ -6751,12 +6823,9 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len) /* Fill in "arg.prim.sspp_addr": */ alen = ptr + len - curr; - after = inet_set_faddress(desc->sfamily, - (inet_address*) (&arg.prim.sspp_addr), - curr, &alen); - if (after == NULL) - return -1; - curr = after; + if (inet_set_faddress + (desc->sfamily, (inet_address*) (&arg.prim.sspp_addr), + &curr, &alen) != NULL) return -1; proto = IPPROTO_SCTP; if (eopt == SCTP_OPT_PRIMARY_ADDR) @@ -6782,7 +6851,6 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len) case SCTP_OPT_PEER_ADDR_PARAMS: { ErlDrvSizeT alen; - char *after; # ifdef HAVE_STRUCT_SCTP_PADDRPARAMS_SPP_FLAGS int eflags, cflags, hb_enable, hb_disable, pmtud_enable, pmtud_disable; @@ -6797,12 +6865,9 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len) /* Fill in "pap.spp_address": */ alen = ptr + len - curr; - after = inet_set_faddress(desc->sfamily, - (inet_address*) (&arg.pap.spp_address), - curr, &alen); - if (after == NULL) - return -1; - curr = after; + if (inet_set_faddress + (desc->sfamily, (inet_address*) (&arg.pap.spp_address), + &curr, &alen) != NULL) return -1; CHKLEN(curr, 4 + 2 + 3*4); @@ -6973,6 +7038,9 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc, ErlDrvSizeT dest_used = 0; ErlDrvSizeT dest_allocated = destlen; char *orig_dest = *dest; +#ifdef SO_BINDTODEVICE + char ifname[IFNAMSIZ]; +#endif /* Ptr is a name parameter */ #define RETURN_ERROR() \ @@ -7009,7 +7077,7 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc, do { \ ErlDrvSizeT new_need = ((Ptr) - (*dest)) + (Size); \ if (new_need > dest_used) { \ - erl_exit(1,"Internal error in inet_drv, " \ + erts_exit(ERTS_ERROR_EXIT,"Internal error in inet_drv, " \ "miscalculated buffer size"); \ } \ dest_used = new_need; \ @@ -7163,6 +7231,17 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc, continue; #endif + case INET_LOPT_TCP_SHOW_ECONNRESET: + if (desc->sprotocol == IPPROTO_TCP) { + tcp_descriptor* tdesc = (tcp_descriptor*) desc; + *ptr++ = opt; + ival = !!(tdesc->tcp_add_flags & TCP_ADDF_SHOW_ECONNRESET); + put_int32(ival, ptr); + } else { + TRUNCATE_TO(0,ptr); + } + continue; + case INET_OPT_PRIORITY: #ifdef SO_PRIORITY type = SO_PRIORITY; @@ -7182,6 +7261,15 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc, put_int32(0, ptr); continue; #endif + case INET_OPT_TCLASS: +#if defined(IPV6_TCLASS) && defined(SOL_IPV6) + proto = SOL_IPV6; + type = IPV6_TCLASS; + break; +#else + TRUNCATE_TO(0,ptr); + continue; +#endif case INET_OPT_REUSEADDR: type = SO_REUSEADDR; break; @@ -7288,6 +7376,26 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc, put_int32(arg_sz,ptr); continue; } + +#ifdef SO_BINDTODEVICE + case INET_OPT_BIND_TO_DEVICE: + arg_sz = sizeof(ifname); + TRUNCATE_TO(0,ptr); + PLACE_FOR(5 + arg_sz,ptr); + arg_ptr = ptr + 5; + if (IS_SOCKET_ERROR(sock_getopt(desc->s,SOL_SOCKET,SO_BINDTODEVICE, + arg_ptr,&arg_sz))) { + TRUNCATE_TO(0,ptr); + continue; + } + arg_sz = my_strnlen(arg_ptr, arg_sz); + TRUNCATE_TO(arg_sz + 5,ptr); + *ptr++ = opt; + put_int32(arg_sz,ptr); + ptr += arg_sz; + continue; +#endif + default: RETURN_ERROR(); } @@ -7314,14 +7422,14 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc, #ifdef HAVE_SCTP #define LOAD_PADDRINFO_CNT \ - (2*LOAD_ATOM_CNT + LOAD_ASSOC_ID_CNT + LOAD_IP_AND_PORT_CNT + \ + (2*LOAD_ATOM_CNT + LOAD_ASSOC_ID_CNT + LOAD_INET_GET_ADDRESS_CNT + \ 4*LOAD_INT_CNT + LOAD_TUPLE_CNT) static int load_paddrinfo (ErlDrvTermData * spec, int i, inet_descriptor* desc, struct sctp_paddrinfo* pai) { i = LOAD_ATOM (spec, i, am_sctp_paddrinfo); i = LOAD_ASSOC_ID (spec, i, pai->spinfo_assoc_id); - i = load_ip_and_port(spec, i, desc, &pai->spinfo_address); + i = load_inet_get_address(spec, i, desc, &pai->spinfo_address); switch(pai->spinfo_state) { case SCTP_ACTIVE: @@ -7330,8 +7438,13 @@ static int load_paddrinfo (ErlDrvTermData * spec, int i, case SCTP_INACTIVE: i = LOAD_ATOM (spec, i, am_inactive); break; +# if HAVE_DECL_SCTP_UNCONFIRMED + case SCTP_UNCONFIRMED: + i = LOAD_ATOM (spec, i, am_unconfirmed); + break; +# endif default: - ASSERT(0); /* NB: SCTP_UNCONFIRMED modifier not yet supported */ + i = LOAD_ATOM (spec, i, am_undefined); } i = LOAD_INT (spec, i, pai->spinfo_cwnd); i = LOAD_INT (spec, i, pai->spinfo_srtt); @@ -7370,7 +7483,7 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc, do { \ int need; \ if ((Index) > spec_allocated) { \ - erl_exit(1,"Internal error in inet_drv, " \ + erts_exit(ERTS_ERROR_EXIT,"Internal error in inet_drv, " \ "miscalculated buffer size"); \ } \ need = (Index) + (N); \ @@ -7564,6 +7677,25 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc, i = LOAD_TUPLE (spec, i, 2); break; } + +#ifdef SO_BINDTODEVICE + /* The following option returns a binary: */ + case INET_OPT_BIND_TO_DEVICE: { + char ifname[IFNAMSIZ]; + unsigned int sz = sizeof(ifname); + + if (sock_getopt(desc->s, SOL_SOCKET, SO_BINDTODEVICE, + &ifname, &sz) < 0) continue; + /* Fill in the response: */ + PLACE_FOR(spec, i, + LOAD_ATOM_CNT + LOAD_BUF2BINARY_CNT + LOAD_TUPLE_CNT); + i = LOAD_ATOM (spec, i, am_bind_to_device); + i = LOAD_BUF2BINARY(spec, i, ifname, my_strnlen(ifname, sz)); + i = LOAD_TUPLE (spec, i, 2); + break; + } +#endif + /* The following options just return an integer value: */ case INET_OPT_RCVBUF : case INET_OPT_SNDBUF : @@ -7571,6 +7703,7 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc, case INET_OPT_DONTROUTE: case INET_OPT_PRIORITY : case INET_OPT_TOS : + case INET_OPT_TCLASS : case INET_OPT_IPV6_V6ONLY: case SCTP_OPT_AUTOCLOSE: case SCTP_OPT_MAXSEG : @@ -7644,6 +7777,19 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc, continue; # endif } + case INET_OPT_TCLASS: + { +# if defined(IPV6_TCLASS) && defined(SOL_IPV6) + proto = SOL_IPV6; + type = IPV6_TCLASS; + is_int = 1; + tag = am_tclass; + break; +# else + /* Not supported -- ignore */ + continue; +# endif + } case INET_OPT_IPV6_V6ONLY: # if HAVE_DECL_IPV6_V6ONLY { @@ -7732,7 +7878,7 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc, /* Fill in the response: */ PLACE_FOR(spec, i, 2*LOAD_ATOM_CNT + LOAD_ASSOC_ID_CNT + - LOAD_IP_AND_PORT_CNT + 2*LOAD_TUPLE_CNT); + LOAD_INET_GET_ADDRESS_CNT + 2*LOAD_TUPLE_CNT); switch (eopt) { case SCTP_OPT_PRIMARY_ADDR: i = LOAD_ATOM(spec, i, am_sctp_primary_addr); @@ -7746,7 +7892,7 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc, ASSERT(0); } i = LOAD_ASSOC_ID (spec, i, sp.sspp_assoc_id); - i = load_ip_and_port(spec, i, desc, &sp.sspp_addr); + i = load_inet_get_address(spec, i, desc, &sp.sspp_addr); i = LOAD_TUPLE (spec, i, 3); i = LOAD_TUPLE (spec, i, 2); break; @@ -7773,7 +7919,7 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc, struct sctp_paddrparams ap; unsigned int sz = sizeof(ap); int n; - char *after; + char *before, *xerror; ErlDrvSizeT alen; if (buflen < ASSOC_ID_LEN) RETURN_ERROR(spec, -EINVAL); @@ -7781,23 +7927,32 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc, buf += ASSOC_ID_LEN; buflen -= ASSOC_ID_LEN; alen = buflen; - after = inet_set_faddress(desc->sfamily, - (inet_address*) (&ap.spp_address), - buf, &alen); - if (after == NULL) RETURN_ERROR(spec, -EINVAL); - buflen -= after - buf; - buf = after; - + before = buf; + xerror = + inet_set_faddress + (desc->sfamily, (inet_address*) (&ap.spp_address), + &buf, &alen); + if (xerror != NULL) { +#ifdef EAFNOSUPPORT + if (xerror == str_eafnosupport) { + RETURN_ERROR(spec, -EAFNOSUPPORT); + } +#else + RETURN_ERROR(spec, -EINVAL); +#endif + } + buflen -= buf - before; + if (sock_getopt(desc->s, IPPROTO_SCTP, SCTP_PEER_ADDR_PARAMS, &ap, &sz) < 0) continue; /* Fill in the response: */ PLACE_FOR(spec, i, 2*LOAD_ATOM_CNT + LOAD_ASSOC_ID_CNT + - LOAD_IP_AND_PORT_CNT + 4*LOAD_INT_CNT); + LOAD_INET_GET_ADDRESS_CNT + 4*LOAD_INT_CNT); i = LOAD_ATOM (spec, i, am_sctp_peer_addr_params); i = LOAD_ATOM (spec, i, am_sctp_paddrparams); i = LOAD_ASSOC_ID (spec, i, ap.spp_assoc_id); - i = load_ip_and_port(spec, i, desc, &ap.spp_address); + i = load_inet_get_address(spec, i, desc, &ap.spp_address); i = LOAD_INT (spec, i, ap.spp_hbinterval); i = LOAD_INT (spec, i, ap.spp_pathmaxrxt); @@ -8013,7 +8168,7 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc, { struct sctp_paddrinfo pai; unsigned int sz = sizeof(pai); - char *after; + char *before, *xerror; ErlDrvSizeT alen; if (buflen < ASSOC_ID_LEN) RETURN_ERROR(spec, -EINVAL); @@ -8021,13 +8176,22 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc, buf += ASSOC_ID_LEN; buflen -= ASSOC_ID_LEN; alen = buflen; - after = inet_set_faddress(desc->sfamily, - (inet_address*) (&pai.spinfo_address), - buf, &alen); - if (after == NULL) RETURN_ERROR(spec, -EINVAL); - buflen -= after - buf; - buf = after; - + before = buf; + xerror = + inet_set_faddress + (desc->sfamily, (inet_address*) (&pai.spinfo_address), + &buf, &alen); + if (xerror != NULL) { +#ifdef EAFNOSUPPORT + if (xerror == str_eafnosupport) { + RETURN_ERROR(spec, -EAFNOSUPPORT); + } +#else + RETURN_ERROR(spec, -EINVAL); +#endif + } + buflen -= buf - before; + if (sock_getopt(desc->s, IPPROTO_SCTP, SCTP_GET_PEER_ADDR_INFO, &pai, &sz) < 0) continue; /* Fill in the response: */ @@ -8196,6 +8360,19 @@ static void inet_stop(inet_descriptor* desc) FREE(desc); } +static void inet_emergency_close(ErlDrvData data) +{ + /* valid for any (UDP, TCP or SCTP) descriptor */ + tcp_descriptor* tcp_desc = (tcp_descriptor*)data; + inet_descriptor* desc = INETP(tcp_desc); + DEBUGF(("inet_emergency_close(%ld) {s=%d\r\n", + (long)desc->port, desc->s)); + if (desc->s != INVALID_SOCKET) { + sock_close(desc->s); + } +} + + static void set_default_msgq_limits(ErlDrvPort port) { ErlDrvSizeT q_high = INET_HIGH_MSGQ_WATERMARK; @@ -8243,6 +8420,7 @@ static ErlDrvData inet_start(ErlDrvPort port, int size, int protocol) desc->deliver = INET_DELIVER_TERM; /* standard term format */ desc->active = INET_PASSIVE; /* start passive */ desc->active_count = 0; + desc->delimiter = '\n'; /* line delimiting char */ desc->oph = NULL; desc->opt = NULL; @@ -8276,23 +8454,14 @@ static ErlDrvData inet_start(ErlDrvPort port, int size, int protocol) #ifdef HAVE_SETNS desc->netns = NULL; #endif -#ifdef __OSE__ - desc->select_state = 0; - desc->events[0] = NULL; - desc->events[1] = NULL; - desc->events[2] = NULL; - desc->events[3] = NULL; - desc->events[4] = NULL; - desc->events[5] = NULL; -#endif return (ErlDrvData)desc; } - -#ifndef MAXHOSTNAMELEN -#define MAXHOSTNAMELEN 256 -#endif +/* MAXHOSTNAMELEN could be 64 or 255 depending +on the platform. Instead, use INET_MAXHOSTNAMELEN +which is always 255 across all platforms */ +#define INET_MAXHOSTNAMELEN 255 /* ** common TCP/UDP/SCTP control command @@ -8433,6 +8602,11 @@ static ErlDrvSSizeT inet_ctl(inet_descriptor* desc, int cmd, char* buf, put_int32(INET_AF_INET6, &tbuf[0]); } #endif +#ifdef HAVE_SYS_UN_H + else if (desc->sfamily == AF_UNIX) { + put_int32(INET_AF_LOCAL, &tbuf[0]); + } +#endif else return ctl_error(EINVAL, rbuf, rsize); @@ -8464,13 +8638,14 @@ static ErlDrvSSizeT inet_ctl(inet_descriptor* desc, int cmd, char* buf, } case INET_REQ_GETHOSTNAME: { /* get host name */ - char tbuf[MAXHOSTNAMELEN]; + char tbuf[INET_MAXHOSTNAMELEN + 1]; DEBUGF(("inet_ctl(%ld): GETHOSTNAME\r\n", (long)desc->port)); if (len != 0) return ctl_error(EINVAL, rbuf, rsize); - if (IS_SOCKET_ERROR(sock_hostname(tbuf, MAXHOSTNAMELEN))) + /* gethostname requires len to be max(hostname) + 1 */ + if (IS_SOCKET_ERROR(sock_hostname(tbuf, INET_MAXHOSTNAMELEN + 1))) return ctl_error(sock_errno(), rbuf, rsize); return ctl_reply(INET_REP_OK, tbuf, strlen(tbuf), rbuf, rsize); } @@ -8481,7 +8656,6 @@ static ErlDrvSSizeT inet_ctl(inet_descriptor* desc, int cmd, char* buf, if (len != 4) return ctl_error(EINVAL, rbuf, rsize); if (! IS_OPEN(desc)) return ctl_xerror(EXBADPORT, rbuf, rsize); - if (! IS_BOUND(desc)) return ctl_xerror(EXBADSEQ, rbuf, rsize); #ifdef HAVE_SCTP if (IS_SCTP(desc) && p_sctp_getpaddrs) { @@ -8492,19 +8666,19 @@ static ErlDrvSSizeT inet_ctl(inet_descriptor* desc, int cmd, char* buf, assoc_id = get_int32(buf); n = p_sctp_getpaddrs(desc->s, assoc_id, &sa); - rlen = reply_inet_addrs(n, (inet_address *) sa, rbuf, rsize); + rlen = reply_inet_addrs(n, (inet_address *) sa, rbuf, rsize, 0); if (n > 0) p_sctp_freepaddrs(sa); return rlen; } #endif { /* Fallback to sock_peer */ inet_address addr; - unsigned int sz; + SOCKLEN_T sz; int i; sz = sizeof(addr); i = sock_peer(desc->s, (struct sockaddr *) &addr, &sz); - return reply_inet_addrs(i >= 0 ? 1 : i, &addr, rbuf, rsize); + return reply_inet_addrs(i >= 0 ? 1 : i, &addr, rbuf, rsize, sz); } } @@ -8512,15 +8686,22 @@ static ErlDrvSSizeT inet_ctl(inet_descriptor* desc, int cmd, char* buf, char tbuf[sizeof(inet_address)]; inet_address peer; inet_address* ptr; - unsigned int sz = sizeof(peer); + unsigned int sz; DEBUGF(("inet_ctl(%ld): PEER\r\n", (long)desc->port)); if (!(desc->state & INET_F_ACTIVE)) return ctl_error(ENOTCONN, rbuf, rsize); - if ((ptr = desc->peer_ptr) == NULL) { + if ((ptr = desc->peer_ptr) != NULL) { + sz = desc->peer_addr_len; + } + else { ptr = &peer; - if (IS_SOCKET_ERROR(sock_peer(desc->s, (struct sockaddr*)ptr,&sz))) + sz = sizeof(peer); + sys_memzero((char *) &peer, sz); + if (IS_SOCKET_ERROR + (sock_peer + (desc->s, (struct sockaddr*)ptr, &sz))) return ctl_error(sock_errno(), rbuf, rsize); } if (inet_get_address(tbuf, ptr, &sz) < 0) @@ -8529,17 +8710,19 @@ static ErlDrvSSizeT inet_ctl(inet_descriptor* desc, int cmd, char* buf, } case INET_REQ_SETPEER: { /* set fake peername Port Address */ + char *xerror; if (len == 0) { desc->peer_ptr = NULL; return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); } else if (len < 2) return ctl_error(EINVAL, rbuf, rsize); - else if (inet_set_address(desc->sfamily, &desc->peer_addr, - buf, &len) == NULL) - return ctl_error(EINVAL, rbuf, rsize); + else if ((xerror = inet_set_faddress + (desc->sfamily, &desc->peer_addr, &buf, &len)) != NULL) + return ctl_xerror(xerror, rbuf, rsize); else { desc->peer_ptr = &desc->peer_addr; + desc->peer_addr_len = (SOCKLEN_T) len; return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); } } @@ -8550,7 +8733,6 @@ static ErlDrvSSizeT inet_ctl(inet_descriptor* desc, int cmd, char* buf, if (len != 4) return ctl_error(EINVAL, rbuf, rsize); if (! IS_OPEN(desc)) return ctl_xerror(EXBADPORT, rbuf, rsize); - if (! IS_BOUND(desc)) return ctl_xerror(EXBADSEQ, rbuf, rsize); #ifdef HAVE_SCTP if (IS_SCTP(desc) && p_sctp_getladdrs) { @@ -8561,19 +8743,20 @@ static ErlDrvSSizeT inet_ctl(inet_descriptor* desc, int cmd, char* buf, assoc_id = get_int32(buf); n = p_sctp_getladdrs(desc->s, assoc_id, &sa); - rlen = reply_inet_addrs(n, (inet_address *) sa, rbuf, rsize); + rlen = reply_inet_addrs(n, (inet_address *) sa, rbuf, rsize, 0); if (n > 0) p_sctp_freeladdrs(sa); return rlen; } #endif { /* Fallback to sock_name */ inet_address addr; - unsigned int sz; + SOCKLEN_T sz; int i; sz = sizeof(addr); + sys_memzero((char *) &addr, sz); i = sock_name(desc->s, (struct sockaddr *) &addr, &sz); - return reply_inet_addrs(i >= 0 ? 1 : i, &addr, rbuf, rsize); + return reply_inet_addrs(i >= 0 ? 1 : i, &addr, rbuf, rsize, sz); } } @@ -8581,16 +8764,19 @@ static ErlDrvSSizeT inet_ctl(inet_descriptor* desc, int cmd, char* buf, char tbuf[sizeof(inet_address)]; inet_address name; inet_address* ptr; - unsigned int sz = sizeof(name); + unsigned int sz; DEBUGF(("inet_ctl(%ld): NAME\r\n", (long)desc->port)); - if (!IS_BOUND(desc)) - return ctl_error(EINVAL, rbuf, rsize); /* address is not valid */ - - if ((ptr = desc->name_ptr) == NULL) { + if ((ptr = desc->name_ptr) != NULL) { + sz = desc->name_addr_len; + } + else { ptr = &name; - if (IS_SOCKET_ERROR(sock_name(desc->s, (struct sockaddr*)ptr, &sz))) + sz = sizeof(name); + sys_memzero((char *) &name, sz); + if (IS_SOCKET_ERROR + (sock_name(desc->s, (struct sockaddr*)ptr, &sz))) return ctl_error(sock_errno(), rbuf, rsize); } if (inet_get_address(tbuf, ptr, &sz) < 0) @@ -8598,26 +8784,28 @@ static ErlDrvSSizeT inet_ctl(inet_descriptor* desc, int cmd, char* buf, return ctl_reply(INET_REP_OK, tbuf, sz, rbuf, rsize); } - case INET_REQ_SETNAME: { /* set fake peername Port Address */ + case INET_REQ_SETNAME: { /* set fake sockname Port Address */ + char *xerror; if (len == 0) { desc->name_ptr = NULL; return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); } else if (len < 2) return ctl_error(EINVAL, rbuf, rsize); - else if (inet_set_address(desc->sfamily, &desc->name_addr, - buf, &len) == NULL) - return ctl_error(EINVAL, rbuf, rsize); + else if ((xerror = inet_set_faddress + (desc->sfamily, &desc->name_addr, &buf, &len)) != NULL) + return ctl_xerror(xerror, rbuf, rsize); else { desc->name_ptr = &desc->name_addr; + desc->name_addr_len = (SOCKLEN_T) len; return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); } } case INET_REQ_BIND: { /* bind socket */ - char tbuf[2]; + char tbuf[2], *xerror; inet_address local; - short port; + int port; DEBUGF(("inet_ctl(%ld): BIND\r\n", (long)desc->port)); @@ -8626,21 +8814,24 @@ static ErlDrvSSizeT inet_ctl(inet_descriptor* desc, int cmd, char* buf, if (desc->state != INET_STATE_OPEN) return ctl_xerror(EXBADPORT, rbuf, rsize); - if (inet_set_faddress(desc->sfamily, &local, buf, &len) == NULL) - return ctl_error(EINVAL, rbuf, rsize); + if ((xerror = inet_set_faddress + (desc->sfamily, &local, &buf, &len)) != NULL) + return ctl_xerror(xerror, rbuf, rsize); if (IS_SOCKET_ERROR(sock_bind(desc->s,(struct sockaddr*) &local, len))) return ctl_error(sock_errno(), rbuf, rsize); - desc->state = INET_STATE_BOUND; + desc->state = INET_STATE_OPEN; - if ((port = inet_address_port(&local)) == 0) { + port = inet_address_port(&local); + if (port == 0) { SOCKLEN_T adrlen = sizeof(local); + sys_memzero((char *) &local, adrlen); sock_name(desc->s, &local.sa, &adrlen); port = inet_address_port(&local); } - port = sock_ntohs(port); - put_int16(port, tbuf); + else if (port == -1) port = 0; + put_int16(sock_ntohs((Uint16) port), tbuf); return ctl_reply(INET_REP_OK, tbuf, 2, rbuf, rsize); } @@ -8988,6 +9179,11 @@ static tcp_descriptor* tcp_inet_copy(tcp_descriptor* desc,SOCKET s, copy_desc->low = desc->low; copy_desc->send_timeout = desc->send_timeout; copy_desc->send_timeout_close = desc->send_timeout_close; + + if (desc->tcp_add_flags & TCP_ADDF_SHOW_ECONNRESET) + copy_desc->tcp_add_flags |= TCP_ADDF_SHOW_ECONNRESET; + else + copy_desc->tcp_add_flags &= ~TCP_ADDF_SHOW_ECONNRESET; /* The new port will be linked and connected to the original caller */ port = driver_create_port(port, owner, "tcp_inet", (ErlDrvData) copy_desc); @@ -9006,10 +9202,6 @@ static tcp_descriptor* tcp_inet_copy(tcp_descriptor* desc,SOCKET s, copy_desc->inet.port = port; copy_desc->inet.dport = driver_mk_port(port); -#ifdef __OSE__ - inet_select_init(©_desc->inet); -#endif - *err = 0; return copy_desc; } @@ -9061,32 +9253,30 @@ static void tcp_inet_stop(ErlDrvData e) tcp_descriptor* desc = (tcp_descriptor*)e; DEBUGF(("tcp_inet_stop(%ld) {s=%d\r\n", (long)desc->inet.port, desc->inet.s)); + tcp_close_check(desc); - /* free input buffer & output buffer */ - if (desc->i_buf != NULL) - release_buffer(desc->i_buf); - desc->i_buf = NULL; /* net_mess2 may call this function recursively when - faulty messages arrive on dist ports*/ + tcp_clear_input(desc); + DEBUGF(("tcp_inet_stop(%ld) }\r\n", (long)desc->inet.port)); inet_stop(INETP(desc)); } -#ifdef __OSE__ - -static ErlDrvSSizeT tcp_inet_ctl_ose(ErlDrvData e, unsigned int cmd, - char* buf, ErlDrvSizeT len, - char** rbuf, ErlDrvSizeT rsize) { - - tcp_descriptor* desc = (tcp_descriptor*)e; - int prev_select_state = INETP(desc)->select_state; - - ErlDrvSSizeT res = tcp_inet_ctl(e,cmd,buf,len,rbuf,rsize); - - tcp_inet_ose_dispatch_signals((tcp_descriptor*)e,prev_select_state,NULL); +/* Closes a tcp descriptor without leaving things hanging; the VM keeps trying + * to flush IO queues as long as it contains anything even after the port has + * been closed from the erlang side, which is desired behavior (Think escripts + * writing to files) but pretty hopeless if the underlying fd has been set to + * INVALID_SOCKET through desc_close. + * + * This function should be used in place of desc_close/erl_inet_close in all + * TCP-related operations. Note that this only closes the desc cleanly; it + * will be freed through tcp_inet_stop later on. */ +static void tcp_desc_close(tcp_descriptor* desc) +{ + tcp_clear_input(desc); + tcp_clear_output(desc); - return res; + erl_inet_close(INETP(desc)); } -#endif /* TCP requests from Erlang */ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, @@ -9108,23 +9298,25 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, case INET_AF_INET6: domain = AF_INET6; break; -#else - case INET_AF_INET6: - return ctl_xerror("eafnosupport", rbuf, rsize); +#endif +#ifdef HAVE_SYS_UN_H + case INET_AF_LOCAL: + domain = AF_UNIX; break; #endif default: - return ctl_error(EINVAL, rbuf, rsize); + return ctl_xerror(str_eafnosupport, rbuf, rsize); } if (buf[1] != INET_TYPE_STREAM) return ctl_error(EINVAL, rbuf, rsize); return inet_ctl_open(INETP(desc), domain, SOCK_STREAM, rbuf, rsize); break; } - case INET_REQ_FDOPEN: { /* pass in an open socket */ + case INET_REQ_FDOPEN: { /* pass in an open (and optionally bound) socket */ int domain; + int bound; DEBUGF(("tcp_inet_ctl(%ld): FDOPEN\r\n", (long)desc->inet.port)); - if (len != 6) return ctl_error(EINVAL, rbuf, rsize); + if (len != 6 && len != 10) return ctl_error(EINVAL, rbuf, rsize); switch(buf[0]) { case INET_AF_INET: domain = AF_INET; @@ -9133,17 +9325,23 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, case INET_AF_INET6: domain = AF_INET6; break; -#else - case INET_AF_INET6: - return ctl_xerror("eafnosupport", rbuf, rsize); +#endif +#ifdef HAVE_SYS_UN_H + case INET_AF_LOCAL: + domain = AF_UNIX; break; #endif default: - return ctl_error(EINVAL, rbuf, rsize); + return ctl_xerror(str_eafnosupport, rbuf, rsize); } if (buf[1] != INET_TYPE_STREAM) return ctl_error(EINVAL, rbuf, rsize); + + if (len == 6) bound = 1; + else bound = get_int32(buf+2+4); + return inet_ctl_fdopen(INETP(desc), domain, SOCK_STREAM, - (SOCKET) get_int32(buf+2), rbuf, rsize); + (SOCKET) get_int32(buf+2), + bound, rbuf, rsize); break; } @@ -9155,8 +9353,6 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, return ctl_xerror(EXBADPORT, rbuf, rsize); if (!IS_OPEN(INETP(desc))) return ctl_xerror(EXBADPORT, rbuf, rsize); - if (!IS_BOUND(INETP(desc))) - return ctl_xerror(EXBADSEQ, rbuf, rsize); if (len != 2) return ctl_error(EINVAL, rbuf, rsize); backlog = get_int16(buf); @@ -9169,7 +9365,7 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, case INET_REQ_CONNECT: { /* do async connect */ int code; - char tbuf[2]; + char tbuf[2], *xerror; unsigned timeout; DEBUGF(("tcp_inet_ctl(%ld): CONNECT\r\n", (long)desc->inet.port)); @@ -9179,8 +9375,6 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, return ctl_xerror(EXBADPORT, rbuf, rsize); if (IS_CONNECTED(INETP(desc))) return ctl_error(EISCONN, rbuf, rsize); - if (!IS_BOUND(INETP(desc))) - return ctl_xerror(EXBADSEQ, rbuf, rsize); if (IS_CONNECTING(INETP(desc))) return ctl_error(EINVAL, rbuf, rsize); if (len < 6) @@ -9188,9 +9382,9 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, timeout = get_int32(buf); buf += 4; len -= 4; - if (inet_set_address(desc->inet.sfamily, &desc->inet.remote, - buf, &len) == NULL) - return ctl_error(EINVAL, rbuf, rsize); + if ((xerror = inet_set_faddress + (desc->inet.sfamily, &desc->inet.remote, &buf, &len)) != NULL) + return ctl_xerror(xerror, rbuf, rsize); code = sock_connect(desc->inet.s, (struct sockaddr*) &desc->inet.remote, len); @@ -9281,6 +9475,7 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, return ctl_reply(INET_REP_OK, tbuf, 2, rbuf, rsize); } else { n = sizeof(desc->inet.remote); + sys_memzero((char *) &remote, n); s = sock_accept(desc->inet.s, (struct sockaddr*) &remote, &n); if (s == INVALID_SOCKET) { if (sock_errno() == ERRNO_BLOCK) { @@ -9327,7 +9522,7 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, case INET_REQ_CLOSE: DEBUGF(("tcp_inet_ctl(%ld): CLOSE\r\n", (long)desc->inet.port)); tcp_close_check(desc); - erl_inet_close(INETP(desc)); + tcp_desc_close(desc); return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); @@ -9343,7 +9538,11 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, if (desc->tcp_add_flags & TCP_ADDF_DELAYED_CLOSE_RECV) { desc->tcp_add_flags &= ~(TCP_ADDF_DELAYED_CLOSE_RECV| TCP_ADDF_DELAYED_CLOSE_SEND); - return ctl_reply(INET_REP_ERROR, "closed", 6, rbuf, rsize); + if (desc->tcp_add_flags & TCP_ADDF_DELAYED_ECONNRESET) { + desc->tcp_add_flags &= ~TCP_ADDF_DELAYED_ECONNRESET; + return ctl_reply(INET_REP_ERROR, "econnreset", 10, rbuf, rsize); + } else + return ctl_reply(INET_REP_ERROR, "closed", 6, rbuf, rsize); } return ctl_error(ENOTCONN, rbuf, rsize); } @@ -9395,10 +9594,20 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, return ctl_error(EINVAL, rbuf, rsize); } how = buf[0]; - if (sock_shutdown(INETP(desc)->s, how) == 0) { + if (how != TCP_SHUT_RD && driver_sizeq(desc->inet.port) > 0) { + if (how == TCP_SHUT_WR) { + desc->tcp_add_flags |= TCP_ADDF_PENDING_SHUT_WR; + } else if (how == TCP_SHUT_RDWR) { + desc->tcp_add_flags |= TCP_ADDF_PENDING_SHUT_RDWR; + } return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); - } else { + } + if (IS_SOCKET_ERROR(sock_shutdown(INETP(desc)->s, how))) { + if (how != TCP_SHUT_RD) + desc->tcp_add_flags |= TCP_ADDF_SHUTDOWN_WR_DONE; return ctl_error(sock_errno(), rbuf, rsize); + } else { + return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); } } default: @@ -9437,7 +9646,7 @@ static void tcp_inet_timeout(ErlDrvData e) set_busy_port(desc->inet.port, 0); inet_reply_error_am(INETP(desc), am_timeout); if (desc->send_timeout_close) { - erl_inet_close(INETP(desc)); + tcp_desc_close(desc); } } else { @@ -9451,7 +9660,7 @@ static void tcp_inet_timeout(ErlDrvData e) else if ((state & INET_STATE_CONNECTING) == INET_STATE_CONNECTING) { /* assume connect timeout */ /* close the socket since it's not usable (see man pages) */ - erl_inet_close(INETP(desc)); + tcp_desc_close(desc); async_error_am(INETP(desc), am_timeout); } else if ((state & INET_STATE_ACCEPTING) == INET_STATE_ACCEPTING) { @@ -9509,17 +9718,6 @@ static void tcp_inet_command(ErlDrvData e, char *buf, ErlDrvSizeT len) DEBUGF(("tcp_inet_command(%ld) }\r\n", (long)desc->inet.port)); } -#ifdef __OSE__ - -static void tcp_inet_commandv_ose(ErlDrvData e, ErlIOVec* ev) { - int prev_select_state = INETP((tcp_descriptor*)e)->select_state; - tcp_inet_commandv(e, ev); - tcp_inet_ose_dispatch_signals((tcp_descriptor*)e,prev_select_state,NULL); -} - -#endif - - static void tcp_inet_commandv(ErlDrvData e, ErlIOVec* ev) { tcp_descriptor* desc = (tcp_descriptor*)e; @@ -9530,11 +9728,19 @@ static void tcp_inet_commandv(ErlDrvData e, ErlIOVec* ev) if (!IS_CONNECTED(INETP(desc))) { if (desc->tcp_add_flags & TCP_ADDF_DELAYED_CLOSE_SEND) { desc->tcp_add_flags &= ~TCP_ADDF_DELAYED_CLOSE_SEND; - inet_reply_error_am(INETP(desc), am_closed); + if (desc->tcp_add_flags & TCP_ADDF_DELAYED_ECONNRESET) { + /* Don't clear flag. Leave it enabled for the next receive + * operation. + */ + inet_reply_error(INETP(desc), ECONNRESET); + } else + inet_reply_error_am(INETP(desc), am_closed); } else inet_reply_error(INETP(desc), ENOTCONN); } + else if (desc->tcp_add_flags & TCP_ADDF_PENDING_SHUTDOWN) + tcp_shutdown_error(desc, EPIPE); else if (tcp_sendv(desc, ev) == 0) inet_reply_ok(INETP(desc)); DEBUGF(("tcp_inet_commandv(%ld) }\r\n", (long)desc->inet.port)); @@ -9547,6 +9753,8 @@ static void tcp_inet_flush(ErlDrvData e) /* Discard send queue to avoid hanging port (OTP-7615) */ tcp_clear_output(desc); } + if (desc->tcp_add_flags & TCP_ADDF_LINGER_ZERO) + tcp_clear_output(desc); } static void tcp_inet_process_exit(ErlDrvData e, ErlDrvMonitor *monitorp) @@ -9582,22 +9790,6 @@ static void inet_stop_select(ErlDrvEvent event, void* _) { #ifdef __WIN32__ WSACloseEvent((HANDLE)event); -#elif defined(__OSE__) - ErlDrvOseEventId id; - union SIGNAL *sig; - erl_drv_ose_event_fetch(event, NULL, &id,NULL); - DEBUGF(("inet_stop_select(?#?) {s=%d\n",id)); - sock_close((int)id); - /* On socket close all the signals waiting to be processed as part of the - select should be deallocated */ - while((sig = erl_drv_ose_get_signal(event))) { - DEBUGF(("inet_stop_select(?#?): Freeing signal %s\n", - signo_to_string(sig->signo))); - free_buf(&sig); - } - erl_drv_ose_event_free(event); - DEBUGF(("inet_stop_select(?#?) }\n")); - #else sock_close((SOCKET)(long)event); #endif @@ -9631,8 +9823,7 @@ static int tcp_recv_closed(tcp_descriptor* desc) /* passive mode do not terminate port ! */ tcp_clear_input(desc); if (desc->inet.exitf) { - tcp_clear_output(desc); - desc_close(INETP(desc)); + tcp_desc_close(desc); } else { desc_close_read(INETP(desc)); } @@ -9675,7 +9866,7 @@ static int tcp_recv_error(tcp_descriptor* desc, int err) driver_cancel_timer(desc->inet.port); tcp_clear_input(desc); if (desc->inet.exitf) { - desc_close(INETP(desc)); + tcp_desc_close(desc); } else { desc_close_read(INETP(desc)); } @@ -9719,7 +9910,7 @@ static int tcp_remain(tcp_descriptor* desc, int* len) tlen = packet_get_length(desc->inet.htype, ptr, n, desc->inet.psize, desc->i_bufsz, - &desc->http_state); + desc->inet.delimiter, &desc->http_state); DEBUGF(("tcp_remain(%ld): s=%d, n=%d, nfill=%d nsz=%d, tlen %d\r\n", (long)desc->inet.port, desc->inet.s, n, nfill, nsz, tlen)); @@ -9909,7 +10100,10 @@ static int tcp_recv(tcp_descriptor* desc, int request_len) int err = sock_errno(); if (err == ECONNRESET) { DEBUGF((" => detected close (connreset)\r\n")); - return tcp_recv_closed(desc); + if (desc->tcp_add_flags & TCP_ADDF_SHOW_ECONNRESET) + return tcp_recv_error(desc, err); + else + return tcp_recv_closed(desc); } if (err == ERRNO_BLOCK) { DEBUGF((" => would block\r\n")); @@ -10121,7 +10315,19 @@ static void tcp_inet_event(ErlDrvData e, ErlDrvEvent event) if (netEv.lNetworkEvents & FD_CLOSE) { /* error in err = netEv.iErrorCode[FD_CLOSE_BIT] */ DEBUGF(("Detected close in %s, line %d\r\n", __FILE__, __LINE__)); - tcp_recv_closed(desc); + if (desc->tcp_add_flags & TCP_ADDF_SHOW_ECONNRESET) { + err = netEv.iErrorCode[FD_CLOSE_BIT]; + if (err == ECONNRESET) + tcp_recv_error(desc, err); + else if (err == ECONNABORTED && IS_CONNECTED(INETP(desc))) { + /* translate this error to ECONNRESET */ + tcp_recv_error(desc, ECONNRESET); + } + else + tcp_recv_closed(desc); + } + else + tcp_recv_closed(desc); } DEBUGF(("tcp_inet_event(%ld) }\r\n", (long)desc->inet.port)); return; @@ -10131,146 +10337,7 @@ static void tcp_inet_event(ErlDrvData e, ErlDrvEvent event) return; } -#elif defined(__OSE__) /* !__WIN32__ */ -/* The specific resolve signal function. It will return the socket descriptor - for which the select was issued */ -static ErlDrvOseEventId inet_resolve_signal(union SIGNAL *sig) { - DEBUGF(("%s(?#?): s=%d got signal %s, status = %d, extra = %d, sender = 0x%x\n", - __FUNCTION__,sig->async.fd,signo_to_string(sig->signo), - sig->async.event.status, - sig->async.event.extra,sender(&sig))); - if (sig->signo == SO_EVENT_READ_REPLY || - sig->signo == SO_EVENT_ACCEPT_REPLY || - sig->signo == SO_EVENT_EOF_REPLY || - sig->signo == SO_EVENT_WRITE_REPLY || - sig->signo == SO_EVENT_ERROR_REPLY || - sig->signo == SO_EVENT_CONNECT_REPLY ) { - return sig->async.fd; - } - - return -1; -} - -static void inet_driver_select(inet_descriptor* desc, - int flags, int onoff) { - ASSERT(!desc->is_ignored); - - if(onoff) { - desc->select_state |= flags; - } else { - desc->select_state &= ~flags; - } -} - -static ssize_t writev_fallback(int fd, const struct iovec *iov, int iovcnt, int max_sz) -{ - size_t data_len = 0; - size_t sent = 0; - ssize_t n; - int i; - - for(i = 0; i < iovcnt; i++) - { - data_len = iov[i].iov_len; -tryagain: - n = sock_send(fd, iov[i].iov_base, data_len, 0); - if (IS_SOCKET_ERROR(n)) { - /* If buffer length is greater than the amount stack is able to - * send out then try to send at least max_sz (this comes with - * SO_EVENT_WRITE_REPLY signal*/ - if ((errno == EMSGSIZE) && (max_sz > 0) && (data_len > max_sz)) { - data_len = max_sz; - goto tryagain; - } - break; - } - sent += n; - } - return sent; -} - -#define OSE_EVENT_REQ(TCP_DESC,EVENT) do { \ - union SIGNAL *sig = alloc(sizeof(struct OseAsyncSig), EVENT); \ - sig->async.fd = INETP(TCP_DESC)->s; \ - ose_request_event(INETP(TCP_DESC)->s, &sig, 1); \ - DEBUGF(("%s(%ld): s=%d sent %s\r\n",__FUNCTION__, \ - INETP(TCP_DESC)->port,INETP(TCP_DESC)->s,signo_to_string(EVENT))); \ - } while(0) - -static void tcp_inet_ose_dispatch_signals(tcp_descriptor *desc, - int prev_select_state, - union SIGNAL *sig) { - if (sig) { - DEBUGF(("tcp_inet_ose_dispatch_signals(%ld) {s=%d resend\r\n", - (long)INETP(desc)->port,INETP(desc)->s)); - /* We are reacting to a signal, which means that if - the select_state for that signal is still activated - we should send a new signal */ - switch (sig->signo) { - case SO_EVENT_READ_REPLY: { - if (INETP(desc)->select_state & FD_READ) - OSE_EVENT_REQ(desc,SO_EVENT_READ_REQUEST); - break; - } - case SO_EVENT_WRITE_REPLY: { - if (INETP(desc)->select_state & FD_WRITE) - OSE_EVENT_REQ(desc,SO_EVENT_WRITE_REQUEST); - break; - } - case SO_EVENT_CONNECT_REPLY: { - if (INETP(desc)->select_state & FD_CONNECT) - OSE_EVENT_REQ(desc,SO_EVENT_CONNECT_REQUEST); - break; - } - case SO_EVENT_ACCEPT_REPLY: { - if (INETP(desc)->select_state & FD_ACCEPT) - OSE_EVENT_REQ(desc,SO_EVENT_ACCEPT_REQUEST); - break; - } - case SO_EVENT_ERROR_REPLY: { - if (INETP(desc)->select_state & SOCK_FD_ERROR) - OSE_EVENT_REQ(desc,SO_EVENT_ERROR_REQUEST); - break; - } - - } - DEBUGF(("tcp_inet_ose_dispatch_signals(%ld) }\r\n", - (long)INETP(desc)->port)); - } - - if (INETP(desc)->select_state != prev_select_state) { - /* If the select state has changed we have to issue signals for - the state parts that have changed. */ - int xor_select_state = INETP(desc)->select_state ^ prev_select_state; - DEBUGF(("tcp_inet_ose_dispatch_signals(%ld) {s=%d select change\r\n", - (long)INETP(desc)->port,INETP(desc)->s)); - if ((xor_select_state & FD_READ) && - (INETP(desc)->select_state & FD_READ)) { - OSE_EVENT_REQ(desc,SO_EVENT_READ_REQUEST); - } - if ((xor_select_state & FD_WRITE) && - (INETP(desc)->select_state & FD_WRITE)) { - OSE_EVENT_REQ(desc,SO_EVENT_WRITE_REQUEST); - } - if ((xor_select_state & FD_CONNECT) && - (INETP(desc)->select_state & FD_CONNECT)) { - OSE_EVENT_REQ(desc,SO_EVENT_CONNECT_REQUEST); - } - if ((xor_select_state & FD_ACCEPT) && - (INETP(desc)->select_state & FD_ACCEPT)) { - OSE_EVENT_REQ(desc,SO_EVENT_ACCEPT_REQUEST); - } - if ((xor_select_state & SOCK_FD_ERROR) && - (INETP(desc)->select_state & SOCK_FD_ERROR)) { - OSE_EVENT_REQ(desc,SO_EVENT_ERROR_REQUEST); - } - - DEBUGF(("tcp_inet_ose_dispatch_signals(%ld) }\r\n", - (long)INETP(desc)->port)); - } -} - -#endif /* __OSE__ */ +#endif /* __WIN32__ */ /* socket has input: @@ -10293,6 +10360,7 @@ static int tcp_inet_input(tcp_descriptor* desc, HANDLE event) inet_async_op *this_op = desc->inet.opt; len = sizeof(desc->inet.remote); + sys_memzero((char *) &remote, len); s = sock_accept(desc->inet.s, (struct sockaddr*) &remote, &len); if (s == INVALID_SOCKET && sock_errno() == ERRNO_BLOCK) { /* Just try again, no real error, just a ghost trigger from poll, @@ -10359,6 +10427,7 @@ static int tcp_inet_input(tcp_descriptor* desc, HANDLE event) while (desc->inet.state == INET_STATE_MULTI_ACCEPTING) { len = sizeof(desc->inet.remote); + sys_memzero((char *) &remote, len); s = sock_accept(desc->inet.s, (struct sockaddr*) &remote, &len); if (s == INVALID_SOCKET && sock_errno() == ERRNO_BLOCK) { /* Just try again, no real error, keep the last return code */ @@ -10428,8 +10497,11 @@ static int tcp_inet_input(tcp_descriptor* desc, HANDLE event) return ret; } -static int tcp_send_error(tcp_descriptor* desc, int err) +static int tcp_send_or_shutdown_error(tcp_descriptor* desc, int err) { + int show_econnreset = (err == ECONNRESET + && desc->tcp_add_flags & TCP_ADDF_SHOW_ECONNRESET); + /* * If the port is busy, we must do some clean-up before proceeding. */ @@ -10445,26 +10517,34 @@ static int tcp_send_error(tcp_descriptor* desc, int err) /* * We used to handle "expected errors" differently from unexpected ones. - * Now we handle all errors in the same way. We just have to distinguish - * between passive and active sockets. + * Now we handle all errors in the same way (unless the show_econnreset + * socket option is enabled). We just have to distinguish between passive + * and active sockets. */ DEBUGF(("driver_failure_eof(%ld) in %s, line %d\r\n", (long)desc->inet.port, __FILE__, __LINE__)); if (desc->inet.active) { - tcp_closed_message(desc); - inet_reply_error_am(INETP(desc), am_closed); + if (show_econnreset) { + tcp_error_message(desc, err); + tcp_closed_message(desc); + inet_reply_error(INETP(desc), err); + } else { + tcp_closed_message(desc); + inet_reply_error_am(INETP(desc), am_closed); + } if (desc->inet.exitf) driver_exit(desc->inet.port, 0); else - desc_close(INETP(desc)); + tcp_desc_close(desc); } else { - tcp_clear_output(desc); - tcp_clear_input(desc); tcp_close_check(desc); - erl_inet_close(INETP(desc)); + tcp_desc_close(desc); if (desc->inet.caller) { - inet_reply_error_am(INETP(desc), am_closed); + if (show_econnreset) + inet_reply_error(INETP(desc), err); + else + inet_reply_error_am(INETP(desc), am_closed); } else { /* No blocking send op to reply to right now. @@ -10481,10 +10561,54 @@ static int tcp_send_error(tcp_descriptor* desc, int err) * in the receive operation. */ desc->tcp_add_flags |= TCP_ADDF_DELAYED_CLOSE_RECV; + + if (show_econnreset) { + /* Return {error, econnreset} instead of {error, closed} + * on send or receive operations. + */ + desc->tcp_add_flags |= TCP_ADDF_DELAYED_ECONNRESET; + } } return -1; } +static int tcp_send_error(tcp_descriptor* desc, int err) +{ + /* EPIPE errors usually occur in one of three ways: + * 1. We write to a socket when we've already shutdown() the write side. On + * Windows the error returned for this is ESHUTDOWN rather than EPIPE. + * 2. The TCP peer sends us an RST through no fault of our own (perhaps + * by aborting the connection using SO_LINGER) and we then attempt + * to write to the socket. On Linux and Windows we would actually + * receive an ECONNRESET error for this, but on the BSDs, Darwin, + * Illumos and presumably Solaris, it's an EPIPE. + * 3. We cause the TCP peer to send us an RST by writing to a socket + * after we receive a FIN from them. Our first write will be + * successful, but if the they have closed the connection (rather + * than just shutting down the write side of it) this will cause their + * OS to send us an RST. Then, when we attempt to write to the socket + * a second time, we will get an EPIPE error. On Windows we get an + * ECONNABORTED. + * + * What we are going to do here is to treat all EPIPE messages that aren't + * of type 1 as ECONNRESET errors. This will allow users who have the + * show_econnreset socket option enabled to receive {error, econnreset} on + * both send and recv operations to indicate that an RST has been received. + */ +#ifdef __WIN_32__ + if (err == ECONNABORTED) + err = ECONNRESET; +#endif + if (err == EPIPE && !(desc->tcp_add_flags & TCP_ADDF_SHUTDOWN_WR_DONE)) + err = ECONNRESET; + return tcp_send_or_shutdown_error(desc, err); +} + +static int tcp_shutdown_error(tcp_descriptor* desc, int err) +{ + return tcp_send_or_shutdown_error(desc, err); +} + /* ** Send non-blocking vector data */ @@ -10685,49 +10809,21 @@ static int tcp_send(tcp_descriptor* desc, char* ptr, ErlDrvSizeT len) return 0; } -#ifdef __OSE__ - -static void tcp_inet_drv_output_ose(ErlDrvData data, ErlDrvEvent event) +/* shutdown on the socket: +** Assume caller has confirmed TCP_ADDF_PENDING_SHUTDOWN is set. +*/ +static void tcp_shutdown_async(tcp_descriptor* desc) { - union SIGNAL *event_sig = erl_drv_ose_get_signal(event); + int how; - while (event_sig) { - int prev_select_state = INETP((tcp_descriptor*)data)->select_state; - int res = tcp_inet_output((tcp_descriptor*)data, (HANDLE)event_sig); - if (res != -1) { - tcp_inet_ose_dispatch_signals((tcp_descriptor*)data, - prev_select_state,event_sig); - free_buf(&event_sig); - event_sig = erl_drv_ose_get_signal(event); - } else { - /* NOTE: here the event object could have been deallocated!!!! - inet_stop_select is called when doing driver_select(ERL_DRV_USE,0) - */ - free_buf(&event_sig); - return; - } - } + how = (desc->tcp_add_flags & TCP_ADDF_PENDING_SHUT_WR) ? + TCP_SHUT_WR : TCP_SHUT_RDWR; + if (IS_SOCKET_ERROR(sock_shutdown(INETP(desc)->s, how))) + tcp_shutdown_error(desc, sock_errno()); + else + desc->tcp_add_flags |= TCP_ADDF_SHUTDOWN_WR_DONE; } -static void tcp_inet_drv_input_ose(ErlDrvData data, ErlDrvEvent event) -{ - union SIGNAL *event_sig = erl_drv_ose_get_signal(event); - - while (event_sig) { - int prev_select_state = INETP((tcp_descriptor*)data)->select_state; - int res = tcp_inet_input((tcp_descriptor*)data, (HANDLE)event); - if (res != -1) { - tcp_inet_ose_dispatch_signals((tcp_descriptor*)data, prev_select_state, - event_sig); - free_buf(&event_sig); - event_sig = erl_drv_ose_get_signal(event); - } else { - free_buf(&event_sig); - return; - } - } -} -#else static void tcp_inet_drv_output(ErlDrvData data, ErlDrvEvent event) { (void)tcp_inet_output((tcp_descriptor*)data, (HANDLE)event); @@ -10737,7 +10833,6 @@ static void tcp_inet_drv_input(ErlDrvData data, ErlDrvEvent event) { (void)tcp_inet_input((tcp_descriptor*)data, (HANDLE)event); } -#endif /* socket ready for ouput: ** 1. INET_STATE_CONNECTING => non block connect ? @@ -10766,12 +10861,13 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event) #ifndef SO_ERROR { - int sz = sizeof(desc->inet.remote); - int code = sock_peer(desc->inet.s, - (struct sockaddr*) &desc->inet.remote, &sz); - + int sz, code; + sz = sizeof(desc->inet.remote); + sys_memzero((char *) &desc->inet.remote, sz); + code = sock_peer(desc->inet.s, + (struct sockaddr*) &desc->inet.remote, &sz); if (IS_SOCKET_ERROR(code)) { - desc->inet.state = INET_STATE_BOUND; /* restore state */ + desc->inet.state = INET_STATE_OPEN; /* restore state */ ret = async_error(INETP(desc), sock_errno()); goto done; } @@ -10784,7 +10880,7 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event) (void *)&error, &sz); if ((code < 0) || error) { - desc->inet.state = INET_STATE_BOUND; /* restore state */ + desc->inet.state = INET_STATE_OPEN; /* restore state */ ret = async_error(INETP(desc), error); goto done; } @@ -10803,16 +10899,11 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event) ssize_t n; SysIOVec* iov; -#ifdef __OSE__ - /* For large size buffers case the amount of data that the stack is - able to send out (received in the .extra field) should be passed - down to writev_fallback */ - n = event ? ((union SIGNAL*)event)->async.event.extra : 0; -#endif - if ((iov = driver_peekq(ix, &vsize)) == NULL) { sock_select(INETP(desc), FD_WRITE, 0); send_empty_out_q_msgs(INETP(desc)); + if (desc->tcp_add_flags & TCP_ADDF_PENDING_SHUTDOWN) + tcp_shutdown_async(desc); goto done; } vsize = vsize > MAX_VSIZE ? MAX_VSIZE : vsize; @@ -10835,12 +10926,6 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event) sizes > (max 32 bit signed int) */ size_t howmuch = 0x7FFFFFFF; /* max signed 32 bit */ int x; -#ifdef __OSE__ - /* For EWOULDBLOCK sock_sendv returns 0 so we have to be sure it - wasn't the case */ - if(sock_errno() == ERRNO_BLOCK) - goto done; -#endif for(x = 0; x < vsize && iov[x].iov_len == 0; ++x) ; if (x < vsize) { @@ -11077,13 +11162,12 @@ static ErlDrvSSizeT packet_inet_ctl(ErlDrvData e, unsigned int cmd, char* buf, case INET_AF_INET: af = AF_INET; break; #if defined(HAVE_IN6) && defined(AF_INET6) case INET_AF_INET6: af = AF_INET6; break; -#else - case INET_AF_INET6: - return ctl_xerror("eafnosupport", rbuf, rsize); - break; +#endif +#ifdef HAVE_SYS_UN_H + case INET_AF_LOCAL: af = AF_UNIX; break; #endif default: - return ctl_error(EINVAL, rbuf, rsize); + return ctl_xerror(str_eafnosupport, rbuf, rsize); } switch (buf[1]) { case INET_TYPE_STREAM: type = SOCK_STREAM; break; @@ -11116,23 +11200,23 @@ static ErlDrvSSizeT packet_inet_ctl(ErlDrvData e, unsigned int cmd, char* buf, return replen; - case INET_REQ_FDOPEN: { /* pass in an open (and bound) socket */ + case INET_REQ_FDOPEN: { /* pass in an open (and optionally bound) socket */ SOCKET s; + int bound; DEBUGF(("packet inet_ctl(%ld): FDOPEN\r\n", (long)desc->port)); - if (len != 6) { + if (len != 6 && len != 10) { return ctl_error(EINVAL, rbuf, rsize); } switch (buf[0]) { case INET_AF_INET: af = AF_INET; break; #if defined(HAVE_IN6) && defined(AF_INET6) case INET_AF_INET6: af = AF_INET6; break; -#else - case INET_AF_INET6: - return ctl_xerror("eafnosupport", rbuf, rsize); - break; +#endif +#ifdef HAVE_SYS_UN_H + case INET_AF_LOCAL: af = AF_UNIX; break; #endif default: - return ctl_error(EINVAL, rbuf, rsize); + return ctl_xerror(str_eafnosupport, rbuf, rsize); } switch (buf[1]) { case INET_TYPE_STREAM: type = SOCK_STREAM; break; @@ -11144,7 +11228,11 @@ static ErlDrvSSizeT packet_inet_ctl(ErlDrvData e, unsigned int cmd, char* buf, return ctl_error(EINVAL, rbuf, rsize); } s = (SOCKET)get_int32(buf+2); - replen = inet_ctl_fdopen(desc, af, type, s, rbuf, rsize); + + if (len == 6) bound = 1; + else bound = get_int32(buf+2+4); + + replen = inet_ctl_fdopen(desc, af, type, s, bound, rbuf, rsize); if ((*rbuf)[0] != INET_REP_ERROR) { if (desc->active) @@ -11193,11 +11281,10 @@ static ErlDrvSSizeT packet_inet_ctl(ErlDrvData e, unsigned int cmd, char* buf, if (!IS_OPEN(desc)) return ctl_xerror(EXBADPORT, rbuf, rsize); - if (!IS_BOUND(desc)) - return ctl_xerror(EXBADSEQ, rbuf, rsize); #ifdef HAVE_SCTP if (IS_SCTP(desc)) { inet_address remote; + char *xerror; if (IS_CONNECTING(desc)) return ctl_error(EINVAL, rbuf, rsize); @@ -11209,8 +11296,9 @@ static ErlDrvSSizeT packet_inet_ctl(ErlDrvData e, unsigned int cmd, char* buf, /* For SCTP, we do not set the peer's addr in desc->remote, as multiple peers are possible: */ - if (inet_set_address(desc->sfamily, &remote, buf, &len) == NULL) - return ctl_error(EINVAL, rbuf, rsize); + if ((xerror = inet_set_faddress + (desc->sfamily, &remote, &buf, &len)) != NULL) + return ctl_xerror(xerror, rbuf, rsize); sock_select(desc, FD_CONNECT, 1); code = sock_connect(desc->s, &remote.sa, len); @@ -11246,12 +11334,13 @@ static ErlDrvSSizeT packet_inet_ctl(ErlDrvData e, unsigned int cmd, char* buf, else if (len < 6) return ctl_error(EINVAL, rbuf, rsize); else { + char *xerror; /* Ignore timeout */ buf += 4; len -= 4; - if (inet_set_address(desc->sfamily, - &desc->remote, buf, &len) == NULL) - return ctl_error(EINVAL, rbuf, rsize); + if ((xerror = inet_set_faddress + (desc->sfamily, &desc->remote, &buf, &len)) != NULL) + return ctl_xerror(xerror, rbuf, rsize); code = sock_connect(desc->s, (struct sockaddr*) &desc->remote, len); @@ -11281,8 +11370,6 @@ static ErlDrvSSizeT packet_inet_ctl(ErlDrvData e, unsigned int cmd, char* buf, return ctl_xerror(EXBADPORT, rbuf, rsize); if (!IS_OPEN(desc)) return ctl_xerror(EXBADPORT, rbuf, rsize); - if (!IS_BOUND(desc)) - return ctl_xerror(EXBADSEQ, rbuf, rsize); if (len != 2) return ctl_error(EINVAL, rbuf, rsize); @@ -11317,11 +11404,13 @@ static ErlDrvSSizeT packet_inet_ctl(ErlDrvData e, unsigned int cmd, char* buf, while (curr < buf+len) { + char *xerror; /* List item format: see "inet_set_faddress": */ ErlDrvSizeT alen = buf + len - curr; - curr = inet_set_faddress(desc->sfamily, &addr, curr, &alen); - if (curr == NULL) - return ctl_error(EINVAL, rbuf, rsize); + xerror = inet_set_faddress + (desc->sfamily, &addr, &curr, &alen); + if (xerror != NULL) + return ctl_xerror(xerror, rbuf, rsize); /* Invoke the call: */ if (p_sctp_bindx(desc->s, (struct sockaddr *)&addr, 1, @@ -11329,7 +11418,7 @@ static ErlDrvSSizeT packet_inet_ctl(ErlDrvData e, unsigned int cmd, char* buf, return ctl_error(sock_errno(), rbuf, rsize); } - desc->state = INET_STATE_BOUND; + desc->state = INET_STATE_OPEN; return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); } @@ -11346,8 +11435,6 @@ static ErlDrvSSizeT packet_inet_ctl(ErlDrvData e, unsigned int cmd, char* buf, return ctl_xerror(EXBADPORT, rbuf, rsize); if (!IS_OPEN(desc)) return ctl_xerror(EXBADPORT, rbuf, rsize); - if (!IS_BOUND(desc)) - return ctl_xerror(EXBADSEQ, rbuf, rsize); if (! p_sctp_peeloff) return ctl_error(ENOTSUP, rbuf, rsize); @@ -11388,8 +11475,6 @@ static ErlDrvSSizeT packet_inet_ctl(ErlDrvData e, unsigned int cmd, char* buf, /* INPUT: Timeout(4), Length(4) */ if (!IS_OPEN(desc)) return ctl_xerror(EXBADPORT, rbuf, rsize); - if (!IS_BOUND(desc)) - return ctl_error(EINVAL, rbuf, rsize); if (desc->active || (len != 8)) return ctl_error(EINVAL, rbuf, rsize); timeout = get_int32(buf); @@ -11430,12 +11515,12 @@ static void packet_inet_timeout(ErlDrvData e) /* THIS IS A "send*" REQUEST; on the Erlang side: "port_command". -** input should be: P1 P0 Address buffer . +** input should be: Family Address buffer . ** For UDP, buffer (after Address) is just data to be sent. ** For SCTP, buffer contains a list representing 2 items: ** (1) 6 parms for sctp_sndrcvinfo, as in sctp_get_sendparams(); ** (2) 0+ real data bytes. -** There is no destination address -- SCTYP send is performed over +** There is no destination address -- SCTP send is performed over ** an existing association, using "sctp_sndrcvinfo" specified. */ static void packet_inet_command(ErlDrvData e, char* buf, ErlDrvSizeT len) @@ -11444,6 +11529,7 @@ static void packet_inet_command(ErlDrvData e, char* buf, ErlDrvSizeT len) inet_descriptor* desc = INETP(udesc); char* ptr = buf; char* qtr; + char* xerror; ErlDrvSizeT sz; int code; inet_address other; @@ -11454,10 +11540,6 @@ static void packet_inet_command(ErlDrvData e, char* buf, ErlDrvSizeT len) inet_reply_error(desc, EINVAL); return; } - if (!IS_BOUND(desc)) { - inet_reply_error(desc, EINVAL); - return; - } #ifdef HAVE_SCTP if (IS_SCTP(desc)) @@ -11501,6 +11583,7 @@ static void packet_inet_command(ErlDrvData e, char* buf, ErlDrvSizeT len) VALGRIND_MAKE_MEM_DEFINED(mhdr.msg_control, mhdr.msg_controllen); /*suppress "uninitialised bytes"*/ mhdr.msg_flags = 0; /* Not used with "sendmsg" */ + inet_output_count(desc, data_len); /* Now do the actual sending. NB: "flags" in "sendmsg" itself are NOT used: */ code = sock_sendmsg(desc->s, &mhdr, 0); @@ -11510,9 +11593,10 @@ static void packet_inet_command(ErlDrvData e, char* buf, ErlDrvSizeT len) /* UDP socket. Even if it is connected, there is an address prefix here -- ignored for connected sockets: */ sz = len; - qtr = inet_set_address(desc->sfamily, &other, ptr, &sz); - if (qtr == NULL) { - inet_reply_error(desc, EINVAL); + qtr = ptr; + xerror = inet_set_faddress(desc->sfamily, &other, &qtr, &sz); + if (xerror != NULL) { + inet_reply_error_am(desc, driver_mk_atom(xerror)); return; } len -= (qtr - ptr); @@ -11588,6 +11672,8 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event) while(packet_count--) { unsigned int len = sizeof(other); + sys_memzero((char *) &other, sizeof(other)); + /* udesc->i_buf is only kept between SCTP fragments */ if (udesc->i_buf == NULL) { udesc->i_bufsz = desc->bufsz + len; @@ -11783,7 +11869,7 @@ static int packet_inet_output(udp_descriptor* udesc, HANDLE event) (struct sockaddr*) &desc->remote, &sz); if (IS_SOCKET_ERROR(code)) { - desc->state = INET_STATE_BOUND; /* restore state */ + desc->state = INET_STATE_OPEN; /* restore state */ ret = async_error(desc, sock_errno()); goto done; } @@ -11796,7 +11882,7 @@ static int packet_inet_output(udp_descriptor* udesc, HANDLE event) (void *)&error, &sz); if ((code < 0) || error) { - desc->state = INET_STATE_BOUND; /* restore state */ + desc->state = INET_STATE_OPEN; /* restore state */ ret = async_error(desc, error); goto done; } @@ -11877,115 +11963,18 @@ make_noninheritable_handle(SOCKET s) * Multi-timers */ -static void absolute_timeout(unsigned millis, ErlDrvNowData *out) -{ - unsigned rest; - unsigned long millipart; - unsigned long secpart; - unsigned long megasecpart; - unsigned tmo_secs = (millis / 1000U); - unsigned tmo_millis = (millis % 1000); - driver_get_now(out); - rest = (out->microsecs) % 1000; - millipart = ((out->microsecs) / 1000UL); - if (rest >= 500) { - ++millipart; - } - secpart = out->secs; - megasecpart = out->megasecs; - millipart += tmo_millis; - secpart += (millipart / 1000000UL); - millipart %= 1000000UL; - secpart += tmo_secs; - megasecpart += (secpart / 1000000UL); - secpart %= 1000000UL; - out->megasecs = megasecpart; - out->secs = secpart; - out->microsecs = (millipart * 1000UL); -} - -static unsigned relative_timeout(ErlDrvNowData *in) -{ - ErlDrvNowData now; - unsigned rest; - unsigned long millipart, in_millis, in_secs, in_megasecs; - - driver_get_now(&now); - - in_secs = in->secs; - in_megasecs = in->megasecs; - - rest = (now.microsecs) % 1000; - millipart = ((now.microsecs) / 1000UL); - if (rest >= 500) { - ++millipart; - } - in_millis = ((in->microsecs) / 1000UL); - if ( in_millis < millipart ) { - if (in_secs > 0) { - --in_secs; - } else { - in_secs = (1000000UL - 1UL); - if (in_megasecs <= now.megasecs) { - return 0; - } else { - --in_megasecs; - } - } - in_millis += 1000UL; - } - in_millis -= millipart; - - if (in_secs < now.secs) { - if (in_megasecs <= now.megasecs) { - return 0; - } else { - --in_megasecs; - } - in_secs += 1000000; - } - in_secs -= now.secs; - if (in_megasecs < now.megasecs) { - return 0; - } else { - in_megasecs -= now.megasecs; - } - return (unsigned) ((in_megasecs * 1000000000UL) + - (in_secs * 1000UL) + - in_millis); -} - -#ifdef DEBUG -static int nowcmp(ErlDrvNowData *d1, ErlDrvNowData *d2) -{ - /* Assume it's not safe to do signed conversion on megasecs... */ - if (d1->megasecs < d2->megasecs) { - return -1; - } else if (d1->megasecs > d2->megasecs) { - return 1; - } else if (d1->secs != d2->secs) { - return ((int) d1->secs) - ((int) d2->secs); - } - return ((int) d1->microsecs) - ((int) d2->microsecs); -} -#endif - static void fire_multi_timers(MultiTimerData **first, ErlDrvPort port, ErlDrvData data) { - unsigned next_timeout; + ErlDrvTime next_timeout; if (!*first) { ASSERT(0); return; } #ifdef DEBUG { - ErlDrvNowData chk; - driver_get_now(&chk); - chk.microsecs /= 10000UL; - chk.microsecs *= 10000UL; - chk.microsecs += 10000; - ASSERT(nowcmp(&chk,&((*first)->when)) >= 0); + ErlDrvTime chk = erl_drv_monotonic_time(ERL_DRV_MSEC); + ASSERT(chk >= (*first)->when); } #endif do { @@ -11997,9 +11986,9 @@ static void fire_multi_timers(MultiTimerData **first, ErlDrvPort port, return; } (*first)->prev = NULL; - next_timeout = relative_timeout(&((*first)->when)); - } while (next_timeout == 0); - driver_set_timer(port,next_timeout); + next_timeout = (*first)->when - erl_drv_monotonic_time(ERL_DRV_MSEC); + } while (next_timeout <= 0); + driver_set_timer(port, (unsigned long) next_timeout); } static void clean_multi_timers(MultiTimerData **first, ErlDrvPort port) @@ -12022,8 +12011,10 @@ static void remove_multi_timer(MultiTimerData **first, ErlDrvPort port, MultiTim driver_cancel_timer(port); *first = p->next; if (*first) { - unsigned ntmo = relative_timeout(&((*first)->when)); - driver_set_timer(port,ntmo); + ErlDrvTime ntmo = (*first)->when - erl_drv_monotonic_time(ERL_DRV_MSEC); + if (ntmo < 0) + ntmo = 0; + driver_set_timer(port, (unsigned long) ntmo); } } if (p->next != NULL) { @@ -12039,32 +12030,16 @@ static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port, { MultiTimerData *mtd, *p, *s; mtd = ALLOC(sizeof(MultiTimerData)); - absolute_timeout(timeout, &(mtd->when)); + mtd->when = erl_drv_monotonic_time(ERL_DRV_MSEC) + ((ErlDrvTime) timeout) + 1; mtd->timeout_function = timeout_fun; mtd->caller = caller; mtd->next = mtd->prev = NULL; for(p = *first,s = NULL; p != NULL; s = p, p = p->next) { - if (p->when.megasecs >= mtd->when.megasecs) { - break; - } - } - if (!p || p->when.megasecs > mtd->when.megasecs) { - goto found; - } - for (; p!= NULL; s = p, p = p->next) { - if (p->when.secs >= mtd->when.secs) { - break; - } - } - if (!p || p->when.secs > mtd->when.secs) { - goto found; - } - for (; p!= NULL; s = p, p = p->next) { - if (p->when.microsecs >= mtd->when.microsecs) { + if (p->when >= mtd->when) { break; } } - found: + if (!p) { if (!s) { *first = mtd; @@ -12090,10 +12065,6 @@ static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port, } return mtd; } - - - - /*----------------------------------------------------------------------------- @@ -12211,7 +12182,7 @@ void erts_sock_close(erts_sock_t socket) int erts_sock_connect(erts_sock_t socket, byte *ip_addr, int len, Uint16 port) { SOCKET s = (SOCKET) socket; - char buf[2 + 4]; + char buf[2 + 4], *p; ErlDrvSizeT blen = 6; inet_address addr; @@ -12221,12 +12192,12 @@ int erts_sock_connect(erts_sock_t socket, byte *ip_addr, int len, Uint16 port) put_int16(port, buf); memcpy((void *) (buf + 2), (void *) ip_addr, 4); - if (!inet_set_address(AF_INET, &addr, buf, &blen)) + p = buf; + if (inet_set_address(AF_INET, &addr, &p, &blen) != NULL) return 0; - if (IS_SOCKET_ERROR(sock_connect(s, - (struct sockaddr *) &addr, - sizeof(struct sockaddr_in)))) + if (IS_SOCKET_ERROR + (sock_connect(s, (struct sockaddr *) &addr, blen))) return 0; return 1; } |