diff options
Diffstat (limited to 'erts/emulator/drivers/common/inet_drv.c')
-rw-r--r-- | erts/emulator/drivers/common/inet_drv.c | 1754 |
1 files changed, 1489 insertions, 265 deletions
diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c index 301ce2d0e2..2d2bd80783 100644 --- a/erts/emulator/drivers/common/inet_drv.c +++ b/erts/emulator/drivers/common/inet_drv.c @@ -1,18 +1,19 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 1997-2013. All Rights Reserved. + * Copyright Ericsson AB 1997-2015. All Rights Reserved. * - * The contents of this file are subject to the Erlang Public License, - * Version 1.1, (the "License"); you may not use this file except in - * compliance with the License. You should have received a copy of the - * Erlang Public License along with this software. If not, it can be - * retrieved online at http://www.erlang.org/. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * Software distributed under the License is distributed on an "AS IS" - * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See - * the License for the specific language governing rights and limitations - * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. * * %CopyrightEnd% */ @@ -86,6 +87,17 @@ #endif typedef unsigned long long llu_t; +#ifndef INT16_MIN +#define INT16_MIN (-32768) +#endif +#ifndef INT16_MAX +#define INT16_MAX (32767) +#endif + +#ifdef __OSE__ +#include "inet.h" +#endif + #ifdef __WIN32__ #define STRNCASECMP strncasecmp @@ -189,6 +201,7 @@ typedef unsigned long long llu_t; #define HAVE_MULTICAST_SUPPORT +#define HAVE_UDP #define ERRNO_BLOCK WSAEWOULDBLOCK @@ -256,14 +269,13 @@ static BOOL (WINAPI *fpSetHandleInformation)(HANDLE,DWORD,DWORD); #define sock_htonl(x) htonl((x)) #define sock_send(s,buf,len,flag) send((s),(buf),(len),(flag)) #define sock_sendv(s, vec, size, np, flag) \ - WSASend((s),(WSABUF*)(vec),\ - (size),(np),(flag),NULL,NULL) + WSASend((s),(WSABUF*)(vec),(size),(np),(flag),NULL,NULL) #define sock_recv(s,buf,len,flag) recv((s),(buf),(len),(flag)) #define sock_recvfrom(s,buf,blen,flag,addr,alen) \ - recvfrom((s),(buf),(blen),(flag),(addr),(alen)) + recvfrom((s),(buf),(blen),(flag),(addr),(alen)) #define sock_sendto(s,buf,blen,flag,addr,alen) \ - sendto((s),(buf),(blen),(flag),(addr),(alen)) + sendto((s),(buf),(blen),(flag),(addr),(alen)) #define sock_hostname(buf, len) gethostname((buf), (len)) #define sock_getservbyname(name,proto) getservbyname((name),(proto)) @@ -282,7 +294,143 @@ static BOOL (WINAPI *fpSetHandleInformation)(HANDLE,DWORD,DWORD); static unsigned long zero_value = 0; static unsigned long one_value = 1; -#else +#define TCP_SHUT_WR SD_SEND +#define TCP_SHUT_RD SD_RECEIVE +#define TCP_SHUT_RDWR SD_BOTH + +#elif defined (__OSE__) + +/* + * Some notes about how inet (currently only tcp) works on OSE. + * The driver uses OSE signals to communicate with the one_inet + * process. Because of the difference in how signals and file descriptors + * work the whole select/deselect mechanic is very different. + * In ose when a sock_select is done a function is called. That function + * notes the changes that the driver want to do, but does not act on it. + * later when the function returns the new desired state is compared + * to the previous state and the apprioriate actions are taken. The action + * is usually to either request more data from the stack or stop requesting + * data. + * + * One thing to note is that the driver never does select/deselect. It always + * listens for the signals. Flow of data is regulated by sending or not sending + * signals to the ose inet process. + * + * The interesting functions to look at are: + * * inet_driver_select : called when sock_select is called + * * tcp_inet_ose_dispatch_signal : checks state changes and sends new signals + * * tcp_inet_drv_output_ose : ready output callback, reads signals and calls + * dispatch_signal + * * tcp_inet_drv_input_ose : ready input callback. + */ + +#include "efs.h" +#include "sys/socket.h" +#include "sys/uio.h" +#include "sfk/sys/sfk_uio.h" +#include "netinet/in.h" +#include "netinet/tcp.h" +#include "netdb.h" +#include "ose_spi/socket.sig" + + +static ssize_t writev_fallback(int fd, const struct iovec *iov, int iovcnt, int max_sz); + +#define INVALID_SOCKET -1 +#define INVALID_EVENT -1 +#define SOCKET_ERROR -1 + +#define SOCKET int +#define HANDLE int +#define FD_READ ERL_DRV_READ +#define FD_WRITE ERL_DRV_WRITE +#define FD_CLOSE 0 +#define FD_CONNECT (1<<4) +#define FD_ACCEPT (1<<5) +#define SOCK_FD_ERROR (1<<6) + +#define sock_connect(s, addr, len) connect((s), (addr), (len)) +#define sock_listen(s, b) listen((s), (b)) +#define sock_bind(s, addr, len) bind((s), (addr), (len)) +#define sock_getopt(s,t,n,v,l) getsockopt((s),(t),(n),(v),(l)) +#define sock_setopt(s,t,n,v,l) setsockopt((s),(t),(n),(v),(l)) +#define sock_name(s, addr, len) getsockname((s), (addr), (len)) +#define sock_peer(s, addr, len) getpeername((s), (addr), (len)) +#define sock_ntohs(x) ntohs((x)) +#define sock_ntohl(x) ntohl((x)) +#define sock_htons(x) htons((x)) +#define sock_htonl(x) htonl((x)) + +#define sock_accept(s, addr, len) accept((s), (addr), (len)) +#define sock_send(s,buf,len,flag) inet_send((s),(buf),(len),(flag)) +#define sock_sendto(s,buf,blen,flag,addr,alen) \ + sendto((s),(buf),(blen),(flag),(addr),(alen)) +#define sock_sendv(s, vec, size, np, flag) \ + (*(np) = writev_fallback((s), (struct iovec*)(vec), (size), (*(np)))) +#define sock_sendmsg(s,msghdr,flag) sendmsg((s),(msghdr),(flag)) + +#define sock_open(af, type, proto) socket((af), (type), (proto)) +#define sock_close(s) close((s)) +#define sock_dup(s) dup((s)) +#define sock_shutdown(s, how) shutdown((s), (how)) + +#define sock_hostname(buf, len) gethostname((buf), (len)) +#define sock_getservbyname(name,proto) getservbyname((name), (proto)) +#define sock_getservbyport(port,proto) getservbyport((port), (proto)) + +#define sock_recv(s,buf,len,flag) recv((s),(buf),(len),(flag)) +#define sock_recvfrom(s,buf,blen,flag,addr,alen) \ + recvfrom((s),(buf),(blen),(flag),(addr),(alen)) +#define sock_recvmsg(s,msghdr,flag) recvmsg((s),(msghdr),(flag)) + +#define sock_errno() errno +#define sock_create_event(d) ((d)->s) /* return file descriptor */ +#define sock_close_event(e) /* do nothing */ + +#ifndef WANT_NONBLOCKING +#define WANT_NONBLOCKING +#endif +#include "sys.h" + +typedef unsigned long u_long; +#define IN_CLASSA(a) ((((in_addr_t)(a)) & 0x80000000) == 0) +#define IN_CLASSA_NET 0xff000000 +#define IN_CLASSA_NSHIFT 24 +#define IN_CLASSA_HOST (0xffffffff & ~IN_CLASSA_NET) +#define IN_CLASSA_MAX 128 + +#define IN_CLASSB(a) ((((in_addr_t)(a)) & 0xc0000000) == 0x80000000) +#define IN_CLASSB_NET 0xffff0000 +#define IN_CLASSB_NSHIFT 16 +#define IN_CLASSB_HOST (0xffffffff & ~IN_CLASSB_NET) +#define IN_CLASSB_MAX 65536 + +#define IN_CLASSC(a) ((((in_addr_t)(a)) & 0xe0000000) == 0xc0000000) +#define IN_CLASSC_NET 0xffffff00 +#define IN_CLASSC_NSHIFT 8 +#define IN_CLASSC_HOST (0xffffffff & ~IN_CLASSC_NET) + +#define IN_CLASSD(a) ((((in_addr_t)(a)) & 0xf0000000) == 0xe0000000) +#define IN_MULTICAST(a) IN_CLASSD(a) + +#define IN_EXPERIMENTAL(a) ((((in_addr_t)(a)) & 0xe0000000) == 0xe0000000) +#define IN_BADCLASS(a) ((((in_addr_t)(a)) & 0xf0000000) == 0xf0000000) + +#define sock_select(d, flags, onoff) do { \ + ASSERT(!(d)->is_ignored); \ + (d)->event_mask = (onoff) ? \ + ((d)->event_mask | (flags)) : \ + ((d)->event_mask & ~(flags)); \ + DEBUGF(("(%s / %d) sock_select(%ld): flags=%02X, onoff=%d, event_mask=%02lX, s=%d\r\n", \ + __FILE__, __LINE__, (long) (d)->port, (flags), (onoff), (unsigned long) (d)->event_mask, (d)->s)); \ + inet_driver_select((d), (flags), (onoff)); \ + } while(0) + +#define TCP_SHUT_WR SHUT_WR +#define TCP_SHUT_RD SHUT_RD +#define TCP_SHUT_RDWR SHUT_RDWR + +#else /* !__OSE__ && !__WIN32__ */ #include <sys/time.h> #ifdef NETDB_H_NEEDS_IN_H @@ -315,9 +463,19 @@ static unsigned long one_value = 1; #include <net/if.h> +#ifdef HAVE_SCHED_H +#include <sched.h> +#endif + +#ifdef HAVE_SETNS_H +#include <setns.h> +#endif + +#define HAVE_UDP + /* SCTP support -- currently for UNIX platforms only: */ #undef HAVE_SCTP -#if (!defined(__WIN32__) && defined(HAVE_SCTP_H)) +#if defined(HAVE_SCTP_H) #include <netinet/sctp.h> @@ -409,16 +567,47 @@ static unsigned long one_value = 1; # define sctp_adaptation_layer_event sctp_adaption_layer_event #endif -#ifdef __GNUC__ +#if defined(__GNUC__) && defined(HAVE_SCTP_BINDX) static typeof(sctp_bindx) *p_sctp_bindx = NULL; +#else +static int (*p_sctp_bindx) + (int sd, struct sockaddr *addrs, int addrcnt, int flags) = NULL; +#endif + +#if defined(__GNUC__) && defined(HAVE_SCTP_PEELOFF) static typeof(sctp_peeloff) *p_sctp_peeloff = NULL; #else -static int (*p_sctp_bindx)(int sd, struct sockaddr *addrs, - int addrcnt, int flags) = NULL; -static int (*p_sctp_peeloff)(int sd, sctp_assoc_t assoc_id) = NULL; +static int (*p_sctp_peeloff) + (int sd, sctp_assoc_t assoc_id) = NULL; +#endif + +#if defined(__GNUC__) && defined(HAVE_SCTP_GETLADDRS) +static typeof(sctp_getladdrs) *p_sctp_getladdrs = NULL; +#else +static int (*p_sctp_getladdrs) + (int sd, sctp_assoc_t assoc_id, struct sockaddr **ss) = NULL; +#endif + +#if defined(__GNUC__) && defined(HAVE_SCTP_FREELADDRS) +static typeof(sctp_freeladdrs) *p_sctp_freeladdrs = NULL; +#else +static void (*p_sctp_freeladdrs)(struct sockaddr *addrs) = NULL; +#endif + +#if defined(__GNUC__) && defined(HAVE_SCTP_GETPADDRS) +static typeof(sctp_getpaddrs) *p_sctp_getpaddrs = NULL; +#else +static int (*p_sctp_getpaddrs) + (int sd, sctp_assoc_t assoc_id, struct sockaddr **ss) = NULL; +#endif + +#if defined(__GNUC__) && defined(HAVE_SCTP_FREEPADDRS) +static typeof(sctp_freepaddrs) *p_sctp_freepaddrs = NULL; +#else +static void (*p_sctp_freepaddrs)(struct sockaddr *addrs) = NULL; #endif -#endif /* SCTP supported */ +#endif /* #if defined(HAVE_SCTP_H) */ #ifndef WANT_NONBLOCKING #define WANT_NONBLOCKING @@ -506,13 +695,16 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) (d)->event_mask = (onoff) ? \ ((d)->event_mask | (flags)) : \ ((d)->event_mask & ~(flags)); \ - DEBUGF(("sock_select(%ld): flags=%02X, onoff=%d, event_mask=%02lX\r\n", \ - (long) (d)->port, (flags), (onoff), (unsigned long) (d)->event_mask)); \ + DEBUGF(("(%s / %d) sock_select(%ld): flags=%02X, onoff=%d, event_mask=%02lX\r\n", \ + __FILE__, __LINE__, (long) (d)->port, (flags), (onoff), (unsigned long) (d)->event_mask)); \ inet_driver_select((d)->port, (ErlDrvEvent)(long)(d)->event, (flags), (onoff)); \ } while(0) +#define TCP_SHUT_WR SHUT_WR +#define TCP_SHUT_RD SHUT_RD +#define TCP_SHUT_RDWR SHUT_RDWR -#endif /* __WIN32__ */ +#endif /* !__WIN32__ && !__OSE__ */ #ifdef HAVE_SOCKLEN_T # define SOCKLEN_T socklen_t @@ -573,6 +765,7 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) #define INET_PASSIVE 0 /* false */ #define INET_ACTIVE 1 /* true */ #define INET_ONCE 2 /* true; active once then passive */ +#define INET_MULTI 3 /* true; active N then passive */ /* INET_REQ_GETSTATUS enumeration */ #define INET_F_OPEN 0x0001 @@ -585,7 +778,7 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) #define INET_F_BUSY 0x0080 #define INET_F_MULTI_CLIENT 0x0100 /* Multiple clients for one descriptor, i.e. multi-accept */ -/* One numberspace for *_REC_* so if an e.g UDP request is issued +/* One numberspace for *_REQ_* so if an e.g UDP request is issued ** for a TCP socket, the driver can protest. */ #define INET_REQ_OPEN 1 @@ -616,6 +809,8 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) #define INET_REQ_ACCEPT 26 #define INET_REQ_LISTEN 27 #define INET_REQ_IGNOREFD 28 +#define INET_REQ_GETLADDRS 29 +#define INET_REQ_GETPADDRS 30 /* TCP requests */ /* #define TCP_REQ_ACCEPT 40 MOVED */ @@ -637,6 +832,14 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) #define TCP_ADDF_CLOSE_SENT 2 /* Close sent (active mode only) */ #define TCP_ADDF_DELAYED_CLOSE_RECV 4 /* If receive fails, report {error,closed} (passive mode) */ #define TCP_ADDF_DELAYED_CLOSE_SEND 8 /* If send fails, report {error,closed} (passive mode) */ +#define TCP_ADDF_PENDING_SHUT_WR 16 /* Call shutdown(sock, SHUT_WR) when queue empties */ +#define TCP_ADDF_PENDING_SHUT_RDWR 32 /* Call shutdown(sock, SHUT_RDWR) when queue empties */ +#define TCP_ADDF_PENDING_SHUTDOWN \ + (TCP_ADDF_PENDING_SHUT_WR | TCP_ADDF_PENDING_SHUT_RDWR) +#define TCP_ADDF_SHOW_ECONNRESET 64 /* Tell user about incoming RST */ +#define TCP_ADDF_DELAYED_ECONNRESET 128 /* An ECONNRESET error occured on send or shutdown */ +#define TCP_ADDF_SHUTDOWN_WR_DONE 256 /* A shutdown(sock, SHUT_WR) or SHUT_RDWR was made */ +#define TCP_ADDF_LINGER_ZERO 512 /* Discard driver queue on port close */ /* *_REQ_* replies */ #define INET_REP_ERROR 0 @@ -680,6 +883,9 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) #define INET_LOPT_TCP_SEND_TIMEOUT_CLOSE 35 /* auto-close on send timeout or not */ #define INET_LOPT_MSGQ_HIWTRMRK 36 /* set local msgq high watermark */ #define INET_LOPT_MSGQ_LOWTRMRK 37 /* set local msgq low watermark */ +#define INET_LOPT_NETNS 38 /* Network namespace pathname */ +#define INET_LOPT_TCP_SHOW_ECONNRESET 39 /* tell user about incoming RST */ +#define INET_LOPT_LINE_DELIM 40 /* Line delimiting char */ /* SCTP options: a separate range, from 100: */ #define SCTP_OPT_RTOINFO 100 #define SCTP_OPT_ASSOCINFO 101 @@ -804,9 +1010,10 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) #define INET_IFNAMSIZ 16 /* INET Ignore states */ -#define INET_IGNORE_NONE 0 -#define INET_IGNORE_READ 1 -#define INET_IGNORE_WRITE 1 << 1 +#define INET_IGNORE_NONE 0 +#define INET_IGNORE_READ (1 << 0) +#define INET_IGNORE_WRITE (1 << 1) +#define INET_IGNORE_PASSIVE (1 << 2) /* Max length of Erlang Term Buffer (for outputting structured terms): */ #ifdef HAVE_SCTP @@ -841,7 +1048,7 @@ typedef union { #endif typedef struct _multi_timer_data { - ErlDrvNowData when; + ErlDrvTime when; ErlDrvTermData caller; void (*timeout_function)(ErlDrvData drv_data, ErlDrvTermData caller); struct _multi_timer_data *next; @@ -916,6 +1123,7 @@ typedef struct { inet_async_op op_queue[INET_MAX_ASYNC]; /* call queue */ int active; /* 0 = passive, 1 = active, 2 = active once */ + Sint16 active_count; /* counter for {active,N} */ int stype; /* socket type: SOCK_STREAM/SOCK_DGRAM/SOCK_SEQPACKET */ int sprotocol; /* socket protocol: @@ -947,6 +1155,7 @@ typedef struct { #else Uint32 send_oct[2]; /* number of octets sent, 64 bits */ #endif + char delimiter; /* Line delimiting character (def: '\n') */ unsigned long send_cnt; /* number of packets sent */ unsigned long send_max; /* maximum packet send */ double send_avg; /* average packet size sent */ @@ -955,6 +1164,17 @@ typedef struct { int is_ignored; /* if a fd is ignored by the inet_drv. This flag should be set to true when the fd is used outside of inet_drv. */ +#ifdef HAVE_SETNS + char *netns; /* Socket network namespace name + as full file path */ +#endif +#ifdef __OSE__ + int select_state; /* state to keep track of whether we + should trigger another read/write + request at end of ready_input/output */ + ErlDrvEvent events[6]; +#endif + } inet_descriptor; @@ -970,20 +1190,88 @@ static void tcp_inet_stop(ErlDrvData); static void tcp_inet_command(ErlDrvData, char*, ErlDrvSizeT); static void tcp_inet_commandv(ErlDrvData, ErlIOVec*); static void tcp_inet_flush(ErlDrvData drv_data); +#ifndef __OSE__ static void tcp_inet_drv_input(ErlDrvData, ErlDrvEvent); static void tcp_inet_drv_output(ErlDrvData data, ErlDrvEvent event); +#endif static ErlDrvData tcp_inet_start(ErlDrvPort, char* command); static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData, unsigned int, char*, ErlDrvSizeT, char**, ErlDrvSizeT); static void tcp_inet_timeout(ErlDrvData); static void tcp_inet_process_exit(ErlDrvData, ErlDrvMonitor *); static void inet_stop_select(ErlDrvEvent, void*); +static void inet_emergency_close(ErlDrvData); #ifdef __WIN32__ static void tcp_inet_event(ErlDrvData, ErlDrvEvent); static void find_dynamic_functions(void); #endif -static struct erl_drv_entry tcp_inet_driver_entry = +#ifdef __OSE__ +/* The structure of the signal used for requesting asynchronous + * notification from the stack. Under normal circumstances the network stack + * shouldn't overwrite the value set in the fd field by the sender + * of the request */ +struct OseAsyncSig { + struct FmEvent event; + int fd; +}; + +union SIGNAL { + SIGSELECT signo; + struct OseAsyncSig async; +}; + +static ErlDrvSSizeT tcp_inet_ctl_ose(ErlDrvData e, unsigned int cmd, + char* buf, ErlDrvSizeT len, + char** rbuf, ErlDrvSizeT rsize); +static void tcp_inet_commandv_ose(ErlDrvData e, ErlIOVec* ev); +static void tcp_inet_drv_output_ose(ErlDrvData data, ErlDrvEvent event); +static void tcp_inet_drv_input_ose(ErlDrvData data, ErlDrvEvent event); +static ErlDrvOseEventId inet_resolve_signal(union SIGNAL *sig); + +#ifdef INET_DRV_DEBUG + +static char *read_req = "SO_EVENT_READ_REQUEST"; +static char *read_rep = "SO_EVENT_READ_REPLY"; +static char *write_req = "SO_EVENT_WRITE_REQUEST"; +static char *write_rep = "SO_EVENT_WRITE_REPLY"; +static char *eof_req = "SO_EVENT_EOF_REQUEST"; +static char *eof_rep = "SO_EVENT_EOF_REPLY"; +static char *accept_req = "SO_EVENT_ACCEPT_REQUEST"; +static char *accept_rep = "SO_EVENT_ACCEPT_REPLY"; +static char *connect_req = "SO_EVENT_CONNECT_REQUEST"; +static char *connect_rep = "SO_EVENT_CONNECT_REPLY"; +static char *error_req = "SO_EVENT_ERROR_REQUEST"; +static char *error_rep = "SO_EVENT_ERROR_REPLY"; +static char signo_tmp[32]; + +static char *signo_to_string(SIGSELECT signo) { + switch (signo) { + case SO_EVENT_READ_REQUEST: { return read_req; } + case SO_EVENT_READ_REPLY: { return read_rep; } + case SO_EVENT_WRITE_REQUEST: { return write_req; } + case SO_EVENT_WRITE_REPLY: { return write_rep; } + case SO_EVENT_EOF_REQUEST: { return eof_req; } + case SO_EVENT_EOF_REPLY: { return eof_rep; } + case SO_EVENT_ACCEPT_REQUEST: { return accept_req; } + case SO_EVENT_ACCEPT_REPLY: { return accept_rep; } + case SO_EVENT_CONNECT_REQUEST: { return connect_req; } + case SO_EVENT_CONNECT_REPLY: { return connect_rep; } + case SO_EVENT_ERROR_REQUEST: { return error_req; } + case SO_EVENT_ERROR_REPLY: { return error_rep; } + } + + snprintf(signo_tmp,32,"0x%x",signo); + + return signo_tmp; +} + +#endif + +#endif /* __OSE__ */ + + +static struct erl_drv_entry tcp_inet_driver_entry = { tcp_inet_init, /* inet_init will add this driver !! */ tcp_inet_start, @@ -992,6 +1280,9 @@ static struct erl_drv_entry tcp_inet_driver_entry = #ifdef __WIN32__ tcp_inet_event, NULL, +#elif defined(__OSE__) + tcp_inet_drv_input_ose, /*ready_input*/ + tcp_inet_drv_output_ose, /*ready_output*/ #else tcp_inet_drv_input, tcp_inet_drv_output, @@ -999,9 +1290,17 @@ static struct erl_drv_entry tcp_inet_driver_entry = "tcp_inet", NULL, NULL, +#ifdef __OSE__ + tcp_inet_ctl_ose, +#else tcp_inet_ctl, +#endif tcp_inet_timeout, +#ifdef __OSE__ + tcp_inet_commandv_ose, +#else tcp_inet_commandv, +#endif NULL, tcp_inet_flush, NULL, @@ -1012,11 +1311,13 @@ static struct erl_drv_entry tcp_inet_driver_entry = ERL_DRV_FLAG_USE_PORT_LOCKING|ERL_DRV_FLAG_SOFT_BUSY, NULL, tcp_inet_process_exit, - inet_stop_select + inet_stop_select, + inet_emergency_close }; +#ifdef HAVE_UDP static int packet_inet_init(void); static void packet_inet_stop(ErlDrvData); static void packet_inet_command(ErlDrvData, char*, ErlDrvSizeT); @@ -1064,8 +1365,10 @@ static struct erl_drv_entry udp_inet_driver_entry = ERL_DRV_FLAG_USE_PORT_LOCKING, NULL, NULL, - inet_stop_select + inet_stop_select, + inet_emergency_close }; +#endif #ifdef HAVE_SCTP static struct erl_drv_entry sctp_inet_driver_entry = @@ -1097,7 +1400,8 @@ static struct erl_drv_entry sctp_inet_driver_entry = ERL_DRV_FLAG_USE_PORT_LOCKING, NULL, NULL, /* process_exit */ - inet_stop_select + inet_stop_select, + inet_emergency_close }; #endif @@ -1126,9 +1430,12 @@ static int tcp_sendv(tcp_descriptor* desc, ErlIOVec* ev); static int tcp_recv(tcp_descriptor* desc, int request_len); static int tcp_deliver(tcp_descriptor* desc, int len); +static int tcp_shutdown_error(tcp_descriptor* desc, int err); + static int tcp_inet_output(tcp_descriptor* desc, HANDLE event); static int tcp_inet_input(tcp_descriptor* desc, HANDLE event); +#ifdef HAVE_UDP typedef struct { inet_descriptor inet; /* common data structure (DON'T MOVE) */ int read_packets; /* Number of packets to read per invocation */ @@ -1140,29 +1447,54 @@ typedef struct { static int packet_inet_input(udp_descriptor* udesc, HANDLE event); static int packet_inet_output(udp_descriptor* udesc, HANDLE event); +#endif -/* convert descriptor poiner to inet_descriptor pointer */ +/* convert descriptor pointer to inet_descriptor pointer */ #define INETP(d) (&(d)->inet) +#ifdef __OSE__ +static void inet_driver_select(inet_descriptor* desc, + int flags, int onoff); +static void tcp_inet_ose_dispatch_signals(tcp_descriptor *desc, + int prev_select_state, + union SIGNAL *sig); +#endif + static int async_ref = 0; /* async reference id generator */ #define NEW_ASYNC_ID() ((async_ref++) & 0xffff) +/* check for transition from active to passive */ +#define INET_CHECK_ACTIVE_TO_PASSIVE(inet) \ + do { \ + if ((inet)->active == INET_ONCE) \ + (inet)->active = INET_PASSIVE; \ + else if ((inet)->active == INET_MULTI && --((inet)->active_count) == 0) { \ + (inet)->active = INET_PASSIVE; \ + packet_passive_message(inet); \ + } \ + } while (0) static ErlDrvTermData am_ok; static ErlDrvTermData am_tcp; -static ErlDrvTermData am_udp; static ErlDrvTermData am_error; +static ErlDrvTermData am_einval; static ErlDrvTermData am_inet_async; static ErlDrvTermData am_inet_reply; static ErlDrvTermData am_timeout; static ErlDrvTermData am_closed; +static ErlDrvTermData am_tcp_passive; static ErlDrvTermData am_tcp_closed; static ErlDrvTermData am_tcp_error; -static ErlDrvTermData am_udp_error; static ErlDrvTermData am_empty_out_q; static ErlDrvTermData am_ssl_tls; +#ifdef HAVE_UDP +static ErlDrvTermData am_udp; +static ErlDrvTermData am_udp_passive; +static ErlDrvTermData am_udp_error; +#endif #ifdef HAVE_SCTP static ErlDrvTermData am_sctp; +static ErlDrvTermData am_sctp_passive; static ErlDrvTermData am_sctp_error; static ErlDrvTermData am_true; static ErlDrvTermData am_false; @@ -1172,6 +1504,7 @@ static ErlDrvTermData am_list; static ErlDrvTermData am_binary; static ErlDrvTermData am_active; static ErlDrvTermData am_once; +static ErlDrvTermData am_multi; static ErlDrvTermData am_buffer; static ErlDrvTermData am_linger; static ErlDrvTermData am_recbuf; @@ -1181,6 +1514,7 @@ static ErlDrvTermData am_dontroute; static ErlDrvTermData am_priority; static ErlDrvTermData am_tos; static ErlDrvTermData am_ipv6_v6only; +static ErlDrvTermData am_netns; #endif /* speical errors for bad ports and sequences */ @@ -1240,7 +1574,7 @@ static const struct in6_addr in6addr_loopback = #endif /* HAVE_IN6 */ /* XXX: is this a driver interface function ??? */ -void erl_exit(int n, char*, ...); +void erts_exit(int n, char*, ...); /* * Malloc wrapper, @@ -1253,7 +1587,7 @@ void erl_exit(int n, char*, ...); static void *alloc_wrapper(ErlDrvSizeT size){ void *ret = driver_alloc(size); if(ret == NULL) - erl_exit(1,"Out of virtual memory in malloc (%s)", __FILE__); + erts_exit(ERTS_ERROR_EXIT,"Out of virtual memory in malloc (%s)", __FILE__); return ret; } #define ALLOC(X) alloc_wrapper(X) @@ -1261,7 +1595,7 @@ static void *alloc_wrapper(ErlDrvSizeT size){ static void *realloc_wrapper(void *current, ErlDrvSizeT size){ void *ret = driver_realloc(current,size); if(ret == NULL) - erl_exit(1,"Out of virtual memory in realloc (%s)", __FILE__); + erts_exit(ERTS_ERROR_EXIT,"Out of virtual memory in realloc (%s)", __FILE__); return ret; } #define REALLOC(X,Y) realloc_wrapper(X,Y) @@ -1365,6 +1699,7 @@ static void *realloc_wrapper(void *current, ErlDrvSizeT size){ # define SCTP_ANC_BUFF_SIZE INET_DEF_BUFFER/2 /* XXX: not very good... */ #endif +#ifdef HAVE_UDP static int load_ip_port(ErlDrvTermData* spec, int i, char* buf) { spec[i++] = ERL_DRV_INT; @@ -1399,12 +1734,12 @@ static int load_ip_address(ErlDrvTermData* spec, int i, int family, char* buf) } return i; } +#endif #ifdef HAVE_SCTP /* For SCTP, we often need to return {IP, Port} tuples: */ -static int inet_get_address - (int family, char* dst, inet_address* src, unsigned int* len); +static int inet_get_address(char* dst, inet_address* src, unsigned int* len); #define LOAD_IP_AND_PORT_CNT \ (8*LOAD_INT_CNT + LOAD_TUPLE_CNT + LOAD_INT_CNT + LOAD_TUPLE_CNT) @@ -1419,10 +1754,9 @@ static int load_ip_and_port unsigned int len = sizeof(struct sockaddr_storage); unsigned int alen = len; char abuf [len]; - int res = - inet_get_address(desc->sfamily, abuf, (inet_address*) addr, &alen); - ASSERT(res==0); - res = 0; + int res = inet_get_address(abuf, (inet_address*) addr, &alen); + ASSERT(res==0); (void)res; + /* Now "abuf" contains: Family(1b), Port(2b), IP(4|16b) */ /* NB: the following functions are safe to use, as they create tuples @@ -1522,7 +1856,7 @@ check_double_release(InetDrvBufStk *bs, ErlDrvBinary* buf) int i; for (i = 0; i < bs->buf.pos; ++i) { if (bs->buf.stk[i] == buf) { - erl_exit(ERTS_ABORT_EXIT, + erts_exit(ERTS_ABORT_EXIT, "Multiple buffer release in inet_drv, this " "is a bug, save the core and send it to " "[email protected]!"); @@ -1567,10 +1901,12 @@ static void release_buffer(ErlDrvBinary* buf) } } +#ifdef HAVE_UDP static ErlDrvBinary* realloc_buffer(ErlDrvBinary* buf, ErlDrvSizeT newsz) { return driver_realloc_binary(buf, newsz); } +#endif /* use a TRICK, access the refc field to see if any one else has * a ref to this buffer then call driver_free_binary else @@ -2415,7 +2751,7 @@ int ssl_tls_inetdrv(void* arg, unsigned type, unsigned major, unsigned minor, { tcp_descriptor* desc = (tcp_descriptor*) arg; int i = 0; - ErlDrvTermData spec[28]; + ErlDrvTermData spec[30]; ErlDrvTermData caller = ERL_DRV_NIL; ErlDrvBinary* bin; int ret; @@ -2456,11 +2792,11 @@ int ssl_tls_inetdrv(void* arg, unsigned type, unsigned major, unsigned minor, if (desc->inet.active == INET_PASSIVE) { i = LOAD_TUPLE(spec, i, 2); i = LOAD_TUPLE(spec, i, 4); - ASSERT(i <= 28); + ASSERT(i <= sizeof(spec)/sizeof(*spec)); ret = erl_drv_send_term(desc->inet.dport, caller, spec, i); } else { - ASSERT(i <= 28); + ASSERT(i <= sizeof(spec)/sizeof(*spec)); ret = erl_drv_output_term(desc->inet.dport, spec, i); } done: @@ -2582,6 +2918,9 @@ static ErlDrvTermData am_sctp_rtoinfo, /* Option names */ /* For #sctp_paddrinfo{}: */ am_active, am_inactive, +# if HAVE_DECL_SCTP_UNCONFIRMED + am_unconfirmed, +# endif /* For #sctp_status{}: */ # if HAVE_DECL_SCTP_EMPTY @@ -3231,6 +3570,7 @@ static int tcp_error_message(tcp_descriptor* desc, int err) return erl_drv_output_term(desc->inet.dport, spec, i); } +#ifdef HAVE_UDP /* ** active mode message: ** {udp, S, IP, Port, [H1,...Hsz | Data]} or @@ -3321,7 +3661,41 @@ static int packet_binary_message ASSERT(i <= PACKET_ERL_DRV_TERM_DATA_LEN); return erl_drv_output_term(desc->dport, spec, i); } +#endif + +/* +** active mode message: send active-to-passive transition message +** {tcp_passive, S} or +** {udp_passive, S} or +** {sctp_passive, S} +*/ + static int packet_passive_message(inet_descriptor* desc) + { + ErlDrvTermData spec[6]; + int i = 0; + + DEBUGF(("packet_passive_message(%ld):\r\n", (long)desc->port)); + +#if !defined(HAVE_UDP) && !defined(HAVE_SCTP) + i = LOAD_ATOM(spec, i, am_tcp_passive); +#else + if (desc->sprotocol == IPPROTO_TCP) + i = LOAD_ATOM(spec, i, am_tcp_passive); + else { +#ifdef HAVE_SCTP + i = LOAD_ATOM(spec, i, IS_SCTP(desc) ? am_sctp_passive : am_udp_passive); +#else + i = LOAD_ATOM(spec, i, am_udp_passive); +#endif + } +#endif + i = LOAD_PORT(spec, i, desc->dport); + i = LOAD_TUPLE(spec, i, 2); + ASSERT(i <= 6); + return erl_drv_output_term(desc->dport, spec, i); + } +#ifdef HAVE_UDP /* ** send active message {udp_error|sctp_error, S, Error} */ @@ -3348,7 +3722,7 @@ static int packet_error_message(udp_descriptor* udesc, int err) ASSERT(i == sizeof(spec)/sizeof(*spec)); return erl_drv_output_term(desc->dport, spec, i); } - +#endif /* ** active=TRUE: @@ -3364,7 +3738,7 @@ static int tcp_reply_data(tcp_descriptor* desc, char* buf, int len) int code; const char* body = buf; int bodylen = len; - + packet_get_body(desc->inet.htype, &body, &bodylen); if (desc->inet.deliver == INET_DELIVER_PORT) { @@ -3382,8 +3756,7 @@ static int tcp_reply_data(tcp_descriptor* desc, char* buf, int len) if (code < 0) return code; - if (desc->inet.active == INET_ONCE) - desc->inet.active = INET_PASSIVE; + INET_CHECK_ACTIVE_TO_PASSIVE(INETP(desc)); return code; } @@ -3410,12 +3783,11 @@ tcp_reply_binary_data(tcp_descriptor* desc, ErlDrvBinary* bin, int offs, int len } if (code < 0) return code; - if (desc->inet.active == INET_ONCE) - desc->inet.active = INET_PASSIVE; + INET_CHECK_ACTIVE_TO_PASSIVE(INETP(desc)); return code; } - +#ifdef HAVE_UDP static int packet_reply_binary_data(inet_descriptor* desc, unsigned int hsz, ErlDrvBinary * bin, int offs, int len, @@ -3434,11 +3806,11 @@ packet_reply_binary_data(inet_descriptor* desc, unsigned int hsz, code = packet_binary_message(desc, bin, offs, len, extra); if (code < 0) return code; - if (desc->active == INET_ONCE) - desc->active = INET_PASSIVE; + INET_CHECK_ACTIVE_TO_PASSIVE(desc); return code; } } +#endif /* ---------------------------------------------------------------------------- @@ -3480,6 +3852,7 @@ sock_init(void) /* May be called multiple times. */ #ifdef HAVE_SCTP static void inet_init_sctp(void) { INIT_ATOM(sctp); + INIT_ATOM(sctp_passive); INIT_ATOM(sctp_error); INIT_ATOM(true); INIT_ATOM(false); @@ -3489,6 +3862,7 @@ static void inet_init_sctp(void) { INIT_ATOM(binary); INIT_ATOM(active); INIT_ATOM(once); + INIT_ATOM(multi); INIT_ATOM(buffer); INIT_ATOM(linger); INIT_ATOM(recbuf); @@ -3498,6 +3872,7 @@ static void inet_init_sctp(void) { INIT_ATOM(priority); INIT_ATOM(tos); INIT_ATOM(ipv6_v6only); + INIT_ATOM(netns); /* Option names */ INIT_ATOM(sctp_rtoinfo); @@ -3575,7 +3950,10 @@ static void inet_init_sctp(void) { /* For #sctp_paddrinfo{}: */ INIT_ATOM(active); INIT_ATOM(inactive); - +# if HAVE_DECL_SCTP_UNCONFIRMED + INIT_ATOM(unconfirmed); +# endif + /* For #sctp_status{}: */ # if HAVE_DECL_SCTP_EMPTY INIT_ATOM(empty); @@ -3604,22 +3982,29 @@ static int inet_init() if (0 != erl_drv_tsd_key_create("inet_buffer_stack_key", &buffer_stack_key)) goto error; - ASSERT(sizeof(struct in_addr) == 4); + ERTS_CT_ASSERT(sizeof(struct in_addr) == 4); # if defined(HAVE_IN6) && defined(AF_INET6) - ASSERT(sizeof(struct in6_addr) == 16); + ERTS_CT_ASSERT(sizeof(struct in6_addr) == 16); # endif INIT_ATOM(ok); INIT_ATOM(tcp); +#ifdef HAVE_UDP INIT_ATOM(udp); +#endif INIT_ATOM(error); + INIT_ATOM(einval); INIT_ATOM(inet_async); INIT_ATOM(inet_reply); INIT_ATOM(timeout); INIT_ATOM(closed); + INIT_ATOM(tcp_passive); INIT_ATOM(tcp_closed); INIT_ATOM(tcp_error); +#ifdef HAVE_UDP + INIT_ATOM(udp_passive); INIT_ATOM(udp_error); +#endif INIT_ATOM(empty_out_q); INIT_ATOM(ssl_tls); @@ -3638,14 +4023,35 @@ static int inet_init() /* add TCP, UDP and SCTP drivers */ add_driver_entry(&tcp_inet_driver_entry); +#ifdef HAVE_UDP add_driver_entry(&udp_inet_driver_entry); +#endif + #ifdef HAVE_SCTP /* Check the size of SCTP AssocID -- currently both this driver and the Erlang part require 32 bit: */ - ASSERT(sizeof(sctp_assoc_t)==ASSOC_ID_LEN); -# if defined(HAVE_SCTP_BINDX) && defined (HAVE_SCTP_PEELOFF) + ERTS_CT_ASSERT(sizeof(sctp_assoc_t)==ASSOC_ID_LEN); +# if defined(HAVE_SCTP_BINDX) p_sctp_bindx = sctp_bindx; +# if defined(HAVE_SCTP_PEELOFF) p_sctp_peeloff = sctp_peeloff; +# else + p_sctp_peeloff = NULL; +# endif +# if defined(HAVE_SCTP_GETLADDRS) && defined(HAVE_SCTP_FREELADDRS) + p_sctp_getladdrs = sctp_getladdrs; + p_sctp_freeladdrs = sctp_freeladdrs; +# else + p_sctp_getladdrs = NULL; + p_sctp_freeladdrs = NULL; +# endif +# if defined(HAVE_SCTP_GETPADDRS) && defined(HAVE_SCTP_FREEPADDRS) + p_sctp_getpaddrs = sctp_getpaddrs; + p_sctp_freepaddrs = sctp_freepaddrs; +# else + p_sctp_getpaddrs = NULL; + p_sctp_freepaddrs = NULL; +# endif inet_init_sctp(); add_driver_entry(&sctp_inet_driver_entry); # else @@ -3660,12 +4066,36 @@ static int inet_init() void *ptr; if (erts_sys_ddll_sym(h_libsctp, "sctp_bindx", &ptr) == 0) { p_sctp_bindx = ptr; - inet_init_sctp(); - add_driver_entry(&sctp_inet_driver_entry); if (erts_sys_ddll_sym(h_libsctp, "sctp_peeloff", &ptr) == 0) { p_sctp_peeloff = ptr; } + else p_sctp_peeloff = NULL; + if (erts_sys_ddll_sym(h_libsctp, "sctp_getladdrs", &ptr) == 0) { + p_sctp_getladdrs = ptr; + } + else p_sctp_getladdrs = NULL; + if (erts_sys_ddll_sym(h_libsctp, "sctp_freeladdrs", &ptr) == 0) { + p_sctp_freeladdrs = ptr; + } + else { + p_sctp_freeladdrs = NULL; + p_sctp_getladdrs = NULL; + } + if (erts_sys_ddll_sym(h_libsctp, "sctp_getpaddrs", &ptr) == 0) { + p_sctp_getpaddrs = ptr; + } + else p_sctp_getpaddrs = NULL; + if (erts_sys_ddll_sym(h_libsctp, "sctp_freepaddrs", &ptr) == 0) { + p_sctp_freepaddrs = ptr; + } + else { + p_sctp_freepaddrs = NULL; + p_sctp_getpaddrs = NULL; + } + inet_init_sctp(); + add_driver_entry(&sctp_inet_driver_entry); } + else p_sctp_bindx = NULL; } } # endif @@ -3820,10 +4250,12 @@ static char *inet_set_faddress(int family, inet_address* dst, ** and *len is the length of dst on return ** (suitable to deliver to erlang) */ -static int inet_get_address(int family, char* dst, inet_address* src, unsigned int* len) +static int inet_get_address(char* dst, inet_address* src, unsigned int* len) { + int family; short port; + family = src->sa.sa_family; if ((family == AF_INET) && (*len >= sizeof(struct sockaddr_in))) { dst[0] = INET_AF_INET; port = sock_ntohs(src->sai.sin_port); @@ -3845,6 +4277,75 @@ static int inet_get_address(int family, char* dst, inet_address* src, unsigned i return -1; } +/* Same as the above, but take family from the address structure, +** and advance the address pointer to the next address +** according to the size of the current, +** and return the resulting encoded size +*/ +static int inet_address_to_erlang(char *dst, inet_address **src) { + short port; + + switch ((*src)->sa.sa_family) { + case AF_INET: + if (dst) { + dst[0] = INET_AF_INET; + port = sock_ntohs((*src)->sai.sin_port); + put_int16(port, dst+1); + sys_memcpy(dst+1+2, (char *) &(*src)->sai.sin_addr, 4); + } + (*src) = (inet_address *) (&(*src)->sai + 1); + return 1 + 2 + 4; +#if defined(HAVE_IN6) && defined(AF_INET6) + case AF_INET6: + if (dst) { + dst[0] = INET_AF_INET6; + port = sock_ntohs((*src)->sai6.sin6_port); + put_int16(port, dst+1); + VALGRIND_MAKE_MEM_DEFINED(&(*src)->sai6.sin6_addr,16); /* false undefs from syscall sctp_get[lp]addrs */ + sys_memcpy(dst+1+2, (char *) &(*src)->sai6.sin6_addr, 16); + } + (*src) = (inet_address *) (&(*src)->sai6 + 1); + return 1 + 2 + 16; +#endif + default: + return -1; + } +} + +/* Encode n encoded addresses from addrs in the result buffer +*/ +static ErlDrvSizeT reply_inet_addrs +(int n, inet_address *addrs, char **rbuf, ErlDrvSizeT rsize) { + inet_address *ia; + int i, s; + ErlDrvSizeT rlen; + + if (IS_SOCKET_ERROR(n)) return ctl_error(sock_errno(), rbuf, rsize); + if (n == 0) return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); + + /* Calculate result length */ + rlen = 1; + ia = addrs; + for (i = 0; i < n; i++) { + s = inet_address_to_erlang(NULL, &ia); + if (s < 0) break; + rlen += s; + } + + if (rlen > rsize) (*rbuf) = ALLOC(rlen); + + (*rbuf)[0] = INET_REP_OK; + rlen = 1; + ia = addrs; + for (i = 0; i < n; i++) { + s = inet_address_to_erlang((*rbuf)+rlen, &ia); + if (s < 0) break; + rlen += s; + } + + return rlen; +} + static void desc_close(inet_descriptor* desc) { if (desc->s != INVALID_SOCKET) { @@ -3854,6 +4355,16 @@ static void desc_close(inet_descriptor* desc) desc->forced_events = 0; desc->send_would_block = 0; #endif +#ifdef __OSE__ + if (desc->events[0]) { + driver_select(desc->port,desc->events[0],FD_READ|FD_WRITE|ERL_DRV_USE,0); + driver_select(desc->port,desc->events[1],FD_READ|FD_WRITE|ERL_DRV_USE,0); + driver_select(desc->port,desc->events[2],FD_READ|FD_WRITE|ERL_DRV_USE,0); + driver_select(desc->port,desc->events[3],FD_READ|FD_WRITE|ERL_DRV_USE,0); + driver_select(desc->port,desc->events[4],FD_READ|FD_WRITE|ERL_DRV_USE,0); + driver_select(desc->port,desc->events[5],FD_READ|FD_WRITE|ERL_DRV_USE,0); + } +#else /* * We should close the fd here, but the other driver might still * be selecting on it. @@ -3863,6 +4374,7 @@ static void desc_close(inet_descriptor* desc) ERL_DRV_USE, 0); else inet_stop_select((ErlDrvEvent)(long)desc->event,NULL); +#endif desc->event = INVALID_EVENT; /* closed by stop_select callback */ desc->s = INVALID_SOCKET; desc->event_mask = 0; @@ -3894,7 +4406,7 @@ static int erl_inet_close(inet_descriptor* desc) desc_close(desc); desc->state = INET_STATE_CLOSED; } else if (desc->prebound && (desc->s != INVALID_SOCKET)) { - sock_select(desc, FD_READ | FD_WRITE | FD_CLOSE, 0); + sock_select(desc, FD_READ | FD_WRITE | FD_CLOSE | ERL_DRV_USE_NO_CALLBACK, 0); desc->event_mask = 0; #ifdef __WIN32__ desc->forced_events = 0; @@ -3904,20 +4416,151 @@ static int erl_inet_close(inet_descriptor* desc) return 0; } +#ifdef __OSE__ +static void inet_select_init(inet_descriptor* desc) +{ + desc->events[0] = + erl_drv_ose_event_alloc(SO_EVENT_READ_REPLY, + desc->s, + inet_resolve_signal, + NULL); + driver_select(desc->port, desc->events[0], + ERL_DRV_READ|ERL_DRV_USE, 1); + + desc->events[1] = + erl_drv_ose_event_alloc(SO_EVENT_EOF_REPLY, + desc->s, + inet_resolve_signal, + NULL); + driver_select(desc->port, desc->events[1], + ERL_DRV_READ|ERL_DRV_USE, 1); + + desc->events[2] = + erl_drv_ose_event_alloc(SO_EVENT_ACCEPT_REPLY, + desc->s, + inet_resolve_signal, + NULL); + driver_select(desc->port, desc->events[2], + ERL_DRV_READ|ERL_DRV_USE, 1); + + /* trigger tcp_inet_input */ + desc->events[3] = + erl_drv_ose_event_alloc(SO_EVENT_WRITE_REPLY, + desc->s, + inet_resolve_signal, + NULL); + driver_select(desc->port, desc->events[3], + ERL_DRV_WRITE|ERL_DRV_USE, 1); + + desc->events[4] = + erl_drv_ose_event_alloc(SO_EVENT_CONNECT_REPLY, + desc->s, + inet_resolve_signal, + NULL); + driver_select(desc->port, desc->events[4], + ERL_DRV_WRITE|ERL_DRV_USE, 1); + + desc->events[5] = + erl_drv_ose_event_alloc(SO_EVENT_ERROR_REPLY, + desc->s, + inet_resolve_signal, + NULL); + driver_select(desc->port, desc->events[5], + ERL_DRV_WRITE|ERL_DRV_USE, 1); + + /* Issue a select on error event before any other select to be sure we are + prepared to receive error notifications from the stack, even in the + situations when select isn't issued */ + sock_select(desc, SOCK_FD_ERROR, 1); +} +#endif static ErlDrvSSizeT inet_ctl_open(inet_descriptor* desc, int domain, int type, char** rbuf, ErlDrvSizeT rsize) { + int save_errno; +#ifdef HAVE_SETNS + int current_ns, new_ns; + current_ns = new_ns = 0; +#endif + save_errno = 0; + if (desc->state != INET_STATE_CLOSED) return ctl_xerror(EXBADSEQ, rbuf, rsize); + +#ifdef HAVE_SETNS + if (desc->netns != NULL) { + /* Temporarily change network namespace for this thread + * while creating the socket + */ + current_ns = open("/proc/self/ns/net", O_RDONLY); + if (current_ns == INVALID_SOCKET) + return ctl_error(sock_errno(), rbuf, rsize); + new_ns = open(desc->netns, O_RDONLY); + if (new_ns == INVALID_SOCKET) { + save_errno = sock_errno(); + while (close(current_ns) == INVALID_SOCKET && + sock_errno() == EINTR); + return ctl_error(save_errno, rbuf, rsize); + } + if (setns(new_ns, CLONE_NEWNET) != 0) { + save_errno = sock_errno(); + while (close(new_ns) == INVALID_SOCKET && + sock_errno() == EINTR); + while (close(current_ns) == INVALID_SOCKET && + sock_errno() == EINTR); + return ctl_error(save_errno, rbuf, rsize); + } + else { + while (close(new_ns) == INVALID_SOCKET && + sock_errno() == EINTR); + } + } +#endif if ((desc->s = sock_open(domain, type, desc->sprotocol)) == INVALID_SOCKET) - return ctl_error(sock_errno(), rbuf, rsize); - if ((desc->event = sock_create_event(desc)) == INVALID_EVENT) - return ctl_error(sock_errno(), rbuf, rsize); + save_errno = sock_errno(); +#ifdef HAVE_SETNS + if (desc->netns != NULL) { + /* Restore network namespace */ + if (setns(current_ns, CLONE_NEWNET) != 0) { + /* XXX Failed to restore network namespace. + * What to do? Tidy up and return an error... + * Note that the thread now might still be in the namespace. + * Can this even happen? Should the emulator be aborted? + */ + if (desc->s != INVALID_SOCKET) + save_errno = sock_errno(); + while (close(desc->s) == INVALID_SOCKET && + sock_errno() == EINTR); + desc->s = INVALID_SOCKET; + while (close(current_ns) == INVALID_SOCKET && + sock_errno() == EINTR); + return ctl_error(save_errno, rbuf, rsize); + } + else { + while (close(current_ns) == INVALID_SOCKET && + sock_errno() == EINTR); + } + } +#endif + if (desc->s == INVALID_SOCKET) + return ctl_error(save_errno, rbuf, rsize); + + if ((desc->event = sock_create_event(desc)) == INVALID_EVENT) { + save_errno = sock_errno(); + while (close(desc->s) == INVALID_SOCKET && + sock_errno() == EINTR); + desc->s = INVALID_SOCKET; + return ctl_error(save_errno, rbuf, rsize); + } SET_NONBLOCKING(desc->s); #ifdef __WIN32__ driver_select(desc->port, desc->event, ERL_DRV_READ, 1); #endif +#ifdef __OSE__ + inet_select_init(desc); +#endif + desc->state = INET_STATE_OPEN; desc->stype = type; desc->sfamily = domain; @@ -3927,28 +4570,50 @@ static ErlDrvSSizeT inet_ctl_open(inet_descriptor* desc, int domain, int type, /* as inet_open but pass in an open socket (MUST BE OF RIGHT TYPE) */ static ErlDrvSSizeT inet_ctl_fdopen(inet_descriptor* desc, int domain, int type, - SOCKET s, char** rbuf, ErlDrvSizeT rsize) + SOCKET s, Uint32 bound, + char** rbuf, ErlDrvSizeT rsize) { inet_address name; unsigned int sz = sizeof(name); - /* check that it is a socket and that the socket is bound */ - if (IS_SOCKET_ERROR(sock_name(s, (struct sockaddr*) &name, &sz))) - return ctl_error(sock_errno(), rbuf, rsize); - if (name.sa.sa_family != domain) - return ctl_error(EINVAL, rbuf, rsize); + if (bound) { + /* check that it is a socket and that the socket is bound */ + if (IS_SOCKET_ERROR(sock_name(s, (struct sockaddr*) &name, &sz))) + return ctl_error(sock_errno(), rbuf, rsize); + if (name.sa.sa_family != domain) + return ctl_error(EINVAL, rbuf, rsize); + } +#ifdef __OSE__ + /* for fdopen duplicating the sd will allow to uniquely identify + the signal from OSE with erlang port */ + desc->s = sock_dup(s); +#else desc->s = s; +#endif + if ((desc->event = sock_create_event(desc)) == INVALID_EVENT) return ctl_error(sock_errno(), rbuf, rsize); SET_NONBLOCKING(desc->s); #ifdef __WIN32__ driver_select(desc->port, desc->event, ERL_DRV_READ, 1); #endif - desc->state = INET_STATE_BOUND; /* assume bound */ + + if (bound) + desc->state = INET_STATE_BOUND; + else + desc->state = INET_STATE_OPEN; + if (type == SOCK_STREAM) { /* check if connected */ sz = sizeof(name); - if (!IS_SOCKET_ERROR(sock_peer(s, (struct sockaddr*) &name, &sz))) + if (!IS_SOCKET_ERROR(sock_peer(s, (struct sockaddr*) &name, &sz))) { desc->state = INET_STATE_CONNECTED; +#ifdef __OSE__ + /* since we are dealing with different descriptors (i.e. inet and + socket) the select part should be initialized with the right + values */ + inet_select_init(desc); +#endif + } } desc->prebound = 1; /* used to prevent a real close since @@ -3974,8 +4639,7 @@ struct addr_if { #ifndef SIOCGIFNETMASK -static struct in_addr net_mask(in) -struct in_addr in; +static struct in_addr net_mask(struct in_addr in) { register u_long i = sock_ntohl(in.s_addr); @@ -4091,6 +4755,36 @@ static char* sockaddr_to_buf(struct sockaddr* addr, char* ptr, char* end) return NULL; } +/* sockaddr_bufsz_need + * Returns the number of bytes needed to store the information + * through sockaddr_to_buf + */ + +static size_t sockaddr_bufsz_need(struct sockaddr* addr) +{ + if (addr->sa_family == AF_INET || addr->sa_family == 0) { + return 1 + sizeof(struct in_addr); + } +#if defined(HAVE_IN6) && defined(AF_INET6) + else if (addr->sa_family == AF_INET6) { + return 1 + sizeof(struct in6_addr); + } +#endif +#if defined(AF_LINK) + if (addr->sa_family == AF_LINK) { + struct sockaddr_dl *sdl_p = (struct sockaddr_dl*) addr; + return 2 + sdl_p->sdl_alen; + } +#endif +#if defined(AF_PACKET) && defined(HAVE_NETPACKET_PACKET_H) + else if(addr->sa_family == AF_PACKET) { + struct sockaddr_ll *sll_p = (struct sockaddr_ll*) addr; + return 2 + sll_p->sll_halen; + } +#endif + return 0; +} + static char* buf_to_sockaddr(char* ptr, char* end, struct sockaddr* addr) { buf_check(ptr,end,1); @@ -4349,7 +5043,7 @@ static ErlDrvSSizeT inet_ctl_getiflist(inet_descriptor* desc, case AF_INET6: #endif case AF_INET: - ASSERT(sp+IFNAMSIZ+1 < sbuf+ifc.ifc_len+1) + ASSERT(sp+IFNAMSIZ+1 < sbuf+ifc.ifc_len+1); strncpy(sp, ifrp->ifr_name, IFNAMSIZ); sp[IFNAMSIZ] = '\0'; sp += strlen(sp), ++sp; @@ -5150,7 +5844,7 @@ done: ia_p->Ipv6IfIndex && ia_p->Ipv6IfIndex != index) { - /* Oops, there was an other interface for IPv6. Possible? XXX */ + /* Oops, there was another interface for IPv6. Possible? XXX */ index = ia_p->Ipv6IfIndex; goto index; } @@ -5169,6 +5863,11 @@ done: } #elif defined(HAVE_GETIFADDRS) +#ifdef DEBUG +#define GETIFADDRS_BUFSZ (1) +#else +#define GETIFADDRS_BUFSZ (512) +#endif static ErlDrvSSizeT inet_ctl_getifaddrs(inet_descriptor* desc_p, char **rbuf_pp, ErlDrvSizeT rsize) @@ -5179,15 +5878,15 @@ static ErlDrvSSizeT inet_ctl_getifaddrs(inet_descriptor* desc_p, char *buf_p; char *buf_alloc_p; - buf_size = 512; - buf_alloc_p = ALLOC(buf_size); + buf_size = GETIFADDRS_BUFSZ; + buf_alloc_p = ALLOC(GETIFADDRS_BUFSZ); buf_p = buf_alloc_p; # define BUF_ENSURE(Size) \ do { \ int NEED_, GOT_ = buf_p - buf_alloc_p; \ NEED_ = GOT_ + (Size); \ if (NEED_ > buf_size) { \ - buf_size = NEED_ + 512; \ + buf_size = NEED_ + GETIFADDRS_BUFSZ; \ buf_alloc_p = REALLOC(buf_alloc_p, buf_size); \ buf_p = buf_alloc_p + GOT_; \ } \ @@ -5200,7 +5899,7 @@ static ErlDrvSSizeT inet_ctl_getifaddrs(inet_descriptor* desc_p, while (! (P_ = sockaddr_to_buf((sa), buf_p, \ buf_alloc_p+buf_size))) { \ int GOT_ = buf_p - buf_alloc_p; \ - buf_size += 512; \ + buf_size += GETIFADDRS_BUFSZ; \ buf_alloc_p = REALLOC(buf_alloc_p, buf_size); \ buf_p = buf_alloc_p + GOT_; \ } \ @@ -5257,10 +5956,11 @@ static ErlDrvSSizeT inet_ctl_getifaddrs(inet_descriptor* desc_p, || ifa_p->ifa_addr->sa_family == AF_PACKET #endif ) { - char *bp = buf_p; - BUF_ENSURE(1); - SOCKADDR_TO_BUF(INET_IFOPT_HWADDR, ifa_p->ifa_addr); - if (buf_p - bp < 4) buf_p = bp; /* Empty hwaddr */ + size_t need = sockaddr_bufsz_need(ifa_p->ifa_addr); + if (need > 3) { + BUF_ENSURE(1 + need); + SOCKADDR_TO_BUF(INET_IFOPT_HWADDR, ifa_p->ifa_addr); + } } #endif } @@ -5275,6 +5975,7 @@ static ErlDrvSSizeT inet_ctl_getifaddrs(inet_descriptor* desc_p, return buf_size; # undef BUF_ENSURE } +#undef GETIFADDRS_BUFSZ #else @@ -5375,9 +6076,9 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len) int arg_sz; enum PacketParseType old_htype = desc->htype; int old_active = desc->active; - int propagate = 0; /* Set to 1 if failure to set this option - should be propagated to erlang (not all - errors can be propagated for BC reasons) */ + int propagate; /* Set to 1 if failure to set this option + should be propagated to erlang (not all + errors can be propagated for BC reasons) */ int res; #ifdef HAVE_SCTP /* SCTP sockets are treated completely separately: */ @@ -5394,6 +6095,7 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len) arg_ptr = (char*) &ival; arg_sz = sizeof(ival); proto = SOL_SOCKET; + propagate = 0; switch(opt) { case INET_LOPT_HEADER: @@ -5424,11 +6126,33 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len) case INET_LOPT_ACTIVE: DEBUGF(("inet_set_opts(%ld): s=%d, ACTIVE=%d\r\n", - (long)desc->port, desc->s,ival)); + (long)desc->port, desc->s, ival)); desc->active = ival; + if (desc->active == INET_MULTI) { + long ac = desc->active_count; + Sint16 nval = get_int16(ptr); + ptr += 2; + len -= 2; + ac += nval; + if (ac > INT16_MAX || ac < INT16_MIN) + return -1; + desc->active_count += nval; + if (desc->active_count < 0) + desc->active_count = 0; + if (desc->active_count == 0) { + desc->active = INET_PASSIVE; + packet_passive_message(desc); + } + } else + desc->active_count = 0; if ((desc->stype == SOCK_STREAM) && (desc->active != INET_PASSIVE) && (desc->state == INET_STATE_CLOSED)) { - tcp_closed_message((tcp_descriptor *) desc); + tcp_descriptor *tdesc = (tcp_descriptor *) desc; + if (tdesc->tcp_add_flags & TCP_ADDF_DELAYED_ECONNRESET) { + tdesc->tcp_add_flags &= ~TCP_ADDF_DELAYED_ECONNRESET; + tcp_error_message(tdesc, ECONNRESET); + } + tcp_closed_message(tdesc); if (desc->exitf) { driver_exit(desc->port, 0); return 0; /* Give up on this socket, descriptor lost */ @@ -5521,6 +6245,7 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len) } continue; +#ifdef HAVE_UDP case INET_LOPT_UDP_READ_PACKETS: if (desc->stype == SOCK_DGRAM) { udp_descriptor* udesc = (udp_descriptor*) desc; @@ -5528,6 +6253,37 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len) udesc->read_packets = ival; } continue; +#endif + +#ifdef HAVE_SETNS + case INET_LOPT_NETNS: + /* It is annoying that ival and len are both (signed) int */ + if (ival < 0) return -1; + if (len < ival) return -1; + if (desc->netns != NULL) FREE(desc->netns); + desc->netns = ALLOC(((unsigned int) ival) + 1); + memcpy(desc->netns, ptr, ival); + desc->netns[ival] = '\0'; + ptr += ival; + len -= ival; + continue; +#endif + + case INET_LOPT_TCP_SHOW_ECONNRESET: + if (desc->sprotocol == IPPROTO_TCP) { + tcp_descriptor* tdesc = (tcp_descriptor*) desc; + if (ival) + tdesc->tcp_add_flags |= TCP_ADDF_SHOW_ECONNRESET; + else + tdesc->tcp_add_flags &= ~TCP_ADDF_SHOW_ECONNRESET; + } + continue; + + case INET_LOPT_LINE_DELIM: + DEBUGF(("inet_set_opts(%ld): s=%d, LINE_DELIM=%d\r\n", + (long)desc->port, desc->s, ival)); + desc->delimiter = (char)ival; + continue; case INET_OPT_REUSEADDR: #ifdef __WIN32__ @@ -5573,6 +6329,13 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len) arg_sz = sizeof(li_val); DEBUGF(("inet_set_opts(%ld): s=%d, SO_LINGER=%d,%d", (long)desc->port, desc->s, li_val.l_onoff,li_val.l_linger)); + if (desc->sprotocol == IPPROTO_TCP) { + tcp_descriptor* tdesc = (tcp_descriptor*) desc; + if (li_val.l_onoff && li_val.l_linger == 0) + tdesc->tcp_add_flags |= TCP_ADDF_LINGER_ZERO; + else + tdesc->tcp_add_flags &= ~TCP_ADDF_LINGER_ZERO; + } break; case INET_OPT_PRIORITY: @@ -5722,7 +6485,8 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len) /* XXX fprintf(stderr,"desc->htype == %d, old_htype == %d, desc->active == %d, old_active == %d\r\n",(int)desc->htype, (int) old_htype, (int) desc->active, (int) old_active );*/ - return 1+(desc->htype == old_htype && desc->active == INET_ONCE); + return 1+(desc->htype == old_htype && + (desc->active == INET_ONCE || desc->active == INET_MULTI)); } return 0; } @@ -5855,9 +6619,39 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len) case INET_LOPT_ACTIVE: desc->active = get_int32(curr); curr += 4; + if (desc->active == INET_MULTI) { + long ac = desc->active_count; + Sint16 nval = get_int16(curr); curr += 2; + ac += nval; + if (ac > INT16_MAX || ac < INT16_MIN) + return -1; + desc->active_count += nval; + if (desc->active_count < 0) + desc->active_count = 0; + if (desc->active_count == 0) { + desc->active = INET_PASSIVE; + packet_passive_message(desc); + } + } else + desc->active_count = 0; res = 0; continue; +#ifdef HAVE_SETNS + case INET_LOPT_NETNS: + { + size_t ns_len; + ns_len = get_int32(curr); curr += 4; + CHKLEN(curr, ns_len); + if (desc->netns != NULL) FREE(desc->netns); + desc->netns = ALLOC(ns_len + 1); + memcpy(desc->netns, curr, ns_len); + desc->netns[ns_len] = '\0'; + curr += ns_len; + } + continue; +#endif + /* SCTP options and applicable generic INET options: */ case SCTP_OPT_RTOINFO: @@ -6323,7 +7117,7 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc, do { \ ErlDrvSizeT new_need = ((Ptr) - (*dest)) + (Size); \ if (new_need > dest_used) { \ - erl_exit(1,"Internal error in inet_drv, " \ + erts_exit(ERTS_ERROR_EXIT,"Internal error in inet_drv, " \ "miscalculated buffer size"); \ } \ dest_used = new_need; \ @@ -6362,6 +7156,11 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc, case INET_LOPT_ACTIVE: *ptr++ = opt; put_int32(desc->active, ptr); + if (desc->active == INET_MULTI) { + PLACE_FOR(2,ptr); + put_int16(desc->active_count, ptr); + ptr += 2; + } continue; case INET_LOPT_PACKET: *ptr++ = opt; @@ -6444,6 +7243,7 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc, } continue; +#ifdef HAVE_UDP case INET_LOPT_UDP_READ_PACKETS: if (desc->stype == SOCK_DGRAM) { *ptr++ = opt; @@ -6453,6 +7253,34 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc, TRUNCATE_TO(0,ptr); } continue; +#endif + +#ifdef HAVE_SETNS + case INET_LOPT_NETNS: + if (desc->netns != NULL) { + size_t netns_len; + netns_len = strlen(desc->netns); + *ptr++ = opt; + put_int32(netns_len, ptr); + PLACE_FOR(netns_len, ptr); + memcpy(ptr, desc->netns, netns_len); + ptr += netns_len; + } else { + TRUNCATE_TO(0,ptr); + } + continue; +#endif + + case INET_LOPT_TCP_SHOW_ECONNRESET: + if (desc->sprotocol == IPPROTO_TCP) { + tcp_descriptor* tdesc = (tcp_descriptor*) desc; + *ptr++ = opt; + ival = !!(tdesc->tcp_add_flags & TCP_ADDF_SHOW_ECONNRESET); + put_int32(ival, ptr); + } else { + TRUNCATE_TO(0,ptr); + } + continue; case INET_OPT_PRIORITY: #ifdef SO_PRIORITY @@ -6621,8 +7449,13 @@ static int load_paddrinfo (ErlDrvTermData * spec, int i, case SCTP_INACTIVE: i = LOAD_ATOM (spec, i, am_inactive); break; +# if HAVE_DECL_SCTP_UNCONFIRMED + case SCTP_UNCONFIRMED: + i = LOAD_ATOM (spec, i, am_unconfirmed); + break; +# endif default: - ASSERT(0); /* NB: SCTP_UNCONFIRMED modifier not yet supported */ + i = LOAD_ATOM (spec, i, am_undefined); } i = LOAD_INT (spec, i, pai->spinfo_cwnd); i = LOAD_INT (spec, i, pai->spinfo_srtt); @@ -6661,7 +7494,7 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc, do { \ int need; \ if ((Index) > spec_allocated) { \ - erl_exit(1,"Internal error in inet_drv, " \ + erts_exit(ERTS_ERROR_EXIT,"Internal error in inet_drv, " \ "miscalculated buffer size"); \ } \ need = (Index) + (N); \ @@ -6718,7 +7551,10 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc, } case INET_LOPT_ACTIVE: { - PLACE_FOR(spec, i, 2*LOAD_ATOM_CNT + LOAD_TUPLE_CNT); + if (desc->active == INET_MULTI) + PLACE_FOR(spec, i, LOAD_ATOM_CNT + LOAD_INT_CNT + LOAD_TUPLE_CNT); + else + PLACE_FOR(spec, i, 2*LOAD_ATOM_CNT + LOAD_TUPLE_CNT); i = LOAD_ATOM (spec, i, am_active); switch (desc->active) { @@ -6731,12 +7567,31 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc, case INET_ONCE : { i = LOAD_ATOM (spec, i, am_once); break; } + case INET_MULTI : + { i = LOAD_INT(spec, i, desc->active_count); break; } + default: ASSERT (0); } i = LOAD_TUPLE (spec, i, 2); break; } +#ifdef HAVE_SETNS + case INET_LOPT_NETNS: + if (desc->netns != NULL) { + PLACE_FOR + (spec, i, + LOAD_ATOM_CNT + LOAD_BUF2BINARY_CNT + LOAD_TUPLE_CNT); + i = LOAD_ATOM (spec, i, am_netns); + i = LOAD_BUF2BINARY + (spec, i, desc->netns, strlen(desc->netns)); + i = LOAD_TUPLE (spec, i, 2); + break; + } + else + continue; /* Ignore */ +#endif + /* SCTP and generic INET options: */ case SCTP_OPT_RTOINFO: @@ -7458,9 +8313,26 @@ static ErlDrvSSizeT inet_subscribe(inet_descriptor* desc, static void inet_stop(inet_descriptor* desc) { erl_inet_close(desc); +#ifdef HAVE_SETNS + if (desc->netns != NULL) + FREE(desc->netns); +#endif FREE(desc); } +static void inet_emergency_close(ErlDrvData data) +{ + /* valid for any (UDP, TCP or SCTP) descriptor */ + tcp_descriptor* tcp_desc = (tcp_descriptor*)data; + inet_descriptor* desc = INETP(tcp_desc); + DEBUGF(("inet_emergency_close(%ld) {s=%d\r\n", + (long)desc->port, desc->s)); + if (desc->s != INVALID_SOCKET) { + sock_close(desc->s); + } +} + + static void set_default_msgq_limits(ErlDrvPort port) { ErlDrvSizeT q_high = INET_HIGH_MSGQ_WATERMARK; @@ -7507,6 +8379,8 @@ static ErlDrvData inet_start(ErlDrvPort port, int size, int protocol) socket */ desc->deliver = INET_DELIVER_TERM; /* standard term format */ desc->active = INET_PASSIVE; /* start passive */ + desc->active_count = 0; + desc->delimiter = '\n'; /* line delimiting char */ desc->oph = NULL; desc->opt = NULL; @@ -7537,6 +8411,19 @@ static ErlDrvData inet_start(ErlDrvPort port, int size, int protocol) desc->is_ignored = 0; +#ifdef HAVE_SETNS + desc->netns = NULL; +#endif +#ifdef __OSE__ + desc->select_state = 0; + desc->events[0] = NULL; + desc->events[1] = NULL; + desc->events[2] = NULL; + desc->events[3] = NULL; + desc->events[4] = NULL; + desc->events[5] = NULL; +#endif + return (ErlDrvData)desc; } @@ -7726,6 +8613,39 @@ static ErlDrvSSizeT inet_ctl(inet_descriptor* desc, int cmd, char* buf, return ctl_reply(INET_REP_OK, tbuf, strlen(tbuf), rbuf, rsize); } + case INET_REQ_GETPADDRS: { + DEBUGF(("inet_ctl(%ld): INET_GETPADDRS\r\n", (long)desc->port)); + + if (len != 4) return ctl_error(EINVAL, rbuf, rsize); + + if (! IS_OPEN(desc)) return ctl_xerror(EXBADPORT, rbuf, rsize); + if (! IS_BOUND(desc)) return ctl_xerror(EXBADSEQ, rbuf, rsize); + +#ifdef HAVE_SCTP + if (IS_SCTP(desc) && p_sctp_getpaddrs) { + struct sockaddr *sa; + Uint32 assoc_id; + int n; + ErlDrvSizeT rlen; + + assoc_id = get_int32(buf); + n = p_sctp_getpaddrs(desc->s, assoc_id, &sa); + rlen = reply_inet_addrs(n, (inet_address *) sa, rbuf, rsize); + if (n > 0) p_sctp_freepaddrs(sa); + return rlen; + } +#endif + { /* Fallback to sock_peer */ + inet_address addr; + unsigned int sz; + int i; + + sz = sizeof(addr); + i = sock_peer(desc->s, (struct sockaddr *) &addr, &sz); + return reply_inet_addrs(i >= 0 ? 1 : i, &addr, rbuf, rsize); + } + } + case INET_REQ_PEER: { /* get peername */ char tbuf[sizeof(inet_address)]; inet_address peer; @@ -7741,7 +8661,7 @@ static ErlDrvSSizeT inet_ctl(inet_descriptor* desc, int cmd, char* buf, if (IS_SOCKET_ERROR(sock_peer(desc->s, (struct sockaddr*)ptr,&sz))) return ctl_error(sock_errno(), rbuf, rsize); } - if (inet_get_address(desc->sfamily, tbuf, ptr, &sz) < 0) + if (inet_get_address(tbuf, ptr, &sz) < 0) return ctl_error(EINVAL, rbuf, rsize); return ctl_reply(INET_REP_OK, tbuf, sz, rbuf, rsize); } @@ -7762,6 +8682,39 @@ static ErlDrvSSizeT inet_ctl(inet_descriptor* desc, int cmd, char* buf, } } + case INET_REQ_GETLADDRS: { + DEBUGF(("inet_ctl(%ld): INET_GETLADDRS\r\n", (long)desc->port)); + + if (len != 4) return ctl_error(EINVAL, rbuf, rsize); + + if (! IS_OPEN(desc)) return ctl_xerror(EXBADPORT, rbuf, rsize); + if (! IS_BOUND(desc)) return ctl_xerror(EXBADSEQ, rbuf, rsize); + +#ifdef HAVE_SCTP + if (IS_SCTP(desc) && p_sctp_getladdrs) { + struct sockaddr *sa; + Uint32 assoc_id; + int n; + ErlDrvSizeT rlen; + + assoc_id = get_int32(buf); + n = p_sctp_getladdrs(desc->s, assoc_id, &sa); + rlen = reply_inet_addrs(n, (inet_address *) sa, rbuf, rsize); + if (n > 0) p_sctp_freeladdrs(sa); + return rlen; + } +#endif + { /* Fallback to sock_name */ + inet_address addr; + unsigned int sz; + int i; + + sz = sizeof(addr); + i = sock_name(desc->s, (struct sockaddr *) &addr, &sz); + return reply_inet_addrs(i >= 0 ? 1 : i, &addr, rbuf, rsize); + } + } + case INET_REQ_NAME: { /* get sockname */ char tbuf[sizeof(inet_address)]; inet_address name; @@ -7778,7 +8731,7 @@ static ErlDrvSSizeT inet_ctl(inet_descriptor* desc, int cmd, char* buf, if (IS_SOCKET_ERROR(sock_name(desc->s, (struct sockaddr*)ptr, &sz))) return ctl_error(sock_errno(), rbuf, rsize); } - if (inet_get_address(desc->sfamily, tbuf, ptr, &sz) < 0) + if (inet_get_address(tbuf, ptr, &sz) < 0) return ctl_error(EINVAL, rbuf, rsize); return ctl_reply(INET_REP_OK, tbuf, sz, rbuf, rsize); } @@ -7845,11 +8798,19 @@ static ErlDrvSSizeT inet_ctl(inet_descriptor* desc, int cmd, char* buf, if (*buf == 1 && !desc->is_ignored) { sock_select(desc, (FD_READ|FD_WRITE|FD_CLOSE|ERL_DRV_USE_NO_CALLBACK), 0); - desc->is_ignored = INET_IGNORE_READ; + if (desc->active) + desc->is_ignored = INET_IGNORE_READ; + else + desc->is_ignored = INET_IGNORE_PASSIVE; } else if (*buf == 0 && desc->is_ignored) { - int flags = (FD_READ|FD_CLOSE|((desc->is_ignored & INET_IGNORE_WRITE)?FD_WRITE:0)); + int flags = FD_CLOSE; + if (desc->is_ignored & INET_IGNORE_READ) + flags |= FD_READ; + if (desc->is_ignored & INET_IGNORE_WRITE) + flags |= FD_WRITE; desc->is_ignored = INET_IGNORE_NONE; - sock_select(desc, flags, 1); + if (flags != FD_CLOSE) + sock_select(desc, flags, 1); } else return ctl_error(EINVAL, rbuf, rsize); @@ -8165,6 +9126,11 @@ static tcp_descriptor* tcp_inet_copy(tcp_descriptor* desc,SOCKET s, copy_desc->low = desc->low; copy_desc->send_timeout = desc->send_timeout; copy_desc->send_timeout_close = desc->send_timeout_close; + + if (desc->tcp_add_flags & TCP_ADDF_SHOW_ECONNRESET) + copy_desc->tcp_add_flags |= TCP_ADDF_SHOW_ECONNRESET; + else + copy_desc->tcp_add_flags &= ~TCP_ADDF_SHOW_ECONNRESET; /* The new port will be linked and connected to the original caller */ port = driver_create_port(port, owner, "tcp_inet", (ErlDrvData) copy_desc); @@ -8182,6 +9148,11 @@ static tcp_descriptor* tcp_inet_copy(tcp_descriptor* desc,SOCKET s, copy_desc->inet.port = port; copy_desc->inet.dport = driver_mk_port(port); + +#ifdef __OSE__ + inet_select_init(©_desc->inet); +#endif + *err = 0; return copy_desc; } @@ -8243,8 +9214,22 @@ static void tcp_inet_stop(ErlDrvData e) inet_stop(INETP(desc)); } +#ifdef __OSE__ - +static ErlDrvSSizeT tcp_inet_ctl_ose(ErlDrvData e, unsigned int cmd, + char* buf, ErlDrvSizeT len, + char** rbuf, ErlDrvSizeT rsize) { + + tcp_descriptor* desc = (tcp_descriptor*)e; + int prev_select_state = INETP(desc)->select_state; + + ErlDrvSSizeT res = tcp_inet_ctl(e,cmd,buf,len,rbuf,rsize); + + tcp_inet_ose_dispatch_signals((tcp_descriptor*)e,prev_select_state,NULL); + + return res; +} +#endif /* TCP requests from Erlang */ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, @@ -8279,10 +9264,11 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, break; } - case INET_REQ_FDOPEN: { /* pass in an open socket */ + case INET_REQ_FDOPEN: { /* pass in an open (and optionally bound) socket */ int domain; + int bound; DEBUGF(("tcp_inet_ctl(%ld): FDOPEN\r\n", (long)desc->inet.port)); - if (len != 6) return ctl_error(EINVAL, rbuf, rsize); + if (len != 6 && len != 10) return ctl_error(EINVAL, rbuf, rsize); switch(buf[0]) { case INET_AF_INET: domain = AF_INET; @@ -8300,8 +9286,13 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, return ctl_error(EINVAL, rbuf, rsize); } if (buf[1] != INET_TYPE_STREAM) return ctl_error(EINVAL, rbuf, rsize); + + if (len == 6) bound = 1; + else bound = get_int32(buf+2+4); + return inet_ctl_fdopen(INETP(desc), domain, SOCK_STREAM, - (SOCKET) get_int32(buf+2), rbuf, rsize); + (SOCKET) get_int32(buf+2), + bound, rbuf, rsize); break; } @@ -8460,7 +9451,7 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, ErlDrvTermData caller = driver_caller(desc->inet.port); tcp_descriptor* accept_desc; int err; - + if ((accept_desc = tcp_inet_copy(desc,s,caller,&err)) == NULL) { sock_close(s); return ctl_error(err, rbuf, rsize); @@ -8494,13 +9485,18 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, char tbuf[2]; int n; - DEBUGF(("tcp_inet_ctl(%ld): RECV\r\n", (long)desc->inet.port)); + DEBUGF(("tcp_inet_ctl(%ld): RECV (s=%d)\r\n", + (long)desc->inet.port, desc->inet.s)); /* INPUT: Timeout(4), Length(4) */ if (!IS_CONNECTED(INETP(desc))) { if (desc->tcp_add_flags & TCP_ADDF_DELAYED_CLOSE_RECV) { desc->tcp_add_flags &= ~(TCP_ADDF_DELAYED_CLOSE_RECV| TCP_ADDF_DELAYED_CLOSE_SEND); - return ctl_reply(INET_REP_ERROR, "closed", 6, rbuf, rsize); + if (desc->tcp_add_flags & TCP_ADDF_DELAYED_ECONNRESET) { + desc->tcp_add_flags &= ~TCP_ADDF_DELAYED_ECONNRESET; + return ctl_reply(INET_REP_ERROR, "econnreset", 10, rbuf, rsize); + } else + return ctl_reply(INET_REP_ERROR, "closed", 6, rbuf, rsize); } return ctl_error(ENOTCONN, rbuf, rsize); } @@ -8526,6 +9522,8 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, driver_set_timer(desc->inet.port, timeout); if (!INETP(desc)->is_ignored) sock_select(INETP(desc),(FD_READ|FD_CLOSE),1); + else + INETP(desc)->is_ignored |= INET_IGNORE_READ; } } return ctl_reply(INET_REP_OK, tbuf, 2, rbuf, rsize); @@ -8550,10 +9548,20 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, return ctl_error(EINVAL, rbuf, rsize); } how = buf[0]; - if (sock_shutdown(INETP(desc)->s, how) == 0) { + if (how != TCP_SHUT_RD && driver_sizeq(desc->inet.port) > 0) { + if (how == TCP_SHUT_WR) { + desc->tcp_add_flags |= TCP_ADDF_PENDING_SHUT_WR; + } else if (how == TCP_SHUT_RDWR) { + desc->tcp_add_flags |= TCP_ADDF_PENDING_SHUT_RDWR; + } return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); - } else { + } + if (IS_SOCKET_ERROR(sock_shutdown(INETP(desc)->s, how))) { + if (how != TCP_SHUT_RD) + desc->tcp_add_flags |= TCP_ADDF_SHUTDOWN_WR_DONE; return ctl_error(sock_errno(), rbuf, rsize); + } else { + return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); } } default: @@ -8664,6 +9672,16 @@ static void tcp_inet_command(ErlDrvData e, char *buf, ErlDrvSizeT len) DEBUGF(("tcp_inet_command(%ld) }\r\n", (long)desc->inet.port)); } +#ifdef __OSE__ + +static void tcp_inet_commandv_ose(ErlDrvData e, ErlIOVec* ev) { + int prev_select_state = INETP((tcp_descriptor*)e)->select_state; + tcp_inet_commandv(e, ev); + tcp_inet_ose_dispatch_signals((tcp_descriptor*)e,prev_select_state,NULL); +} + +#endif + static void tcp_inet_commandv(ErlDrvData e, ErlIOVec* ev) { @@ -8675,11 +9693,19 @@ static void tcp_inet_commandv(ErlDrvData e, ErlIOVec* ev) if (!IS_CONNECTED(INETP(desc))) { if (desc->tcp_add_flags & TCP_ADDF_DELAYED_CLOSE_SEND) { desc->tcp_add_flags &= ~TCP_ADDF_DELAYED_CLOSE_SEND; - inet_reply_error_am(INETP(desc), am_closed); + if (desc->tcp_add_flags & TCP_ADDF_DELAYED_ECONNRESET) { + /* Don't clear flag. Leave it enabled for the next receive + * operation. + */ + inet_reply_error(INETP(desc), ECONNRESET); + } else + inet_reply_error_am(INETP(desc), am_closed); } else inet_reply_error(INETP(desc), ENOTCONN); } + else if (desc->tcp_add_flags & TCP_ADDF_PENDING_SHUTDOWN) + tcp_shutdown_error(desc, EPIPE); else if (tcp_sendv(desc, ev) == 0) inet_reply_ok(INETP(desc)); DEBUGF(("tcp_inet_commandv(%ld) }\r\n", (long)desc->inet.port)); @@ -8692,6 +9718,8 @@ static void tcp_inet_flush(ErlDrvData e) /* Discard send queue to avoid hanging port (OTP-7615) */ tcp_clear_output(desc); } + if (desc->tcp_add_flags & TCP_ADDF_LINGER_ZERO) + tcp_clear_output(desc); } static void tcp_inet_process_exit(ErlDrvData e, ErlDrvMonitor *monitorp) @@ -8727,6 +9755,22 @@ static void inet_stop_select(ErlDrvEvent event, void* _) { #ifdef __WIN32__ WSACloseEvent((HANDLE)event); +#elif defined(__OSE__) + ErlDrvOseEventId id; + union SIGNAL *sig; + erl_drv_ose_event_fetch(event, NULL, &id,NULL); + DEBUGF(("inet_stop_select(?#?) {s=%d\n",id)); + sock_close((int)id); + /* On socket close all the signals waiting to be processed as part of the + select should be deallocated */ + while((sig = erl_drv_ose_get_signal(event))) { + DEBUGF(("inet_stop_select(?#?): Freeing signal %s\n", + signo_to_string(sig->signo))); + free_buf(&sig); + } + erl_drv_ose_event_free(event); + DEBUGF(("inet_stop_select(?#?) }\n")); + #else sock_close((SOCKET)(long)event); #endif @@ -8846,12 +9890,13 @@ static int tcp_remain(tcp_descriptor* desc, int* len) int n = desc->i_ptr - ptr; /* number of bytes read */ int tlen; - DEBUGF(("tcp_remain(%ld): s=%d, n=%d, nfill=%d nsz=%d\r\n", - (long)desc->inet.port, desc->inet.s, n, nfill, nsz)); - tlen = packet_get_length(desc->inet.htype, ptr, n, desc->inet.psize, desc->i_bufsz, - &desc->http_state); + desc->inet.delimiter, &desc->http_state); + + DEBUGF(("tcp_remain(%ld): s=%d, n=%d, nfill=%d nsz=%d, tlen %d\r\n", + (long)desc->inet.port, desc->inet.s, n, nfill, nsz, tlen)); + if (tlen > 0) { if (tlen <= n) { /* got a packet */ *len = tlen; @@ -9037,7 +10082,10 @@ static int tcp_recv(tcp_descriptor* desc, int request_len) int err = sock_errno(); if (err == ECONNRESET) { DEBUGF((" => detected close (connreset)\r\n")); - return tcp_recv_closed(desc); + if (desc->tcp_add_flags & TCP_ADDF_SHOW_ECONNRESET) + return tcp_recv_error(desc, err); + else + return tcp_recv_closed(desc); } if (err == ERRNO_BLOCK) { DEBUGF((" => would block\r\n")); @@ -9249,7 +10297,19 @@ static void tcp_inet_event(ErlDrvData e, ErlDrvEvent event) if (netEv.lNetworkEvents & FD_CLOSE) { /* error in err = netEv.iErrorCode[FD_CLOSE_BIT] */ DEBUGF(("Detected close in %s, line %d\r\n", __FILE__, __LINE__)); - tcp_recv_closed(desc); + if (desc->tcp_add_flags & TCP_ADDF_SHOW_ECONNRESET) { + err = netEv.iErrorCode[FD_CLOSE_BIT]; + if (err == ECONNRESET) + tcp_recv_error(desc, err); + else if (err == ECONNABORTED && IS_CONNECTED(INETP(desc))) { + /* translate this error to ECONNRESET */ + tcp_recv_error(desc, ECONNRESET); + } + else + tcp_recv_closed(desc); + } + else + tcp_recv_closed(desc); } DEBUGF(("tcp_inet_event(%ld) }\r\n", (long)desc->inet.port)); return; @@ -9259,7 +10319,146 @@ static void tcp_inet_event(ErlDrvData e, ErlDrvEvent event) return; } -#endif /* WIN32 */ +#elif defined(__OSE__) /* !__WIN32__ */ +/* The specific resolve signal function. It will return the socket descriptor + for which the select was issued */ +static ErlDrvOseEventId inet_resolve_signal(union SIGNAL *sig) { + DEBUGF(("%s(?#?): s=%d got signal %s, status = %d, extra = %d, sender = 0x%x\n", + __FUNCTION__,sig->async.fd,signo_to_string(sig->signo), + sig->async.event.status, + sig->async.event.extra,sender(&sig))); + if (sig->signo == SO_EVENT_READ_REPLY || + sig->signo == SO_EVENT_ACCEPT_REPLY || + sig->signo == SO_EVENT_EOF_REPLY || + sig->signo == SO_EVENT_WRITE_REPLY || + sig->signo == SO_EVENT_ERROR_REPLY || + sig->signo == SO_EVENT_CONNECT_REPLY ) { + return sig->async.fd; + } + + return -1; +} + +static void inet_driver_select(inet_descriptor* desc, + int flags, int onoff) { + ASSERT(!desc->is_ignored); + + if(onoff) { + desc->select_state |= flags; + } else { + desc->select_state &= ~flags; + } +} + +static ssize_t writev_fallback(int fd, const struct iovec *iov, int iovcnt, int max_sz) +{ + size_t data_len = 0; + size_t sent = 0; + ssize_t n; + int i; + + for(i = 0; i < iovcnt; i++) + { + data_len = iov[i].iov_len; +tryagain: + n = sock_send(fd, iov[i].iov_base, data_len, 0); + if (IS_SOCKET_ERROR(n)) { + /* If buffer length is greater than the amount stack is able to + * send out then try to send at least max_sz (this comes with + * SO_EVENT_WRITE_REPLY signal*/ + if ((errno == EMSGSIZE) && (max_sz > 0) && (data_len > max_sz)) { + data_len = max_sz; + goto tryagain; + } + break; + } + sent += n; + } + return sent; +} + +#define OSE_EVENT_REQ(TCP_DESC,EVENT) do { \ + union SIGNAL *sig = alloc(sizeof(struct OseAsyncSig), EVENT); \ + sig->async.fd = INETP(TCP_DESC)->s; \ + ose_request_event(INETP(TCP_DESC)->s, &sig, 1); \ + DEBUGF(("%s(%ld): s=%d sent %s\r\n",__FUNCTION__, \ + INETP(TCP_DESC)->port,INETP(TCP_DESC)->s,signo_to_string(EVENT))); \ + } while(0) + +static void tcp_inet_ose_dispatch_signals(tcp_descriptor *desc, + int prev_select_state, + union SIGNAL *sig) { + if (sig) { + DEBUGF(("tcp_inet_ose_dispatch_signals(%ld) {s=%d resend\r\n", + (long)INETP(desc)->port,INETP(desc)->s)); + /* We are reacting to a signal, which means that if + the select_state for that signal is still activated + we should send a new signal */ + switch (sig->signo) { + case SO_EVENT_READ_REPLY: { + if (INETP(desc)->select_state & FD_READ) + OSE_EVENT_REQ(desc,SO_EVENT_READ_REQUEST); + break; + } + case SO_EVENT_WRITE_REPLY: { + if (INETP(desc)->select_state & FD_WRITE) + OSE_EVENT_REQ(desc,SO_EVENT_WRITE_REQUEST); + break; + } + case SO_EVENT_CONNECT_REPLY: { + if (INETP(desc)->select_state & FD_CONNECT) + OSE_EVENT_REQ(desc,SO_EVENT_CONNECT_REQUEST); + break; + } + case SO_EVENT_ACCEPT_REPLY: { + if (INETP(desc)->select_state & FD_ACCEPT) + OSE_EVENT_REQ(desc,SO_EVENT_ACCEPT_REQUEST); + break; + } + case SO_EVENT_ERROR_REPLY: { + if (INETP(desc)->select_state & SOCK_FD_ERROR) + OSE_EVENT_REQ(desc,SO_EVENT_ERROR_REQUEST); + break; + } + + } + DEBUGF(("tcp_inet_ose_dispatch_signals(%ld) }\r\n", + (long)INETP(desc)->port)); + } + + if (INETP(desc)->select_state != prev_select_state) { + /* If the select state has changed we have to issue signals for + the state parts that have changed. */ + int xor_select_state = INETP(desc)->select_state ^ prev_select_state; + DEBUGF(("tcp_inet_ose_dispatch_signals(%ld) {s=%d select change\r\n", + (long)INETP(desc)->port,INETP(desc)->s)); + if ((xor_select_state & FD_READ) && + (INETP(desc)->select_state & FD_READ)) { + OSE_EVENT_REQ(desc,SO_EVENT_READ_REQUEST); + } + if ((xor_select_state & FD_WRITE) && + (INETP(desc)->select_state & FD_WRITE)) { + OSE_EVENT_REQ(desc,SO_EVENT_WRITE_REQUEST); + } + if ((xor_select_state & FD_CONNECT) && + (INETP(desc)->select_state & FD_CONNECT)) { + OSE_EVENT_REQ(desc,SO_EVENT_CONNECT_REQUEST); + } + if ((xor_select_state & FD_ACCEPT) && + (INETP(desc)->select_state & FD_ACCEPT)) { + OSE_EVENT_REQ(desc,SO_EVENT_ACCEPT_REQUEST); + } + if ((xor_select_state & SOCK_FD_ERROR) && + (INETP(desc)->select_state & SOCK_FD_ERROR)) { + OSE_EVENT_REQ(desc,SO_EVENT_ERROR_REQUEST); + } + + DEBUGF(("tcp_inet_ose_dispatch_signals(%ld) }\r\n", + (long)INETP(desc)->port)); + } +} + +#endif /* __OSE__ */ /* socket has input: @@ -9280,7 +10479,7 @@ static int tcp_inet_input(tcp_descriptor* desc, HANDLE event) unsigned int len; inet_address remote; inet_async_op *this_op = desc->inet.opt; - + len = sizeof(desc->inet.remote); s = sock_accept(desc->inet.s, (struct sockaddr*) &remote, &len); if (s == INVALID_SOCKET && sock_errno() == ERRNO_BLOCK) { @@ -9349,7 +10548,6 @@ static int tcp_inet_input(tcp_descriptor* desc, HANDLE event) while (desc->inet.state == INET_STATE_MULTI_ACCEPTING) { len = sizeof(desc->inet.remote); s = sock_accept(desc->inet.s, (struct sockaddr*) &remote, &len); - if (s == INVALID_SOCKET && sock_errno() == ERRNO_BLOCK) { /* Just try again, no real error, keep the last return code */ goto done; @@ -9418,8 +10616,11 @@ static int tcp_inet_input(tcp_descriptor* desc, HANDLE event) return ret; } -static int tcp_send_error(tcp_descriptor* desc, int err) +static int tcp_send_or_shutdown_error(tcp_descriptor* desc, int err) { + int show_econnreset = (err == ECONNRESET + && desc->tcp_add_flags & TCP_ADDF_SHOW_ECONNRESET); + /* * If the port is busy, we must do some clean-up before proceeding. */ @@ -9435,14 +10636,21 @@ static int tcp_send_error(tcp_descriptor* desc, int err) /* * We used to handle "expected errors" differently from unexpected ones. - * Now we handle all errors in the same way. We just have to distinguish - * between passive and active sockets. + * Now we handle all errors in the same way (unless the show_econnreset + * socket option is enabled). We just have to distinguish between passive + * and active sockets. */ DEBUGF(("driver_failure_eof(%ld) in %s, line %d\r\n", (long)desc->inet.port, __FILE__, __LINE__)); if (desc->inet.active) { - tcp_closed_message(desc); - inet_reply_error_am(INETP(desc), am_closed); + if (show_econnreset) { + tcp_error_message(desc, err); + tcp_closed_message(desc); + inet_reply_error(INETP(desc), err); + } else { + tcp_closed_message(desc); + inet_reply_error_am(INETP(desc), am_closed); + } if (desc->inet.exitf) driver_exit(desc->inet.port, 0); else @@ -9454,7 +10662,10 @@ static int tcp_send_error(tcp_descriptor* desc, int err) erl_inet_close(INETP(desc)); if (desc->inet.caller) { - inet_reply_error_am(INETP(desc), am_closed); + if (show_econnreset) + inet_reply_error(INETP(desc), err); + else + inet_reply_error_am(INETP(desc), am_closed); } else { /* No blocking send op to reply to right now. @@ -9471,10 +10682,54 @@ static int tcp_send_error(tcp_descriptor* desc, int err) * in the receive operation. */ desc->tcp_add_flags |= TCP_ADDF_DELAYED_CLOSE_RECV; + + if (show_econnreset) { + /* Return {error, econnreset} instead of {error, closed} + * on send or receive operations. + */ + desc->tcp_add_flags |= TCP_ADDF_DELAYED_ECONNRESET; + } } return -1; } +static int tcp_send_error(tcp_descriptor* desc, int err) +{ + /* EPIPE errors usually occur in one of three ways: + * 1. We write to a socket when we've already shutdown() the write side. On + * Windows the error returned for this is ESHUTDOWN rather than EPIPE. + * 2. The TCP peer sends us an RST through no fault of our own (perhaps + * by aborting the connection using SO_LINGER) and we then attempt + * to write to the socket. On Linux and Windows we would actually + * receive an ECONNRESET error for this, but on the BSDs, Darwin, + * Illumos and presumably Solaris, it's an EPIPE. + * 3. We cause the TCP peer to send us an RST by writing to a socket + * after we receive a FIN from them. Our first write will be + * successful, but if the they have closed the connection (rather + * than just shutting down the write side of it) this will cause their + * OS to send us an RST. Then, when we attempt to write to the socket + * a second time, we will get an EPIPE error. On Windows we get an + * ECONNABORTED. + * + * What we are going to do here is to treat all EPIPE messages that aren't + * of type 1 as ECONNRESET errors. This will allow users who have the + * show_econnreset socket option enabled to receive {error, econnreset} on + * both send and recv operations to indicate that an RST has been received. + */ +#ifdef __WIN_32__ + if (err == ECONNABORTED) + err = ECONNRESET; +#endif + if (err == EPIPE && !(desc->tcp_add_flags & TCP_ADDF_SHUTDOWN_WR_DONE)) + err = ECONNRESET; + return tcp_send_or_shutdown_error(desc, err); +} + +static int tcp_shutdown_error(tcp_descriptor* desc, int err) +{ + return tcp_send_or_shutdown_error(desc, err); +} + /* ** Send non-blocking vector data */ @@ -9675,6 +10930,64 @@ static int tcp_send(tcp_descriptor* desc, char* ptr, ErlDrvSizeT len) return 0; } +/* shutdown on the socket: +** Assume caller has confirmed TCP_ADDF_PENDING_SHUTDOWN is set. +*/ +static void tcp_shutdown_async(tcp_descriptor* desc) +{ + int how; + + how = (desc->tcp_add_flags & TCP_ADDF_PENDING_SHUT_WR) ? + TCP_SHUT_WR : TCP_SHUT_RDWR; + if (IS_SOCKET_ERROR(sock_shutdown(INETP(desc)->s, how))) + tcp_shutdown_error(desc, sock_errno()); + else + desc->tcp_add_flags |= TCP_ADDF_SHUTDOWN_WR_DONE; +} + +#ifdef __OSE__ + +static void tcp_inet_drv_output_ose(ErlDrvData data, ErlDrvEvent event) +{ + union SIGNAL *event_sig = erl_drv_ose_get_signal(event); + + while (event_sig) { + int prev_select_state = INETP((tcp_descriptor*)data)->select_state; + int res = tcp_inet_output((tcp_descriptor*)data, (HANDLE)event_sig); + if (res != -1) { + tcp_inet_ose_dispatch_signals((tcp_descriptor*)data, + prev_select_state,event_sig); + free_buf(&event_sig); + event_sig = erl_drv_ose_get_signal(event); + } else { + /* NOTE: here the event object could have been deallocated!!!! + inet_stop_select is called when doing driver_select(ERL_DRV_USE,0) + */ + free_buf(&event_sig); + return; + } + } +} + +static void tcp_inet_drv_input_ose(ErlDrvData data, ErlDrvEvent event) +{ + union SIGNAL *event_sig = erl_drv_ose_get_signal(event); + + while (event_sig) { + int prev_select_state = INETP((tcp_descriptor*)data)->select_state; + int res = tcp_inet_input((tcp_descriptor*)data, (HANDLE)event); + if (res != -1) { + tcp_inet_ose_dispatch_signals((tcp_descriptor*)data, prev_select_state, + event_sig); + free_buf(&event_sig); + event_sig = erl_drv_ose_get_signal(event); + } else { + free_buf(&event_sig); + return; + } + } +} +#else static void tcp_inet_drv_output(ErlDrvData data, ErlDrvEvent event) { (void)tcp_inet_output((tcp_descriptor*)data, (HANDLE)event); @@ -9684,6 +10997,7 @@ static void tcp_inet_drv_input(ErlDrvData data, ErlDrvEvent event) { (void)tcp_inet_input((tcp_descriptor*)data, (HANDLE)event); } +#endif /* socket ready for ouput: ** 1. INET_STATE_CONNECTING => non block connect ? @@ -9749,9 +11063,18 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event) ssize_t n; SysIOVec* iov; +#ifdef __OSE__ + /* For large size buffers case the amount of data that the stack is + able to send out (received in the .extra field) should be passed + down to writev_fallback */ + n = event ? ((union SIGNAL*)event)->async.event.extra : 0; +#endif + if ((iov = driver_peekq(ix, &vsize)) == NULL) { sock_select(INETP(desc), FD_WRITE, 0); send_empty_out_q_msgs(INETP(desc)); + if (desc->tcp_add_flags & TCP_ADDF_PENDING_SHUTDOWN) + tcp_shutdown_async(desc); goto done; } vsize = vsize > MAX_VSIZE ? MAX_VSIZE : vsize; @@ -9760,8 +11083,8 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event) if (IS_SOCKET_ERROR(sock_sendv(desc->inet.s, iov, vsize, &n, 0))) { write_error: if ((sock_errno() != ERRNO_BLOCK) && (sock_errno() != EINTR)) { - DEBUGF(("tcp_inet_output(%ld): sock_sendv(%d) errno = %d\r\n", - (long)desc->inet.port, vsize, sock_errno())); + DEBUGF(("tcp_inet_output(%ld): sock_sendv(%d) errno = %d (errno %d)\r\n", + (long)desc->inet.port, vsize, sock_errno(), errno)); ret = tcp_send_error(desc, sock_errno()); goto done; } @@ -9774,6 +11097,12 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event) sizes > (max 32 bit signed int) */ size_t howmuch = 0x7FFFFFFF; /* max signed 32 bit */ int x; +#ifdef __OSE__ + /* For EWOULDBLOCK sock_sendv returns 0 so we have to be sure it + wasn't the case */ + if(sock_errno() == ERRNO_BLOCK) + goto done; +#endif for(x = 0; x < vsize && iov[x].iov_len == 0; ++x) ; if (x < vsize) { @@ -9918,6 +11247,7 @@ static udp_descriptor* sctp_inet_copy(udp_descriptor* desc, SOCKET s, int* err) +#ifdef HAVE_UDP static int packet_inet_init() { return 0; @@ -9948,6 +11278,7 @@ static ErlDrvData udp_inet_start(ErlDrvPort port, char *args) set_default_msgq_limits(port); return data; } +#endif #ifdef HAVE_SCTP static ErlDrvData sctp_inet_start(ErlDrvPort port, char *args) @@ -9958,6 +11289,7 @@ static ErlDrvData sctp_inet_start(ErlDrvPort port, char *args) } #endif +#ifdef HAVE_UDP static void packet_inet_stop(ErlDrvData e) { /* There should *never* be any "empty out q" subscribers on @@ -10046,10 +11378,11 @@ static ErlDrvSSizeT packet_inet_ctl(ErlDrvData e, unsigned int cmd, char* buf, return replen; - case INET_REQ_FDOPEN: { /* pass in an open (and bound) socket */ + case INET_REQ_FDOPEN: { /* pass in an open (and optionally bound) socket */ SOCKET s; + int bound; DEBUGF(("packet inet_ctl(%ld): FDOPEN\r\n", (long)desc->port)); - if (len != 6) { + if (len != 6 && len != 10) { return ctl_error(EINVAL, rbuf, rsize); } switch (buf[0]) { @@ -10074,7 +11407,11 @@ static ErlDrvSSizeT packet_inet_ctl(ErlDrvData e, unsigned int cmd, char* buf, return ctl_error(EINVAL, rbuf, rsize); } s = (SOCKET)get_int32(buf+2); - replen = inet_ctl_fdopen(desc, af, type, s, rbuf, rsize); + + if (len == 6) bound = 1; + else bound = get_int32(buf+2+4); + + replen = inet_ctl_fdopen(desc, af, type, s, bound, rbuf, rsize); if ((*rbuf)[0] != INET_REP_ERROR) { if (desc->active) @@ -10468,7 +11805,7 @@ static void packet_inet_command(ErlDrvData e, char* buf, ErlDrvSizeT len) else inet_reply_ok(desc); } - +#endif #ifdef __WIN32__ static void packet_inet_event(ErlDrvData e, ErlDrvEvent event) @@ -10490,6 +11827,7 @@ static void packet_inet_event(ErlDrvData e, ErlDrvEvent event) #endif +#ifdef HAVE_UDP static void packet_inet_drv_input(ErlDrvData e, ErlDrvEvent event) { (void) packet_inet_input((udp_descriptor*)e, (HANDLE)event); @@ -10617,7 +11955,7 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event) inet_input_count(desc, n); udesc->i_ptr += n; - inet_get_address(desc->sfamily, abuf, &other, &len); + inet_get_address(abuf, &other, &len); /* Copy formatted address to the buffer allocated; "len" is the actual length which must be <= than the original reserved. This means that the addr + data in the buffer are contiguous, @@ -10746,6 +12084,7 @@ static int packet_inet_output(udp_descriptor* udesc, HANDLE event) DEBUGF(("packet_inet_output(%ld) }\r\n", (long)desc->port)); return ret; } +#endif /*---------------------------------------------------------------------------*/ @@ -10805,115 +12144,18 @@ make_noninheritable_handle(SOCKET s) * Multi-timers */ -static void absolute_timeout(unsigned millis, ErlDrvNowData *out) -{ - unsigned rest; - unsigned long millipart; - unsigned long secpart; - unsigned long megasecpart; - unsigned tmo_secs = (millis / 1000U); - unsigned tmo_millis = (millis % 1000); - driver_get_now(out); - rest = (out->microsecs) % 1000; - millipart = ((out->microsecs) / 1000UL); - if (rest >= 500) { - ++millipart; - } - secpart = out->secs; - megasecpart = out->megasecs; - millipart += tmo_millis; - secpart += (millipart / 1000000UL); - millipart %= 1000000UL; - secpart += tmo_secs; - megasecpart += (secpart / 1000000UL); - secpart %= 1000000UL; - out->megasecs = megasecpart; - out->secs = secpart; - out->microsecs = (millipart * 1000UL); -} - -static unsigned relative_timeout(ErlDrvNowData *in) -{ - ErlDrvNowData now; - unsigned rest; - unsigned long millipart, in_millis, in_secs, in_megasecs; - - driver_get_now(&now); - - in_secs = in->secs; - in_megasecs = in->megasecs; - - rest = (now.microsecs) % 1000; - millipart = ((now.microsecs) / 1000UL); - if (rest >= 500) { - ++millipart; - } - in_millis = ((in->microsecs) / 1000UL); - if ( in_millis < millipart ) { - if (in_secs > 0) { - --in_secs; - } else { - in_secs = (1000000UL - 1UL); - if (in_megasecs <= now.megasecs) { - return 0; - } else { - --in_megasecs; - } - } - in_millis += 1000UL; - } - in_millis -= millipart; - - if (in_secs < now.secs) { - if (in_megasecs <= now.megasecs) { - return 0; - } else { - --in_megasecs; - } - in_secs += 1000000; - } - in_secs -= now.secs; - if (in_megasecs < now.megasecs) { - return 0; - } else { - in_megasecs -= now.megasecs; - } - return (unsigned) ((in_megasecs * 1000000000UL) + - (in_secs * 1000UL) + - in_millis); -} - -#ifdef DEBUG -static int nowcmp(ErlDrvNowData *d1, ErlDrvNowData *d2) -{ - /* Assume it's not safe to do signed conversion on megasecs... */ - if (d1->megasecs < d2->megasecs) { - return -1; - } else if (d1->megasecs > d2->megasecs) { - return 1; - } else if (d1->secs != d2->secs) { - return ((int) d1->secs) - ((int) d2->secs); - } - return ((int) d1->microsecs) - ((int) d2->microsecs); -} -#endif - static void fire_multi_timers(MultiTimerData **first, ErlDrvPort port, ErlDrvData data) { - unsigned next_timeout; + ErlDrvTime next_timeout; if (!*first) { ASSERT(0); return; } #ifdef DEBUG { - ErlDrvNowData chk; - driver_get_now(&chk); - chk.microsecs /= 10000UL; - chk.microsecs *= 10000UL; - chk.microsecs += 10000; - ASSERT(nowcmp(&chk,&((*first)->when)) >= 0); + ErlDrvTime chk = erl_drv_monotonic_time(ERL_DRV_MSEC); + ASSERT(chk >= (*first)->when); } #endif do { @@ -10925,9 +12167,9 @@ static void fire_multi_timers(MultiTimerData **first, ErlDrvPort port, return; } (*first)->prev = NULL; - next_timeout = relative_timeout(&((*first)->when)); - } while (next_timeout == 0); - driver_set_timer(port,next_timeout); + next_timeout = (*first)->when - erl_drv_monotonic_time(ERL_DRV_MSEC); + } while (next_timeout <= 0); + driver_set_timer(port, (unsigned long) next_timeout); } static void clean_multi_timers(MultiTimerData **first, ErlDrvPort port) @@ -10950,8 +12192,10 @@ static void remove_multi_timer(MultiTimerData **first, ErlDrvPort port, MultiTim driver_cancel_timer(port); *first = p->next; if (*first) { - unsigned ntmo = relative_timeout(&((*first)->when)); - driver_set_timer(port,ntmo); + ErlDrvTime ntmo = (*first)->when - erl_drv_monotonic_time(ERL_DRV_MSEC); + if (ntmo < 0) + ntmo = 0; + driver_set_timer(port, (unsigned long) ntmo); } } if (p->next != NULL) { @@ -10967,32 +12211,16 @@ static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port, { MultiTimerData *mtd, *p, *s; mtd = ALLOC(sizeof(MultiTimerData)); - absolute_timeout(timeout, &(mtd->when)); + mtd->when = erl_drv_monotonic_time(ERL_DRV_MSEC) + ((ErlDrvTime) timeout) + 1; mtd->timeout_function = timeout_fun; mtd->caller = caller; mtd->next = mtd->prev = NULL; for(p = *first,s = NULL; p != NULL; s = p, p = p->next) { - if (p->when.megasecs >= mtd->when.megasecs) { - break; - } - } - if (!p || p->when.megasecs > mtd->when.megasecs) { - goto found; - } - for (; p!= NULL; s = p, p = p->next) { - if (p->when.secs >= mtd->when.secs) { + if (p->when >= mtd->when) { break; } } - if (!p || p->when.secs > mtd->when.secs) { - goto found; - } - for (; p!= NULL; s = p, p = p->next) { - if (p->when.microsecs >= mtd->when.microsecs) { - break; - } - } - found: + if (!p) { if (!s) { *first = mtd; @@ -11018,10 +12246,6 @@ static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port, } return mtd; } - - - - /*----------------------------------------------------------------------------- |