diff options
Diffstat (limited to 'erts/emulator/drivers/common/inet_drv.c')
| -rw-r--r-- | erts/emulator/drivers/common/inet_drv.c | 671 |
1 files changed, 574 insertions, 97 deletions
diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c index fb128fff7d..80937dfcc8 100644 --- a/erts/emulator/drivers/common/inet_drv.c +++ b/erts/emulator/drivers/common/inet_drv.c @@ -86,6 +86,13 @@ #endif typedef unsigned long long llu_t; +#ifndef INT16_MIN +#define INT16_MIN (-32768) +#endif +#ifndef INT16_MAX +#define INT16_MAX (32767) +#endif + #ifdef __WIN32__ #define STRNCASECMP strncasecmp @@ -282,7 +289,7 @@ static BOOL (WINAPI *fpSetHandleInformation)(HANDLE,DWORD,DWORD); static unsigned long zero_value = 0; static unsigned long one_value = 1; -#else +#else /* #ifdef __WIN32__ */ #include <sys/time.h> #ifdef NETDB_H_NEEDS_IN_H @@ -315,9 +322,17 @@ static unsigned long one_value = 1; #include <net/if.h> +#ifdef HAVE_SCHED_H +#include <sched.h> +#endif + +#ifdef HAVE_SETNS_H +#include <setns.h> +#endif + /* SCTP support -- currently for UNIX platforms only: */ #undef HAVE_SCTP -#if (!defined(__WIN32__) && defined(HAVE_SCTP_H)) +#if defined(HAVE_SCTP_H) #include <netinet/sctp.h> @@ -409,16 +424,47 @@ static unsigned long one_value = 1; # define sctp_adaptation_layer_event sctp_adaption_layer_event #endif -#ifdef __GNUC__ +#if defined(__GNUC__) && defined(HAVE_SCTP_BINDX) static typeof(sctp_bindx) *p_sctp_bindx = NULL; +#else +static int (*p_sctp_bindx) + (int sd, struct sockaddr *addrs, int addrcnt, int flags) = NULL; +#endif + +#if defined(__GNUC__) && defined(HAVE_SCTP_PEELOFF) static typeof(sctp_peeloff) *p_sctp_peeloff = NULL; #else -static int (*p_sctp_bindx)(int sd, struct sockaddr *addrs, - int addrcnt, int flags) = NULL; -static int (*p_sctp_peeloff)(int sd, sctp_assoc_t assoc_id) = NULL; +static int (*p_sctp_peeloff) + (int sd, sctp_assoc_t assoc_id) = NULL; +#endif + +#if defined(__GNUC__) && defined(HAVE_SCTP_GETLADDRS) +static typeof(sctp_getladdrs) *p_sctp_getladdrs = NULL; +#else +static int (*p_sctp_getladdrs) + (int sd, sctp_assoc_t assoc_id, struct sockaddr **ss) = NULL; +#endif + +#if defined(__GNUC__) && defined(HAVE_SCTP_FREELADDRS) +static typeof(sctp_freeladdrs) *p_sctp_freeladdrs = NULL; +#else +static void (*p_sctp_freeladdrs)(struct sockaddr *addrs) = NULL; +#endif + +#if defined(__GNUC__) && defined(HAVE_SCTP_GETPADDRS) +static typeof(sctp_getpaddrs) *p_sctp_getpaddrs = NULL; +#else +static int (*p_sctp_getpaddrs) + (int sd, sctp_assoc_t assoc_id, struct sockaddr **ss) = NULL; #endif -#endif /* SCTP supported */ +#if defined(__GNUC__) && defined(HAVE_SCTP_FREEPADDRS) +static typeof(sctp_freepaddrs) *p_sctp_freepaddrs = NULL; +#else +static void (*p_sctp_freepaddrs)(struct sockaddr *addrs) = NULL; +#endif + +#endif /* #if defined(HAVE_SCTP_H) */ #ifndef WANT_NONBLOCKING #define WANT_NONBLOCKING @@ -512,7 +558,7 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) } while(0) -#endif /* __WIN32__ */ +#endif /* #ifdef __WIN32__ #else */ #ifdef HAVE_SOCKLEN_T # define SOCKLEN_T socklen_t @@ -573,6 +619,7 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) #define INET_PASSIVE 0 /* false */ #define INET_ACTIVE 1 /* true */ #define INET_ONCE 2 /* true; active once then passive */ +#define INET_MULTI 3 /* true; active N then passive */ /* INET_REQ_GETSTATUS enumeration */ #define INET_F_OPEN 0x0001 @@ -585,7 +632,7 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) #define INET_F_BUSY 0x0080 #define INET_F_MULTI_CLIENT 0x0100 /* Multiple clients for one descriptor, i.e. multi-accept */ -/* One numberspace for *_REC_* so if an e.g UDP request is issued +/* One numberspace for *_REQ_* so if an e.g UDP request is issued ** for a TCP socket, the driver can protest. */ #define INET_REQ_OPEN 1 @@ -616,6 +663,8 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) #define INET_REQ_ACCEPT 26 #define INET_REQ_LISTEN 27 #define INET_REQ_IGNOREFD 28 +#define INET_REQ_GETLADDRS 29 +#define INET_REQ_GETPADDRS 30 /* TCP requests */ /* #define TCP_REQ_ACCEPT 40 MOVED */ @@ -678,8 +727,9 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) #define INET_LOPT_UDP_READ_PACKETS 33 /* Number of packets to read */ #define INET_OPT_RAW 34 /* Raw socket options */ #define INET_LOPT_TCP_SEND_TIMEOUT_CLOSE 35 /* auto-close on send timeout or not */ -#define INET_LOPT_TCP_MSGQ_HIWTRMRK 36 /* set local high watermark */ -#define INET_LOPT_TCP_MSGQ_LOWTRMRK 37 /* set local low watermark */ +#define INET_LOPT_MSGQ_HIWTRMRK 36 /* set local msgq high watermark */ +#define INET_LOPT_MSGQ_LOWTRMRK 37 /* set local msgq low watermark */ +#define INET_LOPT_NETNS 38 /* Network namespace pathname */ /* SCTP options: a separate range, from 100: */ #define SCTP_OPT_RTOINFO 100 #define SCTP_OPT_ASSOCINFO 101 @@ -916,6 +966,7 @@ typedef struct { inet_async_op op_queue[INET_MAX_ASYNC]; /* call queue */ int active; /* 0 = passive, 1 = active, 2 = active once */ + Sint16 active_count; /* counter for {active,N} */ int stype; /* socket type: SOCK_STREAM/SOCK_DGRAM/SOCK_SEQPACKET */ int sprotocol; /* socket protocol: @@ -955,6 +1006,10 @@ typedef struct { int is_ignored; /* if a fd is ignored by the inet_drv. This flag should be set to true when the fd is used outside of inet_drv. */ +#ifdef HAVE_SETNS + char *netns; /* Socket network namespace name + as full file path */ +#endif } inet_descriptor; @@ -1147,22 +1202,36 @@ static int packet_inet_output(udp_descriptor* udesc, HANDLE event); static int async_ref = 0; /* async reference id generator */ #define NEW_ASYNC_ID() ((async_ref++) & 0xffff) +/* check for transition from active to passive */ +#define INET_CHECK_ACTIVE_TO_PASSIVE(inet) \ + do { \ + if ((inet)->active == INET_ONCE) \ + (inet)->active = INET_PASSIVE; \ + else if ((inet)->active == INET_MULTI && --((inet)->active_count) == 0) { \ + (inet)->active = INET_PASSIVE; \ + packet_passive_message(inet); \ + } \ + } while (0) static ErlDrvTermData am_ok; static ErlDrvTermData am_tcp; static ErlDrvTermData am_udp; static ErlDrvTermData am_error; +static ErlDrvTermData am_einval; static ErlDrvTermData am_inet_async; static ErlDrvTermData am_inet_reply; static ErlDrvTermData am_timeout; static ErlDrvTermData am_closed; +static ErlDrvTermData am_tcp_passive; static ErlDrvTermData am_tcp_closed; static ErlDrvTermData am_tcp_error; +static ErlDrvTermData am_udp_passive; static ErlDrvTermData am_udp_error; static ErlDrvTermData am_empty_out_q; static ErlDrvTermData am_ssl_tls; #ifdef HAVE_SCTP static ErlDrvTermData am_sctp; +static ErlDrvTermData am_sctp_passive; static ErlDrvTermData am_sctp_error; static ErlDrvTermData am_true; static ErlDrvTermData am_false; @@ -1172,6 +1241,7 @@ static ErlDrvTermData am_list; static ErlDrvTermData am_binary; static ErlDrvTermData am_active; static ErlDrvTermData am_once; +static ErlDrvTermData am_multi; static ErlDrvTermData am_buffer; static ErlDrvTermData am_linger; static ErlDrvTermData am_recbuf; @@ -1181,6 +1251,7 @@ static ErlDrvTermData am_dontroute; static ErlDrvTermData am_priority; static ErlDrvTermData am_tos; static ErlDrvTermData am_ipv6_v6only; +static ErlDrvTermData am_netns; #endif /* speical errors for bad ports and sequences */ @@ -1403,8 +1474,7 @@ static int load_ip_address(ErlDrvTermData* spec, int i, int family, char* buf) #ifdef HAVE_SCTP /* For SCTP, we often need to return {IP, Port} tuples: */ -static int inet_get_address - (int family, char* dst, inet_address* src, unsigned int* len); +static int inet_get_address(char* dst, inet_address* src, unsigned int* len); #define LOAD_IP_AND_PORT_CNT \ (8*LOAD_INT_CNT + LOAD_TUPLE_CNT + LOAD_INT_CNT + LOAD_TUPLE_CNT) @@ -1419,10 +1489,9 @@ static int load_ip_and_port unsigned int len = sizeof(struct sockaddr_storage); unsigned int alen = len; char abuf [len]; - int res = - inet_get_address(desc->sfamily, abuf, (inet_address*) addr, &alen); - ASSERT(res==0); - res = 0; + int res = inet_get_address(abuf, (inet_address*) addr, &alen); + ASSERT(res==0); (void)res; + /* Now "abuf" contains: Family(1b), Port(2b), IP(4|16b) */ /* NB: the following functions are safe to use, as they create tuples @@ -3323,6 +3392,34 @@ static int packet_binary_message } /* +** active mode message: send active-to-passive transition message +** {tcp_passive, S} or +** {udp_passive, S} or +** {sctp_passive, S} +*/ + static int packet_passive_message(inet_descriptor* desc) + { + ErlDrvTermData spec[6]; + int i = 0; + + DEBUGF(("packet_passive_message(%ld):\r\n", (long)desc->port)); + + if (desc->sprotocol == IPPROTO_TCP) + i = LOAD_ATOM(spec, i, am_tcp_passive); + else { +#ifdef HAVE_SCTP + i = LOAD_ATOM(spec, i, IS_SCTP(desc) ? am_sctp_passive : am_udp_passive); +#else + i = LOAD_ATOM(spec, i, am_udp_passive); +#endif + } + i = LOAD_PORT(spec, i, desc->dport); + i = LOAD_TUPLE(spec, i, 2); + ASSERT(i <= 6); + return erl_drv_output_term(desc->dport, spec, i); + } + +/* ** send active message {udp_error|sctp_error, S, Error} */ static int packet_error_message(udp_descriptor* udesc, int err) @@ -3364,7 +3461,7 @@ static int tcp_reply_data(tcp_descriptor* desc, char* buf, int len) int code; const char* body = buf; int bodylen = len; - + packet_get_body(desc->inet.htype, &body, &bodylen); if (desc->inet.deliver == INET_DELIVER_PORT) { @@ -3382,8 +3479,7 @@ static int tcp_reply_data(tcp_descriptor* desc, char* buf, int len) if (code < 0) return code; - if (desc->inet.active == INET_ONCE) - desc->inet.active = INET_PASSIVE; + INET_CHECK_ACTIVE_TO_PASSIVE(INETP(desc)); return code; } @@ -3410,8 +3506,7 @@ tcp_reply_binary_data(tcp_descriptor* desc, ErlDrvBinary* bin, int offs, int len } if (code < 0) return code; - if (desc->inet.active == INET_ONCE) - desc->inet.active = INET_PASSIVE; + INET_CHECK_ACTIVE_TO_PASSIVE(INETP(desc)); return code; } @@ -3434,8 +3529,7 @@ packet_reply_binary_data(inet_descriptor* desc, unsigned int hsz, code = packet_binary_message(desc, bin, offs, len, extra); if (code < 0) return code; - if (desc->active == INET_ONCE) - desc->active = INET_PASSIVE; + INET_CHECK_ACTIVE_TO_PASSIVE(desc); return code; } } @@ -3480,6 +3574,7 @@ sock_init(void) /* May be called multiple times. */ #ifdef HAVE_SCTP static void inet_init_sctp(void) { INIT_ATOM(sctp); + INIT_ATOM(sctp_passive); INIT_ATOM(sctp_error); INIT_ATOM(true); INIT_ATOM(false); @@ -3489,6 +3584,7 @@ static void inet_init_sctp(void) { INIT_ATOM(binary); INIT_ATOM(active); INIT_ATOM(once); + INIT_ATOM(multi); INIT_ATOM(buffer); INIT_ATOM(linger); INIT_ATOM(recbuf); @@ -3498,6 +3594,7 @@ static void inet_init_sctp(void) { INIT_ATOM(priority); INIT_ATOM(tos); INIT_ATOM(ipv6_v6only); + INIT_ATOM(netns); /* Option names */ INIT_ATOM(sctp_rtoinfo); @@ -3613,12 +3710,15 @@ static int inet_init() INIT_ATOM(tcp); INIT_ATOM(udp); INIT_ATOM(error); + INIT_ATOM(einval); INIT_ATOM(inet_async); INIT_ATOM(inet_reply); INIT_ATOM(timeout); INIT_ATOM(closed); + INIT_ATOM(tcp_passive); INIT_ATOM(tcp_closed); INIT_ATOM(tcp_error); + INIT_ATOM(udp_passive); INIT_ATOM(udp_error); INIT_ATOM(empty_out_q); INIT_ATOM(ssl_tls); @@ -3643,9 +3743,27 @@ static int inet_init() /* Check the size of SCTP AssocID -- currently both this driver and the Erlang part require 32 bit: */ ASSERT(sizeof(sctp_assoc_t)==ASSOC_ID_LEN); -# if defined(HAVE_SCTP_BINDX) && defined (HAVE_SCTP_PEELOFF) +# if defined(HAVE_SCTP_BINDX) p_sctp_bindx = sctp_bindx; +# if defined(HAVE_SCTP_PEELOFF) p_sctp_peeloff = sctp_peeloff; +# else + p_sctp_peeloff = NULL; +# endif +# if defined(HAVE_SCTP_GETLADDRS) && defined(HAVE_SCTP_FREELADDRS) + p_sctp_getladdrs = sctp_getladdrs; + p_sctp_freeladdrs = sctp_freeladdrs; +# else + p_sctp_getladdrs = NULL; + p_sctp_freeladdrs = NULL; +# endif +# if defined(HAVE_SCTP_GETPADDRS) && defined(HAVE_SCTP_FREEPADDRS) + p_sctp_getpaddrs = sctp_getpaddrs; + p_sctp_freepaddrs = sctp_freepaddrs; +# else + p_sctp_getpaddrs = NULL; + p_sctp_freepaddrs = NULL; +# endif inet_init_sctp(); add_driver_entry(&sctp_inet_driver_entry); # else @@ -3660,12 +3778,36 @@ static int inet_init() void *ptr; if (erts_sys_ddll_sym(h_libsctp, "sctp_bindx", &ptr) == 0) { p_sctp_bindx = ptr; - inet_init_sctp(); - add_driver_entry(&sctp_inet_driver_entry); if (erts_sys_ddll_sym(h_libsctp, "sctp_peeloff", &ptr) == 0) { p_sctp_peeloff = ptr; } + else p_sctp_peeloff = NULL; + if (erts_sys_ddll_sym(h_libsctp, "sctp_getladdrs", &ptr) == 0) { + p_sctp_getladdrs = ptr; + } + else p_sctp_getladdrs = NULL; + if (erts_sys_ddll_sym(h_libsctp, "sctp_freeladdrs", &ptr) == 0) { + p_sctp_freeladdrs = ptr; + } + else { + p_sctp_freeladdrs = NULL; + p_sctp_getladdrs = NULL; + } + if (erts_sys_ddll_sym(h_libsctp, "sctp_getpaddrs", &ptr) == 0) { + p_sctp_getpaddrs = ptr; + } + else p_sctp_getpaddrs = NULL; + if (erts_sys_ddll_sym(h_libsctp, "sctp_freepaddrs", &ptr) == 0) { + p_sctp_freepaddrs = ptr; + } + else { + p_sctp_freepaddrs = NULL; + p_sctp_getpaddrs = NULL; + } + inet_init_sctp(); + add_driver_entry(&sctp_inet_driver_entry); } + else p_sctp_bindx = NULL; } } # endif @@ -3820,10 +3962,12 @@ static char *inet_set_faddress(int family, inet_address* dst, ** and *len is the length of dst on return ** (suitable to deliver to erlang) */ -static int inet_get_address(int family, char* dst, inet_address* src, unsigned int* len) +static int inet_get_address(char* dst, inet_address* src, unsigned int* len) { + int family; short port; + family = src->sa.sa_family; if ((family == AF_INET) && (*len >= sizeof(struct sockaddr_in))) { dst[0] = INET_AF_INET; port = sock_ntohs(src->sai.sin_port); @@ -3845,6 +3989,75 @@ static int inet_get_address(int family, char* dst, inet_address* src, unsigned i return -1; } +/* Same as the above, but take family from the address structure, +** and advance the address pointer to the next address +** according to the size of the current, +** and return the resulting encoded size +*/ +static int inet_address_to_erlang(char *dst, inet_address **src) { + short port; + + switch ((*src)->sa.sa_family) { + case AF_INET: + if (dst) { + dst[0] = INET_AF_INET; + port = sock_ntohs((*src)->sai.sin_port); + put_int16(port, dst+1); + sys_memcpy(dst+1+2, (char *) &(*src)->sai.sin_addr, 4); + } + (*src) = (inet_address *) (&(*src)->sai + 1); + return 1 + 2 + 4; +#if defined(HAVE_IN6) && defined(AF_INET6) + case AF_INET6: + if (dst) { + dst[0] = INET_AF_INET6; + port = sock_ntohs((*src)->sai6.sin6_port); + put_int16(port, dst+1); + VALGRIND_MAKE_MEM_DEFINED(&(*src)->sai6.sin6_addr,16); /* false undefs from syscall sctp_get[lp]addrs */ + sys_memcpy(dst+1+2, (char *) &(*src)->sai6.sin6_addr, 16); + } + (*src) = (inet_address *) (&(*src)->sai6 + 1); + return 1 + 2 + 16; +#endif + default: + return -1; + } +} + +/* Encode n encoded addresses from addrs in the result buffer +*/ +static ErlDrvSizeT reply_inet_addrs +(int n, inet_address *addrs, char **rbuf, ErlDrvSizeT rsize) { + inet_address *ia; + int i, s; + ErlDrvSizeT rlen; + + if (IS_SOCKET_ERROR(n)) return ctl_error(sock_errno(), rbuf, rsize); + if (n == 0) return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); + + /* Calculate result length */ + rlen = 1; + ia = addrs; + for (i = 0; i < n; i++) { + s = inet_address_to_erlang(NULL, &ia); + if (s < 0) break; + rlen += s; + } + + if (rlen > rsize) (*rbuf) = ALLOC(rlen); + + (*rbuf)[0] = INET_REP_OK; + rlen = 1; + ia = addrs; + for (i = 0; i < n; i++) { + s = inet_address_to_erlang((*rbuf)+rlen, &ia); + if (s < 0) break; + rlen += s; + } + + return rlen; +} + static void desc_close(inet_descriptor* desc) { if (desc->s != INVALID_SOCKET) { @@ -3854,8 +4067,10 @@ static void desc_close(inet_descriptor* desc) desc->forced_events = 0; desc->send_would_block = 0; #endif - // We should close the fd here, but the other driver might still - // be selecting on it. + /* + * We should close the fd here, but the other driver might still + * be selecting on it. + */ if (!desc->is_ignored) driver_select(desc->port,(ErlDrvEvent)(long)desc->event, ERL_DRV_USE, 0); @@ -3906,12 +4121,81 @@ static int erl_inet_close(inet_descriptor* desc) static ErlDrvSSizeT inet_ctl_open(inet_descriptor* desc, int domain, int type, char** rbuf, ErlDrvSizeT rsize) { + int save_errno; +#ifdef HAVE_SETNS + int current_ns, new_ns; + current_ns = new_ns = 0; +#endif + save_errno = 0; + if (desc->state != INET_STATE_CLOSED) return ctl_xerror(EXBADSEQ, rbuf, rsize); + +#ifdef HAVE_SETNS + if (desc->netns != NULL) { + /* Temporarily change network namespace for this thread + * while creating the socket + */ + current_ns = open("/proc/self/ns/net", O_RDONLY); + if (current_ns == INVALID_SOCKET) + return ctl_error(sock_errno(), rbuf, rsize); + new_ns = open(desc->netns, O_RDONLY); + if (new_ns == INVALID_SOCKET) { + save_errno = sock_errno(); + while (close(current_ns) == INVALID_SOCKET && + sock_errno() == EINTR); + return ctl_error(save_errno, rbuf, rsize); + } + if (setns(new_ns, CLONE_NEWNET) != 0) { + save_errno = sock_errno(); + while (close(new_ns) == INVALID_SOCKET && + sock_errno() == EINTR); + while (close(current_ns) == INVALID_SOCKET && + sock_errno() == EINTR); + return ctl_error(save_errno, rbuf, rsize); + } + else { + while (close(new_ns) == INVALID_SOCKET && + sock_errno() == EINTR); + } + } +#endif if ((desc->s = sock_open(domain, type, desc->sprotocol)) == INVALID_SOCKET) - return ctl_error(sock_errno(), rbuf, rsize); - if ((desc->event = sock_create_event(desc)) == INVALID_EVENT) - return ctl_error(sock_errno(), rbuf, rsize); + save_errno = sock_errno(); +#ifdef HAVE_SETNS + if (desc->netns != NULL) { + /* Restore network namespace */ + if (setns(current_ns, CLONE_NEWNET) != 0) { + /* XXX Failed to restore network namespace. + * What to do? Tidy up and return an error... + * Note that the thread now might still be in the namespace. + * Can this even happen? Should the emulator be aborted? + */ + if (desc->s != INVALID_SOCKET) + save_errno = sock_errno(); + while (close(desc->s) == INVALID_SOCKET && + sock_errno() == EINTR); + desc->s = INVALID_SOCKET; + while (close(current_ns) == INVALID_SOCKET && + sock_errno() == EINTR); + return ctl_error(save_errno, rbuf, rsize); + } + else { + while (close(current_ns) == INVALID_SOCKET && + sock_errno() == EINTR); + } + } +#endif + if (desc->s == INVALID_SOCKET) + return ctl_error(save_errno, rbuf, rsize); + + if ((desc->event = sock_create_event(desc)) == INVALID_EVENT) { + save_errno = sock_errno(); + while (close(desc->s) == INVALID_SOCKET && + sock_errno() == EINTR); + desc->s = INVALID_SOCKET; + return ctl_error(save_errno, rbuf, rsize); + } SET_NONBLOCKING(desc->s); #ifdef __WIN32__ driver_select(desc->port, desc->event, ERL_DRV_READ, 1); @@ -4347,7 +4631,7 @@ static ErlDrvSSizeT inet_ctl_getiflist(inet_descriptor* desc, case AF_INET6: #endif case AF_INET: - ASSERT(sp+IFNAMSIZ+1 < sbuf+ifc.ifc_len+1) + ASSERT(sp+IFNAMSIZ+1 < sbuf+ifc.ifc_len+1); strncpy(sp, ifrp->ifr_name, IFNAMSIZ); sp[IFNAMSIZ] = '\0'; sp += strlen(sp), ++sp; @@ -5422,8 +5706,25 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len) case INET_LOPT_ACTIVE: DEBUGF(("inet_set_opts(%ld): s=%d, ACTIVE=%d\r\n", - (long)desc->port, desc->s,ival)); + (long)desc->port, desc->s, ival)); desc->active = ival; + if (desc->active == INET_MULTI) { + long ac = desc->active_count; + Sint16 nval = get_int16(ptr); + ptr += 2; + len -= 2; + ac += nval; + if (ac > INT16_MAX || ac < INT16_MIN) + return -1; + desc->active_count += nval; + if (desc->active_count < 0) + desc->active_count = 0; + if (desc->active_count == 0) { + desc->active = INET_PASSIVE; + packet_passive_message(desc); + } + } else + desc->active_count = 0; if ((desc->stype == SOCK_STREAM) && (desc->active != INET_PASSIVE) && (desc->state == INET_STATE_CLOSED)) { tcp_closed_message((tcp_descriptor *) desc); @@ -5474,27 +5775,25 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len) } continue; - case INET_LOPT_TCP_MSGQ_HIWTRMRK: - if (desc->stype == SOCK_STREAM) { - ErlDrvSizeT high; - if (ival < ERL_DRV_BUSY_MSGQ_LIM_MIN - || ERL_DRV_BUSY_MSGQ_LIM_MAX < ival) - return -1; - high = (ErlDrvSizeT) ival; - erl_drv_busy_msgq_limits(desc->port, NULL, &high); - } + case INET_LOPT_MSGQ_HIWTRMRK: { + ErlDrvSizeT high; + if (ival < ERL_DRV_BUSY_MSGQ_LIM_MIN + || ERL_DRV_BUSY_MSGQ_LIM_MAX < ival) + return -1; + high = (ErlDrvSizeT) ival; + erl_drv_busy_msgq_limits(desc->port, NULL, &high); continue; + } - case INET_LOPT_TCP_MSGQ_LOWTRMRK: - if (desc->stype == SOCK_STREAM) { - ErlDrvSizeT low; - if (ival < ERL_DRV_BUSY_MSGQ_LIM_MIN - || ERL_DRV_BUSY_MSGQ_LIM_MAX < ival) - return -1; - low = (ErlDrvSizeT) ival; - erl_drv_busy_msgq_limits(desc->port, &low, NULL); - } + case INET_LOPT_MSGQ_LOWTRMRK: { + ErlDrvSizeT low; + if (ival < ERL_DRV_BUSY_MSGQ_LIM_MIN + || ERL_DRV_BUSY_MSGQ_LIM_MAX < ival) + return -1; + low = (ErlDrvSizeT) ival; + erl_drv_busy_msgq_limits(desc->port, &low, NULL); continue; + } case INET_LOPT_TCP_SEND_TIMEOUT: if (desc->stype == SOCK_STREAM) { @@ -5529,6 +5828,20 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len) } continue; +#ifdef HAVE_SETNS + case INET_LOPT_NETNS: + /* It is annoying that ival and len are both (signed) int */ + if (ival < 0) return -1; + if (len < ival) return -1; + if (desc->netns != NULL) FREE(desc->netns); + desc->netns = ALLOC(((unsigned int) ival) + 1); + memcpy(desc->netns, ptr, ival); + desc->netns[ival] = '\0'; + ptr += ival; + len -= ival; + continue; +#endif + case INET_OPT_REUSEADDR: #ifdef __WIN32__ continue; /* Bjorn says */ @@ -5722,7 +6035,8 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len) /* XXX fprintf(stderr,"desc->htype == %d, old_htype == %d, desc->active == %d, old_active == %d\r\n",(int)desc->htype, (int) old_htype, (int) desc->active, (int) old_active );*/ - return 1+(desc->htype == old_htype && desc->active == INET_ONCE); + return 1+(desc->htype == old_htype && + (desc->active == INET_ONCE || desc->active == INET_MULTI)); } return 0; } @@ -5855,9 +6169,39 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len) case INET_LOPT_ACTIVE: desc->active = get_int32(curr); curr += 4; + if (desc->active == INET_MULTI) { + long ac = desc->active_count; + Sint16 nval = get_int16(curr); curr += 2; + ac += nval; + if (ac > INT16_MAX || ac < INT16_MIN) + return -1; + desc->active_count += nval; + if (desc->active_count < 0) + desc->active_count = 0; + if (desc->active_count == 0) { + desc->active = INET_PASSIVE; + packet_passive_message(desc); + } + } else + desc->active_count = 0; res = 0; continue; +#ifdef HAVE_SETNS + case INET_LOPT_NETNS: + { + size_t ns_len; + ns_len = get_int32(curr); curr += 4; + CHKLEN(curr, ns_len); + if (desc->netns != NULL) FREE(desc->netns); + desc->netns = ALLOC(ns_len + 1); + memcpy(desc->netns, curr, ns_len); + desc->netns[ns_len] = '\0'; + curr += ns_len; + } + continue; +#endif + /* SCTP options and applicable generic INET options: */ case SCTP_OPT_RTOINFO: @@ -6362,6 +6706,11 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc, case INET_LOPT_ACTIVE: *ptr++ = opt; put_int32(desc->active, ptr); + if (desc->active == INET_MULTI) { + PLACE_FOR(2,ptr); + put_int16(desc->active_count, ptr); + ptr += 2; + } continue; case INET_LOPT_PACKET: *ptr++ = opt; @@ -6396,31 +6745,23 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc, } continue; - case INET_LOPT_TCP_MSGQ_HIWTRMRK: - if (desc->stype == SOCK_STREAM) { - ErlDrvSizeT high = ERL_DRV_BUSY_MSGQ_READ_ONLY; - *ptr++ = opt; - erl_drv_busy_msgq_limits(desc->port, NULL, &high); - ival = high > INT_MAX ? INT_MAX : (int) high; - put_int32(ival, ptr); - } - else { - TRUNCATE_TO(0,ptr); - } + case INET_LOPT_MSGQ_HIWTRMRK: { + ErlDrvSizeT high = ERL_DRV_BUSY_MSGQ_READ_ONLY; + *ptr++ = opt; + erl_drv_busy_msgq_limits(desc->port, NULL, &high); + ival = high > INT_MAX ? INT_MAX : (int) high; + put_int32(ival, ptr); continue; + } - case INET_LOPT_TCP_MSGQ_LOWTRMRK: - if (desc->stype == SOCK_STREAM) { - ErlDrvSizeT low = ERL_DRV_BUSY_MSGQ_READ_ONLY; - *ptr++ = opt; - erl_drv_busy_msgq_limits(desc->port, &low, NULL); - ival = low > INT_MAX ? INT_MAX : (int) low; - put_int32(ival, ptr); - } - else { - TRUNCATE_TO(0,ptr); - } + case INET_LOPT_MSGQ_LOWTRMRK: { + ErlDrvSizeT low = ERL_DRV_BUSY_MSGQ_READ_ONLY; + *ptr++ = opt; + erl_drv_busy_msgq_limits(desc->port, &low, NULL); + ival = low > INT_MAX ? INT_MAX : (int) low; + put_int32(ival, ptr); continue; + } case INET_LOPT_TCP_SEND_TIMEOUT: if (desc->stype == SOCK_STREAM) { @@ -6462,6 +6803,22 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc, } continue; +#ifdef HAVE_SETNS + case INET_LOPT_NETNS: + if (desc->netns != NULL) { + size_t netns_len; + netns_len = strlen(desc->netns); + *ptr++ = opt; + put_int32(netns_len, ptr); + PLACE_FOR(netns_len, ptr); + memcpy(ptr, desc->netns, netns_len); + ptr += netns_len; + } else { + TRUNCATE_TO(0,ptr); + } + continue; +#endif + case INET_OPT_PRIORITY: #ifdef SO_PRIORITY type = SO_PRIORITY; @@ -6726,7 +7083,10 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc, } case INET_LOPT_ACTIVE: { - PLACE_FOR(spec, i, 2*LOAD_ATOM_CNT + LOAD_TUPLE_CNT); + if (desc->active == INET_MULTI) + PLACE_FOR(spec, i, LOAD_ATOM_CNT + LOAD_INT_CNT + LOAD_TUPLE_CNT); + else + PLACE_FOR(spec, i, 2*LOAD_ATOM_CNT + LOAD_TUPLE_CNT); i = LOAD_ATOM (spec, i, am_active); switch (desc->active) { @@ -6739,12 +7099,31 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc, case INET_ONCE : { i = LOAD_ATOM (spec, i, am_once); break; } + case INET_MULTI : + { i = LOAD_INT(spec, i, desc->active_count); break; } + default: ASSERT (0); } i = LOAD_TUPLE (spec, i, 2); break; } +#ifdef HAVE_SETNS + case INET_LOPT_NETNS: + if (desc->netns != NULL) { + PLACE_FOR + (spec, i, + LOAD_ATOM_CNT + LOAD_BUF2BINARY_CNT + LOAD_TUPLE_CNT); + i = LOAD_ATOM (spec, i, am_netns); + i = LOAD_BUF2BINARY + (spec, i, desc->netns, strlen(desc->netns)); + i = LOAD_TUPLE (spec, i, 2); + break; + } + else + continue; /* Ignore */ +#endif + /* SCTP and generic INET options: */ case SCTP_OPT_RTOINFO: @@ -7466,9 +7845,27 @@ static ErlDrvSSizeT inet_subscribe(inet_descriptor* desc, static void inet_stop(inet_descriptor* desc) { erl_inet_close(desc); +#ifdef HAVE_SETNS + if (desc->netns != NULL) + FREE(desc->netns); +#endif FREE(desc); } +static void set_default_msgq_limits(ErlDrvPort port) +{ + ErlDrvSizeT q_high = INET_HIGH_MSGQ_WATERMARK; + ErlDrvSizeT q_low = INET_LOW_MSGQ_WATERMARK; + if (q_low < ERL_DRV_BUSY_MSGQ_LIM_MIN) + q_low = ERL_DRV_BUSY_MSGQ_LIM_MIN; + else if (q_low > ERL_DRV_BUSY_MSGQ_LIM_MAX) + q_low = ERL_DRV_BUSY_MSGQ_LIM_MAX; + if (q_high < ERL_DRV_BUSY_MSGQ_LIM_MIN) + q_high = ERL_DRV_BUSY_MSGQ_LIM_MIN; + else if (q_high > ERL_DRV_BUSY_MSGQ_LIM_MAX) + q_high = ERL_DRV_BUSY_MSGQ_LIM_MAX; + erl_drv_busy_msgq_limits(port, &q_low, &q_high); +} /* Allocate descriptor */ static ErlDrvData inet_start(ErlDrvPort port, int size, int protocol) @@ -7501,6 +7898,7 @@ static ErlDrvData inet_start(ErlDrvPort port, int size, int protocol) socket */ desc->deliver = INET_DELIVER_TERM; /* standard term format */ desc->active = INET_PASSIVE; /* start passive */ + desc->active_count = 0; desc->oph = NULL; desc->opt = NULL; @@ -7531,6 +7929,10 @@ static ErlDrvData inet_start(ErlDrvPort port, int size, int protocol) desc->is_ignored = 0; +#ifdef HAVE_SETNS + desc->netns = NULL; +#endif + return (ErlDrvData)desc; } @@ -7720,6 +8122,39 @@ static ErlDrvSSizeT inet_ctl(inet_descriptor* desc, int cmd, char* buf, return ctl_reply(INET_REP_OK, tbuf, strlen(tbuf), rbuf, rsize); } + case INET_REQ_GETPADDRS: { + DEBUGF(("inet_ctl(%ld): INET_GETPADDRS\r\n", (long)desc->port)); + + if (len != 4) return ctl_error(EINVAL, rbuf, rsize); + + if (! IS_OPEN(desc)) return ctl_xerror(EXBADPORT, rbuf, rsize); + if (! IS_BOUND(desc)) return ctl_xerror(EXBADSEQ, rbuf, rsize); + +#ifdef HAVE_SCTP + if (IS_SCTP(desc) && p_sctp_getpaddrs) { + struct sockaddr *sa; + Uint32 assoc_id; + int n; + ErlDrvSizeT rlen; + + assoc_id = get_int32(buf); + n = p_sctp_getpaddrs(desc->s, assoc_id, &sa); + rlen = reply_inet_addrs(n, (inet_address *) sa, rbuf, rsize); + if (n > 0) p_sctp_freepaddrs(sa); + return rlen; + } +#endif + { /* Fallback to sock_peer */ + inet_address addr; + unsigned int sz; + int i; + + sz = sizeof(addr); + i = sock_peer(desc->s, (struct sockaddr *) &addr, &sz); + return reply_inet_addrs(i >= 0 ? 1 : i, &addr, rbuf, rsize); + } + } + case INET_REQ_PEER: { /* get peername */ char tbuf[sizeof(inet_address)]; inet_address peer; @@ -7735,7 +8170,7 @@ static ErlDrvSSizeT inet_ctl(inet_descriptor* desc, int cmd, char* buf, if (IS_SOCKET_ERROR(sock_peer(desc->s, (struct sockaddr*)ptr,&sz))) return ctl_error(sock_errno(), rbuf, rsize); } - if (inet_get_address(desc->sfamily, tbuf, ptr, &sz) < 0) + if (inet_get_address(tbuf, ptr, &sz) < 0) return ctl_error(EINVAL, rbuf, rsize); return ctl_reply(INET_REP_OK, tbuf, sz, rbuf, rsize); } @@ -7756,6 +8191,39 @@ static ErlDrvSSizeT inet_ctl(inet_descriptor* desc, int cmd, char* buf, } } + case INET_REQ_GETLADDRS: { + DEBUGF(("inet_ctl(%ld): INET_GETLADDRS\r\n", (long)desc->port)); + + if (len != 4) return ctl_error(EINVAL, rbuf, rsize); + + if (! IS_OPEN(desc)) return ctl_xerror(EXBADPORT, rbuf, rsize); + if (! IS_BOUND(desc)) return ctl_xerror(EXBADSEQ, rbuf, rsize); + +#ifdef HAVE_SCTP + if (IS_SCTP(desc) && p_sctp_getladdrs) { + struct sockaddr *sa; + Uint32 assoc_id; + int n; + ErlDrvSizeT rlen; + + assoc_id = get_int32(buf); + n = p_sctp_getladdrs(desc->s, assoc_id, &sa); + rlen = reply_inet_addrs(n, (inet_address *) sa, rbuf, rsize); + if (n > 0) p_sctp_freeladdrs(sa); + return rlen; + } +#endif + { /* Fallback to sock_name */ + inet_address addr; + unsigned int sz; + int i; + + sz = sizeof(addr); + i = sock_name(desc->s, (struct sockaddr *) &addr, &sz); + return reply_inet_addrs(i >= 0 ? 1 : i, &addr, rbuf, rsize); + } + } + case INET_REQ_NAME: { /* get sockname */ char tbuf[sizeof(inet_address)]; inet_address name; @@ -7772,7 +8240,7 @@ static ErlDrvSSizeT inet_ctl(inet_descriptor* desc, int cmd, char* buf, if (IS_SOCKET_ERROR(sock_name(desc->s, (struct sockaddr*)ptr, &sz))) return ctl_error(sock_errno(), rbuf, rsize); } - if (inet_get_address(desc->sfamily, tbuf, ptr, &sz) < 0) + if (inet_get_address(tbuf, ptr, &sz) < 0) return ctl_error(EINVAL, rbuf, rsize); return ctl_reply(INET_REP_OK, tbuf, sz, rbuf, rsize); } @@ -8089,9 +8557,8 @@ static int tcp_inet_init(void) /* initialize the TCP descriptor */ -static ErlDrvData tcp_inet_start(ErlDrvPort port, char* args) +static ErlDrvData prep_tcp_inet_start(ErlDrvPort port, char* args) { - ErlDrvSizeT q_low, q_high; tcp_descriptor* desc; DEBUGF(("tcp_inet_start(%ld) {\r\n", (long)port)); @@ -8101,17 +8568,6 @@ static ErlDrvData tcp_inet_start(ErlDrvPort port, char* args) return ERL_DRV_ERROR_ERRNO; desc->high = INET_HIGH_WATERMARK; desc->low = INET_LOW_WATERMARK; - q_high = INET_HIGH_MSGQ_WATERMARK; - q_low = INET_LOW_MSGQ_WATERMARK; - if (q_low < ERL_DRV_BUSY_MSGQ_LIM_MIN) - q_low = ERL_DRV_BUSY_MSGQ_LIM_MIN; - else if (q_low > ERL_DRV_BUSY_MSGQ_LIM_MAX) - q_low = ERL_DRV_BUSY_MSGQ_LIM_MAX; - if (q_high < ERL_DRV_BUSY_MSGQ_LIM_MIN) - q_high = ERL_DRV_BUSY_MSGQ_LIM_MIN; - else if (q_high > ERL_DRV_BUSY_MSGQ_LIM_MAX) - q_high = ERL_DRV_BUSY_MSGQ_LIM_MAX; - erl_drv_busy_msgq_limits(port, &q_low, &q_high); desc->send_timeout = INET_INFINITY; desc->send_timeout_close = 0; desc->busy_on_send = 0; @@ -8128,6 +8584,12 @@ static ErlDrvData tcp_inet_start(ErlDrvPort port, char* args) return (ErlDrvData) desc; } +static ErlDrvData tcp_inet_start(ErlDrvPort port, char* args) +{ + ErlDrvData data = prep_tcp_inet_start(port, args); + set_default_msgq_limits(port); + return data; +} /* Copy a descriptor, by creating a new port with same settings * as the descriptor desc. * return NULL on error (SYSTEM_LIMIT no ports avail) @@ -8139,7 +8601,7 @@ static tcp_descriptor* tcp_inet_copy(tcp_descriptor* desc,SOCKET s, ErlDrvPort port = desc->inet.port; tcp_descriptor* copy_desc; - copy_desc = (tcp_descriptor*) tcp_inet_start(port, NULL); + copy_desc = (tcp_descriptor*) prep_tcp_inet_start(port, NULL); /* Setup event if needed */ if ((copy_desc->inet.s = s) != INVALID_SOCKET) { @@ -9862,12 +10324,15 @@ static int should_use_so_bsdcompat(void) * as the descriptor desc. * return NULL on error (ENFILE no ports avail) */ +static ErlDrvData packet_inet_start(ErlDrvPort port, char* args, int protocol); + static udp_descriptor* sctp_inet_copy(udp_descriptor* desc, SOCKET s, int* err) { + ErlDrvSizeT q_low, q_high; ErlDrvPort port = desc->inet.port; udp_descriptor* copy_desc; - copy_desc = (udp_descriptor*) sctp_inet_start(port, NULL); + copy_desc = (udp_descriptor*) packet_inet_start(port, NULL, IPPROTO_SCTP); /* Setup event if needed */ if ((copy_desc->inet.s = s) != INVALID_SOCKET) { @@ -9898,9 +10363,17 @@ static udp_descriptor* sctp_inet_copy(udp_descriptor* desc, SOCKET s, int* err) FREE(copy_desc); return NULL; } + + /* Read busy msgq limits of parent */ + q_low = q_high = ERL_DRV_BUSY_MSGQ_READ_ONLY; + erl_drv_busy_msgq_limits(desc->inet.port, &q_low, &q_high); + /* Write same busy msgq limits to child */ + erl_drv_busy_msgq_limits(port, &q_low, &q_high); + copy_desc->inet.port = port; copy_desc->inet.dport = driver_mk_port(port); *err = 0; + return copy_desc; } #endif @@ -9933,13 +10406,17 @@ static ErlDrvData packet_inet_start(ErlDrvPort port, char* args, int protocol) static ErlDrvData udp_inet_start(ErlDrvPort port, char *args) { - return packet_inet_start(port, args, IPPROTO_UDP); + ErlDrvData data = packet_inet_start(port, args, IPPROTO_UDP); + set_default_msgq_limits(port); + return data; } #ifdef HAVE_SCTP static ErlDrvData sctp_inet_start(ErlDrvPort port, char *args) { - return packet_inet_start(port, args, IPPROTO_SCTP); + ErlDrvData data = packet_inet_start(port, args, IPPROTO_SCTP); + set_default_msgq_limits(port); + return data; } #endif @@ -10602,7 +11079,7 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event) inet_input_count(desc, n); udesc->i_ptr += n; - inet_get_address(desc->sfamily, abuf, &other, &len); + inet_get_address(abuf, &other, &len); /* Copy formatted address to the buffer allocated; "len" is the actual length which must be <= than the original reserved. This means that the addr + data in the buffer are contiguous, |
